Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/pre-commit-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ jobs:
- name: Generate Compile Database
run: |
make compile_db_all
- name: Generate JIT kernel bitcode header
# Only builds kernels.bc + kernels_bc.h, not the full project (~2s).
# Needed so clang-tidy can resolve #include "kernels_bc.h" in PrebuiltIR.cpp.
run: |
cmake --build _build/Release --target jit_kernels_bc
- name: Run pre-commit hooks
run: |
pre-commit run --all-files
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ repos:
PR base / merge queue base
stages: [pre-commit]
entry: ./scripts/run-clang-tidy.py
args: [--diff, auto]
args: [--diff, auto, --exclude, "bolt/jit/kernels/"]
language: python
pass_filenames: false
always_run: true
Expand Down
7 changes: 7 additions & 0 deletions bolt/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Pull in the kernels subdirectory first so that the jit_kernels_bc target
# referenced below already exists.
add_subdirectory(kernels)

bolt_add_library(
  bolt_thrustjit
  CompiledModule.cpp
  ThrustJITv2.cpp
  PrebuiltIR.cpp
  RowContainer/RowContainerCodeGenerator.cpp
  RowContainer/RowEqVectorsCodeGenerator.cpp
)

# Build-order edge only: jit_kernels_bc must have run before any source of
# bolt_thrustjit is compiled. Nothing is linked through this dependency.
add_dependencies(bolt_thrustjit jit_kernels_bc)

# Private include path pointing at the binary directory of the kernels
# subdirectory.
target_include_directories(
  bolt_thrustjit
  PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/kernels"
)

target_link_libraries(
  bolt_thrustjit
  PUBLIC llvm-core::llvm-core
         date::date
         fmt::fmt
         Folly::folly
)

target_compile_options(
Expand Down
72 changes: 72 additions & 0 deletions bolt/jit/PrebuiltIR.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) ByteDance Ltd. and/or its affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifdef ENABLE_BOLT_JIT

#include "bolt/jit/PrebuiltIR.h"

#include <llvm/Bitcode/BitcodeReader.h>
#include <llvm/Linker/Linker.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/raw_ostream.h>

// Generated at build time by xxd -i (excluded from clang-tidy)
#include "kernels_bc.h"

namespace bytedance::bolt::jit {

/// Links the embedded pre-built kernel bitcode into `target`.
///
/// The bitcode bytes come from the generated header (`kernels_bc`,
/// `kernels_bc_len`). They are parsed in `target`'s LLVMContext, linked into
/// `target`, and every function that had a body in the bitcode is then given
/// internal linkage.
///
/// The function returns void, so failures are only reported on llvm::errs():
///  - if the bitcode cannot be parsed, `target` is left untouched;
///  - if linking fails, the function returns without internalizing anything
///    (the state of `target` after a failed link is whatever the LLVM linker
///    left behind).
void PrebuiltIR::linkInto(llvm::Module& target) {
  // Wrap the embedded byte array without copying it. The array is not
  // NUL-terminated, hence RequiresNullTerminator=false.
  auto buffer = llvm::MemoryBuffer::getMemBuffer(
      llvm::StringRef(
          reinterpret_cast<const char*>(kernels_bc), kernels_bc_len),
      "prebuilt_kernels",
      /*RequiresNullTerminator=*/false);

  auto moduleOrErr =
      llvm::parseBitcodeFile(buffer->getMemBufferRef(), target.getContext());
  if (!moduleOrErr) {
    llvm::errs() << "[JIT] Failed to parse prebuilt bitcode: "
                 << llvm::toString(moduleOrErr.takeError()) << "\n";
    return;
  }

  auto prebuilt = std::move(*moduleOrErr);

  // Remember which functions the bitcode defines. The names must be captured
  // before linking because linkModules() consumes `prebuilt`.
  llvm::SmallVector<std::string, 8> prebuiltNames;
  for (auto& func : *prebuilt) {
    if (!func.isDeclaration()) {
      prebuiltNames.push_back(func.getName().str());
    }
  }

  prebuilt->setDataLayout(target.getDataLayout());

  // Linker::linkModules() returns true on error. Do not carry on as if the
  // kernels were available when the link did not succeed.
  if (llvm::Linker::linkModules(target, std::move(prebuilt))) {
    llvm::errs() << "[JIT] Failed to link prebuilt bitcode into module '"
                 << target.getModuleIdentifier() << "'\n";
    return;
  }

  // Internalize pre-built functions so they don't get exported as
  // global symbols into the JITDylib (avoiding "duplicate definition"
  // errors). Internal functions are still callable within the same
  // module -- the JIT compiler resolves them during compilation.
  for (const auto& name : prebuiltNames) {
    auto* fn = target.getFunction(name);
    // Only definitions may carry internal linkage; a body-less declaration
    // with internal linkage would be invalid IR.
    if (fn != nullptr && !fn->isDeclaration()) {
      fn->setLinkage(llvm::GlobalValue::InternalLinkage);
    }
  }
}

} // namespace bytedance::bolt::jit

#endif // ENABLE_BOLT_JIT
60 changes: 60 additions & 0 deletions bolt/jit/PrebuiltIR.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) ByteDance Ltd. and/or its affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#ifdef ENABLE_BOLT_JIT

#include <llvm/IR/Module.h>
#include <memory>

namespace bytedance::bolt::jit {

/// Pre-built JIT IR: C++ kernel functions compiled to LLVM bitcode at build
/// time (kernels.cpp -> clang -> .bc -> xxd -> embedded byte array).
///
/// Usage with ThrustJITv2:
///
/// auto irGenerator = [](llvm::Module& m) -> bool {
/// // 1. Link pre-built kernels into this module
/// PrebuiltIR::linkInto(m);
///
/// // 2. Build outer function with IRBuilder, calling pre-built kernels
/// auto* kernel = m.getFunction("jit_store_i64");
/// builder.CreateCall(kernel, {row, offset, decoded, index, ...});
///
/// // 3. Verify the generated function
/// // NOTE(review): llvm::verifyFunction returns true when the function
/// // is broken -- confirm which polarity the generator callback expects.
/// return llvm::verifyFunction(*func, &llvm::errs());
/// };
///
/// // ThrustJITv2 compiles the module. Its AlwaysInliner pass inlines
/// // the pre-built kernels into the outer function automatically
/// // (presumably this relies on the kernels being marked alwaysinline;
/// // verify against kernels.cpp).
/// auto mod = ThrustJITv2::getInstance()->CompileModule(irGenerator, name);
/// auto fn = mod->getFuncPtr(name);
///
/// See kernels.cpp for the list of available pre-built kernels.
class PrebuiltIR {
public:
/// Load pre-built bitcode and link into target module.
///
/// All pre-built functions are internalized (InternalLinkage) so they
/// don't conflict across modules in the JITDylib. The AlwaysInliner
/// pass in ThrustJITv2's IR transform layer inlines them into callers.
///
/// @param target Module that receives the pre-built kernels; the bitcode
/// is parsed in this module's LLVMContext.
///
/// Returns nothing: if the embedded bitcode cannot be parsed, an error is
/// printed to llvm::errs() and the call returns without modifying target,
/// so callers should null-check the result of getFunction() on the kernels
/// they need.
static void linkInto(llvm::Module& target);
};

} // namespace bytedance::bolt::jit

#endif // ENABLE_BOLT_JIT
59 changes: 58 additions & 1 deletion bolt/jit/ThrustJITv2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,22 @@
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils.h"

#include <glog/logging.h>

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <functional>
#include <sstream>
#include <vector>

namespace bytedance::bolt::jit {
Expand Down Expand Up @@ -81,6 +91,46 @@ llvm::Expected<std::unique_ptr<ThrustJITv2>> ThrustJITv2::Create() {
}

result->jit_ = std::move(*jit);

// Add IR optimization pass: inline alwaysinline functions (pre-built kernels)
// and run basic optimizations on the inlined code.
result->jit_->getIRTransformLayer().setTransform(
[](llvm::orc::ThreadSafeModule TSM,
const llvm::orc::MaterializationResponsibility&)
-> llvm::Expected<llvm::orc::ThreadSafeModule> {
TSM.withModuleDo([](llvm::Module& M) {
// Module-level: inline alwaysinline functions
{
llvm::legacy::PassManager mpm;
mpm.add(llvm::createAlwaysInlinerLegacyPass());
mpm.run(M);
}

// Per-function: clean up inlined code
auto fpm = std::make_unique<llvm::legacy::FunctionPassManager>(&M);
fpm->add(llvm::createPromoteMemoryToRegisterPass());
fpm->add(llvm::createInstructionCombiningPass());
fpm->add(llvm::createGVNPass());
fpm->add(llvm::createCFGSimplificationPass());
fpm->doInitialization();
for (auto& func : M) {
if (!func.isDeclaration()) {
fpm->run(func);
}
}
// Dump final IR for debugging (enable with --v=1)
if (VLOG_IS_ON(1)) {
std::string irStr;
llvm::raw_string_ostream os(irStr);
M.print(os, nullptr);
VLOG(1) << "[JIT] Final IR for module '" << M.getModuleIdentifier()
<< "':\n"
<< irStr;
}
});
return std::move(TSM);
});

return result;
}

Expand Down Expand Up @@ -117,6 +167,7 @@ CompiledModuleSP ThrustJITv2::CompileModule(
compilingCv_.notify_all();
};

auto compileStart = std::chrono::steady_clock::now();
auto llvmContext = std::make_unique<llvm::LLVMContext>();
auto llvmModule = std::make_unique<llvm::Module>(funcName, *llvmContext);
llvmModule->setDataLayout(jit_->getDataLayout());
Expand All @@ -127,7 +178,7 @@ CompiledModuleSP ThrustJITv2::CompileModule(

std::vector<std::string> funcNames;
for (auto& function : *llvmModule) {
if (!function.isDeclaration()) {
if (!function.isDeclaration() && !function.hasInternalLinkage()) {
funcNames.emplace_back(function.getName().str());
}
}
Expand Down Expand Up @@ -207,6 +258,12 @@ CompiledModuleSP ThrustJITv2::CompileModule(
}
compilingCv_.notify_all();

auto compileMs = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - compileStart)
.count();
LOG(INFO) << "[JIT] Compiled '" << funcName << "' in " << compileMs
<< " ms, code size: " << codeSize << " bytes";

return compiledModule;
}

Expand Down
86 changes: 86 additions & 0 deletions bolt/jit/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Copyright (c) ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Locate the clang binary used to compile the JIT kernels.
# First pass: look only in the LLVM tools directory (no default search paths).
# Second pass: if that found nothing, search the default locations and make a
# missing clang a hard configure error.
message(STATUS "LLVM_TOOLS_BINARY_DIR = ${LLVM_TOOLS_BINARY_DIR}")
find_program(
  LLVM_CLANG
  NAMES clang
  PATHS ${LLVM_TOOLS_BINARY_DIR}
  NO_DEFAULT_PATH
)
if(NOT LLVM_CLANG)
  find_program(LLVM_CLANG NAMES clang REQUIRED)
endif()
message(STATUS "Using clang: ${LLVM_CLANG}")

# Bitcode compatibility check: compare the x.y.z version printed by
# `clang --version` against LLVM_PACKAGE_VERSION. A mismatch only produces a
# warning. The comparison is skipped when either version is unavailable.
execute_process(
  COMMAND ${LLVM_CLANG} --version
  OUTPUT_VARIABLE _CLANG_VERSION_OUTPUT
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
# The first x.y.z triple in the output is taken as the clang version.
string(
  REGEX MATCH
  "[0-9]+\\.[0-9]+\\.[0-9]+"
  _CLANG_VERSION
  "${_CLANG_VERSION_OUTPUT}"
)
if(LLVM_PACKAGE_VERSION AND _CLANG_VERSION)
  if(NOT _CLANG_VERSION VERSION_EQUAL LLVM_PACKAGE_VERSION)
    message(WARNING
      "Clang version (${_CLANG_VERSION}) does not match "
      "LLVM version (${LLVM_PACKAGE_VERSION}). "
      "Pre-built bitcode may have compatibility issues.")
  endif()
endif()

# Inputs and outputs of the kernel bitcode pipeline.
set(KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernels.cpp)
set(KERNEL_BC ${CMAKE_CURRENT_BINARY_DIR}/kernels.bc)
set(KERNEL_HEADER ${CMAKE_CURRENT_BINARY_DIR}/kernels_bc.h)

# Generate kernel compile flags from CMake target properties.
# Add more targets if future kernels need additional headers.
set(KERNEL_DEP_TARGETS bolt_vector)
set(KERNEL_FLAGS_RSP ${CMAKE_CURRENT_BINARY_DIR}/kernel_flags.rsp)

# Merge INCLUDE_DIRECTORIES and COMPILE_DEFINITIONS from all dep targets.
# The properties are read through generator expressions, so they are resolved
# at generate time rather than at configure time.
set(_INC "")
set(_DEF "")
foreach(_target ${KERNEL_DEP_TARGETS})
  list(APPEND _INC "$<TARGET_PROPERTY:${_target},INCLUDE_DIRECTORIES>")
  list(APPEND _DEF "$<TARGET_PROPERTY:${_target},COMPILE_DEFINITIONS>")
endforeach()

# Deduplicate, filter empty entries, add -I/-D prefixes, join with newlines.
# Empty entries are dropped for includes as well as definitions: a bare "-I"
# line in the response file would consume the following line as its argument.
set(_INC_DEDUP "$<REMOVE_DUPLICATES:${_INC}>")
set(_INC_CLEAN "$<LIST:FILTER,${_INC_DEDUP},EXCLUDE,^$>")
set(_INC_FLAGS "$<LIST:TRANSFORM,${_INC_CLEAN},PREPEND,-I>")

set(_DEF_DEDUP "$<REMOVE_DUPLICATES:${_DEF}>")
set(_DEF_CLEAN "$<LIST:FILTER,${_DEF_DEDUP},EXCLUDE,^$>")
set(_DEF_FLAGS "$<LIST:TRANSFORM,${_DEF_CLEAN},PREPEND,-D>")

# One flag per line; -DNDEBUG is always appended.
file(GENERATE OUTPUT ${KERNEL_FLAGS_RSP} CONTENT
  "$<LIST:JOIN,${_INC_FLAGS},\n>\n$<LIST:JOIN,${_DEF_FLAGS},\n>\n-DNDEBUG\n"
)

# Compile kernels.cpp to LLVM bitcode with bolt headers.
# Include paths and definitions are passed through the response file.
# NOTE(review): only kernels.cpp and the response file are listed as
# dependencies; headers included by kernels.cpp are not tracked here, so a
# header change alone will not re-run this rule.
add_custom_command(
  OUTPUT ${KERNEL_BC}
  COMMAND
    ${LLVM_CLANG} -emit-llvm -c -O2 -std=c++17
    "@${KERNEL_FLAGS_RSP}"
    -o ${KERNEL_BC} ${KERNEL_SRC}
  DEPENDS ${KERNEL_SRC} ${KERNEL_FLAGS_RSP}
  COMMENT "Compiling JIT kernels to LLVM bitcode"
  # Escape arguments consistently for every generator and platform.
  VERBATIM
)

# Locate xxd at configure time so a missing tool is reported up front instead
# of as a failing build step.
find_program(XXD_EXECUTABLE xxd REQUIRED)

# Embed bitcode as C byte array header.
# xxd derives the array names from the input file name exactly as given on the
# command line, so the input must stay the relative name "kernels.bc" (run from
# the binary directory) to keep the symbols kernels_bc / kernels_bc_len.
# The header is passed as xxd's output-file argument rather than through a
# shell redirect, so the command does not depend on shell features.
add_custom_command(
  OUTPUT ${KERNEL_HEADER}
  COMMAND ${XXD_EXECUTABLE} -i kernels.bc ${KERNEL_HEADER}
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${KERNEL_BC}
  COMMENT "Embedding JIT kernel bitcode as C header"
  VERBATIM
)

# Named target that other directories can depend on to get the header built.
add_custom_target(jit_kernels_bc DEPENDS ${KERNEL_HEADER})
Loading