Compare commits
1 Commits
openmp-off
...
jit-next-g
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9bae5f9a20 |
@@ -50,6 +50,7 @@ set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu-LTO")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-JIT-LTO")
|
||||
|
||||
# Once the plugins for the different targets are validated, they will be added to
|
||||
# the list of supported targets in the current system.
|
||||
@@ -73,7 +74,7 @@ endif()
|
||||
# Follow host OMPT support and check if host support has been requested.
|
||||
# LIBOMP_HAVE_OMPT_SUPPORT indicates whether host OMPT support has been implemented.
|
||||
# LIBOMP_OMPT_SUPPORT indicates whether host OMPT support has been requested (default is ON).
|
||||
# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON).
|
||||
# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON).
|
||||
set(OMPT_TARGET_DEFAULT FALSE)
|
||||
if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (LIBOMP_OMPT_SUPPORT) AND (NOT WIN32))
|
||||
set (OMPT_TARGET_DEFAULT TRUE)
|
||||
|
||||
@@ -33,6 +33,9 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
|
||||
# Define macro with the ELF ID for this target.
|
||||
add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")
|
||||
|
||||
# Define the target triple.
|
||||
add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")
|
||||
|
||||
add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen"
|
||||
SHARED
|
||||
|
||||
|
||||
@@ -12,7 +12,8 @@
|
||||
|
||||
# NOTE: Don't try to build `PluginInterface` using `add_llvm_library` because we
|
||||
# don't want to export `PluginInterface` while `add_llvm_library` requires that.
|
||||
add_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp)
|
||||
add_library(PluginInterface OBJECT
|
||||
PluginInterface.cpp GlobalHandler.cpp JIT.cpp)
|
||||
|
||||
# This is required when using LLVM libraries.
|
||||
llvm_update_compile_flags(PluginInterface)
|
||||
@@ -20,7 +21,31 @@ llvm_update_compile_flags(PluginInterface)
|
||||
if (LLVM_LINK_LLVM_DYLIB)
|
||||
set(llvm_libs LLVM)
|
||||
else()
|
||||
llvm_map_components_to_libnames(llvm_libs Support)
|
||||
llvm_map_components_to_libnames(llvm_libs
|
||||
${LLVM_TARGETS_TO_BUILD}
|
||||
AggressiveInstCombine
|
||||
Analysis
|
||||
BinaryFormat
|
||||
BitReader
|
||||
BitWriter
|
||||
CodeGen
|
||||
Core
|
||||
Extensions
|
||||
InstCombine
|
||||
Instrumentation
|
||||
IPO
|
||||
IRReader
|
||||
Linker
|
||||
MC
|
||||
Object
|
||||
Passes
|
||||
Remarks
|
||||
ScalarOpts
|
||||
Support
|
||||
Target
|
||||
TransformUtils
|
||||
Vectorize
|
||||
)
|
||||
endif()
|
||||
|
||||
target_link_libraries(PluginInterface
|
||||
|
||||
@@ -0,0 +1,366 @@
|
||||
//===- JIT.cpp - Target independent JIT infrastructure --------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "JIT.h"
|
||||
#include "Debug.h"
|
||||
|
||||
#include "omptarget.h"
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/CodeGen/CommandFlags.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/LLVMRemarkStreamer.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IRReader/IRReader.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/MC/SubtargetFeature.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Object/IRObjectFile.h"
|
||||
#include "llvm/Passes/OptimizationLevel.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/SourceMgr.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/TimeProfiler.h"
|
||||
#include "llvm/Support/ToolOutputFile.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::object;
|
||||
using namespace omp;
|
||||
|
||||
static codegen::RegisterCodeGenFlags RCGF;
|
||||
|
||||
namespace {
|
||||
std::once_flag InitFlag;
|
||||
|
||||
/// One-time LLVM setup for the JIT. Initializes the NVPTX or AMDGPU backend
/// components depending on \p TT and registers a fixed set of passes with the
/// global pass registry. Prints a failure message and aborts the process when
/// \p TT is neither an NVPTX nor an AMDGPU triple.
void init(Triple TT) {
  const bool IsNVPTX = TT.isNVPTX();
  const bool IsAMDGPU = !IsNVPTX && TT.isAMDGPU();

  // Nothing below makes sense for a target the JIT cannot handle.
  if (!IsNVPTX && !IsAMDGPU) {
    FAILURE_MESSAGE("unsupported JIT target");
    abort();
  }

  if (IsNVPTX) {
    LLVMInitializeNVPTXTargetInfo();
    LLVMInitializeNVPTXTarget();
    LLVMInitializeNVPTXTargetMC();
    LLVMInitializeNVPTXAsmPrinter();
  } else {
    LLVMInitializeAMDGPUTargetInfo();
    LLVMInitializeAMDGPUTarget();
    LLVMInitializeAMDGPUTargetMC();
    LLVMInitializeAMDGPUAsmPrinter();
  }

  PassRegistry &PR = *PassRegistry::getPassRegistry();

  // Pass groups.
  initializeCore(PR);
  initializeScalarOpts(PR);
  initializeVectorization(PR);
  initializeIPO(PR);
  initializeAnalysis(PR);
  initializeTransformUtils(PR);
  initializeInstCombine(PR);
  initializeTarget(PR);

  // Individual passes.
  initializeExpandLargeDivRemLegacyPassPass(PR);
  initializeExpandLargeFpConvertLegacyPassPass(PR);
  initializeExpandMemCmpPassPass(PR);
  initializeScalarizeMaskedMemIntrinLegacyPassPass(PR);
  initializeSelectOptimizePass(PR);
  initializeCodeGenPreparePass(PR);
  initializeAtomicExpandPass(PR);
  initializeRewriteSymbolsLegacyPassPass(PR);
  initializeWinEHPreparePass(PR);
  initializeDwarfEHPrepareLegacyPassPass(PR);
  initializeSafeStackLegacyPassPass(PR);
  initializeSjLjEHPreparePass(PR);
  initializePreISelIntrinsicLoweringLegacyPassPass(PR);
  initializeGlobalMergePass(PR);
  initializeIndirectBrExpandPassPass(PR);
  initializeInterleavedLoadCombinePass(PR);
  initializeInterleavedAccessPass(PR);
  initializeUnreachableBlockElimLegacyPassPass(PR);
  initializeExpandReductionsPass(PR);
  initializeExpandVectorPredicationPass(PR);
  initializeWasmEHPreparePass(PR);
  initializeWriteBitcodePassPass(PR);
  initializeHardwareLoopsPass(PR);
  initializeTypePromotionPass(PR);
  initializeReplaceWithVeclibLegacyPass(PR);
  initializeJMCInstrumenterPass(PR);
}
|
||||
|
||||
/// Parse the contents of \p Image as LLVM IR into a module created in
/// \p Context.
///
/// The bytes in [ImageStart, ImageEnd) are wrapped in a non-owning memory
/// buffer (no null terminator required) and handed to parseIR.
///
/// \returns the parsed module, or a StringError that includes the parser's
/// diagnostic message when parsing fails.
Expected<std::unique_ptr<Module>>
createModuleFromImage(__tgt_device_image *Image, LLVMContext &Context) {
  const char *Begin = static_cast<const char *>(Image->ImageStart);
  const char *End = static_cast<const char *>(Image->ImageEnd);
  StringRef Data(Begin, End - Begin);

  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);

  SMDiagnostic Err;
  auto Mod = parseIR(*MB, Err, Context);
  if (!Mod)
    // Surface the parser diagnostic instead of dropping it, so callers can
    // tell why the image was rejected.
    return make_error<StringError>("Failed to create module: " +
                                       Err.getMessage().str(),
                                   inconvertibleErrorCode());
  return Mod;
}
|
||||
|
||||
/// Translate a numeric optimization level into the code generation level.
///
/// Levels 0-2 map to None/Less/Default. Level 3 and anything above it map to
/// Aggressive: the value may originate from user configuration, so an
/// out-of-range number is clamped rather than treated as unreachable.
CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
  switch (OptLevel) {
  case 0:
    return CodeGenOpt::None;
  case 1:
    return CodeGenOpt::Less;
  case 2:
    return CodeGenOpt::Default;
  default:
    return CodeGenOpt::Aggressive;
  }
}
|
||||
|
||||
/// Translate a numeric optimization level into the pass pipeline level.
///
/// Levels 0-2 map to O0/O1/O2. Level 3 and anything above it map to O3: the
/// value may originate from user configuration, so an out-of-range number is
/// clamped rather than treated as unreachable.
OptimizationLevel getOptLevel(unsigned OptLevel) {
  switch (OptLevel) {
  case 0:
    return OptimizationLevel::O0;
  case 1:
    return OptimizationLevel::O1;
  case 2:
    return OptimizationLevel::O2;
  default:
    return OptimizationLevel::O3;
  }
}
|
||||
|
||||
/// Build a target machine for the triple recorded in \p M.
///
/// \p CPU is passed through as the processor name and \p OptLevel is
/// translated with getCGOptLevel. The relocation model is derived from the
/// module's PIC level when that flag is present, and the code model is taken
/// from the module.
///
/// \returns the target machine, or an error if the target lookup fails or the
/// target cannot create a machine.
Expected<std::unique_ptr<TargetMachine>>
createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
  const auto &TripleStr = M.getTargetTriple();
  Triple TT(TripleStr);
  CodeGenOpt::Level CGLevel = getCGOptLevel(OptLevel);

  std::string LookupError;
  const Target *TheTarget =
      TargetRegistry::lookupTarget(TripleStr, LookupError);
  if (TheTarget == nullptr)
    return make_error<StringError>(LookupError, inconvertibleErrorCode());

  SubtargetFeatures Features;
  Features.getDefaultSubtargetFeatures(TT);

  // Leave the relocation model unset unless the module states a PIC level.
  std::optional<Reloc::Model> RM;
  if (M.getModuleFlag("PIC Level")) {
    if (M.getPICLevel() == PICLevel::NotPIC)
      RM = Reloc::Static;
    else
      RM = Reloc::PIC_;
  }

  std::optional<CodeModel::Model> CM = M.getCodeModel();
  TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TT);

  std::unique_ptr<TargetMachine> Machine(TheTarget->createTargetMachine(
      TripleStr, CPU, Features.getString(), Options, RM, CM, CGLevel));
  if (Machine == nullptr)
    return make_error<StringError>("Failed to create target machine",
                                   inconvertibleErrorCode());

  return Machine;
}
|
||||
|
||||
///
|
||||
class JITEngine {
|
||||
public:
|
||||
JITEngine(Triple::ArchType TA, std::string MCpu)
|
||||
: TT(Triple::getArchTypeName(TA)), CPU(MCpu) {
|
||||
std::call_once(InitFlag, init, TT);
|
||||
}
|
||||
|
||||
/// Run jit compilation. It is expected to get a memory buffer containing the
|
||||
/// generated device image that could be loaded to the device directly.
|
||||
Expected<std::unique_ptr<MemoryBuffer>>
|
||||
run(__tgt_device_image *Image, unsigned OptLevel,
|
||||
jit::PostProcessingFn PostProcessing);
|
||||
|
||||
private:
|
||||
/// Run backend, which contains optimization and code generation.
|
||||
Expected<std::unique_ptr<MemoryBuffer>> backend(Module &M, unsigned OptLevel);
|
||||
|
||||
/// Run optimization pipeline.
|
||||
void opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
|
||||
unsigned OptLevel);
|
||||
|
||||
/// Run code generation.
|
||||
void codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
|
||||
raw_pwrite_stream &OS);
|
||||
|
||||
LLVMContext Context;
|
||||
const Triple TT;
|
||||
const std::string CPU;
|
||||
};
|
||||
|
||||
/// Optimize \p M in place with the default per-module pipeline for
/// \p OptLevel, built by a PassBuilder configured for \p TM. \p TLII supplies
/// the target library information used by the function analyses.
void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
                    unsigned OptLevel) {
  PipelineTuningOptions TuningOpts;
  std::optional<PGOOptions> NoPGO;

  // Keep this declaration order; it is the order the original code used.
  LoopAnalysisManager LoopAM;
  FunctionAnalysisManager FunctionAM;
  CGSCCAnalysisManager CGSCCAM;
  ModuleAnalysisManager ModuleAM;
  ModulePassManager Pipeline;

  PassBuilder Builder(TM, TuningOpts, NoPGO, nullptr);

  // Register the target library analysis built from TLII ahead of the
  // builder's own function analyses.
  FunctionAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });

  // Hook up the standard analyses and the proxies between the managers.
  Builder.registerModuleAnalyses(ModuleAM);
  Builder.registerCGSCCAnalyses(CGSCCAM);
  Builder.registerFunctionAnalyses(FunctionAM);
  Builder.registerLoopAnalyses(LoopAM);
  Builder.crossRegisterProxies(LoopAM, FunctionAM, CGSCCAM, ModuleAM);

  Pipeline.addPass(Builder.buildPerModuleDefaultPipeline(getOptLevel(OptLevel)));
  Pipeline.run(M, ModuleAM);
}
|
||||
|
||||
/// Lower \p M with the legacy pass manager and write the result to \p OS.
///
/// Assembly is emitted for NVPTX triples and an object file for every other
/// triple. If the target machine reports that it cannot emit the requested
/// file type, a failure message is printed and \p OS is left untouched.
void JITEngine::codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII,
                        Module &M, raw_pwrite_stream &OS) {
  legacy::PassManager PM;
  PM.add(new TargetLibraryInfoWrapperPass(*TLII));

  // Downcast within the class hierarchy with static_cast; reinterpret_cast
  // performs no pointer adjustment and is not the right tool here.
  MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(
      static_cast<LLVMTargetMachine *>(TM));

  // addPassesToEmitFile returns true when emission is not supported; running
  // the pass manager in that case would not produce a usable image.
  if (TM->addPassesToEmitFile(PM, OS, nullptr,
                              TT.isNVPTX() ? CGFT_AssemblyFile
                                           : CGFT_ObjectFile,
                              /* DisableVerify */ false, MMIWP)) {
    FAILURE_MESSAGE("target machine cannot emit the requested file type\n");
    return;
  }

  PM.run(M);
}
|
||||
|
||||
/// Optimize \p M and generate code for it.
///
/// Sets up optimization remarks on the engine's context, creates a target
/// machine for the module with this engine's CPU and \p OptLevel, runs opt
/// and codegen, and copies the generated bytes into a new memory buffer.
///
/// \returns the buffer, or the error from remark setup or target machine
/// creation.
Expected<std::unique_ptr<MemoryBuffer>> JITEngine::backend(Module &M,
                                                           unsigned OptLevel) {
  auto RemarksOrErr = setupLLVMOptimizationRemarks(
      Context, /* RemarksFilename */ "", /* RemarksPasses */ "",
      /* RemarksFormat */ "", /* RemarksWithHotness */ false);
  if (!RemarksOrErr)
    return RemarksOrErr.takeError();
  // Keep the remarks file if one was actually opened.
  if (auto &RemarksFile = *RemarksOrErr)
    RemarksFile->keep();

  auto MachineOrErr = createTargetMachine(M, CPU, OptLevel);
  if (!MachineOrErr)
    return MachineOrErr.takeError();
  std::unique_ptr<TargetMachine> Machine = std::move(*MachineOrErr);

  TargetLibraryInfoImpl LibInfo(TT);
  opt(Machine.get(), &LibInfo, M, OptLevel);

  // Codegen writes into an in-memory vector through this stream.
  SmallVector<char> Output;
  raw_svector_ostream OutStream(Output);
  codegen(Machine.get(), &LibInfo, M, OutStream);

  return MemoryBuffer::getMemBufferCopy(OutStream.str());
}
|
||||
|
||||
/// Compile \p Image: parse it into a module, run the backend at \p OptLevel,
/// and return whatever \p PostProcessing produces from the backend output.
/// Errors from parsing or from the backend are returned unchanged.
Expected<std::unique_ptr<MemoryBuffer>>
JITEngine::run(__tgt_device_image *Image, unsigned OptLevel,
               jit::PostProcessingFn PostProcessing) {
  Expected<std::unique_ptr<Module>> ParsedOrErr =
      createModuleFromImage(Image, Context);
  if (!ParsedOrErr)
    return ParsedOrErr.takeError();
  std::unique_ptr<Module> TheModule = std::move(*ParsedOrErr);

  Expected<std::unique_ptr<MemoryBuffer>> BinaryOrErr =
      backend(*TheModule, OptLevel);
  if (!BinaryOrErr)
    return BinaryOrErr.takeError();

  // Ownership of the generated buffer moves into the callback.
  return PostProcessing(std::move(*BinaryOrErr));
}
|
||||
|
||||
/// A map from a bitcode image start address to its corresponding triple. If the
|
||||
/// image is not in the map, it is not a bitcode image.
|
||||
DenseMap<void *, Triple::ArchType> BitcodeImageMap;
|
||||
|
||||
/// Output images generated from LLVM backend.
|
||||
SmallVector<std::unique_ptr<MemoryBuffer>, 4> JITImages;
|
||||
|
||||
/// A list of __tgt_device_image images.
|
||||
std::list<__tgt_device_image> TgtImages;
|
||||
} // namespace
|
||||
|
||||
namespace llvm {
|
||||
namespace omp {
|
||||
namespace jit {
|
||||
/// Return true if \p Image is a bitcode image whose triple has architecture
/// \p TA.
///
/// Positive results are remembered in BitcodeImageMap, keyed by the image
/// start address, so a repeated query for the same image and architecture
/// skips reading the symbol table again.
bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA) {
  TimeTraceScope TimeScope("Check bitcode image");

  void *const Start = Image->ImageStart;

  // Fast path: this image was already recognized for the same architecture.
  auto Cached = BitcodeImageMap.find(Start);
  if (Cached != BitcodeImageMap.end() && Cached->second == TA)
    return true;

  const char *Begin = reinterpret_cast<const char *>(Start);
  const char *End = reinterpret_cast<const char *>(Image->ImageEnd);
  StringRef Data(Begin, End - Begin);

  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
  if (!MB)
    return false;

  // Anything whose IR symbol table cannot be read is not a bitcode image.
  Expected<object::IRSymtabFile> SymtabOrErr = object::readIRSymtab(*MB);
  if (!SymtabOrErr) {
    consumeError(SymtabOrErr.takeError());
    return false;
  }

  const Triple ImageTriple(SymtabOrErr->TheReader.getTargetTriple());
  if (ImageTriple.getArch() != TA)
    return false;

  BitcodeImageMap[Start] = TA;
  return true;
}
|
||||
|
||||
/// JIT-compile the bitcode \p Image for architecture \p TA and processor
/// \p MCPU at \p OptLevel, applying \p PostProcessing to the backend output.
///
/// On success the generated buffer is stored in JITImages and a copy of
/// \p Image, with its start/end pointers redirected to that buffer, is
/// appended to TgtImages.
///
/// \returns a pointer to the new entry in TgtImages, or the compilation error.
Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
                                       Triple::ArchType TA, std::string MCPU,
                                       unsigned OptLevel,
                                       PostProcessingFn PostProcessing) {
  JITEngine Engine(TA, MCPU);

  Expected<std::unique_ptr<MemoryBuffer>> BinaryOrErr =
      Engine.run(Image, OptLevel, PostProcessing);
  if (!BinaryOrErr)
    return BinaryOrErr.takeError();

  // Keep the generated binary alive for as long as the image refers to it.
  JITImages.push_back(std::move(*BinaryOrErr));
  TgtImages.push_back(*Image);

  const MemoryBuffer &Binary = *JITImages.back();
  __tgt_device_image &NewImage = TgtImages.back();

  // The image descriptor stores untyped, non-const pointers.
  NewImage.ImageStart = const_cast<char *>(Binary.getBufferStart());
  NewImage.ImageEnd = const_cast<char *>(Binary.getBufferEnd());

  return &NewImage;
}
|
||||
|
||||
} // namespace jit
|
||||
} // namespace omp
|
||||
} // namespace llvm
|
||||
@@ -0,0 +1,50 @@
|
||||
//===- JIT.h - Target independent JIT infrastructure ----------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
|
||||
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
struct __tgt_device_image;
|
||||
|
||||
namespace llvm {
|
||||
class MemoryBuffer;
|
||||
|
||||
namespace omp {
|
||||
namespace jit {
|
||||
|
||||
/// Function type for a callback that will be called after the backend is
|
||||
/// called.
|
||||
using PostProcessingFn = std::function<Expected<std::unique_ptr<MemoryBuffer>>(
|
||||
std::unique_ptr<MemoryBuffer>)>;
|
||||
|
||||
/// Check if \p Image contains bitcode whose target architecture is \p TA.
|
||||
bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA);
|
||||
|
||||
/// Compile the bitcode image \p Image and generate the binary image that can be
|
||||
/// loaded to a target device with architecture \p TA and processor \p MCpu. \p
|
||||
/// PostProcessing will be called after codegen to handle cases such as assembler
|
||||
/// as an external tool.
|
||||
Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
|
||||
Triple::ArchType TA, std::string MCpu,
|
||||
unsigned OptLevel,
|
||||
PostProcessingFn PostProcessing);
|
||||
} // namespace jit
|
||||
} // namespace omp
|
||||
} // namespace llvm
|
||||
|
||||
#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "PluginInterface.h"
|
||||
#include "Debug.h"
|
||||
#include "GlobalHandler.h"
|
||||
#include "JIT.h"
|
||||
#include "elf_common.h"
|
||||
#include "omptarget.h"
|
||||
#include "omptargetplugin.h"
|
||||
@@ -629,7 +630,10 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *TgtImage) {
|
||||
if (!Plugin::isActive())
|
||||
return false;
|
||||
|
||||
return elf_check_machine(TgtImage, Plugin::get().getMagicElfBits());
|
||||
if (elf_check_machine(TgtImage, Plugin::get().getMagicElfBits()))
|
||||
return true;
|
||||
|
||||
return jit::checkBitcodeImage(TgtImage, Plugin::get().getTripleArch());
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
|
||||
@@ -700,7 +704,37 @@ int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId,
|
||||
__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
|
||||
__tgt_device_image *TgtImage) {
|
||||
GenericPluginTy &Plugin = Plugin::get();
|
||||
auto TableOrErr = Plugin.getDevice(DeviceId).loadBinary(Plugin, TgtImage);
|
||||
GenericDeviceTy &Device = Plugin.getDevice(DeviceId);
|
||||
|
||||
// If it is a bitcode image, we have to jit the binary image before loading to
|
||||
// the device.
|
||||
{
|
||||
UInt32Envar JITOptLevel("LIBOMPTARGET_JIT_OPT_LEVEL", 3);
|
||||
Triple::ArchType TA = Plugin.getTripleArch();
|
||||
std::string Arch = Device.getArch();
|
||||
|
||||
jit::PostProcessingFn PostProcessing =
|
||||
[&Device](std::unique_ptr<MemoryBuffer> MB)
|
||||
-> Expected<std::unique_ptr<MemoryBuffer>> {
|
||||
return Device.doJITPostProcessing(std::move(MB));
|
||||
};
|
||||
|
||||
if (jit::checkBitcodeImage(TgtImage, TA)) {
|
||||
auto TgtImageOrErr =
|
||||
jit::compile(TgtImage, TA, Arch, JITOptLevel, PostProcessing);
|
||||
if (!TgtImageOrErr) {
|
||||
auto Err = TgtImageOrErr.takeError();
|
||||
REPORT("Failure to jit binary image from bitcode image %p on device "
|
||||
"%d: %s\n",
|
||||
TgtImage, DeviceId, toString(std::move(Err)).data());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
TgtImage = *TgtImageOrErr;
|
||||
}
|
||||
}
|
||||
|
||||
auto TableOrErr = Device.loadBinary(Plugin, TgtImage);
|
||||
if (!TableOrErr) {
|
||||
auto Err = TableOrErr.takeError();
|
||||
REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "omptarget.h"
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Frontend/OpenMP/OMPConstants.h"
|
||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||
#include "llvm/Support/Allocator.h"
|
||||
@@ -377,6 +378,17 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
|
||||
}
|
||||
uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
|
||||
|
||||
/// Get target architecture.
|
||||
virtual std::string getArch() const {
|
||||
llvm_unreachable("device doesn't support JIT");
|
||||
}
|
||||
|
||||
/// Post processing after jit backend. The ownership of \p MB will be taken.
|
||||
virtual Expected<std::unique_ptr<MemoryBuffer>>
|
||||
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
|
||||
return MB;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Register offload entry for global variable.
|
||||
Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
|
||||
@@ -526,6 +538,11 @@ struct GenericPluginTy {
|
||||
/// Get the ELF code to recognize the binary image of this plugin.
|
||||
virtual uint16_t getMagicElfBits() const = 0;
|
||||
|
||||
/// Get the target triple of this plugin.
|
||||
virtual Triple::ArchType getTripleArch() const {
|
||||
llvm_unreachable("target doesn't support jit");
|
||||
}
|
||||
|
||||
/// Allocate a structure using the internal allocator.
|
||||
template <typename Ty> Ty *allocate() {
|
||||
return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
|
||||
|
||||
@@ -278,6 +278,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
||||
GridValues.GV_Warp_Size))
|
||||
return Err;
|
||||
|
||||
if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
|
||||
ComputeCapability.Major))
|
||||
return Err;
|
||||
|
||||
if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
|
||||
ComputeCapability.Minor))
|
||||
return Err;
|
||||
|
||||
return Plugin::success();
|
||||
}
|
||||
|
||||
@@ -794,6 +802,9 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
||||
return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
|
||||
}
|
||||
|
||||
/// See GenericDeviceTy::getArch().
|
||||
std::string getArch() const override { return ComputeCapability.str(); }
|
||||
|
||||
private:
|
||||
using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
|
||||
using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
|
||||
@@ -810,6 +821,15 @@ private:
|
||||
|
||||
/// The CUDA device handler.
|
||||
CUdevice Device = CU_DEVICE_INVALID;
|
||||
|
||||
/// The compute capability of the corresponding CUDA device.
struct ComputeCapabilityTy {
  /// Major and minor revision numbers. Zero-initialized so that reading them
  /// before they are filled in is well defined.
  uint32_t Major = 0;
  uint32_t Minor = 0;

  /// Format as "sm_" followed by Major * 10 + Minor, e.g. "sm_86" for 8.6.
  std::string str() const {
    return "sm_" + std::to_string(Major * 10 + Minor);
  }
} ComputeCapability;
|
||||
};
|
||||
|
||||
Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
|
||||
@@ -908,6 +928,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
|
||||
/// Get the ELF code for recognizing the compatible image binary.
|
||||
uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
|
||||
|
||||
Triple::ArchType getTripleArch() const override {
|
||||
// TODO: I think we can drop the support for 32-bit NVPTX devices.
|
||||
return Triple::nvptx64;
|
||||
}
|
||||
|
||||
/// Check whether the image is compatible with the available CUDA devices.
|
||||
Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
|
||||
for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
|
||||
|
||||
@@ -364,6 +364,10 @@ struct GenELF64PluginTy final : public GenericPluginTy {
|
||||
Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
Triple::ArchType getTripleArch() const override {
|
||||
return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
|
||||
}
|
||||
};
|
||||
|
||||
GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
|
||||
|
||||
@@ -34,6 +34,15 @@ def append_dynamic_library_path(name, value, sep):
|
||||
else:
|
||||
config.environment[name] = value
|
||||
|
||||
# Evaluate an environment variable value that encodes a boolean as a string.
def evaluate_bool_env(env):
    """Return True if `env` is "on", "true" or "1", ignoring case."""
    return env.lower() in ("on", "true", "1")
|
||||
|
||||
# name: The name of this test suite.
|
||||
config.name = 'libomptarget :: ' + config.libomptarget_current_target
|
||||
|
||||
@@ -111,10 +120,17 @@ else: # Unices
|
||||
config.test_flags += " --libomptarget-nvptx-bc-path=" + config.library_dir
|
||||
if config.libomptarget_current_target.endswith('-LTO'):
|
||||
config.test_flags += " -foffload-lto"
|
||||
if config.libomptarget_current_target.endswith('-JIT-LTO') and evaluate_bool_env(
|
||||
config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS']
|
||||
):
|
||||
config.test_flags += " -foffload-lto"
|
||||
config.test_flags += " -Wl,--embed-bitcode"
|
||||
|
||||
def remove_suffix_if_present(name):
    """Strip a trailing '-JIT-LTO' or '-LTO' variant suffix from `name`.

    Names without either suffix are returned unchanged.
    """
    # The longer suffix must be tested first: every name that ends in
    # '-JIT-LTO' also ends in '-LTO', which would otherwise leave '-JIT' behind.
    for suffix in ('-JIT-LTO', '-LTO'):
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name
|
||||
|
||||
|
||||
Reference in New Issue
Block a user