Compare commits
1 Commits
llvm-test-
...
simple-tes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4979b021fc |
@@ -254,6 +254,7 @@ LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading de
|
||||
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
|
||||
LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
|
||||
LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
|
||||
LANGOPT(OpenMPGlobalizeToGlobalSpace , 1, 0, "Globalize to global space for the globalized variables")
|
||||
LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
|
||||
LANGOPT(OpenMPNoNestedParallelism , 1, 0, "Assume that no thread in a parallel region will encounter a parallel region")
|
||||
LANGOPT(OpenMPOffloadMandatory , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.")
|
||||
|
||||
@@ -3353,6 +3353,10 @@ def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested
|
||||
HelpText<"Assert no nested parallel regions in the GPU">,
|
||||
MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
|
||||
|
||||
def fopenmp_globalize_to_global_space : Flag<["-"], "fopenmp-globalize-to-global-space">,
|
||||
HelpText<"Globalize to global space for the globalized variables">,
|
||||
MarshallingInfoFlag<LangOpts<"OpenMPGlobalizeToGlobalSpace">>;
|
||||
|
||||
} // let Group = f_Group
|
||||
} // let Visibility = [ClangOption, CC1Option, FC1Option]
|
||||
} // let Flags = [NoArgumentUnused, HelpHidden]
|
||||
|
||||
@@ -1067,10 +1067,12 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
|
||||
|
||||
// Allocate space for the variable to be globalized
|
||||
llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())};
|
||||
llvm::CallBase *VoidPtr =
|
||||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
|
||||
CGM.getModule(), OMPRTL___kmpc_alloc_shared),
|
||||
AllocArgs, VD->getName());
|
||||
llvm::CallBase *VoidPtr = CGF.EmitRuntimeCall(
|
||||
OMPBuilder.getOrCreateRuntimeFunction(
|
||||
CGM.getModule(), CGM.getLangOpts().OpenMPGlobalizeToGlobalSpace
|
||||
? OMPRTL_malloc
|
||||
: OMPRTL___kmpc_alloc_shared),
|
||||
AllocArgs, VD->getName());
|
||||
// FIXME: We should use the variables actual alignment as an argument.
|
||||
VoidPtr->addRetAttr(llvm::Attribute::get(
|
||||
CGM.getLLVMContext(), llvm::Attribute::Alignment,
|
||||
@@ -1162,20 +1164,29 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {
|
||||
// globalized in the prolog (i.e. emitGenericVarsProlog).
|
||||
for (const auto &AddrSizePair :
|
||||
llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
|
||||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
|
||||
CGM.getModule(), OMPRTL___kmpc_free_shared),
|
||||
{AddrSizePair.first, AddrSizePair.second});
|
||||
if (CGM.getLangOpts().OpenMPGlobalizeToGlobalSpace)
|
||||
CGF.EmitRuntimeCall(
|
||||
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL_free),
|
||||
{AddrSizePair.first});
|
||||
else
|
||||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
|
||||
CGM.getModule(), OMPRTL___kmpc_free_shared),
|
||||
{AddrSizePair.first, AddrSizePair.second});
|
||||
}
|
||||
// Deallocate the memory for each globalized value
|
||||
for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {
|
||||
const auto *VD = cast<VarDecl>(Rec.first);
|
||||
I->getSecond().MappedParams->restore(CGF);
|
||||
|
||||
llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,
|
||||
CGF.getTypeSize(VD->getType())};
|
||||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
|
||||
CGM.getModule(), OMPRTL___kmpc_free_shared),
|
||||
FreeArgs);
|
||||
if (CGM.getLangOpts().OpenMPGlobalizeToGlobalSpace)
|
||||
CGF.EmitRuntimeCall(
|
||||
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL_free),
|
||||
{Rec.second.GlobalizedVal});
|
||||
else
|
||||
CGF.EmitRuntimeCall(
|
||||
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
|
||||
OMPRTL___kmpc_free_shared),
|
||||
{Rec.second.GlobalizedVal, CGF.getTypeSize(VD->getType())});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6397,6 +6397,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
|
||||
if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
|
||||
CmdArgs.push_back("-fopenmp-offload-mandatory");
|
||||
if (Args.hasArg(options::OPT_fopenmp_globalize_to_global_space))
|
||||
CmdArgs.push_back("-fopenmp-globalize-to-global-space");
|
||||
break;
|
||||
default:
|
||||
// By default, if Clang doesn't know how to generate useful OpenMP code
|
||||
|
||||
@@ -476,6 +476,8 @@ __OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
|
||||
|
||||
__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16)
|
||||
|
||||
__OMP_RTL(malloc, false, VoidPtr, SizeTy)
|
||||
__OMP_RTL(free, false, Void, VoidPtr)
|
||||
__OMP_RTL(__kmpc_alloc_shared, false, VoidPtr, SizeTy)
|
||||
__OMP_RTL(__kmpc_free_shared, false, Void, VoidPtr, SizeTy)
|
||||
__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
//===--------------- CanonicalizeMainFunction.h -----------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Utility function to canonicalize main function.
|
||||
// The canonical main function is defined as: int main(int argc, char *argv[]);
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TRANSFORMS_UTILS_CANONICALIZEMAINFUNCTION_H
|
||||
#define LLVM_TRANSFORMS_UTILS_CANONICALIZEMAINFUNCTION_H
|
||||
|
||||
#include "llvm/IR/PassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// A module pass that canonicalizes the program's main function so that it
/// has the canonical signature `int main(int argc, char *argv[])`.
class CanonicalizeMainFunctionPass
    : public PassInfoMixin<CanonicalizeMainFunctionPass> {
public:
  /// Canonicalize the main function in \p M.
  /// \returns PreservedAnalyses::none() when the module was changed,
  /// PreservedAnalyses::all() otherwise.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_UTILS_CANONICALIZEMAINFUNCTION_H
|
||||
@@ -233,6 +233,7 @@
|
||||
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
|
||||
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
|
||||
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
|
||||
#include "llvm/Transforms/Utils/CanonicalizeMainFunction.h"
|
||||
#include "llvm/Transforms/Utils/CountVisits.h"
|
||||
#include "llvm/Transforms/Utils/Debugify.h"
|
||||
#include "llvm/Transforms/Utils/DXILUpgrade.h"
|
||||
|
||||
@@ -124,6 +124,7 @@
|
||||
#include "llvm/Transforms/Utils/AddDiscriminators.h"
|
||||
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
|
||||
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
|
||||
#include "llvm/Transforms/Utils/CanonicalizeMainFunction.h"
|
||||
#include "llvm/Transforms/Utils/CountVisits.h"
|
||||
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
|
||||
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
|
||||
@@ -277,6 +278,10 @@ static cl::opt<bool> UseLoopVersioningLICM(
|
||||
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable the experimental Loop Versioning LICM pass"));
|
||||
|
||||
static cl::opt<bool> EnableCanonicalizeMainFunction(
|
||||
"enable-canonicalize-main-function", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable CanonicalizeMainFunction pass"));
|
||||
|
||||
namespace llvm {
|
||||
cl::opt<bool> EnableMemProfContextDisambiguation(
|
||||
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
|
||||
@@ -1060,6 +1065,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
|
||||
PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
|
||||
}
|
||||
|
||||
if (EnableCanonicalizeMainFunction)
|
||||
MPM.addPass(CanonicalizeMainFunctionPass());
|
||||
|
||||
// Try to perform OpenMP specific optimizations on the module. This is a
|
||||
// (quick!) no-op if there are no OpenMP runtime calls present in the module.
|
||||
MPM.addPass(OpenMPOptPass());
|
||||
@@ -1611,6 +1619,9 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
|
||||
MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
|
||||
}
|
||||
|
||||
if (EnableCanonicalizeMainFunction)
|
||||
MPM.addPass(CanonicalizeMainFunctionPass());
|
||||
|
||||
if (Level == OptimizationLevel::O0) {
|
||||
// Run a second time to clean up any type tests left behind by WPD for use
|
||||
// in ICP.
|
||||
@@ -2059,6 +2070,9 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
|
||||
|
||||
MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
|
||||
|
||||
if (EnableCanonicalizeMainFunction)
|
||||
MPM.addPass(CanonicalizeMainFunctionPass());
|
||||
|
||||
return MPM;
|
||||
}
|
||||
|
||||
|
||||
@@ -48,6 +48,7 @@ MODULE_PASS("openmp-opt", OpenMPOptPass())
|
||||
MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
|
||||
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
|
||||
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
|
||||
MODULE_PASS("canonicalize-main-function", CanonicalizeMainFunctionPass())
|
||||
MODULE_PASS("cg-profile", CGProfilePass())
|
||||
MODULE_PASS("check-debugify", NewPMCheckDebugifyPass())
|
||||
MODULE_PASS("constmerge", ConstantMergePass())
|
||||
|
||||
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMTransformUtils
|
||||
CallGraphUpdater.cpp
|
||||
CanonicalizeAliases.cpp
|
||||
CanonicalizeFreezeInLoops.cpp
|
||||
CanonicalizeMainFunction.cpp
|
||||
CloneFunction.cpp
|
||||
CloneModule.cpp
|
||||
CodeExtractor.cpp
|
||||
|
||||
103
llvm/lib/Transforms/Utils/CanonicalizeMainFunction.cpp
Normal file
103
llvm/lib/Transforms/Utils/CanonicalizeMainFunction.cpp
Normal file
@@ -0,0 +1,103 @@
|
||||
//===-------------- CanonicalizeMainFunction.cpp ----------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Utility function to canonicalize main function.
|
||||
// The canonical main function is defined as: int main(int argc, char *argv[]);
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Utils/CanonicalizeMainFunction.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "canonicalize-main-function"
|
||||
|
||||
static cl::opt<std::string>
|
||||
MainFunctionName("canonical-main-function-name",
|
||||
cl::desc("New main function name"),
|
||||
cl::value_desc("main function name"));
|
||||
|
||||
/// Rewrite \p F into the canonical main signature `i32 (i32, ptr)` by
/// creating a fresh function next to it, splicing the body over, and fixing
/// up returns and argument uses.
///
/// \returns true if a replacement function was created (the caller is then
/// responsible for erasing \p F), false if \p F already looked canonical and
/// was left untouched.
static bool rewriteMainFunction(Function &F) {
  // Two parameters and an i32 result: treat as already canonical.
  // NOTE(review): the parameter *types* are not checked here — this assumes
  // any two-argument, i32-returning "main" is already (int, char**).
  if (F.arg_size() == 2 && F.getReturnType()->isIntegerTy(32))
    return false;

  auto &Ctx = F.getContext();
  auto &DL = F.getParent()->getDataLayout();
  auto *Int32Ty = IntegerType::getInt32Ty(Ctx);
  auto *PtrTy = PointerType::get(Ctx, DL.getDefaultGlobalsAddressSpace());

  // Create the replacement `i32 (i32, ptr)` function immediately before F so
  // the module order is preserved, then steal F's name, attributes, debug
  // info, and entire body.
  FunctionType *NewFnTy =
      FunctionType::get(Int32Ty, {Int32Ty, PtrTy}, /* isVarArg */ false);
  Function *NewFn =
      Function::Create(NewFnTy, F.getLinkage(), F.getAddressSpace(), "");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFn);
  NewFn->takeName(&F);
  NewFn->copyAttributesFrom(&F);
  NewFn->setSubprogram(F.getSubprogram());
  F.setSubprogram(nullptr);
  NewFn->splice(NewFn->begin(), &F);

  // A void main has bare `ret void` instructions; replace each with
  // `ret i32 0` to match the new return type. Collect first, then rewrite,
  // so we never mutate a block while iterating over it.
  if (!F.getReturnType()->isIntegerTy(32)) {
    SmallVector<ReturnInst *> WorkList;
    for (BasicBlock &BB : *NewFn)
      for (Instruction &I : BB) {
        auto *RI = dyn_cast<ReturnInst>(&I);
        if (!RI)
          continue;
        assert(RI->getReturnValue() == nullptr &&
               "return value of a void main function is not nullptr");
        WorkList.push_back(RI);
      }
    for (auto *RI : WorkList) {
      (void)ReturnInst::Create(Ctx, ConstantInt::getNullValue(Int32Ty), RI);
      RI->eraseFromParent();
    }
  }

  // Remap argument uses onto the new function's arguments.
  // NOTE(review): when the argument counts differ (e.g. `int main(void)`),
  // no remapping happens — this assumes such a main has no argument uses in
  // the spliced body; confirm before erasing F in the caller.
  if (F.arg_size() == NewFn->arg_size())
    for (unsigned I = 0; I < NewFn->arg_size(); ++I) {
      Argument *OldArg = F.getArg(I);
      Argument *NewArg = NewFn->getArg(I);
      NewArg->takeName(OldArg);
      OldArg->replaceAllUsesWith(NewArg);
    }

  return true;
}
|
||||
|
||||
PreservedAnalyses CanonicalizeMainFunctionPass::run(Module &M,
                                                    ModuleAnalysisManager &AM) {
  // Locate the (expected-unique) function named "main".
  Function *Main = nullptr;
  for (Function &F : M) {
    if (F.getName() != "main")
      continue;
    assert(Main == nullptr && "more than one main function");
    Main = &F;
  }

  // No main in this module: nothing to do.
  if (!Main)
    return PreservedAnalyses::all();

  bool Changed = false;

  // Optionally give main a new name (e.g. "__user_main") before rewriting.
  if (!MainFunctionName.empty() && Main->getName() != MainFunctionName) {
    Main->setName(MainFunctionName);
    Changed = true;
  }

  // Rewrite to the canonical signature; on success the old function body has
  // been spliced into a replacement, so the husk can be erased.
  if (rewriteMainFunction(*Main)) {
    Main->eraseFromParent();
    Changed = true;
  }

  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
|
||||
@@ -116,7 +116,7 @@ set(src_files
|
||||
# vectorized accesses to the shared state. Generally, those are "good" but
|
||||
# the optimizer pipeline (esp. Attributor) does not fully support vectorized
|
||||
# instructions yet and we end up missing out on way more important constant
|
||||
# propagation. That said, we will run the vectorizer again after the runtime
|
||||
# propagation. That said, we will run the vectorizer again after the runtime
|
||||
# has been linked into the user program.
|
||||
set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 -mllvm -vectorize-slp=false )
|
||||
set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false )
|
||||
|
||||
12
openmp/libomptarget/DirectGPUCompilation/clang-gpu
Executable file
12
openmp/libomptarget/DirectGPUCompilation/clang-gpu
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
from driver.wrapper import run
|
||||
|
||||
|
||||
def main():
    # Thin entry point: delegate to the shared driver, compiling as C.
    run(is_cpp=False)


if __name__ == "__main__":
    main()
|
||||
12
openmp/libomptarget/DirectGPUCompilation/clang-gpu++
Executable file
12
openmp/libomptarget/DirectGPUCompilation/clang-gpu++
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
from driver.wrapper import run
|
||||
|
||||
|
||||
def main():
    # Thin entry point: delegate to the shared driver, compiling as C++.
    run(is_cpp=True)


if __name__ == "__main__":
    main()
|
||||
31
openmp/libomptarget/DirectGPUCompilation/driver/Main.c
Normal file
31
openmp/libomptarget/DirectGPUCompilation/driver/Main.c
Normal file
@@ -0,0 +1,31 @@
|
||||
//===------- Main.c - Direct compilation program start point ------ C -----===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <string.h>
|
||||
|
||||
extern int __user_main(int, char *[]);
|
||||
|
||||
// Host-side entry point: stage argc/argv onto the device, then run the
// user's renamed main (__user_main) inside a target region and return its
// exit status.
int main(int argc, char *argv[]) {
  // Map the argv pointer array to the device. This copies only the pointer
  // values; the character data is mapped string-by-string below.
#pragma omp target enter data map(to: argv[:argc])

  for (int I = 0; I < argc; ++I) {
    // Map each argument string *including* its NUL terminator; mapping only
    // strlen() bytes would leave the string unterminated on the device.
#pragma omp target enter data map(to: argv[I][:strlen(argv[I]) + 1])
  }

  int Ret = 0;
#pragma omp target enter data map(to: Ret)

  // Launch the user program on the device with a single team.
#pragma omp target teams num_teams(1) thread_limit(1024)
  {
    Ret = __user_main(argc, argv);
  }

  // Bring the exit status back from the device.
#pragma omp target exit data map(from: Ret)

  return Ret;
}
|
||||
@@ -0,0 +1,14 @@
|
||||
//===------- UserWrapper.h - User code wrapper --------------- C++ --------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef OPENMP_LIBOMPTARGET_DIRECTGPUCOMPILATION_USERWRAPPER_H
|
||||
#define OPENMP_LIBOMPTARGET_DIRECTGPUCOMPILATION_USERWRAPPER_H
|
||||
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
#endif
|
||||
0
openmp/libomptarget/DirectGPUCompilation/driver/__init__.py
Executable file
0
openmp/libomptarget/DirectGPUCompilation/driver/__init__.py
Executable file
139
openmp/libomptarget/DirectGPUCompilation/driver/wrapper.py
Executable file
139
openmp/libomptarget/DirectGPUCompilation/driver/wrapper.py
Executable file
@@ -0,0 +1,139 @@
|
||||
import argparse
|
||||
import os
|
||||
import pathlib
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
|
||||
cwd = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
source_file_suffix = [".c", ".cpp", ".cu", ".hip", ".cc", ".cxx"]
|
||||
|
||||
|
||||
def print_version():
    """Forward `clang --version` to the user, exiting with its status on failure."""
    status = subprocess.run(["clang", "--version"]).returncode
    if status:
        sys.exit(status)
|
||||
|
||||
|
||||
def compile_loader(loader_name, targets, verbose, dry_run):
    """Compile the device-side loader (Main.c) into an object file.

    Args:
        loader_name: output path for the loader object file.
        targets: iterable of offload architectures; 'native' when empty/None.
        verbose: echo the command line to stderr.
        dry_run: print the command instead of running it.

    Exits the process with clang's status if the compilation fails.
    """
    cmd = [
        "clang",
        "-c",
        "-fopenmp",
        "-foffload-lto",
        "-fopenmp-offload-mandatory",
        "-o",
        loader_name,
    ]
    if targets:
        for arch in targets:
            cmd.append("--offload-arch={}".format(arch))
    else:
        cmd.append("--offload-arch=native")
    cmd.append(os.path.join(cwd, "Main.c"))
    # Print the command at most once, even when both verbose and dry_run are
    # set (the original printed it twice in that case).
    if verbose or dry_run:
        print(" ".join(cmd), file=sys.stderr)
    if dry_run:
        return
    cp = subprocess.run(cmd)
    if cp.returncode != 0:
        sys.exit(cp.returncode)
|
||||
|
||||
|
||||
def invoke_clang(is_cpp, args, targets, verbose, dry_run):
    """Run the host clang driver with the GPU-direct compilation flags.

    Args:
        is_cpp: select clang++ instead of clang.
        args: user arguments forwarded verbatim.
        targets: iterable of offload architectures; 'native' when empty/None.
        verbose: echo the command line to stderr.
        dry_run: print the command instead of running it.

    Exits the process with clang's status if the invocation fails.
    """
    cmd = [
        "clang++" if is_cpp else "clang",
        "-fopenmp",
        "-foffload-lto",
        "-fopenmp-offload-mandatory",
        "-fopenmp-globalize-to-global-space",
        "-include",
        os.path.join(cwd, "UserWrapper.h"),
        "--save-temps",
        "-rdynamic",
        "-mllvm",
        "-openmp-opt-disable-state-machine-rewrite",
        "-mllvm",
        "-enable-canonicalize-main-function",
        "-mllvm",
        "-canonical-main-function-name=__user_main",
    ]
    if targets:
        for arch in targets:
            cmd.append("--offload-arch={}".format(arch))
    else:
        cmd.append("--offload-arch=native")
    cmd += args
    # Print the command at most once, and to stderr for consistency with
    # compile_loader (the original printed twice, to stdout).
    if verbose or dry_run:
        print(" ".join(cmd), file=sys.stderr)
    if dry_run:
        return
    cp = subprocess.run(cmd)
    if cp.returncode != 0:
        sys.exit(cp.returncode)
|
||||
|
||||
|
||||
def run(is_cpp=False):
    """Drive a GPU-direct compilation: build the loader object, then clang.

    Recognizes a small subset of clang options itself (-c, -v, --version,
    -###, --offload-arch) and forwards everything else untouched.

    Args:
        is_cpp: select the clang++ driver (clang-gpu++) instead of clang.
    """
    parser = argparse.ArgumentParser(
        prog="clang-gpu", description="clang LLVM GPU compiler"
    )
    parser.add_argument(
        "-c",
        action="store_true",
        help="Only run preprocess, compile, and assemble steps",
    )
    parser.add_argument(
        "-v", action="store_true", help="Show commands to run and use verbose output"
    )
    parser.add_argument(
        "--version", action="store_true", help="Print version information"
    )
    parser.add_argument(
        "-###",
        action="store_true",
        help="Print (but do not run) the commands to run for this compilation",
    )
    parser.add_argument(
        "--offload-arch",
        type=str,
        help=(
            "Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g."
            " sm_35). If 'native' is used the compiler will detect locally installed"
            " architectures. For HIP offloading, the device architecture can be"
            " followed by target ID features delimited by a colon (e.g."
            " gfx908:xnack+:sramecc-). May be specified more than once."
        ),
        nargs="*",
    )

    args, fwd_args = parser.parse_known_args()

    if args.version:
        print_version()
        return

    if args.c:
        fwd_args.append("-c")
    if args.v:
        fwd_args.append("-v")

    # argparse maps "-###" to the attribute name "###", which is not a valid
    # Python identifier, so it must be fetched through vars().
    dry_run = vars(args)["###"]
    loader_name = None
    temp_files = []

    if not args.c:
        # Reserve a unique .o path for the loader object. delete=False plus an
        # immediate close() leaves exactly one file on disk that clang can
        # overwrite; the original kept an extra never-removed placeholder file
        # open for the whole compilation and derived a second name from it.
        tf = tempfile.NamedTemporaryFile(suffix=".o", delete=False)
        tf.close()
        loader_name = tf.name
        compile_loader(loader_name, args.offload_arch, args.v, dry_run)
        fwd_args.append(loader_name)
        temp_files.append(loader_name)

    invoke_clang(is_cpp, fwd_args, args.offload_arch, args.v, dry_run)

    # Remove any intermediate files we created.
    for f in temp_files:
        if os.path.isfile(f):
            os.unlink(f)
|
||||
Reference in New Issue
Block a user