Compare commits
2 Commits
globalisel
...
test-alloc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
abd54cf73d | ||
|
|
4c0a6df708 |
@@ -2468,33 +2468,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
|||||||
(IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar);
|
(IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar);
|
||||||
}
|
}
|
||||||
|
|
||||||
Address DeclPtr = Address::invalid();
|
auto PushCleanupIfNeeded = [this, Ty, &D](Address DeclPtr) {
|
||||||
Address AllocaPtr = Address::invalid();
|
|
||||||
bool DoStore = false;
|
|
||||||
bool IsScalar = hasScalarEvaluationKind(Ty);
|
|
||||||
// If we already have a pointer to the argument, reuse the input pointer.
|
|
||||||
if (Arg.isIndirect()) {
|
|
||||||
// If we have a prettier pointer type at this point, bitcast to that.
|
|
||||||
DeclPtr = Arg.getIndirectAddress();
|
|
||||||
DeclPtr = Builder.CreateElementBitCast(DeclPtr, ConvertTypeForMem(Ty),
|
|
||||||
D.getName());
|
|
||||||
// Indirect argument is in alloca address space, which may be different
|
|
||||||
// from the default address space.
|
|
||||||
auto AllocaAS = CGM.getASTAllocaAddressSpace();
|
|
||||||
auto *V = DeclPtr.getPointer();
|
|
||||||
AllocaPtr = DeclPtr;
|
|
||||||
auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
|
|
||||||
auto DestLangAS =
|
|
||||||
getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
|
|
||||||
if (SrcLangAS != DestLangAS) {
|
|
||||||
assert(getContext().getTargetAddressSpace(SrcLangAS) ==
|
|
||||||
CGM.getDataLayout().getAllocaAddrSpace());
|
|
||||||
auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
|
|
||||||
auto *T = DeclPtr.getElementType()->getPointerTo(DestAS);
|
|
||||||
DeclPtr = DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
|
|
||||||
*this, V, SrcLangAS, DestLangAS, T, true));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Push a destructor cleanup for this parameter if the ABI requires it.
|
// Push a destructor cleanup for this parameter if the ABI requires it.
|
||||||
// Don't push a cleanup in a thunk for a method that will also emit a
|
// Don't push a cleanup in a thunk for a method that will also emit a
|
||||||
// cleanup.
|
// cleanup.
|
||||||
@@ -2510,87 +2484,123 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
|||||||
EHStack.stable_begin();
|
EHStack.stable_begin();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
};
|
||||||
// Check if the parameter address is controlled by OpenMP runtime.
|
|
||||||
Address OpenMPLocalAddr =
|
Address DeclPtr = Address::invalid();
|
||||||
getLangOpts().OpenMP
|
Address AllocaPtr = Address::invalid();
|
||||||
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
|
Address OpenMPLocalAddr =
|
||||||
: Address::invalid();
|
getLangOpts().OpenMP
|
||||||
if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
|
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
|
||||||
DeclPtr = OpenMPLocalAddr;
|
: Address::invalid();
|
||||||
AllocaPtr = DeclPtr;
|
if (OpenMPLocalAddr.isValid()) {
|
||||||
|
DeclPtr = OpenMPLocalAddr;
|
||||||
|
AllocaPtr = DeclPtr;
|
||||||
|
LValue Dst = MakeAddrLValue(DeclPtr, Ty);
|
||||||
|
if (Arg.isIndirect()) {
|
||||||
|
LValue Src = MakeAddrLValue(Arg.getIndirectAddress(), Ty);
|
||||||
|
callCStructCopyConstructor(Dst, Src);
|
||||||
|
PushCleanupIfNeeded(Arg.getIndirectAddress());
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, create a temporary to hold the value.
|
EmitStoreOfScalar(Arg.getDirectValue(), Dst, /* isInitialization */ true);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
bool DoStore = false;
|
||||||
|
bool IsScalar = hasScalarEvaluationKind(Ty);
|
||||||
|
// If we already have a pointer to the argument, reuse the input pointer.
|
||||||
|
if (Arg.isIndirect()) {
|
||||||
|
// If we have a prettier pointer type at this point, bitcast to that.
|
||||||
|
DeclPtr = Arg.getIndirectAddress();
|
||||||
|
DeclPtr = Builder.CreateElementBitCast(DeclPtr, ConvertTypeForMem(Ty),
|
||||||
|
D.getName());
|
||||||
|
// Indirect argument is in alloca address space, which may be different
|
||||||
|
// from the default address space.
|
||||||
|
auto AllocaAS = CGM.getASTAllocaAddressSpace();
|
||||||
|
auto *V = DeclPtr.getPointer();
|
||||||
|
AllocaPtr = DeclPtr;
|
||||||
|
auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
|
||||||
|
auto DestLangAS =
|
||||||
|
getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
|
||||||
|
if (SrcLangAS != DestLangAS) {
|
||||||
|
assert(getContext().getTargetAddressSpace(SrcLangAS) ==
|
||||||
|
CGM.getDataLayout().getAllocaAddrSpace());
|
||||||
|
auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
|
||||||
|
auto *T = DeclPtr.getElementType()->getPointerTo(DestAS);
|
||||||
|
DeclPtr = DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
|
||||||
|
*this, V, SrcLangAS, DestLangAS, T, true));
|
||||||
|
}
|
||||||
|
PushCleanupIfNeeded(DeclPtr);
|
||||||
|
} else {
|
||||||
|
// Create a temporary to hold the value.
|
||||||
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
|
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
|
||||||
D.getName() + ".addr", &AllocaPtr);
|
D.getName() + ".addr", &AllocaPtr);
|
||||||
|
DoStore = true;
|
||||||
}
|
}
|
||||||
DoStore = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr);
|
llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr);
|
||||||
|
|
||||||
LValue lv = MakeAddrLValue(DeclPtr, Ty);
|
LValue lv = MakeAddrLValue(DeclPtr, Ty);
|
||||||
if (IsScalar) {
|
if (IsScalar) {
|
||||||
Qualifiers qs = Ty.getQualifiers();
|
Qualifiers qs = Ty.getQualifiers();
|
||||||
if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
|
if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
|
||||||
// We honor __attribute__((ns_consumed)) for types with lifetime.
|
// We honor __attribute__((ns_consumed)) for types with lifetime.
|
||||||
// For __strong, it's handled by just skipping the initial retain;
|
// For __strong, it's handled by just skipping the initial retain;
|
||||||
// otherwise we have to balance out the initial +1 with an extra
|
// otherwise we have to balance out the initial +1 with an extra
|
||||||
// cleanup to do the release at the end of the function.
|
// cleanup to do the release at the end of the function.
|
||||||
bool isConsumed = D.hasAttr<NSConsumedAttr>();
|
bool isConsumed = D.hasAttr<NSConsumedAttr>();
|
||||||
|
|
||||||
// If a parameter is pseudo-strong then we can omit the implicit retain.
|
// If a parameter is pseudo-strong then we can omit the implicit retain.
|
||||||
if (D.isARCPseudoStrong()) {
|
if (D.isARCPseudoStrong()) {
|
||||||
assert(lt == Qualifiers::OCL_Strong &&
|
assert(lt == Qualifiers::OCL_Strong &&
|
||||||
"pseudo-strong variable isn't strong?");
|
"pseudo-strong variable isn't strong?");
|
||||||
assert(qs.hasConst() && "pseudo-strong variable should be const!");
|
assert(qs.hasConst() && "pseudo-strong variable should be const!");
|
||||||
lt = Qualifiers::OCL_ExplicitNone;
|
lt = Qualifiers::OCL_ExplicitNone;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load objects passed indirectly.
|
// Load objects passed indirectly.
|
||||||
if (Arg.isIndirect() && !ArgVal)
|
if (Arg.isIndirect() && !ArgVal)
|
||||||
ArgVal = Builder.CreateLoad(DeclPtr);
|
ArgVal = Builder.CreateLoad(DeclPtr);
|
||||||
|
|
||||||
if (lt == Qualifiers::OCL_Strong) {
|
if (lt == Qualifiers::OCL_Strong) {
|
||||||
if (!isConsumed) {
|
if (!isConsumed) {
|
||||||
if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
|
if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
|
||||||
// use objc_storeStrong(&dest, value) for retaining the
|
// use objc_storeStrong(&dest, value) for retaining the
|
||||||
// object. But first, store a null into 'dest' because
|
// object. But first, store a null into 'dest' because
|
||||||
// objc_storeStrong attempts to release its old value.
|
// objc_storeStrong attempts to release its old value.
|
||||||
llvm::Value *Null = CGM.EmitNullConstant(D.getType());
|
llvm::Value *Null = CGM.EmitNullConstant(D.getType());
|
||||||
EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
|
EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
|
||||||
EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
|
EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
|
||||||
DoStore = false;
|
DoStore = false;
|
||||||
|
} else
|
||||||
|
// Don't use objc_retainBlock for block pointers, because we
|
||||||
|
// don't want to Block_copy something just because we got it
|
||||||
|
// as a parameter.
|
||||||
|
ArgVal = EmitARCRetainNonBlock(ArgVal);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Push the cleanup for a consumed parameter.
|
||||||
|
if (isConsumed) {
|
||||||
|
ARCPreciseLifetime_t precise =
|
||||||
|
(D.hasAttr<ObjCPreciseLifetimeAttr>() ? ARCPreciseLifetime
|
||||||
|
: ARCImpreciseLifetime);
|
||||||
|
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(),
|
||||||
|
ArgVal, precise);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lt == Qualifiers::OCL_Weak) {
|
||||||
|
EmitARCInitWeak(DeclPtr, ArgVal);
|
||||||
|
DoStore = false; // The weak init is a store, no need to do two.
|
||||||
}
|
}
|
||||||
else
|
|
||||||
// Don't use objc_retainBlock for block pointers, because we
|
|
||||||
// don't want to Block_copy something just because we got it
|
|
||||||
// as a parameter.
|
|
||||||
ArgVal = EmitARCRetainNonBlock(ArgVal);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Push the cleanup for a consumed parameter.
|
|
||||||
if (isConsumed) {
|
|
||||||
ARCPreciseLifetime_t precise = (D.hasAttr<ObjCPreciseLifetimeAttr>()
|
|
||||||
? ARCPreciseLifetime : ARCImpreciseLifetime);
|
|
||||||
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), ArgVal,
|
|
||||||
precise);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lt == Qualifiers::OCL_Weak) {
|
// Enter the cleanup scope.
|
||||||
EmitARCInitWeak(DeclPtr, ArgVal);
|
EmitAutoVarWithLifetime(*this, D, DeclPtr, lt);
|
||||||
DoStore = false; // The weak init is a store, no need to do two.
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enter the cleanup scope.
|
|
||||||
EmitAutoVarWithLifetime(*this, D, DeclPtr, lt);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Store the initial value into the alloca.
|
// Store the initial value into the alloca.
|
||||||
if (DoStore)
|
if (DoStore)
|
||||||
EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
|
EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
|
||||||
|
}
|
||||||
|
|
||||||
setAddrOfLocalVar(&D, DeclPtr);
|
setAddrOfLocalVar(&D, DeclPtr);
|
||||||
|
|
||||||
|
|||||||
72
clang/test/OpenMP/globalization_byval_struct.cpp
Normal file
72
clang/test/OpenMP/globalization_byval_struct.cpp
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||||
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
|
||||||
|
// expected-no-diagnostics
|
||||||
|
|
||||||
|
extern int printf(const char *, ...);
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
struct S {
|
||||||
|
int a;
|
||||||
|
float b;
|
||||||
|
};
|
||||||
|
|
||||||
|
// CHECK: define{{.*}}void @test(%struct.S* noundef byval(%struct.S) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
|
||||||
|
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
|
||||||
|
// CHECK: bitcast i8* [[g]] to %struct.S*
|
||||||
|
// CHECK: bitcast %struct.S* [[arg]] to i8**
|
||||||
|
// CHECK: call void [[cc:@__copy_constructor[_0-9a-zA-Z]+]]
|
||||||
|
void test(S s) {
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 0; i < s.a; ++i) {
|
||||||
|
printf("%d : %d : %f\n", i, s.a, s.b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void foo() {
|
||||||
|
#pragma omp target teams num_teams(1)
|
||||||
|
{
|
||||||
|
S s;
|
||||||
|
s.a = 7;
|
||||||
|
s.b = 11;
|
||||||
|
test(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BB;
|
||||||
|
|
||||||
|
struct SS {
|
||||||
|
int a;
|
||||||
|
double b;
|
||||||
|
BB *c;
|
||||||
|
SS() = default;
|
||||||
|
SS(const SS &);
|
||||||
|
SS(SS &&) = delete;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
// CHECK: define{{.*}}void @test2(%struct.SS* noundef byval(%struct.SS) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
|
||||||
|
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
|
||||||
|
// CHECK: bitcast i8* [[g]] to %struct.SS*
|
||||||
|
// CHECK: bitcast %struct.SS* [[arg]] to i8**
|
||||||
|
// CHECK: call void [[cc2:@__copy_constructor[_0-9a-zA-Z]+]]
|
||||||
|
void test2(SS s) {
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 0; i < s.a; ++i) {
|
||||||
|
printf("%d : %d : %f\n", i, s.a, s.b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bar() {
|
||||||
|
#pragma omp target teams num_teams(1)
|
||||||
|
{
|
||||||
|
SS s;
|
||||||
|
s.a = 7;
|
||||||
|
s.b = 11;
|
||||||
|
test2(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK: void [[cc]]
|
||||||
|
// CHECK: void [[cc2]]
|
||||||
@@ -108,6 +108,7 @@ set(include_files
|
|||||||
${include_directory}/Debug.h
|
${include_directory}/Debug.h
|
||||||
${include_directory}/Interface.h
|
${include_directory}/Interface.h
|
||||||
${include_directory}/Mapping.h
|
${include_directory}/Mapping.h
|
||||||
|
${include_directory}/Memory.h
|
||||||
${include_directory}/State.h
|
${include_directory}/State.h
|
||||||
${include_directory}/Synchronization.h
|
${include_directory}/Synchronization.h
|
||||||
${include_directory}/Types.h
|
${include_directory}/Types.h
|
||||||
@@ -119,6 +120,7 @@ set(src_files
|
|||||||
${source_directory}/Debug.cpp
|
${source_directory}/Debug.cpp
|
||||||
${source_directory}/Kernel.cpp
|
${source_directory}/Kernel.cpp
|
||||||
${source_directory}/Mapping.cpp
|
${source_directory}/Mapping.cpp
|
||||||
|
${source_directory}/Memory.cpp
|
||||||
${source_directory}/Misc.cpp
|
${source_directory}/Misc.cpp
|
||||||
${source_directory}/Parallelism.cpp
|
${source_directory}/Parallelism.cpp
|
||||||
${source_directory}/Reduction.cpp
|
${source_directory}/Reduction.cpp
|
||||||
|
|||||||
26
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
26
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
//===--- Memory.h - OpenMP device runtime memory allocator -------- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef OMPTARGET_CONFIGURATION_H
|
||||||
|
#define OMPTARGET_CONFIGURATION_H
|
||||||
|
|
||||||
|
#include "Types.h"
|
||||||
|
|
||||||
|
using size_t = uint64_t;
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
void *malloc(size_t Size);
|
||||||
|
|
||||||
|
void free(void *);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -3,6 +3,7 @@ target_sources(omptarget.devicertl PRIVATE
|
|||||||
Debug.cpp
|
Debug.cpp
|
||||||
Kernel.cpp
|
Kernel.cpp
|
||||||
Mapping.cpp
|
Mapping.cpp
|
||||||
|
Memory.cpp
|
||||||
Misc.cpp
|
Misc.cpp
|
||||||
Parallelism.cpp
|
Parallelism.cpp
|
||||||
Reduction.cpp
|
Reduction.cpp
|
||||||
|
|||||||
43
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
43
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
//===------- Memory.cpp - OpenMP device runtime memory allocator -- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#pragma omp begin declare target device_type(nohost)
|
||||||
|
|
||||||
|
#include "Memory.h"
|
||||||
|
|
||||||
|
char *CONSTANT(omptarget_device_heap_buffer)
|
||||||
|
__attribute__((used, retain, weak, visibility("protected")));
|
||||||
|
|
||||||
|
size_t CONSTANT(omptarget_device_heap_size)
|
||||||
|
__attribute__((used, retain, weak, visibility("protected")));
|
||||||
|
|
||||||
|
__attribute__((used, retain, weak, visibility("protected")))
|
||||||
|
size_t omptarget_device_heap_cur_pos = 0;
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
void *malloc(size_t Size) {
|
||||||
|
constexpr const size_t Alignment = 16;
|
||||||
|
Size = (Size + Alignment - 1) & ~(Alignment - 1);
|
||||||
|
|
||||||
|
if (Size + omptarget_device_heap_cur_pos < omptarget_device_heap_size) {
|
||||||
|
void *R = omptarget_device_heap_buffer + omptarget_device_heap_cur_pos;
|
||||||
|
omptarget_device_heap_cur_pos += Size;
|
||||||
|
return R;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void free(void *) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma omp end declare target
|
||||||
@@ -964,6 +964,82 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize heap buffer
|
||||||
|
{
|
||||||
|
const char *BufferVarName = "omptarget_device_heap_buffer";
|
||||||
|
const char *SizeVarName = "omptarget_device_heap_size";
|
||||||
|
CUdeviceptr BufferVarPtr;
|
||||||
|
CUdeviceptr SizeVarPtr;
|
||||||
|
size_t BufferVarSize;
|
||||||
|
size_t SizeVarSize;
|
||||||
|
|
||||||
|
Err = cuModuleGetGlobal(&BufferVarPtr, &BufferVarSize, Module,
|
||||||
|
BufferVarName);
|
||||||
|
if (Err == CUDA_SUCCESS) {
|
||||||
|
if (BufferVarSize != sizeof(uint64_t)) {
|
||||||
|
REPORT("Global global heap buffer pointer '%s' - size mismatch (%zu "
|
||||||
|
"!= %zu)\n",
|
||||||
|
BufferVarName, BufferVarSize, sizeof(uint64_t));
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err = cuModuleGetGlobal(&SizeVarPtr, &SizeVarSize, Module, SizeVarName);
|
||||||
|
if (Err == CUDA_SUCCESS) {
|
||||||
|
if (SizeVarSize != sizeof(uint64_t)) {
|
||||||
|
REPORT("Global global heap size variable '%s' - size mismatch (%zu "
|
||||||
|
"!= %zu)\n",
|
||||||
|
SizeVarName, SizeVarSize, sizeof(uint64_t));
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUdeviceptr BufferPtr;
|
||||||
|
size_t HeapSize = 6442450944U;
|
||||||
|
|
||||||
|
Err = cuMemAlloc(&BufferPtr, HeapSize);
|
||||||
|
if (Err != CUDA_SUCCESS) {
|
||||||
|
REPORT("Error when allocating heap bufferm size = %zu\n", HeapSize);
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err = cuMemcpyHtoD(BufferVarPtr, &BufferPtr, BufferVarSize);
|
||||||
|
if (Err != CUDA_SUCCESS) {
|
||||||
|
REPORT("Error when copying data from host to device. Pointers: "
|
||||||
|
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||||
|
DPxPTR(&BufferPtr), DPxPTR(BufferVarPtr), BufferVarSize);
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err = cuMemcpyHtoD(SizeVarPtr, &HeapSize, SizeVarSize);
|
||||||
|
if (Err != CUDA_SUCCESS) {
|
||||||
|
REPORT("Error when copying data from host to device. Pointers: "
|
||||||
|
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||||
|
DPxPTR(&HeapSize), DPxPTR(SizeVarPtr), SizeVarSize);
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
DP("Successfully set heap buffer. omptarget_device_heap_buffer "
|
||||||
|
"= " DPxMOD ", omptarget_device_heap_size = %zu\n",
|
||||||
|
DPxPTR(BufferPtr), HeapSize);
|
||||||
|
} else {
|
||||||
|
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||||
|
SizeVarName);
|
||||||
|
DP("Continue, considering this is an image does not require heap "
|
||||||
|
"allocation.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||||
|
BufferVarName);
|
||||||
|
DP("Continue, considering this is an image does not require heap "
|
||||||
|
"allocation.\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return getOffloadEntriesTable(DeviceId);
|
return getOffloadEntriesTable(DeviceId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user