Compare commits
2 Commits
globalisel
...
test-alloc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
abd54cf73d | ||
|
|
4c0a6df708 |
@@ -2468,8 +2468,42 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
||||
(IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar);
|
||||
}
|
||||
|
||||
auto PushCleanupIfNeeded = [this, Ty, &D](Address DeclPtr) {
|
||||
// Push a destructor cleanup for this parameter if the ABI requires it.
|
||||
// Don't push a cleanup in a thunk for a method that will also emit a
|
||||
// cleanup.
|
||||
if (Ty->isRecordType() && !CurFuncIsThunk &&
|
||||
Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
|
||||
if (QualType::DestructionKind DtorKind =
|
||||
D.needsDestruction(getContext())) {
|
||||
assert((DtorKind == QualType::DK_cxx_destructor ||
|
||||
DtorKind == QualType::DK_nontrivial_c_struct) &&
|
||||
"unexpected destructor type");
|
||||
pushDestroy(DtorKind, DeclPtr, Ty);
|
||||
CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
|
||||
EHStack.stable_begin();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Address DeclPtr = Address::invalid();
|
||||
Address AllocaPtr = Address::invalid();
|
||||
Address OpenMPLocalAddr =
|
||||
getLangOpts().OpenMP
|
||||
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
|
||||
: Address::invalid();
|
||||
if (OpenMPLocalAddr.isValid()) {
|
||||
DeclPtr = OpenMPLocalAddr;
|
||||
AllocaPtr = DeclPtr;
|
||||
LValue Dst = MakeAddrLValue(DeclPtr, Ty);
|
||||
if (Arg.isIndirect()) {
|
||||
LValue Src = MakeAddrLValue(Arg.getIndirectAddress(), Ty);
|
||||
callCStructCopyConstructor(Dst, Src);
|
||||
PushCleanupIfNeeded(Arg.getIndirectAddress());
|
||||
} else {
|
||||
EmitStoreOfScalar(Arg.getDirectValue(), Dst, /* isInitialization */ true);
|
||||
}
|
||||
} else {
|
||||
bool DoStore = false;
|
||||
bool IsScalar = hasScalarEvaluationKind(Ty);
|
||||
// If we already have a pointer to the argument, reuse the input pointer.
|
||||
@@ -2494,36 +2528,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
||||
DeclPtr = DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
|
||||
*this, V, SrcLangAS, DestLangAS, T, true));
|
||||
}
|
||||
|
||||
// Push a destructor cleanup for this parameter if the ABI requires it.
|
||||
// Don't push a cleanup in a thunk for a method that will also emit a
|
||||
// cleanup.
|
||||
if (Ty->isRecordType() && !CurFuncIsThunk &&
|
||||
Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
|
||||
if (QualType::DestructionKind DtorKind =
|
||||
D.needsDestruction(getContext())) {
|
||||
assert((DtorKind == QualType::DK_cxx_destructor ||
|
||||
DtorKind == QualType::DK_nontrivial_c_struct) &&
|
||||
"unexpected destructor type");
|
||||
pushDestroy(DtorKind, DeclPtr, Ty);
|
||||
CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
|
||||
EHStack.stable_begin();
|
||||
}
|
||||
}
|
||||
PushCleanupIfNeeded(DeclPtr);
|
||||
} else {
|
||||
// Check if the parameter address is controlled by OpenMP runtime.
|
||||
Address OpenMPLocalAddr =
|
||||
getLangOpts().OpenMP
|
||||
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
|
||||
: Address::invalid();
|
||||
if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
|
||||
DeclPtr = OpenMPLocalAddr;
|
||||
AllocaPtr = DeclPtr;
|
||||
} else {
|
||||
// Otherwise, create a temporary to hold the value.
|
||||
// Create a temporary to hold the value.
|
||||
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
|
||||
D.getName() + ".addr", &AllocaPtr);
|
||||
}
|
||||
DoStore = true;
|
||||
}
|
||||
|
||||
@@ -2561,8 +2570,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
||||
EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
|
||||
EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
|
||||
DoStore = false;
|
||||
}
|
||||
else
|
||||
} else
|
||||
// Don't use objc_retainBlock for block pointers, because we
|
||||
// don't want to Block_copy something just because we got it
|
||||
// as a parameter.
|
||||
@@ -2571,10 +2579,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
||||
} else {
|
||||
// Push the cleanup for a consumed parameter.
|
||||
if (isConsumed) {
|
||||
ARCPreciseLifetime_t precise = (D.hasAttr<ObjCPreciseLifetimeAttr>()
|
||||
? ARCPreciseLifetime : ARCImpreciseLifetime);
|
||||
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), ArgVal,
|
||||
precise);
|
||||
ARCPreciseLifetime_t precise =
|
||||
(D.hasAttr<ObjCPreciseLifetimeAttr>() ? ARCPreciseLifetime
|
||||
: ARCImpreciseLifetime);
|
||||
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(),
|
||||
ArgVal, precise);
|
||||
}
|
||||
|
||||
if (lt == Qualifiers::OCL_Weak) {
|
||||
@@ -2591,6 +2600,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
||||
// Store the initial value into the alloca.
|
||||
if (DoStore)
|
||||
EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
|
||||
}
|
||||
|
||||
setAddrOfLocalVar(&D, DeclPtr);
|
||||
|
||||
|
||||
72
clang/test/OpenMP/globalization_byval_struct.cpp
Normal file
72
clang/test/OpenMP/globalization_byval_struct.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
extern int printf(const char *, ...);
|
||||
|
||||
extern "C" {
|
||||
struct S {
|
||||
int a;
|
||||
float b;
|
||||
};
|
||||
|
||||
// CHECK: define{{.*}}void @test(%struct.S* noundef byval(%struct.S) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
|
||||
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
|
||||
// CHECK: bitcast i8* [[g]] to %struct.S*
|
||||
// CHECK: bitcast %struct.S* [[arg]] to i8**
|
||||
// CHECK: call void [[cc:@__copy_constructor[_0-9a-zA-Z]+]]
|
||||
void test(S s) {
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < s.a; ++i) {
|
||||
printf("%d : %d : %f\n", i, s.a, s.b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void foo() {
|
||||
#pragma omp target teams num_teams(1)
|
||||
{
|
||||
S s;
|
||||
s.a = 7;
|
||||
s.b = 11;
|
||||
test(s);
|
||||
}
|
||||
}
|
||||
|
||||
struct BB;
|
||||
|
||||
struct SS {
|
||||
int a;
|
||||
double b;
|
||||
BB *c;
|
||||
SS() = default;
|
||||
SS(const SS &);
|
||||
SS(SS &&) = delete;
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
// CHECK: define{{.*}}void @test2(%struct.SS* noundef byval(%struct.SS) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
|
||||
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
|
||||
// CHECK: bitcast i8* [[g]] to %struct.SS*
|
||||
// CHECK: bitcast %struct.SS* [[arg]] to i8**
|
||||
// CHECK: call void [[cc2:@__copy_constructor[_0-9a-zA-Z]+]]
|
||||
void test2(SS s) {
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < s.a; ++i) {
|
||||
printf("%d : %d : %f\n", i, s.a, s.b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void bar() {
|
||||
#pragma omp target teams num_teams(1)
|
||||
{
|
||||
SS s;
|
||||
s.a = 7;
|
||||
s.b = 11;
|
||||
test2(s);
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: void [[cc]]
|
||||
// CHECK: void [[cc2]]
|
||||
@@ -108,6 +108,7 @@ set(include_files
|
||||
${include_directory}/Debug.h
|
||||
${include_directory}/Interface.h
|
||||
${include_directory}/Mapping.h
|
||||
${include_directory}/Memory.h
|
||||
${include_directory}/State.h
|
||||
${include_directory}/Synchronization.h
|
||||
${include_directory}/Types.h
|
||||
@@ -119,6 +120,7 @@ set(src_files
|
||||
${source_directory}/Debug.cpp
|
||||
${source_directory}/Kernel.cpp
|
||||
${source_directory}/Mapping.cpp
|
||||
${source_directory}/Memory.cpp
|
||||
${source_directory}/Misc.cpp
|
||||
${source_directory}/Parallelism.cpp
|
||||
${source_directory}/Reduction.cpp
|
||||
|
||||
26
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
26
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
@@ -0,0 +1,26 @@
|
||||
//===--- Memory.h - OpenMP device runtime memory allocator -------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef OMPTARGET_CONFIGURATION_H
|
||||
#define OMPTARGET_CONFIGURATION_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
using size_t = uint64_t;
|
||||
|
||||
extern "C" {
|
||||
void *malloc(size_t Size);
|
||||
|
||||
void free(void *);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -3,6 +3,7 @@ target_sources(omptarget.devicertl PRIVATE
|
||||
Debug.cpp
|
||||
Kernel.cpp
|
||||
Mapping.cpp
|
||||
Memory.cpp
|
||||
Misc.cpp
|
||||
Parallelism.cpp
|
||||
Reduction.cpp
|
||||
|
||||
43
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
43
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
//===------- Memory.cpp - OpenMP device runtime memory allocator -- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
#include "Memory.h"
|
||||
|
||||
char *CONSTANT(omptarget_device_heap_buffer)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
|
||||
size_t CONSTANT(omptarget_device_heap_size)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
|
||||
__attribute__((used, retain, weak, visibility("protected")))
|
||||
size_t omptarget_device_heap_cur_pos = 0;
|
||||
|
||||
extern "C" {
|
||||
|
||||
void *malloc(size_t Size) {
|
||||
constexpr const size_t Alignment = 16;
|
||||
Size = (Size + Alignment - 1) & ~(Alignment - 1);
|
||||
|
||||
if (Size + omptarget_device_heap_cur_pos < omptarget_device_heap_size) {
|
||||
void *R = omptarget_device_heap_buffer + omptarget_device_heap_cur_pos;
|
||||
omptarget_device_heap_cur_pos += Size;
|
||||
return R;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void free(void *) {}
|
||||
}
|
||||
|
||||
#pragma omp end declare target
|
||||
@@ -964,6 +964,82 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize heap buffer
|
||||
{
|
||||
const char *BufferVarName = "omptarget_device_heap_buffer";
|
||||
const char *SizeVarName = "omptarget_device_heap_size";
|
||||
CUdeviceptr BufferVarPtr;
|
||||
CUdeviceptr SizeVarPtr;
|
||||
size_t BufferVarSize;
|
||||
size_t SizeVarSize;
|
||||
|
||||
Err = cuModuleGetGlobal(&BufferVarPtr, &BufferVarSize, Module,
|
||||
BufferVarName);
|
||||
if (Err == CUDA_SUCCESS) {
|
||||
if (BufferVarSize != sizeof(uint64_t)) {
|
||||
REPORT("Global global heap buffer pointer '%s' - size mismatch (%zu "
|
||||
"!= %zu)\n",
|
||||
BufferVarName, BufferVarSize, sizeof(uint64_t));
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Err = cuModuleGetGlobal(&SizeVarPtr, &SizeVarSize, Module, SizeVarName);
|
||||
if (Err == CUDA_SUCCESS) {
|
||||
if (SizeVarSize != sizeof(uint64_t)) {
|
||||
REPORT("Global global heap size variable '%s' - size mismatch (%zu "
|
||||
"!= %zu)\n",
|
||||
SizeVarName, SizeVarSize, sizeof(uint64_t));
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CUdeviceptr BufferPtr;
|
||||
size_t HeapSize = 6442450944U;
|
||||
|
||||
Err = cuMemAlloc(&BufferPtr, HeapSize);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
REPORT("Error when allocating heap bufferm size = %zu\n", HeapSize);
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Err = cuMemcpyHtoD(BufferVarPtr, &BufferPtr, BufferVarSize);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
REPORT("Error when copying data from host to device. Pointers: "
|
||||
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||
DPxPTR(&BufferPtr), DPxPTR(BufferVarPtr), BufferVarSize);
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Err = cuMemcpyHtoD(SizeVarPtr, &HeapSize, SizeVarSize);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
REPORT("Error when copying data from host to device. Pointers: "
|
||||
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||
DPxPTR(&HeapSize), DPxPTR(SizeVarPtr), SizeVarSize);
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DP("Successfully set heap buffer. omptarget_device_heap_buffer "
|
||||
"= " DPxMOD ", omptarget_device_heap_size = %zu\n",
|
||||
DPxPTR(BufferPtr), HeapSize);
|
||||
} else {
|
||||
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||
SizeVarName);
|
||||
DP("Continue, considering this is an image does not require heap "
|
||||
"allocation.\n");
|
||||
}
|
||||
|
||||
} else {
|
||||
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||
BufferVarName);
|
||||
DP("Continue, considering this is an image does not require heap "
|
||||
"allocation.\n");
|
||||
}
|
||||
}
|
||||
|
||||
return getOffloadEntriesTable(DeviceId);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user