Compare commits

...

2 Commits

Author SHA1 Message Date
Shilei Tian
abd54cf73d [OpenMP] Introduce dynamic memory allocator for OpenMP target offloading 2022-07-09 22:58:29 -04:00
Shilei Tian
4c0a6df708 [Clang][OpenMP] Fix the issue that globalization doesn't work with byval struct function argument
This patch fixes the issue that the globalized variable is not properly
initialized when it is a byval struct function argument.

Differential Revision: https://reviews.llvm.org/D129008
2022-07-08 20:46:20 -04:00
7 changed files with 323 additions and 93 deletions

View File

@@ -2468,8 +2468,42 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
(IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar);
}
auto PushCleanupIfNeeded = [this, Ty, &D](Address DeclPtr) {
// Push a destructor cleanup for this parameter if the ABI requires it.
// Don't push a cleanup in a thunk for a method that will also emit a
// cleanup.
if (Ty->isRecordType() && !CurFuncIsThunk &&
Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
if (QualType::DestructionKind DtorKind =
D.needsDestruction(getContext())) {
assert((DtorKind == QualType::DK_cxx_destructor ||
DtorKind == QualType::DK_nontrivial_c_struct) &&
"unexpected destructor type");
pushDestroy(DtorKind, DeclPtr, Ty);
CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
EHStack.stable_begin();
}
}
};
Address DeclPtr = Address::invalid();
Address AllocaPtr = Address::invalid();
Address OpenMPLocalAddr =
getLangOpts().OpenMP
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
: Address::invalid();
if (OpenMPLocalAddr.isValid()) {
DeclPtr = OpenMPLocalAddr;
AllocaPtr = DeclPtr;
LValue Dst = MakeAddrLValue(DeclPtr, Ty);
if (Arg.isIndirect()) {
LValue Src = MakeAddrLValue(Arg.getIndirectAddress(), Ty);
callCStructCopyConstructor(Dst, Src);
PushCleanupIfNeeded(Arg.getIndirectAddress());
} else {
EmitStoreOfScalar(Arg.getDirectValue(), Dst, /* isInitialization */ true);
}
} else {
bool DoStore = false;
bool IsScalar = hasScalarEvaluationKind(Ty);
// If we already have a pointer to the argument, reuse the input pointer.
@@ -2494,36 +2528,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
DeclPtr = DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
*this, V, SrcLangAS, DestLangAS, T, true));
}
// Push a destructor cleanup for this parameter if the ABI requires it.
// Don't push a cleanup in a thunk for a method that will also emit a
// cleanup.
if (Ty->isRecordType() && !CurFuncIsThunk &&
Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
if (QualType::DestructionKind DtorKind =
D.needsDestruction(getContext())) {
assert((DtorKind == QualType::DK_cxx_destructor ||
DtorKind == QualType::DK_nontrivial_c_struct) &&
"unexpected destructor type");
pushDestroy(DtorKind, DeclPtr, Ty);
CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
EHStack.stable_begin();
}
}
PushCleanupIfNeeded(DeclPtr);
} else {
// Check if the parameter address is controlled by OpenMP runtime.
Address OpenMPLocalAddr =
getLangOpts().OpenMP
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
: Address::invalid();
if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
DeclPtr = OpenMPLocalAddr;
AllocaPtr = DeclPtr;
} else {
// Otherwise, create a temporary to hold the value.
// Create a temporary to hold the value.
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
D.getName() + ".addr", &AllocaPtr);
}
DoStore = true;
}
@@ -2561,8 +2570,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
DoStore = false;
}
else
} else
// Don't use objc_retainBlock for block pointers, because we
// don't want to Block_copy something just because we got it
// as a parameter.
@@ -2571,10 +2579,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
} else {
// Push the cleanup for a consumed parameter.
if (isConsumed) {
ARCPreciseLifetime_t precise = (D.hasAttr<ObjCPreciseLifetimeAttr>()
? ARCPreciseLifetime : ARCImpreciseLifetime);
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), ArgVal,
precise);
ARCPreciseLifetime_t precise =
(D.hasAttr<ObjCPreciseLifetimeAttr>() ? ARCPreciseLifetime
: ARCImpreciseLifetime);
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(),
ArgVal, precise);
}
if (lt == Qualifiers::OCL_Weak) {
@@ -2591,6 +2600,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// Store the initial value into the alloca.
if (DoStore)
EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
}
setAddrOfLocalVar(&D, DeclPtr);

View File

@@ -0,0 +1,72 @@
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
// expected-no-diagnostics
extern int printf(const char *, ...);
extern "C" {
struct S {
int a;
float b;
};
// CHECK: define{{.*}}void @test(%struct.S* noundef byval(%struct.S) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
// CHECK: bitcast i8* [[g]] to %struct.S*
// CHECK: bitcast %struct.S* [[arg]] to i8**
// CHECK: call void [[cc:@__copy_constructor[_0-9a-zA-Z]+]]
void test(S s) {
#pragma omp parallel for
for (int i = 0; i < s.a; ++i) {
printf("%d : %d : %f\n", i, s.a, s.b);
}
}
}
void foo() {
#pragma omp target teams num_teams(1)
{
S s;
s.a = 7;
s.b = 11;
test(s);
}
}
struct BB;
struct SS {
int a;
double b;
BB *c;
SS() = default;
SS(const SS &);
SS(SS &&) = delete;
};
extern "C" {
// CHECK: define{{.*}}void @test2(%struct.SS* noundef byval(%struct.SS) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
// CHECK: bitcast i8* [[g]] to %struct.SS*
// CHECK: bitcast %struct.SS* [[arg]] to i8**
// CHECK: call void [[cc2:@__copy_constructor[_0-9a-zA-Z]+]]
void test2(SS s) {
#pragma omp parallel for
for (int i = 0; i < s.a; ++i) {
printf("%d : %d : %f\n", i, s.a, s.b);
}
}
}
void bar() {
#pragma omp target teams num_teams(1)
{
SS s;
s.a = 7;
s.b = 11;
test2(s);
}
}
// CHECK: void [[cc]]
// CHECK: void [[cc2]]

View File

@@ -108,6 +108,7 @@ set(include_files
${include_directory}/Debug.h
${include_directory}/Interface.h
${include_directory}/Mapping.h
${include_directory}/Memory.h
${include_directory}/State.h
${include_directory}/Synchronization.h
${include_directory}/Types.h
@@ -119,6 +120,7 @@ set(src_files
${source_directory}/Debug.cpp
${source_directory}/Kernel.cpp
${source_directory}/Mapping.cpp
${source_directory}/Memory.cpp
${source_directory}/Misc.cpp
${source_directory}/Parallelism.cpp
${source_directory}/Reduction.cpp

View File

@@ -0,0 +1,26 @@
//===--- Memory.h - OpenMP device runtime memory allocator -------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_CONFIGURATION_H
#define OMPTARGET_CONFIGURATION_H
#include "Types.h"
using size_t = uint64_t;
extern "C" {
void *malloc(size_t Size);
void free(void *);
}
#endif

View File

@@ -3,6 +3,7 @@ target_sources(omptarget.devicertl PRIVATE
Debug.cpp
Kernel.cpp
Mapping.cpp
Memory.cpp
Misc.cpp
Parallelism.cpp
Reduction.cpp

View File

@@ -0,0 +1,43 @@
//===------- Memory.cpp - OpenMP device runtime memory allocator -- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
#pragma omp begin declare target device_type(nohost)
#include "Memory.h"
char *CONSTANT(omptarget_device_heap_buffer)
__attribute__((used, retain, weak, visibility("protected")));
size_t CONSTANT(omptarget_device_heap_size)
__attribute__((used, retain, weak, visibility("protected")));
__attribute__((used, retain, weak, visibility("protected")))
size_t omptarget_device_heap_cur_pos = 0;
extern "C" {
void *malloc(size_t Size) {
constexpr const size_t Alignment = 16;
Size = (Size + Alignment - 1) & ~(Alignment - 1);
if (Size + omptarget_device_heap_cur_pos < omptarget_device_heap_size) {
void *R = omptarget_device_heap_buffer + omptarget_device_heap_cur_pos;
omptarget_device_heap_cur_pos += Size;
return R;
}
return nullptr;
}
void free(void *) {}
}
#pragma omp end declare target

View File

@@ -964,6 +964,82 @@ public:
}
}
// Initialize heap buffer
{
const char *BufferVarName = "omptarget_device_heap_buffer";
const char *SizeVarName = "omptarget_device_heap_size";
CUdeviceptr BufferVarPtr;
CUdeviceptr SizeVarPtr;
size_t BufferVarSize;
size_t SizeVarSize;
Err = cuModuleGetGlobal(&BufferVarPtr, &BufferVarSize, Module,
BufferVarName);
if (Err == CUDA_SUCCESS) {
if (BufferVarSize != sizeof(uint64_t)) {
REPORT("Global global heap buffer pointer '%s' - size mismatch (%zu "
"!= %zu)\n",
BufferVarName, BufferVarSize, sizeof(uint64_t));
CUDA_ERR_STRING(Err);
return nullptr;
}
Err = cuModuleGetGlobal(&SizeVarPtr, &SizeVarSize, Module, SizeVarName);
if (Err == CUDA_SUCCESS) {
if (SizeVarSize != sizeof(uint64_t)) {
REPORT("Global global heap size variable '%s' - size mismatch (%zu "
"!= %zu)\n",
SizeVarName, SizeVarSize, sizeof(uint64_t));
CUDA_ERR_STRING(Err);
return nullptr;
}
CUdeviceptr BufferPtr;
size_t HeapSize = 6442450944U;
Err = cuMemAlloc(&BufferPtr, HeapSize);
if (Err != CUDA_SUCCESS) {
REPORT("Error when allocating heap bufferm size = %zu\n", HeapSize);
CUDA_ERR_STRING(Err);
return nullptr;
}
Err = cuMemcpyHtoD(BufferVarPtr, &BufferPtr, BufferVarSize);
if (Err != CUDA_SUCCESS) {
REPORT("Error when copying data from host to device. Pointers: "
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
DPxPTR(&BufferPtr), DPxPTR(BufferVarPtr), BufferVarSize);
CUDA_ERR_STRING(Err);
return nullptr;
}
Err = cuMemcpyHtoD(SizeVarPtr, &HeapSize, SizeVarSize);
if (Err != CUDA_SUCCESS) {
REPORT("Error when copying data from host to device. Pointers: "
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
DPxPTR(&HeapSize), DPxPTR(SizeVarPtr), SizeVarSize);
CUDA_ERR_STRING(Err);
return nullptr;
}
DP("Successfully set heap buffer. omptarget_device_heap_buffer "
"= " DPxMOD ", omptarget_device_heap_size = %zu\n",
DPxPTR(BufferPtr), HeapSize);
} else {
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
SizeVarName);
DP("Continue, considering this is an image does not require heap "
"allocation.\n");
}
} else {
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
BufferVarName);
DP("Continue, considering this is an image does not require heap "
"allocation.\n");
}
}
return getOffloadEntriesTable(DeviceId);
}