Compare commits
2 Commits
globalisel
...
test-alloc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
abd54cf73d | ||
|
|
4c0a6df708 |
@@ -2468,8 +2468,42 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
|||||||
(IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar);
|
(IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto PushCleanupIfNeeded = [this, Ty, &D](Address DeclPtr) {
|
||||||
|
// Push a destructor cleanup for this parameter if the ABI requires it.
|
||||||
|
// Don't push a cleanup in a thunk for a method that will also emit a
|
||||||
|
// cleanup.
|
||||||
|
if (Ty->isRecordType() && !CurFuncIsThunk &&
|
||||||
|
Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
|
||||||
|
if (QualType::DestructionKind DtorKind =
|
||||||
|
D.needsDestruction(getContext())) {
|
||||||
|
assert((DtorKind == QualType::DK_cxx_destructor ||
|
||||||
|
DtorKind == QualType::DK_nontrivial_c_struct) &&
|
||||||
|
"unexpected destructor type");
|
||||||
|
pushDestroy(DtorKind, DeclPtr, Ty);
|
||||||
|
CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
|
||||||
|
EHStack.stable_begin();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
Address DeclPtr = Address::invalid();
|
Address DeclPtr = Address::invalid();
|
||||||
Address AllocaPtr = Address::invalid();
|
Address AllocaPtr = Address::invalid();
|
||||||
|
Address OpenMPLocalAddr =
|
||||||
|
getLangOpts().OpenMP
|
||||||
|
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
|
||||||
|
: Address::invalid();
|
||||||
|
if (OpenMPLocalAddr.isValid()) {
|
||||||
|
DeclPtr = OpenMPLocalAddr;
|
||||||
|
AllocaPtr = DeclPtr;
|
||||||
|
LValue Dst = MakeAddrLValue(DeclPtr, Ty);
|
||||||
|
if (Arg.isIndirect()) {
|
||||||
|
LValue Src = MakeAddrLValue(Arg.getIndirectAddress(), Ty);
|
||||||
|
callCStructCopyConstructor(Dst, Src);
|
||||||
|
PushCleanupIfNeeded(Arg.getIndirectAddress());
|
||||||
|
} else {
|
||||||
|
EmitStoreOfScalar(Arg.getDirectValue(), Dst, /* isInitialization */ true);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
bool DoStore = false;
|
bool DoStore = false;
|
||||||
bool IsScalar = hasScalarEvaluationKind(Ty);
|
bool IsScalar = hasScalarEvaluationKind(Ty);
|
||||||
// If we already have a pointer to the argument, reuse the input pointer.
|
// If we already have a pointer to the argument, reuse the input pointer.
|
||||||
@@ -2494,36 +2528,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
|||||||
DeclPtr = DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
|
DeclPtr = DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
|
||||||
*this, V, SrcLangAS, DestLangAS, T, true));
|
*this, V, SrcLangAS, DestLangAS, T, true));
|
||||||
}
|
}
|
||||||
|
PushCleanupIfNeeded(DeclPtr);
|
||||||
// Push a destructor cleanup for this parameter if the ABI requires it.
|
|
||||||
// Don't push a cleanup in a thunk for a method that will also emit a
|
|
||||||
// cleanup.
|
|
||||||
if (Ty->isRecordType() && !CurFuncIsThunk &&
|
|
||||||
Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
|
|
||||||
if (QualType::DestructionKind DtorKind =
|
|
||||||
D.needsDestruction(getContext())) {
|
|
||||||
assert((DtorKind == QualType::DK_cxx_destructor ||
|
|
||||||
DtorKind == QualType::DK_nontrivial_c_struct) &&
|
|
||||||
"unexpected destructor type");
|
|
||||||
pushDestroy(DtorKind, DeclPtr, Ty);
|
|
||||||
CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
|
|
||||||
EHStack.stable_begin();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Check if the parameter address is controlled by OpenMP runtime.
|
// Create a temporary to hold the value.
|
||||||
Address OpenMPLocalAddr =
|
|
||||||
getLangOpts().OpenMP
|
|
||||||
? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
|
|
||||||
: Address::invalid();
|
|
||||||
if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
|
|
||||||
DeclPtr = OpenMPLocalAddr;
|
|
||||||
AllocaPtr = DeclPtr;
|
|
||||||
} else {
|
|
||||||
// Otherwise, create a temporary to hold the value.
|
|
||||||
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
|
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
|
||||||
D.getName() + ".addr", &AllocaPtr);
|
D.getName() + ".addr", &AllocaPtr);
|
||||||
}
|
|
||||||
DoStore = true;
|
DoStore = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2561,8 +2570,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
|||||||
EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
|
EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
|
||||||
EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
|
EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
|
||||||
DoStore = false;
|
DoStore = false;
|
||||||
}
|
} else
|
||||||
else
|
|
||||||
// Don't use objc_retainBlock for block pointers, because we
|
// Don't use objc_retainBlock for block pointers, because we
|
||||||
// don't want to Block_copy something just because we got it
|
// don't want to Block_copy something just because we got it
|
||||||
// as a parameter.
|
// as a parameter.
|
||||||
@@ -2571,10 +2579,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
|||||||
} else {
|
} else {
|
||||||
// Push the cleanup for a consumed parameter.
|
// Push the cleanup for a consumed parameter.
|
||||||
if (isConsumed) {
|
if (isConsumed) {
|
||||||
ARCPreciseLifetime_t precise = (D.hasAttr<ObjCPreciseLifetimeAttr>()
|
ARCPreciseLifetime_t precise =
|
||||||
? ARCPreciseLifetime : ARCImpreciseLifetime);
|
(D.hasAttr<ObjCPreciseLifetimeAttr>() ? ARCPreciseLifetime
|
||||||
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), ArgVal,
|
: ARCImpreciseLifetime);
|
||||||
precise);
|
EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(),
|
||||||
|
ArgVal, precise);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lt == Qualifiers::OCL_Weak) {
|
if (lt == Qualifiers::OCL_Weak) {
|
||||||
@@ -2591,6 +2600,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
|
|||||||
// Store the initial value into the alloca.
|
// Store the initial value into the alloca.
|
||||||
if (DoStore)
|
if (DoStore)
|
||||||
EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
|
EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
|
||||||
|
}
|
||||||
|
|
||||||
setAddrOfLocalVar(&D, DeclPtr);
|
setAddrOfLocalVar(&D, DeclPtr);
|
||||||
|
|
||||||
|
|||||||
72
clang/test/OpenMP/globalization_byval_struct.cpp
Normal file
72
clang/test/OpenMP/globalization_byval_struct.cpp
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||||
|
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
|
||||||
|
// expected-no-diagnostics
|
||||||
|
|
||||||
|
extern int printf(const char *, ...);
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
struct S {
|
||||||
|
int a;
|
||||||
|
float b;
|
||||||
|
};
|
||||||
|
|
||||||
|
// CHECK: define{{.*}}void @test(%struct.S* noundef byval(%struct.S) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
|
||||||
|
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
|
||||||
|
// CHECK: bitcast i8* [[g]] to %struct.S*
|
||||||
|
// CHECK: bitcast %struct.S* [[arg]] to i8**
|
||||||
|
// CHECK: call void [[cc:@__copy_constructor[_0-9a-zA-Z]+]]
|
||||||
|
void test(S s) {
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 0; i < s.a; ++i) {
|
||||||
|
printf("%d : %d : %f\n", i, s.a, s.b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void foo() {
|
||||||
|
#pragma omp target teams num_teams(1)
|
||||||
|
{
|
||||||
|
S s;
|
||||||
|
s.a = 7;
|
||||||
|
s.b = 11;
|
||||||
|
test(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BB;
|
||||||
|
|
||||||
|
struct SS {
|
||||||
|
int a;
|
||||||
|
double b;
|
||||||
|
BB *c;
|
||||||
|
SS() = default;
|
||||||
|
SS(const SS &);
|
||||||
|
SS(SS &&) = delete;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
// CHECK: define{{.*}}void @test2(%struct.SS* noundef byval(%struct.SS) align {{[0-9]+}} [[arg:%[0-9a-zA-Z]+]])
|
||||||
|
// CHECK: [[g:%[0-9a-zA-Z]+]] = call align {{[0-9]+}} i8* @__kmpc_alloc_shared
|
||||||
|
// CHECK: bitcast i8* [[g]] to %struct.SS*
|
||||||
|
// CHECK: bitcast %struct.SS* [[arg]] to i8**
|
||||||
|
// CHECK: call void [[cc2:@__copy_constructor[_0-9a-zA-Z]+]]
|
||||||
|
void test2(SS s) {
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 0; i < s.a; ++i) {
|
||||||
|
printf("%d : %d : %f\n", i, s.a, s.b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bar() {
|
||||||
|
#pragma omp target teams num_teams(1)
|
||||||
|
{
|
||||||
|
SS s;
|
||||||
|
s.a = 7;
|
||||||
|
s.b = 11;
|
||||||
|
test2(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK: void [[cc]]
|
||||||
|
// CHECK: void [[cc2]]
|
||||||
@@ -108,6 +108,7 @@ set(include_files
|
|||||||
${include_directory}/Debug.h
|
${include_directory}/Debug.h
|
||||||
${include_directory}/Interface.h
|
${include_directory}/Interface.h
|
||||||
${include_directory}/Mapping.h
|
${include_directory}/Mapping.h
|
||||||
|
${include_directory}/Memory.h
|
||||||
${include_directory}/State.h
|
${include_directory}/State.h
|
||||||
${include_directory}/Synchronization.h
|
${include_directory}/Synchronization.h
|
||||||
${include_directory}/Types.h
|
${include_directory}/Types.h
|
||||||
@@ -119,6 +120,7 @@ set(src_files
|
|||||||
${source_directory}/Debug.cpp
|
${source_directory}/Debug.cpp
|
||||||
${source_directory}/Kernel.cpp
|
${source_directory}/Kernel.cpp
|
||||||
${source_directory}/Mapping.cpp
|
${source_directory}/Mapping.cpp
|
||||||
|
${source_directory}/Memory.cpp
|
||||||
${source_directory}/Misc.cpp
|
${source_directory}/Misc.cpp
|
||||||
${source_directory}/Parallelism.cpp
|
${source_directory}/Parallelism.cpp
|
||||||
${source_directory}/Reduction.cpp
|
${source_directory}/Reduction.cpp
|
||||||
|
|||||||
26
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
26
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
//===--- Memory.h - OpenMP device runtime memory allocator -------- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef OMPTARGET_CONFIGURATION_H
|
||||||
|
#define OMPTARGET_CONFIGURATION_H
|
||||||
|
|
||||||
|
#include "Types.h"
|
||||||
|
|
||||||
|
using size_t = uint64_t;
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
void *malloc(size_t Size);
|
||||||
|
|
||||||
|
void free(void *);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -3,6 +3,7 @@ target_sources(omptarget.devicertl PRIVATE
|
|||||||
Debug.cpp
|
Debug.cpp
|
||||||
Kernel.cpp
|
Kernel.cpp
|
||||||
Mapping.cpp
|
Mapping.cpp
|
||||||
|
Memory.cpp
|
||||||
Misc.cpp
|
Misc.cpp
|
||||||
Parallelism.cpp
|
Parallelism.cpp
|
||||||
Reduction.cpp
|
Reduction.cpp
|
||||||
|
|||||||
43
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
43
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
//===------- Memory.cpp - OpenMP device runtime memory allocator -- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#pragma omp begin declare target device_type(nohost)
|
||||||
|
|
||||||
|
#include "Memory.h"
|
||||||
|
|
||||||
|
char *CONSTANT(omptarget_device_heap_buffer)
|
||||||
|
__attribute__((used, retain, weak, visibility("protected")));
|
||||||
|
|
||||||
|
size_t CONSTANT(omptarget_device_heap_size)
|
||||||
|
__attribute__((used, retain, weak, visibility("protected")));
|
||||||
|
|
||||||
|
__attribute__((used, retain, weak, visibility("protected")))
|
||||||
|
size_t omptarget_device_heap_cur_pos = 0;
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
void *malloc(size_t Size) {
|
||||||
|
constexpr const size_t Alignment = 16;
|
||||||
|
Size = (Size + Alignment - 1) & ~(Alignment - 1);
|
||||||
|
|
||||||
|
if (Size + omptarget_device_heap_cur_pos < omptarget_device_heap_size) {
|
||||||
|
void *R = omptarget_device_heap_buffer + omptarget_device_heap_cur_pos;
|
||||||
|
omptarget_device_heap_cur_pos += Size;
|
||||||
|
return R;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void free(void *) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma omp end declare target
|
||||||
@@ -964,6 +964,82 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize heap buffer
|
||||||
|
{
|
||||||
|
const char *BufferVarName = "omptarget_device_heap_buffer";
|
||||||
|
const char *SizeVarName = "omptarget_device_heap_size";
|
||||||
|
CUdeviceptr BufferVarPtr;
|
||||||
|
CUdeviceptr SizeVarPtr;
|
||||||
|
size_t BufferVarSize;
|
||||||
|
size_t SizeVarSize;
|
||||||
|
|
||||||
|
Err = cuModuleGetGlobal(&BufferVarPtr, &BufferVarSize, Module,
|
||||||
|
BufferVarName);
|
||||||
|
if (Err == CUDA_SUCCESS) {
|
||||||
|
if (BufferVarSize != sizeof(uint64_t)) {
|
||||||
|
REPORT("Global global heap buffer pointer '%s' - size mismatch (%zu "
|
||||||
|
"!= %zu)\n",
|
||||||
|
BufferVarName, BufferVarSize, sizeof(uint64_t));
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err = cuModuleGetGlobal(&SizeVarPtr, &SizeVarSize, Module, SizeVarName);
|
||||||
|
if (Err == CUDA_SUCCESS) {
|
||||||
|
if (SizeVarSize != sizeof(uint64_t)) {
|
||||||
|
REPORT("Global global heap size variable '%s' - size mismatch (%zu "
|
||||||
|
"!= %zu)\n",
|
||||||
|
SizeVarName, SizeVarSize, sizeof(uint64_t));
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUdeviceptr BufferPtr;
|
||||||
|
size_t HeapSize = 6442450944U;
|
||||||
|
|
||||||
|
Err = cuMemAlloc(&BufferPtr, HeapSize);
|
||||||
|
if (Err != CUDA_SUCCESS) {
|
||||||
|
REPORT("Error when allocating heap bufferm size = %zu\n", HeapSize);
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err = cuMemcpyHtoD(BufferVarPtr, &BufferPtr, BufferVarSize);
|
||||||
|
if (Err != CUDA_SUCCESS) {
|
||||||
|
REPORT("Error when copying data from host to device. Pointers: "
|
||||||
|
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||||
|
DPxPTR(&BufferPtr), DPxPTR(BufferVarPtr), BufferVarSize);
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err = cuMemcpyHtoD(SizeVarPtr, &HeapSize, SizeVarSize);
|
||||||
|
if (Err != CUDA_SUCCESS) {
|
||||||
|
REPORT("Error when copying data from host to device. Pointers: "
|
||||||
|
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||||
|
DPxPTR(&HeapSize), DPxPTR(SizeVarPtr), SizeVarSize);
|
||||||
|
CUDA_ERR_STRING(Err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
DP("Successfully set heap buffer. omptarget_device_heap_buffer "
|
||||||
|
"= " DPxMOD ", omptarget_device_heap_size = %zu\n",
|
||||||
|
DPxPTR(BufferPtr), HeapSize);
|
||||||
|
} else {
|
||||||
|
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||||
|
SizeVarName);
|
||||||
|
DP("Continue, considering this is an image does not require heap "
|
||||||
|
"allocation.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||||
|
BufferVarName);
|
||||||
|
DP("Continue, considering this is an image does not require heap "
|
||||||
|
"allocation.\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return getOffloadEntriesTable(DeviceId);
|
return getOffloadEntriesTable(DeviceId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user