Fix GPU First RPCs to work when given a pointer
- Fix GPUFirst memory allocator to work with the new offload plugin. - Fix TeamAllocator to not ignore the first allocation.
This commit is contained in:
@@ -37,4 +37,8 @@ void *realloc(void *ptr, size_t new_size) {
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
/// Allocator initialization entry point. In this implementation it is a
/// no-op: it takes no arguments, performs no work, and returns nothing.
void __kmpc_target_init_allocator() {}
|
||||
}
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
@@ -18,11 +18,13 @@
|
||||
|
||||
using namespace ompx;
|
||||
|
||||
char *CONSTANT(omptarget_device_heap_buffer)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
|
||||
size_t CONSTANT(omptarget_device_heap_size)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
[[gnu::used, gnu::retain, gnu::weak,
|
||||
gnu::visibility(
|
||||
"protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
|
||||
[[gnu::used, gnu::retain, gnu::weak,
|
||||
gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
|
||||
__omp_rtl_device_memory_pool_tracker;
|
||||
// TODO: implement Device Debug Allocation Tracker
|
||||
|
||||
namespace {
|
||||
size_t HeapCurPos = 0;
|
||||
@@ -250,8 +252,8 @@ void *malloc(size_t Size) {
|
||||
{
|
||||
mutex::LockGuard LG(HeapLock);
|
||||
|
||||
if (Size + HeapCurPos < omptarget_device_heap_size) {
|
||||
void *R = omptarget_device_heap_buffer + HeapCurPos;
|
||||
if (Size + HeapCurPos < __omp_rtl_device_memory_pool.Size) {
|
||||
void *R = reinterpret_cast<char *>(__omp_rtl_device_memory_pool.Ptr) + HeapCurPos;
|
||||
(void)atomic::add(&HeapCurPos, Size, atomic::acq_rel);
|
||||
MD = AllocationMetadata::getFromAddr(R);
|
||||
}
|
||||
|
||||
@@ -21,11 +21,13 @@
|
||||
|
||||
using namespace ompx;
|
||||
|
||||
char *CONSTANT(omptarget_device_heap_buffer)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
|
||||
size_t CONSTANT(omptarget_device_heap_size)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
[[gnu::used, gnu::retain, gnu::weak,
|
||||
gnu::visibility(
|
||||
"protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
|
||||
[[gnu::used, gnu::retain, gnu::weak,
|
||||
gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
|
||||
__omp_rtl_device_memory_pool_tracker;
|
||||
// TODO: implement Device Debug Allocation Tracker
|
||||
|
||||
namespace {
|
||||
constexpr const size_t Alignment = 16;
|
||||
@@ -92,7 +94,7 @@ template <uint32_t WARP_SIZE, uint32_t TEAM_SIZE> struct WarpAllocator {
|
||||
(mapping::getThreadIdInBlock() || mapping::getBlockIdInKernel()))
|
||||
return;
|
||||
|
||||
size_t HeapSize = omptarget_device_heap_size;
|
||||
size_t HeapSize = __omp_rtl_device_memory_pool.Size;
|
||||
|
||||
FirstThreadHeapSize = HeapSize * FirstThreadRatio / 100;
|
||||
FirstThreadHeapSize = utils::align_down(FirstThreadHeapSize, Alignment);
|
||||
@@ -104,7 +106,7 @@ template <uint32_t WARP_SIZE, uint32_t TEAM_SIZE> struct WarpAllocator {
|
||||
TeamHeapSize = utils::align_down(TeamHeapSize, Alignment);
|
||||
FirstTeamSize = TeamHeapSize;
|
||||
|
||||
char *LastLimit = omptarget_device_heap_buffer;
|
||||
char *LastLimit = reinterpret_cast<char *>(__omp_rtl_device_memory_pool.Ptr);
|
||||
for (int I = 0; I < WARP_SIZE; ++I) {
|
||||
for (int J = 0; J < TEAM_SIZE; ++J) {
|
||||
Entries[I][J] = nullptr;
|
||||
@@ -173,8 +175,8 @@ template <uint32_t WARP_SIZE, uint32_t TEAM_SIZE> struct WarpAllocator {
|
||||
}
|
||||
|
||||
memory::MemoryAllocationInfo getMemoryAllocationInfo(void *P) {
|
||||
if (!utils::isInRange(P, omptarget_device_heap_buffer,
|
||||
omptarget_device_heap_size))
|
||||
if (!utils::isInRange(P, reinterpret_cast<char *>(__omp_rtl_device_memory_pool.Ptr),
|
||||
__omp_rtl_device_memory_pool.Size))
|
||||
return {};
|
||||
|
||||
auto TeamSlot = getTeamSlot();
|
||||
@@ -192,14 +194,16 @@ template <uint32_t WARP_SIZE, uint32_t TEAM_SIZE> struct WarpAllocator {
|
||||
return {};
|
||||
if (E->getEndPtr() <= P)
|
||||
return {};
|
||||
do {
|
||||
bool isFirst = false;
|
||||
while (!isFirst) {
|
||||
if (E->getUserPtr() <= P && P < E->getEndPtr()) {
|
||||
if (!E->isUsed())
|
||||
return {};
|
||||
return {E->getUserPtr(), E->getUserSize()};
|
||||
}
|
||||
isFirst = E->isFirst();
|
||||
E = E->getPrev();
|
||||
} while (!E->isFirst());
|
||||
}
|
||||
}
|
||||
}
|
||||
return {};
|
||||
@@ -211,7 +215,7 @@ private:
|
||||
return Limits[TIdInWarp][TeamSlot - 1];
|
||||
if (TIdInWarp)
|
||||
return Limits[TIdInWarp - 1][TEAM_SIZE - 1];
|
||||
return omptarget_device_heap_buffer;
|
||||
return reinterpret_cast<char *>(__omp_rtl_device_memory_pool.Ptr);
|
||||
}
|
||||
char *getBlockEnd(int32_t TIdInWarp, int32_t TeamSlot) const {
|
||||
return Limits[TIdInWarp][TeamSlot];
|
||||
|
||||
@@ -21,11 +21,13 @@
|
||||
|
||||
using namespace ompx;
|
||||
|
||||
char *CONSTANT(omptarget_device_heap_buffer)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
|
||||
size_t CONSTANT(omptarget_device_heap_size)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
[[gnu::used, gnu::retain, gnu::weak,
|
||||
gnu::visibility(
|
||||
"protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
|
||||
[[gnu::used, gnu::retain, gnu::weak,
|
||||
gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
|
||||
__omp_rtl_device_memory_pool_tracker;
|
||||
// TODO: implement Device Debug Allocation Tracker
|
||||
|
||||
namespace {
|
||||
constexpr const size_t Alignment = 16;
|
||||
@@ -92,7 +94,7 @@ template <int32_t WARP_SIZE> struct WarpAllocator {
|
||||
(mapping::getThreadIdInBlock() || mapping::getBlockId()))
|
||||
return;
|
||||
|
||||
size_t HeapSize = omptarget_device_heap_size;
|
||||
size_t HeapSize = __omp_rtl_device_memory_pool.Size;
|
||||
size_t FirstThreadHeapSize = HeapSize * FirstThreadRatio / 100;
|
||||
FirstThreadHeapSize = utils::align_down(FirstThreadHeapSize, Alignment);
|
||||
size_t OtherThreadHeapSize =
|
||||
@@ -102,7 +104,7 @@ template <int32_t WARP_SIZE> struct WarpAllocator {
|
||||
for (int I = 0; I < WARP_SIZE; ++I) {
|
||||
Entries[I] = nullptr;
|
||||
size_t PrivateOffset = OtherThreadHeapSize * I + FirstThreadHeapSize;
|
||||
Limits[I] = omptarget_device_heap_buffer + PrivateOffset;
|
||||
Limits[I] = reinterpret_cast<char *>(__omp_rtl_device_memory_pool.Ptr) + PrivateOffset;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,8 +162,8 @@ template <int32_t WARP_SIZE> struct WarpAllocator {
|
||||
}
|
||||
|
||||
memory::MemoryAllocationInfo getMemoryAllocationInfo(void *P) {
|
||||
if (!utils::isInRange(P, omptarget_device_heap_buffer,
|
||||
omptarget_device_heap_size))
|
||||
if (!utils::isInRange(P, reinterpret_cast<char *>(__omp_rtl_device_memory_pool.Ptr),
|
||||
__omp_rtl_device_memory_pool.Size))
|
||||
return {};
|
||||
|
||||
auto TIdInWarp = mapping::getThreadIdInWarp();
|
||||
@@ -190,7 +192,7 @@ template <int32_t WARP_SIZE> struct WarpAllocator {
|
||||
|
||||
private:
|
||||
char *getBlockBegin(int32_t TIdInWarp) const {
|
||||
return TIdInWarp ? Limits[TIdInWarp - 1] : omptarget_device_heap_buffer;
|
||||
return TIdInWarp ? Limits[TIdInWarp - 1] : reinterpret_cast<char *>(__omp_rtl_device_memory_pool.Ptr);
|
||||
}
|
||||
char *getBlockEnd(int32_t TIdInWarp) const { return Limits[TIdInWarp]; }
|
||||
|
||||
|
||||
@@ -426,8 +426,8 @@ int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
|
||||
uint64_t &ReqPtrArgOffset);
|
||||
|
||||
// Host RPC support
|
||||
int64_t __kmpc_host_rpc_get_arg(void *Wrapper, int32_t ArgNo);
|
||||
void __kmpc_host_rpc_set_ret_val(void *Wrapper, int64_t RetVal);
|
||||
int64_t __kmpc_host_rpc_get_arg(void *descriptor, int32_t ArgNo);
|
||||
void __kmpc_host_rpc_set_ret_val(void *descriptor, int64_t RetVal);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -514,31 +514,19 @@ EXTERN void __tgt_target_nowait_query(void **AsyncHandle) {
|
||||
}
|
||||
|
||||
// Host RPC support functions.
|
||||
EXTERN int64_t __kmpc_host_rpc_get_arg(void *Wrapper, int32_t ArgNum) {
|
||||
auto *W = reinterpret_cast<hostrpc::DescriptorWrapper *>(Wrapper);
|
||||
auto &SD = W->D;
|
||||
EXTERN int64_t __kmpc_host_rpc_get_arg(void *descriptor, int32_t ArgNum) {
|
||||
hostrpc::Descriptor &SD = *reinterpret_cast<hostrpc::Descriptor *>(descriptor);
|
||||
|
||||
assert(ArgNum < SD.NumArgs && "out-of-range argument");
|
||||
|
||||
int64_t ArgVal = SD.Args[ArgNum].Value;
|
||||
void *ArgPtr = reinterpret_cast<void *>(ArgVal);
|
||||
|
||||
DP("[host-rpc] get argno=%d arg=%lx...\n", ArgNum, ArgVal);
|
||||
|
||||
if (W->StdIn && SD.Args[ArgNum].ArgType == hostrpc::ARG_POINTER &&
|
||||
ArgPtr == W->StdIn)
|
||||
return (int64_t)stdin;
|
||||
if (W->StdOut && SD.Args[ArgNum].ArgType == hostrpc::ARG_POINTER &&
|
||||
ArgPtr == W->StdOut)
|
||||
return (int64_t)stdout;
|
||||
if (W->StdErr && SD.Args[ArgNum].ArgType == hostrpc::ARG_POINTER &&
|
||||
ArgPtr == W->StdErr)
|
||||
return (int64_t)stderr;
|
||||
|
||||
return ArgVal;
|
||||
}
|
||||
|
||||
EXTERN void __kmpc_host_rpc_set_ret_val(void *Wrapper, int64_t RetVal) {
|
||||
auto &SD = reinterpret_cast<hostrpc::DescriptorWrapper *>(Wrapper)->D;
|
||||
EXTERN void __kmpc_host_rpc_set_ret_val(void *descriptor, int64_t RetVal) {
|
||||
hostrpc::Descriptor &SD = *reinterpret_cast<hostrpc::Descriptor *>(descriptor);
|
||||
SD.ReturnValue = RetVal;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user