Compare commits

...

1 Commits

Author SHA1 Message Date
Shilei Tian
67fee34b9d [PoC][OpenMP] Use OpenSHMEM memory allocator for Nvidia target offloading 2022-07-14 13:04:30 -04:00
10 changed files with 111 additions and 9 deletions

View File

@@ -461,6 +461,12 @@ struct DeviceTy {
int32_t destroyEvent(void *Event);
/// }
/// PoC
/// {
/// Hand a user-provided allocator/deallocator pair to the plugin that drives
/// this device. The definition forwards both pointers to the plugin's
/// set_device_allocator entry point when the plugin exports one and returns
/// OFFLOAD_SUCCESS otherwise.
int32_t set_device_allocator(void *Allocator, void *Deallocator);
/// Ask the plugin to stop using a previously installed allocator pair. The
/// definition forwards to the plugin's reset_device_allocator entry point
/// when the plugin exports one and returns OFFLOAD_SUCCESS otherwise.
int32_t reset_device_allocator();
/// }
private:
// Call to RTL
void init(); // To be called only via DeviceTy::initOnce()

View File

@@ -339,6 +339,11 @@ int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
void __tgt_set_info_flag(uint32_t);
int __tgt_print_device_info(int64_t DeviceId);
// PoC
// Install a user-provided allocator/deallocator pair on device DeviceId.
// Both callbacks travel as untyped pointers; the CUDA plugin calls them as
// void *(size_t, int) and void(void *, int) respectively.
// NOTE(review): DeviceId is used to index the device table without a
// validity check in the definition — callers must pass a valid device.
int __tgt_set_device_allocator(int64_t DeviceId, void *Allocator,
void *Deallocator);
// Revert device DeviceId to the plugin's built-in allocation path.
int __tgt_reset_device_allocator(int64_t DeviceId);
#ifdef __cplusplus
}
#endif

View File

@@ -179,6 +179,13 @@ int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr);
int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr,
const char **ErrStr);
// PoC
// {
// Optional plugin entry point: install a user-provided allocator/deallocator
// pair for device ID. libomptarget looks this symbol up by name with dlsym, so
// a plugin may leave it out.
int32_t __tgt_rtl_set_device_allocator(int32_t ID, void *Allocator,
void *Deallocator);
// Optional plugin entry point: stop using the user-provided allocator pair on
// device ID. Also looked up by name with dlsym.
int32_t __tgt_rtl_reset_device_allocator(int32_t ID);
// }
#ifdef __cplusplus
}
#endif

View File

@@ -67,6 +67,8 @@ struct RTLInfoTy {
typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
const char **);
// Function types of the optional allocator-override plugin entry points.
// The device ID is int32_t so that these types match the plugin declarations
// of __tgt_rtl_set_device_allocator / __tgt_rtl_reset_device_allocator exactly;
// calling a function through a pointer of a different function type is
// undefined behaviour.
typedef int32_t(set_device_allocator_ty)(int32_t, void *, void *);
typedef int32_t(reset_device_allocator_ty)(int32_t);
int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
@@ -114,6 +116,8 @@ struct RTLInfoTy {
init_async_info_ty *init_async_info = nullptr;
init_device_into_ty *init_device_info = nullptr;
release_async_info_ty *release_async_info = nullptr;
// Optional allocator-override entry points. They default to nullptr and are
// filled in by RTLsTy::loadRTLs() from dlsym lookups of
// "__tgt_rtl_set_device_allocator" / "__tgt_rtl_reset_device_allocator", so
// callers must test them before use.
set_device_allocator_ty *set_device_allocator = nullptr;
reset_device_allocator_ty *reset_device_allocator = nullptr;
// Are there images associated with this RTL.
bool IsUsed = false;

View File

@@ -363,8 +363,18 @@ class DeviceRTLTy {
/// allocate and free memory.
class CUDADeviceAllocatorTy : public DeviceAllocatorTy {
std::unordered_map<void *, TargetAllocTy> HostPinnedAllocs;
// ID of the device this allocator serves; passed as the second argument to
// the user-defined callbacks.
int DeviceId;
// True while a user-defined allocator pair is installed via setAllocator().
bool UseUserDefinedAllocator = false;
// Callback shapes: allocator takes (Size, DeviceId) and returns the
// allocation; deallocator takes (Pointer, DeviceId).
using UserDefinedAllocatorTy = void *(size_t, int);
using UserDefinedDeallocatorTy = void(void *, int);
// Explicitly null until setAllocator() stores real callbacks, so the pointers
// never hold an indeterminate value.
UserDefinedAllocatorTy *UserDefinedAllocator = nullptr;
UserDefinedDeallocatorTy *UserDefinedDeallocator = nullptr;
public:
// Remember which device this allocator belongs to; the ID is later handed to
// the user-defined allocator/deallocator callbacks.
CUDADeviceAllocatorTy(int DeviceId) : DeviceId(DeviceId) {}
void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
if (Size == 0)
return nullptr;
@@ -374,11 +384,15 @@ class DeviceRTLTy {
switch (Kind) {
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
CUdeviceptr DevicePtr;
Err = cuMemAlloc(&DevicePtr, Size);
MemAlloc = (void *)DevicePtr;
if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
return nullptr;
if (UseUserDefinedAllocator) {
MemAlloc = UserDefinedAllocator(Size, DeviceId);
} else {
CUdeviceptr DevicePtr;
Err = cuMemAlloc(&DevicePtr, Size);
MemAlloc = (void *)DevicePtr;
if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
return nullptr;
}
break;
case TARGET_ALLOC_HOST:
void *HostPtr;
@@ -411,9 +425,13 @@ class DeviceRTLTy {
case TARGET_ALLOC_DEFAULT:
case TARGET_ALLOC_DEVICE:
case TARGET_ALLOC_SHARED:
Err = cuMemFree((CUdeviceptr)TgtPtr);
if (!checkResult(Err, "Error returned from cuMemFree\n"))
return OFFLOAD_FAIL;
if (UseUserDefinedAllocator)
UserDefinedDeallocator(TgtPtr, DeviceId);
else {
Err = cuMemFree((CUdeviceptr)TgtPtr);
if (!checkResult(Err, "Error returned from cuMemFree\n"))
return OFFLOAD_FAIL;
}
break;
case TARGET_ALLOC_HOST:
Err = cuMemFreeHost(TgtPtr);
@@ -424,6 +442,20 @@ class DeviceRTLTy {
return OFFLOAD_SUCCESS;
}
int setAllocator(void *Allocator, void *Deallocator) {
UseUserDefinedAllocator = true;
UserDefinedAllocator = (UserDefinedAllocatorTy *)Allocator;
UserDefinedDeallocator = (UserDefinedDeallocatorTy *)Deallocator;
return OFFLOAD_SUCCESS;
}
int resetAllocator() {
UseUserDefinedAllocator = false;
return OFFLOAD_SUCCESS;
}
};
/// A vector of device allocators
@@ -557,7 +589,7 @@ public:
}
for (int I = 0; I < NumberOfDevices; ++I)
DeviceAllocators.emplace_back();
DeviceAllocators.emplace_back(I);
// Get the size threshold from environment variable
std::pair<size_t, bool> Res = MemoryManagerTy::getSizeThresholdFromEnv();
@@ -1505,6 +1537,14 @@ public:
return OFFLOAD_SUCCESS;
}
int setDeviceAllocator(int DeviceId, void *Allocator, void *Deallocator) {
return DeviceAllocators[DeviceId].setAllocator(Allocator, Deallocator);
}
int resetDeviceAllocator(int DeviceId) {
return DeviceAllocators[DeviceId].resetAllocator();
}
};
DeviceRTLTy DeviceRTL;
@@ -1816,6 +1856,15 @@ int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr);
}
// Plugin entry point: pass the user allocator/deallocator pair for DeviceId on
// to the global DeviceRTL object and report its status.
int32_t __tgt_rtl_set_device_allocator(int32_t DeviceId, void *Allocator,
                                       void *Deallocator) {
  const int32_t Status =
      DeviceRTL.setDeviceAllocator(DeviceId, Allocator, Deallocator);
  return Status;
}
// Plugin entry point: ask the global DeviceRTL object to drop the user
// allocator for DeviceId and report its status.
int32_t __tgt_rtl_reset_device_allocator(int32_t DeviceId) {
  const int32_t Status = DeviceRTL.resetDeviceAllocator(DeviceId);
  return Status;
}
#ifdef __cplusplus
}
#endif

View File

@@ -32,6 +32,8 @@ VERS1.0 {
__tgt_rtl_destroy_event;
__tgt_rtl_init_device_info;
__tgt_rtl_init_async_info;
__tgt_rtl_set_device_allocator;
__tgt_rtl_reset_device_allocator;
local:
*;
};

View File

@@ -663,6 +663,20 @@ int32_t DeviceTy::destroyEvent(void *Event) {
return OFFLOAD_SUCCESS;
}
/// Forward a user-provided allocator/deallocator pair to the plugin.
/// NOTE(review): when the plugin exports no set_device_allocator hook this
/// reports OFFLOAD_SUCCESS even though nothing was installed.
int32_t DeviceTy::set_device_allocator(void *Allocator, void *Deallocator) {
  // The hook is optional; without it there is nothing to forward to.
  if (!RTL->set_device_allocator)
    return OFFLOAD_SUCCESS;

  return RTL->set_device_allocator(RTLDeviceID, Allocator, Deallocator);
}
/// Ask the plugin to drop a previously installed user allocator. Reports
/// OFFLOAD_SUCCESS when the plugin exports no reset_device_allocator hook.
int32_t DeviceTy::reset_device_allocator() {
  // The hook is optional; without it there is nothing to forward to.
  if (!RTL->reset_device_allocator)
    return OFFLOAD_SUCCESS;

  return RTL->reset_device_allocator(RTLDeviceID);
}
/// Check whether a device has an associated RTL and initialize it if it's not
/// already initialized.
bool deviceIsReady(int DeviceNum) {

View File

@@ -55,6 +55,8 @@ VERS1.0 {
__tgt_interop_init;
__tgt_interop_use;
__tgt_interop_destroy;
__tgt_set_device_allocator;
__tgt_reset_device_allocator;
local:
*;
};

View File

@@ -303,3 +303,12 @@ EXTERN int __tgt_print_device_info(int64_t DeviceId) {
return PM->Devices[DeviceId]->printDeviceInfo(
PM->Devices[DeviceId]->RTLDeviceID);
}
// PoC
// Install a user-provided allocator/deallocator pair on device DeviceId.
// NOTE(review): DeviceId indexes PM->Devices without a validity check.
EXTERN int __tgt_set_device_allocator(int64_t DeviceId, void *Allocator,
                                      void *Deallocator) {
  auto &Device = PM->Devices[DeviceId];
  return Device->set_device_allocator(Allocator, Deallocator);
}
// Revert device DeviceId to its plugin's built-in allocation path.
// NOTE(review): DeviceId indexes PM->Devices without a validity check.
EXTERN int __tgt_reset_device_allocator(int64_t DeviceId) {
  auto &Device = PM->Devices[DeviceId];
  return Device->reset_device_allocator();
}

View File

@@ -205,6 +205,10 @@ void RTLsTy::loadRTLs() {
dlsym(DynlibHandle, "__tgt_rtl_init_async_info");
*((void **)&R.init_device_info) =
dlsym(DynlibHandle, "__tgt_rtl_init_device_info");
*((void **)&R.set_device_allocator) =
dlsym(DynlibHandle, "__tgt_rtl_set_device_allocator");
*((void **)&R.reset_device_allocator) =
dlsym(DynlibHandle, "__tgt_rtl_reset_device_allocator");
}
DP("RTLs loaded!\n");