Compare commits
1 Commits
simple-tes
...
shmem
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
67fee34b9d |
@@ -461,6 +461,12 @@ struct DeviceTy {
|
||||
int32_t destroyEvent(void *Event);
|
||||
/// }
|
||||
|
||||
/// PoC
|
||||
/// {
|
||||
/// Hand a user-provided allocator/deallocator pair to the RTL that backs this
/// device. The call is forwarded to the RTL's set_device_allocator entry point
/// when the plugin provides one; otherwise nothing happens and OFFLOAD_SUCCESS
/// is returned.
int32_t set_device_allocator(void *Allocator, void *Deallocator);
|
||||
/// Ask the RTL that backs this device to drop a previously installed
/// user-provided allocator. Forwarded to the RTL's reset_device_allocator
/// entry point when the plugin provides one; otherwise nothing happens and
/// OFFLOAD_SUCCESS is returned.
int32_t reset_device_allocator();
|
||||
/// }
|
||||
|
||||
private:
|
||||
// Call to RTL
|
||||
void init(); // To be called only via DeviceTy::initOnce()
|
||||
|
||||
@@ -339,6 +339,11 @@ int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
|
||||
void __tgt_set_info_flag(uint32_t);
|
||||
|
||||
int __tgt_print_device_info(int64_t DeviceId);
|
||||
|
||||
// PoC
|
||||
/// Install user-provided allocation callbacks for device \p DeviceId.
/// \p Allocator and \p Deallocator are passed through untyped; the CUDA plugin
/// calls them as `void *(size_t, int)` and `void(void *, int)` respectively,
/// with the device ID as the second argument.
int __tgt_set_device_allocator(int64_t DeviceId, void *Allocator,
|
||||
void *Deallocator);
|
||||
/// Undo __tgt_set_device_allocator for device \p DeviceId so that the plugin
/// goes back to its own allocation path.
int __tgt_reset_device_allocator(int64_t DeviceId);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -179,6 +179,13 @@ int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr);
|
||||
int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr,
|
||||
const char **ErrStr);
|
||||
|
||||
// PoC
|
||||
// {
|
||||
// Plugin entry point, resolved by name via dlsym when RTLs are loaded.
// Installs a user-provided allocator/deallocator pair for the plugin-local
// device ID. Both callbacks travel as untyped pointers.
int32_t __tgt_rtl_set_device_allocator(int32_t ID, void *Allocator,
|
||||
void *Deallocator);
|
||||
// Plugin entry point, resolved by name via dlsym when RTLs are loaded.
// Reverts the device with plugin-local ID to the plugin's own allocator.
int32_t __tgt_rtl_reset_device_allocator(int32_t ID);
|
||||
// }
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -67,6 +67,8 @@ struct RTLInfoTy {
|
||||
typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
|
||||
typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
|
||||
const char **);
|
||||
// Must match __tgt_rtl_set_device_allocator, whose device ID parameter is
// int32_t: the symbol is bound to a pointer of this type, and calling through
// a mismatched function type is undefined behavior.
typedef int32_t(set_device_allocator_ty)(int32_t, void *, void *);
|
||||
// Must match __tgt_rtl_reset_device_allocator, whose device ID parameter is
// int32_t: the symbol is bound to a pointer of this type, and calling through
// a mismatched function type is undefined behavior.
typedef int32_t(reset_device_allocator_ty)(int32_t);
|
||||
|
||||
int32_t Idx = -1; // RTL index, index is the number of devices
|
||||
// of other RTLs that were registered before,
|
||||
@@ -114,6 +116,8 @@ struct RTLInfoTy {
|
||||
init_async_info_ty *init_async_info = nullptr;
|
||||
init_device_into_ty *init_device_info = nullptr;
|
||||
release_async_info_ty *release_async_info = nullptr;
|
||||
set_device_allocator_ty *set_device_allocator = nullptr;
|
||||
reset_device_allocator_ty *reset_device_allocator = nullptr;
|
||||
|
||||
// Are there images associated with this RTL.
|
||||
bool IsUsed = false;
|
||||
|
||||
@@ -363,8 +363,18 @@ class DeviceRTLTy {
|
||||
/// allocate and free memory.
|
||||
class CUDADeviceAllocatorTy : public DeviceAllocatorTy {
|
||||
std::unordered_map<void *, TargetAllocTy> HostPinnedAllocs;
|
||||
int DeviceId;
|
||||
bool UseUserDefinedAllocator = false;
|
||||
|
||||
/// Signature of the user allocation callback. The allocation path calls it
/// with the requested size and this allocator's device ID and uses the
/// returned pointer as the allocation.
using UserDefinedAllocatorTy = void *(size_t, int);
/// Signature of the user deallocation callback. The free path calls it with
/// the pointer being released and this allocator's device ID.
using UserDefinedDeallocatorTy = void(void *, int);

/// Callbacks installed through setAllocator(). They start out as nullptr so
/// they never hold an indeterminate value before the first installation.
UserDefinedAllocatorTy *UserDefinedAllocator = nullptr;
UserDefinedDeallocatorTy *UserDefinedDeallocator = nullptr;
|
||||
|
||||
public:
|
||||
/// Create an allocator bound to \p DeviceId. The ID is stored and later passed
/// as the second argument to the user-defined allocation callbacks.
CUDADeviceAllocatorTy(int DeviceId) : DeviceId(DeviceId) {}
|
||||
|
||||
void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
|
||||
if (Size == 0)
|
||||
return nullptr;
|
||||
@@ -374,11 +384,15 @@ class DeviceRTLTy {
|
||||
switch (Kind) {
|
||||
case TARGET_ALLOC_DEFAULT:
|
||||
case TARGET_ALLOC_DEVICE:
|
||||
CUdeviceptr DevicePtr;
|
||||
Err = cuMemAlloc(&DevicePtr, Size);
|
||||
MemAlloc = (void *)DevicePtr;
|
||||
if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
|
||||
return nullptr;
|
||||
if (UseUserDefinedAllocator) {
|
||||
MemAlloc = UserDefinedAllocator(Size, DeviceId);
|
||||
} else {
|
||||
CUdeviceptr DevicePtr;
|
||||
Err = cuMemAlloc(&DevicePtr, Size);
|
||||
MemAlloc = (void *)DevicePtr;
|
||||
if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
|
||||
return nullptr;
|
||||
}
|
||||
break;
|
||||
case TARGET_ALLOC_HOST:
|
||||
void *HostPtr;
|
||||
@@ -411,9 +425,13 @@ class DeviceRTLTy {
|
||||
case TARGET_ALLOC_DEFAULT:
|
||||
case TARGET_ALLOC_DEVICE:
|
||||
case TARGET_ALLOC_SHARED:
|
||||
Err = cuMemFree((CUdeviceptr)TgtPtr);
|
||||
if (!checkResult(Err, "Error returned from cuMemFree\n"))
|
||||
return OFFLOAD_FAIL;
|
||||
if (UseUserDefinedAllocator)
|
||||
UserDefinedDeallocator(TgtPtr, DeviceId);
|
||||
else {
|
||||
Err = cuMemFree((CUdeviceptr)TgtPtr);
|
||||
if (!checkResult(Err, "Error returned from cuMemFree\n"))
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
break;
|
||||
case TARGET_ALLOC_HOST:
|
||||
Err = cuMemFreeHost(TgtPtr);
|
||||
@@ -424,6 +442,20 @@ class DeviceRTLTy {
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int setAllocator(void *Allocator, void *Deallocator) {
|
||||
UseUserDefinedAllocator = true;
|
||||
|
||||
UserDefinedAllocator = (UserDefinedAllocatorTy *)Allocator;
|
||||
UserDefinedDeallocator = (UserDefinedDeallocatorTy *)Deallocator;
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int resetAllocator() {
|
||||
UseUserDefinedAllocator = false;
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
/// A vector of device allocators
|
||||
@@ -557,7 +589,7 @@ public:
|
||||
}
|
||||
|
||||
for (int I = 0; I < NumberOfDevices; ++I)
|
||||
DeviceAllocators.emplace_back();
|
||||
DeviceAllocators.emplace_back(I);
|
||||
|
||||
// Get the size threshold from environment variable
|
||||
std::pair<size_t, bool> Res = MemoryManagerTy::getSizeThresholdFromEnv();
|
||||
@@ -1505,6 +1537,14 @@ public:
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int setDeviceAllocator(int DeviceId, void *Allocator, void *Deallocator) {
|
||||
return DeviceAllocators[DeviceId].setAllocator(Allocator, Deallocator);
|
||||
}
|
||||
|
||||
int resetDeviceAllocator(int DeviceId) {
|
||||
return DeviceAllocators[DeviceId].resetAllocator();
|
||||
}
|
||||
};
|
||||
|
||||
DeviceRTLTy DeviceRTL;
|
||||
@@ -1816,6 +1856,15 @@ int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
|
||||
return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_set_device_allocator(int32_t DeviceId, void *Allocator,
|
||||
void *Deallocator) {
|
||||
return DeviceRTL.setDeviceAllocator(DeviceId, Allocator, Deallocator);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_reset_device_allocator(int32_t DeviceId) {
|
||||
return DeviceRTL.resetDeviceAllocator(DeviceId);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -32,6 +32,8 @@ VERS1.0 {
|
||||
__tgt_rtl_destroy_event;
|
||||
__tgt_rtl_init_device_info;
|
||||
__tgt_rtl_init_async_info;
|
||||
__tgt_rtl_set_device_allocator;
|
||||
__tgt_rtl_reset_device_allocator;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
|
||||
@@ -663,6 +663,20 @@ int32_t DeviceTy::destroyEvent(void *Event) {
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t DeviceTy::set_device_allocator(void *Allocator, void *Deallocator) {
|
||||
if (RTL->set_device_allocator)
|
||||
return RTL->set_device_allocator(RTLDeviceID, Allocator, Deallocator);
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t DeviceTy::reset_device_allocator() {
|
||||
if (RTL->reset_device_allocator)
|
||||
return RTL->reset_device_allocator(RTLDeviceID);
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
/// Check whether a device has an associated RTL and initialize it if it's not
|
||||
/// already initialized.
|
||||
bool deviceIsReady(int DeviceNum) {
|
||||
|
||||
@@ -55,6 +55,8 @@ VERS1.0 {
|
||||
__tgt_interop_init;
|
||||
__tgt_interop_use;
|
||||
__tgt_interop_destroy;
|
||||
__tgt_set_device_allocator;
|
||||
__tgt_reset_device_allocator;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
|
||||
@@ -303,3 +303,12 @@ EXTERN int __tgt_print_device_info(int64_t DeviceId) {
|
||||
return PM->Devices[DeviceId]->printDeviceInfo(
|
||||
PM->Devices[DeviceId]->RTLDeviceID);
|
||||
}
|
||||
|
||||
// PoC
|
||||
// Public entry point: looks up the device with the given ID and forwards the
// callback pair to it. The ID is used as an index without validation.
EXTERN int __tgt_set_device_allocator(int64_t DeviceId, void *Allocator,
                                      void *Deallocator) {
  auto &&Device = PM->Devices[DeviceId];
  return Device->set_device_allocator(Allocator, Deallocator);
}
|
||||
// Public entry point: looks up the device with the given ID and asks it to
// reset its allocator. The ID is used as an index without validation.
EXTERN int __tgt_reset_device_allocator(int64_t DeviceId) {
  auto &&Device = PM->Devices[DeviceId];
  return Device->reset_device_allocator();
}
|
||||
|
||||
@@ -205,6 +205,10 @@ void RTLsTy::loadRTLs() {
|
||||
dlsym(DynlibHandle, "__tgt_rtl_init_async_info");
|
||||
*((void **)&R.init_device_info) =
|
||||
dlsym(DynlibHandle, "__tgt_rtl_init_device_info");
|
||||
*((void **)&R.set_device_allocator) =
|
||||
dlsym(DynlibHandle, "__tgt_rtl_set_device_allocator");
|
||||
*((void **)&R.reset_device_allocator) =
|
||||
dlsym(DynlibHandle, "__tgt_rtl_reset_device_allocator");
|
||||
}
|
||||
|
||||
DP("RTLs loaded!\n");
|
||||
|
||||
Reference in New Issue
Block a user