Compare commits
1 Commits
simple-tes
...
shmem
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
67fee34b9d |
@@ -461,6 +461,12 @@ struct DeviceTy {
|
|||||||
int32_t destroyEvent(void *Event);
|
int32_t destroyEvent(void *Event);
|
||||||
/// }
|
/// }
|
||||||
|
|
||||||
|
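/// PoC (proof of concept): install or remove a user-provided device memory
/// allocator/deallocator pair for this device.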
|
||||||
|
/// {
|
||||||
|
int32_t set_device_allocator(void *Allocator, void *Deallocator);
|
||||||
|
int32_t reset_device_allocator();
|
||||||
|
/// }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Call to RTL
|
// Call to RTL
|
||||||
void init(); // To be called only via DeviceTy::initOnce()
|
void init(); // To be called only via DeviceTy::initOnce()
|
||||||
|
|||||||
@@ -339,6 +339,11 @@ int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
|
|||||||
void __tgt_set_info_flag(uint32_t);
|
void __tgt_set_info_flag(uint32_t);
|
||||||
|
|
||||||
int __tgt_print_device_info(int64_t DeviceId);
|
int __tgt_print_device_info(int64_t DeviceId);
|
||||||
|
|
||||||
|
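// PoC (proof of concept): install or remove a user-provided device memory
// allocator/deallocator pair for the given device.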
|
||||||
|
int __tgt_set_device_allocator(int64_t DeviceId, void *Allocator,
|
||||||
|
void *Deallocator);
|
||||||
|
int __tgt_reset_device_allocator(int64_t DeviceId);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -179,6 +179,13 @@ int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr);
|
|||||||
int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr,
|
int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr,
|
||||||
const char **ErrStr);
|
const char **ErrStr);
|
||||||
|
|
||||||
|
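// PoC (proof of concept): plugin entry points to install or remove a
// user-provided device memory allocator/deallocator pair.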
|
||||||
|
// {
|
||||||
|
int32_t __tgt_rtl_set_device_allocator(int32_t ID, void *Allocator,
|
||||||
|
void *Deallocator);
|
||||||
|
int32_t __tgt_rtl_reset_device_allocator(int32_t ID);
|
||||||
|
// }
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -67,6 +67,8 @@ struct RTLInfoTy {
|
|||||||
typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
|
typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
|
||||||
typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
|
typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
|
||||||
const char **);
|
const char **);
|
||||||
|
// Signatures of the allocator hooks a plugin may export. The device ID is
// int32_t so that these types match the __tgt_rtl_set_device_allocator and
// __tgt_rtl_reset_device_allocator entry points, which are looked up by name
// and then invoked through pointers of these types.
typedef int32_t(set_device_allocator_ty)(int32_t, void *, void *);
typedef int32_t(reset_device_allocator_ty)(int32_t);
|
||||||
|
|
||||||
int32_t Idx = -1; // RTL index, index is the number of devices
|
int32_t Idx = -1; // RTL index, index is the number of devices
|
||||||
// of other RTLs that were registered before,
|
// of other RTLs that were registered before,
|
||||||
@@ -114,6 +116,8 @@ struct RTLInfoTy {
|
|||||||
init_async_info_ty *init_async_info = nullptr;
|
init_async_info_ty *init_async_info = nullptr;
|
||||||
init_device_into_ty *init_device_info = nullptr;
|
init_device_into_ty *init_device_info = nullptr;
|
||||||
release_async_info_ty *release_async_info = nullptr;
|
release_async_info_ty *release_async_info = nullptr;
|
||||||
|
// Allocator hooks of this RTL. Both default to null; callers test the pointer
// before invoking it.
set_device_allocator_ty *set_device_allocator = nullptr;
|
||||||
|
reset_device_allocator_ty *reset_device_allocator = nullptr;
|
||||||
|
|
||||||
// Are there images associated with this RTL.
|
// Are there images associated with this RTL.
|
||||||
bool IsUsed = false;
|
bool IsUsed = false;
|
||||||
|
|||||||
@@ -363,8 +363,18 @@ class DeviceRTLTy {
|
|||||||
/// allocate and free memory.
|
/// allocate and free memory.
|
||||||
class CUDADeviceAllocatorTy : public DeviceAllocatorTy {
|
class CUDADeviceAllocatorTy : public DeviceAllocatorTy {
|
||||||
std::unordered_map<void *, TargetAllocTy> HostPinnedAllocs;
|
std::unordered_map<void *, TargetAllocTy> HostPinnedAllocs;
|
||||||
|
/// ID of the device this allocator serves; passed as the second argument to
/// the user-defined hooks.
int DeviceId;
/// True while device allocations and frees are routed to the user-defined
/// hooks instead of cuMemAlloc/cuMemFree.
bool UseUserDefinedAllocator = false;

/// Hook signatures: the allocator receives (size, device id) and returns the
/// allocation; the deallocator receives (pointer, device id).
using UserDefinedAllocatorTy = void *(size_t, int);
using UserDefinedDeallocatorTy = void(void *, int);

/// Installed hooks. Null until setAllocator() stores a pair, so they never
/// hold an indeterminate value.
UserDefinedAllocatorTy *UserDefinedAllocator = nullptr;
UserDefinedDeallocatorTy *UserDefinedDeallocator = nullptr;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/// Create the allocator for device \p DeviceId; the ID is stored and later
/// handed to the user-defined allocation hooks.
CUDADeviceAllocatorTy(int DeviceId) : DeviceId(DeviceId) {}
|
||||||
|
|
||||||
void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
|
void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
|
||||||
if (Size == 0)
|
if (Size == 0)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@@ -374,11 +384,15 @@ class DeviceRTLTy {
|
|||||||
switch (Kind) {
|
switch (Kind) {
|
||||||
case TARGET_ALLOC_DEFAULT:
|
case TARGET_ALLOC_DEFAULT:
|
||||||
case TARGET_ALLOC_DEVICE:
|
case TARGET_ALLOC_DEVICE:
|
||||||
CUdeviceptr DevicePtr;
|
if (UseUserDefinedAllocator) {
|
||||||
Err = cuMemAlloc(&DevicePtr, Size);
|
MemAlloc = UserDefinedAllocator(Size, DeviceId);
|
||||||
MemAlloc = (void *)DevicePtr;
|
} else {
|
||||||
if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
|
CUdeviceptr DevicePtr;
|
||||||
return nullptr;
|
Err = cuMemAlloc(&DevicePtr, Size);
|
||||||
|
MemAlloc = (void *)DevicePtr;
|
||||||
|
if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case TARGET_ALLOC_HOST:
|
case TARGET_ALLOC_HOST:
|
||||||
void *HostPtr;
|
void *HostPtr;
|
||||||
@@ -411,9 +425,13 @@ class DeviceRTLTy {
|
|||||||
case TARGET_ALLOC_DEFAULT:
|
case TARGET_ALLOC_DEFAULT:
|
||||||
case TARGET_ALLOC_DEVICE:
|
case TARGET_ALLOC_DEVICE:
|
||||||
case TARGET_ALLOC_SHARED:
|
case TARGET_ALLOC_SHARED:
|
||||||
Err = cuMemFree((CUdeviceptr)TgtPtr);
|
if (UseUserDefinedAllocator)
|
||||||
if (!checkResult(Err, "Error returned from cuMemFree\n"))
|
UserDefinedDeallocator(TgtPtr, DeviceId);
|
||||||
return OFFLOAD_FAIL;
|
else {
|
||||||
|
Err = cuMemFree((CUdeviceptr)TgtPtr);
|
||||||
|
if (!checkResult(Err, "Error returned from cuMemFree\n"))
|
||||||
|
return OFFLOAD_FAIL;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case TARGET_ALLOC_HOST:
|
case TARGET_ALLOC_HOST:
|
||||||
Err = cuMemFreeHost(TgtPtr);
|
Err = cuMemFreeHost(TgtPtr);
|
||||||
@@ -424,6 +442,20 @@ class DeviceRTLTy {
|
|||||||
|
|
||||||
return OFFLOAD_SUCCESS;
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int setAllocator(void *Allocator, void *Deallocator) {
|
||||||
|
UseUserDefinedAllocator = true;
|
||||||
|
|
||||||
|
UserDefinedAllocator = (UserDefinedAllocatorTy *)Allocator;
|
||||||
|
UserDefinedDeallocator = (UserDefinedDeallocatorTy *)Deallocator;
|
||||||
|
|
||||||
|
return OFFLOAD_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stop using the user-defined hooks; later device allocations and frees take
/// the cuMemAlloc/cuMemFree path again. Always returns OFFLOAD_SUCCESS.
/// NOTE(review): memory that was obtained from the user-defined allocator and
/// is freed after this call will be passed to cuMemFree -- confirm callers
/// release such memory before resetting.
int resetAllocator() {
|
||||||
|
UseUserDefinedAllocator = false;
|
||||||
|
return OFFLOAD_SUCCESS;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A vector of device allocators
|
/// A vector of device allocators
|
||||||
@@ -557,7 +589,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (int I = 0; I < NumberOfDevices; ++I)
|
for (int I = 0; I < NumberOfDevices; ++I)
|
||||||
DeviceAllocators.emplace_back();
|
DeviceAllocators.emplace_back(I);
|
||||||
|
|
||||||
// Get the size threshold from environment variable
|
// Get the size threshold from environment variable
|
||||||
std::pair<size_t, bool> Res = MemoryManagerTy::getSizeThresholdFromEnv();
|
std::pair<size_t, bool> Res = MemoryManagerTy::getSizeThresholdFromEnv();
|
||||||
@@ -1505,6 +1537,14 @@ public:
|
|||||||
|
|
||||||
return OFFLOAD_SUCCESS;
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int setDeviceAllocator(int DeviceId, void *Allocator, void *Deallocator) {
|
||||||
|
return DeviceAllocators[DeviceId].setAllocator(Allocator, Deallocator);
|
||||||
|
}
|
||||||
|
|
||||||
|
int resetDeviceAllocator(int DeviceId) {
|
||||||
|
return DeviceAllocators[DeviceId].resetAllocator();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
DeviceRTLTy DeviceRTL;
|
DeviceRTLTy DeviceRTL;
|
||||||
@@ -1816,6 +1856,15 @@ int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
|
|||||||
return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr);
|
return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t __tgt_rtl_set_device_allocator(int32_t DeviceId, void *Allocator,
|
||||||
|
void *Deallocator) {
|
||||||
|
return DeviceRTL.setDeviceAllocator(DeviceId, Allocator, Deallocator);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t __tgt_rtl_reset_device_allocator(int32_t DeviceId) {
|
||||||
|
return DeviceRTL.resetDeviceAllocator(DeviceId);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -32,6 +32,8 @@ VERS1.0 {
|
|||||||
__tgt_rtl_destroy_event;
|
__tgt_rtl_destroy_event;
|
||||||
__tgt_rtl_init_device_info;
|
__tgt_rtl_init_device_info;
|
||||||
__tgt_rtl_init_async_info;
|
__tgt_rtl_init_async_info;
|
||||||
|
__tgt_rtl_set_device_allocator;
|
||||||
|
__tgt_rtl_reset_device_allocator;
|
||||||
local:
|
local:
|
||||||
*;
|
*;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -663,6 +663,20 @@ int32_t DeviceTy::destroyEvent(void *Event) {
|
|||||||
return OFFLOAD_SUCCESS;
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t DeviceTy::set_device_allocator(void *Allocator, void *Deallocator) {
|
||||||
|
if (RTL->set_device_allocator)
|
||||||
|
return RTL->set_device_allocator(RTLDeviceID, Allocator, Deallocator);
|
||||||
|
|
||||||
|
return OFFLOAD_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t DeviceTy::reset_device_allocator() {
|
||||||
|
if (RTL->reset_device_allocator)
|
||||||
|
return RTL->reset_device_allocator(RTLDeviceID);
|
||||||
|
|
||||||
|
return OFFLOAD_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/// Check whether a device has an associated RTL and initialize it if it's not
|
/// Check whether a device has an associated RTL and initialize it if it's not
|
||||||
/// already initialized.
|
/// already initialized.
|
||||||
bool deviceIsReady(int DeviceNum) {
|
bool deviceIsReady(int DeviceNum) {
|
||||||
|
|||||||
@@ -55,6 +55,8 @@ VERS1.0 {
|
|||||||
__tgt_interop_init;
|
__tgt_interop_init;
|
||||||
__tgt_interop_use;
|
__tgt_interop_use;
|
||||||
__tgt_interop_destroy;
|
__tgt_interop_destroy;
|
||||||
|
__tgt_set_device_allocator;
|
||||||
|
__tgt_reset_device_allocator;
|
||||||
local:
|
local:
|
||||||
*;
|
*;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -303,3 +303,12 @@ EXTERN int __tgt_print_device_info(int64_t DeviceId) {
|
|||||||
return PM->Devices[DeviceId]->printDeviceInfo(
|
return PM->Devices[DeviceId]->printDeviceInfo(
|
||||||
PM->Devices[DeviceId]->RTLDeviceID);
|
PM->Devices[DeviceId]->RTLDeviceID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
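// PoC (proof of concept): install or remove a user-provided device memory
// allocator/deallocator pair for the given device.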
|
||||||
|
/// Install a user-provided allocator/deallocator pair on device \p DeviceId
/// and return the status reported by that device.
/// NOTE(review): \p DeviceId indexes PM->Devices without any validation here.
EXTERN int __tgt_set_device_allocator(int64_t DeviceId, void *Allocator,
                                      void *Deallocator) {
  auto &&Device = PM->Devices[DeviceId];
  return Device->set_device_allocator(Allocator, Deallocator);
}
|
||||||
|
/// Remove the user-provided allocator from device \p DeviceId and return the
/// status reported by that device.
/// NOTE(review): \p DeviceId indexes PM->Devices without any validation here.
EXTERN int __tgt_reset_device_allocator(int64_t DeviceId) {
  auto &&Device = PM->Devices[DeviceId];
  return Device->reset_device_allocator();
}
|
||||||
|
|||||||
@@ -205,6 +205,10 @@ void RTLsTy::loadRTLs() {
|
|||||||
dlsym(DynlibHandle, "__tgt_rtl_init_async_info");
|
dlsym(DynlibHandle, "__tgt_rtl_init_async_info");
|
||||||
*((void **)&R.init_device_info) =
|
*((void **)&R.init_device_info) =
|
||||||
dlsym(DynlibHandle, "__tgt_rtl_init_device_info");
|
dlsym(DynlibHandle, "__tgt_rtl_init_device_info");
|
||||||
|
*((void **)&R.set_device_allocator) =
|
||||||
|
dlsym(DynlibHandle, "__tgt_rtl_set_device_allocator");
|
||||||
|
*((void **)&R.reset_device_allocator) =
|
||||||
|
dlsym(DynlibHandle, "__tgt_rtl_reset_device_allocator");
|
||||||
}
|
}
|
||||||
|
|
||||||
DP("RTLs loaded!\n");
|
DP("RTLs loaded!\n");
|
||||||
|
|||||||
Reference in New Issue
Block a user