Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
555fb762be |
@@ -91,6 +91,7 @@ set(include_files
|
||||
${include_directory}/Debug.h
|
||||
${include_directory}/Interface.h
|
||||
${include_directory}/Mapping.h
|
||||
${include_directory}/Memory.h
|
||||
${include_directory}/State.h
|
||||
${include_directory}/Synchronization.h
|
||||
${include_directory}/Types.h
|
||||
@@ -102,6 +103,7 @@ set(src_files
|
||||
${source_directory}/Debug.cpp
|
||||
${source_directory}/Kernel.cpp
|
||||
${source_directory}/Mapping.cpp
|
||||
${source_directory}/Memory.cpp
|
||||
${source_directory}/Misc.cpp
|
||||
${source_directory}/Parallelism.cpp
|
||||
${source_directory}/Reduction.cpp
|
||||
|
||||
23
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
23
openmp/libomptarget/DeviceRTL/include/Memory.h
Normal file
@@ -0,0 +1,23 @@
|
||||
//===--- Memory.h - OpenMP device runtime memory allocator -------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef OMPTARGET_MEMORY_H
|
||||
#define OMPTARGET_MEMORY_H
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
/// Allocation entry points of the device runtime, declared with C linkage.
/// Both declarations carry the GNU `leaf` attribute.

/// Request \p Size bytes of memory; the result is a null pointer when the
/// request cannot be satisfied.
extern "C" __attribute__((leaf)) void *malloc(size_t Size);

/// Hand \p Ptr, previously obtained from malloc, back to the runtime.
extern "C" __attribute__((leaf)) void free(void *Ptr);
|
||||
|
||||
#endif
|
||||
@@ -62,6 +62,9 @@ namespace atomic {
|
||||
/// Atomically load \p Addr with \p Ordering semantics.
|
||||
uint32_t load(uint32_t *Addr, int Ordering);
|
||||
|
||||
/// Atomically load \p Addr with \p Ordering semantics.
|
||||
uint64_t load(uint64_t *Addr, int Ordering);
|
||||
|
||||
/// Atomically store \p V to \p Addr with \p Ordering semantics.
|
||||
void store(uint32_t *Addr, uint32_t V, int Ordering);
|
||||
|
||||
@@ -76,6 +79,35 @@ uint64_t add(uint64_t *Addr, uint64_t V, int Ordering);
|
||||
|
||||
} // namespace atomic
|
||||
|
||||
namespace mutex {
|
||||
|
||||
/// A lock built from two 64-bit counters: one tracks the next ticket to hand
/// out, the other the ticket currently allowed to proceed. Objects can be
/// default-constructed only; copying and moving are disabled.
class TicketLock {
public:
  TicketLock() = default;
  TicketLock(const TicketLock &) = delete;
  TicketLock(TicketLock &&) = delete;

  /// Acquire the lock (defined out of line).
  void lock();

  /// Release the lock (defined out of line).
  void unlock();

private:
  /// Ticket that is currently being served.
  uint64_t NowServing = 0;
  /// Ticket that will be handed to the next caller of lock().
  uint64_t NextTicket = 0;
};
|
||||
|
||||
/// Scoped lock holder: the constructor calls \c lock() on the wrapped object
/// and the destructor calls \c unlock(), so the lock is released on every path
/// out of the enclosing scope.
///
/// \tparam T any type that provides \c lock() and \c unlock().
///
/// The guard is neither copyable nor assignable; a copy would call \c unlock()
/// a second time on the same lock when it is destroyed.
///
/// NOTE: the class name is misspelled ("Gaurd"); it is kept as is because
/// existing code refers to it by this name.
template <typename T> class LockGaurd {
  T &Lock;

public:
  /// Acquire \p L and hold it for the lifetime of this guard.
  explicit LockGaurd(T &L) : Lock(L) { Lock.lock(); }

  /// Release the lock acquired by the constructor.
  ~LockGaurd() { Lock.unlock(); }

  LockGaurd(const LockGaurd &) = delete;
  LockGaurd &operator=(const LockGaurd &) = delete;
};
|
||||
|
||||
} // namespace mutex
|
||||
|
||||
} // namespace _OMP
|
||||
|
||||
#endif
|
||||
|
||||
@@ -32,6 +32,7 @@ using int32_t = int;
|
||||
using uint32_t = unsigned int;
|
||||
using int64_t = long;
|
||||
using uint64_t = unsigned long;
|
||||
/// The unsigned integer type produced by the sizeof operator.
using size_t = decltype(sizeof(char));
|
||||
|
||||
static_assert(sizeof(int8_t) == 1, "type size mismatch");
|
||||
static_assert(sizeof(uint8_t) == 1, "type size mismatch");
|
||||
|
||||
@@ -3,6 +3,7 @@ target_sources(omptarget.devicertl PRIVATE
|
||||
Debug.cpp
|
||||
Kernel.cpp
|
||||
Mapping.cpp
|
||||
Memory.cpp
|
||||
Misc.cpp
|
||||
Parallelism.cpp
|
||||
Reduction.cpp
|
||||
|
||||
47
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
47
openmp/libomptarget/DeviceRTL/src/Memory.cpp
Normal file
@@ -0,0 +1,47 @@
|
||||
//===------- Memory.cpp - OpenMP device runtime memory allocator -- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
#include "Memory.h"
|
||||
#include "Synchronization.h"
|
||||
|
||||
using namespace _OMP;
|
||||
|
||||
char *CONSTANT(omptarget_device_heap_buffer)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
|
||||
size_t CONSTANT(omptarget_device_heap_size)
|
||||
__attribute__((used, retain, weak, visibility("protected")));
|
||||
|
||||
namespace {
|
||||
size_t HeapCurPos = 0;
|
||||
mutex::TicketLock HeapLock;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
void *malloc(size_t Size) {
|
||||
mutex::LockGaurd LG(HeapLock);
|
||||
|
||||
if (Size + HeapCurPos < omptarget_device_heap_size) {
|
||||
void *R = omptarget_device_heap_buffer + HeapCurPos;
|
||||
atomic::add(&HeapCurPos, Size, __ATOMIC_SEQ_CST);
|
||||
return R;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void free(void *) {}
|
||||
}
|
||||
|
||||
#pragma omp end declare target
|
||||
@@ -12,6 +12,8 @@
|
||||
#include "Configuration.h"
|
||||
#include "Debug.h"
|
||||
#include "Interface.h"
|
||||
#include "Mapping.h"
|
||||
#include "Memory.h"
|
||||
#include "Synchronization.h"
|
||||
#include "Types.h"
|
||||
|
||||
@@ -34,36 +36,6 @@ extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
|
||||
|
||||
namespace {
|
||||
|
||||
/// Fallback implementations are missing to trigger a link time error.
|
||||
/// Implementations for new devices, including the host, should go into a
|
||||
/// dedicated begin/end declare variant.
|
||||
///
|
||||
///{
|
||||
|
||||
extern "C" {
|
||||
__attribute__((leaf)) void *malloc(uint64_t Size);
|
||||
__attribute__((leaf)) void free(void *Ptr);
|
||||
}
|
||||
|
||||
///}
|
||||
|
||||
/// AMDGCN implementations of the shuffle sync idiom.
|
||||
///
|
||||
///{
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
|
||||
extern "C" {
|
||||
void *malloc(uint64_t Size) {
|
||||
// TODO: Use some preallocated space for dynamic malloc.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void free(void *Ptr) {}
|
||||
}
|
||||
|
||||
#pragma omp end declare variant
|
||||
///}
|
||||
|
||||
/// A "smart" stack in shared memory.
|
||||
///
|
||||
/// The stack exposes a malloc/free interface but works like a stack internally.
|
||||
|
||||
@@ -35,6 +35,10 @@ uint32_t atomicLoad(uint32_t *Address, int Ordering) {
|
||||
return __atomic_fetch_add(Address, 0U, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
/// Atomically read the 64-bit value stored at \p Address.
///
/// The read is expressed as a fetch-and-add of zero, which yields the current
/// value and leaves it unchanged. \p Ordering is accepted for interface
/// symmetry but not consulted: the operation is always sequentially consistent.
uint64_t atomicLoad(uint64_t *Address, int Ordering) {
  const uint64_t Current = __atomic_fetch_add(Address, 0U, __ATOMIC_SEQ_CST);
  return Current;
}
|
||||
|
||||
void atomicStore(uint32_t *Address, uint32_t Val, int Ordering) {
|
||||
__atomic_store_n(Address, Val, Ordering);
|
||||
}
|
||||
@@ -320,6 +324,10 @@ uint32_t atomic::load(uint32_t *Addr, int Ordering) {
|
||||
return impl::atomicLoad(Addr, Ordering);
|
||||
}
|
||||
|
||||
uint64_t atomic::load(uint64_t *Addr, int Ordering) {
|
||||
return impl::atomicLoad(Addr, Ordering);
|
||||
}
|
||||
|
||||
void atomic::store(uint32_t *Addr, uint32_t V, int Ordering) {
|
||||
impl::atomicStore(Addr, V, Ordering);
|
||||
}
|
||||
@@ -336,6 +344,17 @@ uint64_t atomic::add(uint64_t *Addr, uint64_t V, int Ordering) {
|
||||
return impl::atomicAdd(Addr, V, Ordering);
|
||||
}
|
||||
|
||||
void mutex::TicketLock::lock() {
|
||||
uint64_t MyTicket = atomic::add(&NextTicket, 1U, __ATOMIC_SEQ_CST);
|
||||
|
||||
while (atomic::load(&NowServing, __ATOMIC_SEQ_CST) != MyTicket)
|
||||
;
|
||||
}
|
||||
|
||||
/// Release the lock by advancing NowServing to the next ticket, which lets the
/// corresponding waiter in lock() leave its spin loop.
void mutex::TicketLock::unlock() {
  // The value returned by the addition is of no interest here.
  (void)atomic::add(&NowServing, 1U, __ATOMIC_SEQ_CST);
}
|
||||
|
||||
extern "C" {
|
||||
void __kmpc_ordered(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
|
||||
|
||||
|
||||
@@ -968,6 +968,82 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize heap buffer
|
||||
{
|
||||
const char *BufferVarName = "omptarget_device_heap_buffer";
|
||||
const char *SizeVarName = "omptarget_device_heap_size";
|
||||
CUdeviceptr BufferVarPtr;
|
||||
CUdeviceptr SizeVarPtr;
|
||||
size_t BufferVarSize;
|
||||
size_t SizeVarSize;
|
||||
|
||||
Err = cuModuleGetGlobal(&BufferVarPtr, &BufferVarSize, Module,
|
||||
BufferVarName);
|
||||
if (Err == CUDA_SUCCESS) {
|
||||
if (BufferVarSize != sizeof(uint64_t)) {
|
||||
REPORT("Global global heap buffer pointer '%s' - size mismatch (%zu "
|
||||
"!= %zu)\n",
|
||||
BufferVarName, BufferVarSize, sizeof(uint64_t));
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Err = cuModuleGetGlobal(&SizeVarPtr, &SizeVarSize, Module, SizeVarName);
|
||||
if (Err == CUDA_SUCCESS) {
|
||||
if (SizeVarSize != sizeof(uint64_t)) {
|
||||
REPORT("Global global heap size variable '%s' - size mismatch (%zu "
|
||||
"!= %zu)\n",
|
||||
SizeVarName, SizeVarSize, sizeof(uint64_t));
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CUdeviceptr BufferPtr;
|
||||
size_t HeapSize = 1024U * 1024 * 1024 * 2;
|
||||
|
||||
Err = cuMemAlloc(&BufferPtr, HeapSize);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
REPORT("Error when allocating heap bufferm size = %zu\n", HeapSize);
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Err = cuMemcpyHtoD(BufferVarPtr, &BufferPtr, BufferVarSize);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
REPORT("Error when copying data from host to device. Pointers: "
|
||||
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||
DPxPTR(&BufferPtr), DPxPTR(BufferVarPtr), BufferVarSize);
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Err = cuMemcpyHtoD(SizeVarPtr, &HeapSize, SizeVarSize);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
REPORT("Error when copying data from host to device. Pointers: "
|
||||
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
|
||||
DPxPTR(&HeapSize), DPxPTR(SizeVarPtr), SizeVarSize);
|
||||
CUDA_ERR_STRING(Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DP("Successfully set heap buffer. omptarget_device_heap_buffer "
|
||||
"= " DPxMOD ", omptarget_device_heap_size = %zu\n",
|
||||
DPxPTR(BufferPtr), HeapSize);
|
||||
} else {
|
||||
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||
SizeVarName);
|
||||
DP("Continue, considering this is an image does not require heap "
|
||||
"allocation.\n");
|
||||
}
|
||||
|
||||
} else {
|
||||
DP("Finding global heap buffer pointer '%s' - symbol missing.\n",
|
||||
BufferVarName);
|
||||
DP("Continue, considering this is an image does not require heap "
|
||||
"allocation.\n");
|
||||
}
|
||||
}
|
||||
|
||||
return getOffloadEntriesTable(DeviceId);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user