CUDA host device code with two code paths
Summary:
Allow CUDA __host__ __device__ functions to contain two code paths, using
__CUDA_ARCH__ to differentiate which code path (host or device) is being compiled.
For example:
__host__ __device__ void host_device_function(void) {
#ifdef __CUDA_ARCH__
device_only_function();
#else
host_only_function();
#endif
}
Patch by Jacques Pienaar.
Reviewed By: rnk
Differential Revision: http://reviews.llvm.org/D6457
llvm-svn: 223271
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
#include "clang/Sema/Sema.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/AST/Decl.h"
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
#include "clang/Sema/SemaDiagnostic.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
@@ -72,21 +73,29 @@ bool Sema::CheckCUDATarget(CUDAFunctionTarget CallerTarget,
|
||||
if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
|
||||
return true;
|
||||
|
||||
// CUDA B.1.1 "The __device__ qualifier declares a function that is...
|
||||
// CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
|
||||
// Callable from the device only."
|
||||
if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
|
||||
return true;
|
||||
|
||||
// CUDA B.1.2 "The __global__ qualifier declares a function that is...
|
||||
// CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
|
||||
// Callable from the host only."
|
||||
// CUDA B.1.3 "The __host__ qualifier declares a function that is...
|
||||
// CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
|
||||
// Callable from the host only."
|
||||
if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
|
||||
(CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
|
||||
return true;
|
||||
|
||||
if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice)
|
||||
return true;
|
||||
// CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
|
||||
// however, in which case the function is compiled for both the host and the
|
||||
// device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
|
||||
// paths between host and device."
|
||||
bool InDeviceMode = getLangOpts().CUDAIsDevice;
|
||||
if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
|
||||
if ((InDeviceMode && CalleeTarget != CFT_Device) ||
|
||||
(!InDeviceMode && CalleeTarget != CFT_Host))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user