Compare commits
25 Commits
ensembler
...
llvmorg-7.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
69794107d9 | ||
|
|
6d8abb0718 | ||
|
|
91764583f2 | ||
|
|
8c6b6d1141 | ||
|
|
ceaf95f93d | ||
|
|
23798fa3ae | ||
|
|
e6324b725a | ||
|
|
1cca79b00b | ||
|
|
ce211412ad | ||
|
|
1d910ad0f5 | ||
|
|
f59f1ca9b0 | ||
|
|
3ab9eb5378 | ||
|
|
5006581f1e | ||
|
|
ceb5474679 | ||
|
|
5e3af0e9e3 | ||
|
|
ad32392b8c | ||
|
|
dd449c2432 | ||
|
|
63740db57a | ||
|
|
41c19c9620 | ||
|
|
d32543e590 | ||
|
|
f26bd8777b | ||
|
|
73825c44f4 | ||
|
|
d81a23816a | ||
|
|
d0e85c99da | ||
|
|
67cf759ac3 |
@@ -1,5 +0,0 @@
|
||||
# Low Level Virtual Machine (LLVM)
|
||||
|
||||
This directory and its subdirectories contain source code for LLVM,
|
||||
a toolkit for the construction of highly optimized compilers,
|
||||
optimizers, and runtime environments.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -36,7 +36,7 @@ Treat source input files as Objective-C inputs
|
||||
|
||||
Treat source input files as Objective-C++ inputs
|
||||
|
||||
.. option:: -Qn
|
||||
.. option:: -Qn, -fno-ident
|
||||
|
||||
Do not emit metadata containing compiler name and version
|
||||
|
||||
@@ -44,7 +44,7 @@ Do not emit metadata containing compiler name and version
|
||||
|
||||
Don't emit warning for unused driver arguments
|
||||
|
||||
.. option:: -Qy
|
||||
.. option:: -Qy, -fident
|
||||
|
||||
Emit metadata containing compiler name and version
|
||||
|
||||
@@ -214,9 +214,13 @@ Flush denormal floating point values to zero in CUDA device mode.
|
||||
|
||||
Generate relocatable device code, also known as separate compilation mode.
|
||||
|
||||
.. option:: -fcuda-short-ptr, -fno-cuda-short-ptr
|
||||
|
||||
Use 32-bit pointers for accessing const/local/shared address spaces.
|
||||
|
||||
.. option:: -ffixed-r19
|
||||
|
||||
Reserve the r19 register (Hexagon only)
|
||||
Reserve register r19 (Hexagon only)
|
||||
|
||||
.. option:: -fheinous-gnu-extensions
|
||||
|
||||
@@ -260,6 +264,10 @@ Display available options
|
||||
|
||||
Display help for hidden options
|
||||
|
||||
.. option:: --hip-link
|
||||
|
||||
Link clang-offload-bundler bundles for HIP
|
||||
|
||||
.. option:: -image\_base <arg>
|
||||
|
||||
.. option:: -index-header-map
|
||||
@@ -712,6 +720,12 @@ Attempt to match the ABI of Clang <version>
|
||||
|
||||
Treat each comma separated argument in <arg> as a documentation comment block command
|
||||
|
||||
.. option:: -fcomplete-member-pointers, -fno-complete-member-pointers
|
||||
|
||||
Require member pointer base types to be complete if they would be significant under the Microsoft ABI
|
||||
|
||||
.. option:: -fcrash-diagnostics-dir=<arg>
|
||||
|
||||
.. option:: -fdeclspec, -fno-declspec
|
||||
|
||||
Allow \_\_declspec as a keyword
|
||||
@@ -746,7 +760,7 @@ Enables an experimental new pass manager in LLVM.
|
||||
|
||||
.. option:: -ffine-grained-bitfield-accesses, -fno-fine-grained-bitfield-accesses
|
||||
|
||||
Use separate accesses for bitfields with legal widths and alignments.
|
||||
Use separate accesses for consecutive bitfield runs with legal widths and alignments.
|
||||
|
||||
.. option:: -finline-functions, -fno-inline-functions
|
||||
|
||||
@@ -854,6 +868,10 @@ Strip (or keep only, if negative) a given number of path components when emittin
|
||||
|
||||
Turn on runtime checks for various forms of undefined or suspicious behavior. See user manual for available checks
|
||||
|
||||
.. option:: -moutline, -mno-outline
|
||||
|
||||
Enable function outlining (AArch64 only)
|
||||
|
||||
.. option:: --param <arg>, --param=<arg>
|
||||
|
||||
.. option:: -std=<arg>, --std=<arg>, --std <arg>
|
||||
@@ -1151,6 +1169,10 @@ Target-independent compilation options
|
||||
|
||||
.. option:: -faccess-control, -fno-access-control
|
||||
|
||||
.. option:: -faddrsig, -fno-addrsig
|
||||
|
||||
Emit an address-significance table
|
||||
|
||||
.. option:: -falign-functions, -fno-align-functions
|
||||
|
||||
.. program:: clang1
|
||||
@@ -1229,6 +1251,10 @@ Load the clang builtins module map file.
|
||||
|
||||
Instrument control-flow architecture protection. Options: return, branch, full, none.
|
||||
|
||||
.. option:: -fchar8\_t, -fno-char8\_t
|
||||
|
||||
Enable C++ builtin type char8\_t
|
||||
|
||||
.. option:: -fclasspath=<arg>, --CLASSPATH <arg>, --CLASSPATH=<arg>, --classpath <arg>, --classpath=<arg>
|
||||
|
||||
.. option:: -fcolor-diagnostics, -fno-color-diagnostics
|
||||
@@ -1293,6 +1319,10 @@ Place debug types in their own section (ELF Only)
|
||||
|
||||
Parse templated function definitions at the end of the translation unit
|
||||
|
||||
.. option:: -fdelete-null-pointer-checks, -fno-delete-null-pointer-checks
|
||||
|
||||
Treat usage of null pointers as undefined behavior.
|
||||
|
||||
.. option:: -fdenormal-fp-math=<arg>
|
||||
|
||||
.. option:: -fdiagnostics-absolute-paths
|
||||
@@ -1325,6 +1355,10 @@ Print option name with mappable diagnostics
|
||||
|
||||
Print a template comparison tree for differing templates
|
||||
|
||||
.. option:: -fdigraphs, -fno-digraphs
|
||||
|
||||
Enable alternative token representations '<:', ':>', '<%', '%>', '%:', '%:%:' (default)
|
||||
|
||||
.. option:: -fdollars-in-identifiers, -fno-dollars-in-identifiers
|
||||
|
||||
Allow '$' in identifiers
|
||||
@@ -1375,8 +1409,16 @@ Allow aggressive, lossy floating-point optimizations
|
||||
|
||||
.. option:: -ffinite-math-only, -fno-finite-math-only
|
||||
|
||||
.. option:: -ffixed-point, -fno-fixed-point
|
||||
|
||||
Enable fixed point types
|
||||
|
||||
.. option:: -ffor-scope, -fno-for-scope
|
||||
|
||||
.. option:: -fforce-emit-vtables, -fno-force-emit-vtables
|
||||
|
||||
Emits more virtual tables to improve devirtualization
|
||||
|
||||
.. option:: -fforce-enable-int128, -fno-force-enable-int128
|
||||
|
||||
Enable support for int128\_t type
|
||||
@@ -1543,14 +1585,6 @@ Specifies the largest alignment guaranteed by '::operator new(size\_t)'
|
||||
|
||||
Disable implicit builtin knowledge of a specific function
|
||||
|
||||
.. option:: -fdelete-null-pointer-checks, -fno-delete-null-pointer-checks
|
||||
|
||||
When enabled, treat null pointer dereference, creation of a reference to null,
|
||||
or passing a null pointer to a function parameter annotated with the "nonnull"
|
||||
attribute as undefined behavior. (And, thus the optimizer may assume that any
|
||||
pointer used in such a way must not have been null and optimize away the
|
||||
branches accordingly.) On by default.
|
||||
|
||||
.. option:: -fno-elide-type
|
||||
|
||||
Do not elide types when printing diagnostics
|
||||
@@ -1834,6 +1868,10 @@ Emit full debug info for all types used by the program
|
||||
|
||||
Enable optimizations based on the strict definition of an enum's value range
|
||||
|
||||
.. option:: -fstrict-float-cast-overflow, -fno-strict-float-cast-overflow
|
||||
|
||||
Assume that overflowing float-to-int casts are undefined (default)
|
||||
|
||||
.. option:: -fstrict-overflow, -fno-strict-overflow
|
||||
|
||||
.. option:: -fstrict-return, -fno-strict-return
|
||||
@@ -1942,12 +1980,6 @@ Set the default symbol visibility for all global declarations
|
||||
|
||||
Enables whole-program vtable optimization. Requires -flto
|
||||
|
||||
.. option:: -fforce-emit-vtables, -fno-force-emit-vtables
|
||||
|
||||
In order to improve devirtualization, forces emitting of vtables even in
|
||||
modules where it isn't necessary. It causes more inline virtual functions
|
||||
to be emitted.
|
||||
|
||||
.. option:: -fwrapv, -fno-wrapv
|
||||
|
||||
Treat signed integer overflow as two's complement
|
||||
@@ -2078,12 +2110,6 @@ Put objects of at most <size> bytes into small data section (MIPS / Hexagon)
|
||||
|
||||
.. option:: -mabi=<arg>
|
||||
|
||||
.. option:: -mabicalls, -mno-abicalls
|
||||
|
||||
Enable SVR4-style position-independent code (Mips only)
|
||||
|
||||
.. option:: -mabs=<arg>
|
||||
|
||||
.. option:: -malign-double
|
||||
|
||||
Align doubles to two words in structs (x86 only)
|
||||
@@ -2096,54 +2122,32 @@ Align doubles to two words in structs (x86 only)
|
||||
|
||||
Link stack frames through backchain on System Z
|
||||
|
||||
.. option:: -mcheck-zero-division, -mno-check-zero-division
|
||||
|
||||
.. option:: -mcmodel=<arg>
|
||||
|
||||
.. option:: -mcompact-branches=<arg>
|
||||
|
||||
.. option:: -mconsole<arg>
|
||||
|
||||
.. option:: -mcpu=<arg>, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 (equivalent to -mcpu=hexagonv65)
|
||||
|
||||
.. option:: -mcrc, -mno-crc
|
||||
|
||||
Allow use of CRC instructions (ARM/Mips only)
|
||||
|
||||
.. option:: -mdefault-build-attributes<arg>, -mno-default-build-attributes<arg>
|
||||
|
||||
.. option:: -mdll<arg>
|
||||
|
||||
.. option:: -mdouble-float
|
||||
|
||||
.. option:: -mdsp, -mno-dsp
|
||||
|
||||
.. option:: -mdspr2, -mno-dspr2
|
||||
|
||||
.. option:: -mdynamic-no-pic<arg>
|
||||
|
||||
.. option:: -meabi <arg>
|
||||
|
||||
Set EABI type, e.g. 4, 5 or gnu (default depends on triple)
|
||||
|
||||
.. option:: -membedded-data, -mno-embedded-data
|
||||
|
||||
Place constants in the .rodata section instead of the .sdata section even if they meet the -G <size> threshold (MIPS)
|
||||
|
||||
.. option:: -mextern-sdata, -mno-extern-sdata
|
||||
|
||||
Assume that externally defined data is in the small data if it meets the -G <size> threshold (MIPS)
|
||||
|
||||
.. option:: -mfentry
|
||||
|
||||
Insert calls to fentry at function entry (x86 only)
|
||||
|
||||
.. option:: -mfloat-abi=<arg>
|
||||
|
||||
.. option:: -mfp32
|
||||
|
||||
Use 32-bit floating point registers (MIPS only)
|
||||
|
||||
.. option:: -mfp64
|
||||
|
||||
Use 64-bit floating point registers (MIPS only)
|
||||
|
||||
.. option:: -mfpmath=<arg>
|
||||
|
||||
.. option:: -mfpu=<arg>
|
||||
@@ -2152,10 +2156,6 @@ Use 64-bit floating point registers (MIPS only)
|
||||
|
||||
Enable merging of globals
|
||||
|
||||
.. option:: -mgpopt, -mno-gpopt
|
||||
|
||||
Use GP relative accesses for symbols known to be in a small data section (MIPS)
|
||||
|
||||
.. option:: -mhard-float
|
||||
|
||||
.. option:: -mhwdiv=<arg>, --mhwdiv <arg>, --mhwdiv=<arg>
|
||||
@@ -2172,22 +2172,10 @@ Use Intel MCU ABI
|
||||
|
||||
(integrated-as) Emit an object file which can be used with an incremental linker
|
||||
|
||||
.. option:: -mindirect-jump=<arg>
|
||||
|
||||
Change indirect jump instructions to inhibit speculation
|
||||
|
||||
.. option:: -miphoneos-version-min=<arg>, -mios-version-min=<arg>
|
||||
|
||||
.. option:: -mips16
|
||||
|
||||
.. option:: -mkernel
|
||||
|
||||
.. option:: -mldc1-sdc1, -mno-ldc1-sdc1
|
||||
|
||||
.. option:: -mlocal-sdata, -mno-local-sdata
|
||||
|
||||
Extend the -G behaviour to object local data (MIPS)
|
||||
|
||||
.. option:: -mlong-calls, -mno-long-calls
|
||||
|
||||
Generate branches with extended addressability, usually via indirect jumps.
|
||||
@@ -2196,30 +2184,12 @@ Generate branches with extended addressability, usually via indirect jumps.
|
||||
|
||||
Set Mac OS X deployment target
|
||||
|
||||
.. option:: -mmadd4, -mno-madd4
|
||||
|
||||
Enable the generation of 4-operand madd.s, madd.d and related instructions.
|
||||
|
||||
.. option:: -mmcu=<arg>
|
||||
|
||||
.. option:: -mmicromips, -mno-micromips
|
||||
|
||||
.. option:: -mms-bitfields, -mno-ms-bitfields
|
||||
|
||||
Set the default structure layout to be compatible with the Microsoft compiler standard
|
||||
|
||||
.. option:: -mmsa, -mno-msa
|
||||
|
||||
Enable MSA ASE (MIPS only)
|
||||
|
||||
.. option:: -mmt, -mno-mt
|
||||
|
||||
Enable MT ASE (MIPS only)
|
||||
|
||||
.. option:: -mnan=<arg>
|
||||
|
||||
.. option:: -mno-mips16
|
||||
|
||||
.. option:: -momit-leaf-frame-pointer, -mno-omit-leaf-frame-pointer
|
||||
|
||||
Omit frame pointer setup for leaf functions
|
||||
@@ -2256,8 +2226,6 @@ Enable hexagon-qdsp6 backward compatibility
|
||||
|
||||
Make StdCall calling convention the default
|
||||
|
||||
.. option:: -msingle-float
|
||||
|
||||
.. option:: -msoft-float, -mno-soft-float
|
||||
|
||||
Use software floating point
|
||||
@@ -2302,8 +2270,6 @@ The thread model to use, e.g. posix, single (posix by default)
|
||||
|
||||
.. option:: -mx32
|
||||
|
||||
.. option:: -mxgot, -mno-xgot
|
||||
|
||||
AARCH64
|
||||
-------
|
||||
.. option:: -ffixed-x18
|
||||
@@ -2334,10 +2300,6 @@ ARM
|
||||
|
||||
Reserve the r9 register (ARM only)
|
||||
|
||||
.. option:: -mcrc
|
||||
|
||||
Allow use of CRC instructions (ARM only)
|
||||
|
||||
.. option:: -mexecute-only, -mno-execute-only, -mpure-code
|
||||
|
||||
Disallow generation of data access to code sections (ARM only)
|
||||
@@ -2370,6 +2332,18 @@ Hexagon
|
||||
-------
|
||||
.. option:: -mieee-rnd-near
|
||||
|
||||
.. option:: -mmemops, -mno-memops
|
||||
|
||||
Enable generation of memop instructions
|
||||
|
||||
.. option:: -mnvj, -mno-nvj
|
||||
|
||||
Enable generation of new-value jumps
|
||||
|
||||
.. option:: -mnvs, -mno-nvs
|
||||
|
||||
Enable generation of new-value stores
|
||||
|
||||
.. option:: -mpackets, -mno-packets
|
||||
|
||||
Enable generation of instruction packets
|
||||
@@ -2390,6 +2364,82 @@ Set Hexagon Vector Length
|
||||
|
||||
Enable Hexagon Vector eXtensions
|
||||
|
||||
MIPS
|
||||
----
|
||||
.. option:: -mabicalls, -mno-abicalls
|
||||
|
||||
Enable SVR4-style position-independent code (Mips only)
|
||||
|
||||
.. option:: -mabs=<arg>
|
||||
|
||||
.. option:: -mcheck-zero-division, -mno-check-zero-division
|
||||
|
||||
.. option:: -mcompact-branches=<arg>
|
||||
|
||||
.. option:: -mdouble-float
|
||||
|
||||
.. option:: -mdsp, -mno-dsp
|
||||
|
||||
.. option:: -mdspr2, -mno-dspr2
|
||||
|
||||
.. option:: -membedded-data, -mno-embedded-data
|
||||
|
||||
Place constants in the .rodata section instead of the .sdata section even if they meet the -G <size> threshold (MIPS)
|
||||
|
||||
.. option:: -mextern-sdata, -mno-extern-sdata
|
||||
|
||||
Assume that externally defined data is in the small data if it meets the -G <size> threshold (MIPS)
|
||||
|
||||
.. option:: -mfp32
|
||||
|
||||
Use 32-bit floating point registers (MIPS only)
|
||||
|
||||
.. option:: -mfp64
|
||||
|
||||
Use 64-bit floating point registers (MIPS only)
|
||||
|
||||
.. option:: -mginv, -mno-ginv
|
||||
|
||||
.. option:: -mgpopt, -mno-gpopt
|
||||
|
||||
Use GP relative accesses for symbols known to be in a small data section (MIPS)
|
||||
|
||||
.. option:: -mindirect-jump=<arg>
|
||||
|
||||
Change indirect jump instructions to inhibit speculation
|
||||
|
||||
.. option:: -mips16
|
||||
|
||||
.. option:: -mldc1-sdc1, -mno-ldc1-sdc1
|
||||
|
||||
.. option:: -mlocal-sdata, -mno-local-sdata
|
||||
|
||||
Extend the -G behaviour to object local data (MIPS)
|
||||
|
||||
.. option:: -mmadd4, -mno-madd4
|
||||
|
||||
Enable the generation of 4-operand madd.s, madd.d and related instructions.
|
||||
|
||||
.. option:: -mmicromips, -mno-micromips
|
||||
|
||||
.. option:: -mmsa, -mno-msa
|
||||
|
||||
Enable MSA ASE (MIPS only)
|
||||
|
||||
.. option:: -mmt, -mno-mt
|
||||
|
||||
Enable MT ASE (MIPS only)
|
||||
|
||||
.. option:: -mnan=<arg>
|
||||
|
||||
.. option:: -mno-mips16
|
||||
|
||||
.. option:: -msingle-float
|
||||
|
||||
.. option:: -mvirt, -mno-virt
|
||||
|
||||
.. option:: -mxgot, -mno-xgot
|
||||
|
||||
PowerPC
|
||||
-------
|
||||
.. option:: -maltivec, -mno-altivec
|
||||
@@ -2504,6 +2554,8 @@ X86
|
||||
|
||||
.. option:: -mgfni, -mno-gfni
|
||||
|
||||
.. option:: -minvpcid, -mno-invpcid
|
||||
|
||||
.. option:: -mlwp, -mno-lwp
|
||||
|
||||
.. option:: -mlzcnt, -mno-lzcnt
|
||||
@@ -2512,16 +2564,18 @@ X86
|
||||
|
||||
.. option:: -mmovbe, -mno-movbe
|
||||
|
||||
.. option:: -mmovdiri, -mno-movdiri
|
||||
|
||||
.. option:: -mmovdir64b, -mno-movdir64b
|
||||
|
||||
.. option:: -mmovdiri, -mno-movdiri
|
||||
|
||||
.. option:: -mmpx, -mno-mpx
|
||||
|
||||
.. option:: -mmwaitx, -mno-mwaitx
|
||||
|
||||
.. option:: -mpclmul, -mno-pclmul
|
||||
|
||||
.. option:: -mpconfig, -mno-pconfig
|
||||
|
||||
.. option:: -mpku, -mno-pku
|
||||
|
||||
.. option:: -mpopcnt, -mno-popcnt
|
||||
@@ -2530,6 +2584,8 @@ X86
|
||||
|
||||
.. option:: -mprfchw, -mno-prfchw
|
||||
|
||||
.. option:: -mptwrite, -mno-ptwrite
|
||||
|
||||
.. option:: -mrdpid, -mno-rdpid
|
||||
|
||||
.. option:: -mrdrnd, -mno-rdrnd
|
||||
@@ -2588,6 +2644,12 @@ X86
|
||||
|
||||
.. option:: -mxsaves, -mno-xsaves
|
||||
|
||||
RISCV
|
||||
-----
|
||||
.. option:: -mrelax, -mno-relax
|
||||
|
||||
Enable linker relaxation
|
||||
|
||||
Optimization level
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -2671,7 +2733,7 @@ Debug information flags
|
||||
|
||||
Embed source text in DWARF debug sections
|
||||
|
||||
.. option:: -ggnu-pubnames
|
||||
.. option:: -ggnu-pubnames, -gno-gnu-pubnames
|
||||
|
||||
.. option:: -grecord-gcc-switches, -gno-record-gcc-switches
|
||||
|
||||
@@ -2886,6 +2948,14 @@ Pass <arg> to the linker
|
||||
|
||||
.. option:: -filelist <arg>
|
||||
|
||||
.. option:: --hip-device-lib-path=<arg>
|
||||
|
||||
HIP device library path
|
||||
|
||||
.. option:: --hip-device-lib=<arg>
|
||||
|
||||
HIP device library
|
||||
|
||||
.. option:: -l<arg>
|
||||
|
||||
.. option:: -r
|
||||
|
||||
@@ -471,8 +471,6 @@ BUILTIN(__builtin_wcslen, "zwC*", "nF")
|
||||
BUILTIN(__builtin_wcsncmp, "iwC*wC*z", "nF")
|
||||
BUILTIN(__builtin_wmemchr, "w*wC*wz", "nF")
|
||||
BUILTIN(__builtin_wmemcmp, "iwC*wC*z", "nF")
|
||||
BUILTIN(__builtin_wmemcpy, "w*w*wC*z", "nF")
|
||||
BUILTIN(__builtin_wmemmove, "w*w*wC*z", "nF")
|
||||
BUILTIN(__builtin_return_address, "v*IUi", "n")
|
||||
BUILTIN(__builtin_extract_return_addr, "v*v*", "n")
|
||||
BUILTIN(__builtin_frame_address, "v*IUi", "n")
|
||||
@@ -910,8 +908,6 @@ LIBBUILTIN(wcslen, "zwC*", "f", "wchar.h", ALL_LANGUAGES)
|
||||
LIBBUILTIN(wcsncmp, "iwC*wC*z", "f", "wchar.h", ALL_LANGUAGES)
|
||||
LIBBUILTIN(wmemchr, "w*wC*wz", "f", "wchar.h", ALL_LANGUAGES)
|
||||
LIBBUILTIN(wmemcmp, "iwC*wC*z", "f", "wchar.h", ALL_LANGUAGES)
|
||||
LIBBUILTIN(wmemcpy, "w*w*wC*z", "f", "wchar.h", ALL_LANGUAGES)
|
||||
LIBBUILTIN(wmemmove,"w*w*wC*z", "f", "wchar.h", ALL_LANGUAGES)
|
||||
|
||||
// C99
|
||||
// In some systems setjmp is a macro that expands to _setjmp. We undefine
|
||||
|
||||
@@ -163,20 +163,6 @@ def note_constexpr_unsupported_unsized_array : Note<
|
||||
def note_constexpr_unsized_array_indexed : Note<
|
||||
"indexing of array without known bound is not allowed "
|
||||
"in a constant expression">;
|
||||
def note_constexpr_memcpy_type_pun : Note<
|
||||
"cannot constant evaluate '%select{memcpy|memmove}0' from object of "
|
||||
"type %1 to object of type %2">;
|
||||
def note_constexpr_memcpy_nontrivial : Note<
|
||||
"cannot constant evaluate '%select{memcpy|memmove}0' between objects of "
|
||||
"non-trivially-copyable type %1">;
|
||||
def note_constexpr_memcpy_overlap : Note<
|
||||
"'%select{memcpy|wmemcpy}0' between overlapping memory regions">;
|
||||
def note_constexpr_memcpy_unsupported : Note<
|
||||
"'%select{%select{memcpy|wmemcpy}1|%select{memmove|wmemmove}1}0' "
|
||||
"not supported: %select{"
|
||||
"size to copy (%4) is not a multiple of size of element type %3 (%5)|"
|
||||
"source is not a contiguous array of at least %4 elements of type %3|"
|
||||
"destination is not a contiguous array of at least %4 elements of type %3}2">;
|
||||
|
||||
def warn_integer_constant_overflow : Warning<
|
||||
"overflow in expression; result is %0 with type %1">,
|
||||
|
||||
@@ -319,25 +319,6 @@ namespace {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Get the range of valid index adjustments in the form
|
||||
/// {maximum value that can be subtracted from this pointer,
|
||||
/// maximum value that can be added to this pointer}
|
||||
std::pair<uint64_t, uint64_t> validIndexAdjustments() {
|
||||
if (Invalid || isMostDerivedAnUnsizedArray())
|
||||
return {0, 0};
|
||||
|
||||
// [expr.add]p4: For the purposes of these operators, a pointer to a
|
||||
// nonarray object behaves the same as a pointer to the first element of
|
||||
// an array of length one with the type of the object as its element type.
|
||||
bool IsArray = MostDerivedPathLength == Entries.size() &&
|
||||
MostDerivedIsArrayElement;
|
||||
uint64_t ArrayIndex =
|
||||
IsArray ? Entries.back().ArrayIndex : (uint64_t)IsOnePastTheEnd;
|
||||
uint64_t ArraySize =
|
||||
IsArray ? getMostDerivedArraySize() : (uint64_t)1;
|
||||
return {ArrayIndex, ArraySize - ArrayIndex};
|
||||
}
|
||||
|
||||
/// Check that this refers to a valid subobject.
|
||||
bool isValidSubobject() const {
|
||||
if (Invalid)
|
||||
@@ -348,13 +329,6 @@ namespace {
|
||||
/// relevant diagnostic and set the designator as invalid.
|
||||
bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK);
|
||||
|
||||
/// Get the type of the designated object.
|
||||
QualType getType(ASTContext &Ctx) const {
|
||||
return MostDerivedPathLength == Entries.size()
|
||||
? MostDerivedType
|
||||
: Ctx.getRecordType(getAsBaseClass(Entries.back()));
|
||||
}
|
||||
|
||||
/// Update this designator to refer to the first element within this array.
|
||||
void addArrayUnchecked(const ConstantArrayType *CAT) {
|
||||
PathEntry Entry;
|
||||
@@ -1732,54 +1706,6 @@ static bool IsGlobalLValue(APValue::LValueBase B) {
|
||||
}
|
||||
}
|
||||
|
||||
static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
|
||||
return LVal.Base.dyn_cast<const ValueDecl*>();
|
||||
}
|
||||
|
||||
static bool IsLiteralLValue(const LValue &Value) {
|
||||
if (Value.getLValueCallIndex())
|
||||
return false;
|
||||
const Expr *E = Value.Base.dyn_cast<const Expr*>();
|
||||
return E && !isa<MaterializeTemporaryExpr>(E);
|
||||
}
|
||||
|
||||
static bool IsWeakLValue(const LValue &Value) {
|
||||
const ValueDecl *Decl = GetLValueBaseDecl(Value);
|
||||
return Decl && Decl->isWeak();
|
||||
}
|
||||
|
||||
static bool isZeroSized(const LValue &Value) {
|
||||
const ValueDecl *Decl = GetLValueBaseDecl(Value);
|
||||
if (Decl && isa<VarDecl>(Decl)) {
|
||||
QualType Ty = Decl->getType();
|
||||
if (Ty->isArrayType())
|
||||
return Ty->isIncompleteType() ||
|
||||
Decl->getASTContext().getTypeSize(Ty) == 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool HasSameBase(const LValue &A, const LValue &B) {
|
||||
if (!A.getLValueBase())
|
||||
return !B.getLValueBase();
|
||||
if (!B.getLValueBase())
|
||||
return false;
|
||||
|
||||
if (A.getLValueBase().getOpaqueValue() !=
|
||||
B.getLValueBase().getOpaqueValue()) {
|
||||
const Decl *ADecl = GetLValueBaseDecl(A);
|
||||
if (!ADecl)
|
||||
return false;
|
||||
const Decl *BDecl = GetLValueBaseDecl(B);
|
||||
if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl())
|
||||
return false;
|
||||
}
|
||||
|
||||
return IsGlobalLValue(A.getLValueBase()) ||
|
||||
(A.getLValueCallIndex() == B.getLValueCallIndex() &&
|
||||
A.getLValueVersion() == B.getLValueVersion());
|
||||
}
|
||||
|
||||
static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) {
|
||||
assert(Base && "no location for a null lvalue");
|
||||
const ValueDecl *VD = Base.dyn_cast<const ValueDecl*>();
|
||||
@@ -1991,6 +1917,33 @@ CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type,
|
||||
return true;
|
||||
}
|
||||
|
||||
static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
|
||||
return LVal.Base.dyn_cast<const ValueDecl*>();
|
||||
}
|
||||
|
||||
static bool IsLiteralLValue(const LValue &Value) {
|
||||
if (Value.getLValueCallIndex())
|
||||
return false;
|
||||
const Expr *E = Value.Base.dyn_cast<const Expr*>();
|
||||
return E && !isa<MaterializeTemporaryExpr>(E);
|
||||
}
|
||||
|
||||
static bool IsWeakLValue(const LValue &Value) {
|
||||
const ValueDecl *Decl = GetLValueBaseDecl(Value);
|
||||
return Decl && Decl->isWeak();
|
||||
}
|
||||
|
||||
static bool isZeroSized(const LValue &Value) {
|
||||
const ValueDecl *Decl = GetLValueBaseDecl(Value);
|
||||
if (Decl && isa<VarDecl>(Decl)) {
|
||||
QualType Ty = Decl->getType();
|
||||
if (Ty->isArrayType())
|
||||
return Ty->isIncompleteType() ||
|
||||
Decl->getASTContext().getTypeSize(Ty) == 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool EvalPointerValueAsBool(const APValue &Value, bool &Result) {
|
||||
// A null base expression indicates a null pointer. These are always
|
||||
// evaluatable, and they are false unless the offset is zero.
|
||||
@@ -6164,130 +6117,6 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
|
||||
return ZeroInitialization(E);
|
||||
}
|
||||
|
||||
case Builtin::BImemcpy:
|
||||
case Builtin::BImemmove:
|
||||
case Builtin::BIwmemcpy:
|
||||
case Builtin::BIwmemmove:
|
||||
if (Info.getLangOpts().CPlusPlus11)
|
||||
Info.CCEDiag(E, diag::note_constexpr_invalid_function)
|
||||
<< /*isConstexpr*/0 << /*isConstructor*/0
|
||||
<< (std::string("'") + Info.Ctx.BuiltinInfo.getName(BuiltinOp) + "'");
|
||||
else
|
||||
Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
|
||||
LLVM_FALLTHROUGH;
|
||||
case Builtin::BI__builtin_memcpy:
|
||||
case Builtin::BI__builtin_memmove:
|
||||
case Builtin::BI__builtin_wmemcpy:
|
||||
case Builtin::BI__builtin_wmemmove: {
|
||||
bool WChar = BuiltinOp == Builtin::BIwmemcpy ||
|
||||
BuiltinOp == Builtin::BIwmemmove ||
|
||||
BuiltinOp == Builtin::BI__builtin_wmemcpy ||
|
||||
BuiltinOp == Builtin::BI__builtin_wmemmove;
|
||||
bool Move = BuiltinOp == Builtin::BImemmove ||
|
||||
BuiltinOp == Builtin::BIwmemmove ||
|
||||
BuiltinOp == Builtin::BI__builtin_memmove ||
|
||||
BuiltinOp == Builtin::BI__builtin_wmemmove;
|
||||
|
||||
// The result of mem* is the first argument.
|
||||
if (!Visit(E->getArg(0)))
|
||||
return false;
|
||||
LValue Dest = Result;
|
||||
|
||||
LValue Src;
|
||||
if (!EvaluatePointer(E->getArg(1), Src, Info))
|
||||
return false;
|
||||
|
||||
APSInt N;
|
||||
if (!EvaluateInteger(E->getArg(2), N, Info))
|
||||
return false;
|
||||
assert(!N.isSigned() && "memcpy and friends take an unsigned size");
|
||||
|
||||
// If the size is zero, we treat this as always being a valid no-op.
|
||||
// (Even if one of the src and dest pointers is null.)
|
||||
if (!N)
|
||||
return true;
|
||||
|
||||
// We require that Src and Dest are both pointers to arrays of
|
||||
// trivially-copyable type. (For the wide version, the designator will be
|
||||
// invalid if the designated object is not a wchar_t.)
|
||||
QualType T = Dest.Designator.getType(Info.Ctx);
|
||||
QualType SrcT = Src.Designator.getType(Info.Ctx);
|
||||
if (!Info.Ctx.hasSameUnqualifiedType(T, SrcT)) {
|
||||
Info.FFDiag(E, diag::note_constexpr_memcpy_type_pun) << Move << SrcT << T;
|
||||
return false;
|
||||
}
|
||||
if (!T.isTriviallyCopyableType(Info.Ctx)) {
|
||||
Info.FFDiag(E, diag::note_constexpr_memcpy_nontrivial) << Move << T;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Figure out how many T's we're copying.
|
||||
uint64_t TSize = Info.Ctx.getTypeSizeInChars(T).getQuantity();
|
||||
if (!WChar) {
|
||||
uint64_t Remainder;
|
||||
llvm::APInt OrigN = N;
|
||||
llvm::APInt::udivrem(OrigN, TSize, N, Remainder);
|
||||
if (Remainder) {
|
||||
Info.FFDiag(E, diag::note_constexpr_memcpy_unsupported)
|
||||
<< Move << WChar << 0 << T << OrigN.toString(10, /*Signed*/false)
|
||||
<< (unsigned)TSize;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the copying will remain within the arrays, just so that we
|
||||
// can give a more meaningful diagnostic. This implicitly also checks that
|
||||
// N fits into 64 bits.
|
||||
uint64_t RemainingSrcSize = Src.Designator.validIndexAdjustments().second;
|
||||
uint64_t RemainingDestSize = Dest.Designator.validIndexAdjustments().second;
|
||||
if (N.ugt(RemainingSrcSize) || N.ugt(RemainingDestSize)) {
|
||||
Info.FFDiag(E, diag::note_constexpr_memcpy_unsupported)
|
||||
<< Move << WChar << (N.ugt(RemainingSrcSize) ? 1 : 2) << T
|
||||
<< N.toString(10, /*Signed*/false);
|
||||
return false;
|
||||
}
|
||||
uint64_t NElems = N.getZExtValue();
|
||||
uint64_t NBytes = NElems * TSize;
|
||||
|
||||
// Check for overlap.
|
||||
int Direction = 1;
|
||||
if (HasSameBase(Src, Dest)) {
|
||||
uint64_t SrcOffset = Src.getLValueOffset().getQuantity();
|
||||
uint64_t DestOffset = Dest.getLValueOffset().getQuantity();
|
||||
if (DestOffset >= SrcOffset && DestOffset - SrcOffset < NBytes) {
|
||||
// Dest is inside the source region.
|
||||
if (!Move) {
|
||||
Info.FFDiag(E, diag::note_constexpr_memcpy_overlap) << WChar;
|
||||
return false;
|
||||
}
|
||||
// For memmove and friends, copy backwards.
|
||||
if (!HandleLValueArrayAdjustment(Info, E, Src, T, NElems - 1) ||
|
||||
!HandleLValueArrayAdjustment(Info, E, Dest, T, NElems - 1))
|
||||
return false;
|
||||
Direction = -1;
|
||||
} else if (!Move && SrcOffset >= DestOffset &&
|
||||
SrcOffset - DestOffset < NBytes) {
|
||||
// Src is inside the destination region for memcpy: invalid.
|
||||
Info.FFDiag(E, diag::note_constexpr_memcpy_overlap) << WChar;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
while (true) {
|
||||
APValue Val;
|
||||
if (!handleLValueToRValueConversion(Info, E, T, Src, Val) ||
|
||||
!handleAssignment(Info, E, Dest, T, Val))
|
||||
return false;
|
||||
// Do not iterate past the last element; if we're copying backwards, that
|
||||
// might take us off the start of the array.
|
||||
if (--NElems == 0)
|
||||
return true;
|
||||
if (!HandleLValueArrayAdjustment(Info, E, Src, T, Direction) ||
|
||||
!HandleLValueArrayAdjustment(Info, E, Dest, T, Direction))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return visitNonBuiltinCallExpr(E);
|
||||
}
|
||||
@@ -8528,6 +8357,27 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
|
||||
}
|
||||
}
|
||||
|
||||
static bool HasSameBase(const LValue &A, const LValue &B) {
|
||||
if (!A.getLValueBase())
|
||||
return !B.getLValueBase();
|
||||
if (!B.getLValueBase())
|
||||
return false;
|
||||
|
||||
if (A.getLValueBase().getOpaqueValue() !=
|
||||
B.getLValueBase().getOpaqueValue()) {
|
||||
const Decl *ADecl = GetLValueBaseDecl(A);
|
||||
if (!ADecl)
|
||||
return false;
|
||||
const Decl *BDecl = GetLValueBaseDecl(B);
|
||||
if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl())
|
||||
return false;
|
||||
}
|
||||
|
||||
return IsGlobalLValue(A.getLValueBase()) ||
|
||||
(A.getLValueCallIndex() == B.getLValueCallIndex() &&
|
||||
A.getLValueVersion() == B.getLValueVersion());
|
||||
}
|
||||
|
||||
/// Determine whether this is a pointer past the end of the complete
|
||||
/// object referred to by the lvalue.
|
||||
static bool isOnePastTheEndOfCompleteObject(const ASTContext &Ctx,
|
||||
|
||||
@@ -6371,8 +6371,12 @@ static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
|
||||
const TypeSourceInfo *TSI = FD->getTypeSourceInfo();
|
||||
if (!TSI)
|
||||
return false;
|
||||
// Don't declare this variable in the second operand of the for-statement;
|
||||
// GCC miscompiles that by ending its lifetime before evaluating the
|
||||
// third operand. See gcc.gnu.org/PR86769.
|
||||
AttributedTypeLoc ATL;
|
||||
for (TypeLoc TL = TSI->getTypeLoc();
|
||||
auto ATL = TL.getAsAdjusted<AttributedTypeLoc>();
|
||||
(ATL = TL.getAsAdjusted<AttributedTypeLoc>());
|
||||
TL = ATL.getModifiedLoc()) {
|
||||
if (ATL.getAttrKind() == AttributedType::attr_lifetimebound)
|
||||
return true;
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm < %s| FileCheck %s
|
||||
|
||||
typedef __WCHAR_TYPE__ wchar_t;
|
||||
typedef __SIZE_TYPE__ size_t;
|
||||
|
||||
// CHECK: @test1
|
||||
// CHECK: call void @llvm.memset.p0i8.i32
|
||||
// CHECK: call void @llvm.memset.p0i8.i32
|
||||
@@ -86,17 +83,3 @@ void test9() {
|
||||
// CHECK: call void @llvm.memcpy{{.*}} align 16 {{.*}} align 16 {{.*}} 16, i1 false)
|
||||
__builtin_memcpy(x, y, sizeof(y));
|
||||
}
|
||||
|
||||
wchar_t dest;
|
||||
wchar_t src;
|
||||
|
||||
// CHECK-LABEL: @test10
|
||||
// FIXME: Consider lowering these to llvm.memcpy / llvm.memmove.
|
||||
void test10() {
|
||||
// CHECK: call i32* @wmemcpy(i32* @dest, i32* @src, i32 4)
|
||||
__builtin_wmemcpy(&dest, &src, 4);
|
||||
|
||||
// CHECK: call i32* @wmemmove(i32* @dest, i32* @src, i32 4)
|
||||
__builtin_wmemmove(&dest, &src, 4);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Check that SDKROOT does not infer simulator on when it points to a regular
|
||||
// SDK.
|
||||
// REQUIRES: system-darwin
|
||||
// REQUIRES: system-darwin && native
|
||||
//
|
||||
// RUN: rm -rf %t/SDKs/iPhoneOS8.0.0.sdk
|
||||
// RUN: mkdir -p %t/SDKs/iPhoneOS8.0.0.sdk
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
// FIAS: cc1as
|
||||
|
||||
// RUN: %clang -### -fno-integrated-as -S %s 2>&1 \
|
||||
// RUN: %clang -target none -### -fno-integrated-as -S %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix NOFIAS
|
||||
|
||||
// NOFIAS-NOT: cc1as
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++1z -fsyntax-only -verify -pedantic
|
||||
// RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++1z -fsyntax-only -verify -pedantic -fno-signed-char
|
||||
// RUN: %clang_cc1 %s -triple x86_64-linux-gnu -std=c++1z -fsyntax-only -verify -pedantic -fno-wchar -Dwchar_t=__WCHAR_TYPE__
|
||||
// RUN: %clang_cc1 %s -std=c++1z -fsyntax-only -verify -pedantic
|
||||
// RUN: %clang_cc1 %s -std=c++1z -fsyntax-only -verify -pedantic -fno-signed-char
|
||||
// RUN: %clang_cc1 %s -std=c++1z -fsyntax-only -verify -pedantic -fno-wchar -Dwchar_t=__WCHAR_TYPE__
|
||||
|
||||
# 6 "/usr/include/string.h" 1 3 4
|
||||
extern "C" {
|
||||
@@ -14,13 +14,10 @@ extern "C" {
|
||||
|
||||
extern char *strchr(const char *s, int c);
|
||||
extern void *memchr(const void *s, int c, size_t n);
|
||||
|
||||
extern void *memcpy(void *d, const void *s, size_t n);
|
||||
extern void *memmove(void *d, const void *s, size_t n);
|
||||
}
|
||||
# 22 "SemaCXX/constexpr-string.cpp" 2
|
||||
# 19 "SemaCXX/constexpr-string.cpp" 2
|
||||
|
||||
# 24 "/usr/include/wchar.h" 1 3 4
|
||||
# 21 "/usr/include/wchar.h" 1 3 4
|
||||
extern "C" {
|
||||
extern size_t wcslen(const wchar_t *p);
|
||||
|
||||
@@ -30,12 +27,9 @@ extern "C" {
|
||||
|
||||
extern wchar_t *wcschr(const wchar_t *s, wchar_t c);
|
||||
extern wchar_t *wmemchr(const wchar_t *s, wchar_t c, size_t n);
|
||||
|
||||
extern wchar_t *wmemcpy(wchar_t *d, const wchar_t *s, size_t n);
|
||||
extern wchar_t *wmemmove(wchar_t *d, const wchar_t *s, size_t n);
|
||||
}
|
||||
|
||||
# 39 "SemaCXX/constexpr-string.cpp" 2
|
||||
# 33 "SemaCXX/constexpr-string.cpp" 2
|
||||
namespace Strlen {
|
||||
constexpr int n = __builtin_strlen("hello"); // ok
|
||||
static_assert(n == 5);
|
||||
@@ -241,133 +235,3 @@ namespace WcschrEtc {
|
||||
constexpr bool a = !wcschr(L"hello", L'h'); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wcschr' cannot be used in a constant expression}}
|
||||
constexpr bool b = !wmemchr(L"hello", L'h', 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'wmemchr' cannot be used in a constant expression}}
|
||||
}
|
||||
|
||||
namespace MemcpyEtc {
|
||||
template<typename T>
|
||||
constexpr T result(T (&arr)[4]) {
|
||||
return arr[0] * 1000 + arr[1] * 100 + arr[2] * 10 + arr[3];
|
||||
}
|
||||
|
||||
constexpr int test_memcpy(int a, int b, int n) {
|
||||
int arr[4] = {1, 2, 3, 4};
|
||||
__builtin_memcpy(arr + a, arr + b, n);
|
||||
// expected-note@-1 2{{overlapping memory regions}}
|
||||
// expected-note@-2 {{size to copy (1) is not a multiple of size of element type 'int'}}
|
||||
// expected-note@-3 {{source is not a contiguous array of at least 2 elements of type 'int'}}
|
||||
// expected-note@-4 {{destination is not a contiguous array of at least 3 elements of type 'int'}}
|
||||
return result(arr);
|
||||
}
|
||||
constexpr int test_memmove(int a, int b, int n) {
|
||||
int arr[4] = {1, 2, 3, 4};
|
||||
__builtin_memmove(arr + a, arr + b, n);
|
||||
// expected-note@-1 {{size to copy (1) is not a multiple of size of element type 'int'}}
|
||||
// expected-note@-2 {{source is not a contiguous array of at least 2 elements of type 'int'}}
|
||||
// expected-note@-3 {{destination is not a contiguous array of at least 3 elements of type 'int'}}
|
||||
return result(arr);
|
||||
}
|
||||
constexpr int test_wmemcpy(int a, int b, int n) {
|
||||
wchar_t arr[4] = {1, 2, 3, 4};
|
||||
__builtin_wmemcpy(arr + a, arr + b, n);
|
||||
// expected-note@-1 2{{overlapping memory regions}}
|
||||
// expected-note-re@-2 {{source is not a contiguous array of at least 2 elements of type '{{wchar_t|int}}'}}
|
||||
// expected-note-re@-3 {{destination is not a contiguous array of at least 3 elements of type '{{wchar_t|int}}'}}
|
||||
return result(arr);
|
||||
}
|
||||
constexpr int test_wmemmove(int a, int b, int n) {
|
||||
wchar_t arr[4] = {1, 2, 3, 4};
|
||||
__builtin_wmemmove(arr + a, arr + b, n);
|
||||
// expected-note-re@-1 {{source is not a contiguous array of at least 2 elements of type '{{wchar_t|int}}'}}
|
||||
// expected-note-re@-2 {{destination is not a contiguous array of at least 3 elements of type '{{wchar_t|int}}'}}
|
||||
return result(arr);
|
||||
}
|
||||
|
||||
static_assert(test_memcpy(1, 2, 4) == 1334);
|
||||
static_assert(test_memcpy(2, 1, 4) == 1224);
|
||||
static_assert(test_memcpy(0, 1, 8) == 2334); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_memcpy(1, 0, 8) == 1124); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_memcpy(1, 2, 1) == 1334); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_memcpy(0, 3, 4) == 4234);
|
||||
static_assert(test_memcpy(0, 3, 8) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_memcpy(2, 0, 12) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
|
||||
static_assert(test_memmove(1, 2, 4) == 1334);
|
||||
static_assert(test_memmove(2, 1, 4) == 1224);
|
||||
static_assert(test_memmove(0, 1, 8) == 2334);
|
||||
static_assert(test_memmove(1, 0, 8) == 1124);
|
||||
static_assert(test_memmove(1, 2, 1) == 1334); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_memmove(0, 3, 4) == 4234);
|
||||
static_assert(test_memmove(0, 3, 8) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_memmove(2, 0, 12) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
|
||||
static_assert(test_wmemcpy(1, 2, 1) == 1334);
|
||||
static_assert(test_wmemcpy(2, 1, 1) == 1224);
|
||||
static_assert(test_wmemcpy(0, 1, 2) == 2334); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_wmemcpy(1, 0, 2) == 1124); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_wmemcpy(1, 2, 1) == 1334);
|
||||
static_assert(test_wmemcpy(0, 3, 1) == 4234);
|
||||
static_assert(test_wmemcpy(0, 3, 2) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_wmemcpy(2, 0, 3) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
|
||||
static_assert(test_wmemmove(1, 2, 1) == 1334);
|
||||
static_assert(test_wmemmove(2, 1, 1) == 1224);
|
||||
static_assert(test_wmemmove(0, 1, 2) == 2334);
|
||||
static_assert(test_wmemmove(1, 0, 2) == 1124);
|
||||
static_assert(test_wmemmove(1, 2, 1) == 1334);
|
||||
static_assert(test_wmemmove(0, 3, 1) == 4234);
|
||||
static_assert(test_wmemmove(0, 3, 2) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
static_assert(test_wmemmove(2, 0, 3) == 4234); // expected-error {{constant}} expected-note {{in call}}
|
||||
|
||||
// Copying is permitted for any trivially-copyable type.
|
||||
struct Trivial { char k; short s; constexpr bool ok() { return k == 3 && s == 4; } };
|
||||
constexpr bool test_trivial() {
|
||||
Trivial arr[3] = {{1, 2}, {3, 4}, {5, 6}};
|
||||
__builtin_memcpy(arr, arr+1, sizeof(Trivial));
|
||||
__builtin_memmove(arr+1, arr, 2 * sizeof(Trivial));
|
||||
return arr[0].ok() && arr[1].ok() && arr[2].ok();
|
||||
}
|
||||
static_assert(test_trivial());
|
||||
|
||||
// But not for a non-trivially-copyable type.
|
||||
struct NonTrivial {
|
||||
constexpr NonTrivial() : n(0) {}
|
||||
constexpr NonTrivial(const NonTrivial &) : n(1) {}
|
||||
int n;
|
||||
};
|
||||
constexpr bool test_nontrivial_memcpy() { // expected-error {{never produces a constant}}
|
||||
NonTrivial arr[3] = {};
|
||||
__builtin_memcpy(arr, arr + 1, sizeof(NonTrivial)); // expected-note 2{{non-trivially-copyable}}
|
||||
return true;
|
||||
}
|
||||
static_assert(test_nontrivial_memcpy()); // expected-error {{constant}} expected-note {{in call}}
|
||||
constexpr bool test_nontrivial_memmove() { // expected-error {{never produces a constant}}
|
||||
NonTrivial arr[3] = {};
|
||||
__builtin_memcpy(arr, arr + 1, sizeof(NonTrivial)); // expected-note 2{{non-trivially-copyable}}
|
||||
return true;
|
||||
}
|
||||
static_assert(test_nontrivial_memmove()); // expected-error {{constant}} expected-note {{in call}}
|
||||
|
||||
// Type puns via constant evaluated memcpy are not supported yet.
|
||||
constexpr float type_pun(const unsigned &n) {
|
||||
float f = 0.0f;
|
||||
__builtin_memcpy(&f, &n, 4); // expected-note {{cannot constant evaluate 'memcpy' from object of type 'const unsigned int' to object of type 'float'}}
|
||||
return f;
|
||||
}
|
||||
static_assert(type_pun(0x3f800000) == 1.0f); // expected-error {{constant}} expected-note {{in call}}
|
||||
|
||||
// Make sure we're not confused by derived-to-base conversions.
|
||||
struct Base { int a; };
|
||||
struct Derived : Base { int b; };
|
||||
constexpr int test_derived_to_base(int n) {
|
||||
Derived arr[2] = {1, 2, 3, 4};
|
||||
Base *p = &arr[0];
|
||||
Base *q = &arr[1];
|
||||
__builtin_memcpy(p, q, sizeof(Base) * n); // expected-note {{source is not a contiguous array of at least 2 elements of type 'MemcpyEtc::Base'}}
|
||||
return arr[0].a * 1000 + arr[0].b * 100 + arr[1].a * 10 + arr[1].b;
|
||||
}
|
||||
static_assert(test_derived_to_base(0) == 1234);
|
||||
static_assert(test_derived_to_base(1) == 3234);
|
||||
// FIXME: We could consider making this work by stripping elements off both
|
||||
// designators until we have a long enough matching size, if both designators
|
||||
// point to the start of their respective final elements.
|
||||
static_assert(test_derived_to_base(2) == 3434); // expected-error {{constant}} expected-note {{in call}}
|
||||
}
|
||||
|
||||
@@ -4460,6 +4460,7 @@ INTERCEPTOR(int, random_r, void *buf, u32 *result) {
|
||||
// its metadata. See
|
||||
// https://github.com/google/sanitizers/issues/321.
|
||||
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET || \
|
||||
SANITIZER_INTERCEPT_PTHREAD_ATTR_GET_SCHED || \
|
||||
SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSSCHED || \
|
||||
SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GET || \
|
||||
SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GET || \
|
||||
|
||||
@@ -292,7 +292,7 @@ uptr ThreadDescriptorSize() {
|
||||
val = FIRST_32_SECOND_64(1168, 1776);
|
||||
else if (minor == 11 || (minor == 12 && patch == 1))
|
||||
val = FIRST_32_SECOND_64(1168, 2288);
|
||||
else if (minor <= 13)
|
||||
else if (minor <= 14)
|
||||
val = FIRST_32_SECOND_64(1168, 2304);
|
||||
else
|
||||
val = FIRST_32_SECOND_64(1216, 2304);
|
||||
|
||||
15
libclc/.gitignore
vendored
15
libclc/.gitignore
vendored
@@ -1,15 +0,0 @@
|
||||
Makefile
|
||||
amdgcn--
|
||||
amdgcn--amdhsa
|
||||
amdgcn-mesa-mesa3d
|
||||
build/*.pyc
|
||||
built_libs/
|
||||
generic--
|
||||
generic/lib/convert.cl
|
||||
libclc.pc
|
||||
nvptx--nvidiacl
|
||||
nvptx64--nvidiacl
|
||||
r600--
|
||||
utils/prepare-builtins
|
||||
utils/prepare-builtins.o
|
||||
utils/prepare-builtins.o.d
|
||||
@@ -1,86 +0,0 @@
|
||||
language: cpp
|
||||
|
||||
sudo: false
|
||||
dist: trusty
|
||||
|
||||
cache:
|
||||
apt: true
|
||||
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- env:
|
||||
- LABEL="make gcc LLVM-3.9"
|
||||
- LLVM_VERSION=3.9
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- CHECK_FILES="barts-r600--.bc cayman-r600--.bc cedar-r600--.bc cypress-r600--.bc tahiti-amdgcn--.bc amdgcn--amdhsa.bc nvptx--nvidiacl.bc nvptx64--nvidiacl.bc"
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-trusty-3.9
|
||||
packages:
|
||||
- libedit-dev
|
||||
- g++-4.8
|
||||
# From sources above
|
||||
- llvm-3.9-dev
|
||||
- clang-3.9
|
||||
- env:
|
||||
- LABEL="make gcc LLVM-4.0"
|
||||
- LLVM_VERSION=4.0
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- CHECK_FILES="barts-r600--.bc cayman-r600--.bc cedar-r600--.bc cypress-r600--.bc tahiti-amdgcn--.bc amdgcn--amdhsa.bc nvptx--nvidiacl.bc nvptx64--nvidiacl.bc"
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-trusty-4.0
|
||||
packages:
|
||||
- libedit-dev
|
||||
- g++-4.8
|
||||
# From sources above
|
||||
- llvm-4.0-dev
|
||||
- clang-4.0
|
||||
- env:
|
||||
- LABEL="make gcc LLVM-5.0"
|
||||
- LLVM_VERSION=5.0
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- CHECK_FILES="barts-r600--.bc cayman-r600--.bc cedar-r600--.bc cypress-r600--.bc tahiti-amdgcn--.bc amdgcn--amdhsa.bc nvptx--nvidiacl.bc nvptx64--nvidiacl.bc"
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-trusty-5.0
|
||||
packages:
|
||||
- libedit-dev
|
||||
- g++-4.8
|
||||
# From sources above
|
||||
- llvm-5.0-dev
|
||||
- clang-5.0
|
||||
- env:
|
||||
- LABEL="make gcc LLVM-6.0"
|
||||
- LLVM_VERSION=6.0
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- CHECK_FILES="barts-r600--.bc cayman-r600--.bc cedar-r600--.bc cypress-r600--.bc tahiti-amdgcn--.bc amdgcn--amdhsa.bc nvptx--nvidiacl.bc nvptx64--nvidiacl.bc"
|
||||
# llvm passes -Werror=date-time which is only supported in gcc-4.9+
|
||||
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- sourceline: 'deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-6.0 main'
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- libedit-dev
|
||||
# LLVM-6 needs libstdc++4.9
|
||||
- g++-4.9
|
||||
# From sources above
|
||||
- llvm-6.0-dev
|
||||
- clang-6.0
|
||||
|
||||
before_install:
|
||||
- eval "${MATRIX_EVAL}"
|
||||
|
||||
script:
|
||||
- $PYTHON ./configure.py --with-llvm-config=$LLVM_CONFIG --with-cxx-compiler=$CXX && make -j4
|
||||
- ret=0;
|
||||
for f in $CHECK_FILES; do
|
||||
./check_external_calls.sh built_libs/$f || ret=1;
|
||||
done;
|
||||
test $ret -eq 0
|
||||
@@ -1,2 +0,0 @@
|
||||
N: Peter Collingbourne
|
||||
E: peter@pcc.me.uk
|
||||
@@ -1,64 +0,0 @@
|
||||
==============================================================================
|
||||
libclc License
|
||||
==============================================================================
|
||||
|
||||
The libclc library is dual licensed under both the University of Illinois
|
||||
"BSD-Like" license and the MIT license. As a user of this code you may choose
|
||||
to use it under either license. As a contributor, you agree to allow your code
|
||||
to be used under both.
|
||||
|
||||
Full text of the relevant licenses is included below.
|
||||
|
||||
==============================================================================
|
||||
|
||||
Copyright (c) 2011-2018 by the contributors listed in CREDITS.TXT
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal with
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimers.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimers in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* The names of the contributors may not be used to endorse or promote
|
||||
products derived from this Software without specific prior written
|
||||
permission.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
||||
|
||||
==============================================================================
|
||||
|
||||
Copyright (c) 2011-2014 by the contributors listed in CREDITS.TXT
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,52 +0,0 @@
|
||||
libclc
|
||||
------
|
||||
|
||||
libclc is an open source, BSD licensed implementation of the library
|
||||
requirements of the OpenCL C programming language, as specified by the
|
||||
OpenCL 1.1 Specification. The following sections of the specification
|
||||
impose library requirements:
|
||||
|
||||
* 6.1: Supported Data Types
|
||||
* 6.2.3: Explicit Conversions
|
||||
* 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
|
||||
* 6.9: Preprocessor Directives and Macros
|
||||
* 6.11: Built-in Functions
|
||||
* 9.3: Double Precision Floating-Point
|
||||
* 9.4: 64-bit Atomics
|
||||
* 9.5: Writing to 3D image memory objects
|
||||
* 9.6: Half Precision Floating-Point
|
||||
|
||||
libclc is intended to be used with the Clang compiler's OpenCL frontend.
|
||||
|
||||
libclc is designed to be portable and extensible. To this end, it provides
|
||||
generic implementations of most library requirements, allowing the target
|
||||
to override the generic implementation at the granularity of individual
|
||||
functions.
|
||||
|
||||
libclc currently only supports the PTX target, but support for more
|
||||
targets is welcome.
|
||||
|
||||
Compiling and installing with Make
|
||||
----------------------------------
|
||||
|
||||
$ ./configure.py --with-llvm-config=/path/to/llvm-config && make
|
||||
$ make install
|
||||
|
||||
Note you can use the DESTDIR Makefile variable to do staged installs.
|
||||
|
||||
$ make install DESTDIR=/path/for/staged/install
|
||||
|
||||
Compiling and installing with Ninja
|
||||
-----------------------------------
|
||||
|
||||
$ ./configure.py -g ninja --with-llvm-config=/path/to/llvm-config && ninja
|
||||
$ ninja install
|
||||
|
||||
Note you can use the DESTDIR environment variable to do staged installs.
|
||||
|
||||
$ DESTDIR=/path/for/staged/install ninja install
|
||||
|
||||
Website
|
||||
-------
|
||||
|
||||
http://www.pcc.me.uk/~peter/libclc/
|
||||
@@ -1,6 +0,0 @@
|
||||
workitem/get_num_groups.ll
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_num_groups.40.ll
|
||||
workitem/get_global_size.40.ll
|
||||
workitem/get_local_size.40.ll
|
||||
@@ -1,3 +0,0 @@
|
||||
workitem/get_global_size.cl
|
||||
workitem/get_local_size.cl
|
||||
workitem/get_num_groups.39.ll
|
||||
@@ -1,2 +0,0 @@
|
||||
workitem/get_global_size.cl
|
||||
workitem/get_local_size.cl
|
||||
@@ -1,2 +0,0 @@
|
||||
workitem/get_global_size.cl
|
||||
workitem/get_local_size.cl
|
||||
@@ -1,3 +0,0 @@
|
||||
workitem/get_global_size.cl
|
||||
workitem/get_local_size.cl
|
||||
workitem/get_num_groups.cl
|
||||
@@ -1,2 +0,0 @@
|
||||
workitem/get_global_size.39.ll
|
||||
workitem/get_local_size.39.ll
|
||||
@@ -1,2 +0,0 @@
|
||||
workitem/get_global_size.50.ll
|
||||
workitem/get_local_size.50.ll
|
||||
@@ -1,2 +0,0 @@
|
||||
workitem/get_global_size.50.ll
|
||||
workitem/get_local_size.50.ll
|
||||
@@ -1,36 +0,0 @@
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
define i32 @get_global_size(i32 %dim) #1 {
|
||||
%dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
switch i32 %dim, label %default [
|
||||
i32 0, label %x
|
||||
i32 1, label %y
|
||||
i32 2, label %z
|
||||
]
|
||||
|
||||
x:
|
||||
%ptr_x = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 12
|
||||
%ptr_x32 = bitcast i8 addrspace(2)* %ptr_x to i32 addrspace(2)*
|
||||
%x32 = load i32, i32 addrspace(2)* %ptr_x32, align 4, !invariant.load !0
|
||||
ret i32 %x32
|
||||
|
||||
y:
|
||||
%ptr_y = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 16
|
||||
%ptr_y32 = bitcast i8 addrspace(2)* %ptr_y to i32 addrspace(2)*
|
||||
%y32 = load i32, i32 addrspace(2)* %ptr_y32, align 4, !invariant.load !0
|
||||
ret i32 %y32
|
||||
|
||||
z:
|
||||
%ptr_z = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 20
|
||||
%ptr_z32 = bitcast i8 addrspace(2)* %ptr_z to i32 addrspace(2)*
|
||||
%z32 = load i32, i32 addrspace(2)* %ptr_z32, align 4, !invariant.load !0
|
||||
ret i32 %z32
|
||||
|
||||
default:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { alwaysinline norecurse nounwind readonly }
|
||||
|
||||
!0 = !{}
|
||||
@@ -1,39 +0,0 @@
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
define i64 @get_global_size(i32 %dim) #1 {
|
||||
%dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
switch i32 %dim, label %default [
|
||||
i32 0, label %x
|
||||
i32 1, label %y
|
||||
i32 2, label %z
|
||||
]
|
||||
|
||||
x:
|
||||
%ptr_x = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 12
|
||||
%ptr_x32 = bitcast i8 addrspace(2)* %ptr_x to i32 addrspace(2)*
|
||||
%x32 = load i32, i32 addrspace(2)* %ptr_x32, align 4, !invariant.load !0
|
||||
%size_x = zext i32 %x32 to i64
|
||||
ret i64 %size_x
|
||||
|
||||
y:
|
||||
%ptr_y = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 16
|
||||
%ptr_y32 = bitcast i8 addrspace(2)* %ptr_y to i32 addrspace(2)*
|
||||
%y32 = load i32, i32 addrspace(2)* %ptr_y32, align 4, !invariant.load !0
|
||||
%size_y = zext i32 %y32 to i64
|
||||
ret i64 %size_y
|
||||
|
||||
z:
|
||||
%ptr_z = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 20
|
||||
%ptr_z32 = bitcast i8 addrspace(2)* %ptr_z to i32 addrspace(2)*
|
||||
%z32 = load i32, i32 addrspace(2)* %ptr_z32, align 4, !invariant.load !0
|
||||
%size_z = zext i32 %z32 to i64
|
||||
ret i64 %size_z
|
||||
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { alwaysinline norecurse nounwind readonly }
|
||||
|
||||
!0 = !{}
|
||||
@@ -1,16 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#if __clang_major__ >= 7
|
||||
#define CONST_AS __attribute__((address_space(4)))
|
||||
#else
|
||||
#define CONST_AS __attribute__((address_space(2)))
|
||||
#endif
|
||||
|
||||
_CLC_DEF size_t get_global_size(uint dim)
|
||||
{
|
||||
CONST_AS uint * ptr =
|
||||
(CONST_AS uint *) __builtin_amdgcn_dispatch_ptr();
|
||||
if (dim < 3)
|
||||
return ptr[3 + dim];
|
||||
return 1;
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
define i32 @get_local_size(i32 %dim) #1 {
|
||||
%dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i32 1
|
||||
%xy_size = load i32, i32 addrspace(2)* %xy_size_ptr, align 4, !invariant.load !0
|
||||
switch i32 %dim, label %default [
|
||||
i32 0, label %x_dim
|
||||
i32 1, label %y_dim
|
||||
i32 2, label %z_dim
|
||||
]
|
||||
|
||||
x_dim:
|
||||
%x_size = and i32 %xy_size, 65535
|
||||
ret i32 %x_size
|
||||
|
||||
y_dim:
|
||||
%y_size = lshr i32 %xy_size, 16
|
||||
ret i32 %y_size
|
||||
|
||||
z_dim:
|
||||
%z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i32 2
|
||||
%z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1
|
||||
ret i32 %z_size
|
||||
|
||||
default:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { alwaysinline norecurse nounwind readonly }
|
||||
|
||||
!0 = !{}
|
||||
!1 = !{ i32 0, i32 257 }
|
||||
@@ -1,38 +0,0 @@
|
||||
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
define i64 @get_local_size(i32 %dim) #1 {
|
||||
%dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
|
||||
%dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 1
|
||||
%xy_size = load i32, i32 addrspace(2)* %xy_size_ptr, align 4, !invariant.load !0
|
||||
switch i32 %dim, label %default [
|
||||
i32 0, label %x_dim
|
||||
i32 1, label %y_dim
|
||||
i32 2, label %z_dim
|
||||
]
|
||||
|
||||
x_dim:
|
||||
%x_size = and i32 %xy_size, 65535
|
||||
%x_size.ext = zext i32 %x_size to i64
|
||||
ret i64 %x_size.ext
|
||||
|
||||
y_dim:
|
||||
%y_size = lshr i32 %xy_size, 16
|
||||
%y_size.ext = zext i32 %y_size to i64
|
||||
ret i64 %y_size.ext
|
||||
|
||||
z_dim:
|
||||
%z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 2
|
||||
%z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1
|
||||
%z_size.ext = zext i32 %z_size to i64
|
||||
ret i64 %z_size.ext
|
||||
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { alwaysinline norecurse nounwind readonly }
|
||||
|
||||
!0 = !{}
|
||||
!1 = !{ i32 0, i32 257 }
|
||||
@@ -1,22 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#if __clang_major__ >= 7
|
||||
#define CONST_AS __attribute__((address_space(4)))
|
||||
#else
|
||||
#define CONST_AS __attribute__((address_space(2)))
|
||||
#endif
|
||||
|
||||
_CLC_DEF size_t get_local_size(uint dim)
|
||||
{
|
||||
CONST_AS uint * ptr =
|
||||
(CONST_AS uint *) __builtin_amdgcn_dispatch_ptr();
|
||||
switch (dim) {
|
||||
case 0:
|
||||
return ptr[1] & 0xffffu;
|
||||
case 1:
|
||||
return ptr[1] >> 16;
|
||||
case 2:
|
||||
return ptr[2] & 0xffffu;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
|
||||
#include <clc/clc.h>
|
||||
|
||||
_CLC_DEF size_t get_num_groups(uint dim) {
|
||||
size_t global_size = get_global_size(dim);
|
||||
size_t local_size = get_local_size(dim);
|
||||
size_t num_groups = global_size / local_size;
|
||||
if (global_size % local_size != 0) {
|
||||
num_groups++;
|
||||
}
|
||||
return num_groups;
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.ll
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_num_groups.ll
|
||||
@@ -1,4 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.ll
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_num_groups.ll
|
||||
@@ -1,4 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.ll
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_num_groups.ll
|
||||
@@ -1,4 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.ll
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_num_groups.ll
|
||||
@@ -1,14 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.ll
|
||||
integer/popcount.cl
|
||||
math/fmax.cl
|
||||
math/fmin.cl
|
||||
math/ldexp.cl
|
||||
mem_fence/fence.cl
|
||||
synchronization/barrier.cl
|
||||
workitem/get_global_offset.cl
|
||||
workitem/get_group_id.cl
|
||||
workitem/get_global_size.ll
|
||||
workitem/get_local_id.cl
|
||||
workitem/get_local_size.ll
|
||||
workitem/get_num_groups.ll
|
||||
workitem/get_work_dim.cl
|
||||
@@ -1,5 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.39.ll
|
||||
mem_fence/waitcnt.ll
|
||||
workitem/get_global_size.39.ll
|
||||
workitem/get_local_size.39.ll
|
||||
workitem/get_num_groups.39.ll
|
||||
@@ -1,5 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.39.ll
|
||||
mem_fence/waitcnt.ll
|
||||
workitem/get_global_size.40.ll
|
||||
workitem/get_local_size.40.ll
|
||||
workitem/get_num_groups.40.ll
|
||||
@@ -1,4 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.39.ll
|
||||
workitem/get_global_size.40.ll
|
||||
workitem/get_local_size.40.ll
|
||||
workitem/get_num_groups.40.ll
|
||||
@@ -1,4 +0,0 @@
|
||||
cl_khr_int64_extended_atomics/minmax_helpers.39.ll
|
||||
workitem/get_global_size.40.ll
|
||||
workitem/get_local_size.40.ll
|
||||
workitem/get_num_groups.40.ll
|
||||
@@ -1,49 +0,0 @@
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
|
||||
entry:
|
||||
%0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
|
||||
ret i64 %0
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
#include <utils.h>
|
||||
#include <integer/popcount.h>
|
||||
|
||||
#define __CLC_BODY "popcount.inc"
|
||||
#include <clc/integer/gentype.inc>
|
||||
@@ -1,17 +0,0 @@
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE popcount(__CLC_GENTYPE x) {
|
||||
/* LLVM-4+ implements i16 ops for VI+ ASICs. However, ctpop implementation
|
||||
* is missing until r326535. Therefore we have to convert sub i32 types to uint
|
||||
* as a workaround. */
|
||||
#if __clang_major__ < 7 && __clang_major__ > 3 && __CLC_GENSIZE < 32
|
||||
/* Prevent sign extension on uint conversion */
|
||||
const __CLC_U_GENTYPE y = __CLC_XCONCAT(as_, __CLC_U_GENTYPE)(x);
|
||||
/* Convert to uintX */
|
||||
const __CLC_XCONCAT(uint, __CLC_VECSIZE) z = __CLC_XCONCAT(convert_uint, __CLC_VECSIZE)(y);
|
||||
/* Call popcount on uintX type */
|
||||
const __CLC_XCONCAT(uint, __CLC_VECSIZE) res = __clc_native_popcount(z);
|
||||
/* Convert the result back to gentype. */
|
||||
return __CLC_XCONCAT(convert_, __CLC_GENTYPE)(res);
|
||||
#else
|
||||
return __clc_native_popcount(x);
|
||||
#endif
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#include "../../../generic/lib/clcmacro.h"
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD float fmax(float x, float y)
|
||||
{
|
||||
/* fcanonicalize removes sNaNs and flushes denormals if not enabled.
|
||||
* Otherwise fmax instruction flushes the values for comparison,
|
||||
* but outputs original denormal */
|
||||
x = __builtin_canonicalizef(x);
|
||||
y = __builtin_canonicalizef(y);
|
||||
return __builtin_fmaxf(x, y);
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmax, float, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD double fmax(double x, double y)
|
||||
{
|
||||
x = __builtin_canonicalize(x);
|
||||
y = __builtin_canonicalize(y);
|
||||
return __builtin_fmax(x, y);
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmax, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
|
||||
{
|
||||
if (isnan(x))
|
||||
return y;
|
||||
if (isnan(y))
|
||||
return x;
|
||||
return (y < x) ? x : y;
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#define __CLC_BODY <../../../generic/lib/math/fmax.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,46 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#include "../../../generic/lib/clcmacro.h"
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD float fmin(float x, float y)
|
||||
{
|
||||
/* fcanonicalize removes sNaNs and flushes denormals if not enabled.
|
||||
* Otherwise fmin instruction flushes the values for comparison,
|
||||
* but outputs original denormal */
|
||||
x = __builtin_canonicalizef(x);
|
||||
y = __builtin_canonicalizef(y);
|
||||
return __builtin_fminf(x, y);
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmin, float, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD double fmin(double x, double y)
|
||||
{
|
||||
x = __builtin_canonicalize(x);
|
||||
y = __builtin_canonicalize(y);
|
||||
return __builtin_fmin(x, y);
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
|
||||
{
|
||||
if (isnan(x))
|
||||
return y;
|
||||
if (isnan(y))
|
||||
return x;
|
||||
return (y < x) ? y : x;
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#define __CLC_BODY <../../../generic/lib/math/fmin.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <clc/clc.h>
|
||||
|
||||
#include "../../../generic/lib/clcmacro.h"
|
||||
|
||||
#ifdef __HAS_LDEXPF__
|
||||
#define BUILTINF __builtin_amdgcn_ldexpf
|
||||
#else
|
||||
#include "math/clc_ldexp.h"
|
||||
#define BUILTINF __clc_ldexp
|
||||
#endif
|
||||
|
||||
// This defines all the ldexp(floatN, intN) variants.
|
||||
_CLC_DEFINE_BINARY_BUILTIN(float, ldexp, BUILTINF, float, int);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
// This defines all the ldexp(doubleN, intN) variants.
|
||||
_CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __builtin_amdgcn_ldexp, double, int);
|
||||
#endif
|
||||
|
||||
// This defines all the ldexp(GENTYPE, int);
|
||||
#define __CLC_BODY <../../../generic/lib/math/ldexp.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef BUILTINF
|
||||
@@ -1,39 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
void __clc_amdgcn_s_waitcnt(unsigned flags);
|
||||
|
||||
// s_waitcnt takes 16bit argument with a combined number of maximum allowed
|
||||
// pending operations:
|
||||
// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
|
||||
// [7] -- undefined
|
||||
// [6:4] -- exports, GDS, and mem write
|
||||
// [3:0] -- vector memory operations
|
||||
|
||||
// Newer clang supports __builtin_amdgcn_s_waitcnt
|
||||
#if __clang_major__ >= 5
|
||||
# define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
|
||||
#else
|
||||
# define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
|
||||
#endif
|
||||
|
||||
_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
|
||||
{
|
||||
if (flags & CLK_GLOBAL_MEM_FENCE) {
|
||||
// scalar loads are counted with LGKM but we don't know whether
|
||||
// the compiler turned any loads to scalar
|
||||
__waitcnt(0);
|
||||
} else if (flags & CLK_LOCAL_MEM_FENCE)
|
||||
__waitcnt(0xff); // LGKM is [12:8]
|
||||
}
|
||||
#undef __waitcnt
|
||||
|
||||
// We don't have separate mechanism for read and write fences
|
||||
_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags)
|
||||
{
|
||||
mem_fence(flags);
|
||||
}
|
||||
|
||||
_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags)
|
||||
{
|
||||
mem_fence(flags);
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
declare void @llvm.amdgcn.s.waitcnt(i32) #0
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
; Export waitcnt intrinsic for clang < 5
|
||||
define void @__clc_amdgcn_s_waitcnt(i32 %flags) #1 {
|
||||
entry:
|
||||
tail call void @llvm.amdgcn.s.waitcnt(i32 %flags)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind alwaysinline }
|
||||
@@ -1,7 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
_CLC_DEF void barrier(cl_mem_fence_flags flags)
|
||||
{
|
||||
mem_fence(flags);
|
||||
__builtin_amdgcn_s_barrier();
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#if __clang_major__ >= 7
|
||||
#define CONST_AS __attribute__((address_space(4)))
|
||||
#else
|
||||
#define CONST_AS __attribute__((address_space(2)))
|
||||
#endif
|
||||
|
||||
_CLC_DEF size_t get_global_offset(uint dim)
|
||||
{
|
||||
CONST_AS uint * ptr =
|
||||
(CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
|
||||
if (dim < 3)
|
||||
return ptr[dim + 1];
|
||||
return 0;
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
declare i32 @llvm.r600.read.global.size.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.global.size.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.global.size.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.global.size.x()
|
||||
ret i32 %x
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.global.size.y()
|
||||
ret i32 %y
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.global.size.z()
|
||||
ret i32 %z
|
||||
default:
|
||||
ret i32 1
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
declare i32 @llvm.r600.read.global.size.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.global.size.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.global.size.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.global.size.x()
|
||||
%x.ext = zext i32 %x to i64
|
||||
ret i64 %x.ext
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.global.size.y()
|
||||
%y.ext = zext i32 %y to i64
|
||||
ret i64 %y.ext
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.global.size.z()
|
||||
%z.ext = zext i32 %z to i64
|
||||
ret i64 %z.ext
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
declare i32 @llvm.r600.read.global.size.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.global.size.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.global.size.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.global.size.x()
|
||||
%x.ext = zext i32 %x to i64
|
||||
ret i64 %x.ext
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.global.size.y()
|
||||
%y.ext = zext i32 %y to i64
|
||||
ret i64 %y.ext
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.global.size.z()
|
||||
%z.ext = zext i32 %z to i64
|
||||
ret i64 %z.ext
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
_CLC_DEF size_t get_group_id(uint dim)
|
||||
{
|
||||
switch(dim) {
|
||||
case 0: return __builtin_amdgcn_workgroup_id_x();
|
||||
case 1: return __builtin_amdgcn_workgroup_id_y();
|
||||
case 2: return __builtin_amdgcn_workgroup_id_z();
|
||||
default: return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
_CLC_DEF size_t get_local_id(uint dim)
|
||||
{
|
||||
switch(dim) {
|
||||
case 0: return __builtin_amdgcn_workitem_id_x();
|
||||
case 1: return __builtin_amdgcn_workitem_id_y();
|
||||
case 2: return __builtin_amdgcn_workitem_id_z();
|
||||
default: return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
declare i32 @llvm.r600.read.local.size.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.local.size.x()
|
||||
ret i32 %x
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.local.size.y()
|
||||
ret i32 %y
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.local.size.z()
|
||||
ret i32 %z
|
||||
default:
|
||||
ret i32 1
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
declare i32 @llvm.r600.read.local.size.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.local.size.x()
|
||||
%x.ext = zext i32 %x to i64
|
||||
ret i64 %x.ext
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.local.size.y()
|
||||
%y.ext = zext i32 %y to i64
|
||||
ret i64 %y.ext
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.local.size.z()
|
||||
%z.ext = zext i32 %z to i64
|
||||
ret i64 %z.ext
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
declare i32 @llvm.r600.read.local.size.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.local.size.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.local.size.x()
|
||||
%x.ext = zext i32 %x to i64
|
||||
ret i64 %x.ext
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.local.size.y()
|
||||
%y.ext = zext i32 %y to i64
|
||||
ret i64 %y.ext
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.local.size.z()
|
||||
%z.ext = zext i32 %z to i64
|
||||
ret i64 %z.ext
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.ngroups.x()
|
||||
ret i32 %x
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.ngroups.y()
|
||||
ret i32 %y
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.ngroups.z()
|
||||
ret i32 %z
|
||||
default:
|
||||
ret i32 1
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.ngroups.x()
|
||||
%x.ext = zext i32 %x to i64
|
||||
ret i64 %x.ext
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.ngroups.y()
|
||||
%y.ext = zext i32 %y to i64
|
||||
ret i64 %y.ext
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.ngroups.z()
|
||||
%z.ext = zext i32 %z to i64
|
||||
ret i64 %z.ext
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
|
||||
declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
|
||||
declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
|
||||
switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
|
||||
x_dim:
|
||||
%x = call i32 @llvm.r600.read.ngroups.x()
|
||||
%x.ext = zext i32 %x to i64
|
||||
ret i64 %x.ext
|
||||
y_dim:
|
||||
%y = call i32 @llvm.r600.read.ngroups.y()
|
||||
%y.ext = zext i32 %y to i64
|
||||
ret i64 %y.ext
|
||||
z_dim:
|
||||
%z = call i32 @llvm.r600.read.ngroups.z()
|
||||
%z.ext = zext i32 %z to i64
|
||||
ret i64 %z.ext
|
||||
default:
|
||||
ret i64 1
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#if __clang_major__ >= 7
|
||||
#define CONST_AS __attribute__((address_space(4)))
|
||||
#else
|
||||
#define CONST_AS __attribute__((address_space(2)))
|
||||
#endif
|
||||
|
||||
_CLC_DEF uint get_work_dim(void)
|
||||
{
|
||||
CONST_AS uint * ptr =
|
||||
(CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
|
||||
return ptr[0];
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
workitem/get_group_id.cl
|
||||
workitem/get_global_size.cl
|
||||
@@ -1,14 +0,0 @@
|
||||
math/native_exp.cl
|
||||
math/native_log.cl
|
||||
math/native_log10.cl
|
||||
math/half_exp.cl
|
||||
math/half_exp10.cl
|
||||
math/half_exp2.cl
|
||||
math/half_log.cl
|
||||
math/half_log10.cl
|
||||
math/half_log2.cl
|
||||
math/half_recip.cl
|
||||
math/half_rsqrt.cl
|
||||
math/half_sqrt.cl
|
||||
math/nextafter.cl
|
||||
math/sqrt.cl
|
||||
@@ -1,2 +0,0 @@
|
||||
shared/vload_half_helpers.ll
|
||||
shared/vstore_half_helpers.ll
|
||||
@@ -1,2 +0,0 @@
|
||||
shared/vload_half_helpers.ll
|
||||
shared/vstore_half_helpers.ll
|
||||
@@ -1,2 +0,0 @@
|
||||
shared/vload_half_helpers.ll
|
||||
shared/vstore_half_helpers.ll
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC exp
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC exp10
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC exp2
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC log
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC log10
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC log2
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,11 +0,0 @@
|
||||
#include <utils.h>
|
||||
|
||||
#define __CLC_HALF_FUNC(x) __CLC_CONCAT(half_, x)
|
||||
#define __CLC_NATIVE_FUNC(x) __CLC_CONCAT(native_, x)
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_HALF_FUNC(__CLC_FUNC)(__CLC_GENTYPE val) {
|
||||
return __CLC_NATIVE_FUNC(__CLC_FUNC)(val);
|
||||
}
|
||||
|
||||
#undef __CLC_NATIVE_FUNC
|
||||
#undef __CLC_HALF_FUNC
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC recip
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC rsqrt
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_FUNC sqrt
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <half_native_unary.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,5 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_BODY <native_exp.inc>
|
||||
#define __FLOAT_ONLY
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,3 +0,0 @@
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp(__CLC_GENTYPE val) {
|
||||
return native_exp2(val * M_LOG2E_F);
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_BODY <native_log.inc>
|
||||
#define __FLOAT_ONLY
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,3 +0,0 @@
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log(__CLC_GENTYPE val) {
|
||||
return native_log2(val) * (1.0f / M_LOG2E_F);
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
#define __CLC_BODY <native_log10.inc>
|
||||
#define __FLOAT_ONLY
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,3 +0,0 @@
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log10(__CLC_GENTYPE val) {
|
||||
return native_log2(val) * (M_LN2_F / M_LN10_F);
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
#include <clc/clc.h>
|
||||
#include "../lib/clcmacro.h"
|
||||
#include <math/clc_nextafter.h>
|
||||
|
||||
_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_DEFINE_BINARY_BUILTIN(half, nextafter, __clc_nextafter, half, half)
|
||||
#endif
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include "../../../generic/lib/clcmacro.h"
|
||||
#include "math/clc_sqrt.h"
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
#ifdef __AMDGCN__
|
||||
#define __clc_builtin_rsq __builtin_amdgcn_rsq
|
||||
#else
|
||||
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
|
||||
|
||||
uint vcc = x < 0x1p-767;
|
||||
uint exp0 = vcc ? 0x100 : 0;
|
||||
unsigned exp1 = vcc ? 0xffffff80 : 0;
|
||||
|
||||
double v01 = ldexp(x, exp0);
|
||||
double v23 = __clc_builtin_rsq(v01);
|
||||
double v45 = v01 * v23;
|
||||
v23 = v23 * 0.5;
|
||||
|
||||
double v67 = fma(-v23, v45, 0.5);
|
||||
v45 = fma(v45, v67, v45);
|
||||
double v89 = fma(-v45, v45, v01);
|
||||
v23 = fma(v23, v67, v23);
|
||||
v45 = fma(v89, v23, v45);
|
||||
v67 = fma(-v45, v45, v01);
|
||||
v23 = fma(v67, v23, v45);
|
||||
|
||||
v23 = ldexp(v23, exp1);
|
||||
return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23;
|
||||
}
|
||||
|
||||
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);
|
||||
|
||||
#endif
|
||||
@@ -1,23 +0,0 @@
|
||||
define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
|
||||
%data = load half, half addrspace(0)* %ptr
|
||||
%res = fpext half %data to float
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
|
||||
%data = load half, half addrspace(1)* %ptr
|
||||
%res = fpext half %data to float
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
|
||||
%data = load half, half addrspace(3)* %ptr
|
||||
%res = fpext half %data to float
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline {
|
||||
%data = load half, half addrspace(2)* %ptr
|
||||
%res = fpext half %data to float
|
||||
ret float %res
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
define void @__clc_vstore_half_float_helper__private(float %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc float %data to half
|
||||
store half %res, half addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_float_helper__global(float %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc float %data to half
|
||||
store half %res, half addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_float_helper__local(float %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc float %data to half
|
||||
store half %res, half addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_double_helper__private(double %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc double %data to half
|
||||
store half %res, half addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_double_helper__global(double %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc double %data to half
|
||||
store half %res, half addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_double_helper__local(double %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc double %data to half
|
||||
store half %res, half addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
@@ -1,100 +0,0 @@
|
||||
import ninja_syntax
|
||||
import os
|
||||
|
||||
# Simple meta-build system.
|
||||
|
||||
class Make(object):
|
||||
def __init__(self):
|
||||
self.output = open(self.output_filename(), 'w')
|
||||
self.rules = {}
|
||||
self.rule_text = ''
|
||||
self.all_targets = []
|
||||
self.default_targets = []
|
||||
self.clean_files = []
|
||||
self.distclean_files = []
|
||||
self.output.write("""all::
|
||||
|
||||
ifndef VERBOSE
|
||||
Verb = @
|
||||
endif
|
||||
|
||||
""")
|
||||
|
||||
def output_filename(self):
|
||||
return 'Makefile'
|
||||
|
||||
def rule(self, name, command, description=None, depfile=None,
|
||||
generator=False):
|
||||
self.rules[name] = {'command': command, 'description': description,
|
||||
'depfile': depfile, 'generator': generator}
|
||||
|
||||
def build(self, output, rule, inputs=[], implicit=[], order_only=[]):
|
||||
inputs = self._as_list(inputs)
|
||||
implicit = self._as_list(implicit)
|
||||
order_only = self._as_list(order_only)
|
||||
|
||||
output_dir = os.path.dirname(output)
|
||||
if output_dir != '' and not os.path.isdir(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
dollar_in = ' '.join(inputs)
|
||||
subst = lambda text: text.replace('$in', dollar_in).replace('$out', output)
|
||||
|
||||
deps = ' '.join(inputs + implicit)
|
||||
if order_only:
|
||||
deps += ' | '
|
||||
deps += ' '.join(order_only)
|
||||
self.output.write('%s: %s\n' % (output, deps))
|
||||
|
||||
r = self.rules[rule]
|
||||
command = subst(r['command'])
|
||||
if r['description']:
|
||||
desc = subst(r['description'])
|
||||
self.output.write('\t@echo %s\n\t$(Verb) %s\n' % (desc, command))
|
||||
else:
|
||||
self.output.write('\t%s\n' % command)
|
||||
if r['depfile']:
|
||||
depfile = subst(r['depfile'])
|
||||
self.output.write('-include '+depfile+'\n')
|
||||
self.output.write('\n')
|
||||
|
||||
self.all_targets.append(output)
|
||||
if r['generator']:
|
||||
self.distclean_files.append(output)
|
||||
if r['depfile']:
|
||||
self.distclean_files.append(depfile)
|
||||
else:
|
||||
self.clean_files.append(output)
|
||||
if r['depfile']:
|
||||
self.distclean_files.append(depfile)
|
||||
|
||||
|
||||
def _as_list(self, input):
|
||||
if isinstance(input, list):
|
||||
return input
|
||||
return [input]
|
||||
|
||||
def default(self, paths):
|
||||
self.default_targets += self._as_list(paths)
|
||||
|
||||
def finish(self):
|
||||
self.output.write('all:: %s\n\n' % ' '.join(self.default_targets or self.all_targets))
|
||||
self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files))
|
||||
self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files))
|
||||
|
||||
class Ninja(ninja_syntax.Writer):
|
||||
def __init__(self):
|
||||
ninja_syntax.Writer.__init__(self, open(self.output_filename(), 'w'))
|
||||
|
||||
def output_filename(self):
|
||||
return 'build.ninja'
|
||||
|
||||
def finish(self):
|
||||
pass
|
||||
|
||||
def from_name(name):
|
||||
if name == 'make':
|
||||
return Make()
|
||||
if name == 'ninja':
|
||||
return Ninja()
|
||||
raise LookupError('unknown generator: %s; supported generators are make and ninja' % name)
|
||||
@@ -1,118 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
"""Python module for generating .ninja files.
|
||||
|
||||
Note that this is emphatically not a required piece of Ninja; it's
|
||||
just a helpful utility for build-file-generation systems that already
|
||||
use Python.
|
||||
"""
|
||||
|
||||
import textwrap
|
||||
import re
|
||||
|
||||
class Writer(object):
|
||||
def __init__(self, output, width=78):
|
||||
self.output = output
|
||||
self.width = width
|
||||
|
||||
def newline(self):
|
||||
self.output.write('\n')
|
||||
|
||||
def comment(self, text):
|
||||
for line in textwrap.wrap(text, self.width - 2):
|
||||
self.output.write('# ' + line + '\n')
|
||||
|
||||
def variable(self, key, value, indent=0):
|
||||
if value is None:
|
||||
return
|
||||
if isinstance(value, list):
|
||||
value = ' '.join(value)
|
||||
self._line('%s = %s' % (key, value), indent)
|
||||
|
||||
def rule(self, name, command, description=None, depfile=None,
|
||||
generator=False):
|
||||
self._line('rule %s' % name)
|
||||
self.variable('command', escape(command), indent=1)
|
||||
if description:
|
||||
self.variable('description', description, indent=1)
|
||||
if depfile:
|
||||
self.variable('depfile', depfile, indent=1)
|
||||
if generator:
|
||||
self.variable('generator', '1', indent=1)
|
||||
|
||||
def build(self, outputs, rule, inputs=None, implicit=None, order_only=None,
|
||||
variables=None):
|
||||
outputs = self._as_list(outputs)
|
||||
all_inputs = self._as_list(inputs)[:]
|
||||
|
||||
if implicit:
|
||||
all_inputs.append('|')
|
||||
all_inputs.extend(self._as_list(implicit))
|
||||
if order_only:
|
||||
all_inputs.append('||')
|
||||
all_inputs.extend(self._as_list(order_only))
|
||||
|
||||
self._line('build %s: %s %s' % (' '.join(outputs),
|
||||
rule,
|
||||
' '.join(all_inputs)))
|
||||
|
||||
if variables:
|
||||
for key, val in variables:
|
||||
self.variable(key, val, indent=1)
|
||||
|
||||
return outputs
|
||||
|
||||
def include(self, path):
|
||||
self._line('include %s' % path)
|
||||
|
||||
def subninja(self, path):
|
||||
self._line('subninja %s' % path)
|
||||
|
||||
def default(self, paths):
|
||||
self._line('default %s' % ' '.join(self._as_list(paths)))
|
||||
|
||||
def _line(self, text, indent=0):
|
||||
"""Write 'text' word-wrapped at self.width characters."""
|
||||
leading_space = ' ' * indent
|
||||
while len(text) > self.width:
|
||||
# The text is too wide; wrap if possible.
|
||||
|
||||
# Find the rightmost space that would obey our width constraint.
|
||||
available_space = self.width - len(leading_space) - len(' $')
|
||||
space = text.rfind(' ', 0, available_space)
|
||||
if space < 0:
|
||||
# No such space; just use the first space we can find.
|
||||
space = text.find(' ', available_space)
|
||||
if space < 0:
|
||||
# Give up on breaking.
|
||||
break
|
||||
|
||||
self.output.write(leading_space + text[0:space] + ' $\n')
|
||||
text = text[space+1:]
|
||||
|
||||
# Subsequent lines are continuations, so indent them.
|
||||
leading_space = ' ' * (indent+2)
|
||||
|
||||
self.output.write(leading_space + text + '\n')
|
||||
|
||||
def _as_list(self, input):
|
||||
if input is None:
|
||||
return []
|
||||
if isinstance(input, list):
|
||||
return input
|
||||
return [input]
|
||||
|
||||
|
||||
def escape(string):
|
||||
"""Escape a string such that Makefile and shell variables are
|
||||
correctly escaped for use in a Ninja file.
|
||||
"""
|
||||
assert '\n' not in string, 'Ninja syntax does not allow newlines'
|
||||
# We only have one special metacharacter: '$'.
|
||||
|
||||
# We should leave $in and $out untouched.
|
||||
# Just look for makefile/shell style substitutions
|
||||
return re.sub(r'(\$[{(][a-z_]+[})])',
|
||||
r'$\1',
|
||||
string,
|
||||
flags=re.IGNORECASE)
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
FILE=$1
|
||||
if [ ! -f $FILE ]; then
|
||||
echo "ERROR: Not a file: $FILE"
|
||||
exit 3
|
||||
fi
|
||||
ret=0
|
||||
if [ "x$LLVM_CONFIG" = "x" ]; then
|
||||
LLVM_CONFIG=llvm-config
|
||||
echo 'WARNING: $LLVM_CONFIG not set, falling back to $PATH llvm-config'
|
||||
ret=2
|
||||
fi
|
||||
|
||||
|
||||
BIN_DIR=$($LLVM_CONFIG --bindir)
|
||||
DIS="$BIN_DIR/llvm-dis"
|
||||
if [ ! -x $DIS ]; then
|
||||
echo "ERROR: Disassembler '$DIS' is not executable"
|
||||
exit 3
|
||||
fi
|
||||
|
||||
TMP_FILE=$(mktemp)
|
||||
|
||||
# Check for calls. Calls to llvm intrinsics are OK
|
||||
$DIS < $FILE | grep ' call ' | grep -v '@llvm' > "$TMP_FILE"
|
||||
COUNT=$(wc -l < "$TMP_FILE")
|
||||
|
||||
if [ "$COUNT" -ne "0" ]; then
|
||||
echo "ERROR: $COUNT unresolved calls detected in $FILE"
|
||||
cat $TMP_FILE
|
||||
ret=1
|
||||
else
|
||||
echo "File $FILE is OK"
|
||||
fi
|
||||
exit $ret
|
||||
@@ -1,3 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
clang -target nvptx--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang nvptx--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers -Dcl_khr_fp64 "$@"
|
||||
@@ -1,308 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
from __future__ import print_function
|
||||
|
||||
def c_compiler_rule(b, name, description, compiler, flags):
|
||||
command = "%s -MMD -MF $out.d %s -c -o $out $in" % (compiler, flags)
|
||||
b.rule(name, command, description + " $out", depfile="$out.d")
|
||||
|
||||
version_major = 0;
|
||||
version_minor = 2;
|
||||
version_patch = 0;
|
||||
|
||||
from optparse import OptionParser
|
||||
import os
|
||||
import string
|
||||
from subprocess import *
|
||||
import sys
|
||||
|
||||
srcdir = os.path.dirname(sys.argv[0])
|
||||
|
||||
sys.path.insert(0, os.path.join(srcdir, 'build'))
|
||||
import metabuild
|
||||
|
||||
p = OptionParser()
|
||||
p.add_option('--with-llvm-config', metavar='PATH',
|
||||
help='use given llvm-config script')
|
||||
p.add_option('--with-cxx-compiler', metavar='PATH',
|
||||
help='use given C++ compiler')
|
||||
p.add_option('--prefix', metavar='PATH',
|
||||
help='install to given prefix')
|
||||
p.add_option('--libexecdir', metavar='PATH',
|
||||
help='install *.bc to given dir')
|
||||
p.add_option('--includedir', metavar='PATH',
|
||||
help='install include files to given dir')
|
||||
p.add_option('--pkgconfigdir', metavar='PATH',
|
||||
help='install clc.pc to given dir')
|
||||
p.add_option('-g', metavar='GENERATOR', default='make',
|
||||
help='use given generator (default: make)')
|
||||
p.add_option('--enable-runtime-subnormal', action="store_true", default=False,
|
||||
help='Allow runtimes to choose subnormal support')
|
||||
(options, args) = p.parse_args()
|
||||
|
||||
llvm_config_exe = options.with_llvm_config or "llvm-config"
|
||||
|
||||
prefix = options.prefix
|
||||
if not prefix:
|
||||
prefix = '/usr/local'
|
||||
|
||||
libexecdir = options.libexecdir
|
||||
if not libexecdir:
|
||||
libexecdir = os.path.join(prefix, 'lib/clc')
|
||||
|
||||
includedir = options.includedir
|
||||
if not includedir:
|
||||
includedir = os.path.join(prefix, 'include')
|
||||
|
||||
pkgconfigdir = options.pkgconfigdir
|
||||
if not pkgconfigdir:
|
||||
pkgconfigdir = os.path.join(prefix, 'share/pkgconfig')
|
||||
|
||||
def llvm_config(args):
|
||||
try:
|
||||
# Universal newlines translate different newline formats to '\n'
|
||||
# it also force the input to be string instead of bytes in python 3
|
||||
proc = Popen([llvm_config_exe] + args, stdout=PIPE, universal_newlines=True)
|
||||
return proc.communicate()[0].rstrip().replace('\n', ' ')
|
||||
except OSError:
|
||||
print("Error executing llvm-config.")
|
||||
print("Please ensure that llvm-config is in your $PATH, or use --with-llvm-config.")
|
||||
sys.exit(1)
|
||||
|
||||
llvm_version = llvm_config(['--version']).replace('svn', '').split('.')
|
||||
llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10
|
||||
llvm_string_version = llvm_version[0] + '.' + llvm_version[1]
|
||||
|
||||
if llvm_int_version < 390:
|
||||
print("libclc requires LLVM >= 3.9")
|
||||
sys.exit(1)
|
||||
|
||||
llvm_system_libs = llvm_config(['--system-libs'])
|
||||
llvm_bindir = llvm_config(['--bindir'])
|
||||
llvm_core_libs = llvm_config(['--libs', 'core', 'bitreader', 'bitwriter']) + ' ' + \
|
||||
llvm_system_libs + ' ' + \
|
||||
llvm_config(['--ldflags'])
|
||||
llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti ' + \
|
||||
'-DHAVE_LLVM=0x{:0=4}'.format(llvm_int_version)
|
||||
llvm_libdir = llvm_config(['--libdir'])
|
||||
|
||||
llvm_clang = os.path.join(llvm_bindir, 'clang')
|
||||
llvm_link = os.path.join(llvm_bindir, 'llvm-link')
|
||||
llvm_opt = os.path.join(llvm_bindir, 'opt')
|
||||
|
||||
cxx_compiler = options.with_cxx_compiler
|
||||
if not cxx_compiler:
|
||||
cxx_compiler = os.path.join(llvm_bindir, 'clang++')
|
||||
|
||||
available_targets = {
|
||||
'r600--' : { 'devices' :
|
||||
[{'gpu' : 'cedar', 'aliases' : ['palm', 'sumo', 'sumo2', 'redwood', 'juniper']},
|
||||
{'gpu' : 'cypress', 'aliases' : ['hemlock'] },
|
||||
{'gpu' : 'barts', 'aliases' : ['turks', 'caicos'] },
|
||||
{'gpu' : 'cayman', 'aliases' : ['aruba']} ]},
|
||||
'amdgcn--': { 'devices' :
|
||||
[{'gpu' : 'tahiti', 'aliases' : ['pitcairn', 'verde', 'oland', 'hainan', 'bonaire', 'kabini', 'kaveri', 'hawaii', 'mullins', 'tonga', 'iceland', 'carrizo', 'fiji', 'stoney', 'polaris10', 'polaris11', 'gfx900']} ]},
|
||||
'amdgcn--amdhsa': { 'devices' :
|
||||
[{'gpu' : '', 'aliases' : ['bonaire', 'kabini', 'kaveri', 'hawaii', 'mullins', 'tonga', 'iceland', 'carrizo', 'fiji', 'stoney', 'polaris10', 'polaris11', 'gfx900']} ]},
|
||||
'nvptx--' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
|
||||
'nvptx64--' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
|
||||
'nvptx--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
|
||||
'nvptx64--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []} ]},
|
||||
}
|
||||
|
||||
|
||||
default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--', 'amdgcn--amdhsa']
|
||||
|
||||
#mesa is using amdgcn-mesa-mesa3d since llvm-4.0
|
||||
if llvm_int_version > 390:
|
||||
available_targets['amdgcn-mesa-mesa3d'] = available_targets['amdgcn--']
|
||||
default_targets.append('amdgcn-mesa-mesa3d')
|
||||
|
||||
targets = args
|
||||
if not targets:
|
||||
targets = default_targets
|
||||
|
||||
b = metabuild.from_name(options.g)
|
||||
|
||||
b.rule("LLVM_AS", "%s -o $out $in" % os.path.join(llvm_bindir, "llvm-as"),
|
||||
'LLVM-AS $out')
|
||||
b.rule("LLVM_LINK", command = llvm_link + " -o $out $in",
|
||||
description = 'LLVM-LINK $out')
|
||||
b.rule("OPT", command = llvm_opt + " -O3 -o $out $in",
|
||||
description = 'OPT $out')
|
||||
|
||||
c_compiler_rule(b, "LLVM_TOOL_CXX", 'CXX', cxx_compiler, llvm_cxxflags)
|
||||
b.rule("LLVM_TOOL_LINK", cxx_compiler + " -o $out $in %s" % llvm_core_libs + " -Wl,-rpath %s" % llvm_libdir, 'LINK $out')
|
||||
|
||||
prepare_builtins = os.path.join('utils', 'prepare-builtins')
|
||||
b.build(os.path.join('utils', 'prepare-builtins.o'), "LLVM_TOOL_CXX",
|
||||
os.path.join(srcdir, 'utils', 'prepare-builtins.cpp'))
|
||||
b.build(prepare_builtins, "LLVM_TOOL_LINK",
|
||||
os.path.join('utils', 'prepare-builtins.o'))
|
||||
|
||||
b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins,
|
||||
'PREPARE-BUILTINS $out')
|
||||
b.rule("PYTHON_GEN", "python < $in > $out", "PYTHON_GEN $out")
|
||||
b.build('generic/lib/convert.cl', "PYTHON_GEN", ['generic/lib/gen_convert.py'])
|
||||
|
||||
manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'),
|
||||
os.path.join(srcdir, 'build', 'ninja_syntax.py')])
|
||||
|
||||
install_files_bc = []
|
||||
install_deps = []
|
||||
|
||||
# Create rules for subnormal helper objects
|
||||
for src in ['subnormal_disable.ll', 'subnormal_use_default.ll']:
|
||||
obj_name = src[:-2] + 'bc'
|
||||
obj = os.path.join('generic--', 'lib', obj_name)
|
||||
src_file = os.path.join('generic', 'lib', src)
|
||||
b.build(obj, 'LLVM_AS', src_file)
|
||||
b.default(obj)
|
||||
install_files_bc.append((obj, obj))
|
||||
install_deps.append(obj)
|
||||
|
||||
# Create libclc.pc
|
||||
clc = open('libclc.pc', 'w')
|
||||
clc.write('includedir=%(inc)s\nlibexecdir=%(lib)s\n\nName: libclc\nDescription: Library requirements of the OpenCL C programming language\nVersion: %(maj)s.%(min)s.%(pat)s\nCflags: -I${includedir}\nLibs: -L${libexecdir}' %
|
||||
{'inc': includedir, 'lib': libexecdir, 'maj': version_major, 'min': version_minor, 'pat': version_patch})
|
||||
clc.close()
|
||||
|
||||
for target in targets:
|
||||
(t_arch, t_vendor, t_os) = target.split('-')
|
||||
archs = [t_arch]
|
||||
if t_arch == 'nvptx' or t_arch == 'nvptx64':
|
||||
archs.append('ptx')
|
||||
archs.append('generic')
|
||||
|
||||
subdirs = []
|
||||
for arch in archs:
|
||||
subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
|
||||
subdirs.append("%s-%s" % (arch, t_os))
|
||||
if t_os == 'mesa3d':
|
||||
subdirs.append('amdgcn-amdhsa')
|
||||
subdirs.append(arch)
|
||||
if arch == 'amdgcn' or arch == 'r600':
|
||||
subdirs.append('amdgpu')
|
||||
|
||||
incdirs = filter(os.path.isdir,
|
||||
[os.path.join(srcdir, subdir, 'include') for subdir in subdirs])
|
||||
libdirs = filter(lambda d: os.path.isfile(os.path.join(d, 'SOURCES')) or
|
||||
os.path.isfile(os.path.join(d, 'SOURCES_' + llvm_string_version)),
|
||||
[os.path.join(srcdir, subdir, 'lib') for subdir in subdirs])
|
||||
|
||||
# The above are iterables in python3 but we might use them multiple times
|
||||
# if more then one device is supported.
|
||||
incdirs = list(incdirs)
|
||||
libdirs = list(libdirs)
|
||||
clang_cl_includes = ' '.join(["-I%s" % incdir for incdir in incdirs])
|
||||
|
||||
for device in available_targets[target]['devices']:
|
||||
# The rule for building a .bc file for the specified architecture using clang.
|
||||
clang_bc_flags = "-target %s -I`dirname $in` %s " \
|
||||
"-fno-builtin " \
|
||||
"-D__CLC_INTERNAL " \
|
||||
"-emit-llvm" % (target, clang_cl_includes)
|
||||
if device['gpu'] != '':
|
||||
clang_bc_flags += ' -mcpu=' + device['gpu']
|
||||
clang_bc_rule = "CLANG_CL_BC_" + target + "_" + device['gpu']
|
||||
c_compiler_rule(b, clang_bc_rule, "LLVM-CC", llvm_clang, clang_bc_flags)
|
||||
|
||||
objects = []
|
||||
sources_seen = set()
|
||||
compats = []
|
||||
|
||||
if device['gpu'] == '':
|
||||
full_target_name = target
|
||||
obj_suffix = ''
|
||||
else:
|
||||
full_target_name = device['gpu'] + '-' + target
|
||||
obj_suffix = '.' + device['gpu']
|
||||
|
||||
for libdir in libdirs:
|
||||
subdir_list_file = os.path.join(libdir, 'SOURCES')
|
||||
if os.path.exists(subdir_list_file):
|
||||
manifest_deps.add(subdir_list_file)
|
||||
override_list_file = os.path.join(libdir, 'OVERRIDES')
|
||||
compat_list_file = os.path.join(libdir,
|
||||
'SOURCES_' + llvm_string_version)
|
||||
compat_list_override = os.path.join(libdir,
|
||||
'OVERRIDES_' + llvm_string_version)
|
||||
|
||||
# Build compat list
|
||||
if os.path.exists(compat_list_file):
|
||||
manifest_deps.add(compat_list_file)
|
||||
for compat in open(compat_list_file).readlines():
|
||||
compat = compat.rstrip()
|
||||
compats.append(compat)
|
||||
|
||||
# Add target compat overrides
|
||||
if os.path.exists(compat_list_override):
|
||||
for override in open(compat_list_override).readlines():
|
||||
override = override.rstrip()
|
||||
sources_seen.add(override)
|
||||
|
||||
# Add target overrides
|
||||
if os.path.exists(override_list_file):
|
||||
for override in open(override_list_file).readlines():
|
||||
override = override.rstrip()
|
||||
sources_seen.add(override)
|
||||
|
||||
files = open(subdir_list_file).readlines() if os.path.exists(subdir_list_file) else []
|
||||
for src in files + compats:
|
||||
src = src.rstrip()
|
||||
if src not in sources_seen:
|
||||
sources_seen.add(src)
|
||||
obj = os.path.join(target, 'lib', src + obj_suffix + '.bc')
|
||||
objects.append(obj)
|
||||
src_path = libdir
|
||||
src_file = os.path.join(src_path, src)
|
||||
ext = os.path.splitext(src)[1]
|
||||
if ext == '.ll':
|
||||
b.build(obj, 'LLVM_AS', src_file)
|
||||
else:
|
||||
b.build(obj, clang_bc_rule, src_file)
|
||||
|
||||
obj = os.path.join('generic--', 'lib', 'subnormal_use_default.bc')
|
||||
if not options.enable_runtime_subnormal:
|
||||
objects.append(obj)
|
||||
|
||||
builtins_link_bc = os.path.join(target, 'lib', 'builtins.link' + obj_suffix + '.bc')
|
||||
builtins_opt_bc = os.path.join(target, 'lib', 'builtins.opt' + obj_suffix + '.bc')
|
||||
builtins_bc = os.path.join('built_libs', full_target_name + '.bc')
|
||||
b.build(builtins_link_bc, "LLVM_LINK", objects)
|
||||
b.build(builtins_opt_bc, "OPT", builtins_link_bc)
|
||||
b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins)
|
||||
install_files_bc.append((builtins_bc, builtins_bc))
|
||||
install_deps.append(builtins_bc)
|
||||
for alias in device['aliases']:
|
||||
# Ninja cannot have multiple rules with same name so append suffix
|
||||
ruleName = "CREATE_ALIAS_{0}_for_{1}".format(alias, device['gpu'])
|
||||
b.rule(ruleName, "ln -fs %s $out" % os.path.basename(builtins_bc)
|
||||
,"CREATE-ALIAS $out")
|
||||
|
||||
alias_file = os.path.join('built_libs', alias + '-' + target + '.bc')
|
||||
b.build(alias_file, ruleName, builtins_bc)
|
||||
install_files_bc.append((alias_file, alias_file))
|
||||
install_deps.append(alias_file)
|
||||
b.default(builtins_bc)
|
||||
|
||||
|
||||
install_cmd = ' && '.join(['mkdir -p ${DESTDIR}/%(dst)s && cp -r %(src)s ${DESTDIR}/%(dst)s' %
|
||||
{'src': file,
|
||||
'dst': libexecdir}
|
||||
for (file, dest) in install_files_bc])
|
||||
install_cmd = ' && '.join(['%(old)s && mkdir -p ${DESTDIR}/%(dst)s && cp -r %(srcdir)s/generic/include/clc ${DESTDIR}/%(dst)s' %
|
||||
{'old': install_cmd,
|
||||
'dst': includedir,
|
||||
'srcdir': srcdir}])
|
||||
install_cmd = ' && '.join(['%(old)s && mkdir -p ${DESTDIR}/%(dst)s && cp -r libclc.pc ${DESTDIR}/%(dst)s' %
|
||||
{'old': install_cmd,
|
||||
'dst': pkgconfigdir}])
|
||||
|
||||
b.rule('install', command = install_cmd, description = 'INSTALL')
|
||||
b.build('install', 'install', install_deps)
|
||||
|
||||
b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE',
|
||||
generator = True)
|
||||
b.build(b.output_filename(), 'configure', list(manifest_deps))
|
||||
|
||||
b.finish()
|
||||
@@ -1,77 +0,0 @@
|
||||
#define as_char(x) __builtin_astype(x, char)
|
||||
#define as_uchar(x) __builtin_astype(x, uchar)
|
||||
#define as_short(x) __builtin_astype(x, short)
|
||||
#define as_ushort(x) __builtin_astype(x, ushort)
|
||||
#define as_int(x) __builtin_astype(x, int)
|
||||
#define as_uint(x) __builtin_astype(x, uint)
|
||||
#define as_long(x) __builtin_astype(x, long)
|
||||
#define as_ulong(x) __builtin_astype(x, ulong)
|
||||
#define as_float(x) __builtin_astype(x, float)
|
||||
|
||||
#define as_char2(x) __builtin_astype(x, char2)
|
||||
#define as_uchar2(x) __builtin_astype(x, uchar2)
|
||||
#define as_short2(x) __builtin_astype(x, short2)
|
||||
#define as_ushort2(x) __builtin_astype(x, ushort2)
|
||||
#define as_int2(x) __builtin_astype(x, int2)
|
||||
#define as_uint2(x) __builtin_astype(x, uint2)
|
||||
#define as_long2(x) __builtin_astype(x, long2)
|
||||
#define as_ulong2(x) __builtin_astype(x, ulong2)
|
||||
#define as_float2(x) __builtin_astype(x, float2)
|
||||
|
||||
#define as_char3(x) __builtin_astype(x, char3)
|
||||
#define as_uchar3(x) __builtin_astype(x, uchar3)
|
||||
#define as_short3(x) __builtin_astype(x, short3)
|
||||
#define as_ushort3(x) __builtin_astype(x, ushort3)
|
||||
#define as_int3(x) __builtin_astype(x, int3)
|
||||
#define as_uint3(x) __builtin_astype(x, uint3)
|
||||
#define as_long3(x) __builtin_astype(x, long3)
|
||||
#define as_ulong3(x) __builtin_astype(x, ulong3)
|
||||
#define as_float3(x) __builtin_astype(x, float3)
|
||||
|
||||
#define as_char4(x) __builtin_astype(x, char4)
|
||||
#define as_uchar4(x) __builtin_astype(x, uchar4)
|
||||
#define as_short4(x) __builtin_astype(x, short4)
|
||||
#define as_ushort4(x) __builtin_astype(x, ushort4)
|
||||
#define as_int4(x) __builtin_astype(x, int4)
|
||||
#define as_uint4(x) __builtin_astype(x, uint4)
|
||||
#define as_long4(x) __builtin_astype(x, long4)
|
||||
#define as_ulong4(x) __builtin_astype(x, ulong4)
|
||||
#define as_float4(x) __builtin_astype(x, float4)
|
||||
|
||||
#define as_char8(x) __builtin_astype(x, char8)
|
||||
#define as_uchar8(x) __builtin_astype(x, uchar8)
|
||||
#define as_short8(x) __builtin_astype(x, short8)
|
||||
#define as_ushort8(x) __builtin_astype(x, ushort8)
|
||||
#define as_int8(x) __builtin_astype(x, int8)
|
||||
#define as_uint8(x) __builtin_astype(x, uint8)
|
||||
#define as_long8(x) __builtin_astype(x, long8)
|
||||
#define as_ulong8(x) __builtin_astype(x, ulong8)
|
||||
#define as_float8(x) __builtin_astype(x, float8)
|
||||
|
||||
#define as_char16(x) __builtin_astype(x, char16)
|
||||
#define as_uchar16(x) __builtin_astype(x, uchar16)
|
||||
#define as_short16(x) __builtin_astype(x, short16)
|
||||
#define as_ushort16(x) __builtin_astype(x, ushort16)
|
||||
#define as_int16(x) __builtin_astype(x, int16)
|
||||
#define as_uint16(x) __builtin_astype(x, uint16)
|
||||
#define as_long16(x) __builtin_astype(x, long16)
|
||||
#define as_ulong16(x) __builtin_astype(x, ulong16)
|
||||
#define as_float16(x) __builtin_astype(x, float16)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#define as_double(x) __builtin_astype(x, double)
|
||||
#define as_double2(x) __builtin_astype(x, double2)
|
||||
#define as_double3(x) __builtin_astype(x, double3)
|
||||
#define as_double4(x) __builtin_astype(x, double4)
|
||||
#define as_double8(x) __builtin_astype(x, double8)
|
||||
#define as_double16(x) __builtin_astype(x, double16)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#define as_half(x) __builtin_astype(x, half)
|
||||
#define as_half2(x) __builtin_astype(x, half2)
|
||||
#define as_half3(x) __builtin_astype(x, half3)
|
||||
#define as_half4(x) __builtin_astype(x, half4)
|
||||
#define as_half8(x) __builtin_astype(x, half8)
|
||||
#define as_half16(x) __builtin_astype(x, half16)
|
||||
#endif
|
||||
@@ -1,15 +0,0 @@
|
||||
#define __CLC_DST_ADDR_SPACE local
|
||||
#define __CLC_SRC_ADDR_SPACE global
|
||||
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
|
||||
#include <clc/async/gentype.inc>
|
||||
#undef __CLC_DST_ADDR_SPACE
|
||||
#undef __CLC_SRC_ADDR_SPACE
|
||||
#undef __CLC_BODY
|
||||
|
||||
#define __CLC_DST_ADDR_SPACE global
|
||||
#define __CLC_SRC_ADDR_SPACE local
|
||||
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
|
||||
#include <clc/async/gentype.inc>
|
||||
#undef __CLC_DST_ADDR_SPACE
|
||||
#undef __CLC_SRC_ADDR_SPACE
|
||||
#undef __CLC_BODY
|
||||
@@ -1,5 +0,0 @@
|
||||
_CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(
|
||||
__CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
|
||||
const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
|
||||
size_t num_gentypes,
|
||||
event_t event);
|
||||
@@ -1,15 +0,0 @@
|
||||
#define __CLC_DST_ADDR_SPACE local
|
||||
#define __CLC_SRC_ADDR_SPACE global
|
||||
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
|
||||
#include <clc/async/gentype.inc>
|
||||
#undef __CLC_DST_ADDR_SPACE
|
||||
#undef __CLC_SRC_ADDR_SPACE
|
||||
#undef __CLC_BODY
|
||||
|
||||
#define __CLC_DST_ADDR_SPACE global
|
||||
#define __CLC_SRC_ADDR_SPACE local
|
||||
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
|
||||
#include <clc/async/gentype.inc>
|
||||
#undef __CLC_DST_ADDR_SPACE
|
||||
#undef __CLC_SRC_ADDR_SPACE
|
||||
#undef __CLC_BODY
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user