[libc][NFC] Remove all trailing spaces from libc (#82831)
Summary: There are a lot of random training spaces on various lines. This patch just got rid of all of them with `sed 's/\ \+$//g'.
This commit is contained in:
@@ -49,7 +49,7 @@ __memchr_arm:
|
||||
and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
|
||||
|
||||
cmp r2,#16 @ If it's short don't bother with anything clever
|
||||
blt 20f
|
||||
blt 20f
|
||||
|
||||
tst r0, #7 @ If it's already aligned skip the next bit
|
||||
beq 10f
|
||||
@@ -62,7 +62,7 @@ __memchr_arm:
|
||||
beq 50f @ If it matches exit found
|
||||
tst r0, #7
|
||||
bne 5b @ If not aligned yet then do next byte
|
||||
|
||||
|
||||
10:
|
||||
@ At this point, we are aligned, we know we have at least 8 bytes to work with
|
||||
push {r4,r5,r6,r7}
|
||||
@@ -71,7 +71,7 @@ __memchr_arm:
|
||||
bic r4, r2, #7 @ Number of double words to work with
|
||||
mvns r7, #0 @ all F's
|
||||
movs r3, #0
|
||||
|
||||
|
||||
15:
|
||||
ldmia r0!,{r5,r6}
|
||||
subs r4, r4, #8
|
||||
@@ -87,7 +87,7 @@ __memchr_arm:
|
||||
pop {r4,r5,r6,r7}
|
||||
and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
|
||||
and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
|
||||
|
||||
|
||||
20:
|
||||
cbz r2, 40f @ 0 length or hit the end already then not found
|
||||
|
||||
|
||||
@@ -162,7 +162,7 @@ elseif(LIBC_CMAKE_VERBOSE_LOGGING)
|
||||
message(STATUS "Path for config files is: ${LIBC_CONFIG_PATH}")
|
||||
endif()
|
||||
|
||||
# option(LIBC_ENABLE_WIDE_CHARACTERS
|
||||
# option(LIBC_ENABLE_WIDE_CHARACTERS
|
||||
# "Whether to enable wide character functions on supported platforms. This may
|
||||
# also set flags to enable or disable wide character support within other
|
||||
# functions (e.g. printf)." ON)
|
||||
|
||||
@@ -17,7 +17,7 @@ git clone https://github.com/Z3Prover/z3.git
|
||||
python scripts/mk_make.py --prefix=<Z3_INSTALL_DIR>
|
||||
cd build
|
||||
make -j
|
||||
make install
|
||||
make install
|
||||
```
|
||||
|
||||
## Configuration
|
||||
@@ -68,7 +68,7 @@ Make sure to save the results of the benchmark as a json file.
|
||||
By default, each function is benchmarked for at least one second, here we lower it to 200ms.
|
||||
|
||||
- `--benchmark_filter="BM_Memset|BM_Bzero"`
|
||||
|
||||
|
||||
By default, all functions are benchmarked, here we restrict them to `memset` and `bzero`.
|
||||
|
||||
Other options might be useful, use `--help` for more information.
|
||||
|
||||
@@ -57,7 +57,7 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
|
||||
if(${feature} STREQUAL "float128")
|
||||
set(LIBC_COMPILER_HAS_FLOAT128 TRUE)
|
||||
elseif(${feature} STREQUAL "fixed_point")
|
||||
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
|
||||
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@@ -6,7 +6,7 @@ function(_get_compile_options_from_flags output_var)
|
||||
endif()
|
||||
check_flag(ADD_SSE4_2_FLAG ${ROUND_OPT_FLAG} ${flags})
|
||||
check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${flags})
|
||||
|
||||
|
||||
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
|
||||
if(ADD_FMA_FLAG)
|
||||
if(LIBC_TARGET_ARCHITECTURE_IS_X86)
|
||||
|
||||
@@ -125,7 +125,7 @@ function(add_gpu_entrypoint_library target_name base_target_name)
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin"
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
|
||||
COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
|
||||
"${prefix},file=$<JOIN:${object},,file=>" -o
|
||||
"${prefix},file=$<JOIN:${object},,file=>" -o
|
||||
${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin
|
||||
DEPENDS ${dep} ${base_target_name}
|
||||
COMMENT "Packaging LLVM offloading binary for '${object}'"
|
||||
@@ -142,7 +142,7 @@ function(add_gpu_entrypoint_library target_name base_target_name)
|
||||
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp
|
||||
DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
|
||||
)
|
||||
add_custom_target(${dep}.__stub__
|
||||
add_custom_target(${dep}.__stub__
|
||||
DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")
|
||||
|
||||
add_library(${dep}.__fatbin__
|
||||
@@ -151,9 +151,9 @@ function(add_gpu_entrypoint_library target_name base_target_name)
|
||||
)
|
||||
|
||||
# This is always compiled for the LLVM host triple instead of the native GPU
|
||||
# triple that is used by default in the build.
|
||||
# triple that is used by default in the build.
|
||||
target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
|
||||
target_compile_options(${dep}.__fatbin__ PRIVATE
|
||||
target_compile_options(${dep}.__fatbin__ PRIVATE
|
||||
--target=${LLVM_HOST_TRIPLE}
|
||||
"SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
|
||||
add_dependencies(${dep}.__fatbin__
|
||||
|
||||
@@ -284,7 +284,7 @@ function(create_entrypoint_object fq_target_name)
|
||||
|
||||
# The NVPTX target cannot use LTO for the internal targets used for testing.
|
||||
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
|
||||
target_compile_options(${internal_target_name} PRIVATE
|
||||
target_compile_options(${internal_target_name} PRIVATE
|
||||
-fno-lto -march=${LIBC_GPU_TARGET_ARCHITECTURE})
|
||||
endif()
|
||||
|
||||
|
||||
@@ -318,8 +318,8 @@ function(add_libc_fuzzer target_name)
|
||||
target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR})
|
||||
target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR})
|
||||
|
||||
target_link_libraries(${fq_target_name} PRIVATE
|
||||
${link_object_files}
|
||||
target_link_libraries(${fq_target_name} PRIVATE
|
||||
${link_object_files}
|
||||
${LIBC_FUZZER_LINK_LIBRARIES}
|
||||
)
|
||||
|
||||
@@ -352,7 +352,7 @@ endif()
|
||||
# system libc are linked in to the final executable. The final exe is fully
|
||||
# statically linked. The libc that the final exe links to consists of only
|
||||
# the object files of the DEPENDS targets.
|
||||
#
|
||||
#
|
||||
# Usage:
|
||||
# add_integration_test(
|
||||
# <target name>
|
||||
@@ -462,7 +462,7 @@ function(add_integration_test test_name)
|
||||
target_compile_options(${fq_build_target_name} PRIVATE ${compile_options})
|
||||
|
||||
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
${LIBC_COMPILE_OPTIONS_DEFAULT}
|
||||
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
|
||||
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
|
||||
@@ -470,7 +470,7 @@ function(add_integration_test test_name)
|
||||
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
|
||||
# We need to use the internal object versions for NVPTX.
|
||||
set(internal_suffix ".__internal__")
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
${LIBC_COMPILE_OPTIONS_DEFAULT}
|
||||
"-Wl,--suppress-stack-size-warning"
|
||||
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
|
||||
@@ -645,7 +645,7 @@ function(add_libc_hermetic_test test_name)
|
||||
endforeach()
|
||||
|
||||
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
${LIBC_COMPILE_OPTIONS_DEFAULT}
|
||||
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
|
||||
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
|
||||
@@ -653,7 +653,7 @@ function(add_libc_hermetic_test test_name)
|
||||
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
|
||||
# We need to use the internal object versions for NVPTX.
|
||||
set(internal_suffix ".__internal__")
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
target_link_options(${fq_build_target_name} PRIVATE
|
||||
${LIBC_COMPILE_OPTIONS_DEFAULT}
|
||||
"-Wl,--suppress-stack-size-warning"
|
||||
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
|
||||
|
||||
@@ -16,7 +16,7 @@ set(TARGET_LIBC_ENTRYPOINTS
|
||||
libc.src.ctype.toascii
|
||||
libc.src.ctype.tolower
|
||||
libc.src.ctype.toupper
|
||||
|
||||
|
||||
# string.h entrypoints
|
||||
libc.src.string.bcmp
|
||||
libc.src.string.bzero
|
||||
|
||||
@@ -9,12 +9,12 @@ try_compile(
|
||||
)
|
||||
|
||||
if(NOT has_sys_random)
|
||||
list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS
|
||||
list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS
|
||||
libc.src.sys.stat.stat
|
||||
)
|
||||
# If we're doing a fullbuild we provide the random header ourselves.
|
||||
if(NOT LLVM_LIBC_FULL_BUILD)
|
||||
list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS
|
||||
list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS
|
||||
libc.src.sys.random.getrandom
|
||||
)
|
||||
endif()
|
||||
|
||||
@@ -116,7 +116,7 @@ set(TARGET_LIBM_ENTRYPOINTS
|
||||
libc.src.math.acoshf
|
||||
libc.src.math.asinf
|
||||
libc.src.math.asinhf
|
||||
libc.src.math.atanf
|
||||
libc.src.math.atanf
|
||||
libc.src.math.atanhf
|
||||
libc.src.math.copysign
|
||||
libc.src.math.copysignf
|
||||
|
||||
@@ -29,7 +29,7 @@ a list of open projects that one can start with:
|
||||
#. One is about adding CMake facilities to optionally link the libc's overlay
|
||||
static archive (see :ref:`overlay_mode`) with other LLVM tools/executables.
|
||||
#. The other is about putting plumbing in place to release the overlay static
|
||||
archive (see :ref:`overlay_mode`) as part of the LLVM binary releases.
|
||||
archive (see :ref:`overlay_mode`) as part of the LLVM binary releases.
|
||||
|
||||
#. **Implement Linux syscall wrappers** - A large portion of the POSIX API can
|
||||
be implemented as syscall wrappers on Linux. A good number have already been
|
||||
|
||||
@@ -26,7 +26,7 @@ Implementation Status
|
||||
|
||||
* To check date and time functions enabled for Windows:
|
||||
|
||||
- `windows-x86_64 <https://github.com/llvm/llvm-project/tree/main/libc/config/windows/entrypoints.txt>`_
|
||||
- `windows-x86_64 <https://github.com/llvm/llvm-project/tree/main/libc/config/windows/entrypoints.txt>`_
|
||||
|
||||
- windows-aarch64 - to be added
|
||||
|
||||
@@ -38,11 +38,11 @@ Implementation Status
|
||||
|
||||
* To check date and time functions enabled for GPU:
|
||||
|
||||
- `gpu-entrypoints <https://github.com/llvm/llvm-project/tree/main/libc/config/gpu/entrypoints.txt>`_
|
||||
- `gpu-entrypoints <https://github.com/llvm/llvm-project/tree/main/libc/config/gpu/entrypoints.txt>`_
|
||||
|
||||
* To check date and time functions enabled for embedded system:
|
||||
|
||||
- `barebone-aarch32 <https://github.com/llvm/llvm-project/tree/main/libc/config/baremetal/arm/entrypoints.txt>`_
|
||||
- `barebone-aarch32 <https://github.com/llvm/llvm-project/tree/main/libc/config/baremetal/arm/entrypoints.txt>`_
|
||||
|
||||
- barebone-riscv32 - to be added
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ a public header with non-namespaced functions like ``string.h`` is included.
|
||||
This check ensures any function call resolves to a function within the
|
||||
LIBC_NAMESPACE namespace.
|
||||
|
||||
There are exceptions for the following functions:
|
||||
There are exceptions for the following functions:
|
||||
``__errno_location`` so that ``errno`` can be set;
|
||||
``malloc``, ``calloc``, ``realloc``, ``aligned_alloc``, and ``free`` since they
|
||||
are always external and can be intercepted.
|
||||
|
||||
@@ -66,7 +66,7 @@ example, the option-dictionary is:
|
||||
{
|
||||
"LIBC_CONF_PRINTF_DISABLE_FLOAT": {
|
||||
"value": false,
|
||||
"doc":
|
||||
"doc":
|
||||
},
|
||||
...
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
Printf Behavior Under All Conditions
|
||||
====================================
|
||||
|
||||
Introduction:
|
||||
Introduction:
|
||||
=============
|
||||
On the "defining undefined behavior" page, I said you should write down your
|
||||
decisions regarding undefined behavior in your functions. This is that document
|
||||
@@ -102,7 +102,7 @@ behavior.
|
||||
LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
|
||||
-------------------------------------------------
|
||||
When set, the float to string decimal conversion algorithm will use a larger
|
||||
table to accelerate long double conversions. This larger table is around 5MB of
|
||||
table to accelerate long double conversions. This larger table is around 5MB of
|
||||
size when compiled.
|
||||
|
||||
LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT
|
||||
|
||||
@@ -87,7 +87,7 @@ After configuring the build with the above ``cmake`` command, one can build the
|
||||
the libc for the target with the following command:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
|
||||
$> ninja libc libm
|
||||
|
||||
The above ``ninja`` command will build the libc static archives ``libc.a`` and
|
||||
|
||||
@@ -11,14 +11,14 @@ Motivation and Limitations
|
||||
Motivation
|
||||
==========
|
||||
|
||||
This project aims to provide a large subset of the C standard library to users
|
||||
of GPU accelerators. We deliberately choose to only implement a subset of the C
|
||||
library as some features are not expressly useful or easily implemented on the
|
||||
GPU. This will be discussed further in `Limitations <libc_gpu_limitations>`_.
|
||||
The main motivation behind this project is to provide the well understood C
|
||||
This project aims to provide a large subset of the C standard library to users
|
||||
of GPU accelerators. We deliberately choose to only implement a subset of the C
|
||||
library as some features are not expressly useful or easily implemented on the
|
||||
GPU. This will be discussed further in `Limitations <libc_gpu_limitations>`_.
|
||||
The main motivation behind this project is to provide the well understood C
|
||||
library as a firm base for GPU development.
|
||||
|
||||
The main idea behind this project is that programming GPUs can be as
|
||||
The main idea behind this project is that programming GPUs can be as
|
||||
straightforward as programming on CPUs. This project aims to validate the GPU as
|
||||
a more general-purpose target. The implementations here will also enable more
|
||||
complex implementations of other libraries on the GPU, such as ``libc++``.
|
||||
@@ -31,10 +31,10 @@ toolchain. We also aim to provide these functions in a format compatible with
|
||||
offloading in ``Clang`` so that we can treat the C library for the GPU as a
|
||||
standard static library.
|
||||
|
||||
A valuable use for providing C library features on the GPU is for testing. For
|
||||
this reason we build `tests on the GPU <libc_gpu_testing>`_ that can run a unit
|
||||
test as if it were being run on the CPU. This also helps users port applications
|
||||
that traditionally were run on the CPU. With this support, we can expand test
|
||||
A valuable use for providing C library features on the GPU is for testing. For
|
||||
this reason we build `tests on the GPU <libc_gpu_testing>`_ that can run a unit
|
||||
test as if it were being run on the CPU. This also helps users port applications
|
||||
that traditionally were run on the CPU. With this support, we can expand test
|
||||
coverage for the GPU backend to the existing LLVM C library tests.
|
||||
|
||||
.. _libc_gpu_limitations:
|
||||
@@ -43,9 +43,9 @@ Limitations
|
||||
===========
|
||||
|
||||
We only implement a subset of the standard C library. The GPU does not
|
||||
currently support thread local variables in all cases, so variables like
|
||||
``errno`` are not provided. Furthermore, the GPU under the OpenCL execution
|
||||
model cannot safely provide a mutex interface. This means that features like
|
||||
file buffering are not implemented on the GPU. We can also not easily provide
|
||||
threading features on the GPU due to the execution model so these will be
|
||||
currently support thread local variables in all cases, so variables like
|
||||
``errno`` are not provided. Furthermore, the GPU under the OpenCL execution
|
||||
model cannot safely provide a mutex interface. This means that features like
|
||||
file buffering are not implemented on the GPU. We can also not easily provide
|
||||
threading features on the GPU due to the execution model so these will be
|
||||
ignored, as will features like ``locale`` or ``time``.
|
||||
|
||||
@@ -11,62 +11,62 @@ Remote Procedure Calls
|
||||
Remote Procedure Call Implementation
|
||||
====================================
|
||||
|
||||
Traditionally, the C library abstracts over several functions that interface
|
||||
with the platform's operating system through system calls. The GPU however does
|
||||
Traditionally, the C library abstracts over several functions that interface
|
||||
with the platform's operating system through system calls. The GPU however does
|
||||
not provide an operating system that can handle target dependent operations.
|
||||
Instead, we implemented remote procedure calls to interface with the host's
|
||||
Instead, we implemented remote procedure calls to interface with the host's
|
||||
operating system while executing on a GPU.
|
||||
|
||||
We implemented remote procedure calls using unified virtual memory to create a
|
||||
shared communicate channel between the two processes. This memory is often
|
||||
pinned memory that can be accessed asynchronously and atomically by multiple
|
||||
processes simultaneously. This supports means that we can simply provide mutual
|
||||
exclusion on a shared better to swap work back and forth between the host system
|
||||
and the GPU. We can then use this to create a simple client-server protocol
|
||||
We implemented remote procedure calls using unified virtual memory to create a
|
||||
shared communicate channel between the two processes. This memory is often
|
||||
pinned memory that can be accessed asynchronously and atomically by multiple
|
||||
processes simultaneously. This supports means that we can simply provide mutual
|
||||
exclusion on a shared better to swap work back and forth between the host system
|
||||
and the GPU. We can then use this to create a simple client-server protocol
|
||||
using this shared memory.
|
||||
|
||||
This work treats the GPU as a client and the host as a server. The client
|
||||
initiates a communication while the server listens for them. In order to
|
||||
communicate between the host and the device, we simply maintain a buffer of
|
||||
memory and two mailboxes. One mailbox is write-only while the other is
|
||||
read-only. This exposes three primitive operations: using the buffer, giving
|
||||
away ownership, and waiting for ownership. This is implemented as a half-duplex
|
||||
transmission channel between the two sides. We decided to assign ownership of
|
||||
the buffer to the client when the inbox and outbox bits are equal and to the
|
||||
This work treats the GPU as a client and the host as a server. The client
|
||||
initiates a communication while the server listens for them. In order to
|
||||
communicate between the host and the device, we simply maintain a buffer of
|
||||
memory and two mailboxes. One mailbox is write-only while the other is
|
||||
read-only. This exposes three primitive operations: using the buffer, giving
|
||||
away ownership, and waiting for ownership. This is implemented as a half-duplex
|
||||
transmission channel between the two sides. We decided to assign ownership of
|
||||
the buffer to the client when the inbox and outbox bits are equal and to the
|
||||
server when they are not.
|
||||
|
||||
In order to make this transmission channel thread-safe, we abstract ownership of
|
||||
the given mailbox pair and buffer around a port, effectively acting as a lock
|
||||
and an index into the allocated buffer slice. The server and device have
|
||||
independent locks around the given port. In this scheme, the buffer can be used
|
||||
to communicate intent and data generically with the server. We them simply
|
||||
In order to make this transmission channel thread-safe, we abstract ownership of
|
||||
the given mailbox pair and buffer around a port, effectively acting as a lock
|
||||
and an index into the allocated buffer slice. The server and device have
|
||||
independent locks around the given port. In this scheme, the buffer can be used
|
||||
to communicate intent and data generically with the server. We them simply
|
||||
provide multiple copies of this protocol and expose them as multiple ports.
|
||||
|
||||
If this were simply a standard CPU system, this would be sufficient. However,
|
||||
GPUs have my unique architectural challenges. First, GPU threads execute in
|
||||
lock-step with each other in groups typically called warps or wavefronts. We
|
||||
need to target the smallest unit of independent parallelism, so the RPC
|
||||
interface needs to handle an entire group of threads at once. This is done by
|
||||
increasing the size of the buffer and adding a thread mask argument so the
|
||||
server knows which threads are active when it handles the communication. Second,
|
||||
GPUs generally have no forward progress guarantees. In order to guarantee we do
|
||||
not encounter deadlocks while executing it is required that the number of ports
|
||||
matches the maximum amount of hardware parallelism on the device. It is also
|
||||
very important that the thread mask remains consistent while interfacing with
|
||||
If this were simply a standard CPU system, this would be sufficient. However,
|
||||
GPUs have my unique architectural challenges. First, GPU threads execute in
|
||||
lock-step with each other in groups typically called warps or wavefronts. We
|
||||
need to target the smallest unit of independent parallelism, so the RPC
|
||||
interface needs to handle an entire group of threads at once. This is done by
|
||||
increasing the size of the buffer and adding a thread mask argument so the
|
||||
server knows which threads are active when it handles the communication. Second,
|
||||
GPUs generally have no forward progress guarantees. In order to guarantee we do
|
||||
not encounter deadlocks while executing it is required that the number of ports
|
||||
matches the maximum amount of hardware parallelism on the device. It is also
|
||||
very important that the thread mask remains consistent while interfacing with
|
||||
the port.
|
||||
|
||||
.. image:: ./rpc-diagram.svg
|
||||
:width: 75%
|
||||
:align: center
|
||||
|
||||
The above diagram outlines the architecture of the RPC interface. For clarity
|
||||
the following list will explain the operations done by the client and server
|
||||
The above diagram outlines the architecture of the RPC interface. For clarity
|
||||
the following list will explain the operations done by the client and server
|
||||
respectively when initiating a communication.
|
||||
|
||||
First, a communication from the perspective of the client:
|
||||
|
||||
* The client searches for an available port and claims the lock.
|
||||
* The client checks that the port is still available to the current device and
|
||||
* The client checks that the port is still available to the current device and
|
||||
continues if so.
|
||||
* The client writes its data to the fixed-size packet and toggles its outbox.
|
||||
* The client waits until its inbox matches its outbox.
|
||||
@@ -75,51 +75,51 @@ First, a communication from the perspective of the client:
|
||||
|
||||
Now, the same communication from the perspective of the server:
|
||||
|
||||
* The server searches for an available port with pending work and claims the
|
||||
* The server searches for an available port with pending work and claims the
|
||||
lock.
|
||||
* The server checks that the port is still available to the current device.
|
||||
* The server reads the opcode to perform the expected operation, in this
|
||||
* The server reads the opcode to perform the expected operation, in this
|
||||
case a receive and then send.
|
||||
* The server reads the data from the fixed-size packet.
|
||||
* The server writes its data to the fixed-size packet and toggles its outbox.
|
||||
* The server closes the port and continues searching for ports that need to be
|
||||
* The server closes the port and continues searching for ports that need to be
|
||||
serviced
|
||||
|
||||
This architecture currently requires that the host periodically checks the RPC
|
||||
server's buffer for ports with pending work. Note that a port can be closed
|
||||
without waiting for its submitted work to be completed. This allows us to model
|
||||
asynchronous operations that do not need to wait until the server has completed
|
||||
them. If an operation requires more data than the fixed size buffer, we simply
|
||||
This architecture currently requires that the host periodically checks the RPC
|
||||
server's buffer for ports with pending work. Note that a port can be closed
|
||||
without waiting for its submitted work to be completed. This allows us to model
|
||||
asynchronous operations that do not need to wait until the server has completed
|
||||
them. If an operation requires more data than the fixed size buffer, we simply
|
||||
send multiple packets back and forth in a streaming fashion.
|
||||
|
||||
Server Library
|
||||
--------------
|
||||
|
||||
The RPC server's basic functionality is provided by the LLVM C library. A static
|
||||
library called ``libllvmlibc_rpc_server.a`` includes handling for the basic
|
||||
operations, such as printing or exiting. This has a small API that handles
|
||||
The RPC server's basic functionality is provided by the LLVM C library. A static
|
||||
library called ``libllvmlibc_rpc_server.a`` includes handling for the basic
|
||||
operations, such as printing or exiting. This has a small API that handles
|
||||
setting up the unified buffer and an interface to check the opcodes.
|
||||
|
||||
Some operations are too divergent to provide generic implementations for, such
|
||||
as allocating device accessible memory. For these cases, we provide a callback
|
||||
registration scheme to add a custom handler for any given opcode through the
|
||||
port API. More information can be found in the installed header
|
||||
Some operations are too divergent to provide generic implementations for, such
|
||||
as allocating device accessible memory. For these cases, we provide a callback
|
||||
registration scheme to add a custom handler for any given opcode through the
|
||||
port API. More information can be found in the installed header
|
||||
``<install>/include/llvmlibc_rpc_server.h``.
|
||||
|
||||
Client Example
|
||||
--------------
|
||||
|
||||
The Client API is not currently exported by the LLVM C library. This is
|
||||
primarily due to being written in C++ and relying on internal data structures.
|
||||
It uses a simple send and receive interface with a fixed-size packet. The
|
||||
following example uses the RPC interface to call a function pointer on the
|
||||
The Client API is not currently exported by the LLVM C library. This is
|
||||
primarily due to being written in C++ and relying on internal data structures.
|
||||
It uses a simple send and receive interface with a fixed-size packet. The
|
||||
following example uses the RPC interface to call a function pointer on the
|
||||
server.
|
||||
|
||||
This code first opens a port with the given opcode to facilitate the
|
||||
communication. It then copies over the argument struct to the server using the
|
||||
``send_n`` interface to stream arbitrary bytes. The next send operation provides
|
||||
the server with the function pointer that will be executed. The final receive
|
||||
operation is a no-op and simply forces the client to wait until the server is
|
||||
This code first opens a port with the given opcode to facilitate the
|
||||
communication. It then copies over the argument struct to the server using the
|
||||
``send_n`` interface to stream arbitrary bytes. The next send operation provides
|
||||
the server with the function pointer that will be executed. The final receive
|
||||
operation is a no-op and simply forces the client to wait until the server is
|
||||
done. It can be omitted if asynchronous execution is desired.
|
||||
|
||||
.. code-block:: c++
|
||||
@@ -137,23 +137,23 @@ done. It can be omitted if asynchronous execution is desired.
|
||||
Server Example
|
||||
--------------
|
||||
|
||||
This example shows the server-side handling of the previous client example. When
|
||||
the server is checked, if there are any ports with pending work it will check
|
||||
the opcode and perform the appropriate action. In this case, the action is to
|
||||
This example shows the server-side handling of the previous client example. When
|
||||
the server is checked, if there are any ports with pending work it will check
|
||||
the opcode and perform the appropriate action. In this case, the action is to
|
||||
call a function pointer provided by the client.
|
||||
|
||||
In this example, the server simply runs forever in a separate thread for
|
||||
brevity's sake. Because the client is a GPU potentially handling several threads
|
||||
at once, the server needs to loop over all the active threads on the GPU. We
|
||||
abstract this into the ``lane_size`` variable, which is simply the device's warp
|
||||
or wavefront size. The identifier is simply the threads index into the current
|
||||
warp or wavefront. We allocate memory to copy the struct data into, and then
|
||||
call the given function pointer with that copied data. The final send simply
|
||||
signals completion and uses the implicit thread mask to delete the temporary
|
||||
In this example, the server simply runs forever in a separate thread for
|
||||
brevity's sake. Because the client is a GPU potentially handling several threads
|
||||
at once, the server needs to loop over all the active threads on the GPU. We
|
||||
abstract this into the ``lane_size`` variable, which is simply the device's warp
|
||||
or wavefront size. The identifier is simply the threads index into the current
|
||||
warp or wavefront. We allocate memory to copy the struct data into, and then
|
||||
call the given function pointer with that copied data. The final send simply
|
||||
signals completion and uses the implicit thread mask to delete the temporary
|
||||
data.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
|
||||
for(;;) {
|
||||
auto port = server.try_open(index);
|
||||
if (!port)
|
||||
@@ -181,11 +181,11 @@ data.
|
||||
CUDA Server Example
|
||||
-------------------
|
||||
|
||||
The following code shows an example of using the exported RPC interface along
|
||||
with the C library to manually configure a working server using the CUDA
|
||||
language. Other runtimes can use the presence of the ``__llvm_libc_rpc_client``
|
||||
in the GPU executable as an indicator for whether or not the server can be
|
||||
checked. These details should ideally be handled by the GPU language runtime,
|
||||
The following code shows an example of using the exported RPC interface along
|
||||
with the C library to manually configure a working server using the CUDA
|
||||
language. Other runtimes can use the presence of the ``__llvm_libc_rpc_client``
|
||||
in the GPU executable as an indicator for whether or not the server can be
|
||||
checked. These details should ideally be handled by the GPU language runtime,
|
||||
but the following example shows how it can be used by a standard user.
|
||||
|
||||
.. code-block:: cuda
|
||||
@@ -193,30 +193,30 @@ but the following example shows how it can be used by a standard user.
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
|
||||
#include <llvmlibc_rpc_server.h>
|
||||
|
||||
|
||||
[[noreturn]] void handle_error(cudaError_t err) {
|
||||
fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
||||
[[noreturn]] void handle_error(rpc_status_t err) {
|
||||
fprintf(stderr, "RPC error: %d\n", err);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
||||
// The handle to the RPC client provided by the C library.
|
||||
extern "C" __device__ void *__llvm_libc_rpc_client;
|
||||
|
||||
|
||||
__global__ void get_client_ptr(void **ptr) { *ptr = __llvm_libc_rpc_client; }
|
||||
|
||||
|
||||
// Obtain the RPC client's handle from the device. The CUDA language cannot look
|
||||
// up the symbol directly like the driver API, so we launch a kernel to read it.
|
||||
void *get_rpc_client() {
|
||||
void *rpc_client = nullptr;
|
||||
void **rpc_client_d = nullptr;
|
||||
|
||||
|
||||
if (cudaError_t err = cudaMalloc(&rpc_client_d, sizeof(void *)))
|
||||
handle_error(err);
|
||||
get_client_ptr<<<1, 1>>>(rpc_client_d);
|
||||
@@ -227,7 +227,7 @@ but the following example shows how it can be used by a standard user.
|
||||
handle_error(err);
|
||||
return rpc_client;
|
||||
}
|
||||
|
||||
|
||||
// Routines to allocate mapped memory that both the host and the device can
|
||||
// access asychonrously to communicate with eachother.
|
||||
void *alloc_host(size_t size, void *) {
|
||||
@@ -236,64 +236,64 @@ but the following example shows how it can be used by a standard user.
|
||||
handle_error(err);
|
||||
return sharable_ptr;
|
||||
};
|
||||
|
||||
|
||||
void free_host(void *ptr, void *) {
|
||||
if (cudaError_t err = cudaFreeHost(ptr))
|
||||
handle_error(err);
|
||||
}
|
||||
|
||||
|
||||
// The device-side overload of the standard C function to call.
|
||||
extern "C" __device__ int puts(const char *);
|
||||
|
||||
|
||||
// Calls the C library function from the GPU C library.
|
||||
__global__ void hello() { puts("Hello world!"); }
|
||||
|
||||
|
||||
int main() {
|
||||
int device = 0;
|
||||
// Initialize the RPC server to run on a single device.
|
||||
if (rpc_status_t err = rpc_init(/*num_device=*/1))
|
||||
handle_error(err);
|
||||
|
||||
|
||||
// Initialize the RPC server to run on the given device.
|
||||
if (rpc_status_t err =
|
||||
rpc_server_init(device, RPC_MAXIMUM_PORT_COUNT,
|
||||
/*warp_size=*/32, alloc_host, /*data=*/nullptr))
|
||||
handle_error(err);
|
||||
|
||||
|
||||
// Initialize the RPC client by copying the buffer to the device's handle.
|
||||
void *rpc_client = get_rpc_client();
|
||||
if (cudaError_t err =
|
||||
cudaMemcpy(rpc_client, rpc_get_client_buffer(device),
|
||||
rpc_get_client_size(), cudaMemcpyHostToDevice))
|
||||
handle_error(err);
|
||||
|
||||
|
||||
cudaStream_t stream;
|
||||
if (cudaError_t err = cudaStreamCreate(&stream))
|
||||
handle_error(err);
|
||||
|
||||
|
||||
// Execute the kernel.
|
||||
hello<<<1, 1, 0, stream>>>();
|
||||
|
||||
|
||||
// While the kernel is executing, check the RPC server for work to do.
|
||||
while (cudaStreamQuery(stream) == cudaErrorNotReady)
|
||||
if (rpc_status_t err = rpc_handle_server(device))
|
||||
handle_error(err);
|
||||
|
||||
|
||||
// Shut down the server running on the given device.
|
||||
if (rpc_status_t err =
|
||||
rpc_server_shutdown(device, free_host, /*data=*/nullptr))
|
||||
handle_error(err);
|
||||
|
||||
|
||||
// Shut down the entire RPC server interface.
|
||||
if (rpc_status_t err = rpc_shutdown())
|
||||
handle_error(err);
|
||||
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
The above code must be compiled in CUDA's relocatable device code mode and with
|
||||
the advanced offloading driver to link in the library. Currently this can be
|
||||
done with the following invocation. Using LTO avoids the overhead normally
|
||||
The above code must be compiled in CUDA's relocatable device code mode and with
|
||||
the advanced offloading driver to link in the library. Currently this can be
|
||||
done with the following invocation. Using LTO avoids the overhead normally
|
||||
associated with relocatable device code linking.
|
||||
|
||||
.. code-block:: sh
|
||||
@@ -307,6 +307,6 @@ associated with relocatable device code linking.
|
||||
Extensions
|
||||
----------
|
||||
|
||||
We describe which operation the RPC server should take with a 16-bit opcode. We
|
||||
consider the first 32768 numbers to be reserved while the others are free to
|
||||
We describe which operation the RPC server should take with a 16-bit opcode. We
|
||||
consider the first 32768 numbers to be reserved while the others are free to
|
||||
use.
|
||||
|
||||
@@ -18,8 +18,8 @@ Testing Infrastructure
|
||||
======================
|
||||
|
||||
The testing support in LLVM's libc implementation for GPUs is designed to mimic
|
||||
the standard unit tests as much as possible. We use the :ref:`libc_gpu_rpc`
|
||||
support to provide the necessary utilities like printing from the GPU. Execution
|
||||
the standard unit tests as much as possible. We use the :ref:`libc_gpu_rpc`
|
||||
support to provide the necessary utilities like printing from the GPU. Execution
|
||||
is performed by emitting a ``_start`` kernel from the GPU
|
||||
that is then called by an external loader utility. This is an example of how
|
||||
this can be done manually:
|
||||
|
||||
@@ -14,11 +14,11 @@ Building the GPU library
|
||||
|
||||
LLVM's libc GPU support *must* be built with an up-to-date ``clang`` compiler
|
||||
due to heavy reliance on ``clang``'s GPU support. This can be done automatically
|
||||
using the LLVM runtimes support. The GPU build is done using cross-compilation
|
||||
to the GPU architecture. This project currently supports AMD and NVIDIA GPUs
|
||||
which can be targeted using the appropriate target name. The following
|
||||
invocation will enable a cross-compiling build for the GPU architecture and
|
||||
enable the ``libc`` project only for them.
|
||||
using the LLVM runtimes support. The GPU build is done using cross-compilation
|
||||
to the GPU architecture. This project currently supports AMD and NVIDIA GPUs
|
||||
which can be targeted using the appropriate target name. The following
|
||||
invocation will enable a cross-compiling build for the GPU architecture and
|
||||
enable the ``libc`` project only for them.
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
@@ -29,8 +29,8 @@ enable the ``libc`` project only for them.
|
||||
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \
|
||||
-DLLVM_ENABLE_RUNTIMES="openmp" \
|
||||
-DCMAKE_BUILD_TYPE=<Debug|Release> \ # Select build type
|
||||
-DCMAKE_INSTALL_PREFIX=<PATH> \ # Where 'libcgpu.a' will live
|
||||
-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=libc \
|
||||
-DCMAKE_INSTALL_PREFIX=<PATH> \ # Where 'libcgpu.a' will live
|
||||
-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=libc \
|
||||
-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=libc \
|
||||
-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda
|
||||
$> ninja install
|
||||
@@ -40,8 +40,8 @@ toolchain, we list them in ``LLVM_ENABLE_PROJECTS``. To ensure ``libc`` is built
|
||||
using a compatible compiler and to support ``openmp`` offloading, we list them
|
||||
in ``LLVM_ENABLE_RUNTIMES`` to build them after the enabled projects using the
|
||||
newly built compiler. ``CMAKE_INSTALL_PREFIX`` specifies the installation
|
||||
directory in which to install the ``libcgpu-nvptx.a`` and ``libcgpu-amdgpu.a``
|
||||
libraries and headers along with LLVM. The generated headers will be placed in
|
||||
directory in which to install the ``libcgpu-nvptx.a`` and ``libcgpu-amdgpu.a``
|
||||
libraries and headers along with LLVM. The generated headers will be placed in
|
||||
``include/<gpu-triple>``.
|
||||
|
||||
Usage
|
||||
|
||||
@@ -29,7 +29,7 @@ Type Name Available
|
||||
============================ =========
|
||||
ACTION |check|
|
||||
ENTRY |check|
|
||||
VISIT
|
||||
VISIT
|
||||
============================ =========
|
||||
|
||||
POSIX Standard Functions
|
||||
@@ -42,8 +42,8 @@ hcreate |check|
|
||||
hdestroy |check|
|
||||
hsearch |check|
|
||||
insque |check|
|
||||
lfind
|
||||
lsearch
|
||||
lfind
|
||||
lsearch
|
||||
remque |check|
|
||||
tdelete
|
||||
tfind
|
||||
|
||||
@@ -81,7 +81,7 @@ Implementation Status
|
||||
|
||||
* To check math functions enabled for Windows:
|
||||
|
||||
- `windows-x86_64 <https://github.com/llvm/llvm-project/tree/main/libc/config/windows/entrypoints.txt>`_
|
||||
- `windows-x86_64 <https://github.com/llvm/llvm-project/tree/main/libc/config/windows/entrypoints.txt>`_
|
||||
|
||||
- windows-aarch64 - to be added
|
||||
|
||||
@@ -93,11 +93,11 @@ Implementation Status
|
||||
|
||||
* To check math functions enabled for GPU:
|
||||
|
||||
- `gpu-entrypoints <https://github.com/llvm/llvm-project/tree/main/libc/config/gpu/entrypoints.txt>`_
|
||||
- `gpu-entrypoints <https://github.com/llvm/llvm-project/tree/main/libc/config/gpu/entrypoints.txt>`_
|
||||
|
||||
* To check math functions enabled for embedded system:
|
||||
|
||||
- `baremetal-aarch32 <https://github.com/llvm/llvm-project/tree/main/libc/config/baremetal/arm/entrypoints.txt>`_
|
||||
- `baremetal-aarch32 <https://github.com/llvm/llvm-project/tree/main/libc/config/baremetal/arm/entrypoints.txt>`_
|
||||
|
||||
- baremetal-riscv32 - to be added
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ Hence we have the following bound on `s`:
|
||||
In order for `s` to exist, we need that:
|
||||
|
||||
.. math::
|
||||
\frac{C - (k + 1) 2^{-M}}{1 + (k + 1) 2^{-M}} >
|
||||
\frac{C - (k + 1) 2^{-M}}{1 + (k + 1) 2^{-M}} >
|
||||
\frac{-C - k 2^{-M}}{1 + k 2^{-M}}
|
||||
|
||||
which is equivalent to:
|
||||
@@ -135,7 +135,7 @@ side of `\text{(C1)}` is bounded by:
|
||||
|
||||
.. math::
|
||||
2^{-M - 1} > \frac{2^{-M - 1}}{1 + (2k + 1) 2^{-M - 1}} \geq
|
||||
\frac{2^{-M - 1}}{1 + (2^{M + 1} - 1) 2^{-M - 1}} > 2^{-M - 2}.
|
||||
\frac{2^{-M - 1}}{1 + (2^{M + 1} - 1) 2^{-M - 1}} > 2^{-M - 2}.
|
||||
|
||||
Hence, from `\text{(C1)}`, being an exact power of 2, `C = 2^{-N}` is bounded below
|
||||
by:
|
||||
@@ -427,7 +427,7 @@ to look-up for the reduction constant `s_{i, k}`. In other word, `k` is given
|
||||
by the formula:
|
||||
|
||||
.. math::
|
||||
k = \left\lfloor 2^{N_i + M_i} u_i \right\rfloor
|
||||
k = \left\lfloor 2^{N_i + M_i} u_i \right\rfloor
|
||||
|
||||
Notice that our reduction constant `s_{i, k}` must work for all `u_i` in the
|
||||
interval `I = \{ v: k 2^{-N_i - M_i} \leq v < (k + 1) 2^{-N_i - M_i} \}`,
|
||||
|
||||
@@ -30,7 +30,7 @@ If you are starting to bring up LLVM's libc on a new operating system, the first
|
||||
step is to add a directory for that OS in the ``libc/config`` directory. Both
|
||||
`Linux <https://github.com/llvm/llvm-project/tree/main/libc/config/linux>`_ and
|
||||
`Windows <https://github.com/llvm/llvm-project/tree/main/libc/config/windows>`_,
|
||||
the two operating systems on which LLVM's libc is being actively developed,
|
||||
the two operating systems on which LLVM's libc is being actively developed,
|
||||
have their own config directory.
|
||||
|
||||
.. note:: Windows development is not as active as the development on Linux.
|
||||
|
||||
@@ -61,7 +61,7 @@ funlockfile |check|
|
||||
Operations on system files
|
||||
==========================
|
||||
|
||||
These functions operate on files on the host's system, without using the
|
||||
These functions operate on files on the host's system, without using the
|
||||
``FILE`` object type. They only take the name of the file being operated on.
|
||||
|
||||
============= =========
|
||||
|
||||
@@ -36,7 +36,7 @@ Function Name Available
|
||||
============= =========
|
||||
bzero |check|
|
||||
bcmp |check|
|
||||
bcopy |check|
|
||||
bcopy |check|
|
||||
memcpy |check|
|
||||
memset |check|
|
||||
memcmp |check|
|
||||
@@ -99,14 +99,14 @@ These functions are not in strings.h, but are still primarily string
|
||||
functions, and are therefore tracked along with the rest of the string
|
||||
functions.
|
||||
|
||||
The String to float functions were implemented using the Eisel-Lemire algorithm
|
||||
The String to float functions were implemented using the Eisel-Lemire algorithm
|
||||
(read more about the algorithm here: `The Eisel-Lemire ParseNumberF64 Algorithm
|
||||
<https://nigeltao.github.io/blog/2020/eisel-lemire.html>`_). This improved
|
||||
the performance of string to float and double, and allowed it to complete this
|
||||
comprehensive test 15% faster than glibc: `Parse Number FXX Test Data
|
||||
<https://github.com/nigeltao/parse-number-fxx-test-data>`_. The test was done
|
||||
<https://github.com/nigeltao/parse-number-fxx-test-data>`_. The test was done
|
||||
with LLVM-libc built on 2022-04-14 and Debian GLibc version 2.33-6. The targets
|
||||
``libc_str_to_float_comparison_test`` and
|
||||
``libc_str_to_float_comparison_test`` and
|
||||
``libc_system_str_to_float_comparison_test`` were built and run on the test data
|
||||
10 times each, skipping the first run since it was an outlier.
|
||||
|
||||
@@ -142,7 +142,7 @@ strerror_r |check|
|
||||
Localized String Functions
|
||||
==========================
|
||||
|
||||
These functions require locale.h, and will be finished when locale support is
|
||||
These functions require locale.h, and will be finished when locale support is
|
||||
implemented in LLVM-libc.
|
||||
|
||||
============= =========
|
||||
@@ -160,7 +160,7 @@ Many String functions have an equivalent _s version, which is intended to be
|
||||
more secure and safe than the previous standard. These functions add runtime
|
||||
error detection and overflow protection. While they can be seen as an
|
||||
improvement, adoption remains relatively low among users. In addition, they are
|
||||
being considered for removal, see
|
||||
being considered for removal, see
|
||||
`Field Experience With Annex K — Bounds Checking Interfaces
|
||||
<http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1967.htm>`_. For these reasons,
|
||||
<http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1967.htm>`_. For these reasons,
|
||||
there is no ongoing work to implement them.
|
||||
|
||||
@@ -14,4 +14,4 @@
|
||||
|
||||
%%public_api()
|
||||
|
||||
#endif // LLVM_LIBC_FCNTL_H
|
||||
#endif // LLVM_LIBC_FCNTL_H
|
||||
|
||||
@@ -14,4 +14,4 @@
|
||||
|
||||
%%public_api()
|
||||
|
||||
#endif // LLVM_LIBC_SCHED_H
|
||||
#endif // LLVM_LIBC_SCHED_H
|
||||
|
||||
@@ -13,4 +13,4 @@
|
||||
|
||||
%%public_api()
|
||||
|
||||
#endif // LLVM_LIBC_SPAWN_H
|
||||
#endif // LLVM_LIBC_SPAWN_H
|
||||
|
||||
@@ -4,7 +4,7 @@ def BsdExtensions : StandardSpec<"BSDExtensions"> {
|
||||
[], // Macros
|
||||
[], // Types
|
||||
[], // Enumerations
|
||||
[
|
||||
[
|
||||
FunctionSpec<
|
||||
"strlcat",
|
||||
RetValSpec<SizeTType>,
|
||||
|
||||
@@ -101,8 +101,8 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
|
||||
FunctionSpec<
|
||||
"hcreate_r",
|
||||
RetValSpec<IntType>,
|
||||
[
|
||||
ArgSpec<SizeTType>,
|
||||
[
|
||||
ArgSpec<SizeTType>,
|
||||
ArgSpec<StructHsearchDataPtr>
|
||||
]
|
||||
>,
|
||||
@@ -117,7 +117,7 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
|
||||
"hsearch_r",
|
||||
RetValSpec<IntType>,
|
||||
[
|
||||
ArgSpec<EntryType>,
|
||||
ArgSpec<EntryType>,
|
||||
ArgSpec<ActionType>,
|
||||
ArgSpec<EntryTypePtrPtr>,
|
||||
ArgSpec<StructHsearchDataPtr>
|
||||
@@ -207,8 +207,8 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
|
||||
[], // Enumerations
|
||||
[
|
||||
FunctionSpec<
|
||||
"qsort_r",
|
||||
RetValSpec<VoidType>,
|
||||
"qsort_r",
|
||||
RetValSpec<VoidType>,
|
||||
[ArgSpec<VoidPtr>, ArgSpec<SizeTType>, ArgSpec<SizeTType>, ArgSpec<QSortRCompareT>, ArgSpec<VoidPtr>]
|
||||
>,
|
||||
]
|
||||
|
||||
@@ -36,7 +36,7 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> {
|
||||
>,
|
||||
]
|
||||
>;
|
||||
|
||||
|
||||
HeaderSpec Sched = HeaderSpec<
|
||||
"sched.h",
|
||||
[], // Macros
|
||||
|
||||
@@ -1319,7 +1319,7 @@ def POSIX : StandardSpec<"POSIX"> {
|
||||
"hsearch",
|
||||
RetValSpec<EntryTypePtr>,
|
||||
[
|
||||
ArgSpec<EntryType>,
|
||||
ArgSpec<EntryType>,
|
||||
ArgSpec<ActionType>
|
||||
]
|
||||
>,
|
||||
@@ -1339,7 +1339,7 @@ def POSIX : StandardSpec<"POSIX"> {
|
||||
]
|
||||
>,
|
||||
]
|
||||
>;
|
||||
>;
|
||||
|
||||
HeaderSpec Termios = HeaderSpec<
|
||||
"termios.h",
|
||||
|
||||
@@ -14,7 +14,7 @@ list(FIND TARGET_ENTRYPOINT_NAME_LIST getrandom getrandom_index)
|
||||
if (NOT ${getrandom_index} EQUAL -1)
|
||||
message(STATUS "Using getrandom for hashtable randomness")
|
||||
set(randomness_compile_flags -DLIBC_HASHTABLE_USE_GETRANDOM)
|
||||
set(randomness_extra_depends
|
||||
set(randomness_extra_depends
|
||||
libc.src.sys.random.getrandom libc.src.errno.errno)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -1437,7 +1437,7 @@ add_entrypoint_object(
|
||||
../sqrtf128.h
|
||||
DEPENDS
|
||||
libc.src.__support.macros.properties.float
|
||||
libc.src.__support.FPUtil.sqrt
|
||||
libc.src.__support.FPUtil.sqrt
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
add_object_library(
|
||||
global
|
||||
SRCS
|
||||
global.cpp
|
||||
global.cpp
|
||||
HDRS
|
||||
global.h
|
||||
)
|
||||
|
||||
@@ -110,7 +110,7 @@ add_object_library(
|
||||
)
|
||||
|
||||
if(NOT (TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD)
|
||||
# Not all platforms have a file implementation. If file is unvailable, and a
|
||||
# Not all platforms have a file implementation. If file is unvailable, and a
|
||||
# full build is requested, then we must skip all file based printf sections.
|
||||
return()
|
||||
endif()
|
||||
|
||||
@@ -75,7 +75,7 @@ add_object_library(
|
||||
)
|
||||
|
||||
if(NOT (TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD)
|
||||
# Not all platforms have a file implementation. If file is unvailable, and a
|
||||
# Not all platforms have a file implementation. If file is unvailable, and a
|
||||
# full build is requested, then we must skip all file based printf sections.
|
||||
return()
|
||||
endif()
|
||||
|
||||
@@ -268,7 +268,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO)
|
||||
set(SCUDO_DEPS "")
|
||||
|
||||
include(${LIBC_SOURCE_DIR}/../compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake)
|
||||
|
||||
|
||||
# scudo distinguishes riscv32 and riscv64, so we need to translate the architecture
|
||||
set(LIBC_TARGET_ARCHITECTURE_FOR_SCUDO ${LIBC_TARGET_ARCHITECTURE})
|
||||
if(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
|
||||
@@ -278,7 +278,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO)
|
||||
endif()
|
||||
|
||||
if(NOT (LIBC_TARGET_ARCHITECTURE_FOR_SCUDO IN_LIST ALL_SCUDO_STANDALONE_SUPPORTED_ARCH))
|
||||
message(FATAL_ERROR "Architecture ${LIBC_TARGET_ARCHITECTURE_FOR_SCUDO} is not supported by SCUDO.
|
||||
message(FATAL_ERROR "Architecture ${LIBC_TARGET_ARCHITECTURE_FOR_SCUDO} is not supported by SCUDO.
|
||||
Either disable LLVM_LIBC_INCLUDE_SCUDO or change your target architecture.")
|
||||
endif()
|
||||
|
||||
@@ -290,7 +290,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO)
|
||||
RTGwpAsanBacktraceLibc.${LIBC_TARGET_ARCHITECTURE_FOR_SCUDO}
|
||||
RTGwpAsanSegvHandler.${LIBC_TARGET_ARCHITECTURE_FOR_SCUDO}
|
||||
)
|
||||
|
||||
|
||||
add_entrypoint_external(
|
||||
malloc
|
||||
DEPENDS
|
||||
|
||||
@@ -5,7 +5,7 @@ add_entrypoint_object(
|
||||
wctob.cpp
|
||||
HDRS
|
||||
wctob.h
|
||||
DEPENDS
|
||||
DEPENDS
|
||||
libc.include.stdio
|
||||
libc.include.wchar
|
||||
libc.src.__support.wctype_utils
|
||||
|
||||
@@ -37,13 +37,13 @@ function(merge_relocatable_object name)
|
||||
add_dependencies(${fq_name} ${relocatable_target})
|
||||
target_link_libraries(${fq_name} INTERFACE ${fq_link_libraries})
|
||||
set_target_properties(
|
||||
${fq_name}
|
||||
${fq_name}
|
||||
PROPERTIES
|
||||
LINKER_LANGUAGE CXX
|
||||
IMPORTED_OBJECTS ${CMAKE_CURRENT_BINARY_DIR}/${name}.o
|
||||
TARGET_TYPE ${OBJECT_LIBRARY_TARGET_TYPE}
|
||||
DEPS "${fq_link_libraries}"
|
||||
)
|
||||
)
|
||||
endfunction()
|
||||
|
||||
function(add_startup_object name)
|
||||
@@ -56,7 +56,7 @@ function(add_startup_object name)
|
||||
)
|
||||
|
||||
get_fq_target_name(${name} fq_target_name)
|
||||
|
||||
|
||||
add_object_library(
|
||||
${name}
|
||||
SRCS ${ADD_STARTUP_OBJECT_SRC}
|
||||
|
||||
@@ -54,5 +54,5 @@ target_link_libraries(libc-gwp-asan-uaf-should-crash
|
||||
add_custom_command(TARGET libc-scudo-integration-test
|
||||
POST_BUILD
|
||||
COMMAND $<TARGET_FILE:libc-scudo-integration-test>
|
||||
COMMENT "Run the test after it is built."
|
||||
COMMENT "Run the test after it is built."
|
||||
VERBATIM)
|
||||
|
||||
@@ -182,7 +182,7 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
|
||||
POST_BUILD
|
||||
COMMAND $<TARGET_FILE:libc_str_to_float_comparison_test> ${float_test_file}
|
||||
DEPENDS ${float_test_file}
|
||||
COMMENT "Test the strtof and strtod implementations against precomputed results."
|
||||
COMMENT "Test the strtof and strtod implementations against precomputed results."
|
||||
VERBATIM)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ add_libc_unittest(
|
||||
|
||||
if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
|
||||
OR (${LIBC_TARGET_OS} STREQUAL "darwin")))
|
||||
# Sanitizers don't like SIGFPE. So, we will run the
|
||||
# Sanitizers don't like SIGFPE. So, we will run the
|
||||
# tests which raise SIGFPE only in non-sanitizer builds.
|
||||
# The tests are also disabled for Windows and MacOS as they fail currently.
|
||||
# TODO: Investigate and fix the windows failures and enable them for Windows
|
||||
|
||||
@@ -61,7 +61,7 @@ function(add_diff_binary target_name)
|
||||
|
||||
set_target_properties(${fq_target_name}
|
||||
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
|
||||
if(DIFF_CXX_STANDARD)
|
||||
set_target_properties(
|
||||
${fq_target_name}
|
||||
|
||||
@@ -1164,7 +1164,7 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
|
||||
libc.src.math.fmaxl
|
||||
libc.src.__support.FPUtil.fp_bits
|
||||
)
|
||||
|
||||
|
||||
add_fp_unittest(
|
||||
fmaxf128_test
|
||||
SUITE
|
||||
|
||||
@@ -11,7 +11,7 @@ add_libc_unittest(
|
||||
SRCS
|
||||
testfilter_test.cpp
|
||||
DEPENDS
|
||||
# TODO(michaelrj): Remove this dependancy. It's only here because all unit
|
||||
# TODO(michaelrj): Remove this dependancy. It's only here because all unit
|
||||
# tests must have at least one dependancy.
|
||||
libc.src.__support.CPP.bit
|
||||
libc.src.__support.CPP.bit
|
||||
)
|
||||
|
||||
@@ -7,14 +7,14 @@ if(LIBC_TESTS_CAN_USE_MPFR)
|
||||
target_compile_options(libcMPFRWrapper PRIVATE -O3)
|
||||
if (LLVM_LIBC_FULL_BUILD)
|
||||
# It is not easy to make libcMPFRWrapper a standalone library because gmp.h may unconditionally
|
||||
# pull in some STL headers. As a result, targets using this library will need to link against
|
||||
# pull in some STL headers. As a result, targets using this library will need to link against
|
||||
# C++ and unwind libraries. Since we are using MPFR anyway, we directly specifies the GNU toolchain.
|
||||
target_link_libraries(libcMPFRWrapper PUBLIC -lstdc++ -lgcc_s)
|
||||
endif()
|
||||
add_dependencies(
|
||||
libcMPFRWrapper
|
||||
libc.src.__support.CPP.string_view
|
||||
libc.src.__support.CPP.type_traits
|
||||
libcMPFRWrapper
|
||||
libc.src.__support.CPP.string_view
|
||||
libc.src.__support.CPP.type_traits
|
||||
libc.src.__support.FPUtil.fp_bits
|
||||
libc.src.__support.FPUtil.fpbits_str
|
||||
LibcTest.unit
|
||||
|
||||
@@ -14,7 +14,7 @@ target_compile_definitions(llvmlibc_rpc_server PUBLIC
|
||||
|
||||
# This utility needs to be compiled for the host system when cross compiling.
|
||||
if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
|
||||
target_compile_options(llvmlibc_rpc_server PUBLIC
|
||||
target_compile_options(llvmlibc_rpc_server PUBLIC
|
||||
--target=${LLVM_HOST_TRIPLE})
|
||||
target_link_libraries(llvmlibc_rpc_server PUBLIC
|
||||
"--target=${LLVM_HOST_TRIPLE}")
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
from math import *
|
||||
|
||||
"""
|
||||
This script is used to generate a table used by
|
||||
This script is used to generate a table used by
|
||||
libc/src/__support/high_precision_decimal.h.
|
||||
|
||||
For the ith entry in the table there are two values (indexed starting at 0).
|
||||
The first value is the number of digits longer the second value would be if
|
||||
multiplied by 2^i.
|
||||
The second value is the smallest number that would create that number of
|
||||
additional digits (which in base ten is always 5^i). Anything less creates one
|
||||
The second value is the smallest number that would create that number of
|
||||
additional digits (which in base ten is always 5^i). Anything less creates one
|
||||
fewer digit.
|
||||
|
||||
As an example, the 3rd entry in the table is {1, "125"}. This means that if
|
||||
As an example, the 3rd entry in the table is {1, "125"}. This means that if
|
||||
125 is multiplied by 2^3 = 8, it will have exactly one more digit.
|
||||
Multiplying it out we get 125 * 8 = 1000. 125 is the smallest number that gives
|
||||
that extra digit, for example 124 * 8 = 992, and all larger 3 digit numbers
|
||||
@@ -19,17 +19,17 @@ also give only one extra digit when multiplied by 8, for example 8 * 999 = 7992.
|
||||
This makes sense because 5^3 * 2^3 = 10^3, the smallest 4 digit number.
|
||||
|
||||
For numbers with more digits we can ignore the digits past what's in the second
|
||||
value, since the most significant digits determine how many extra digits there
|
||||
will be. Looking at the previous example, if we have 1000, and we look at just
|
||||
the first 3 digits (since 125 has 3 digits), we see that 100 < 125, so we get
|
||||
one fewer than 1 extra digits, which is 0.
|
||||
Multiplying it out we get 1000 * 8 = 8000, which fits the expectation.
|
||||
Another few quick examples:
|
||||
value, since the most significant digits determine how many extra digits there
|
||||
will be. Looking at the previous example, if we have 1000, and we look at just
|
||||
the first 3 digits (since 125 has 3 digits), we see that 100 < 125, so we get
|
||||
one fewer than 1 extra digits, which is 0.
|
||||
Multiplying it out we get 1000 * 8 = 8000, which fits the expectation.
|
||||
Another few quick examples:
|
||||
For 1255, 125 !< 125, so 1 digit more: 1255 * 8 = 10040
|
||||
For 9999, 999 !< 125, so 1 digit more: 9999 * 8 = 79992
|
||||
|
||||
Now let's try an example with the 10th entry: {4, "9765625"}. This one means
|
||||
that 9765625 * 2^10 will have 4 extra digits.
|
||||
Now let's try an example with the 10th entry: {4, "9765625"}. This one means
|
||||
that 9765625 * 2^10 will have 4 extra digits.
|
||||
Let's skip straight to the examples:
|
||||
For 1, 1 < 9765625, so 4-1=3 extra digits: 1 * 2^10 = 1024, 1 digit to 4 digits is a difference of 3.
|
||||
For 9765624, 9765624 < 9765625 so 3 extra digits: 9765624 * 1024 = 9999998976, 7 digits to 10 digits is a difference of 3.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
This file is used to generate the tables of values in
|
||||
This file is used to generate the tables of values in
|
||||
src/__support/ryu_constants.h and ryu_long_double constants.h. To use it, set
|
||||
the constants at the top of the file to the values you want to use for the Ryu
|
||||
algorithm, then run this file. It will output the appropriate tables to stdout,
|
||||
|
||||
Reference in New Issue
Block a user