[libc][NFC] Remove all trailing spaces from libc (#82831)

Summary:
There are a lot of stray trailing spaces on various lines. This patch
just got rid of all of them with `sed 's/\ \+$//g'`.
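
A minimal sketch of how such a cleanup can be applied across the tree is shown below. The commit only records the sed expression, so the surrounding `find` invocation and file filter here are assumptions, not the exact command that was run:

```sh
# Hypothetical cleanup pass: strip trailing whitespace in place from files
# under libc/. Only the sed expression is taken from the commit message.
find libc -type f \
  \( -name '*.cpp' -o -name '*.h' -o -name '*.td' -o -name '*.rst' \
     -o -name '*.txt' -o -name '*.cmake' -o -name '*.py' -o -name '*.S' \) \
  -exec sed -i 's/\ \+$//g' {} +
```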
Author: Joseph Huber
Date: 2024-02-23 16:34:00 -06:00
Committed by: GitHub
parent 25940956e6
commit 69c0b2febe
52 changed files with 229 additions and 229 deletions


@@ -49,7 +49,7 @@ __memchr_arm:
and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
cmp r2,#16 @ If it's short don't bother with anything clever
blt 20f
tst r0, #7 @ If it's already aligned skip the next bit
beq 10f
@@ -62,7 +62,7 @@ __memchr_arm:
beq 50f @ If it matches exit found
tst r0, #7
bne 5b @ If not aligned yet then do next byte
10:
@ At this point, we are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
@@ -71,7 +71,7 @@ __memchr_arm:
bic r4, r2, #7 @ Number of double words to work with
mvns r7, #0 @ all F's
movs r3, #0
15:
ldmia r0!,{r5,r6}
subs r4, r4, #8
@@ -87,7 +87,7 @@ __memchr_arm:
pop {r4,r5,r6,r7}
and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
20:
cbz r2, 40f @ 0 length or hit the end already then not found


@@ -162,7 +162,7 @@ elseif(LIBC_CMAKE_VERBOSE_LOGGING)
message(STATUS "Path for config files is: ${LIBC_CONFIG_PATH}")
endif()
# option(LIBC_ENABLE_WIDE_CHARACTERS
# "Whether to enable wide character functions on supported platforms. This may
# also set flags to enable or disable wide character support within other
# functions (e.g. printf)." ON)


@@ -17,7 +17,7 @@ git clone https://github.com/Z3Prover/z3.git
python scripts/mk_make.py --prefix=<Z3_INSTALL_DIR>
cd build
make -j
make install
```
## Configuration
@@ -68,7 +68,7 @@ Make sure to save the results of the benchmark as a json file.
By default, each function is benchmarked for at least one second, here we lower it to 200ms.
- `--benchmark_filter="BM_Memset|BM_Bzero"`
By default, all functions are benchmarked, here we restrict them to `memset` and `bzero`.
Other options might be useful, use `--help` for more information.


@@ -57,7 +57,7 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
if(${feature} STREQUAL "float128")
set(LIBC_COMPILER_HAS_FLOAT128 TRUE)
elseif(${feature} STREQUAL "fixed_point")
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
endif()
endif()
endforeach()


@@ -6,7 +6,7 @@ function(_get_compile_options_from_flags output_var)
endif()
check_flag(ADD_SSE4_2_FLAG ${ROUND_OPT_FLAG} ${flags})
check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${flags})
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
if(ADD_FMA_FLAG)
if(LIBC_TARGET_ARCHITECTURE_IS_X86)


@@ -125,7 +125,7 @@ function(add_gpu_entrypoint_library target_name base_target_name)
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin"
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
"${prefix},file=$<JOIN:${object},,file=>" -o
${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin
DEPENDS ${dep} ${base_target_name}
COMMENT "Packaging LLVM offloading binary for '${object}'"
@@ -142,7 +142,7 @@ function(add_gpu_entrypoint_library target_name base_target_name)
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp
DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
)
add_custom_target(${dep}.__stub__
DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")
add_library(${dep}.__fatbin__
@@ -151,9 +151,9 @@ function(add_gpu_entrypoint_library target_name base_target_name)
)
# This is always compiled for the LLVM host triple instead of the native GPU
# triple that is used by default in the build.
target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
target_compile_options(${dep}.__fatbin__ PRIVATE
--target=${LLVM_HOST_TRIPLE}
"SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
add_dependencies(${dep}.__fatbin__


@@ -284,7 +284,7 @@ function(create_entrypoint_object fq_target_name)
# The NVPTX target cannot use LTO for the internal targets used for testing.
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
target_compile_options(${internal_target_name} PRIVATE
-fno-lto -march=${LIBC_GPU_TARGET_ARCHITECTURE})
endif()


@@ -318,8 +318,8 @@ function(add_libc_fuzzer target_name)
target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR})
target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR})
target_link_libraries(${fq_target_name} PRIVATE
${link_object_files}
${LIBC_FUZZER_LINK_LIBRARIES}
)
@@ -352,7 +352,7 @@ endif()
# system libc are linked in to the final executable. The final exe is fully
# statically linked. The libc that the final exe links to consists of only
# the object files of the DEPENDS targets.
#
# Usage:
# add_integration_test(
# <target name>
@@ -462,7 +462,7 @@ function(add_integration_test test_name)
target_compile_options(${fq_build_target_name} PRIVATE ${compile_options})
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT}
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
@@ -470,7 +470,7 @@ function(add_integration_test test_name)
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
# We need to use the internal object versions for NVPTX.
set(internal_suffix ".__internal__")
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT}
"-Wl,--suppress-stack-size-warning"
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
@@ -645,7 +645,7 @@ function(add_libc_hermetic_test test_name)
endforeach()
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT}
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
@@ -653,7 +653,7 @@ function(add_libc_hermetic_test test_name)
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
# We need to use the internal object versions for NVPTX.
set(internal_suffix ".__internal__")
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT}
"-Wl,--suppress-stack-size-warning"
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static


@@ -16,7 +16,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.ctype.toascii
libc.src.ctype.tolower
libc.src.ctype.toupper
# string.h entrypoints
libc.src.string.bcmp
libc.src.string.bzero


@@ -9,12 +9,12 @@ try_compile(
)
if(NOT has_sys_random)
list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS
libc.src.sys.stat.stat
)
# If we're doing a fullbuild we provide the random header ourselves.
if(NOT LLVM_LIBC_FULL_BUILD)
list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS
libc.src.sys.random.getrandom
)
endif()


@@ -116,7 +116,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.acoshf
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atanf
libc.src.math.atanhf
libc.src.math.copysign
libc.src.math.copysignf


@@ -29,7 +29,7 @@ a list of open projects that one can start with:
#. One is about adding CMake facilities to optionally link the libc's overlay
static archive (see :ref:`overlay_mode`) with other LLVM tools/executables.
#. The other is about putting plumbing in place to release the overlay static
archive (see :ref:`overlay_mode`) as part of the LLVM binary releases.
#. **Implement Linux syscall wrappers** - A large portion of the POSIX API can
be implemented as syscall wrappers on Linux. A good number have already been


@@ -26,7 +26,7 @@ Implementation Status
* To check date and time functions enabled for Windows:
- `windows-x86_64 <https://github.com/llvm/llvm-project/tree/main/libc/config/windows/entrypoints.txt>`_
- windows-aarch64 - to be added
@@ -38,11 +38,11 @@ Implementation Status
* To check date and time functions enabled for GPU:
- `gpu-entrypoints <https://github.com/llvm/llvm-project/tree/main/libc/config/gpu/entrypoints.txt>`_
* To check date and time functions enabled for embedded system:
- `barebone-aarch32 <https://github.com/llvm/llvm-project/tree/main/libc/config/baremetal/arm/entrypoints.txt>`_
- barebone-riscv32 - to be added


@@ -75,7 +75,7 @@ a public header with non-namespaced functions like ``string.h`` is included.
This check ensures any function call resolves to a function within the
LIBC_NAMESPACE namespace.
There are exceptions for the following functions:
``__errno_location`` so that ``errno`` can be set;
``malloc``, ``calloc``, ``realloc``, ``aligned_alloc``, and ``free`` since they
are always external and can be intercepted.


@@ -66,7 +66,7 @@ example, the option-dictionary is:
{
"LIBC_CONF_PRINTF_DISABLE_FLOAT": {
"value": false,
"doc":
},
...
}


@@ -2,7 +2,7 @@
Printf Behavior Under All Conditions
====================================
Introduction:
=============
On the "defining undefined behavior" page, I said you should write down your
decisions regarding undefined behavior in your functions. This is that document
@@ -102,7 +102,7 @@ behavior.
LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
-------------------------------------------------
When set, the float to string decimal conversion algorithm will use a larger
table to accelerate long double conversions. This larger table is around 5MB of
size when compiled.
LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT


@@ -87,7 +87,7 @@ After configuring the build with the above ``cmake`` command, one can build the
the libc for the target with the following command:
.. code-block:: sh
$> ninja libc libm
The above ``ninja`` command will build the libc static archives ``libc.a`` and


@@ -11,14 +11,14 @@ Motivation and Limitations
Motivation
==========
This project aims to provide a large subset of the C standard library to users
of GPU accelerators. We deliberately choose to only implement a subset of the C
library as some features are not expressly useful or easily implemented on the
GPU. This will be discussed further in `Limitations <libc_gpu_limitations>`_.
The main motivation behind this project is to provide the well understood C
library as a firm base for GPU development.
The main idea behind this project is that programming GPUs can be as
straightforward as programming on CPUs. This project aims to validate the GPU as
a more general-purpose target. The implementations here will also enable more
complex implementations of other libraries on the GPU, such as ``libc++``.
@@ -31,10 +31,10 @@ toolchain. We also aim to provide these functions in a format compatible with
offloading in ``Clang`` so that we can treat the C library for the GPU as a
standard static library.
A valuable use for providing C library features on the GPU is for testing. For
this reason we build `tests on the GPU <libc_gpu_testing>`_ that can run a unit
test as if it were being run on the CPU. This also helps users port applications
that traditionally were run on the CPU. With this support, we can expand test
coverage for the GPU backend to the existing LLVM C library tests.
.. _libc_gpu_limitations:
@@ -43,9 +43,9 @@ Limitations
===========
We only implement a subset of the standard C library. The GPU does not
currently support thread local variables in all cases, so variables like
``errno`` are not provided. Furthermore, the GPU under the OpenCL execution
model cannot safely provide a mutex interface. This means that features like
file buffering are not implemented on the GPU. We can also not easily provide
threading features on the GPU due to the execution model so these will be
ignored, as will features like ``locale`` or ``time``.


@@ -11,62 +11,62 @@ Remote Procedure Calls
Remote Procedure Call Implementation
====================================
Traditionally, the C library abstracts over several functions that interface
with the platform's operating system through system calls. The GPU however does
not provide an operating system that can handle target dependent operations.
Instead, we implemented remote procedure calls to interface with the host's
operating system while executing on a GPU.
We implemented remote procedure calls using unified virtual memory to create a
shared communicate channel between the two processes. This memory is often
pinned memory that can be accessed asynchronously and atomically by multiple
processes simultaneously. This supports means that we can simply provide mutual
exclusion on a shared better to swap work back and forth between the host system
and the GPU. We can then use this to create a simple client-server protocol
using this shared memory.
This work treats the GPU as a client and the host as a server. The client
initiates a communication while the server listens for them. In order to
communicate between the host and the device, we simply maintain a buffer of
memory and two mailboxes. One mailbox is write-only while the other is
read-only. This exposes three primitive operations: using the buffer, giving
away ownership, and waiting for ownership. This is implemented as a half-duplex
transmission channel between the two sides. We decided to assign ownership of
the buffer to the client when the inbox and outbox bits are equal and to the
server when they are not.
In order to make this transmission channel thread-safe, we abstract ownership of
the given mailbox pair and buffer around a port, effectively acting as a lock
and an index into the allocated buffer slice. The server and device have
independent locks around the given port. In this scheme, the buffer can be used
to communicate intent and data generically with the server. We them simply
provide multiple copies of this protocol and expose them as multiple ports.
If this were simply a standard CPU system, this would be sufficient. However,
GPUs have my unique architectural challenges. First, GPU threads execute in
lock-step with each other in groups typically called warps or wavefronts. We
need to target the smallest unit of independent parallelism, so the RPC
interface needs to handle an entire group of threads at once. This is done by
increasing the size of the buffer and adding a thread mask argument so the
server knows which threads are active when it handles the communication. Second,
GPUs generally have no forward progress guarantees. In order to guarantee we do
not encounter deadlocks while executing it is required that the number of ports
matches the maximum amount of hardware parallelism on the device. It is also
very important that the thread mask remains consistent while interfacing with
the port.
.. image:: ./rpc-diagram.svg
:width: 75%
:align: center
The above diagram outlines the architecture of the RPC interface. For clarity
the following list will explain the operations done by the client and server
respectively when initiating a communication.
First, a communication from the perspective of the client:
* The client searches for an available port and claims the lock.
* The client checks that the port is still available to the current device and
continues if so.
* The client writes its data to the fixed-size packet and toggles its outbox.
* The client waits until its inbox matches its outbox.
@@ -75,51 +75,51 @@ First, a communication from the perspective of the client:
Now, the same communication from the perspective of the server:
* The server searches for an available port with pending work and claims the
lock.
* The server checks that the port is still available to the current device.
* The server reads the opcode to perform the expected operation, in this
case a receive and then send.
* The server reads the data from the fixed-size packet.
* The server writes its data to the fixed-size packet and toggles its outbox.
* The server closes the port and continues searching for ports that need to be
serviced
This architecture currently requires that the host periodically checks the RPC
server's buffer for ports with pending work. Note that a port can be closed
without waiting for its submitted work to be completed. This allows us to model
asynchronous operations that do not need to wait until the server has completed
them. If an operation requires more data than the fixed size buffer, we simply
send multiple packets back and forth in a streaming fashion.
Server Library
--------------
The RPC server's basic functionality is provided by the LLVM C library. A static
library called ``libllvmlibc_rpc_server.a`` includes handling for the basic
operations, such as printing or exiting. This has a small API that handles
setting up the unified buffer and an interface to check the opcodes.
Some operations are too divergent to provide generic implementations for, such
as allocating device accessible memory. For these cases, we provide a callback
registration scheme to add a custom handler for any given opcode through the
port API. More information can be found in the installed header
``<install>/include/llvmlibc_rpc_server.h``.
Client Example
--------------
The Client API is not currently exported by the LLVM C library. This is
primarily due to being written in C++ and relying on internal data structures.
It uses a simple send and receive interface with a fixed-size packet. The
following example uses the RPC interface to call a function pointer on the
server.
This code first opens a port with the given opcode to facilitate the
communication. It then copies over the argument struct to the server using the
``send_n`` interface to stream arbitrary bytes. The next send operation provides
the server with the function pointer that will be executed. The final receive
operation is a no-op and simply forces the client to wait until the server is
done. It can be omitted if asynchronous execution is desired.
.. code-block:: c++
@@ -137,23 +137,23 @@ done. It can be omitted if asynchronous execution is desired.
Server Example
--------------
This example shows the server-side handling of the previous client example. When
the server is checked, if there are any ports with pending work it will check
the opcode and perform the appropriate action. In this case, the action is to
call a function pointer provided by the client.
In this example, the server simply runs forever in a separate thread for
brevity's sake. Because the client is a GPU potentially handling several threads
at once, the server needs to loop over all the active threads on the GPU. We
abstract this into the ``lane_size`` variable, which is simply the device's warp
or wavefront size. The identifier is simply the threads index into the current
warp or wavefront. We allocate memory to copy the struct data into, and then
call the given function pointer with that copied data. The final send simply
signals completion and uses the implicit thread mask to delete the temporary
data.
.. code-block:: c++
for(;;) {
auto port = server.try_open(index);
if (!port)
@@ -181,11 +181,11 @@ data.
CUDA Server Example
-------------------
The following code shows an example of using the exported RPC interface along
with the C library to manually configure a working server using the CUDA
language. Other runtimes can use the presence of the ``__llvm_libc_rpc_client``
in the GPU executable as an indicator for whether or not the server can be
checked. These details should ideally be handled by the GPU language runtime,
but the following example shows how it can be used by a standard user.
.. code-block:: cuda
@@ -193,30 +193,30 @@ but the following example shows how it can be used by a standard user.
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>
#include <llvmlibc_rpc_server.h>
[[noreturn]] void handle_error(cudaError_t err) {
fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
[[noreturn]] void handle_error(rpc_status_t err) {
fprintf(stderr, "RPC error: %d\n", err);
exit(EXIT_FAILURE);
}
// The handle to the RPC client provided by the C library.
extern "C" __device__ void *__llvm_libc_rpc_client;
__global__ void get_client_ptr(void **ptr) { *ptr = __llvm_libc_rpc_client; }
// Obtain the RPC client's handle from the device. The CUDA language cannot look
// up the symbol directly like the driver API, so we launch a kernel to read it.
void *get_rpc_client() {
void *rpc_client = nullptr;
void **rpc_client_d = nullptr;
if (cudaError_t err = cudaMalloc(&rpc_client_d, sizeof(void *)))
handle_error(err);
get_client_ptr<<<1, 1>>>(rpc_client_d);
@@ -227,7 +227,7 @@ but the following example shows how it can be used by a standard user.
handle_error(err);
return rpc_client;
}
// Routines to allocate mapped memory that both the host and the device can
// access asychonrously to communicate with eachother.
void *alloc_host(size_t size, void *) {
@@ -236,64 +236,64 @@ but the following example shows how it can be used by a standard user.
handle_error(err);
return sharable_ptr;
};
void free_host(void *ptr, void *) {
if (cudaError_t err = cudaFreeHost(ptr))
handle_error(err);
}
// The device-side overload of the standard C function to call.
extern "C" __device__ int puts(const char *);
// Calls the C library function from the GPU C library.
__global__ void hello() { puts("Hello world!"); }
int main() {
int device = 0;
// Initialize the RPC server to run on a single device.
if (rpc_status_t err = rpc_init(/*num_device=*/1))
handle_error(err);
// Initialize the RPC server to run on the given device.
if (rpc_status_t err =
rpc_server_init(device, RPC_MAXIMUM_PORT_COUNT,
/*warp_size=*/32, alloc_host, /*data=*/nullptr))
handle_error(err);
// Initialize the RPC client by copying the buffer to the device's handle.
void *rpc_client = get_rpc_client();
if (cudaError_t err =
cudaMemcpy(rpc_client, rpc_get_client_buffer(device),
rpc_get_client_size(), cudaMemcpyHostToDevice))
handle_error(err);
cudaStream_t stream;
if (cudaError_t err = cudaStreamCreate(&stream))
handle_error(err);
// Execute the kernel.
hello<<<1, 1, 0, stream>>>();
// While the kernel is executing, check the RPC server for work to do.
while (cudaStreamQuery(stream) == cudaErrorNotReady)
if (rpc_status_t err = rpc_handle_server(device))
handle_error(err);
// Shut down the server running on the given device.
if (rpc_status_t err =
rpc_server_shutdown(device, free_host, /*data=*/nullptr))
handle_error(err);
// Shut down the entire RPC server interface.
if (rpc_status_t err = rpc_shutdown())
handle_error(err);
return EXIT_SUCCESS;
}
The above code must be compiled in CUDA's relocatable device code mode and with
the advanced offloading driver to link in the library. Currently this can be
done with the following invocation. Using LTO avoids the overhead normally
associated with relocatable device code linking.
.. code-block:: sh
@@ -307,6 +307,6 @@ associated with relocatable device code linking.
Extensions
----------
We describe which operation the RPC server should take with a 16-bit opcode. We
consider the first 32768 numbers to be reserved while the others are free to
use.


@@ -18,8 +18,8 @@ Testing Infrastructure
======================
The testing support in LLVM's libc implementation for GPUs is designed to mimic
the standard unit tests as much as possible. We use the :ref:`libc_gpu_rpc`
support to provide the necessary utilities like printing from the GPU. Execution
is performed by emitting a ``_start`` kernel from the GPU
that is then called by an external loader utility. This is an example of how
this can be done manually:


@@ -14,11 +14,11 @@ Building the GPU library
LLVM's libc GPU support *must* be built with an up-to-date ``clang`` compiler
due to heavy reliance on ``clang``'s GPU support. This can be done automatically
using the LLVM runtimes support. The GPU build is done using cross-compilation
to the GPU architecture. This project currently supports AMD and NVIDIA GPUs
which can be targeted using the appropriate target name. The following
invocation will enable a cross-compiling build for the GPU architecture and
enable the ``libc`` project only for them.
.. code-block:: sh
@@ -29,8 +29,8 @@ enable the ``libc`` project only for them.
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \
-DLLVM_ENABLE_RUNTIMES="openmp" \
-DCMAKE_BUILD_TYPE=<Debug|Release> \ # Select build type
-DCMAKE_INSTALL_PREFIX=<PATH> \ # Where 'libcgpu.a' will live
-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=libc \
-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=libc \
-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda
$> ninja install
@@ -40,8 +40,8 @@ toolchain, we list them in ``LLVM_ENABLE_PROJECTS``. To ensure ``libc`` is built
using a compatible compiler and to support ``openmp`` offloading, we list them
in ``LLVM_ENABLE_RUNTIMES`` to build them after the enabled projects using the
newly built compiler. ``CMAKE_INSTALL_PREFIX`` specifies the installation
directory in which to install the ``libcgpu-nvptx.a`` and ``libcgpu-amdgpu.a``
libraries and headers along with LLVM. The generated headers will be placed in
``include/<gpu-triple>``.
Usage


@@ -29,7 +29,7 @@ Type Name Available
============================ =========
ACTION |check|
ENTRY |check|
VISIT
============================ =========
POSIX Standard Functions
@@ -42,8 +42,8 @@ hcreate |check|
hdestroy |check|
hsearch |check|
insque |check|
lfind
lsearch
remque |check|
tdelete
tfind


@@ -81,7 +81,7 @@ Implementation Status
* To check math functions enabled for Windows:
- `windows-x86_64 <https://github.com/llvm/llvm-project/tree/main/libc/config/windows/entrypoints.txt>`_
- windows-aarch64 - to be added
@@ -93,11 +93,11 @@ Implementation Status
* To check math functions enabled for GPU:
- `gpu-entrypoints <https://github.com/llvm/llvm-project/tree/main/libc/config/gpu/entrypoints.txt>`_
* To check math functions enabled for embedded system:
- `baremetal-aarch32 <https://github.com/llvm/llvm-project/tree/main/libc/config/baremetal/arm/entrypoints.txt>`_
- baremetal-riscv32 - to be added


@@ -121,7 +121,7 @@ Hence we have the following bound on `s`:
In order for `s` to exist, we need that:
.. math::
\frac{C - (k + 1) 2^{-M}}{1 + (k + 1) 2^{-M}} >
\frac{-C - k 2^{-M}}{1 + k 2^{-M}}
which is equivalent to:
@@ -135,7 +135,7 @@ side of `\text{(C1)}` is bounded by:
.. math::
2^{-M - 1} > \frac{2^{-M - 1}}{1 + (2k + 1) 2^{-M - 1}} \geq
\frac{2^{-M - 1}}{1 + (2^{M + 1} - 1) 2^{-M - 1}} > 2^{-M - 2}.
Hence, from `\text{(C1)}`, being an exact power of 2, `C = 2^{-N}` is bounded below
by:
@@ -427,7 +427,7 @@ to look-up for the reduction constant `s_{i, k}`. In other word, `k` is given
by the formula:
.. math::
k = \left\lfloor 2^{N_i + M_i} u_i \right\rfloor
Notice that our reduction constant `s_{i, k}` must work for all `u_i` in the
interval `I = \{ v: k 2^{-N_i - M_i} \leq v < (k + 1) 2^{-N_i - M_i} \}`,


@@ -30,7 +30,7 @@ If you are starting to bring up LLVM's libc on a new operating system, the first
step is to add a directory for that OS in the ``libc/config`` directory. Both
`Linux <https://github.com/llvm/llvm-project/tree/main/libc/config/linux>`_ and
`Windows <https://github.com/llvm/llvm-project/tree/main/libc/config/windows>`_,
the two operating systems on which LLVM's libc is being actively developed,
have their own config directory.
.. note:: Windows development is not as active as the development on Linux.


@@ -61,7 +61,7 @@ funlockfile |check|
Operations on system files
==========================
These functions operate on files on the host's system, without using the
``FILE`` object type. They only take the name of the file being operated on.
============= =========


@@ -36,7 +36,7 @@ Function Name Available
============= =========
bzero |check|
bcmp |check|
bcopy |check|
memcpy |check|
memset |check|
memcmp |check|
@@ -99,14 +99,14 @@ These functions are not in strings.h, but are still primarily string
functions, and are therefore tracked along with the rest of the string
functions.
The String to float functions were implemented using the Eisel-Lemire algorithm
(read more about the algorithm here: `The Eisel-Lemire ParseNumberF64 Algorithm
<https://nigeltao.github.io/blog/2020/eisel-lemire.html>`_). This improved
the performance of string to float and double, and allowed it to complete this
comprehensive test 15% faster than glibc: `Parse Number FXX Test Data
<https://github.com/nigeltao/parse-number-fxx-test-data>`_. The test was done
with LLVM-libc built on 2022-04-14 and Debian GLibc version 2.33-6. The targets
``libc_str_to_float_comparison_test`` and
``libc_system_str_to_float_comparison_test`` were built and run on the test data
10 times each, skipping the first run since it was an outlier.
@@ -142,7 +142,7 @@ strerror_r |check|
Localized String Functions
==========================
These functions require locale.h, and will be finished when locale support is
implemented in LLVM-libc.
============= =========
@@ -160,7 +160,7 @@ Many String functions have an equivalent _s version, which is intended to be
more secure and safe than the previous standard. These functions add runtime
error detection and overflow protection. While they can be seen as an
improvement, adoption remains relatively low among users. In addition, they are
being considered for removal, see
`Field Experience With Annex K — Bounds Checking Interfaces
<http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1967.htm>`_. For these reasons,
there is no ongoing work to implement them.


@@ -14,4 +14,4 @@
%%public_api()
#endif // LLVM_LIBC_FCNTL_H


@@ -14,4 +14,4 @@
%%public_api()
#endif // LLVM_LIBC_SCHED_H


@@ -13,4 +13,4 @@
%%public_api()
#endif // LLVM_LIBC_SPAWN_H


@@ -4,7 +4,7 @@ def BsdExtensions : StandardSpec<"BSDExtensions"> {
[], // Macros
[], // Types
[], // Enumerations
[
FunctionSpec<
"strlcat",
RetValSpec<SizeTType>,


@@ -101,8 +101,8 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
FunctionSpec<
"hcreate_r",
RetValSpec<IntType>,
[
ArgSpec<SizeTType>,
ArgSpec<StructHsearchDataPtr>
]
>,
@@ -117,7 +117,7 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
"hsearch_r",
RetValSpec<IntType>,
[
ArgSpec<EntryType>,
ArgSpec<ActionType>,
ArgSpec<EntryTypePtrPtr>,
ArgSpec<StructHsearchDataPtr>
@@ -207,8 +207,8 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
[], // Enumerations
[
FunctionSpec<
"qsort_r",
RetValSpec<VoidType>,
[ArgSpec<VoidPtr>, ArgSpec<SizeTType>, ArgSpec<SizeTType>, ArgSpec<QSortRCompareT>, ArgSpec<VoidPtr>]
>,
]


@@ -36,7 +36,7 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> {
>,
]
>;
HeaderSpec Sched = HeaderSpec<
"sched.h",
[], // Macros


@@ -1319,7 +1319,7 @@ def POSIX : StandardSpec<"POSIX"> {
"hsearch",
RetValSpec<EntryTypePtr>,
[
ArgSpec<EntryType>,
ArgSpec<ActionType>
]
>,
@@ -1339,7 +1339,7 @@ def POSIX : StandardSpec<"POSIX"> {
]
>,
]
>;
HeaderSpec Termios = HeaderSpec<
"termios.h",


@@ -14,7 +14,7 @@ list(FIND TARGET_ENTRYPOINT_NAME_LIST getrandom getrandom_index)
if (NOT ${getrandom_index} EQUAL -1)
message(STATUS "Using getrandom for hashtable randomness")
set(randomness_compile_flags -DLIBC_HASHTABLE_USE_GETRANDOM)
set(randomness_extra_depends
libc.src.sys.random.getrandom libc.src.errno.errno)
endif()


@@ -1437,7 +1437,7 @@ add_entrypoint_object(
../sqrtf128.h
DEPENDS
libc.src.__support.macros.properties.float
libc.src.__support.FPUtil.sqrt
COMPILE_OPTIONS
-O3
)


@@ -1,7 +1,7 @@
add_object_library(
global
SRCS
global.cpp
HDRS
global.h
)


@@ -110,7 +110,7 @@ add_object_library(
)
if(NOT (TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD)
# Not all platforms have a file implementation. If file is unvailable, and a
# full build is requested, then we must skip all file based printf sections.
return()
endif()


@@ -75,7 +75,7 @@ add_object_library(
)
if(NOT (TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD)
# Not all platforms have a file implementation. If file is unvailable, and a
# full build is requested, then we must skip all file based printf sections.
return()
endif()


@@ -268,7 +268,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO)
set(SCUDO_DEPS "")
include(${LIBC_SOURCE_DIR}/../compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake)
# scudo distinguishes riscv32 and riscv64, so we need to translate the architecture
set(LIBC_TARGET_ARCHITECTURE_FOR_SCUDO ${LIBC_TARGET_ARCHITECTURE})
if(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
@@ -278,7 +278,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO)
endif()
if(NOT (LIBC_TARGET_ARCHITECTURE_FOR_SCUDO IN_LIST ALL_SCUDO_STANDALONE_SUPPORTED_ARCH))
message(FATAL_ERROR "Architecture ${LIBC_TARGET_ARCHITECTURE_FOR_SCUDO} is not supported by SCUDO.
Either disable LLVM_LIBC_INCLUDE_SCUDO or change your target architecture.")
endif()
@@ -290,7 +290,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO)
RTGwpAsanBacktraceLibc.${LIBC_TARGET_ARCHITECTURE_FOR_SCUDO}
RTGwpAsanSegvHandler.${LIBC_TARGET_ARCHITECTURE_FOR_SCUDO}
)
add_entrypoint_external(
malloc
DEPENDS


@@ -5,7 +5,7 @@ add_entrypoint_object(
wctob.cpp
HDRS
wctob.h
DEPENDS
libc.include.stdio
libc.include.wchar
libc.src.__support.wctype_utils


@@ -37,13 +37,13 @@ function(merge_relocatable_object name)
add_dependencies(${fq_name} ${relocatable_target})
target_link_libraries(${fq_name} INTERFACE ${fq_link_libraries})
set_target_properties(
${fq_name}
PROPERTIES
LINKER_LANGUAGE CXX
IMPORTED_OBJECTS ${CMAKE_CURRENT_BINARY_DIR}/${name}.o
TARGET_TYPE ${OBJECT_LIBRARY_TARGET_TYPE}
DEPS "${fq_link_libraries}"
)
endfunction()
function(add_startup_object name)
@@ -56,7 +56,7 @@ function(add_startup_object name)
)
get_fq_target_name(${name} fq_target_name)
add_object_library(
${name}
SRCS ${ADD_STARTUP_OBJECT_SRC}


@@ -54,5 +54,5 @@ target_link_libraries(libc-gwp-asan-uaf-should-crash
add_custom_command(TARGET libc-scudo-integration-test
POST_BUILD
COMMAND $<TARGET_FILE:libc-scudo-integration-test>
COMMENT "Run the test after it is built."
VERBATIM)


@@ -182,7 +182,7 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
POST_BUILD
COMMAND $<TARGET_FILE:libc_str_to_float_comparison_test> ${float_test_file}
DEPENDS ${float_test_file}
COMMENT "Test the strtof and strtod implementations against precomputed results."
VERBATIM)
endif()


@@ -87,7 +87,7 @@ add_libc_unittest(
if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
OR (${LIBC_TARGET_OS} STREQUAL "darwin")))
# Sanitizers don't like SIGFPE. So, we will run the
# tests which raise SIGFPE only in non-sanitizer builds.
# The tests are also disabled for Windows and MacOS as they fail currently.
# TODO: Investigate and fix the windows failures and enable them for Windows


@@ -61,7 +61,7 @@ function(add_diff_binary target_name)
set_target_properties(${fq_target_name}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if(DIFF_CXX_STANDARD)
set_target_properties(
${fq_target_name}


@@ -1164,7 +1164,7 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
libc.src.math.fmaxl
libc.src.__support.FPUtil.fp_bits
)
add_fp_unittest(
fmaxf128_test
SUITE


@@ -11,7 +11,7 @@ add_libc_unittest(
SRCS
testfilter_test.cpp
DEPENDS
# TODO(michaelrj): Remove this dependancy. It's only here because all unit
# tests must have at least one dependancy.
libc.src.__support.CPP.bit
)


@@ -7,14 +7,14 @@ if(LIBC_TESTS_CAN_USE_MPFR)
target_compile_options(libcMPFRWrapper PRIVATE -O3)
if (LLVM_LIBC_FULL_BUILD)
# It is not easy to make libcMPFRWrapper a standalone library because gmp.h may unconditionally
# pull in some STL headers. As a result, targets using this library will need to link against
# C++ and unwind libraries. Since we are using MPFR anyway, we directly specifies the GNU toolchain.
target_link_libraries(libcMPFRWrapper PUBLIC -lstdc++ -lgcc_s)
endif()
add_dependencies(
libcMPFRWrapper
libc.src.__support.CPP.string_view
libc.src.__support.CPP.type_traits
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.fpbits_str
LibcTest.unit


@@ -14,7 +14,7 @@ target_compile_definitions(llvmlibc_rpc_server PUBLIC
# This utility needs to be compiled for the host system when cross compiling.
if(LLVM_RUNTIMES_TARGET OR LIBC_TARGET_TRIPLE)
target_compile_options(llvmlibc_rpc_server PUBLIC
--target=${LLVM_HOST_TRIPLE})
target_link_libraries(llvmlibc_rpc_server PUBLIC
"--target=${LLVM_HOST_TRIPLE}")


@@ -1,17 +1,17 @@
from math import *
"""
This script is used to generate a table used by
libc/src/__support/high_precision_decimal.h.
For the ith entry in the table there are two values (indexed starting at 0).
The first value is the number of digits longer the second value would be if
multiplied by 2^i.
The second value is the smallest number that would create that number of
additional digits (which in base ten is always 5^i). Anything less creates one
fewer digit.
As an example, the 3rd entry in the table is {1, "125"}. This means that if
125 is multiplied by 2^3 = 8, it will have exactly one more digit.
Multiplying it out we get 125 * 8 = 1000. 125 is the smallest number that gives
that extra digit, for example 124 * 8 = 992, and all larger 3 digit numbers
@@ -19,17 +19,17 @@ also give only one extra digit when multiplied by 8, for example 8 * 999 = 7992.
This makes sense because 5^3 * 2^3 = 10^3, the smallest 4 digit number.
For numbers with more digits we can ignore the digits past what's in the second
value, since the most significant digits determine how many extra digits there
will be. Looking at the previous example, if we have 1000, and we look at just
the first 3 digits (since 125 has 3 digits), we see that 100 < 125, so we get
one fewer than 1 extra digits, which is 0.
Multiplying it out we get 1000 * 8 = 8000, which fits the expectation.
Another few quick examples:
For 1255, 125 !< 125, so 1 digit more: 1255 * 8 = 10040
For 9999, 999 !< 125, so 1 digit more: 9999 * 8 = 79992
Now let's try an example with the 10th entry: {4, "9765625"}. This one means
that 9765625 * 2^10 will have 4 extra digits.
Let's skip straight to the examples:
For 1, 1 < 9765625, so 4-1=3 extra digits: 1 * 2^10 = 1024, 1 digit to 4 digits is a difference of 3.
For 9765624, 9765624 < 9765625 so 3 extra digits: 9765624 * 1024 = 9999998976, 7 digits to 10 digits is a difference of 3.


@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
This file is used to generate the tables of values in
src/__support/ryu_constants.h and ryu_long_double constants.h. To use it, set
the constants at the top of the file to the values you want to use for the Ryu
algorithm, then run this file. It will output the appropriate tables to stdout,