Files
Kristof Beyls 0928368f62 [libc] Provide Arm Optimized Routines for the LLVM libc project.
This adds the Arm Optimized Routines (see
https://github.com/ARM-software/optimized-routines) source code under the
the LLVM license. The version of the code provided in this patch is v20.02
of the Arm Optimized Routines project.

This entire contribution is being committed as is even though it does
not currently fit the LLVM libc model and does not follow the LLVM
coding style. In the near future, implementations from this patch will be
moved over to their right place in the LLVM-libc tree. This will be done
over many small patches, all of which will go through the normal LLVM code
review process. See this libc-dev post for the plan:
http://lists.llvm.org/pipermail/libc-dev/2020-March/000044.html

Differential revision of the original upload: https://reviews.llvm.org/D75355
2020-03-16 12:19:31 -07:00

143 lines
4.0 KiB
C

/*
* Double-precision log2(x) function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"
#define T __log2_data.tab
#define T2 __log2_data.tab2
#define B __log2_data.poly1
#define A __log2_data.poly
#define InvLn2hi __log2_data.invln2hi
#define InvLn2lo __log2_data.invln2lo
#define N (1 << LOG2_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t
top16 (double x)
{
return asuint64 (x) >> 48;
}
double
log2 (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64 (x);
top = top16 (x);
#if LOG2_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1.5b51p-5)
# define HI asuint64 (1.0 + 0x1.6ab2p-5)
#endif
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
#if HAVE_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
#if LOG2_POLY1_ORDER == 11
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
y = hi + p;
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
#endif
return eval_as_double (y);
}
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
kd = (double_t) k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif
/* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc;
hi = t3 + t1;
lo = t3 - hi + t1 + t2;
/* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2;
#if LOG2_POLY_ORDER == 7
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
#endif
return eval_as_double (y);
}
#if USE_GLIBC_ABI
strong_alias (log2, __log2_finite)
hidden_alias (log2, __ieee754_log2)
# if LDBL_MANT_DIG == 53
long double log2l (long double x) { return log2 (x); }
# endif
#endif