diff --git a/src/math/asin.c b/src/math/asin.c index 3e8f99e..c926b18 100644 --- a/src/math/asin.c +++ b/src/math/asin.c @@ -82,11 +82,9 @@ double asin(double x) } /* |x| < 0.5 */ if (ix < 0x3fe00000) { - if (ix < 0x3e500000) { - /* |x|<0x1p-26, return x with inexact if x!=0*/ - FORCE_EVAL(x + 0x1p120f); + /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ + if (ix < 0x3e500000 && ix >= 0x00100000) return x; - } return x + x*R(x*x); } /* 1 > |x| >= 0.5 */ diff --git a/src/math/asinf.c b/src/math/asinf.c index 51fe6c6..bcd304a 100644 --- a/src/math/asinf.c +++ b/src/math/asinf.c @@ -46,10 +46,9 @@ float asinf(float x) return 0/(x-x); /* asin(|x|>1) is NaN */ } if (ix < 0x3f000000) { /* |x| < 0.5 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - FORCE_EVAL(x + 0x1p120f); - return x; /* return x with inexact if x!=0 */ - } + /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ + if (ix < 0x39800000 && ix >= 0x00800000) + return x; return x + x*R(x*x); } /* 1 > |x| >= 0.5 */ diff --git a/src/math/atan.c b/src/math/atan.c index 5a1d33e..63b5ad0 100644 --- a/src/math/atan.c +++ b/src/math/atan.c @@ -77,8 +77,9 @@ double atan(double x) } if (ix < 0x3fdc0000) { /* |x| < 0.4375 */ if (ix < 0x3e400000) { /* |x| < 2^-27 */ - /* raise inexact if x!=0 */ - FORCE_EVAL(x + 0x1p120f); + if (ix < 0x00100000) + /* raise underflow for subnormal x */ + FORCE_EVAL(x*x); return x; } id = -1; diff --git a/src/math/atanf.c b/src/math/atanf.c index ac8bfd0..178341b 100644 --- a/src/math/atanf.c +++ b/src/math/atanf.c @@ -55,8 +55,9 @@ float atanf(float x) } if (ix < 0x3ee00000) { /* |x| < 0.4375 */ if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact if x!=0 */ - FORCE_EVAL(x + 0x1p120f); + if (ix < 0x00800000) + /* raise underflow for subnormal x */ + FORCE_EVAL(x*x); return x; } id = -1; diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s index 932c754..7e49409 100644 --- a/src/math/i386/asin.s +++ b/src/math/i386/asin.s @@ -2,7 +2,20 @@ .type asinf,@function asinf: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fld %st(0) + fmul %st(1) + fstps (%esp) + pop %eax +2: ret .global asinl .type asinl,@function @@ -14,6 +27,18 @@ asinl: .type asin,@function asin: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fsts (%esp) + pop %eax +2: ret 1: fld %st(0) fld1 fsub %st(0),%st(1) diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s index 7e28b39..b46f5a3 100644 --- a/src/math/i386/atan.s +++ b/src/math/i386/atan.s @@ -2,6 +2,18 @@ .type atan,@function atan: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jb 1f fld1 fpatan ret + # subnormal x, return x with underflow +1: fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fsts (%esp) + pop %eax +2: ret diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s index 3cd4023..67cbf7c 100644 --- a/src/math/i386/atanf.s +++ b/src/math/i386/atanf.s @@ -2,6 +2,20 @@ .type atanf,@function atanf: flds 4(%esp) + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jb 1f fld1 fpatan ret + # subnormal x, return x with underflow +1: fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fld %st(0) + fmul %st(1) + fstps (%esp) + pop %eax +2: ret diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index e3b42af..8cf45c4 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -2,7 +2,20 @@ .type expm1f,@function expm1f: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fld %st(0) + fmul %st(1) + fstps (%esp) + pop %eax +2: ret .global expm1l .type expm1l,@function @@ -14,10 +27,34 @@ expm1l: .type expm1,@function expm1: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fsts (%esp) + pop %eax +2: ret 1: fldl2e fmulp + mov $0xc2820000,%eax + push %eax + flds (%esp) + pop %eax + fucomp %st(1) + fnstsw %ax + sahf fld1 - fld %st(1) + jb 1f + # x*log2e < -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) fabs fucom %st(1) fnstsw %ax diff --git a/src/math/i386/expl.s b/src/math/i386/expl.s index 8ceb40d..61ef1dd 100644 --- a/src/math/i386/expl.s +++ b/src/math/i386/expl.s @@ -8,34 +8,27 @@ expl: fldt 4(%esp) - # special cases: 2*x is +-inf, nan or |x| < 0x1p-32 - # check (exponent|0x8000)+2 < 0xbfff+2-32 - movw 12(%esp), %ax - movw %ax, %dx - orw $0x8000, %dx - addw $2, %dx - cmpw $0xbfff-30, %dx - jnb 3f - cmpw $1, %dx - jbe 1f - # if |x|<0x1p-32 return 1+x + # interesting case: 0x1p-32 <= |x| < 16384 + # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] + mov 12(%esp), %ax + or $0x8000, %ax + sub $0xbfdf, %ax + cmp $45, %ax + jbe 2f + test %ax, %ax fld1 - jmp 2f -1: testw %ax, %ax - jns 1f - # if 2*x == -inf,-nan return -0/x - fldz - fchs - fdivp + js 1f + # if |x|>=0x1p14 or nan return 2^trunc(x) + fscale + fstp %st(1) ret - # if 2*x == inf,nan return 2*x -1: fld %st(0) -2: faddp + # if |x|<0x1p-32 return 1+x +1: faddp ret - # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc + # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc # it will be wrong on non-nearest rounding mode -3: fldl2e +2: fldl2e subl $44, %esp # hi = log2e_hi*x # 2^hi = exp2l(hi) diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s index 9971e53..3203f06 100644 --- a/src/math/i386/log1p.s +++ b/src/math/i386/log1p.s @@ -7,9 +7,20 @@ log1p: fldl 4(%esp) cmp $0x3fd28f00,%eax ja 1f + cmp $0x00100000,%eax + jb 2f fyl2xp1 ret 1: fld1 faddp fyl2x ret + # subnormal x, return x with underflow +2: fnstsw %ax + and $16,%ax + jnz 1f + push %eax + fsts (%esp) + fstp %st(1) + pop %eax +1: ret diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s index 2680a8a..ada6109 100644 --- a/src/math/i386/log1pf.s +++ b/src/math/i386/log1pf.s @@ -7,9 +7,21 @@ log1pf: flds 4(%esp) cmp $0x3e940000,%eax ja 1f + cmp $0x00800000,%eax + jb 2f fyl2xp1 ret 1: fld1 faddp fyl2x ret + # subnormal x, return x with underflow +2: fnstsw %ax + and $16,%ax + jnz 1f + push %eax + fxch + fmul %st(1) + fstps (%esp) + pop %eax +1: ret diff --git a/src/math/log1p.c b/src/math/log1p.c index 6c67249..0cb71c6 100644 --- a/src/math/log1p.c +++ b/src/math/log1p.c @@ -104,9 +104,12 @@ double log1p(double x) return (x-x)/(x-x); /* log1p(x<-1)=NaN */ } if (ax < 0x3e200000) { /* |x| < 2**-29 */ - /* raise inexact */ - if (two54 + x > 0.0 && ax < 0x3c900000) /* |x| < 2**-54 */ + /* if 0x1p-1022 <= |x| < 0x1p-54, avoid raising underflow */ + if (ax < 0x3c900000 && ax >= 0x00100000) return x; +#if FLT_EVAL_METHOD != 0 + FORCE_EVAL(x*x); +#endif return x - x*x*0.5; } if (hx > 0 || hx <= (int32_t)0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ diff --git a/src/math/log1pf.c b/src/math/log1pf.c index 39832d2..c38e0bc 100644 --- a/src/math/log1pf.c +++ b/src/math/log1pf.c @@ -43,9 +43,12 @@ float log1pf(float x) return (x-x)/(x-x); /* log1p(x<-1)=NaN */ } if (ax < 0x38000000) { /* |x| < 2**-15 */ - /* raise inexact */ - if (two25 + x > 0.0f && ax < 0x33800000) /* |x| < 2**-24 */ + /* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */ + if (ax < 0x33800000 && ax >= 0x00800000) return x; +#if FLT_EVAL_METHOD != 0 + FORCE_EVAL(x*x); +#endif return x - x*x*0.5f; } if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ diff --git a/src/math/pow.c b/src/math/pow.c index f257814..ac3abc0 100644 --- a/src/math/pow.c +++ b/src/math/pow.c @@ -143,7 +143,7 @@ double pow(double x, double y) return 1.0; else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */ return hy >= 0 ? y : 0.0; - else /* (|x|<1)**+-inf = 0,inf */ + else if ((ix|lx) != 0) /* (|x|<1)**+-inf = 0,inf if x!=0 */ return hy >= 0 ? 0.0 : -y; } if (iy == 0x3ff00000) /* y is +-1 */ diff --git a/src/math/powf.c b/src/math/powf.c index 427c896..59baf6f 100644 --- a/src/math/powf.c +++ b/src/math/powf.c @@ -90,7 +90,7 @@ float powf(float x, float y) return 1.0f; else if (ix > 0x3f800000) /* (|x|>1)**+-inf = inf,0 */ return hy >= 0 ? y : 0.0f; - else /* (|x|<1)**+-inf = 0,inf */ + else if (ix != 0) /* (|x|<1)**+-inf = 0,inf if x!=0 */ return hy >= 0 ? 0.0f: -y; } if (iy == 0x3f800000) /* y is +-1 */ diff --git a/src/math/scalbn.c b/src/math/scalbn.c index 003141e..1fec432 100644 --- a/src/math/scalbn.c +++ b/src/math/scalbn.c @@ -10,10 +10,8 @@ double scalbn(double x, int n) if (n > 1023) { x *= 0x1p1023; n -= 1023; - if (n > 1023) { - STRICT_ASSIGN(double, x, x * 0x1p1023); - return x; - } + if (n > 1023) + n = 1023; } } else if (n < -1022) { x *= 0x1p-1022; @@ -21,10 +19,8 @@ double scalbn(double x, int n) if (n < -1022) { x *= 0x1p-1022; n += 1022; - if (n < -1022) { - STRICT_ASSIGN(double, x, x * 0x1p-1022); - return x; - } + if (n < -1022) + n = -1022; } } INSERT_WORDS(scale, (uint32_t)(0x3ff+n)<<20, 0); diff --git a/src/math/scalbnf.c b/src/math/scalbnf.c index f94b5d5..c0eeaf8 100644 --- a/src/math/scalbnf.c +++ b/src/math/scalbnf.c @@ -10,10 +10,8 @@ float scalbnf(float x, int n) if (n > 127) { x *= 0x1p127f; n -= 127; - if (n > 127) { - STRICT_ASSIGN(float, x, x * 0x1p127f); - return x; - } + if (n > 127) + n = 127; } } else if (n < -126) { x *= 0x1p-126f; @@ -21,10 +19,8 @@ float scalbnf(float x, int n) if (n < -126) { x *= 0x1p-126f; n += 126; - if (n < -126) { - STRICT_ASSIGN(float, x, x * 0x1p-126f); - return x; - } + if (n < -126) + n = -126; } } SET_FLOAT_WORD(scale, (uint32_t)(0x7f+n)<<23); diff --git a/src/math/scalbnl.c b/src/math/scalbnl.c index c605b8d..7ad7688 100644 --- a/src/math/scalbnl.c +++ b/src/math/scalbnl.c @@ -17,7 +17,7 @@ long double scalbnl(long double x, int n) x *= 0x1p16383L; n -= 16383; if (n > 16383) - return x * 0x1p16383L; + n = 16383; } } else if (n < -16382) { x *= 0x1p-16382L; @@ -26,7 +26,7 @@ long double scalbnl(long double x, int n) x *= 0x1p-16382L; n += 16382; if (n < -16382) - return x * 0x1p-16382L; + n = -16382; } } scale.e = 1.0; diff --git a/src/math/sinh.c b/src/math/sinh.c index 47e36bf..00022c4 100644 --- a/src/math/sinh.c +++ b/src/math/sinh.c @@ -23,8 +23,8 @@ double sinh(double x) t = expm1(absx); if (w < 0x3ff00000) { if (w < 0x3ff00000 - (26<<20)) - /* note: inexact is raised by expm1 */ - /* note: this branch avoids underflow */ + /* note: inexact and underflow are raised by expm1 */ + /* note: this branch avoids spurious underflow */ return x; return h*(2*t - t*t/(t+1)); } diff --git a/src/math/tanh.c b/src/math/tanh.c index 0e766c5..65393c6 100644 --- a/src/math/tanh.c +++ b/src/math/tanh.c @@ -9,7 +9,7 @@ double tanh(double x) union {double f; uint64_t i;} u = {.f = x}; uint32_t w; int sign; - double t; + double_t t; /* x = |x| */ sign = u.i >> 63; @@ -22,8 +22,7 @@ double tanh(double x) if (w > 0x40340000) { /* |x| > 20 or nan */ /* note: this branch avoids raising overflow */ - /* raise inexact if x!=+-inf and handle nan */ - t = 1 + 0/(x + 0x1p-120f); + t = 1 - 0/x; } else { t = expm1(2*x); t = 1 - 2/(t+2); @@ -32,10 +31,15 @@ double tanh(double x) /* |x| > log(5/3)/2 ~= 0.2554 */ t = expm1(2*x); t = t/(t+2); - } else { - /* |x| is small, up to 2ulp error in [0.1,0.2554] */ + } else if (w >= 0x00100000) { + /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */ t = expm1(-2*x); t = -t/(t+2); + } else { + /* |x| is subnormal */ + /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */ + FORCE_EVAL(x*x); + t = x; } return sign ? -t : t; } diff --git a/src/math/tanhf.c b/src/math/tanhf.c index 8099ec3..10636fb 100644 --- a/src/math/tanhf.c +++ b/src/math/tanhf.c @@ -17,7 +17,7 @@ float tanhf(float x) /* |x| > log(3)/2 ~= 0.5493 or nan */ if (w > 0x41200000) { /* |x| > 10 */ - t = 1 + 0/(x + 0x1p-120f); + t = 1 + 0/x; } else { t = expm1f(2*x); t = 1 - 2/(t+2); @@ -26,10 +26,14 @@ float tanhf(float x) /* |x| > log(5/3)/2 ~= 0.2554 */ t = expm1f(2*x); t = t/(t+2); - } else { - /* |x| is small */ + } else if (w >= 0x00800000) { + /* |x| >= 0x1p-126 */ t = expm1f(-2*x); t = -t/(t+2); + } else { + /* |x| is subnormal */ + FORCE_EVAL(x*x); + t = x; } return sign ? -t : t; } diff --git a/src/math/tgamma.c b/src/math/tgamma.c index 691e86a..852dcf7 100644 --- a/src/math/tgamma.c +++ b/src/math/tgamma.c @@ -137,6 +137,7 @@ double tgamma(double x) /* x =< -184: tgamma(x)=+-0 with underflow */ if (absx >= 184) { if (x < 0) { + FORCE_EVAL(0x1p-1022/x); if (floor(x) * 0.5 == floor(x * 0.5)) return 0; return -0.0; diff --git a/src/math/x86_64/expl.s b/src/math/x86_64/expl.s index 6d5c1ce..107f3f5 100644 --- a/src/math/x86_64/expl.s +++ b/src/math/x86_64/expl.s @@ -8,32 +8,25 @@ expl: fldt 8(%rsp) - # special cases: 2*x is +-inf, nan or |x| < 0x1p-32 - # check (exponent|0x8000)+2 < 0xbfff+2-32 - movw 16(%rsp), %ax - movw %ax, %dx - orw $0x8000, %dx - addw $2, %dx - cmpw $0xbfff-30, %dx - jnb 3f - cmpw $1, %dx - jbe 1f - # if |x|<0x1p-32 return 1+x + # interesting case: 0x1p-32 <= |x| < 16384 + # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] + mov 16(%rsp), %ax + or $0x8000, %ax + sub $0xbfdf, %ax + cmp $45, %ax + jbe 2f + test %ax, %ax fld1 - jmp 2f -1: testw %ax, %ax - jns 1f - # if 2*x == -inf,-nan return -0/x - fldz - fchs - fdivp + js 1f + # if |x|>=0x1p14 or nan return 2^trunc(x) + fscale + fstp %st(1) ret - # if 2*x == inf,nan return 2*x -1: fld %st(0) -2: faddp + # if |x|<0x1p-32 return 1+x +1: faddp ret - # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc + # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc # it will be wrong on non-nearest rounding mode 3: fldl2e subq $48, %rsp