diff --git a/src/math/asin.c b/src/math/asin.c
index 3e8f99e..c926b18 100644
--- a/src/math/asin.c
+++ b/src/math/asin.c
@@ -82,11 +82,9 @@ double asin(double x)
 	}
 	/* |x| < 0.5 */
 	if (ix < 0x3fe00000) {
-		if (ix < 0x3e500000) {
-			/* |x|<0x1p-26, return x with inexact if x!=0*/
-			FORCE_EVAL(x + 0x1p120f);
+		/* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
+		if (ix < 0x3e500000 && ix >= 0x00100000)
 			return x;
-		}
 		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
diff --git a/src/math/asinf.c b/src/math/asinf.c
index 51fe6c6..bcd304a 100644
--- a/src/math/asinf.c
+++ b/src/math/asinf.c
@@ -46,10 +46,9 @@ float asinf(float x)
 		return 0/(x-x);  /* asin(|x|>1) is NaN */
 	}
 	if (ix < 0x3f000000) {  /* |x| < 0.5 */
-		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			FORCE_EVAL(x + 0x1p120f);
-			return x; /* return x with inexact if x!=0 */
-		}
+		/* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
+		if (ix < 0x39800000 && ix >= 0x00800000)
+			return x;
 		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
diff --git a/src/math/atan.c b/src/math/atan.c
index 5a1d33e..63b5ad0 100644
--- a/src/math/atan.c
+++ b/src/math/atan.c
@@ -77,8 +77,9 @@ double atan(double x)
 	}
 	if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
 		if (ix < 0x3e400000) {  /* |x| < 2^-27 */
-			/* raise inexact if x!=0 */
-			FORCE_EVAL(x + 0x1p120f);
+			if (ix < 0x00100000)
+				/* raise underflow for subnormal x */
+				FORCE_EVAL(x*x);
 			return x;
 		}
 		id = -1;
diff --git a/src/math/atanf.c b/src/math/atanf.c
index ac8bfd0..178341b 100644
--- a/src/math/atanf.c
+++ b/src/math/atanf.c
@@ -55,8 +55,9 @@ float atanf(float x)
 	}
 	if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			/* raise inexact if x!=0 */
-			FORCE_EVAL(x + 0x1p120f);
+			if (ix < 0x00800000)
+				/* raise underflow for subnormal x */
+				FORCE_EVAL(x*x);
 			return x;
 		}
 		id = -1;
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
index 932c754..7e49409 100644
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@@ -2,7 +2,20 @@
 .type asinf,@function
 asinf:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fld %st(0)
+	fmul %st(1)
+	fstps (%esp)
+	pop %eax
+2:	ret
 
 .global asinl
 .type asinl,@function
@@ -14,6 +27,18 @@ asinl:
 .type asin,@function
 asin:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fsts (%esp)
+	pop %eax
+2:	ret
 1:	fld %st(0)
 	fld1
 	fsub %st(0),%st(1)
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
index 7e28b39..b46f5a3 100644
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@@ -2,6 +2,18 @@
 .type atan,@function
 atan:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fsts (%esp)
+	pop %eax
+2:	ret
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
index 3cd4023..67cbf7c 100644
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@@ -2,6 +2,20 @@
 .type atanf,@function
 atanf:
 	flds 4(%esp)
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fld %st(0)
+	fmul %st(1)
+	fstps (%esp)
+	pop %eax
+2:	ret
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index e3b42af..8cf45c4 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -2,7 +2,20 @@
 .type expm1f,@function
 expm1f:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fld %st(0)
+	fmul %st(1)
+	fstps (%esp)
+	pop %eax
+2:	ret
 
 .global expm1l
 .type expm1l,@function
@@ -14,10 +27,34 @@ expm1l:
 .type expm1,@function
 expm1:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fsts (%esp)
+	pop %eax
+2:	ret
 1:	fldl2e
 	fmulp
+	mov $0xc2820000,%eax
+	push %eax
+	flds (%esp)
+	pop %eax
+	fucomp %st(1)
+	fnstsw %ax
+	sahf
 	fld1
-	fld %st(1)
+	jb 1f
+		# x*log2e <= -65, return -1 without underflow
+	fstp %st(1)
+	fchs
+	ret
+1:	fld %st(1)
 	fabs
 	fucom %st(1)
 	fnstsw %ax
diff --git a/src/math/i386/expl.s b/src/math/i386/expl.s
index 8ceb40d..61ef1dd 100644
--- a/src/math/i386/expl.s
+++ b/src/math/i386/expl.s
@@ -8,34 +8,27 @@
 expl:
 	fldt 4(%esp)
 
-		# special cases: 2*x is +-inf, nan or |x| < 0x1p-32
-		# check (exponent|0x8000)+2 < 0xbfff+2-32
-	movw 12(%esp), %ax
-	movw %ax, %dx
-	orw $0x8000, %dx
-	addw $2, %dx
-	cmpw $0xbfff-30, %dx
-	jnb 3f
-	cmpw $1, %dx
-	jbe 1f
-		# if |x|<0x1p-32 return 1+x
+		# interesting case: 0x1p-32 <= |x| < 16384
+		# check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]
+	mov 12(%esp), %ax
+	or $0x8000, %ax
+	sub $0xbfdf, %ax
+	cmp $45, %ax
+	jbe 2f
+	test %ax, %ax
 	fld1
-	jmp 2f
-1:	testw %ax, %ax
-	jns 1f
-		# if 2*x == -inf,-nan return -0/x
-	fldz
-	fchs
-	fdivp
+	js 1f
+		# if |x|>=0x1p14 or nan return 2^trunc(x)
+	fscale
+	fstp %st(1)
 	ret
-		# if 2*x == inf,nan return 2*x
-1:	fld %st(0)
-2:	faddp
+		# if |x|<0x1p-32 return 1+x
+1:	faddp
 	ret
 
-		# should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc
+		# should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc
 		# it will be wrong on non-nearest rounding mode
-3:	fldl2e
+2:	fldl2e
 	subl $44, %esp
 		# hi = log2e_hi*x
 		# 2^hi = exp2l(hi)
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
index 9971e53..3203f06 100644
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@@ -7,9 +7,20 @@ log1p:
 	fldl 4(%esp)
 	cmp $0x3fd28f00,%eax
 	ja 1f
+	cmp $0x00100000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+2:	fstp %st(1)		# pop the fyl2xp1 multiplier first: both exits leave only x
+	fnstsw %ax
+	and $16,%ax		# underflow flag already raised? then nothing more to do
+	jnz 1f
+	push %eax		# scratch slot for the narrowing store
+	fsts (%esp)		# double subnormal -> float store raises underflow
+	pop %eax
+1:	ret
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
index 2680a8a..ada6109 100644
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@@ -7,9 +7,21 @@ log1pf:
 	flds 4(%esp)
 	cmp $0x3e940000,%eax
 	ja 1f
+	cmp $0x00800000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+		# pop st(1) before the flag check so both exits leave only x on the x87 stack
+2:	fstp %st(1)
+	fnstsw %ax
+	and $16,%ax		# underflow flag already raised? then nothing more to do
+	jnz 1f
+	fld %st(0)
+	fmul %st(1)		# x*x is far below the float range
+	fstps 4(%esp)		# float store raises underflow; x is already loaded, its arg slot is scratch
+1:	ret
diff --git a/src/math/log1p.c b/src/math/log1p.c
index 6c67249..0cb71c6 100644
--- a/src/math/log1p.c
+++ b/src/math/log1p.c
@@ -104,9 +104,12 @@ double log1p(double x)
 			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */
 		}
 		if (ax < 0x3e200000) {   /* |x| < 2**-29 */
-			/* raise inexact */
-			if (two54 + x > 0.0 && ax < 0x3c900000)  /* |x| < 2**-54 */
+			/* if 0x1p-1022 <= |x| < 0x1p-54, avoid raising underflow */
+			if (ax < 0x3c900000 && ax >= 0x00100000)
 				return x;
+#if FLT_EVAL_METHOD != 0
+			FORCE_EVAL(x*x);
+#endif
 			return x - x*x*0.5;
 		}
 		if (hx > 0 || hx <= (int32_t)0xbfd2bec4) {  /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
diff --git a/src/math/log1pf.c b/src/math/log1pf.c
index 39832d2..c38e0bc 100644
--- a/src/math/log1pf.c
+++ b/src/math/log1pf.c
@@ -43,9 +43,12 @@ float log1pf(float x)
 			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */
 		}
 		if (ax < 0x38000000) {   /* |x| < 2**-15 */
-			/* raise inexact */
-			if (two25 + x > 0.0f && ax < 0x33800000)  /* |x| < 2**-24 */
+			/* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */
+			if (ax < 0x33800000 && ax >= 0x00800000)
 				return x;
+#if FLT_EVAL_METHOD != 0
+			FORCE_EVAL(x*x);
+#endif
 			return x - x*x*0.5f;
 		}
 		if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
diff --git a/src/math/pow.c b/src/math/pow.c
index f257814..ac3abc0 100644
--- a/src/math/pow.c
+++ b/src/math/pow.c
@@ -143,7 +143,7 @@ double pow(double x, double y)
 				return 1.0;
 			else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */
 				return hy >= 0 ? y : 0.0;
-			else                       /* (|x|<1)**+-inf = 0,inf */
+			else if ((ix|lx) != 0)     /* (|x|<1)**+-inf = 0,inf if x!=0 */
 				return hy >= 0 ? 0.0 : -y;
 		}
 		if (iy == 0x3ff00000)    /* y is +-1 */
diff --git a/src/math/powf.c b/src/math/powf.c
index 427c896..59baf6f 100644
--- a/src/math/powf.c
+++ b/src/math/powf.c
@@ -90,7 +90,7 @@ float powf(float x, float y)
 			return 1.0f;
 		else if (ix > 0x3f800000)  /* (|x|>1)**+-inf = inf,0 */
 			return hy >= 0 ? y : 0.0f;
-		else                       /* (|x|<1)**+-inf = 0,inf */
+		else if (ix != 0)          /* (|x|<1)**+-inf = 0,inf if x!=0 */
 			return hy >= 0 ? 0.0f: -y;
 	}
 	if (iy == 0x3f800000)    /* y is +-1 */
diff --git a/src/math/scalbn.c b/src/math/scalbn.c
index 003141e..1fec432 100644
--- a/src/math/scalbn.c
+++ b/src/math/scalbn.c
@@ -10,10 +10,8 @@ double scalbn(double x, int n)
 		if (n > 1023) {
 			x *= 0x1p1023;
 			n -= 1023;
-			if (n > 1023) {
-				STRICT_ASSIGN(double, x, x * 0x1p1023);
-				return x;
-			}
+			if (n > 1023)
+				n = 1023;
 		}
 	} else if (n < -1022) {
 		x *= 0x1p-1022;
@@ -21,10 +19,8 @@ double scalbn(double x, int n)
 		if (n < -1022) {
 			x *= 0x1p-1022;
 			n += 1022;
-			if (n < -1022) {
-				STRICT_ASSIGN(double, x, x * 0x1p-1022);
-				return x;
-			}
+			if (n < -1022)
+				n = -1022;
 		}
 	}
 	INSERT_WORDS(scale, (uint32_t)(0x3ff+n)<<20, 0);
diff --git a/src/math/scalbnf.c b/src/math/scalbnf.c
index f94b5d5..c0eeaf8 100644
--- a/src/math/scalbnf.c
+++ b/src/math/scalbnf.c
@@ -10,10 +10,8 @@ float scalbnf(float x, int n)
 		if (n > 127) {
 			x *= 0x1p127f;
 			n -= 127;
-			if (n > 127) {
-				STRICT_ASSIGN(float, x, x * 0x1p127f);
-				return x;
-			}
+			if (n > 127)
+				n = 127;
 		}
 	} else if (n < -126) {
 		x *= 0x1p-126f;
@@ -21,10 +19,8 @@ float scalbnf(float x, int n)
 		if (n < -126) {
 			x *= 0x1p-126f;
 			n += 126;
-			if (n < -126) {
-				STRICT_ASSIGN(float, x, x * 0x1p-126f);
-				return x;
-			}
+			if (n < -126)
+				n = -126;
 		}
 	}
 	SET_FLOAT_WORD(scale, (uint32_t)(0x7f+n)<<23);
diff --git a/src/math/scalbnl.c b/src/math/scalbnl.c
index c605b8d..7ad7688 100644
--- a/src/math/scalbnl.c
+++ b/src/math/scalbnl.c
@@ -17,7 +17,7 @@ long double scalbnl(long double x, int n)
 			x *= 0x1p16383L;
 			n -= 16383;
 			if (n > 16383)
-				return x * 0x1p16383L;
+				n = 16383;
 		}
 	} else if (n < -16382) {
 		x *= 0x1p-16382L;
@@ -26,7 +26,7 @@ long double scalbnl(long double x, int n)
 			x *= 0x1p-16382L;
 			n += 16382;
 			if (n < -16382)
-				return x * 0x1p-16382L;
+				n = -16382;
 		}
 	}
 	scale.e = 1.0;
diff --git a/src/math/sinh.c b/src/math/sinh.c
index 47e36bf..00022c4 100644
--- a/src/math/sinh.c
+++ b/src/math/sinh.c
@@ -23,8 +23,8 @@ double sinh(double x)
 		t = expm1(absx);
 		if (w < 0x3ff00000) {
 			if (w < 0x3ff00000 - (26<<20))
-				/* note: inexact is raised by expm1 */
-				/* note: this branch avoids underflow */
+				/* note: inexact and underflow are raised by expm1 */
+				/* note: this branch avoids spurious underflow */
 				return x;
 			return h*(2*t - t*t/(t+1));
 		}
diff --git a/src/math/tanh.c b/src/math/tanh.c
index 0e766c5..65393c6 100644
--- a/src/math/tanh.c
+++ b/src/math/tanh.c
@@ -9,7 +9,7 @@ double tanh(double x)
 	union {double f; uint64_t i;} u = {.f = x};
 	uint32_t w;
 	int sign;
-	double t;
+	double_t t;
 
 	/* x = |x| */
 	sign = u.i >> 63;
@@ -22,8 +22,7 @@ double tanh(double x)
 		if (w > 0x40340000) {
 			/* |x| > 20 or nan */
 			/* note: this branch avoids raising overflow */
-			/* raise inexact if x!=+-inf and handle nan */
-			t = 1 + 0/(x + 0x1p-120f);
+			t = 1 - 0/x;
 		} else {
 			t = expm1(2*x);
 			t = 1 - 2/(t+2);
@@ -32,10 +31,15 @@ double tanh(double x)
 		/* |x| > log(5/3)/2 ~= 0.2554 */
 		t = expm1(2*x);
 		t = t/(t+2);
-	} else {
-		/* |x| is small, up to 2ulp error in [0.1,0.2554] */
+	} else if (w >= 0x00100000) {
+		/* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
 		t = expm1(-2*x);
 		t = -t/(t+2);
+	} else {
+		/* |x| is subnormal */
+		/* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
+		FORCE_EVAL(x*x);
+		t = x;
 	}
 	return sign ? -t : t;
 }
diff --git a/src/math/tanhf.c b/src/math/tanhf.c
index 8099ec3..10636fb 100644
--- a/src/math/tanhf.c
+++ b/src/math/tanhf.c
@@ -17,7 +17,7 @@ float tanhf(float x)
 		/* |x| > log(3)/2 ~= 0.5493 or nan */
 		if (w > 0x41200000) {
 			/* |x| > 10 */
-			t = 1 + 0/(x + 0x1p-120f);
+			t = 1 + 0/x;
 		} else {
 			t = expm1f(2*x);
 			t = 1 - 2/(t+2);
@@ -26,10 +26,14 @@ float tanhf(float x)
 		/* |x| > log(5/3)/2 ~= 0.2554 */
 		t = expm1f(2*x);
 		t = t/(t+2);
-	} else {
-		/* |x| is small */
+	} else if (w >= 0x00800000) {
+		/* |x| >= 0x1p-126 */
 		t = expm1f(-2*x);
 		t = -t/(t+2);
+	} else {
+		/* |x| is subnormal */
+		FORCE_EVAL(x*x);
+		t = x;
 	}
 	return sign ? -t : t;
 }
diff --git a/src/math/tgamma.c b/src/math/tgamma.c
index 691e86a..852dcf7 100644
--- a/src/math/tgamma.c
+++ b/src/math/tgamma.c
@@ -137,6 +137,7 @@ double tgamma(double x)
 	/* x =< -184: tgamma(x)=+-0 with underflow */
 	if (absx >= 184) {
 		if (x < 0) {
+			FORCE_EVAL(0x1p-1022/x);
 			if (floor(x) * 0.5 == floor(x * 0.5))
 				return 0;
 			return -0.0;
diff --git a/src/math/x86_64/expl.s b/src/math/x86_64/expl.s
index 6d5c1ce..107f3f5 100644
--- a/src/math/x86_64/expl.s
+++ b/src/math/x86_64/expl.s
@@ -8,32 +8,25 @@
 expl:
 	fldt 8(%rsp)
 
-		# special cases: 2*x is +-inf, nan or |x| < 0x1p-32
-		# check (exponent|0x8000)+2 < 0xbfff+2-32
-	movw 16(%rsp), %ax
-	movw %ax, %dx
-	orw $0x8000, %dx
-	addw $2, %dx
-	cmpw $0xbfff-30, %dx
-	jnb 3f
-	cmpw $1, %dx
-	jbe 1f
-		# if |x|<0x1p-32 return 1+x
+		# interesting case: 0x1p-32 <= |x| < 16384
+		# check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]
+	mov 16(%rsp), %ax
+	or $0x8000, %ax
+	sub $0xbfdf, %ax
+	cmp $45, %ax
+	jbe 3f		# in range: main path is the unchanged label 3 (fldl2e) below
+	test %ax, %ax
 	fld1
-	jmp 2f
-1:	testw %ax, %ax
-	jns 1f
-		# if 2*x == -inf,-nan return -0/x
-	fldz
-	fchs
-	fdivp
+	js 1f
+		# if |x|>=0x1p14 or nan return 2^trunc(x)
+	fscale
+	fstp %st(1)
 	ret
-		# if 2*x == inf,nan return 2*x
-1:	fld %st(0)
-2:	faddp
+		# if |x|<0x1p-32 return 1+x
+1:	faddp
 	ret
 
-		# should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc
+		# should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc
 		# it will be wrong on non-nearest rounding mode
 3:	fldl2e
 	subq $48, %rsp