|
|
Message-ID: <2C3325A208DA4260A1A0F7B4517D6DFA@H270>
Date: Tue, 10 Dec 2019 17:57:55 +0100
From: "Stefan Kanthak" <stefan.kanthak@...go.de>
To: <musl@...ts.openwall.com>
Subject: More patches for math subtree
Some more optimisations: the current implementations of ceil(), floor()
and trunc() for i386 change the rounding control using fldcw instructions,
which are SLOW; these patches provide faster and smaller branch-free (!)
implementations.
JFTR: I'm NOT subscribed to your mailing list, so CC: me in replies!
--- -/src/math/i386/floor.s
+++ +/src/math/i386/floor.s
@@ -1,67 +1,26 @@
.global floorf
.type floorf,@function
floorf:
flds 4(%esp)
jmp 1f
.global floorl
.type floorl,@function
floorl:
fldt 4(%esp)
jmp 1f
.global floor
.type floor,@function
floor:
fldl 4(%esp)
+1: fld %st(0)  # floor(x) = rint(x) - (x < rint(x) ? 1 : 0), without touching the control word; stack: x x
+ frndint  # r = x rounded to an integer in the current rounding mode; stack: r x
+ fxch %st(1)  # stack: x r
+ fucomip %st(1),%st(0)  # CF = (x < r), also set when x is NaN; pops x; stack: r
+ fld1  # stack: 1 r
+ fldz  # stack: 0 1 r
+ fcmovb %st(1),%st(0)  # adj = CF ? 1 : 0; stack: adj 1 r  (NOTE(review): fcmov/fucomip need a P6-class CPU - confirm the i386 baseline)
+ fsubrp %st(0),%st(2)  # st(2) = r - adj, pop: AT&T syntax swaps fsub/fsubr when the destination is %st(i), so fsubp here would yield adj - r; stack: 1 r-adj
+ fstp %st(0)  # drop the constant 1; result stays in st(0)  (NOTE(review): under FE_DOWNWARD +0 - +0 is -0, so floor(+0.0) may return -0.0 - confirm)
+ ret
-1: mov $0x7,%al
-1: fstcw 4(%esp)
- mov 5(%esp),%ah
- mov %al,5(%esp)
- fldcw 4(%esp)
- frndint
- mov %ah,5(%esp)
- fldcw 4(%esp)
- ret
-
-.global ceil
-.type ceil,@function
-ceil:
- fldl 4(%esp)
- mov $0xb,%al
- jmp 1b
-
-.global ceilf
-.type ceilf,@function
-ceilf:
- flds 4(%esp)
- mov $0xb,%al
- jmp 1b
-
-.global ceill
-.type ceill,@function
-ceill:
- fldt 4(%esp)
- mov $0xb,%al
- jmp 1b
-
-.global trunc
-.type trunc,@function
-trunc:
- fldl 4(%esp)
- mov $0xf,%al
- jmp 1b
-
-.global truncf
-.type truncf,@function
-truncf:
- flds 4(%esp)
- mov $0xf,%al
- jmp 1b
-
-.global truncl
-.type truncl,@function
-truncl:
- fldt 4(%esp)
- mov $0xf,%al
- jmp 1b
--- -/src/math/i386/ceilf.s
+++ +/src/math/i386/ceilf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s
--- -/src/math/i386/ceill.s
+++ +/src/math/i386/ceill.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s
--- -/src/math/i386/ceil.s
+++ +/src/math/i386/ceil.s
@@ -1,1 +1,28 @@
-# see floor.s
+.global ceilf
+.type ceilf,@function
+ceilf:
+ flds 4(%esp)
+ jmp 1f
+
+.global ceill
+.type ceill,@function
+ceill:
+ fldt 4(%esp)
+ jmp 1f
+
+.global ceil
+.type ceil,@function
+ceil:
+ fldl 4(%esp)
+1: fchs  # ceil(x) = -floor(-x); negating (rather than adding 1 to rint(x)) keeps -0 results for x in (-1,-0]; stack: y  (y = -x)
+ fld %st(0)  # stack: y y
+ frndint  # r = y rounded to an integer in the current rounding mode; stack: r y
+ fxch %st(1)  # stack: y r
+ fucomip %st(1),%st(0)  # CF = (y < r), also set when y is NaN; pops y; stack: r
+ fld1  # stack: 1 r
+ fldz  # stack: 0 1 r
+ fcmovb %st(1),%st(0)  # adj = CF ? 1 : 0; stack: adj 1 r
+ fsubrp %st(0),%st(2)  # st(2) = r - adj = floor(y), pop; the adjustment must land in st(2), and AT&T fsubrp %st,%st(i) is st(i) - st; stack: 1 floor(y)
+ fstp %st(0)  # drop the constant 1; stack: floor(y)
+ fchs  # result = -floor(-x)
+ ret
--- -/src/math/i386/truncf.s
+++ +/src/math/i386/truncf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s
--- -/src/math/i386/truncl.s
+++ +/src/math/i386/truncl.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s
--- -/src/math/i386/trunc.s
+++ +/src/math/i386/trunc.s
@@ -1,1 +1,34 @@
-# see floor.s
+.global truncf
+.type truncf,@function
+truncf:
+ flds 4(%esp)
+ jmp 1f
+
+.global truncl
+.type truncl,@function
+truncl:
+ fldt 4(%esp)
+ jmp 1f
+
+.global trunc
+.type trunc,@function
+trunc:
+ fldl 4(%esp)
+1: fld %st(0)  # trunc(x): t = floor(|x|), then give t the sign of x; stack: x x
+ fabs  # a = |x|; stack: a x
+ fld %st(0)  # stack: a a x
+ frndint  # r = a rounded to an integer in the current rounding mode; stack: r a x
+ fxch %st(1)  # stack: a r x
+ fucomip %st(1),%st(0)  # CF = (a < r), also set when x is NaN; pops a; stack: r x
+ fldz  # stack: 0 r x
+ fld1  # stack: 1 0 r x
+ fcmovnb %st(1),%st(0)  # adj = CF ? 1 : 0; stack: adj 0 r x
+ fsubrp %st(0),%st(2)  # st(2) = r - adj = t, pop: AT&T syntax swaps fsub/fsubr when the destination is %st(i), so fsubp here would yield adj - r; stack: 0 t x
+ fucomip %st(2),%st(0)  # compare 0 with x: CF = (0 < x), ZF = (x == 0), both set for NaN; pops 0; stack: t x
+ fld %st(0)  # stack: t t x
+ fchs  # default to -t, the x < 0 case; stack: -t t x
+ fcmovb %st(1),%st(0)  # x > 0 (or NaN): take +t
+ fcmove %st(2),%st(0)  # x == 0 (or NaN): return x itself, since -0 compares equal to +0 and would otherwise lose its sign
+ fstp %st(2)  # store the result over x, pop; stack: t result
+ fstp %st(0)  # drop t; result stays in st(0)
+ ret
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.