>From 7a54c4fee1771cdc9de42445c813d9e7d43d272e Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy
Date: Sat, 22 Sep 2018 21:43:42 +0000
Subject: [PATCH 4/5] x86_64: add single instruction fma

fma is only available on recent x86_64 cpus and it is much faster than
a software fma, so this should be done with a runtime check, however
that requires more changes, this patch just adds the code so it can be
tested when musl is compiled with -mfma or -mfma4.
---
 src/math/x32/fma.c     | 23 +++++++++++++++++++++++
 src/math/x32/fmaf.c    | 23 +++++++++++++++++++++++
 src/math/x86_64/fma.c  | 23 +++++++++++++++++++++++
 src/math/x86_64/fmaf.c | 23 +++++++++++++++++++++++
 4 files changed, 92 insertions(+)
 create mode 100644 src/math/x32/fma.c
 create mode 100644 src/math/x32/fmaf.c
 create mode 100644 src/math/x86_64/fma.c
 create mode 100644 src/math/x86_64/fmaf.c

diff --git a/src/math/x32/fma.c b/src/math/x32/fma.c
new file mode 100644
index 00000000..4dd53f2a
--- /dev/null
+++ b/src/math/x32/fma.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
+	return x;
+}
+
+#elif __FMA4__
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
+	return x;
+}
+
+#else
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/x32/fmaf.c b/src/math/x32/fmaf.c
new file mode 100644
index 00000000..30b971ff
--- /dev/null
+++ b/src/math/x32/fmaf.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
+	return x;
+}
+
+#elif __FMA4__
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
+	return x;
+}
+
+#else
+
+#include "../fmaf.c"
+
+#endif
diff --git a/src/math/x86_64/fma.c b/src/math/x86_64/fma.c
new file mode 100644
index 00000000..4dd53f2a
--- /dev/null
+++ b/src/math/x86_64/fma.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
+	return x;
+}
+
+#elif __FMA4__
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
+	return x;
+}
+
+#else
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/x86_64/fmaf.c b/src/math/x86_64/fmaf.c
new file mode 100644
index 00000000..30b971ff
--- /dev/null
+++ b/src/math/x86_64/fmaf.c
@@ -0,0 +1,23 @@
+#include <math.h>
+
+#if __FMA__
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
+	return x;
+}
+
+#elif __FMA4__
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
+	return x;
+}
+
+#else
+
+#include "../fmaf.c"
+
+#endif
-- 
2.18.0