>From e33ac3fd4a39416c4b681d610c7ad9737a279260 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy
Date: Sat, 22 Sep 2018 18:47:27 +0000
Subject: [PATCH 3/5] arm: add single instruction fma

vfma is available in the vfpv4 fpu and above, the ACLE standard feature
test for double precision hardware fma support is
  __ARM_FEATURE_FMA && __ARM_FP&8
we need further checks to work around clang bugs (fixed in clang >=7.0)
  && !__SOFTFP__
because __ARM_FP is defined even with -mfloat-abi=soft
  && !BROKEN_VFP_ASM
to disable the single precision code when inline asm handling is broken.

For runtime selection the HWCAP_ARM_VFPv4 hwcap flag can be used, but
that requires further work.
---
 src/math/arm/fma.c  | 15 +++++++++++++++
 src/math/arm/fmaf.c | 15 +++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 src/math/arm/fma.c
 create mode 100644 src/math/arm/fmaf.c

diff --git a/src/math/arm/fma.c b/src/math/arm/fma.c
new file mode 100644
index 00000000..2a9b8efa
--- /dev/null
+++ b/src/math/arm/fma.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __ARM_FEATURE_FMA && __ARM_FP&8 && !__SOFTFP__
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("vfma.f64 %P0, %P1, %P2" : "+w"(z) : "w"(x), "w"(y));
+	return z;
+}
+
+#else
+
+#include "../fma.c"
+
+#endif
diff --git a/src/math/arm/fmaf.c b/src/math/arm/fmaf.c
new file mode 100644
index 00000000..a1793d27
--- /dev/null
+++ b/src/math/arm/fmaf.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+#if __ARM_FEATURE_FMA && __ARM_FP&4 && !__SOFTFP__ && !BROKEN_VFP_ASM
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("vfma.f32 %0, %1, %2" : "+t"(z) : "t"(x), "t"(y));
+	return z;
+}
+
+#else
+
+#include "../fmaf.c"
+
+#endif
-- 
2.18.0