From 121aae8cbd37396fd3b0e4e6f6d42b70b9966671 Mon Sep 17 00:00:00 2001
From: Markus Wichmann
Date: Sat, 16 Mar 2024 10:02:11 +0100
Subject: [PATCH 2/2] Runtime switch hardware fma on x86_64.

Instead of only using hardware fma instructions if enabled at compile
time (i.e. if compiling at an ISA level that requires these to be
present), we can now switch them in at runtime. Compile time switches
are still effective and eliminate the other implementations, so the
semantics don't change there. But even at baseline ISA level, we can now
use the hardware FMA if __cpuid says it's OK.
---
 src/math/x86_64/fma.c  | 28 ++++++++++++++++++++--------
 src/math/x86_64/fmaf.c | 28 ++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/src/math/x86_64/fma.c b/src/math/x86_64/fma.c
index 4dd53f2a..04c6064a 100644
--- a/src/math/x86_64/fma.c
+++ b/src/math/x86_64/fma.c
@@ -1,23 +1,35 @@
 #include <math.h>
 
-#if __FMA__
-
-double fma(double x, double y, double z)
+static inline double fma_fma(double x, double y, double z)
 {
 	__asm__ ("vfmadd132sd %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
 	return x;
 }
 
-#elif __FMA4__
-
-double fma(double x, double y, double z)
+static inline double fma4_fma(double x, double y, double z)
 {
 	__asm__ ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
 	return x;
 }
 
-#else
-
+#if !__FMA__ && !__FMA4__
+#include "x86_64/cpuid.h"
+#define fma __soft_fma
 #include "../fma.c"
+#undef fma
+#endif
 
+double fma(double x, double y, double z)
+{
+#if __FMA__
+	return fma_fma(x, y, z);
+#elif __FMA4__
+	return fma4_fma(x, y, z);
+#else
+	if (__cpuid & X86_FEAT_FMA)
+		return fma_fma(x, y, z);
+	if (__cpuid & X86_FEAT_FMA4)
+		return fma4_fma(x, y, z);
+	return __soft_fma(x, y, z);
 #endif
+}
diff --git a/src/math/x86_64/fmaf.c b/src/math/x86_64/fmaf.c
index 30b971ff..b4d9b714 100644
--- a/src/math/x86_64/fmaf.c
+++ b/src/math/x86_64/fmaf.c
@@ -1,23 +1,35 @@
 #include <math.h>
 
-#if __FMA__
-
-float fmaf(float x, float y, float z)
+static inline float fma_fmaf(float x, float y, float z)
 {
 	__asm__ ("vfmadd132ss %1, %2, %0" : "+x" (x) : "x" (y), "x" (z));
 	return x;
 }
 
-#elif __FMA4__
-
-float fmaf(float x, float y, float z)
+static inline float fma4_fmaf(float x, float y, float z)
 {
 	__asm__ ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "x" (z));
 	return x;
 }
 
-#else
-
+#if !__FMA__ && !__FMA4__
+#include "x86_64/cpuid.h"
+#define fmaf __soft_fmaf
 #include "../fmaf.c"
+#undef fmaf
+#endif
 
+float fmaf(float x, float y, float z)
+{
+#if __FMA__
+	return fma_fmaf(x, y, z);
+#elif __FMA4__
+	return fma4_fmaf(x, y, z);
+#else
+	if (__cpuid & X86_FEAT_FMA)
+		return fma_fmaf(x, y, z);
+	if (__cpuid & X86_FEAT_FMA4)
+		return fma4_fmaf(x, y, z);
+	return __soft_fmaf(x, y, z);
 #endif
+}
-- 
2.39.2