From 5f97370ff3e94bea812ec123a31d7482965a3b1b Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy
Date: Wed, 24 Apr 2019 23:29:05 +0000
Subject: [PATCH] x86: optimize fp_arch.h

Use fp register constraint instead of volatile store when sse2 math is
available, and use memory constraint when only x87 fpu is available.
---
Reviewer notes (free text in this position is not part of the commit
message and is skipped when the diff is applied):

- FP_BARRIER(x) is an empty asm statement with x as a read-write
  operand. Per the commit message, "+x" is the fp register constraint
  used when __SSE2_MATH__ is defined, and "+m" is the memory constraint
  used when only the x87 fpu is available.
- fp_barrierf/fp_barrier return x after the barrier; fp_force_evalf/
  fp_force_eval apply the same barrier and return nothing.
- Each function is preceded by a same-named #define, presumably so that
  a generic header can detect the arch-specific override with #ifndef.
  That header is not part of this patch; confirm against the tree.
- The x32 and x86_64 files share one blob id (ff9b8311) and use the
  "+x" constraint unconditionally.

 arch/i386/fp_arch.h   | 31 +++++++++++++++++++++++++++++++
 arch/x32/fp_arch.h    | 25 +++++++++++++++++++++++++
 arch/x86_64/fp_arch.h | 25 +++++++++++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 arch/i386/fp_arch.h
 create mode 100644 arch/x32/fp_arch.h
 create mode 100644 arch/x86_64/fp_arch.h

diff --git a/arch/i386/fp_arch.h b/arch/i386/fp_arch.h
new file mode 100644
index 00000000..b4019de2
--- /dev/null
+++ b/arch/i386/fp_arch.h
@@ -0,0 +1,31 @@
+#ifdef __SSE2_MATH__
+#define FP_BARRIER(x) __asm__ __volatile__ ("" : "+x"(x))
+#else
+#define FP_BARRIER(x) __asm__ __volatile__ ("" : "+m"(x))
+#endif
+
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	FP_BARRIER(x);
+	return x;
+}
+
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	FP_BARRIER(x);
+	return x;
+}
+
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	FP_BARRIER(x);
+}
+
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	FP_BARRIER(x);
+}
diff --git a/arch/x32/fp_arch.h b/arch/x32/fp_arch.h
new file mode 100644
index 00000000..ff9b8311
--- /dev/null
+++ b/arch/x32/fp_arch.h
@@ -0,0 +1,25 @@
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+	return x;
+}
+
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+	return x;
+}
+
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+}
+
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+}
diff --git a/arch/x86_64/fp_arch.h b/arch/x86_64/fp_arch.h
new file mode 100644
index 00000000..ff9b8311
--- /dev/null
+++ b/arch/x86_64/fp_arch.h
@@ -0,0 +1,25 @@
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+	return x;
+}
+
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+	return x;
+}
+
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+}
+
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	__asm__ __volatile__ ("" : "+x"(x));
+}
-- 
2.21.0