|
|
Message-ID: <6ce9a723-7733-4b12-b8bd-4e1b384076d3@foss.arm.com>
Date: Tue, 27 Jan 2026 19:45:47 -0600
From: Bill Roberts <bill.roberts@...s.arm.com>
To: Bill Roberts <bill.roberts@....com>, musl@...ts.openwall.com,
dalias@...c.org, dalias@...ifal.cx
Subject: Re: [PATCH] aarch64: rewrite fenv routines in C using inline
asm
On 1/28/26 4:13 PM, Szabolcs Nagy wrote:
> * Bill Roberts <bill.roberts@....com> [2026-01-27 00:04:27 -0600]:
>> Rewrite the AArch64 floating-point environment routines (fegetround,
>> __fesetround, fetestexcept, feclearexcept, feraiseexcept, fegetenv,
>> fesetenv) from assembly into C implementations using inline assembly.
>>
>> This change eliminates the need for handwritten function prologues and
>> epilogues in fenv.s, which simplifies maintenance and allows the compiler
>> to automatically insert architecture features such as BTI landing pads and
>> pointer authentication (PAC) sequences where applicable.
>>
>> The new implementations mirror the original assembly semantics exactly:
>> - access FPCR/FPSR using `mrs`/`msr` inline asm
>> - preserve the same FE_* masks and return conventions
>> - retain `__fesetround` as a hidden internal symbol
>> - support the special FE_DFL_ENV case in fesetenv
>>
>> Moving to C also enables the compiler to manage register allocation,
>> stack usage, and ABI compliance automatically while keeping the low-level
>> behavior (bitmasks and register accesses) explicit and verifiable.
>>
>> No functional changes intended.
>>
>> Signed-off-by: Bill Roberts <bill.roberts@....com>
>> ---
>> arch/aarch64/crt_arch.h | 29 +++++++------
>> src/fenv/aarch64/fenv.c | 96 +++++++++++++++++++++++++++++++++++++++++
>> src/fenv/aarch64/fenv.s | 68 -----------------------------
>> 3 files changed, 112 insertions(+), 81 deletions(-)
>> create mode 100644 src/fenv/aarch64/fenv.c
>> delete mode 100644 src/fenv/aarch64/fenv.s
>>
>> diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
>> index b64fb3dd..cff8edb3 100644
>> --- a/arch/aarch64/crt_arch.h
>> +++ b/arch/aarch64/crt_arch.h
>> @@ -1,15 +1,18 @@
>> __asm__(
>> -".text \n"
>> -".global " START "\n"
>> -".type " START ",%function\n"
>> -START ":\n"
>> -" mov x29, #0\n"
>> -" mov x30, #0\n"
>> -" mov x0, sp\n"
>> -".weak _DYNAMIC\n"
>> -".hidden _DYNAMIC\n"
>> -" adrp x1, _DYNAMIC\n"
>> -" add x1, x1, #:lo12:_DYNAMIC\n"
>> -" and sp, x0, #-16\n"
>> -" b " START "_c\n"
>> +".text \n\t"
>> +".global " START "\n\t"
>> +".type " START ",%function\n\t"
>> +START ":\n\t"
>> +#if defined(__ARM_FEATURE_BTI_DEFAULT)
>> +" hint 34\n\t"
>> +#endif
>> +" mov x29, #0\n\t"
>> +" mov x30, #0\n\t"
>> +" mov x0, sp\n\t"
>> +".weak _DYNAMIC\n\t"
>> +".hidden _DYNAMIC\n\t"
>> +" adrp x1, _DYNAMIC\n\t"
>> +" add x1, x1, #:lo12:_DYNAMIC\n\t"
>> +" and sp, x0, #-16\n\t"
>> +" b " START "_c\n\t"
>
> given the commit msg, this bit should be a separate patch.
>
> the \t changes are a distraction here
Yep I meant to drop those too, thanks.
>
>> );
>> diff --git a/src/fenv/aarch64/fenv.c b/src/fenv/aarch64/fenv.c
>> new file mode 100644
>> index 00000000..6d84feac
>> --- /dev/null
>> +++ b/src/fenv/aarch64/fenv.c
>> @@ -0,0 +1,96 @@
>> +#include <fenv.h>
>> +#include <stdint.h>
>> +
>> +#define FE_RMODE_MASK 0x00C00000u // FPCR RMode bits [23:22]
>> +#define FE_EXC_MASK 0x0000001Fu // FPSR exception flags [4:0]
>> +
>> +static inline uint32_t read_fpcr_u32(void)
>> +{
>> + uint64_t x;
>> + __asm__ volatile ("mrs %0, fpcr" : "=r"(x));
>> + return (uint32_t)x;
>> +}
>> +
>> +static inline void write_fpcr_u32(uint32_t v)
>> +{
>> + uint64_t x = (uint64_t)v;
>> + __asm__ volatile ("msr fpcr, %0" :: "r"(x) : "memory");
>> +}
>> +
>> +static inline uint32_t read_fpsr_u32(void)
>> +{
>> + uint64_t x;
>> + __asm__ volatile ("mrs %0, fpsr" : "=r"(x));
>> + return (uint32_t)x;
>> +}
>> +
> i dont think the casts are useful but the logic looks right
Yeah the cast to uint64_t doesn't need to be there and can be dropped.
I just usually add a cast by forced habit, to make it explicit
what I was intending, but I will drop.
>
> im not sure if the memory clobber is needed.
I think we need this, as the write needs to be in the
correct order with respect to other floating point operations
and the memory clobber seems to get us that. gcc doesn't come
right out and say it, but talking with compiler folks who
know a lot more than me, they say, "that's the canonical way to
do it".
V2 coming shortly.
>
>> +
>> +int fegetround(void)
>> +{
>> + uint32_t fpcr = read_fpcr_u32();
>> + return (int)(fpcr & FE_RMODE_MASK);
>> +}
>> +
>> +__attribute__((__visibility__("hidden")))
>
> use "hidden" from features.h
>
> the rest looks ok.
>
>> +int __fesetround(int rm)
>> +{
>> + uint32_t fpcr = read_fpcr_u32();
>> + fpcr &= ~FE_RMODE_MASK;
>> + fpcr |= (uint32_t)rm;
>> + write_fpcr_u32(fpcr);
>> + return 0;
>> +}
>> +
>> +int fetestexcept(int mask)
>> +{
>> + uint32_t m = (uint32_t)mask & FE_EXC_MASK;
>> + uint32_t fpsr = read_fpsr_u32();
>> + return (int)(m & fpsr);
>> +}
>> +
>> +int feclearexcept(int mask)
>> +{
>> + uint32_t m = (uint32_t)mask & FE_EXC_MASK;
>> + uint32_t fpsr = read_fpsr_u32();
>> + fpsr &= ~m;
>> + write_fpsr_u32(fpsr);
>> + return 0;
>> +}
>> +
>> +int feraiseexcept(int mask)
>> +{
>> + uint32_t m = (uint32_t)mask & FE_EXC_MASK;
>> + uint32_t fpsr = read_fpsr_u32();
>> + fpsr |= m;
>> + write_fpsr_u32(fpsr);
>> + return 0;
>> +}
>> +
>> +int fegetenv(fenv_t *env)
>> +{
>> + uint32_t fpcr = read_fpcr_u32();
>> + uint32_t fpsr = read_fpsr_u32();
>> + env->__fpcr = fpcr;
>> + env->__fpsr = fpsr;
>> + return 0;
>> +}
>> +
>> +int fesetenv(const fenv_t *env)
>> +{
>> + uint32_t fpcr = 0;
>> + uint32_t fpsr = 0;
>> +
>> + if (env != FE_DFL_ENV) {
>> + fpcr = env->__fpcr;
>> + fpsr = env->__fpsr;
>> + }
>> +
>> + write_fpcr_u32(fpcr);
>> + write_fpsr_u32(fpsr);
>> + return 0;
>> +}
>> diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
>> deleted file mode 100644
>> index 8f3ec965..00000000
>> --- a/src/fenv/aarch64/fenv.s
>> +++ /dev/null
>> @@ -1,68 +0,0 @@
>> -.global fegetround
>> -.type fegetround,%function
>> -fegetround:
>> - mrs x0, fpcr
>> - and w0, w0, #0xc00000
>> - ret
>> -
>> -.global __fesetround
>> -.hidden __fesetround
>> -.type __fesetround,%function
>> -__fesetround:
>> - mrs x1, fpcr
>> - bic w1, w1, #0xc00000
>> - orr w1, w1, w0
>> - msr fpcr, x1
>> - mov w0, #0
>> - ret
>> -
>> -.global fetestexcept
>> -.type fetestexcept,%function
>> -fetestexcept:
>> - and w0, w0, #0x1f
>> - mrs x1, fpsr
>> - and w0, w0, w1
>> - ret
>> -
>> -.global feclearexcept
>> -.type feclearexcept,%function
>> -feclearexcept:
>> - and w0, w0, #0x1f
>> - mrs x1, fpsr
>> - bic w1, w1, w0
>> - msr fpsr, x1
>> - mov w0, #0
>> - ret
>> -
>> -.global feraiseexcept
>> -.type feraiseexcept,%function
>> -feraiseexcept:
>> - and w0, w0, #0x1f
>> - mrs x1, fpsr
>> - orr w1, w1, w0
>> - msr fpsr, x1
>> - mov w0, #0
>> - ret
>> -
>> -.global fegetenv
>> -.type fegetenv,%function
>> -fegetenv:
>> - mrs x1, fpcr
>> - mrs x2, fpsr
>> - stp w1, w2, [x0]
>> - mov w0, #0
>> - ret
>> -
>> -// TODO preserve some bits
>> -.global fesetenv
>> -.type fesetenv,%function
>> -fesetenv:
>> - mov x1, #0
>> - mov x2, #0
>> - cmn x0, #1
>> - b.eq 1f
>> - ldp w1, w2, [x0]
>> -1: msr fpcr, x1
>> - msr fpsr, x2
>> - mov w0, #0
>> - ret
>> --
>> 2.51.0
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.