Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260128221323.GA3520958@port70.net>
Date: Wed, 28 Jan 2026 23:13:23 +0100
From: Szabolcs Nagy <nsz@...t70.net>
To: Bill Roberts <bill.roberts@....com>
Cc: musl@...ts.openwall.com, dalias@...c.org, dalias@...ifal.cx
Subject: Re: [PATCH] aarch64: rewrite fenv routines in C using inline
 asm

* Bill Roberts <bill.roberts@....com> [2026-01-27 00:04:27 -0600]:
> Rewrite the AArch64 floating-point environment routines (fegetround,
> __fesetround, fetestexcept, feclearexcept, feraiseexcept, fegetenv,
> fesetenv) from assembly into C implementations using inline assembly.
> 
> This change eliminates the need for handwritten function prologues and
> epilogues in fenv.s, which simplifies maintenance and allows the compiler
> to automatically insert architecture features such as BTI landing pads and
> pointer authentication (PAC) sequences where applicable.
> 
> The new implementations mirror the original assembly semantics exactly:
> - access FPCR/FPSR using `mrs`/`msr` inline asm
> - preserve the same FE_* masks and return conventions
> - retain `__fesetround` as a hidden internal symbol
> - support the special FE_DFL_ENV case in fesetenv
> 
> Moving to C also enables the compiler to manage register allocation,
> stack usage, and ABI compliance automatically while keeping the low-level
> behavior (bitmasks and register accesses) explicit and verifiable.
> 
> No functional changes intended.
> 
> Signed-off-by: Bill Roberts <bill.roberts@....com>
> ---
>  arch/aarch64/crt_arch.h | 29 +++++++------
>  src/fenv/aarch64/fenv.c | 96 +++++++++++++++++++++++++++++++++++++++++
>  src/fenv/aarch64/fenv.s | 68 -----------------------------
>  3 files changed, 112 insertions(+), 81 deletions(-)
>  create mode 100644 src/fenv/aarch64/fenv.c
>  delete mode 100644 src/fenv/aarch64/fenv.s
> 
> diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
> index b64fb3dd..cff8edb3 100644
> --- a/arch/aarch64/crt_arch.h
> +++ b/arch/aarch64/crt_arch.h
> @@ -1,15 +1,18 @@
>  __asm__(
> -".text \n"
> -".global " START "\n"
> -".type " START ",%function\n"
> -START ":\n"
> -"	mov x29, #0\n"
> -"	mov x30, #0\n"
> -"	mov x0, sp\n"
> -".weak _DYNAMIC\n"
> -".hidden _DYNAMIC\n"
> -"	adrp x1, _DYNAMIC\n"
> -"	add x1, x1, #:lo12:_DYNAMIC\n"
> -"	and sp, x0, #-16\n"
> -"	b " START "_c\n"
> +".text \n\t"
> +".global " START "\n\t"
> +".type " START ",%function\n\t"
> +START ":\n\t"
> +#if defined(__ARM_FEATURE_BTI_DEFAULT)
> +"	hint 34\n\t"
> +#endif
> +"	mov x29, #0\n\t"
> +"	mov x30, #0\n\t"
> +"	mov x0, sp\n\t"
> +".weak _DYNAMIC\n\t"
> +".hidden _DYNAMIC\n\t"
> +"	adrp x1, _DYNAMIC\n\t"
> +"	add x1, x1, #:lo12:_DYNAMIC\n\t"
> +"	and sp, x0, #-16\n\t"
> +"	b " START "_c\n\t"

given the commit msg, this bit should be a separate patch.

the \t changes are a distraction here

>  );
> diff --git a/src/fenv/aarch64/fenv.c b/src/fenv/aarch64/fenv.c
> new file mode 100644
> index 00000000..6d84feac
> --- /dev/null
> +++ b/src/fenv/aarch64/fenv.c
> @@ -0,0 +1,96 @@
> +#include <fenv.h>
> +#include <stdint.h>
> +
> +#define FE_RMODE_MASK 0x00C00000u  // FPCR RMode bits [23:22]
> +#define FE_EXC_MASK   0x0000001Fu  // FPSR exception flags [4:0]
> +
> +static inline uint32_t read_fpcr_u32(void)
> +{
> +	uint64_t x;
> +	__asm__ volatile ("mrs %0, fpcr" : "=r"(x));
> +	return (uint32_t)x;
> +}
> +
> +static inline void write_fpcr_u32(uint32_t v)
> +{
> +	uint64_t x = (uint64_t)v;
> +	__asm__ volatile ("msr fpcr, %0" :: "r"(x) : "memory");
> +}
> +
> +static inline uint32_t read_fpsr_u32(void)
> +{
> +	uint64_t x;
> +	__asm__ volatile ("mrs %0, fpsr" : "=r"(x));
> +	return (uint32_t)x;
> +}
> +
> +static inline void write_fpsr_u32(uint32_t v)
> +{
> +	uint64_t x = (uint64_t)v;
> +	__asm__ volatile ("msr fpsr, %0" :: "r"(x) : "memory");
> +}

i dont think the casts are useful but the logic looks right

im not sure if the memory clobber is needed.

> +
> +int fegetround(void)
> +{
> +	uint32_t fpcr = read_fpcr_u32();
> +	return (int)(fpcr & FE_RMODE_MASK);
> +}
> +
> +__attribute__((__visibility__("hidden")))

use "hidden" from features.h

the rest looks ok.

> +int __fesetround(int rm)
> +{
> +	uint32_t fpcr = read_fpcr_u32();
> +	fpcr &= ~FE_RMODE_MASK;
> +	fpcr |= (uint32_t)rm;
> +	write_fpcr_u32(fpcr);
> +	return 0;
> +}
> +
> +int fetestexcept(int mask)
> +{
> +	uint32_t m   = (uint32_t)mask & FE_EXC_MASK;
> +	uint32_t fpsr = read_fpsr_u32();
> +	return (int)(m & fpsr);
> +}
> +
> +int feclearexcept(int mask)
> +{
> +	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
> +	uint32_t fpsr = read_fpsr_u32();
> +	fpsr &= ~m;
> +	write_fpsr_u32(fpsr);
> +	return 0;
> +}
> +
> +int feraiseexcept(int mask)
> +{
> +	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
> +	uint32_t fpsr = read_fpsr_u32();
> +	fpsr |= m;
> +	write_fpsr_u32(fpsr);
> +	return 0;
> +}
> +
> +int fegetenv(fenv_t *env)
> +{
> +	uint32_t fpcr = read_fpcr_u32();
> +	uint32_t fpsr = read_fpsr_u32();
> +	env->__fpcr = fpcr;
> +	env->__fpsr = fpsr;
> +	return 0;
> +}
> +
> +int fesetenv(const fenv_t *env)
> +{
> +	uint32_t fpcr = 0;
> +	uint32_t fpsr = 0;
> +
> +	if (env != FE_DFL_ENV) {
> +		fpcr = env->__fpcr;
> +		fpsr = env->__fpsr;
> +	}
> +
> +	write_fpcr_u32(fpcr);
> +	write_fpsr_u32(fpsr);
> +	return 0;
> +}
> diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
> deleted file mode 100644
> index 8f3ec965..00000000
> --- a/src/fenv/aarch64/fenv.s
> +++ /dev/null
> @@ -1,68 +0,0 @@
> -.global fegetround
> -.type fegetround,%function
> -fegetround:
> -	mrs x0, fpcr
> -	and w0, w0, #0xc00000
> -	ret
> -
> -.global __fesetround
> -.hidden __fesetround
> -.type __fesetround,%function
> -__fesetround:
> -	mrs x1, fpcr
> -	bic w1, w1, #0xc00000
> -	orr w1, w1, w0
> -	msr fpcr, x1
> -	mov w0, #0
> -	ret
> -
> -.global fetestexcept
> -.type fetestexcept,%function
> -fetestexcept:
> -	and w0, w0, #0x1f
> -	mrs x1, fpsr
> -	and w0, w0, w1
> -	ret
> -
> -.global feclearexcept
> -.type feclearexcept,%function
> -feclearexcept:
> -	and w0, w0, #0x1f
> -	mrs x1, fpsr
> -	bic w1, w1, w0
> -	msr fpsr, x1
> -	mov w0, #0
> -	ret
> -
> -.global feraiseexcept
> -.type feraiseexcept,%function
> -feraiseexcept:
> -	and w0, w0, #0x1f
> -	mrs x1, fpsr
> -	orr w1, w1, w0
> -	msr fpsr, x1
> -	mov w0, #0
> -	ret
> -
> -.global fegetenv
> -.type fegetenv,%function
> -fegetenv:
> -	mrs x1, fpcr
> -	mrs x2, fpsr
> -	stp w1, w2, [x0]
> -	mov w0, #0
> -	ret
> -
> -// TODO preserve some bits
> -.global fesetenv
> -.type fesetenv,%function
> -fesetenv:
> -	mov x1, #0
> -	mov x2, #0
> -	cmn x0, #1
> -	b.eq 1f
> -	ldp w1, w2, [x0]
> -1:	msr fpcr, x1
> -	msr fpsr, x2
> -	mov w0, #0
> -	ret
> -- 
> 2.51.0

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.