|
|
Message-ID: <20251113200633.833127-4-bill.roberts@arm.com>
Date: Thu, 13 Nov 2025 13:44:28 -0600
From: Bill Roberts <bill.roberts@....com>
To: musl@...ts.openwall.com
Cc: Bill Roberts <bill.roberts@....com>
Subject: [PATCH 3/3] aarch64: enable PAC and BTI instruction support in musl build
This change adds support for Pointer Authentication (PAC) and Branch
Target Identification (BTI) within musl’s own code on AArch64. These
features improve control-flow integrity and mitigate return-oriented
programming attacks by hardening indirect branches and return
instructions.
To integrate these instructions robustly across toolchains:
- PAC and BTI instructions are inserted directly into the assembly,
rather than being emitted via CFI directives. This approach is taken
because it is far simpler to remove or rewrite instructions using AWK
than to identify and manually annotate every location that requires
them. New assembly code should therefore be written with PAC and BTI
awareness in mind.
- Since some older toolchains may not recognize PAC or BTI mnemonics,
the post-processing step rewrites them into equivalent `hint`
instructions. This allows musl to continue building successfully on
systems without assembler support for these instructions, while still
emitting the correct opcodes when using newer toolchains.
Together, these changes prepare musl for secure execution environments
with PAC and BTI enabled, while preserving backward compatibility.
References:
- Arm Architecture Reference Manual for A-profile architecture (Arm ARM):
https://developer.arm.com/documentation/ddi0487/latest
- ELF for the Arm® 64-bit Architecture (AArch64) ABI supplement:
https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
- Arm Community Blog – *Enabling Pointer Authentication and Branch Target Identification*:
1. https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-part1
2. https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-part2
3. https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-part3
Signed-off-by: Bill Roberts <bill.roberts@....com>
---
Makefile | 21 ++++--
arch/aarch64/crt_arch.h | 3 +
configure | 13 +++-
crt/aarch64/crti.s | 2 +
crt/aarch64/crtn.s | 2 +
src/fenv/aarch64/fenv.s | 7 ++
src/ldso/aarch64/tlsdesc.s | 2 +
src/process/aarch64/vfork.s | 1 +
src/setjmp/aarch64/longjmp.s | 1 +
src/setjmp/aarch64/setjmp.s | 1 +
src/signal/aarch64/restore.s | 1 +
src/signal/aarch64/sigsetjmp.s | 2 +
src/string/aarch64/memcpy.S | 1 +
src/string/aarch64/memset.S | 1 +
src/thread/aarch64/__unmapself.s | 1 +
src/thread/aarch64/clone.s | 1 +
src/thread/aarch64/syscall_cp.s | 1 +
tools/pac-bti-aarch64.awk | 122 +++++++++++++++++++++++++++++++
18 files changed, 176 insertions(+), 7 deletions(-)
create mode 100644 tools/pac-bti-aarch64.awk
diff --git a/Makefile b/Makefile
index 4e62e0b3..620636ec 100644
--- a/Makefile
+++ b/Makefile
@@ -132,14 +132,23 @@ $(CRT_OBJS): CFLAGS_ALL += -DCRT
$(LOBJS) $(LDSO_OBJS): CFLAGS_ALL += -fPIC
CC_CMD = $(CC) $(CFLAGS_ALL) -c -o $@ $<
+CCS_CMD = $(CC_CMD)
+AS_CMD = $(CC_CMD)
-# Choose invocation of assembler to be used
+AWK_OPTS :=
ifeq ($(ADD_CFI),yes)
- AS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ -
- CCS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler-with-cpp -c -o $@ -
-else
- AS_CMD = $(CC_CMD)
- CCS_CMD = $(CC_CMD)
+AWK_OPTS += -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk
+endif
+
+ifeq ($(ARCH),aarch64)
+AWK_OPTS += -f $(srcdir)/tools/pac-bti-aarch64.awk
+AWK_OPTS += -vaarch64_pac=$(AARCH64_PAC) -vaarch64_bti=$(AARCH64_BTI)
+endif
+
+# Choose invocation of assembler to be used
+ifneq ($(AWK_OPTS),)
+ AS_CMD = LC_ALL=C awk $(AWK_OPTS) $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ -
+ CCS_CMD = LC_ALL=C awk $(AWK_OPTS) $< | $(CC) $(CFLAGS_ALL) -x assembler-with-cpp -c -o $@ -
endif
obj/%.o: $(srcdir)/%.s
diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
index b64fb3dd..9384dcba 100644
--- a/arch/aarch64/crt_arch.h
+++ b/arch/aarch64/crt_arch.h
@@ -3,6 +3,9 @@ __asm__(
".global " START "\n"
".type " START ",%function\n"
START ":\n"
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+" hint 34\n" /* bti c */
+#endif
" mov x29, #0\n"
" mov x30, #0\n"
" mov x0, sp\n"
diff --git a/configure b/configure
index 3809f24c..7033f8cd 100755
--- a/configure
+++ b/configure
@@ -111,7 +111,10 @@ return 1
fi
}
-
+get_macro_value () { # print what macro $1 expands to under ${CC} ${CFLAGS}, or $2 if it is not defined
+printf "#ifdef $1\n$1_VALUE=$1\n#else\n$1_VALUE=$2\n#endif\n" \
+| ${CC} ${CFLAGS} -E - | grep "$1_VALUE" | cut -d'=' -f2- # keep only the text after the first '='
+}
# Beginning of actual script
@@ -672,6 +675,12 @@ fi
if test "$ARCH" = "aarch64" ; then
trycppif __AARCH64EB__ "$t" && SUBARCH=${SUBARCH}_be
CFLAGS_AUTO="${CFLAGS_AUTO} -DARCH_SUPPORTS_DL_ADD_PROTECTIONS"
+printf "Checking if bti is enabled..."
+aarch64_bti=$(get_macro_value __ARM_FEATURE_BTI_DEFAULT 0)
+printf " $aarch64_bti\n"
+printf "Checking if pac is enabled..."
+aarch64_pac=$(get_macro_value __ARM_FEATURE_PAC_DEFAULT 0)
+printf " $aarch64_pac\n"
fi
if test "$ARCH" = "loongarch64" ; then
@@ -831,6 +840,8 @@ ALL_TOOLS = $tools
TOOL_LIBS = $tool_libs
ADD_CFI = $ADD_CFI
MALLOC_DIR = $malloc_dir
+AARCH64_BTI = $aarch64_bti
+AARCH64_PAC = $aarch64_pac
EOF
test "x$static" = xno && echo "STATIC_LIBS ="
test "x$shared" = xno && echo "SHARED_LIBS ="
diff --git a/crt/aarch64/crti.s b/crt/aarch64/crti.s
index 3776fa64..9b309f41 100644
--- a/crt/aarch64/crti.s
+++ b/crt/aarch64/crti.s
@@ -3,6 +3,7 @@
.type _init,%function
.align 2
_init:
+ paciasp
stp x29,x30,[sp,-16]!
mov x29,sp
@@ -11,5 +12,6 @@ _init:
.type _fini,%function
.align 2
_fini:
+ paciasp
stp x29,x30,[sp,-16]!
mov x29,sp
diff --git a/crt/aarch64/crtn.s b/crt/aarch64/crtn.s
index 73cab692..4da1882a 100644
--- a/crt/aarch64/crtn.s
+++ b/crt/aarch64/crtn.s
@@ -1,7 +1,9 @@
.section .init
ldp x29,x30,[sp],#16
+ autiasp
ret
.section .fini
ldp x29,x30,[sp],#16
+ autiasp
ret
diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
index 8f3ec965..c9649f8b 100644
--- a/src/fenv/aarch64/fenv.s
+++ b/src/fenv/aarch64/fenv.s
@@ -1,6 +1,7 @@
.global fegetround
.type fegetround,%function
fegetround:
+ bti c
mrs x0, fpcr
and w0, w0, #0xc00000
ret
@@ -9,6 +10,7 @@ fegetround:
.hidden __fesetround
.type __fesetround,%function
__fesetround:
+ bti c
mrs x1, fpcr
bic w1, w1, #0xc00000
orr w1, w1, w0
@@ -19,6 +21,7 @@ __fesetround:
.global fetestexcept
.type fetestexcept,%function
fetestexcept:
+ bti c
and w0, w0, #0x1f
mrs x1, fpsr
and w0, w0, w1
@@ -27,6 +30,7 @@ fetestexcept:
.global feclearexcept
.type feclearexcept,%function
feclearexcept:
+ bti c
and w0, w0, #0x1f
mrs x1, fpsr
bic w1, w1, w0
@@ -37,6 +41,7 @@ feclearexcept:
.global feraiseexcept
.type feraiseexcept,%function
feraiseexcept:
+ bti c
and w0, w0, #0x1f
mrs x1, fpsr
orr w1, w1, w0
@@ -47,6 +52,7 @@ feraiseexcept:
.global fegetenv
.type fegetenv,%function
fegetenv:
+ bti c
mrs x1, fpcr
mrs x2, fpsr
stp w1, w2, [x0]
@@ -57,6 +63,7 @@ fegetenv:
.global fesetenv
.type fesetenv,%function
fesetenv:
+ bti c
mov x1, #0
mov x2, #0
cmn x0, #1
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index c6c685b3..d68ff4a9 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -6,6 +6,7 @@
.hidden __tlsdesc_static
.type __tlsdesc_static,@function
__tlsdesc_static:
+ bti c
ldr x0,[x0,#8]
ret
@@ -19,6 +20,7 @@ __tlsdesc_static:
.hidden __tlsdesc_dynamic
.type __tlsdesc_dynamic,@function
__tlsdesc_dynamic:
+ bti c
stp x1,x2,[sp,#-16]!
mrs x1,tpidr_el0 // tp
ldr x0,[x0,#8] // p
diff --git a/src/process/aarch64/vfork.s b/src/process/aarch64/vfork.s
index 429bec8c..332aa4ae 100644
--- a/src/process/aarch64/vfork.s
+++ b/src/process/aarch64/vfork.s
@@ -1,6 +1,7 @@
.global vfork
.type vfork,%function
vfork:
+ bti c
mov x8, 220 // SYS_clone
mov x0, 0x4111 // SIGCHLD | CLONE_VM | CLONE_VFORK
mov x1, 0
diff --git a/src/setjmp/aarch64/longjmp.s b/src/setjmp/aarch64/longjmp.s
index 0af9c50e..990642a4 100644
--- a/src/setjmp/aarch64/longjmp.s
+++ b/src/setjmp/aarch64/longjmp.s
@@ -4,6 +4,7 @@
.type longjmp,%function
_longjmp:
longjmp:
+ bti c
// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
ldp x19, x20, [x0,#0]
ldp x21, x22, [x0,#16]
diff --git a/src/setjmp/aarch64/setjmp.s b/src/setjmp/aarch64/setjmp.s
index f49288aa..3e94371b 100644
--- a/src/setjmp/aarch64/setjmp.s
+++ b/src/setjmp/aarch64/setjmp.s
@@ -7,6 +7,7 @@
__setjmp:
_setjmp:
setjmp:
+ bti c
// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
stp x19, x20, [x0,#0]
stp x21, x22, [x0,#16]
diff --git a/src/signal/aarch64/restore.s b/src/signal/aarch64/restore.s
index d4e5fcf1..341a1766 100644
--- a/src/signal/aarch64/restore.s
+++ b/src/signal/aarch64/restore.s
@@ -6,5 +6,6 @@ __restore:
.hidden __restore_rt
.type __restore_rt,%function
__restore_rt:
+ bti c
mov x8,#139 // SYS_rt_sigreturn
svc 0
diff --git a/src/signal/aarch64/sigsetjmp.s b/src/signal/aarch64/sigsetjmp.s
index 75910c43..2b5d017f 100644
--- a/src/signal/aarch64/sigsetjmp.s
+++ b/src/signal/aarch64/sigsetjmp.s
@@ -4,6 +4,7 @@
.type __sigsetjmp,%function
sigsetjmp:
__sigsetjmp:
+ bti c
cbz x1,setjmp
str x30,[x0,#176]
@@ -11,6 +12,7 @@ __sigsetjmp:
mov x19,x0
bl setjmp
+ bti j
mov w1,w0
mov x0,x19
diff --git a/src/string/aarch64/memcpy.S b/src/string/aarch64/memcpy.S
index 48bb8a8d..5959afb4 100644
--- a/src/string/aarch64/memcpy.S
+++ b/src/string/aarch64/memcpy.S
@@ -53,6 +53,7 @@
.global memcpy
.type memcpy,%function
memcpy:
+ bti c
add srcend, src, count
add dstend, dstin, count
cmp count, 128
diff --git a/src/string/aarch64/memset.S b/src/string/aarch64/memset.S
index f0d29b7f..37fdbab0 100644
--- a/src/string/aarch64/memset.S
+++ b/src/string/aarch64/memset.S
@@ -23,6 +23,7 @@
.type memset,%function
memset:
+ bti c
dup v0.16B, valw
add dstend, dstin, count
diff --git a/src/thread/aarch64/__unmapself.s b/src/thread/aarch64/__unmapself.s
index 2c5d254f..f9987538 100644
--- a/src/thread/aarch64/__unmapself.s
+++ b/src/thread/aarch64/__unmapself.s
@@ -1,6 +1,7 @@
.global __unmapself
.type __unmapself,%function
__unmapself:
+ bti c
mov x8,#215 // SYS_munmap
svc 0
mov x8,#93 // SYS_exit
diff --git a/src/thread/aarch64/clone.s b/src/thread/aarch64/clone.s
index aff8155b..e900a92a 100644
--- a/src/thread/aarch64/clone.s
+++ b/src/thread/aarch64/clone.s
@@ -8,6 +8,7 @@
.hidden __clone
.type __clone,%function
__clone:
+ bti c
// align stack and save func,arg
and x1,x1,#-16
stp x0,x3,[x1,#-16]!
diff --git a/src/thread/aarch64/syscall_cp.s b/src/thread/aarch64/syscall_cp.s
index 41db68af..e6baeef5 100644
--- a/src/thread/aarch64/syscall_cp.s
+++ b/src/thread/aarch64/syscall_cp.s
@@ -16,6 +16,7 @@
.type __syscall_cp_asm,%function
__syscall_cp_asm:
__cp_begin:
+ bti c
ldr w0,[x0]
cbnz w0,__cp_cancel
mov x8,x1
diff --git a/tools/pac-bti-aarch64.awk b/tools/pac-bti-aarch64.awk
new file mode 100644
index 00000000..7b0222f1
--- /dev/null
+++ b/tools/pac-bti-aarch64.awk
@@ -0,0 +1,122 @@
+#!/usr/bin/env awk
+#
+# This script post-processes aarch64 assembly to modify PAC and BTI instructions.
+# The aarch64 code is annotated as if PAC with the A key and BTI are enabled, and
+# then stripped or modified based on build-time detection of compiler flags. Rather
+# than attempt to insert the instructions, it's easier to remove/modify them after they
+# are added. This keeps the awk script and post-processing much simpler. Note that we also
+# post-process to use the hint instructions, as these are backwards compatible with
+# older binutils. Also note that *these* PAC and BTI instructions, since they
+# are in the hint space, also NOP on unsupported hardware. So there is no real penalty
+# to run a PAC/BTI aware binary on older hardware except for the cost of the NOP.
+#
+# Variables:
+# - aarch64_pac=[0|1|2] - Set this to 0 to disable pac, 1 to use the a key, and 2 to use the b key
+# - aarch64_bti=[0|1] - set this to 0 to disable bti, or 1 to enable bti
+
+# Details on PAC and BTI can be found in the manuals:
+# - https://developer.arm.com/documentation/ddi0487/latest
+# - https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst
+#
+# However, the TL;DR is the 3 part blog post that explores the relevant
+# parts for software:
+# - https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-on-aarch64
+
+BEGIN {
+	# __ARM_FEATURE_PAC_DEFAULT bit 2 (leaf functions signed too) needs no different instructions: fold 5 -> 1, 6 -> 2.
+	if (aarch64_pac ~ /^(5|6)$/) aarch64_pac -= 4
+	if (aarch64_pac !~ /^(0|1|2)$/) {
+		print "Error: invalid value for aarch64_pac (" aarch64_pac "). Must be one of: 0, 1, 2, 5, 6." > "/dev/stderr"
+		exit 1
+	}
+	# aarch64_bti must be 0 (strip BTI landing pads) or 1 (keep them, rewritten as hints).
+	if (aarch64_bti !~ /^(0|1)$/) {
+		print "Error: invalid value for aarch64_bti (" aarch64_bti "). Must be one of: 0, 1." > "/dev/stderr"
+		exit 1
+	}
+}
+
+# Body
+# Behavior based on this table
+# | case | aarch64_bti | aarch64_pac | action |
+# | ---- | ----------- | ----------- | ------ |
+# | 1 | 0 | 0 | strip all paciasp, autiasp and bti c or j instructions |
+# | 2 | 0 | a | strip all bti c or j instructions, rewrite pac using hints |
+# | 3 | 0 | b | change paciasp to pacibsp and autiasp to autibsp, using hints, and strip all bti c or j instructions |
+# | 4 | 1 | 0 | change all paciasp to bti c, rewrite bti c and j using hints, and strip all autiasp instructions |
+# | 5 | 1 | a | rewrite to hints |
+# | 6 | 1 | b | change paciasp to pacibsp and autiasp to autibsp, and rewrite bti c and j, all using hints |
+{
+	# Per-line rewrite. Hint-space spellings of each mnemonic are kept in one spot so older assemblers accept the output.
+	PACIASP = "hint 25"
+	AUTIASP = "hint 29"
+	PACIBSP = "hint 27"
+	AUTIBSP = "hint 31"
+	BTI_C = "hint 34"
+	BTI_J = "hint 36"
+
+	# case 1 - PAC and BTI both off: drop every annotated line
+	if (aarch64_bti == 0 && aarch64_pac == 0 &&
+	    /(paciasp|autiasp|bti[[:space:]]+[cj])/) {
+		next
+	# case 2 - A key, no BTI: drop bti c/j, rewrite PAC as hints
+	} else if (aarch64_bti == 0 && aarch64_pac == 1) {
+		if (/bti[[:space:]]+[cj]/) {
+			next
+		}
+		gsub(/paciasp/, PACIASP)
+		gsub(/autiasp/, AUTIASP)
+	# case 3 - B key, no BTI: drop bti c/j, swap PAC to the B-key hints
+	} else if (aarch64_bti == 0 && aarch64_pac == 2) {
+		if (/bti[[:space:]]+[cj]/) {
+			next
+		} else {
+			gsub(/paciasp/, PACIBSP)
+			gsub(/autiasp/, AUTIBSP)
+		}
+	# case 4 - BTI only: drop autiasp, turn paciasp into bti c, rewrite bti c/j as hints
+	} else if (aarch64_bti == 1 && aarch64_pac == 0) {
+		if (/autiasp/) {
+			next
+		} else {
+			gsub(/paciasp/, BTI_C)
+			gsub(/bti[[:space:]]+c/, BTI_C)	# same whitespace class as the strip paths above
+			gsub(/bti[[:space:]]+j/, BTI_J)
+		}
+	# case 5 - BTI and A key: rewrite everything as hints
+	} else if (aarch64_bti == 1 && aarch64_pac == 1) {
+		gsub(/paciasp/, PACIASP)
+		gsub(/autiasp/, AUTIASP)
+		gsub(/bti[[:space:]]+c/, BTI_C)
+		gsub(/bti[[:space:]]+j/, BTI_J)
+	# case 6 - BTI and B key: swap PAC to the B-key hints, rewrite bti c/j as hints
+	} else if (aarch64_bti == 1 && aarch64_pac == 2) {
+		gsub(/paciasp/, PACIBSP)
+		gsub(/autiasp/, AUTIBSP)
+		gsub(/bti[[:space:]]+c/, BTI_C)
+		gsub(/bti[[:space:]]+j/, BTI_J)
+	}
+
+	print
+}
+
+END {
+	# Append a .note.gnu.property section advertising what the object supports (BTI = 1, PAC = 2).
+	GNU_PROPERTY_AARCH64_BTI = aarch64_bti
+	GNU_PROPERTY_AARCH64_POINTER_AUTH = (aarch64_pac != 0 ? 2 : 0)
+	# The name-size comment spells its backslash as "\\0": a bare "\0" is a NUL escape, which POSIX awk leaves undefined.
+	if (aarch64_bti != 0 || aarch64_pac != 0) {
+		print "\n\n" \
+		".pushsection .note.gnu.property, \"a\"; /* Start a new allocatable section */\n" \
+		".balign 8; /* align it on an 8-byte boundary */\n" \
+		".long 4; /* size of \"GNU\\0\" */\n" \
+		".long 0x10; /* size of descriptor */\n" \
+		".long 0x5; /* NT_GNU_PROPERTY_TYPE_0 */\n" \
+		".asciz \"GNU\";\n" \
+		".long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */\n" \
+		".long 4; /* Four bytes of data */\n" \
+		".long ("GNU_PROPERTY_AARCH64_BTI"|"GNU_PROPERTY_AARCH64_POINTER_AUTH"); /* BTI or PAC is enabled */\n" \
+		".long 0; /* padding for 8 byte alignment */\n" \
+		".popsection; /* end the section */"
+	}
+}
--
2.51.0
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.