Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251113200633.833127-4-bill.roberts@arm.com>
Date: Thu, 13 Nov 2025 13:44:28 -0600
From: Bill Roberts <bill.roberts@....com>
To: musl@...ts.openwall.com
Cc: Bill Roberts <bill.roberts@....com>
Subject: [PATCH 3/3] aarch64: enable PAC and BTI instruction support in musl build

This change adds support for Pointer Authentication (PAC) and Branch
Target Identification (BTI) within musl’s own code on AArch64. These
features improve control-flow integrity and mitigate return-oriented
programming attacks by hardening indirect branches and return
instructions.

To integrate these instructions robustly across toolchains:

 - PAC and BTI instructions are inserted directly into the assembly,
   rather than being emitted via CFI directives. This approach is taken
   because it is far simpler to remove or rewrite instructions using AWK
   than to identify and manually annotate every location that requires
   them. New assembly code should therefore be written with PAC and BTI
   awareness in mind.

 - Since some older toolchains may not recognize PAC or BTI mnemonics,
   the post-processing step rewrites them into equivalent `hint`
   instructions. This allows musl to continue building successfully on
   systems without assembler support for these instructions, while still
   emitting the correct opcodes when using newer toolchains.

Together, these changes prepare musl for secure execution environments
with PAC and BTI enabled, while preserving backward compatibility.

References:
 - Arm Architecture Reference Manual for A-profile architecture (Arm ARM):
   https://developer.arm.com/documentation/ddi0487/latest
 - ELF for the Arm® 64-bit Architecture (AArch64) ABI supplement:
   https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
 - Arm Community Blog – *Enabling Pointer Authentication and Branch Target Identification*:
   1. https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-part1
   2. https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-part2
   3. https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-part3

Signed-off-by: Bill Roberts <bill.roberts@....com>
---
 Makefile                         |  21 ++++--
 arch/aarch64/crt_arch.h          |   3 +
 configure                        |  13 +++-
 crt/aarch64/crti.s               |   2 +
 crt/aarch64/crtn.s               |   2 +
 src/fenv/aarch64/fenv.s          |   7 ++
 src/ldso/aarch64/tlsdesc.s       |   2 +
 src/process/aarch64/vfork.s      |   1 +
 src/setjmp/aarch64/longjmp.s     |   1 +
 src/setjmp/aarch64/setjmp.s      |   1 +
 src/signal/aarch64/restore.s     |   1 +
 src/signal/aarch64/sigsetjmp.s   |   2 +
 src/string/aarch64/memcpy.S      |   1 +
 src/string/aarch64/memset.S      |   1 +
 src/thread/aarch64/__unmapself.s |   1 +
 src/thread/aarch64/clone.s       |   1 +
 src/thread/aarch64/syscall_cp.s  |   1 +
 tools/pac-bti-aarch64.awk        | 122 +++++++++++++++++++++++++++++++
 18 files changed, 176 insertions(+), 7 deletions(-)
 create mode 100644 tools/pac-bti-aarch64.awk

diff --git a/Makefile b/Makefile
index 4e62e0b3..620636ec 100644
--- a/Makefile
+++ b/Makefile
@@ -132,14 +132,23 @@ $(CRT_OBJS): CFLAGS_ALL += -DCRT
 $(LOBJS) $(LDSO_OBJS): CFLAGS_ALL += -fPIC
 
 CC_CMD = $(CC) $(CFLAGS_ALL) -c -o $@ $<
+CCS_CMD = $(CC_CMD)
+AS_CMD = $(CC_CMD)
 
-# Choose invocation of assembler to be used
+AWK_OPTS :=
 ifeq ($(ADD_CFI),yes)
-	AS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ -
-	CCS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler-with-cpp -c -o $@ -
-else
-	AS_CMD = $(CC_CMD)
-	CCS_CMD = $(CC_CMD)
+AWK_OPTS += -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk
+endif
+
+ifeq ($(ARCH),aarch64)
+AWK_OPTS += -f $(srcdir)/tools/pac-bti-aarch64.awk
+AWK_OPTS += -vaarch64_pac=$(AARCH64_PAC) -vaarch64_bti=$(AARCH64_BTI)
+endif
+
+# Choose invocation of assembler to be used
+ifneq ($(AWK_OPTS),)
+    AS_CMD = LC_ALL=C awk $(AWK_OPTS) $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ -
+    CCS_CMD = LC_ALL=C awk $(AWK_OPTS) $< | $(CC) $(CFLAGS_ALL) -x assembler-with-cpp -c -o $@ -
 endif
 
 obj/%.o: $(srcdir)/%.s
diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
index b64fb3dd..9384dcba 100644
--- a/arch/aarch64/crt_arch.h
+++ b/arch/aarch64/crt_arch.h
@@ -3,6 +3,9 @@ __asm__(
 ".global " START "\n"
 ".type " START ",%function\n"
 START ":\n"
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+"	hint 34\n" /* bti c */
+#endif
 "	mov x29, #0\n"
 "	mov x30, #0\n"
 "	mov x0, sp\n"
diff --git a/configure b/configure
index 3809f24c..7033f8cd 100755
--- a/configure
+++ b/configure
@@ -111,7 +111,10 @@ return 1
 fi
 }
 
-
+get_macro_value () { # usage: get_macro_value MACRO DEFAULT; prints MACRO's preprocessed value, or DEFAULT if MACRO is undefined
+printf '#ifdef %s\n%s_VALUE=%s\n#else\n%s_VALUE=%s\n#endif\n' "$1" "$1" "$1" "$1" "$2" \
+| ${CC} ${CFLAGS} -E - | grep "$1_VALUE" | cut -d'=' -f2-
+}
 
 # Beginning of actual script
 
@@ -672,6 +675,12 @@ fi
 if test "$ARCH" = "aarch64" ; then
 trycppif __AARCH64EB__ "$t" && SUBARCH=${SUBARCH}_be
 CFLAGS_AUTO="${CFLAGS_AUTO} -DARCH_SUPPORTS_DL_ADD_PROTECTIONS"
+printf "Checking if bti is enabled..."
+aarch64_bti=$(get_macro_value __ARM_FEATURE_BTI_DEFAULT 0)
+printf " $aarch64_bti\n"
+printf "Checking if pac is enabled..."
+aarch64_pac=$(get_macro_value __ARM_FEATURE_PAC_DEFAULT 0)
+printf " $aarch64_pac\n"
 fi
 
 if test "$ARCH" = "loongarch64" ; then
@@ -831,6 +840,8 @@ ALL_TOOLS = $tools
 TOOL_LIBS = $tool_libs
 ADD_CFI = $ADD_CFI
 MALLOC_DIR = $malloc_dir
+AARCH64_BTI = $aarch64_bti
+AARCH64_PAC = $aarch64_pac
 EOF
 test "x$static" = xno && echo "STATIC_LIBS ="
 test "x$shared" = xno && echo "SHARED_LIBS ="
diff --git a/crt/aarch64/crti.s b/crt/aarch64/crti.s
index 3776fa64..9b309f41 100644
--- a/crt/aarch64/crti.s
+++ b/crt/aarch64/crti.s
@@ -3,6 +3,7 @@
 .type _init,%function
 .align 2
 _init:
+	paciasp
 	stp x29,x30,[sp,-16]!
 	mov x29,sp
 
@@ -11,5 +12,6 @@ _init:
 .type _fini,%function
 .align 2
 _fini:
+	paciasp
 	stp x29,x30,[sp,-16]!
 	mov x29,sp
diff --git a/crt/aarch64/crtn.s b/crt/aarch64/crtn.s
index 73cab692..4da1882a 100644
--- a/crt/aarch64/crtn.s
+++ b/crt/aarch64/crtn.s
@@ -1,7 +1,9 @@
 .section .init
 	ldp x29,x30,[sp],#16
+	autiasp
 	ret
 
 .section .fini
 	ldp x29,x30,[sp],#16
+	autiasp
 	ret
diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
index 8f3ec965..c9649f8b 100644
--- a/src/fenv/aarch64/fenv.s
+++ b/src/fenv/aarch64/fenv.s
@@ -1,6 +1,7 @@
 .global fegetround
 .type fegetround,%function
 fegetround:
+	bti c
 	mrs x0, fpcr
 	and w0, w0, #0xc00000
 	ret
@@ -9,6 +10,7 @@ fegetround:
 .hidden __fesetround
 .type __fesetround,%function
 __fesetround:
+	bti c
 	mrs x1, fpcr
 	bic w1, w1, #0xc00000
 	orr w1, w1, w0
@@ -19,6 +21,7 @@ __fesetround:
 .global fetestexcept
 .type fetestexcept,%function
 fetestexcept:
+	bti c
 	and w0, w0, #0x1f
 	mrs x1, fpsr
 	and w0, w0, w1
@@ -27,6 +30,7 @@ fetestexcept:
 .global feclearexcept
 .type feclearexcept,%function
 feclearexcept:
+	bti c
 	and w0, w0, #0x1f
 	mrs x1, fpsr
 	bic w1, w1, w0
@@ -37,6 +41,7 @@ feclearexcept:
 .global feraiseexcept
 .type feraiseexcept,%function
 feraiseexcept:
+	bti c
 	and w0, w0, #0x1f
 	mrs x1, fpsr
 	orr w1, w1, w0
@@ -47,6 +52,7 @@ feraiseexcept:
 .global fegetenv
 .type fegetenv,%function
 fegetenv:
+	bti c
 	mrs x1, fpcr
 	mrs x2, fpsr
 	stp w1, w2, [x0]
@@ -57,6 +63,7 @@ fegetenv:
 .global fesetenv
 .type fesetenv,%function
 fesetenv:
+	bti c
 	mov x1, #0
 	mov x2, #0
 	cmn x0, #1
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index c6c685b3..d68ff4a9 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -6,6 +6,7 @@
 .hidden __tlsdesc_static
 .type __tlsdesc_static,@function
 __tlsdesc_static:
+	bti c
 	ldr x0,[x0,#8]
 	ret
 
@@ -19,6 +20,7 @@ __tlsdesc_static:
 .hidden __tlsdesc_dynamic
 .type __tlsdesc_dynamic,@function
 __tlsdesc_dynamic:
+	bti c
 	stp x1,x2,[sp,#-16]!
 	mrs x1,tpidr_el0      // tp
 	ldr x0,[x0,#8]        // p
diff --git a/src/process/aarch64/vfork.s b/src/process/aarch64/vfork.s
index 429bec8c..332aa4ae 100644
--- a/src/process/aarch64/vfork.s
+++ b/src/process/aarch64/vfork.s
@@ -1,6 +1,7 @@
 .global vfork
 .type vfork,%function
 vfork:
+	bti c
 	mov x8, 220    // SYS_clone
 	mov x0, 0x4111 // SIGCHLD | CLONE_VM | CLONE_VFORK
 	mov x1, 0
diff --git a/src/setjmp/aarch64/longjmp.s b/src/setjmp/aarch64/longjmp.s
index 0af9c50e..990642a4 100644
--- a/src/setjmp/aarch64/longjmp.s
+++ b/src/setjmp/aarch64/longjmp.s
@@ -4,6 +4,7 @@
 .type longjmp,%function
 _longjmp:
 longjmp:
+	bti c
 	// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
 	ldp x19, x20, [x0,#0]
 	ldp x21, x22, [x0,#16]
diff --git a/src/setjmp/aarch64/setjmp.s b/src/setjmp/aarch64/setjmp.s
index f49288aa..3e94371b 100644
--- a/src/setjmp/aarch64/setjmp.s
+++ b/src/setjmp/aarch64/setjmp.s
@@ -7,6 +7,7 @@
 __setjmp:
 _setjmp:
 setjmp:
+	bti c
 	// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
 	stp x19, x20, [x0,#0]
 	stp x21, x22, [x0,#16]
diff --git a/src/signal/aarch64/restore.s b/src/signal/aarch64/restore.s
index d4e5fcf1..341a1766 100644
--- a/src/signal/aarch64/restore.s
+++ b/src/signal/aarch64/restore.s
@@ -6,5 +6,6 @@ __restore:
 .hidden __restore_rt
 .type __restore_rt,%function
 __restore_rt:
+	bti c
 	mov x8,#139 // SYS_rt_sigreturn
 	svc 0
diff --git a/src/signal/aarch64/sigsetjmp.s b/src/signal/aarch64/sigsetjmp.s
index 75910c43..2b5d017f 100644
--- a/src/signal/aarch64/sigsetjmp.s
+++ b/src/signal/aarch64/sigsetjmp.s
@@ -4,6 +4,7 @@
 .type __sigsetjmp,%function
 sigsetjmp:
 __sigsetjmp:
+	bti c
 	cbz x1,setjmp
 
 	str x30,[x0,#176]
@@ -11,6 +12,7 @@ __sigsetjmp:
 	mov x19,x0
 
 	bl setjmp
+	bti j
 
 	mov w1,w0
 	mov x0,x19
diff --git a/src/string/aarch64/memcpy.S b/src/string/aarch64/memcpy.S
index 48bb8a8d..5959afb4 100644
--- a/src/string/aarch64/memcpy.S
+++ b/src/string/aarch64/memcpy.S
@@ -53,6 +53,7 @@
 .global memcpy
 .type memcpy,%function
 memcpy:
+	bti c
 	add     srcend, src, count
 	add     dstend, dstin, count
 	cmp     count, 128
diff --git a/src/string/aarch64/memset.S b/src/string/aarch64/memset.S
index f0d29b7f..37fdbab0 100644
--- a/src/string/aarch64/memset.S
+++ b/src/string/aarch64/memset.S
@@ -23,6 +23,7 @@
 .type memset,%function
 memset:
 
+	bti c
 	dup     v0.16B, valw
 	add     dstend, dstin, count
 
diff --git a/src/thread/aarch64/__unmapself.s b/src/thread/aarch64/__unmapself.s
index 2c5d254f..f9987538 100644
--- a/src/thread/aarch64/__unmapself.s
+++ b/src/thread/aarch64/__unmapself.s
@@ -1,6 +1,7 @@
 .global __unmapself
 .type   __unmapself,%function
 __unmapself:
+	bti c
 	mov x8,#215 // SYS_munmap
 	svc 0
 	mov x8,#93 // SYS_exit
diff --git a/src/thread/aarch64/clone.s b/src/thread/aarch64/clone.s
index aff8155b..e900a92a 100644
--- a/src/thread/aarch64/clone.s
+++ b/src/thread/aarch64/clone.s
@@ -8,6 +8,7 @@
 .hidden __clone
 .type   __clone,%function
 __clone:
+	bti c
 	// align stack and save func,arg
 	and x1,x1,#-16
 	stp x0,x3,[x1,#-16]!
diff --git a/src/thread/aarch64/syscall_cp.s b/src/thread/aarch64/syscall_cp.s
index 41db68af..e6baeef5 100644
--- a/src/thread/aarch64/syscall_cp.s
+++ b/src/thread/aarch64/syscall_cp.s
@@ -16,6 +16,7 @@
 .type __syscall_cp_asm,%function
 __syscall_cp_asm:
 __cp_begin:
+	bti c
 	ldr w0,[x0]
 	cbnz w0,__cp_cancel
 	mov x8,x1
diff --git a/tools/pac-bti-aarch64.awk b/tools/pac-bti-aarch64.awk
new file mode 100644
index 00000000..7b0222f1
--- /dev/null
+++ b/tools/pac-bti-aarch64.awk
@@ -0,0 +1,122 @@
+#!/usr/bin/env awk
+#
+# This script post-processes aarch64 assembly to modify PAC and BTI instructions.
+# The aarch64 code is annotated as if PAC with the A key and BTI are enabled, and
+# then stripped or modified based on build-time detection of compiler flags. Rather
+# than attempt to insert the instructions, it's easier to remove/modify them after they are
+# added. This keeps the awk script and post-processing much simpler. Note that we also
+# post-process to use the hint instructions, as these are backwards compatible with
+# older binutils. Also note that *these* PAC and BTI instructions, since they
+# are in the hint space, also NOP on unsupported hardware. So there is no real penalty
+# to run a PAC/BTI aware binary on older hardware except for the cost of the NOP.
+#
+# Variables:
+# - aarch64_pac=[0|1|2] - Set this to 0 to disable pac, 1 to use the a key, and 2 to use the b key
+# - aarch64_bti=[0|1] - set this to 0 to disable bti, or 1 to enable bti
+
+# Details on PAC and BTI can be found in the manuals:
+#     - https://developer.arm.com/documentation/ddi0487/latest
+#     - https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst
+#
+# However, the TL;DR is the 3 part blog post that explores the relevant
+# parts for software:
+#  - https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-on-aarch64
+
+BEGIN {
+  # Validate the -v inputs; on failure also print an assembler .error, because awk's exit status is lost when stdout is piped to the compiler.
+  if (aarch64_pac !~ /^(0|1|2)$/) {
+    print "Error: invalid value for aarch64_pac (" aarch64_pac "). Must be one of: 0, 1, 2." > "/dev/stderr"
+    print ".error \"pac-bti-aarch64.awk: invalid value for aarch64_pac\""
+    exit 1
+  }
+  if (aarch64_bti !~ /^(0|1)$/) {
+    print "Error: invalid value for aarch64_bti (" aarch64_bti "). Must be one of: 0, 1." > "/dev/stderr"
+    print ".error \"pac-bti-aarch64.awk: invalid value for aarch64_bti\""
+    exit 1
+  }
+}
+
+# Body: runs once per input line and either drops the line, rewrites it, or passes it through.
+# Behavior based on this table (aarch64_pac: 0 = off, 1 = A key, 2 = B key)
+# | case | aarch64_bti | aarch64_pac | action |
+# | ---- | ----------- | ----------- | ------ |
+# |    1 |           0 |           0 | strip all paciasp, autiasp and bti c or j instructions |
+# |    2 |           0 |           1 | strip all bti c or j instructions, rewrite pac using hints |
+# |    3 |           0 |           2 | change paciasp to pacibsp and autiasp to autibsp instructions, using hints, and strip all bti c or j instructions |
+# |    4 |           1 |           0 | change all paciasp to bti c instructions, rewrite bti c or j using hints, and strip all autiasp instructions |
+# |    5 |           1 |           1 | rewrite all to hints |
+# |    6 |           1 |           2 | change paciasp to pacibsp and autiasp to autibsp instructions, rewrite all using hints |
+{
+  # Keep the mnemonic -> hint instruction mapping in one spot
+  PACIASP = "hint 25"
+  AUTIASP = "hint 29"
+  PACIBSP = "hint 27"
+  AUTIBSP = "hint 31"
+  BTI_C = "hint 34"
+  BTI_J = "hint 36"
+
+  # case 1 - strip all
+  if (aarch64_bti == 0 && aarch64_pac == 0 &&
+      /(paciasp|autiasp|bti[[:space:]]+[cj])/) {
+    next
+  # case 2 - strip bti c or j, rewrite pac to hints
+  } else if (aarch64_bti == 0 && aarch64_pac == 1) {
+    if (/bti[[:space:]]+[cj]/) {
+      next
+    }
+    gsub(/paciasp/, PACIASP)
+    gsub(/autiasp/, AUTIASP)
+  # case 3 - swap for b key and strip bti c or j
+  } else if (aarch64_bti == 0 && aarch64_pac == 2) {
+    if (/bti[[:space:]]+[cj]/) {
+      next
+    } else {
+      gsub(/paciasp/, PACIBSP)
+      gsub(/autiasp/, AUTIBSP)
+    }
+  # case 4 - remove autiasp, swap paciasp for bti c, rewrite bti c or j (same whitespace matching as the strip cases)
+  } else if (aarch64_bti == 1 && aarch64_pac == 0) {
+    if (/autiasp/) {
+      next
+    } else {
+      gsub(/paciasp/, BTI_C)
+      gsub(/bti[[:space:]]+c/, BTI_C)
+      gsub(/bti[[:space:]]+j/, BTI_J)
+    }
+  # case 5 - rewrite all to hints
+  } else if (aarch64_bti == 1 && aarch64_pac == 1) {
+    gsub(/paciasp/, PACIASP)
+    gsub(/autiasp/, AUTIASP)
+    gsub(/bti[[:space:]]+c/, BTI_C)
+    gsub(/bti[[:space:]]+j/, BTI_J)
+  # case 6 - swap for b key, rewrite all to hints
+  } else if (aarch64_bti == 1 && aarch64_pac == 2) {
+    gsub(/paciasp/, PACIBSP)
+    gsub(/autiasp/, AUTIBSP)
+    gsub(/bti[[:space:]]+c/, BTI_C)
+    gsub(/bti[[:space:]]+j/, BTI_J)
+  }
+
+  print
+}
+
+END {
+  # Append a .note.gnu.property section advertising the BTI/PAC features the output was processed for.
+  GNU_PROPERTY_AARCH64_BTI = aarch64_bti                          # 1 when BTI is on, else 0
+  GNU_PROPERTY_AARCH64_POINTER_AUTH = (aarch64_pac != 0 ? 2 : 0)  # 2 when PAC is on (either key), else 0
+  # No note when both are off. "\\0" below prints a literal \0; a bare "\0" would be an octal escape for NUL.
+  if (aarch64_bti != 0 || aarch64_pac != 0) {
+    print "\n\n" \
+      ".pushsection .note.gnu.property, \"a\"; /* Start a new allocatable section */\n" \
+      ".balign 8; /* align it on a byte boundry */\n" \
+      ".long 4; /* size of \"GNU\\0\" */\n" \
+      ".long 0x10; /* size of descriptor */\n" \
+      ".long 0x5; /* NT_GNU_PROPERTY_TYPE_0 */\n" \
+      ".asciz \"GNU\";\n" \
+      ".long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */\n" \
+      ".long 4; /* Four bytes of data */\n" \
+      ".long ("GNU_PROPERTY_AARCH64_BTI"|"GNU_PROPERTY_AARCH64_POINTER_AUTH"); /* BTI or PAC is enabled */\n" \
+      ".long 0; /* padding for 8 byte alignment */\n" \
+      ".popsection; /* end the section */"
+  }
+}
-- 
2.51.0

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.