Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [day] [month] [year] [list]
Date: Wed, 26 Jun 2024 11:43:45 +0300
From: Stefan Jumarea <stefanjumarea02@...il.com>
To: musl@...ts.openwall.com
Cc: dalias@...c.org
Subject: [PATCH v2] mallocng: Add MTE support for Aarch64

Add support for Memory Tagging Extension.

All the memory tagging code is placed within preprocessor guards
(`#ifdef MEMTAG`), and is enabled by using the `--enable-mte` configure
option. The option can only be used when compiling for Aarch64.

All the primitives for generating, storing and loading the memory tags
are placed in a new header under `arch/aarch64/`.

For now, only the actual user data is tagged. All metadata is untagged.

Signed-off-by: Stefan Jumarea <stefanjumarea02@...il.com>
---
 arch/aarch64/mte.h                  | 41 +++++++++++++++
 configure                           | 17 +++++++
 src/malloc/mallocng/aligned_alloc.c | 20 ++++----
 src/malloc/mallocng/free.c          | 25 +++++++--
 src/malloc/mallocng/malloc.c        |  9 +++-
 src/malloc/mallocng/meta.h          | 79 ++++++++++++++++++++++++-----
 src/malloc/mallocng/realloc.c       | 24 ++++++---
 7 files changed, 181 insertions(+), 34 deletions(-)
 create mode 100644 arch/aarch64/mte.h

diff --git a/arch/aarch64/mte.h b/arch/aarch64/mte.h
new file mode 100644
index 00000000..0cd52e1a
--- /dev/null
+++ b/arch/aarch64/mte.h
@@ -0,0 +1,41 @@
+#include <stdint.h>
+
+#define MTE_TAG_GRANULE		16
+#define MTE_TAG_MASK		(0xFULL << 56)
+
+/**
+ * Read the allocation tag for `addr`.
+ */
+static inline uintptr_t mte_load_tag(uintptr_t addr)
+{
+	uintptr_t tag;
+
+	__asm__ __volatile__ ("ldg	%0, [%1]\n"
+			      : "=&r" (tag) : "r"(addr));
+
+	return tag;
+}
+
+/**
+ * Store the allocation tag for `addr`.
+ * The tag is taken from the tag bits of `addr` itself.
+ */
+static inline void mte_store_tag(uintptr_t addr)
+{
+	__asm__ __volatile__ ("stg	%0, [%0]\n"
+			      : : "r"(addr) : "memory");
+}
+
+/**
+ * Tag `addr` with a random tag.
+ * If the address is already tagged, make sure the new tag differs.
+ */
+static inline uintptr_t mte_insert_random_tag(uintptr_t addr)
+{
+	uintptr_t reg;
+
+	__asm__ __volatile__("gmi	%0, %1, xzr\n"
+			     "irg	%1, %1, %0\n"
+			     : "=&r"(reg), "+r" (addr));
+	return addr;
+}
diff --git a/configure b/configure
index bc9fbe48..edcd4911 100755
--- a/configure
+++ b/configure
@@ -34,6 +34,8 @@ Optional features:
   --enable-wrapper=...    build given musl toolchain wrapper [auto]
   --disable-shared        inhibit building shared library [enabled]
   --disable-static        inhibit building static library [enabled]
+  --enable-mte            build with MTE support [disabled]
+                          only available for aarch64 and mallocng
 
 Optional packages:
   --with-malloc=...       choose malloc implementation [mallocng]
@@ -139,6 +141,7 @@ debug=no
 warnings=yes
 shared=auto
 static=yes
+mte=no
 wrapper=auto
 gcc_wrapper=no
 clang_wrapper=no
@@ -158,6 +161,8 @@ case "$arg" in
 --disable-shared|--enable-shared=no) shared=no ;;
 --enable-static|--enable-static=yes) static=yes ;;
 --disable-static|--enable-static=no) static=no ;;
+--enable-mte|--enable-mte=yes) mte=yes ;;
+--disable-mte|--enable-mte=no) mte=no ;;
 --enable-optimize) optimize=yes ;;
 --enable-optimize=*) optimize=${arg#*=} ;;
 --disable-optimize) optimize=no ;;
@@ -790,6 +795,18 @@ if trycppif "__FAST_MATH__" \
 fail "$0: error: compiler has broken floating point; check CFLAGS"
 fi
 
+if test "$mte" = "yes" ; then
+	printf "Checking whether target architecture supports MTE... "
+	if test "$ARCH" != "aarch64"; then
+		printf "no\n"
+		fail "$0: error: mte only supported with aarch64"
+	fi
+
+	printf "yes\n"
+	CFLAGS_AUTO="$CFLAGS_AUTO -DMEMTAG -march=armv8.5-a+memtag"
+	SUBARCH=${SUBARCH}+memtag
+fi
+
 printf "creating config.mak... "
 
 cmdline=$(quote "$0")
diff --git a/src/malloc/mallocng/aligned_alloc.c b/src/malloc/mallocng/aligned_alloc.c
index e0862a83..7a7182ed 100644
--- a/src/malloc/mallocng/aligned_alloc.c
+++ b/src/malloc/mallocng/aligned_alloc.c
@@ -25,31 +25,33 @@ void *aligned_alloc(size_t align, size_t len)
 	if (!p)
 		return 0;
 
+	unsigned char *untagged = untag(p);
 	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
+	int idx = get_slot_index(untagged);
 	size_t stride = get_stride(g);
 	unsigned char *start = g->mem->storage + stride*idx;
 	unsigned char *end = g->mem->storage + stride*(idx+1) - IB;
 	size_t adj = -(uintptr_t)p & (align-1);
 
 	if (!adj) {
-		set_size(p, end, len);
+		set_size(untagged, end, len);
 		return p;
 	}
 	p += adj;
+	untagged += adj;
 	uint32_t offset = (size_t)(p-g->mem->storage)/UNIT;
 	if (offset <= 0xffff) {
-		*(uint16_t *)(p-2) = offset;
-		p[-4] = 0;
+		*(uint16_t *)(untagged-2) = offset;
+		untagged[-4] = 0;
 	} else {
 		// use a 32-bit offset if 16-bit doesn't fit. for this,
 		// 16-bit field must be zero, [-4] byte nonzero.
-		*(uint16_t *)(p-2) = 0;
-		*(uint32_t *)(p-8) = offset;
-		p[-4] = 1;
+		*(uint16_t *)(untagged-2) = 0;
+		*(uint32_t *)(untagged-8) = offset;
+		untagged[-4] = 1;
 	}
-	p[-3] = idx;
-	set_size(p, end, len);
+	untagged[-3] = idx;
+	set_size(untagged, end, len);
 	// store offset to aligned enframing. this facilitates cycling
 	// offset and also iteration of heap for debugging/measurement.
 	// for extreme overalignment it won't fit but these are classless
diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c
index 43f32aad..39993a56 100644
--- a/src/malloc/mallocng/free.c
+++ b/src/malloc/mallocng/free.c
@@ -25,8 +25,9 @@ static struct mapinfo free_group(struct meta *g)
 		mi.len = g->maplen*4096UL;
 	} else {
 		void *p = g->mem;
+		unsigned char *untagged = untag(p);
 		struct meta *m = get_meta(p);
-		int idx = get_slot_index(p);
+		int idx = get_slot_index(untagged);
 		g->mem->meta = 0;
 		// not checking size/reserved here; it's intentionally invalid
 		mi = nontrivial_free(m, idx);
@@ -102,17 +103,31 @@ void free(void *p)
 {
 	if (!p) return;
 
+	void *untagged = untag(p);
+
 	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
+	int idx = get_slot_index(untagged);
 	size_t stride = get_stride(g);
 	unsigned char *start = g->mem->storage + stride*idx;
 	unsigned char *end = start + stride - IB;
-	get_nominal_size(p, end);
+
+#ifdef MEMTAG
+	size_t nom_size = get_nominal_size(untagged, end);
+
+	// Check that p has the proper tag before zero-tagging.
+	// The write below should raise an exception if p has the wrong tag.
+	// If the pointer was obtained via a 0-size alloc, skip the tag check.
+	if (nom_size > 0)
+		((unsigned char *)p)[0] = 0;
+
+	untag_region(untagged, 0, nom_size);
+#endif
+
 	uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1;
-	((unsigned char *)p)[-3] = 255;
+	((unsigned char *)untagged)[-3] = 255;
 	// invalidate offset to group header, and cycle offset of
 	// used region within slot if current offset is zero.
-	*(uint16_t *)((char *)p-2) = 0;
+	*(uint16_t *)((char *)untagged-2) = 0;
 
 	// release any whole pages contained in the slot to be freed
 	// unless it's a single-slot group that will be unmapped.
diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
index d695ab8e..1d910b26 100644
--- a/src/malloc/mallocng/malloc.c
+++ b/src/malloc/mallocng/malloc.c
@@ -304,6 +304,11 @@ void *malloc(size_t n)
 	int sc;
 	int idx;
 	int ctr;
+#ifdef MEMTAG
+	size_t required_size = ALIGN_UP(n, 16);
+#else
+	size_t required_size = n;
+#endif
 
 	if (n >= MMAP_THRESHOLD) {
 		size_t needed = n + IB + UNIT;
@@ -376,7 +381,9 @@ void *malloc(size_t n)
 success:
 	ctr = ctx.mmap_counter;
 	unlock();
-	return enframe(g, idx, n, ctr);
+
+	void *ptr = enframe(g, idx, required_size, ctr);
+	return tag_region(ptr, n);
 }
 
 int is_allzero(void *p)
diff --git a/src/malloc/mallocng/meta.h b/src/malloc/mallocng/meta.h
index 61ec53f9..98ba17be 100644
--- a/src/malloc/mallocng/meta.h
+++ b/src/malloc/mallocng/meta.h
@@ -4,6 +4,9 @@
 #include <stdint.h>
 #include <errno.h>
 #include <limits.h>
+#ifdef MEMTAG
+#include <mte.h>
+#endif
 #include "glue.h"
 
 __attribute__((__visibility__("hidden")))
@@ -14,6 +17,10 @@ extern const uint16_t size_classes[];
 #define UNIT 16
 #define IB 4
 
+#ifndef ALIGN_UP
+#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
+#endif
+
 struct group {
 	struct meta *meta;
 	unsigned char active_idx:5;
@@ -72,6 +79,44 @@ struct meta *alloc_meta(void);
 __attribute__((__visibility__("hidden")))
 int is_allzero(void *);
 
+static inline unsigned char *untag(void *p)
+{
+#ifdef MEMTAG
+	return (unsigned char *)((uintptr_t)p & ~MTE_TAG_MASK);
+#else
+	return (unsigned char *)p;
+#endif
+}
+
+static inline void *tag_region(void *p, size_t n)
+{
+#ifdef MEMTAG
+	uintptr_t addr = mte_insert_random_tag((uintptr_t)p);
+
+	// If n == 0, return a randomly tagged pointer without tagging
+	// the memory (the address is not tagged, but the returned pointer is).
+	// The pointer can be passed to free(), but accessing it will
+	// result in a tag mismatch.
+	if (n == 0)
+		return (void *)addr;
+
+	for (size_t i = 0; i < ALIGN_UP(n, 16); i += 16)
+		mte_store_tag(addr + i);
+
+	return (void *)addr;
+#else
+	return p;
+#endif
+}
+
+static inline void untag_region(void *p, size_t start, size_t end)
+{
+#ifdef MEMTAG
+	for (size_t i = ALIGN_UP(start, 16); i < ALIGN_UP(end, 16); i += 16)
+		mte_store_tag((uintptr_t)((char *)p + i));
+#endif
+}
+
 static inline void queue(struct meta **phead, struct meta *m)
 {
 	assert(!m->next);
@@ -129,14 +174,15 @@ static inline int get_slot_index(const unsigned char *p)
 static inline struct meta *get_meta(const unsigned char *p)
 {
 	assert(!((uintptr_t)p & 15));
-	int offset = *(const uint16_t *)(p - 2);
-	int index = get_slot_index(p);
-	if (p[-4]) {
+	const unsigned char *untagged = untag((void *)p);
+	int offset = *(const uint16_t *)(untagged - 2);
+	int index = get_slot_index(untagged);
+	if (untagged[-4]) {
 		assert(!offset);
-		offset = *(uint32_t *)(p - 8);
+		offset = *(uint32_t *)(untagged - 8);
 		assert(offset > 0xffff);
 	}
-	const struct group *base = (const void *)(p - UNIT*offset - UNIT);
+	const struct group *base = (const void *)(untagged - UNIT*offset - UNIT);
 	const struct meta *meta = base->meta;
 	assert(meta->mem == base);
 	assert(index <= meta->last_idx);
@@ -199,10 +245,11 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
 	size_t slack = (stride-IB-n)/UNIT;
 	unsigned char *p = g->mem->storage + stride*idx;
 	unsigned char *end = p+stride-IB;
+	unsigned char *untagged = untag(p);
 	// cycle offset within slot to increase interval to address
 	// reuse, facilitate trapping double-free.
-	int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255;
-	assert(!p[-4]);
+	int off = (untagged[-3] ? *(uint16_t *)(untagged-2) + 1 : ctr) & 255;
+	assert(!untagged[-4]);
 	if (off > slack) {
 		size_t m = slack;
 		m |= m>>1; m |= m>>2; m |= m>>4;
@@ -213,21 +260,27 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
 	if (off) {
 		// store offset in unused header at offset zero
 		// if enframing at non-zero offset.
-		*(uint16_t *)(p-2) = off;
-		p[-3] = 7<<5;
+		*(uint16_t *)(untagged-2) = off;
+		untagged[-3] = 7<<5;
 		p += UNIT*off;
+		untagged += UNIT*off;
 		// for nonzero offset there is no permanent check
 		// byte, so make one.
-		p[-4] = 0;
+		untagged[-4] = 0;
 	}
-	*(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT;
-	p[-3] = idx;
-	set_size(p, end, n);
+	*(uint16_t *)(untagged-2) = (size_t)(untagged-g->mem->storage)/UNIT;
+	untagged[-3] = idx;
+	set_size(untagged, end, n);
+
 	return p;
 }
 
 static inline int size_to_class(size_t n)
 {
+#ifdef MEMTAG
+	n = ALIGN_UP(n, 16);
+#endif
+
 	n = (n+IB-1)>>4;
 	if (n<10) return n;
 	n++;
diff --git a/src/malloc/mallocng/realloc.c b/src/malloc/mallocng/realloc.c
index 18769f42..a22b8226 100644
--- a/src/malloc/mallocng/realloc.c
+++ b/src/malloc/mallocng/realloc.c
@@ -1,4 +1,5 @@
 #define _GNU_SOURCE
+#include <stdint.h>
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <string.h>
@@ -9,20 +10,31 @@ void *realloc(void *p, size_t n)
 	if (!p) return malloc(n);
 	if (size_overflows(n)) return 0;
 
+#ifdef MEMTAG
+	size_t required_size = ALIGN_UP(n, 16);
+#else
+	size_t required_size = n;
+#endif
+	unsigned char *untagged = untag(p);
 	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
+	int idx = get_slot_index(untagged);
 	size_t stride = get_stride(g);
 	unsigned char *start = g->mem->storage + stride*idx;
 	unsigned char *end = start + stride - IB;
-	size_t old_size = get_nominal_size(p, end);
-	size_t avail_size = end-(unsigned char *)p;
+	size_t old_size = get_nominal_size(untagged, end);
+	size_t avail_size = end-(unsigned char *)untagged;
 	void *new;
 
 	// only resize in-place if size class matches
-	if (n <= avail_size && n<MMAP_THRESHOLD
+	if (required_size <= avail_size && n<MMAP_THRESHOLD
 	    && size_to_class(n)+1 >= g->sizeclass) {
-		set_size(p, end, n);
-		return p;
+
+		// zero-tag the free space left
+		untag_region(untagged, n, old_size);
+
+		set_size(untagged, end, required_size);
+
+		return tag_region(p, n);
 	}
 
 	// use mremap if old and new size are both mmap-worthy
-- 
2.43.0

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.