>From 228da39e38c1cae13cbe637e771412c1984dba5d Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Thu, 9 Apr 2026 22:51:30 -0400
Subject: [PATCH 1/3] qsort: fix leonardo heap corruption from bug in
 doubleword ctz primitive

the pntz function, implementing a "count trailing zeros" variant for a
bit vector consisting of two size_t words, erroneously returned zero
rather than the number of bits in the low word when the first bit set
was the low bit of the high word.

as a result, a loop in the trinkle function, which should have a
guaranteed small bound on the number of iterations, could run
unboundedly, thereby overflowing a stack-based working-space array
which was sized for the bound.

CVE-2026-40200 has been assigned for this issue.
---
 src/stdlib/qsort.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index ab79dc6f..13219ab3 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -34,11 +34,11 @@
 
 typedef int (*cmpfun)(const void *, const void *, void *);
 
+/* returns index of first bit set, excluding the low bit assumed to always
+ * be set, starting from low bit of p[0] up through high bit of p[1] */
 static inline int pntz(size_t p[2]) {
-	int r = ntz(p[0] - 1);
-	if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
-		return r;
-	}
+	if (p[0] != 1) return ntz(p[0] - 1);
+	if (p[1]) return 8*sizeof(size_t) + ntz(p[1]);
 	return 0;
 }
 
-- 
2.21.0


>From b3291b9a9f77f1f993d2b4f8c68a26cf09221ae7 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Thu, 9 Apr 2026 23:40:53 -0400
Subject: [PATCH 2/3] qsort: hard-preclude oob array writes independent of any
 invariants

while the root cause of CVE-2026-40200 was a faulty ctz primitive, the
fallout of the bug would have been limited to erroneous sorting or
infinite loop if not for the stores to a stack-based array that
depended on trusting invariants in order not to go out of bounds.

increase the size of the array to a power of two so that we can mask
indices into it to force them into range. in the absence of any
further bug, the masking is a no-op, but it does not have any
measurable performance cost, and it makes spatial memory safety
trivial to prove (and, for readers not familiar with the algorithms,
to trust).
---
 src/stdlib/qsort.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 13219ab3..e4bce9f7 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -89,10 +89,16 @@ static inline void shr(size_t p[2], int n)
 	p[1] >>= n;
 }
 
+/* power-of-two length for working array so that we can mask indices and
+ * not depend on any invariant of the algorithm for spatial memory safety.
+ * the original size was just 14*sizeof(size_t)+1 */
+#define AR_LEN  (16 * sizeof(size_t))
+#define AR_MASK (AR_LEN - 1)
+
 static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[])
 {
 	unsigned char *rt, *lf;
-	unsigned char *ar[14 * sizeof(size_t) + 1];
+	unsigned char *ar[AR_LEN];
 	int i = 1;
 
 	ar[0] = head;
@@ -104,16 +110,16 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int p
 			break;
 		}
 		if(cmp(lf, rt, arg) >= 0) {
-			ar[i++] = lf;
+			ar[i++ & AR_MASK] = lf;
 			head = lf;
 			pshift -= 1;
 		} else {
-			ar[i++] = rt;
+			ar[i++ & AR_MASK] = rt;
 			head = rt;
 			pshift -= 2;
 		}
 	}
-	cycle(width, ar, i);
+	cycle(width, ar, i & AR_MASK);
 }
 
 static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
@@ -121,7 +127,7 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, si
 	unsigned char *stepson,
 	              *rt, *lf;
 	size_t p[2];
-	unsigned char *ar[14 * sizeof(size_t) + 1];
+	unsigned char *ar[AR_LEN];
 	int i = 1;
 	int trail;
 
@@ -142,7 +148,7 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, si
 			}
 		}
 
-		ar[i++] = stepson;
+		ar[i++ & AR_MASK] = stepson;
 		head = stepson;
 		trail = pntz(p);
 		shr(p, trail);
@@ -150,7 +156,7 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, si
 		trusty = 0;
 	}
 	if(!trusty) {
-		cycle(width, ar, i);
+		cycle(width, ar, i & AR_MASK);
 		sift(head, width, cmp, arg, pshift, lp);
 	}
 }
-- 
2.21.0


>From 5122f9f3c99fee366167c5de98b31546312921ab Mon Sep 17 00:00:00 2001
From: Luca Kellermann <mailto.luca.kellermann@gmail.com>
Date: Fri, 10 Apr 2026 03:03:22 +0200
Subject: [PATCH 3/3] qsort: fix shift UB in shl and shr

if shl() or shr() are called with n==8*sizeof(size_t), n is adjusted
to 0. the shift by (sizeof(size_t) * 8 - n) that then follows will
consequently shift by the width of size_t, which is UB and in practice
produces an incorrect result.

return early in this case. the bitvector p was already shifted by the
required amount.
---
 src/stdlib/qsort.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index e4bce9f7..28607450 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -71,6 +71,7 @@ static inline void shl(size_t p[2], int n)
 		n -= 8 * sizeof(size_t);
 		p[1] = p[0];
 		p[0] = 0;
+		if (!n) return;
 	}
 	p[1] <<= n;
 	p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
@@ -83,6 +84,7 @@ static inline void shr(size_t p[2], int n)
 		n -= 8 * sizeof(size_t);
 		p[0] = p[1];
 		p[1] = 0;
+		if (!n) return;
 	}
 	p[0] >>= n;
 	p[0] |= p[1] << (sizeof(size_t) * 8 - n);
-- 
2.21.0