Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 31 May 2023 16:15:49 +0200
From: Jens Gustedt <Jens.Gustedt@...ia.fr>
To: musl@...ts.openwall.com
Subject: [C23 128 bit 3/4] C23: implement w128 and wf128 for scanf and similar

These functions are possibly a bit slower with 128 bit support
enabled, since the core function `__intscan` uses wider data formats
and instructions in some places. There was already an optimization in
place for all formats which we now extend:

 - first with `unsigned`, then
 - switch to `uint64_t` if the value extends `UINT_MAX`, then
 - switch to `uwide128` (an internal 128 bit structure) if that is unavoidable.

If the scan is erroneous because there are more digits than expected,
the error might be noticed a bit later than previously, but the
`errno` number should be the same.
---
 src/internal/intscan.c | 105 ++++++++++++++++++++++++++++++++---------
 src/internal/intscan.h |   4 +-
 src/stdio/vfscanf.c    |  25 ++++++----
 src/stdio/vfwscanf.c   |  19 +++++---
 src/stdlib/strtol.c    |  12 ++---
 src/stdlib/wcstol.c    |  12 ++---
 6 files changed, 125 insertions(+), 52 deletions(-)

diff --git a/src/internal/intscan.c b/src/internal/intscan.c
index 57424554..c7c1e47e 100644
--- a/src/internal/intscan.c
+++ b/src/internal/intscan.c
@@ -1,6 +1,7 @@
 #include <limits.h>
 #include <errno.h>
 #include <ctype.h>
+#include "uwide128.h"
 #include "shgetc.h"
 
 /* Lookup table for digit values. -1==255>=36 -> invalid */
@@ -23,15 +24,59 @@ static const unsigned char table[] = { -1,
 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
 };
 
-unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long lim)
+static uwide128 const uwide128_max  = { { -1, -1, } };
+static uwide128 const uwide128_zero = { {  0,  0, } };
+
+static const uwide128 __uwide128_limit[37] = {
+	comp2(0,	0),
+	comp2(-1,	-1),
+	comp2(-1,	9223372036854775807),
+	comp2(6148914691236517205,	6148914691236517205),
+	comp2(-1,	4611686018427387903),
+	comp2(3689348814741910323,	3689348814741910323),
+	comp2(-6148914691236517206,	3074457345618258602),
+	comp2(5270498306774157604,	2635249153387078802),
+	comp2(-1,	2305843009213693951),
+	comp2(-4099276460824344804,	2049638230412172401),
+	comp2(-7378697629483820647,	1844674407370955161),
+	comp2(8384883669867978007,	1676976733973595601),
+	comp2(6148914691236517205,	1537228672809129301),
+	comp2(4256940940086819603,	1418980313362273201),
+	comp2(2635249153387078802,	1317624576693539401),
+	comp2(1229782938247303441,	1229782938247303441),
+	comp2(-1,	1152921504606846975),
+	comp2(1085102592571150095,	1085102592571150095),
+	comp2(-2049638230412172402,	1024819115206086200),
+	comp2(-1941762534074689644,	970881267037344821),
+	comp2(-3689348814741910324,	922337203685477580),
+	comp2(-4392081922311798004,	878416384462359600),
+	comp2(-5030930201920786805,	838488366986797800),
+	comp2(4812194106185100421,	802032351030850070),
+	comp2(-6148914691236517206,	768614336404564650),
+	comp2(-6640827866535438582,	737869762948382064),
+	comp2(-7094901566811366007,	709490156681136600),
+	comp2(-1366425486941448268,	683212743470724133),
+	comp2(-7905747460161236407,	658812288346769700),
+	comp2(-3180473116156819245,	636094623231363848),
+	comp2(-8608480567731124088,	614891469123651720),
+	comp2(-8925843906633654008,	595056260442243600),
+	comp2(-1,	576460752303423487),
+	comp2(8943875914525843207,	558992244657865200),
+	comp2(-8680820740569200761,	542551296285575047),
+	comp2(8432797290838652167,	527049830677415760),
+	comp2(8198552921648689607,	512409557603043100),
+};
+
+uwide128 __intscan(FILE *f, unsigned base, int pok, uwide128 lim)
 {
 	const unsigned char *val = table+1;
 	int c, neg=0;
 	unsigned x;
-	unsigned long long y;
+	uint64_t z;
+	uwide128 y;
 	if (base > 36 || base == 1) {
 		errno = EINVAL;
-		return 0;
+		return uwide128_zero;
 	}
 	while (isspace((c=shgetc(f))));
 	if (c=='+' || c=='-') {
@@ -46,7 +91,7 @@ unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long
 				shunget(f);
 				if (pok) shunget(f);
 				else shlim(f, 0);
-				return 0;
+				return uwide128_zero;
 			}
 			base = 16;
 		} else if ((c|32)=='b' && base != 16) {
@@ -55,7 +100,7 @@ unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long
 				shunget(f);
 				if (pok) shunget(f);
 				else shlim(f, 0);
-				return 0;
+				return uwide128_zero;
 			}
 			base = 2;
 		} else if (base == 0) {
@@ -67,43 +112,59 @@ unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long
 			shunget(f);
 			shlim(f, 0);
 			errno = EINVAL;
-			return 0;
+			return uwide128_zero;
 		}
 	}
 	if (base == 10) {
 		for (x=0; c-'0'<10U && x<=UINT_MAX/10-1; c=shgetc(f))
 			x = x*10 + (c-'0');
-		for (y=x; c-'0'<10U && y<=ULLONG_MAX/10 && 10*y<=ULLONG_MAX-(c-'0'); c=shgetc(f))
-			y = y*10 + (c-'0');
+		for (z=x; c-'0'<10U && z<=UINT64_MAX/10-1; c=shgetc(f))
+			z = z*10 + (c-'0');
+		y=__uwide128_u64(x);
+		if (c-'0'<10U) {
+			uwide128 y10 = __uwide128_mul(y, 10);
+			while (c-'0'<10U
+				&& __uwide128_le(y, __uwide128_limit[10])
+				&& __uwide128_le(y10, __uwide128_sub(uwide128_max, (c-'0')))) {
+				y = __uwide128_add(y10, (c-'0'));
+				y10 = __uwide128_mul(y, 10);
+				c=shgetc(f);
+			}
+		}
 		if (c-'0'>=10U) goto done;
-	} else if (!(base & base-1)) {
-		int bs = "\0\1\2\4\7\3\6\5"[(0x17*base)>>5&7];
-		for (x=0; val[c]<base && x<=UINT_MAX/32; c=shgetc(f))
-			x = x<<bs | val[c];
-		for (y=x; val[c]<base && y<=ULLONG_MAX>>bs; c=shgetc(f))
-			y = y<<bs | val[c];
 	} else {
 		for (x=0; val[c]<base && x<=UINT_MAX/36-1; c=shgetc(f))
 			x = x*base + val[c];
-		for (y=x; val[c]<base && y<=ULLONG_MAX/base && base*y<=ULLONG_MAX-val[c]; c=shgetc(f))
-			y = y*base + val[c];
+		for (z=x; val[c]<base && z<=UINT64_MAX/36-1; c=shgetc(f))
+			z = z*base + val[c];
+		y=__uwide128_u64(z);
+		if (val[c]<base) {
+			uwide128 yBase = __uwide128_mul(y, base);
+			while (val[c]<base
+				&& __uwide128_le(y, __uwide128_limit[base])
+				&& __uwide128_le(yBase, __uwide128_sub(uwide128_max, val[c]))) {
+				y = __uwide128_add(yBase, val[c]);
+				yBase = __uwide128_mul(y, base);
+				c=shgetc(f);
+			}
+		}
 	}
 	if (val[c]<base) {
 		for (; val[c]<base; c=shgetc(f));
 		errno = ERANGE;
 		y = lim;
-		if (lim&1) neg = 0;
+		if (lim.v64[word64(0)]&1) neg = 0;
 	}
 done:
 	shunget(f);
-	if (y>=lim) {
-		if (!(lim&1) && !neg) {
+	if (__uwide128_le(lim, y)) {
+		if (!(lim.v64[word64(0)]&1) && !neg) {
 			errno = ERANGE;
-			return lim-1;
-		} else if (y>lim) {
+			return __uwide128_sub(lim, 1);
+		} else if (!__uwide128_le(y, lim)) {
 			errno = ERANGE;
 			return lim;
 		}
 	}
-	return (y^neg)-neg;
+	return neg ? __uwide128_neg(y) : y;
 }
diff --git a/src/internal/intscan.h b/src/internal/intscan.h
index ccf9f112..cf7a9d8f 100644
--- a/src/internal/intscan.h
+++ b/src/internal/intscan.h
@@ -1,8 +1,10 @@
 #ifndef INTSCAN_H
 #define INTSCAN_H
 
+#include "stdio_impl.h"
 #include <stdio.h>
+#include "uwide128.h"
 
-hidden unsigned long long __intscan(FILE *, unsigned, int, unsigned long long);
+hidden uwide128 __intscan(FILE *, unsigned, int, uwide128);
 
 #endif
diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c
index f8ace92f..be922f82 100644
--- a/src/stdio/vfscanf.c
+++ b/src/stdio/vfscanf.c
@@ -18,25 +18,29 @@
 #define SIZE_l   1
 #define SIZE_L   2
 #define SIZE_ll  3
+#define SIZE_128 4
 
-static void store_int(void *dest, int size, unsigned long long i)
+static void store_int(void *dest, int size, uwide128 i)
 {
 	if (!dest) return;
 	switch (size) {
 	case SIZE_hh:
-		*(char *)dest = i;
+		*(char *)dest = i.v64[lo64];
 		break;
 	case SIZE_h:
-		*(short *)dest = i;
+		*(short *)dest = i.v64[lo64];
 		break;
 	case SIZE_def:
-		*(int *)dest = i;
+		*(int *)dest = i.v64[lo64];
 		break;
 	case SIZE_l:
-		*(long *)dest = i;
+		*(long *)dest = i.v64[lo64];
 		break;
 	case SIZE_ll:
-		*(long long *)dest = i;
+		*(long long *)dest = i.v64[lo64];
+		break;
+	case SIZE_128:
+		*(uwide128 *)dest = i;
 		break;
 	}
 }
@@ -77,7 +81,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
 	void *dest=NULL;
 	int invert;
 	int matches=0;
-	unsigned long long x;
+	uwide128 x;
 	long double y;
 	off_t pos = 0;
 	unsigned char scanset[257];
@@ -178,6 +182,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
 #else
 			case 64: size = SIZE_ll; break;
 #endif
+			case 128: size = SIZE_128; break;
 			}
 			break;
 		case 'b': case 'd': case 'i': case 'o': case 'u': case 'x':
@@ -206,7 +211,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
 		case '[':
 			break;
 		case 'n':
-			store_int(dest, size, pos);
+			store_int(dest, size, __uwide128_u64(pos));
 			/* do not increment match count, etc! */
 			continue;
 		default:
@@ -326,9 +331,9 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
 		case 'i':
 			base = 0;
 		int_common:
-			x = __intscan(f, base, 0, ULLONG_MAX);
+			x = __intscan(f, base, 0, UWIDE128_MAX);
 			if (!shcnt(f)) goto match_fail;
-			if (t=='p' && dest) *(void **)dest = (void *)(uintptr_t)x;
+			if (t=='p' && dest) *(void **)dest = (void *)(uintptr_t)x.v64[lo64];
 			else store_int(dest, size, x);
 			break;
 		case 'a': case 'A':
diff --git a/src/stdio/vfwscanf.c b/src/stdio/vfwscanf.c
index 6fe2749c..f70d83a1 100644
--- a/src/stdio/vfwscanf.c
+++ b/src/stdio/vfwscanf.c
@@ -18,25 +18,29 @@
 #define SIZE_l   1
 #define SIZE_L   2
 #define SIZE_ll  3
+#define SIZE_128 4
 
-static void store_int(void *dest, int size, unsigned long long i)
+static void store_int(void *dest, int size, uwide128 i)
 {
 	if (!dest) return;
 	switch (size) {
 	case SIZE_hh:
-		*(char *)dest = i;
+		*(char *)dest = i.v64[lo64];
 		break;
 	case SIZE_h:
-		*(short *)dest = i;
+		*(short *)dest = i.v64[lo64];
 		break;
 	case SIZE_def:
-		*(int *)dest = i;
+		*(int *)dest = i.v64[lo64];
 		break;
 	case SIZE_l:
-		*(long *)dest = i;
+		*(long *)dest = i.v64[lo64];
 		break;
 	case SIZE_ll:
-		*(long long *)dest = i;
+		*(long long *)dest = i.v64[lo64];
+		break;
+	case SIZE_128:
+		*(uwide128 *)dest = i;
 		break;
 	}
 }
@@ -201,6 +205,7 @@ int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
 #else
 			case 64: size = SIZE_ll; break;
 #endif
+			case 128: size = SIZE_128; break;
 			}
 			break;
 		case 'b': case 'd': case 'i': case 'o': case 'u': case 'x':
@@ -234,7 +239,7 @@ int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
 
 		switch (t) {
 		case 'n':
-			store_int(dest, size, pos);
+			store_int(dest, size, __uwide128_i64(pos));
 			/* do not increment match count, etc! */
 			continue;
 
diff --git a/src/stdlib/strtol.c b/src/stdlib/strtol.c
index bfefea69..db5acadb 100644
--- a/src/stdlib/strtol.c
+++ b/src/stdlib/strtol.c
@@ -5,12 +5,12 @@
 #include <limits.h>
 #include <ctype.h>
 
-static unsigned long long strtox(const char *s, char **p, int base, unsigned long long lim)
+static unsigned long long strtox(const char *s, char **p, int base, uwide128 lim)
 {
 	FILE f;
 	sh_fromstring(&f, s);
 	shlim(&f, 0);
-	unsigned long long y = __intscan(&f, base, 1, lim);
+	unsigned long long y = __intscan(&f, base, 1, lim).v64[lo64];
 	if (p) {
 		size_t cnt = shcnt(&f);
 		*p = (char *)s + cnt;
@@ -20,22 +20,22 @@ static unsigned long long strtox(const char *s, char **p, int base, unsigned lon
 
 unsigned long long strtoull(const char *restrict s, char **restrict p, int base)
 {
-	return strtox(s, p, base, ULLONG_MAX);
+	return strtox(s, p, base, __uwide128_u64(ULLONG_MAX));
 }
 
 long long strtoll(const char *restrict s, char **restrict p, int base)
 {
-	return strtox(s, p, base, LLONG_MIN);
+	return strtox(s, p, base, __uwide128_u64(0ULL+LLONG_MIN));
 }
 
 unsigned long strtoul(const char *restrict s, char **restrict p, int base)
 {
-	return strtox(s, p, base, ULONG_MAX);
+	return strtox(s, p, base, __uwide128_u64(ULONG_MAX));
 }
 
 long strtol(const char *restrict s, char **restrict p, int base)
 {
-	return strtox(s, p, base, 0UL+LONG_MIN);
+	return strtox(s, p, base, __uwide128_u64(0UL+LONG_MIN));
 }
 
 intmax_t strtoimax(const char *restrict s, char **restrict p, int base)
diff --git a/src/stdlib/wcstol.c b/src/stdlib/wcstol.c
index 1eeb495f..d7662562 100644
--- a/src/stdlib/wcstol.c
+++ b/src/stdlib/wcstol.c
@@ -29,7 +29,7 @@ static size_t do_read(FILE *f, unsigned char *buf, size_t len)
 	return 0;
 }
 
-static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsigned long long lim)
+static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, uwide128 lim)
 {
 	wchar_t *t = (wchar_t *)s;
 	unsigned char buf[64];
@@ -42,7 +42,7 @@ static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsign
 	while (iswspace(*t)) t++;
 	f.cookie = (void *)t;
 	shlim(&f, 0);
-	unsigned long long y = __intscan(&f, base, 1, lim);
+	unsigned long long y = __intscan(&f, base, 1, lim).v64[lo64];
 	if (p) {
 		size_t cnt = shcnt(&f);
 		*p = cnt ? t + cnt : (wchar_t *)s;
@@ -52,22 +52,22 @@ static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsign
 
 unsigned long long wcstoull(const wchar_t *restrict s, wchar_t **restrict p, int base)
 {
-	return wcstox(s, p, base, ULLONG_MAX);
+	return wcstox(s, p, base, __uwide128_u64(ULLONG_MAX));
 }
 
 long long wcstoll(const wchar_t *restrict s, wchar_t **restrict p, int base)
 {
-	return wcstox(s, p, base, LLONG_MIN);
+	return wcstox(s, p, base, __uwide128_u64(0ULL+LLONG_MIN));
 }
 
 unsigned long wcstoul(const wchar_t *restrict s, wchar_t **restrict p, int base)
 {
-	return wcstox(s, p, base, ULONG_MAX);
+	return wcstox(s, p, base, __uwide128_u64(ULONG_MAX));
 }
 
 long wcstol(const wchar_t *restrict s, wchar_t **restrict p, int base)
 {
-	return wcstox(s, p, base, 0UL+LONG_MIN);
+	return wcstox(s, p, base, __uwide128_u64(0UL+LONG_MIN));
 }
 
 intmax_t wcstoimax(const wchar_t *restrict s, wchar_t **restrict p, int base)
-- 
2.34.1

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.