Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 31 May 2023 16:15:48 +0200
From: Jens Gustedt <Jens.Gustedt@...ia.fr>
To: musl@...ts.openwall.com
Subject: [C23 128 bit 2/4] C23: implement w128 and wf128 support for printf

C23 now allows extended integer types wider than `intmax_t`, provided
that they are used to implement some fixed-width integer type. The
length specifier wN can then be used with `printf` and friends to
print values of such a type.

GCC and Clang have provided `__int128` types on many architectures (in
particular x86_64) for a long time, and adding these types
"officially" is a recurring user request. They are particularly nice
to have for bitsets.

Implementing w128 (and wf128) is a first step towards providing
`int128_t` and `uint128_t` natively, regardless of whether a compiler
already implements these types.

This implementation hopefully adds only a very mild overhead in size
and processing time on those architectures where this type is
present. The impact is:

- one extra state array for the 128-bit type (about 60 bytes of static data)
- a widening of the local buffer needed to collect digits (512 bytes
  on the stack)
- a widening of the static format functions to accept the 128-bit type
  (probably a few bytes for some extra load instructions)

These functions may then be a bit slower, since they use wider
instructions and/or combine several instructions. There was already an
optimization in place for the 'u' format, because it needs division and
modulo by 10, which can be a bit costly. We now apply similar tricks to
the other integer formats to avoid going through the 128-bit emulation
where possible.
---
 src/stdio/vfprintf.c  | 113 ++++++++++++++++++++++++++----------------
 src/stdio/vfwprintf.c |  45 ++++++++++-------
 2 files changed, 97 insertions(+), 61 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a531a513..aac065fc 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -1,4 +1,5 @@
 #include "stdio_impl.h"
+#include "uwide128.h"
 #include <errno.h>
 #include <ctype.h>
 #include <limits.h>
@@ -35,10 +36,12 @@ enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
 	ZTPRE, JPRE, WPRE,
 	STOP,
+	WWPRE,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
 	SHORT, USHORT, CHAR, UCHAR,
 	LLONG, SIZET, IMAX, UMAX, PDIFF, UIPTR,
+	INT128, UINT128,
 	DBL, LDBL,
 	NOARG,
 	MAXSTATE
@@ -114,7 +117,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 
 union arg
 {
-	uintmax_t i;
+	uwide128 i;
 	long double f;
 	void *p;
 };
@@ -122,22 +125,24 @@ union arg
 static void pop_arg(union arg *arg, int type, va_list *ap)
 {
 	switch (type) {
-	       case PTR:	arg->p = va_arg(*ap, void *);
-	break; case INT:	arg->i = va_arg(*ap, int);
-	break; case UINT:	arg->i = va_arg(*ap, unsigned int);
-	break; case LONG:	arg->i = va_arg(*ap, long);
-	break; case ULONG:	arg->i = va_arg(*ap, unsigned long);
-	break; case ULLONG:	arg->i = va_arg(*ap, unsigned long long);
-	break; case SHORT:	arg->i = (short)va_arg(*ap, int);
-	break; case USHORT:	arg->i = (unsigned short)va_arg(*ap, int);
-	break; case CHAR:	arg->i = (signed char)va_arg(*ap, int);
-	break; case UCHAR:	arg->i = (unsigned char)va_arg(*ap, int);
-	break; case LLONG:	arg->i = va_arg(*ap, long long);
-	break; case SIZET:	arg->i = va_arg(*ap, size_t);
-	break; case IMAX:	arg->i = va_arg(*ap, intmax_t);
-	break; case UMAX:	arg->i = va_arg(*ap, uintmax_t);
-	break; case PDIFF:	arg->i = va_arg(*ap, ptrdiff_t);
-	break; case UIPTR:	arg->i = (uintptr_t)va_arg(*ap, void *);
+		case PTR:	arg->p = va_arg(*ap, void *);
+	break; case INT:	arg->i = __uwide128_i64(va_arg(*ap, int));
+	break; case UINT:	arg->i = __uwide128_u64(va_arg(*ap, unsigned int));
+	break; case LONG:	arg->i = __uwide128_i64(va_arg(*ap, long));
+	break; case ULONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long));
+	break; case ULLONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long long));
+	break; case SHORT:	arg->i = __uwide128_i64((short)va_arg(*ap, int));
+	break; case USHORT:	arg->i = __uwide128_u64((unsigned short)va_arg(*ap, int));
+	break; case CHAR:	arg->i = __uwide128_i64((signed char)va_arg(*ap, int));
+	break; case UCHAR:	arg->i = __uwide128_u64((unsigned char)va_arg(*ap, int));
+	break; case LLONG:	arg->i = __uwide128_i64(va_arg(*ap, long long));
+	break; case SIZET:	arg->i = __uwide128_u64(va_arg(*ap, size_t));
+	break; case IMAX:	arg->i = __uwide128_i64(va_arg(*ap, intmax_t));
+	break; case UMAX:	arg->i = __uwide128_u64(va_arg(*ap, uintmax_t));
+	break; case PDIFF:	arg->i = __uwide128_i64(va_arg(*ap, ptrdiff_t));
+	break; case UIPTR:	arg->i = __uwide128_u64((uintptr_t)va_arg(*ap, void *));
+	break; case INT128:	arg->i = __uwide128_pop(ap);
+	break; case UINT128:	arg->i = __uwide128_pop(ap);
 	break; case DBL:	arg->f = va_arg(*ap, double);
 	break; case LDBL:	arg->f = va_arg(*ap, long double);
 	}
@@ -163,29 +168,51 @@ static const char xdigits[16] = {
 	"0123456789ABCDEF"
 };
 
-static char *fmt_x(uintmax_t x, char *s, int lower)
+static char *fmt_u(unsigned long long x, char *s)
 {
-	for (; x; x>>=4) *--s = xdigits[(x&15)]|lower;
+	unsigned long y;
+	for (   ; x>ULONG_MAX; x/=10) *--s = '0' + x%10;
+	for (y=x;           y; y/=10) *--s = '0' + y%10;
 	return s;
 }
 
-static char *fmt_b(uintmax_t x, char *s)
+static char *fmt128_x(uwide128 x, char *s, int lower)
 {
-	for (; x; x>>=1) *--s = '0' + (x&1);
+	uint32_t y;
+	uint64_t z;
+	while (x.v64[hi64]) *--s = xdigits[__uwide128_div16(&x)]|lower;
+	for (z = x.v64[lo64]; z>UINT32_MAX; z>>=4) *--s = xdigits[(z&15)]|lower;
+	for (y=z;           y; y>>=4) *--s = xdigits[(y&15)]|lower;
 	return s;
 }
 
-static char *fmt_o(uintmax_t x, char *s)
+static char *fmt128_b(uwide128 x, char *s)
 {
-	for (; x; x>>=3) *--s = '0' + (x&7);
+	uint64_t z;
+	uint32_t y;
+	while (x.v64[hi64]) *--s = '0' + __uwide128_div2(&x);
+	for (z = x.v64[lo64]; z>UINT32_MAX; z>>=1) *--s = '0' + z%2;
+	for (y=z;           y; y>>=1) *--s = '0' + y%2;
 	return s;
 }
 
-static char *fmt_u(uintmax_t x, char *s)
+static char *fmt128_o(uwide128 x, char *s)
 {
-	unsigned long y;
-	for (   ; x>ULONG_MAX; x/=10) *--s = '0' + x%10;
-	for (y=x;           y; y/=10) *--s = '0' + y%10;
+	uint64_t z;
+	uint32_t y;
+	while (x.v64[hi64]) *--s = '0' + __uwide128_div8(&x);
+	for (z = x.v64[lo64]; z>UINT32_MAX; z>>=3) *--s = '0' + z%8;
+	for (y=z;           y; y>>=3) *--s = '0' + y%8;
+	return s;
+}
+
+static char *fmt128_u(uwide128 x, char *s)
+{
+	uint64_t z;
+	uint32_t y;
+	while (x.v64[hi64]) *--s = '0' + __uwide128_div10(&x);
+	for (z = x.v64[lo64]; z>UINT32_MAX; z/=10) *--s = '0' + z%10;
+	for (y=z;           y; y/=10) *--s = '0' + y%10;
 	return s;
 }
 
@@ -456,7 +483,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps, width=0;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
+	char buf[sizeof(uwide128)*CHAR_BIT+3+LDBL_MANT_DIG/4];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -498,7 +525,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			if (isdigit(s[1]) && s[2]=='$') {
 				l10n=1;
 				if (!f) nl_type[s[1]-'0'] = INT, w = 0;
-				else w = nl_arg[s[1]-'0'].i;
+				else w = nl_arg[s[1]-'0'].i.v64[lo64];
 				s+=3;
 			} else if (!l10n) {
 				w = f ? va_arg(*ap, int) : 0;
@@ -511,7 +538,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		if (*s=='.' && s[1]=='*') {
 			if (isdigit(s[2]) && s[3]=='$') {
 				if (!f) nl_type[s[2]-'0'] = INT, p = 0;
-				else p = nl_arg[s[2]-'0'].i;
+				else p = nl_arg[s[2]-'0'].i.v64[lo64];
 				s+=4;
 			} else if (!l10n) {
 				p = f ? va_arg(*ap, int) : 0;
@@ -552,6 +579,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 #else
 		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
 #endif
+		case 128: ps = WWPRE; st = (st == UINT) ? UINT128 : ((st == INT) ? INT128 : PTR); break;
 		default:  goto inval;
 		}
 
@@ -592,6 +620,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			case HHPRE: *(unsigned char *)arg.p = cnt; break;
 			case ZTPRE: *(size_t *)arg.p = cnt; break;
 			case JPRE: *(uintmax_t *)arg.p = cnt; break;
+			case WWPRE: *(uwide128 *)arg.p = __uwide128_i64(cnt); break;
 			}
 			continue;
 		case 'p':
@@ -599,39 +628,39 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			t = 'x';
 			fl |= ALT_FORM;
 		case 'x': case 'X':
-			a = fmt_x(arg.i, z, t&32);
-			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
+			a = fmt128_x(arg.i, z, t&32);
+			if (!__uwide128_iszero(arg.i) && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
 		case 'b': case 'B':
-			a = fmt_b(arg.i, z);
-			if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
+			a = fmt128_b(arg.i, z);
+			if (!__uwide128_iszero(arg.i) && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
 			} if (0) {
 		case 'o':
-			a = fmt_o(arg.i, z);
+			a = fmt128_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
 			} if (0) {
 		case 'd': case 'i':
 			pl=1;
-			if (arg.i>INTMAX_MAX) {
-				arg.i=-arg.i;
+			if (arg.i.v64[hi64]>INT64_MAX) {
+				arg.i=__uwide128_neg(arg.i);
 			} else if (fl & MARK_POS) {
 				prefix++;
 			} else if (fl & PAD_POS) {
 				prefix+=2;
 			} else pl=0;
 		case 'u':
-			a = fmt_u(arg.i, z);
+			a = fmt128_u(arg.i, z);
 			}
 			if (xp && p<0) goto overflow;
 			if (xp) fl &= ~ZERO_PAD;
-			if (!arg.i && !p) {
+			if (__uwide128_iszero(arg.i) && !p) {
 				a=z;
 				break;
 			}
-			p = MAX(p, z-a + !arg.i);
+			p = MAX(p, z-a + __uwide128_iszero(arg.i));
 			break;
 		case 'c':
-			*(a=z-(p=1))=arg.i;
+			*(a=z-(p=1))=arg.i.v64[lo64];
 			fl &= ~ZERO_PAD;
 			break;
 		case 'm':
@@ -644,7 +673,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			fl &= ~ZERO_PAD;
 			break;
 		case 'C':
-			wc[0] = arg.i;
+			wc[0] = arg.i.v64[lo64];
 			wc[1] = 0;
 			arg.p = wc;
 			p = -1;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 3689c2d5..d510233f 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -1,4 +1,5 @@
 #include "stdio_impl.h"
+#include "uwide128.h"
 #include <errno.h>
 #include <ctype.h>
 #include <limits.h>
@@ -28,10 +29,12 @@ enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
 	ZTPRE, JPRE, WPRE,
 	STOP,
+	WWPRE,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
 	SHORT, USHORT, CHAR, UCHAR,
 	LLONG, SIZET, IMAX, UMAX, PDIFF, UIPTR,
+	INT128, UINT128,
 	DBL, LDBL,
 	NOARG,
 	MAXSTATE
@@ -107,7 +110,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 
 union arg
 {
-	uintmax_t i;
+	uwide128 i;
 	long double f;
 	void *p;
 };
@@ -116,21 +119,23 @@ static void pop_arg(union arg *arg, int type, va_list *ap)
 {
 	switch (type) {
 	       case PTR:	arg->p = va_arg(*ap, void *);
-	break; case INT:	arg->i = va_arg(*ap, int);
-	break; case UINT:	arg->i = va_arg(*ap, unsigned int);
-	break; case LONG:	arg->i = va_arg(*ap, long);
-	break; case ULONG:	arg->i = va_arg(*ap, unsigned long);
-	break; case ULLONG:	arg->i = va_arg(*ap, unsigned long long);
-	break; case SHORT:	arg->i = (short)va_arg(*ap, int);
-	break; case USHORT:	arg->i = (unsigned short)va_arg(*ap, int);
-	break; case CHAR:	arg->i = (signed char)va_arg(*ap, int);
-	break; case UCHAR:	arg->i = (unsigned char)va_arg(*ap, int);
-	break; case LLONG:	arg->i = va_arg(*ap, long long);
-	break; case SIZET:	arg->i = va_arg(*ap, size_t);
-	break; case IMAX:	arg->i = va_arg(*ap, intmax_t);
-	break; case UMAX:	arg->i = va_arg(*ap, uintmax_t);
-	break; case PDIFF:	arg->i = va_arg(*ap, ptrdiff_t);
-	break; case UIPTR:	arg->i = (uintptr_t)va_arg(*ap, void *);
+	break; case INT:	arg->i = __uwide128_i64(va_arg(*ap, int));
+	break; case UINT:	arg->i = __uwide128_u64(va_arg(*ap, unsigned int));
+	break; case LONG:	arg->i = __uwide128_i64(va_arg(*ap, long));
+	break; case ULONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long));
+	break; case ULLONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long long));
+	break; case SHORT:	arg->i = __uwide128_i64((short)va_arg(*ap, int));
+	break; case USHORT:	arg->i = __uwide128_u64((unsigned short)va_arg(*ap, int));
+	break; case CHAR:	arg->i = __uwide128_i64((signed char)va_arg(*ap, int));
+	break; case UCHAR:	arg->i = __uwide128_u64((unsigned char)va_arg(*ap, int));
+	break; case LLONG:	arg->i = __uwide128_i64(va_arg(*ap, long long));
+	break; case SIZET:	arg->i = __uwide128_u64(va_arg(*ap, size_t));
+	break; case IMAX:	arg->i = __uwide128_i64(va_arg(*ap, intmax_t));
+	break; case UMAX:	arg->i = __uwide128_u64(va_arg(*ap, uintmax_t));
+	break; case PDIFF:	arg->i = __uwide128_i64(va_arg(*ap, ptrdiff_t));
+	break; case UIPTR:	arg->i = __uwide128_u64((uintptr_t)va_arg(*ap, void *));
+	break; case INT128:	arg->i = __uwide128_pop(ap);
+	break; case UINT128:	arg->i = __uwide128_pop(ap);
 	break; case DBL:	arg->f = va_arg(*ap, double);
 	break; case LDBL:	arg->f = va_arg(*ap, long double);
 	}
@@ -213,7 +218,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			if (iswdigit(s[1]) && s[2]=='$') {
 				l10n=1;
 				nl_type[s[1]-'0'] = INT;
-				w = nl_arg[s[1]-'0'].i;
+				w = nl_arg[s[1]-'0'].i.v64[lo64];
 				s+=3;
 			} else if (!l10n) {
 				w = f ? va_arg(*ap, int) : 0;
@@ -226,7 +231,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		if (*s=='.' && s[1]=='*') {
 			if (isdigit(s[2]) && s[3]=='$') {
 				nl_type[s[2]-'0'] = INT;
-				p = nl_arg[s[2]-'0'].i;
+				p = nl_arg[s[2]-'0'].i.v64[lo64];
 				s+=4;
 			} else if (!l10n) {
 				p = f ? va_arg(*ap, int) : 0;
@@ -267,6 +272,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 #else
 		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
 #endif
+		case 128: ps = WWPRE; st = (st == UINT) ? UINT128 : ((st == INT) ? INT128 : PTR); break;
 		default:  goto inval;
 		}
 
@@ -297,13 +303,14 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			case HHPRE: *(unsigned char *)arg.p = cnt; break;
 			case ZTPRE: *(size_t *)arg.p = cnt; break;
 			case JPRE: *(uintmax_t *)arg.p = cnt; break;
+			case WWPRE: *(uwide128 *)arg.p = __uwide128_i64(cnt); break;
 			}
 			continue;
 		case 'c':
 		case 'C':
 			if (w<1) w=1;
 			pad(f, w-1, fl);
-			out(f, &(wchar_t){t=='C' ? arg.i : btowc(arg.i)}, 1);
+			out(f, &(wchar_t){t=='C' ? arg.i.v64[lo64] : btowc(arg.i.v64[lo64])}, 1);
 			pad(f, w-1, fl^LEFT_ADJ);
 			l = w;
 			continue;
-- 
2.34.1

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.