diff --git a/include/stdlib.h b/include/stdlib.h index 97ce5a7..d2c911f 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -76,7 +76,8 @@ size_t wcstombs (char *__restrict, const wchar_t *__restrict, size_t); #define EXIT_FAILURE 1 #define EXIT_SUCCESS 0 -#define MB_CUR_MAX ((size_t)+4) +size_t __ctype_get_mb_cur_max(void); +#define MB_CUR_MAX (__ctype_get_mb_cur_max()) #define RAND_MAX (0x7fffffff) diff --git a/src/ctype/__ctype_get_mb_cur_max.c b/src/ctype/__ctype_get_mb_cur_max.c index d235f4d..94b0bd4 100644 --- a/src/ctype/__ctype_get_mb_cur_max.c +++ b/src/ctype/__ctype_get_mb_cur_max.c @@ -1,6 +1,7 @@ #include +#include "locale_impl.h" size_t __ctype_get_mb_cur_max() { - return 4; + return MB_CUR_MAX; } diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h index f15e156..7577b51 100644 --- a/src/internal/locale_impl.h +++ b/src/internal/locale_impl.h @@ -33,3 +33,6 @@ const char *__lctrans_cur(const char *); #undef MB_CUR_MAX #define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1) + +#define CODEUNIT(c) (0xdfff & (signed char)(c)) +#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80) \ No newline at end of file diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h index e1325fe..72c5519 100644 --- a/src/internal/stdio_impl.h +++ b/src/internal/stdio_impl.h @@ -47,6 +47,7 @@ struct _IO_FILE { unsigned char *shend; off_t shlim, shcnt; FILE *prev_locked, *next_locked; + struct __locale_struct *locale; }; size_t __stdio_read(FILE *, unsigned char *, size_t); diff --git a/src/locale/iconv.c b/src/locale/iconv.c index e6121ae..1eeea94 100644 --- a/src/locale/iconv.c +++ b/src/locale/iconv.c @@ -5,6 +5,7 @@ #include #include #include +#include "locale_impl.h" #define UTF_32BE 0300 #define UTF_16LE 0301 @@ -165,9 +166,12 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr int err; unsigned char type = map[-1]; unsigned char totype = tomap[-1]; + locale_t *ploc = &CURRENT_LOCALE, loc = *ploc; if (!in || !*in || !*inb) return 0; + *ploc = UTF8_LOCALE; + for (; *inb; *in+=l, *inb-=l) { c = *(unsigned char *)*in; l = 1; @@ -431,6 +435,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr break; } } + *ploc = loc; return x; ilseq: err = EILSEQ; @@ -445,5 +450,6 @@ starved: x = -1; end: errno = err; + *ploc = loc; return x; } diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c index a1ada24..776b447 100644 --- a/src/locale/langinfo.c +++ b/src/locale/langinfo.c @@ -33,7 +33,8 @@ char *__nl_langinfo_l(nl_item item, locale_t loc) int idx = item & 65535; const char *str; - if (item == CODESET) return "UTF-8"; + if (item == CODESET) + return MB_CUR_MAX==1 ? "UTF-8-CODE-UNITS" : "UTF-8"; switch (cat) { case LC_NUMERIC: diff --git a/src/multibyte/btowc.c b/src/multibyte/btowc.c index 9d2c3b1..dc088a2 100644 --- a/src/multibyte/btowc.c +++ b/src/multibyte/btowc.c @@ -1,7 +1,10 @@ #include #include +#include "locale_impl.h" wint_t btowc(int c) { - return c<128U ? c : EOF; + if (c+1U <= 128) return c; + if (MB_CUR_MAX==1) return CODEUNIT(c); + return WEOF; } diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c index e7b3654..40e2e1a 100644 --- a/src/multibyte/mbrtowc.c +++ b/src/multibyte/mbrtowc.c @@ -6,6 +6,7 @@ #include #include +#include "locale_impl.h" #include "internal.h" size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate_t *restrict st) @@ -27,6 +28,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate if (!n) return -2; if (!c) { if (*s < 0x80) return !!(*wc = *s); + if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1; if (*s-SA > SB-SA) goto ilseq; c = bittab[*s++-SA]; n--; } diff --git a/src/multibyte/mbsrtowcs.c b/src/multibyte/mbsrtowcs.c index 3c1343a..eb8f72a 100644 --- a/src/multibyte/mbsrtowcs.c +++ b/src/multibyte/mbsrtowcs.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include "locale_impl.h" #include "internal.h" size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st) @@ -24,6 +26,23 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs } } + if (MB_CUR_MAX==1) { + if (!ws) return strlen((const char *)s); + for (;;) { + if (!wn) { + *src = (const void *)s; + return wn0; + } + if (!*s) break; + c = *s++; + *ws++ = CODEUNIT(c); + wn--; + } + *ws = 0; + *src = 0; + return wn0-wn; + } + if (!ws) for (;;) { if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) { diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c index 803d221..c147754 100644 --- a/src/multibyte/mbtowc.c +++ b/src/multibyte/mbtowc.c @@ -6,6 +6,7 @@ #include #include +#include "locale_impl.h" #include "internal.h" int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) @@ -19,6 +20,7 @@ int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) if (!wc) wc = &dummy; if (*s < 0x80) return !!(*wc = *s); + if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1; if (*s-SA > SB-SA) goto ilseq; c = bittab[*s++-SA]; diff --git a/src/multibyte/wcrtomb.c b/src/multibyte/wcrtomb.c index 59f733d..75c972c 100644 --- a/src/multibyte/wcrtomb.c +++ b/src/multibyte/wcrtomb.c @@ -6,6 +6,7 @@ #include #include +#include "locale_impl.h" size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st) { @@ -13,6 +14,13 @@ size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st) if ((unsigned)wc < 0x80) { *s = wc; return 1; + } else if (MB_CUR_MAX == 1) { + if (!IS_CODEUNIT(wc)) { + errno = EILSEQ; + return -1; + } + *s = wc; + return 1; } else if ((unsigned)wc < 0x800) { *s++ = 0xc0 | (wc>>6); *s = 0x80 | (wc&0x3f); diff --git a/src/multibyte/wctob.c b/src/multibyte/wctob.c index d6353ee..412e3c8 100644 --- a/src/multibyte/wctob.c +++ b/src/multibyte/wctob.c @@ -1,8 +1,10 @@ #include #include +#include "locale_impl.h" int wctob(wint_t c) { if (c < 128U) return c; + if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c; return EOF; } diff --git a/src/stdio/fgetwc.c b/src/stdio/fgetwc.c index 8626d54..e455cfe 100644 --- a/src/stdio/fgetwc.c +++ b/src/stdio/fgetwc.c @@ -1,8 +1,9 @@ #include "stdio_impl.h" +#include "locale_impl.h" #include #include -wint_t __fgetwc_unlocked(FILE *f) +static wint_t __fgetwc_unlocked_internal(FILE *f) { mbstate_t st = { 0 }; wchar_t wc; @@ -10,8 +11,6 @@ wint_t __fgetwc_unlocked(FILE *f) unsigned char b; size_t l; - f->mode |= f->mode+1; - /* Convert character from buffer if possible */ if (f->rpos < f->rend) { l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st); @@ -39,6 +38,16 @@ wint_t __fgetwc_unlocked(FILE *f) return wc; } +wint_t __fgetwc_unlocked(FILE *f) +{ + locale_t *ploc = &CURRENT_LOCALE, loc = *ploc; + if (f->mode <= 0) fwide(f, 1); + *ploc = f->locale; + wchar_t wc = __fgetwc_unlocked_internal(f); + *ploc = loc; + return wc; +} + wint_t fgetwc(FILE *f) { wint_t c; diff --git a/src/stdio/fputwc.c b/src/stdio/fputwc.c index 7b621dd..a1c8ac8 100644 --- a/src/stdio/fputwc.c +++ b/src/stdio/fputwc.c @@ -1,4 +1,5 @@ #include "stdio_impl.h" +#include "locale_impl.h" #include #include #include @@ -7,8 +8,10 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f) { char mbc[MB_LEN_MAX]; int l; + locale_t *ploc = &CURRENT_LOCALE, loc = *ploc; - f->mode |= f->mode+1; + if (f->mode <= 0) fwide(f, 1); + *ploc = f->locale; if (isascii(c)) { c = putc_unlocked(c, f); @@ -20,6 +23,7 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f) l = wctomb(mbc, c); if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF; } + *ploc = loc; return c; } diff --git a/src/stdio/fputws.c b/src/stdio/fputws.c index 5723cbc..0ed02f1 100644 --- a/src/stdio/fputws.c +++ b/src/stdio/fputws.c @@ -1,23 +1,28 @@ #include "stdio_impl.h" +#include "locale_impl.h" #include int fputws(const wchar_t *restrict ws, FILE *restrict f) { unsigned char buf[BUFSIZ]; size_t l=0; + locale_t *ploc = &CURRENT_LOCALE, loc = *ploc; FLOCK(f); - f->mode |= f->mode+1; + fwide(f, 1); + *ploc = f->locale; while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1) if (__fwritex(buf, l, f) < l) { FUNLOCK(f); + *ploc = loc; return -1; } FUNLOCK(f); + *ploc = loc; return l; /* 0 or -1 */ } diff --git a/src/stdio/fwide.c b/src/stdio/fwide.c index 8088e7a..8410b15 100644 --- a/src/stdio/fwide.c +++ b/src/stdio/fwide.c @@ -1,13 +1,14 @@ -#include #include "stdio_impl.h" - -#define SH (8*sizeof(int)-1) -#define NORMALIZE(x) ((x)>>SH | -((-(x))>>SH)) +#include "locale_impl.h" int fwide(FILE *f, int mode) { FLOCK(f); - if (!f->mode) f->mode = NORMALIZE(mode); + if (mode) { + if (!f->locale) f->locale = MB_CUR_MAX==1 + ? C_LOCALE : UTF8_LOCALE; + if (!f->mode) f->mode = mode>0 ? 1 : -1; + } mode = f->mode; FUNLOCK(f); return mode; diff --git a/src/stdio/ungetwc.c b/src/stdio/ungetwc.c index 394f92a..80d6e20 100644 --- a/src/stdio/ungetwc.c +++ b/src/stdio/ungetwc.c @@ -1,4 +1,5 @@ #include "stdio_impl.h" +#include "locale_impl.h" #include #include #include @@ -8,15 +9,18 @@ wint_t ungetwc(wint_t c, FILE *f) { unsigned char mbc[MB_LEN_MAX]; int l=1; + locale_t *ploc = &CURRENT_LOCALE, loc = *ploc; FLOCK(f); - f->mode |= f->mode+1; + if (f->mode <= 0) fwide(f, 1); + *ploc = f->locale; if (!f->rpos) __toread(f); if (!f->rpos || f->rpos < f->buf - UNGET + l || c == WEOF || (!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)) { FUNLOCK(f); + *ploc = loc; return WEOF; } @@ -26,5 +30,6 @@ wint_t ungetwc(wint_t c, FILE *f) f->flags &= ~F_EOF; FUNLOCK(f); + *ploc = loc; return c; }