Locale framework patch: per-thread locale pointer, global locale object,
environment-driven category selection, and CURRENT_LOCALE plumbing.

This copy was recovered from a text extraction that dropped every <...>
span. Angle-bracket #include operands are therefore missing (the bare
"#include" lines below), and two stretches of the patch are lost; each
is marked with a "\" annotation line. Nothing lost has been reinvented.

Corrections relative to the extracted text:
  * duplocale: drop the leftover struct memcpy, which overwrote the new
    locale's messages_name pointer with the old locale's.
  * pthread_exit: decrement the byte-locale counter for non-UTF-8
    locales, mirroring the increment in uselocale.
  * __setlocalecat: treat set-but-empty LC_ALL / LC_* / LANG as unset.
The blob hashes on the "index" lines of those three files are stale.

diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index 13cf2ee..efa0728 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -16,6 +16,7 @@ int __init_tp(void *p)
 	if (!r) libc.can_do_threads = 1;
 	libc.has_thread_pointer = 1;
 	td->tid = td->pid = __syscall(SYS_set_tid_address, &td->tid);
+	td->locale = &libc.global_locale;
 	return 0;
 }
 
diff --git a/src/internal/libc.h b/src/internal/libc.h
index fb4d9bc..037d16b 100644
--- a/src/internal/libc.h
+++ b/src/internal/libc.h
@@ -5,6 +5,12 @@
 #include
 #include
 
+/* Locale state: LC_CTYPE encoding flag and LC_MESSAGES locale name. */
+struct __locale_struct {
+	int ctype_utf8;
+	char *messages_name;
+};
+
 struct __libc {
 	int has_thread_pointer;
 	int can_do_threads;
@@ -16,6 +22,11 @@ struct __libc {
 	int ofl_lock[2];
 	size_t tls_size;
 	size_t page_size;
+	/* Number of threads whose locale is not the global one. */
+	volatile int uselocale_cnt;
+	/* Bookkeeping read by CURRENT_UTF8; adjusted in uselocale. */
+	volatile int bytelocale_cnt_minus_1;
+	struct __locale_struct global_locale;
 };
 
 extern size_t __hwcap;
diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h
index f41c6f2..2747b85 100644
--- a/src/internal/locale_impl.h
+++ b/src/internal/locale_impl.h
@@ -1,5 +1,21 @@
 #include
+#include
+#include "libc.h"
+#include "pthread_impl.h"
 
-struct __locale_struct {
-	int dummy;
-};
+#define LOCALE_NAME_MAX 15
+
+int __setlocalecat(locale_t, int, const char *);
+
+/* The calling thread's locale. The thread structure is consulted
+ * only while some thread has a non-global locale installed. */
+#define CURRENT_LOCALE \
+	(libc.uselocale_cnt ? __pthread_self()->locale : &libc.global_locale)
+
+/* Nonzero when the calling thread's LC_CTYPE is UTF-8: either the
+ * byte-locale counter is negative or the thread's locale says so. */
+#define CURRENT_UTF8 \
+	(libc.bytelocale_cnt_minus_1<0 || __pthread_self()->locale->ctype_utf8)
+
+#undef MB_CUR_MAX
+#define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
diff --git a/src/locale/__setlocalecat.c b/src/locale/__setlocalecat.c
new file mode 100644
index 0000000..f1e4bf0
--- /dev/null
+++ b/src/locale/__setlocalecat.c
@@ -0,0 +1,53 @@
+#include
+#include
+#include "locale_impl.h"
+#include "libc.h"
+#include "atomic.h"
+
+static const char envvars[][12] = {
+	"LC_CTYPE",
+	"LC_NUMERIC",
+	"LC_TIME",
+	"LC_COLLATE",
+	"LC_MONETARY",
+	"LC_MESSAGES",
+};
+
+/* Set category cat of loc from the locale name val. An empty name
+ * selects the environment (LC_ALL, then the category's own variable,
+ * then LANG), defaulting to "C.UTF-8". Always returns 0. */
+int __setlocalecat(locale_t loc, int cat, const char *val)
+{
+	if (!*val) {
+		/* POSIX: a locale variable that is set but empty is
+		 * treated as unset, so fall through to the next one. */
+		((val = getenv("LC_ALL")) && *val) ||
+		((val = getenv(envvars[cat])) && *val) ||
+		((val = getenv("LANG")) && *val) ||
+		(val = "C.UTF-8");
+	}
+
+	size_t n = strnlen(val, LOCALE_NAME_MAX);
+	int builtin = (val[0]=='C' && !val[1])
+		|| !strcmp(val, "C.UTF-8")
+		|| !strcmp(val, "POSIX");
+
+	switch (cat) {
+	case LC_CTYPE:
+		a_store(&loc->ctype_utf8, !builtin || val[1]=='.');
+		break;
+	case LC_MESSAGES:
+		if (builtin) {
+			loc->messages_name[0] = 0;
+		} else {
+			/* n <= LOCALE_NAME_MAX, so the name is
+			 * truncated to fit the destination. */
+			memcpy(loc->messages_name, val, n);
+			loc->messages_name[n] = 0;
+		}
+		/* fall through */
+	default:
+		break;
+	}
+	return 0;
+}
diff --git a/src/locale/duplocale.c b/src/locale/duplocale.c
index f9fc1ff..1336870 100644
--- a/src/locale/duplocale.c
+++ b/src/locale/duplocale.c
@@ -3,12 +3,23 @@
 #include "locale_impl.h"
 #include "libc.h"
 
-locale_t duplocale(locale_t old)
+/* Return a newly allocated copy of old (LC_GLOBAL_LOCALE means the
+ * global locale), or 0 if allocation fails. */
+locale_t __duplocale(locale_t old)
 {
-	locale_t new;
-	new = calloc(1, sizeof *new);
+	locale_t new = calloc(1, sizeof *new + LOCALE_NAME_MAX + 1);
+	if (!new) return 0;
+	/* The name buffer is the extra space allocated after the struct. */
+	new->messages_name = (void *)(new+1);
+
+	if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale;
+	new->ctype_utf8 = old->ctype_utf8;
+	/* Copy the name by value; a whole-struct memcpy would replace
+	 * messages_name with old's pointer and share old's buffer. */
+	if (old->messages_name)
+		strcpy(new->messages_name, old->messages_name);
+
-	if (new && old != LC_GLOBAL_LOCALE) memcpy(new, old, sizeof *new);
 	return new;
 }
 
-weak_alias(duplocale, __duplocale);
+weak_alias(__duplocale, duplocale);
diff --git a/src/locale/langinfo.c b/src/locale/langinfo.c
index 7bb56ee..13abf45 100644
--- a/src/locale/langinfo.c
+++ b/src/locale/langinfo.c
@@ -1,5 +1,6 @@
 #include
 #include
+#include "locale_impl.h"
 #include "libc.h"
 
 static const char c_time[] =
@@ -60,7 +61,7 @@ char *__nl_langinfo_l(nl_item item, locale_t loc)
 
 char *__nl_langinfo(nl_item item)
 {
-	return __nl_langinfo_l(item, 0);
+	return __nl_langinfo_l(item, CURRENT_LOCALE);
 }
 
 weak_alias(__nl_langinfo, nl_langinfo);
diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c
index 447c8fc..39501d0 100644
--- a/src/locale/newlocale.c
+++ b/src/locale/newlocale.c
@@ -3,12 +3,24 @@
 #include "locale_impl.h"
 #include "libc.h"
 
-locale_t newlocale(int mask, const char *name, locale_t base)
+locale_t __newlocale(int mask, const char *name, locale_t loc)
 {
-	if (*name && strcmp(name, "C") && strcmp(name, "POSIX"))
-		return 0;
-	if (!base) base = calloc(1, sizeof *base);
-	return base;
+	int i;
+
+	if (!loc) {
+		loc = calloc(1, sizeof *loc + LOCALE_NAME_MAX + 1);
+		if (!loc) return 0;
+		loc->messages_name = (void *)(loc+1);
+		for (i=0; i
\ [text lost in extraction: the rest of the newlocale.c hunk and the file header, hunk header and first include line of the setlocale.c diff]
+#include
+#include
+#include "locale_impl.h"
+#include "libc.h"
+#include "atomic.h"
-char *setlocale(int category, const char *locale)
+static char buf[2+4*(LOCALE_NAME_MAX+1)];
+
+char *setlocale(int cat, const char *name)
 {
-	/* Note: plain "C" would be better, but puts some broken
-	 * software into legacy 8-bit-codepage mode, ignoring
-	 * the standard library's multibyte encoding */
-	return "C.UTF-8";
+	if (!libc.global_locale.messages_name) {
+		libc.global_locale.messages_name =
+			buf + 2 + 3*(LOCALE_NAME_MAX+1);
+	}
+
+	if ((unsigned)cat > LC_ALL) return 0;
+
+	/* For LC_ALL, setlocale is required to return a string which
+	 * encodes the current setting for all categories. The format of
+	 * this string is unspecified, and only the following code, which
+	 * performs both the serialization and deserialization, depends
+	 * on the format, so it can easily be changed if needed. */
+	if (cat == LC_ALL) {
+		if (name) {
+			char part[LOCALE_NAME_MAX+1];
+			int i, j;
+			if (name[0] && name[1]==';'
+			    && strlen(name) > 2 + 3*(LOCALE_NAME_MAX+1)) {
+				part[0] = name[0];
+				part[1] = 0;
+				setlocale(LC_CTYPE, part);
+				part[LOCALE_NAME_MAX] = 0;
+				for (i=LC_TIME; i
\ [text lost in extraction: the rest of the setlocale.c hunk and the file header and first hunk header of the strfmon.c diff]
 #include
 #include
 #include
+#include "locale_impl.h"
 static ssize_t vstrfmon_l(char *s, size_t n, locale_t loc, const char *fmt, va_list ap)
 {
@@ -93,7 +94,7 @@ ssize_t strfmon(char *restrict s, size_t n, const char *restrict fmt, ...)
 	ssize_t ret;
 
 	va_start(ap, fmt);
-	ret = vstrfmon_l(s, n, 0, fmt, ap);
+	ret = vstrfmon_l(s, n, CURRENT_LOCALE, fmt, ap);
 	va_end(ap);
 
 	return ret;
diff --git a/src/locale/uselocale.c b/src/locale/uselocale.c
index 4fc5c64..5106795 100644
--- a/src/locale/uselocale.c
+++ b/src/locale/uselocale.c
@@ -2,12 +2,28 @@
 #include "pthread_impl.h"
 #include "libc.h"
 
-locale_t uselocale(locale_t l)
+/* Make new the calling thread's locale (a null new only queries) and
+ * return the previous one, reporting the global locale as
+ * LC_GLOBAL_LOCALE. Keeps the libc-wide locale counters in step. */
+locale_t __uselocale(locale_t new)
 {
 	pthread_t self = __pthread_self();
 	locale_t old = self->locale;
-	if (l) self->locale = l;
-	return old;
+	locale_t global = &libc.global_locale;
+
+	if (new == LC_GLOBAL_LOCALE) new = global;
+
+	if (new && new != old) {
+		int adj = 0;
+		if (new == global) a_dec(&libc.uselocale_cnt);
+		else if (!new->ctype_utf8) adj++;
+		if (old == global) a_inc(&libc.uselocale_cnt);
+		else if (!old->ctype_utf8) adj--;
+		a_fetch_add(&libc.bytelocale_cnt_minus_1, adj);
+		self->locale = new;
+	}
+
+	return old == global ? LC_GLOBAL_LOCALE : old;
 }
 
-weak_alias(uselocale, __uselocale);
+weak_alias(__uselocale, uselocale);
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index e9c8160..a7493c1 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -57,6 +57,14 @@ _Noreturn void pthread_exit(void *result)
 		exit(0);
 	}
 
+	/* Undo this thread's uselocale accounting. The byte-locale
+	 * counter was raised only for a non-UTF-8 locale. */
+	if (self->locale != &libc.global_locale) {
+		a_dec(&libc.uselocale_cnt);
+		if (!self->locale->ctype_utf8)
+			a_dec(&libc.bytelocale_cnt_minus_1);
+	}
+
 	if (self->detached && self->map_base) {
 		/* Detached threads must avoid the kernel clear_child_tid
 		 * feature, since the virtual address will have been
@@ -205,6 +213,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
 	new->start_arg = arg;
 	new->self = new;
 	new->tsd = (void *)tsd;
+	new->locale = &libc.global_locale;
 	if (attr._a_detach) {
 		new->detached = 1;
 		flags -= CLONE_CHILD_CLEARTID;
diff --git a/src/time/strftime.c b/src/time/strftime.c
index 75ebca6..794fbe1 100644
--- a/src/time/strftime.c
+++ b/src/time/strftime.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include "locale_impl.h"
 #include "libc.h"
 #include "time_impl.h"
 
@@ -263,7 +264,7 @@ size_t __strftime_l(char *restrict s, size_t n, const char *restrict f, const st
 
 size_t strftime(char *restrict s, size_t n, const char *restrict f, const struct tm *restrict tm)
 {
-	return __strftime_l(s, n, f, tm, 0);
+	return __strftime_l(s, n, f, tm, CURRENT_LOCALE);
 }
 
 weak_alias(__strftime_l, strftime_l);
diff --git a/src/time/wcsftime.c b/src/time/wcsftime.c
index 8d2a2eb..638e64f 100644
--- a/src/time/wcsftime.c
+++ b/src/time/wcsftime.c
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include "locale_impl.h"
 #include "libc.h"
 
 const char *__strftime_fmt_1(char (*s)[100], size_t *l, int f, const struct tm *tm, locale_t loc);
@@ -64,7 +65,7 @@ size_t __wcsftime_l(wchar_t *restrict s, size_t n, const wchar_t *restrict f, co
 
 size_t wcsftime(wchar_t *restrict wcs, size_t n, const wchar_t *restrict f, const struct tm *restrict tm)
 {
-	return __wcsftime_l(wcs, n, f, tm, 0);
+	return __wcsftime_l(wcs, n, f, tm, CURRENT_LOCALE);
 }
 
 weak_alias(__wcsftime_l, wcsftime_l);