Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181216015704.GD23599@brightrain.aerifal.cx>
Date: Sat, 15 Dec 2018 20:57:04 -0500
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: additional patches for musl

On Sun, Dec 16, 2018 at 02:01:56AM +0200, сергей волковичь wrote:
> cabulertion.
> i finded additional patches for implementing some missing musl features.



> --- a/include/stdlib.h
> +++ b/include/stdlib.h
> @@ -120,6 +120,9 @@
>  #if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) \
>   || defined(_BSD_SOURCE)
>  char *realpath (const char *__restrict, char *__restrict);
> +#ifdef _GNU_SOURCE
> +char *canonicalize_file_name (const char *__restrict);
> +#endif

Small nit but rather than adding new #ifdef _GNU_SOURCE sections for
each declaration, any new ones should be added under the existing
section if there is one (and in stdlib.h there is).

>  long int random (void);
>  void srandom (unsigned int);
>  char *initstate (unsigned int, char *, size_t);
> --- a/src/misc/realpath.c
> +++ b/src/misc/realpath.c
> @@ -42,4 +42,9 @@
>  err:
>  	__syscall(SYS_close, fd);
>  	return 0;
>  }
> +
> +char *canonicalize_file_name(const char *restrict filename)
> +{
> +	return realpath(filename, NULL);
> +}

This can't go in the same file as realpath because it's not in the
reserved namespace. Further it's probably not an acceptable addition
since it's not widely used and has no advantages over the standard way
of writing the exact same thing using realpath directly.

> --- a/include/netdb.h
> +++ b/include/netdb.h
> @@ -124,6 +124,8 @@
>  #define NO_RECOVERY    3
>  #define NO_DATA        4
>  #define NO_ADDRESS     NO_DATA
> +#define NETDB_INTERNAL -1
> +#define NETDB_SUCCESS  0
>  #endif




> --- a/include/ftw.h
> +++ b/include/ftw.h
> @@ -20,6 +20,14 @@
>  #define FTW_MOUNT 2
>  #define FTW_CHDIR 4
>  #define FTW_DEPTH 8
> +
> +#ifdef _GNU_SOURCE
> +#define FTW_ACTIONRETVAL 16
> +#define FTW_CONTINUE 0
> +#define FTW_STOP 1
> +#define FTW_SKIP_SUBTREE 2
> +#define FTW_SKIP_SIBLINGS 3
> +#endif

This one and the corresponding functional change is probably worth
consideration. I've seen a few a

>  
>  struct FTW {
>  	int base;
> --- a/src/misc/nftw.c
> +++ b/src/misc/nftw.c
> @@ -1,3 +1,8 @@
> +#define FTW_ACTIONRETVAL 16
> +#define FTW_CONTINUE 0
> +#define FTW_STOP 1
> +#define FTW_SKIP_SUBTREE 2
> +#define FTW_SKIP_SIBLINGS 3
>  #include <ftw.h>
>  #include <dirent.h>
>  #include <sys/stat.h>
> @@ -58,8 +58,20 @@
>  	lev.level = new.level;
>  	lev.base = h ? h->base : (name=strrchr(path, '/')) ? name-path : 0;
>  
> -	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
> -		return r;
> +	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev))) {
> +		if (flags & FTW_ACTIONRETVAL)
> +			switch (r) {
> +				case FTW_SKIP_SUBTREE:
> +					h = NULL;
> +				case FTW_CONTINUE:
> +					break;
> +				case FTW_SKIP_SIBLINGS:
> +				case FTW_STOP:
> +					return r;
> +			}
> +		else
> +			return r;
> +	}
>  
>  	for (; h; h = h->chain)
>  		if (h->dev == st.st_dev && h->ino == st.st_ino)
> @@ -82,7 +94,10 @@
>  				strcpy(path+j+1, de->d_name);
>  				if ((r=do_nftw(path, fn, fd_limit-1, flags, &new))) {
>  					closedir(d);
> -					return r;
> +					if ((flags & FTW_ACTIONRETVAL) && r == FTW_SKIP_SIBLINGS)
> +						break;
> +					else
> +						return r;
>  				}
>  			}
>  			closedir(d);
> @@ -93,8 +108,16 @@
>  	}
>  
>  	path[l] = 0;
> -	if ((flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
> -		return r;
> +	if ((flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev))) {
> +		if (flags & FTW_ACTIONRETVAL)
> +			switch (r) {
> +				case FTW_SKIP_SIBLINGS:
> +				case FTW_STOP:
> +					return r;
> +			}
> +		else
> +			return r;
> +	}
>  
>  	return 0;
>  }

> --- /dev/null
> +++ b/src/misc/parse-printf-format.c
> @@ -0,0 +1,265 @@
> +/***
> +  Copyright 2014 Emil Renner Berthing
> +  With parts from the musl C library
> +  Copyright 2005-2014 Rich Felker, et al.
> +
> +  This file is free software; you can redistribute it and/or modify it
> +  under the terms of the GNU Lesser General Public License as published by
> +  the Free Software Foundation; either version 2.1 of the License, or
> +  (at your option) any later version.

Please do not send license-incompatible patches to musl.

> --- /dev/null
> +++ b/include/printf.h
> @@ -0,0 +1,41 @@
> +/***
> +  Copyright 2014 Emil Renner Berthing
> +  With parts from the GNU C Library
> +  Copyright 1991-2014 Free Software Foundation, Inc.
> +
> +  This file is free software; you can redistribute it and/or modify it
> +  under the terms of the GNU Lesser General Public License as published by

Likewise.

> +  the Free Software Foundation; either version 2.1 of the License, or
> +  (at your option) any later version.
> +
> +  This file is distributed in the hope that it will be useful, but
> +  WITHOUT ANY WARRANTY; without even the implied warranty of
> +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +  Lesser General Public License for more details.
> +***/
> +
> +#pragma once

#pragma once is not valid C. There is a universal standard way to do this.

> --- a/include/stdlib.h
> +++ b/include/stdlib.h
> @@ -52,10 +52,13 @@
>  char *getenv (const char *);
>  
>  int system (const char *);
>  
> +typedef int (*__compar_fn_t)(const void *, const void *);

This is in the internal namespace and it's an error for any
application to be using it.

> +typedef int (*comparison_fn_t)(const void *, const void *);

This is presumably a GNU extension (so would need to be in the
appropriate #ifdef block if added) but it probably doesn't meet the
usefulness criterion. Any code using this non-portable construct can
trivially be fixed to be portable.

>  void *bsearch (const void *, const void *, size_t, size_t, int (*)(const void *, const void *));
>  void qsort (void *, size_t, size_t, int (*)(const void *, const void *));
> +void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);

qsort_r is an open issue pending action with the BSDs and the Austin
Group for unifying and standardizing it. Historically some BSDs had an
opposite-argument-order function by the same name which is why musl
has not added it. It might be good to go now; we need to check.

> --- a/src/stdlib/qsort.c	2017-01-01 04:27:17.000000000 +0100
> +++ b/src/stdlib/qsort.c	2017-01-29 00:21:27.194887091 +0100
> @@ -28,7 +28,7 @@
>  #include "atomic.h"
>  #define ntz(x) a_ctz_l((x))
>  
> -typedef int (*cmpfun)(const void *, const void *);
> +typedef int (*cmpfun)(const void *, const void *, void *);
>  
>  static inline int pntz(size_t p[2]) {
>  	int r = ntz(p[0] - 1);
> @@ -85,7 +85,7 @@
>  	p[1] >>= n;
>  }
>  
> -static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[])
> +static void sift(unsigned char *head, size_t width, cmpfun cmp, void* arg, int pshift, size_t lp[])

Passing the extra context arg all the way through everything possibly
has significant performance hit for the standard qsort function. This
needs to be measured, and if so, either 2 separate versions need to be
built, or qsort_r needs to be implemented on top of qsort using TLS.

> [...]
> +
> +void qsort(void *base, size_t nel, size_t width, int (*cmp)(const void *, const void *))
> +{
> +	return qsort_r (base, nel, width, (cmpfun)cmp, NULL);
> +}

This cast and subsequent calling the cmp function with the incorrect
signature has undefined behavior and can't be used.

> --- a/include/stdlib.h
> +++ b/include/stdlib.h
> @@ -50,7 +50,8 @@
>  int at_quick_exit (void (*) (void));
>  _Noreturn void quick_exit (int);
>  
>  char *getenv (const char *);
> +char *secure_getenv (const char *);

Addition of this function is probably ok, but it needs to be under the
proper _GNU_SOURCE test.

>  
>  int system (const char *);
>  
> --- /dev/null
> +++ b/src/env/secure_getenv.c
> @@ -0,0 +1,8 @@
> +#define _DEFAULT_SOURCE 1
> +#include <stdlib.h>
> +#include <unistd.h>
> +
> +char *secure_getenv(const char *name)
> +{
> +	return issetugid() ? NULL : getenv(name);
> +}

> --- a/include/string.h
> +++ b/include/string.h
> @@ -85,7 +85,21 @@
>  #endif
>  
>  #ifdef _GNU_SOURCE
> -#define	strdupa(x)	strcpy(alloca(strlen(x)+1),x)
> +#ifdef __GNUC__
> +#define strdupa(x)	(__extension__({ \
> +			const char* __old = (x); \
> +			size_t __len = strlen(x) + 1; \
> +			char* __new = (char*)alloca(__len); \
> +			(char*)memcpy(__new, __old, __len); \
> +			}))
> +#define strndupa(x,y)	(__extension__({ \
> +			const char* __old = (x); \
> +			size_t __len = strnlen(x, (y)); \
> +			char* __new = (char*)alloca(__len + 1); \
> +			__new[__len] = 0; \
> +			(char*)memcpy(__new, __old, __len); \
> +			}))
> +#endif

This has been discussed before and we have a pending patch somewhere
that does it in another way. I'll see if I can dig it up and comment
based on that. Ultimately stdndupa is a really bad idea, but if it's
purely inline in a header there's not much burden on us by having it.

> --- a/include/utmpx.h
> +++ b/include/utmpx.h
> @@ -55,6 +55,12 @@
>  #define USER_PROCESS    7
>  #define DEAD_PROCESS    8
>  
> +
> +#ifdef _GNU_SOURCE
> +#define _PATH_UTMPX "/dev/null/utmp"
> +#define _PATH_WTMPX "/dev/null/wtmp"
> +#endif
> +
>  #ifdef __cplusplus
>  }
>  #endif

I'm not sure if this is a good idea or not. Those paths have been
problematic in the past.

> define unimplemented macros to 0 so that systemd compiles
> --- a/include/glob.h
> +++ b/include/glob.h
> @@ -22,6 +22,7 @@
>  int  glob(const char *__restrict, int, int (*)(const char *, int), glob_t *__restrict);
>  void globfree(glob_t *);
>  
> +#define GLOB_BRACE 0

This is definitely not acceptable. It advertises the existence of a
feature we don't have, making it impossible for applications to make
correct use of #ifdef to decide whether to use that feature or whether
to use their own replacement for the glob function that provides it.

If your goal in all this is to build systemd, a better approach is
just using the systemd patches others have already done. systemd
policy is *opposed* to writing portable code, or to making any
promises about what level of GNU extensions they require and will
require in the future. So if we just added a bunch of random stuff to
make systemd work out of the box, a few months or a year from now we'd
find that it doesn't work anymore, and we have to either add whatever
other new glibc thing they're demanding is, or accept that we added a
bunch of junk for nothing. This has been discussed a lot before and
it's not a game musl is willing to play.

> --- a/include/regex.h
> +++ b/include/regex.h
> @@ -31,6 +31,7 @@
>  
>  #define REG_NOTBOL      1
>  #define REG_NOTEOL      2
> +#define REG_STARTEND    0

Likewise this. There has been a request to add it before, and I think
there's a patch somewhere doing just that. The main concern is
long-term cost. The current regex implementation has sufficient
overhead that it doesn't matter, but if we do a better one at some
point in the future it might come back to bite us. I think it's
probably a good idea to add though.

>  #define REG_OK          0
>  #define REG_NOMATCH     1
> --- a/include/netdb.h
> +++ b/include/netdb.h
> @@ -140,6 +140,8 @@
>  #define EAI_IDN_ENCODE -105
>  #define NI_MAXHOST 255
>  #define NI_MAXSERV 32
> +#define NI_IDN 0
> +#define NI_IDN_USE_STD3_ASCII_RULES 0
>  #endif

The intent is to do IDN without any special flag, in which case
defining it as 0 would possibly be the right thing to do, but at
present it's another case of wrongly advertising a feature that's not
present and breaks application logic.

Rich

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.