musl - Re: crypt* files in crypt directory

Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120809115811.GA32316@port70.net>
Date: Thu, 9 Aug 2012 13:58:12 +0200
From: Szabolcs Nagy <nsz@...t70.net>
To: musl@...ts.openwall.com
Subject: Re: crypt* files in crypt directory

it's scary how these crypto codes mix various
signed and unsigned integer types

i thought these algorithms were designed and
implemented with the c type system in mind..


i added review comments
the style comments are subjective of course

* Solar Designer <solar@...nwall.com> [2012-08-09 14:53:48 +0400]:
> typedef unsigned int BF_word;
> typedef signed int BF_word_signed;
> 
> /* Number of Blowfish rounds, this is also hardcoded into a few places */
> #define BF_N				16
> 
> typedef BF_word BF_key[BF_N + 2];

i don't like these typedefs
it seems the code assumes 32bit unsigned BF_word
(eg. the way L>>24 is used below)

> #define BF_safe_atoi64(dst, src) \
> { \
> 	tmp = (unsigned char)(src); \
> 	if ((unsigned int)(tmp -= 0x20) >= 0x60) return -1; \

tmp is already unsigned int

> #define BF_ROUND(L, R, N) \
> 	tmp1 = L & 0xFF; \
> 	tmp2 = L >> 8; \
> 	tmp2 &= 0xFF; \
> 	tmp3 = L >> 16; \
> 	tmp3 &= 0xFF; \
> 	tmp4 = L >> 24; \
> 	tmp1 = ctx->s.S[3][tmp1]; \
> 	tmp2 = ctx->s.S[2][tmp2]; \
> 	tmp3 = ctx->s.S[1][tmp3]; \
> 	tmp3 += ctx->s.S[0][tmp4]; \
> 	tmp3 ^= tmp2; \
> 	R ^= ctx->s.P[N + 1]; \
> 	tmp3 += tmp1; \
> 	R ^= tmp3;

i guess this is performance critical, but
i wouldn't spread those expressions over
several lines

tmp1 = ctx->S[3][L & 0xff];
tmp2 = ctx->S[2][L>>8 & 0xff];
tmp3 = ctx->S[1][L>>16 & 0xff];
tmp4 = ctx->S[0][L>>24 & 0xff];
R ^= ctx->P[N+1];
R ^= ((tmp3 + tmp4) ^ tmp2) + tmp1;

> 	do {
> 		ptr += 2;
> 		L ^= ctx->s.P[0];
> 		BF_ROUND(L, R, 0);
> 		BF_ROUND(R, L, 1);
> 		BF_ROUND(L, R, 2);
> 		BF_ROUND(R, L, 3);
> 		BF_ROUND(L, R, 4);
> 		BF_ROUND(R, L, 5);
> 		BF_ROUND(L, R, 6);
> 		BF_ROUND(R, L, 7);
> 		BF_ROUND(L, R, 8);
> 		BF_ROUND(R, L, 9);
> 		BF_ROUND(L, R, 10);
> 		BF_ROUND(R, L, 11);
> 		BF_ROUND(L, R, 12);
> 		BF_ROUND(R, L, 13);
> 		BF_ROUND(L, R, 14);
> 		BF_ROUND(R, L, 15);
> 		tmp4 = R;
> 		R = L;
> 		L = tmp4 ^ ctx->s.P[BF_N + 1];
> 		*(ptr - 1) = R;
> 		*(ptr - 2) = L;
> 	} while (ptr < end);

why increase ptr at the begining?
it seems the idiomatic way would be

 *ptr++ = L;
 *ptr++ = R;

> 			tmp[1] |= (BF_word_signed)(signed char)*ptr; /* bug */

i don't think the BF_word_signed cast helps here
signed char is promoted to int and then
it will be converted to BF_word

> 	if (setting[0] != '$' ||
> 	    setting[1] != '2' ||
> 	    setting[2] < 'a' || setting[2] > 'z' ||
> 	    !flags_by_subtype[(unsigned int)(unsigned char)setting[2] - 'a'] ||

setting[2] is already range checked so
the casts are not necessary
(assuming 'a' < 'z')

> 	BF_set_key(key, data.expanded_key, data.ctx.s.P,
> 	    flags_by_subtype[(unsigned int)(unsigned char)setting[2] - 'a']);

ditto

> 		do {
> 			L = BF_encrypt(&data.ctx,
> 			    L ^ data.binary.salt[0], R ^ data.binary.salt[1],
> 			    ptr, ptr);
> 			R = *(ptr + 1);
> 			ptr += 2;
> 
> 			if (ptr >= &data.ctx.PS[BF_N + 2 + 4 * 0x100])
> 				break;
> 
> 			L = BF_encrypt(&data.ctx,
> 			    L ^ data.binary.salt[2], R ^ data.binary.salt[3],
> 			    ptr, ptr);
> 			R = *(ptr + 1);
> 			ptr += 2;
> 		} while (1);

i'd use for (;;) for infinite loops

eventhough most of the loops in the code are do{}while

> 	do {
> 		int done;
> 
> 		for (i = 0; i < BF_N + 2; i += 2) {
> 			data.ctx.s.P[i] ^= data.expanded_key[i];
> 			data.ctx.s.P[i + 1] ^= data.expanded_key[i + 1];
> 		}
> 
> 		done = 0;
> 		do {
> 			BF_encrypt(&data.ctx, 0, 0,
> 			    &data.ctx.PS[0],
> 			    &data.ctx.PS[BF_N + 2 + 4 * 0x100]);
> 
> 			if (done)
> 				break;
> 			done = 1;
> 
> 			{
> 				BF_word tmp1, tmp2, tmp3, tmp4;
> 
> 				tmp1 = data.binary.salt[0];
> 				tmp2 = data.binary.salt[1];
> 				tmp3 = data.binary.salt[2];
> 				tmp4 = data.binary.salt[3];
> 				for (i = 0; i < BF_N; i += 4) {
> 					data.ctx.s.P[i] ^= tmp1;
> 					data.ctx.s.P[i + 1] ^= tmp2;
> 					data.ctx.s.P[i + 2] ^= tmp3;
> 					data.ctx.s.P[i + 3] ^= tmp4;
> 				}
> 				data.ctx.s.P[16] ^= tmp1;
> 				data.ctx.s.P[17] ^= tmp2;
> 			}
> 		} while (1);

i like for better

for (done = 0; ; done++) {
	...
	if (done)
		break;
	...
}

> 		count = 64;
> 		do {
> 			L = BF_encrypt(&data.ctx, L, LR[1],
> 			    &LR[0], &LR[0]);
> 		} while (--count);

for (count = 0; count < 64; count++)

i assume it will be optimized to the same thing
(count is not used later)

> 	output[7 + 22 - 1] = BF_itoa64[(int)
> 		BF_atoi64[(int)setting[7 + 22 - 1] - 0x20] & 0x30];

is setting[28] range checked at this point?

the int casts do not help

> 	_crypt_output_magic(setting, output, size);
> 	retval = BF_crypt(key, setting, output, size, 16);
> 	save_errno = errno;

why save errno before the self test?

if the self test fails then errno can be anything anyway
if it does not fail then errno is unchanged i guess

> 	memcpy(buf.s, test_setting, sizeof(buf.s));

sizeof buf.s
without ()

> 	memset(buf.o, 0x55, sizeof(buf.o));
> 	buf.o[sizeof(buf.o) - 1] = 0;
> 	p = BF_crypt(test_key, buf.s, buf.o, sizeof(buf.o) - (1 + 1), 1);

ditto

i'd write (1 + 1) as 2

> 	    test_hash[(unsigned int)(unsigned char)buf.s[2] & 1],

the extra (unsigned int) cast is unnecessary
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.