diff -urp escrypt-23/crypto_scrypt-nosse.c escrypt-26/crypto_scrypt-nosse.c --- escrypt-23/crypto_scrypt-nosse.c 2010-01-16 20:48:20 +0000 +++ escrypt-26/crypto_scrypt-nosse.c 2013-03-17 20:24:44 +0000 @@ -1,5 +1,6 @@ /*- * Copyright 2009 Colin Percival + * Copyright 2013 Alexander Peslyak * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,7 +47,7 @@ static void blkxor(void *, void *, size_ static void salsa20_8(uint32_t[16]); static void blockmix_salsa8(uint32_t *, uint32_t *, uint32_t *, size_t); static uint64_t integerify(void *, size_t); -static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *); +static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *, int); static void blkcpy(void * dest, void * src, size_t len) @@ -163,7 +164,7 @@ integerify(void * B, size_t r) } /** - * smix(B, r, N, V, XY): + * smix(B, r, N, V, XY, defeat_tmto): * Compute B = SMix_r(B, N). The input B must be 128r bytes in length; * the temporary storage V must be 128rN bytes in length; the temporary * storage XY must be 256r + 64 bytes in length. The value N must be a @@ -171,7 +172,8 @@ integerify(void * B, size_t r) * multiple of 64 bytes. */ static void -smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY) +smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY, + int defeat_tmto) { uint32_t * X = XY; uint32_t * Y = &XY[32 * r]; @@ -207,6 +209,10 @@ smix(uint8_t * B, size_t r, uint64_t N, /* 8: X <-- H(X \xor V_j) */ blkxor(X, &V[j * (32 * r)], 128 * r); blockmix_salsa8(X, Y, Z, r); + if (defeat_tmto) { + blkxor(Y, &V[(j ^ 1) * (32 * r)], 128 * r); + blkcpy(&V[(j ^ 1) * (32 * r)], Y, 128 * r); + } /* 7: j <-- Integerify(X) mod N */ j = integerify(Y, r) & (N - 1); @@ -214,6 +220,10 @@ smix(uint8_t * B, size_t r, uint64_t N, /* 8: X <-- H(X \xor V_j) */ blkxor(Y, &V[j * (32 * r)], 128 * r); blockmix_salsa8(Y, X, Z, r); + if (defeat_tmto) { + blkxor(X, &V[(j ^ 1) * (32 * r)], 128 * r); + blkcpy(&V[(j ^ 1) * (32 * r)], X, 128 * r); + } } /* 10: B' <-- X */ @@ -222,7 +232,8 @@ smix(uint8_t * B, size_t r, uint64_t N, } /** - * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): + * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen, + * defeat_tmto): * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, * p, buflen) and write the result into buf. The parameters r, p, and buflen * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N @@ -231,9 +242,9 @@ smix(uint8_t * B, size_t r, uint64_t N, * Return 0 on success; or -1 on error. */ int -crypto_scrypt(const uint8_t * passwd, size_t passwdlen, +crypto_escrypt(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, - uint8_t * buf, size_t buflen) + uint8_t * buf, size_t buflen, int defeat_tmto) { void * B0, * V0, * XY0; uint8_t * B; @@ -252,7 +263,7 @@ crypto_scrypt(const uint8_t * passwd, si errno = EFBIG; goto err0; } - if (((N & (N - 1)) != 0) || (N == 0)) { + if (((N & (N - 1)) != 0) || (N < 2)) { errno = EINVAL; goto err0; } @@ -309,7 +320,7 @@ crypto_scrypt(const uint8_t * passwd, si /* 2: for i = 0 to p - 1 do */ for (i = 0; i < p; i++) { /* 3: B_i <-- MF(B_i, N) */ - smix(&B[i * 128 * r], r, N, V, XY); + smix(&B[i * 128 * r], r, N, V, XY, defeat_tmto); } /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ diff -urp escrypt-23/crypto_scrypt-ref.c escrypt-26/crypto_scrypt-ref.c --- escrypt-23/crypto_scrypt-ref.c 2010-01-16 20:48:20 +0000 +++ escrypt-26/crypto_scrypt-ref.c 2013-03-17 20:20:55 +0000 @@ -1,5 +1,6 @@ /*- * Copyright 2009 Colin Percival + * Copyright 2013 Alexander Peslyak * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,7 +44,7 @@ static void blkxor(uint8_t *, uint8_t *, static void salsa20_8(uint8_t[64]); static void blockmix_salsa8(uint8_t *, uint8_t *, size_t); static uint64_t integerify(uint8_t *, size_t); -static void smix(uint8_t *, size_t, uint64_t, uint8_t *, uint8_t *); +static void smix(uint8_t *, size_t, uint64_t, uint8_t *, uint8_t *, int); static void blkcpy(uint8_t * dest, uint8_t * src, size_t len) @@ -170,7 +171,8 @@ integerify(uint8_t * B, size_t r) * XY must be 256r bytes in length. The value N must be a power of 2. */ static void -smix(uint8_t * B, size_t r, uint64_t N, uint8_t * V, uint8_t * XY) +smix(uint8_t * B, size_t r, uint64_t N, uint8_t * V, uint8_t * XY, + int defeat_tmto) { uint8_t * X = XY; uint8_t * Y = &XY[128 * r]; @@ -197,6 +199,10 @@ smix(uint8_t * B, size_t r, uint64_t N, /* 8: X <-- H(X \xor V_j) */ blkxor(X, &V[j * (128 * r)], 128 * r); blockmix_salsa8(X, Y, r); + if (defeat_tmto) { + blkxor(X, &V[(j ^ 1) * (128 * r)], 128 * r); + blkcpy(&V[(j ^ 1) * (128 * r)], X, 128 * r); + } } /* 10: B' <-- X */ @@ -204,7 +210,8 @@ smix(uint8_t * B, size_t r, uint64_t N, } /** - * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): + * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen, + * defeat_tmto): * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, * p, buflen) and write the result into buf. The parameters r, p, and buflen * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N @@ -213,9 +220,9 @@ smix(uint8_t * B, size_t r, uint64_t N, * Return 0 on success; or -1 on error. */ int -crypto_scrypt(const uint8_t * passwd, size_t passwdlen, +crypto_escrypt(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, - uint8_t * buf, size_t buflen) + uint8_t * buf, size_t buflen, int defeat_tmto) { uint8_t * B; uint8_t * V; @@ -233,7 +240,7 @@ crypto_scrypt(const uint8_t * passwd, si errno = EFBIG; goto err0; } - if (((N & (N - 1)) != 0) || (N == 0)) { + if (((N & (N - 1)) != 0) || (N < 2)) { errno = EINVAL; goto err0; } @@ -260,7 +267,7 @@ crypto_scrypt(const uint8_t * passwd, si /* 2: for i = 0 to p - 1 do */ for (i = 0; i < p; i++) { /* 3: B_i <-- MF(B_i, N) */ - smix(&B[i * 128 * r], r, N, V, XY); + smix(&B[i * 128 * r], r, N, V, XY, defeat_tmto); } /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ diff -urp escrypt-23/crypto_scrypt-sse.c escrypt-26/crypto_scrypt-sse.c --- escrypt-23/crypto_scrypt-sse.c 2013-03-17 00:05:35 +0000 +++ escrypt-26/crypto_scrypt-sse.c 2013-03-17 21:01:56 +0000 @@ -1,6 +1,6 @@ /*- * Copyright 2009 Colin Percival - * Copyright 2012 Alexander Peslyak + * Copyright 2012,2013 Alexander Peslyak * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -85,7 +85,7 @@ /** * Apply the salsa20/8 core to the block provided in (X0 ... X3) ^ in. */ -#define SALSA20_8_XOR(in, out) \ +#define SALSA20_8_XOR(in) \ { \ __m128i Y0 = X0 = _mm_xor_si128(X0, (in)[0]); \ __m128i Y1 = X1 = _mm_xor_si128(X1, (in)[1]); \ @@ -95,12 +95,18 @@ SALSA20_2ROUNDS \ SALSA20_2ROUNDS \ SALSA20_2ROUNDS \ - (out)[0] = X0 = _mm_add_epi32(X0, Y0); \ - (out)[1] = X1 = _mm_add_epi32(X1, Y1); \ - (out)[2] = X2 = _mm_add_epi32(X2, Y2); \ - (out)[3] = X3 = _mm_add_epi32(X3, Y3); \ + X0 = _mm_add_epi32(X0, Y0); \ + X1 = _mm_add_epi32(X1, Y1); \ + X2 = _mm_add_epi32(X2, Y2); \ + X3 = _mm_add_epi32(X3, Y3); \ } +#define OUT(out) \ + (out)[0] = X0; \ + (out)[1] = X1; \ + (out)[2] = X2; \ + (out)[3] = X3; + /** * blockmix_salsa8(Bin, Bout, r): * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r @@ -121,7 +127,8 @@ blockmix_salsa8(__m128i * Bin, __m128i * /* 3: X <-- H(X \xor B_i) */ /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - SALSA20_8_XOR(Bin, Bout) + SALSA20_8_XOR(Bin) + OUT(Bout) /* 2: for i = 0 to 2r - 1 do */ r--; @@ -129,20 +136,23 @@ blockmix_salsa8(__m128i * Bin, __m128i * /* 3: X <-- H(X \xor B_i) */ /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - SALSA20_8_XOR(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4]) + SALSA20_8_XOR(&Bin[i * 8 + 4]) + OUT(&Bout[(r + i) * 4 + 4]) i++; /* 3: X <-- H(X \xor B_i) */ /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - SALSA20_8_XOR(&Bin[i * 8], &Bout[i * 4]) + SALSA20_8_XOR(&Bin[i * 8]) + OUT(&Bout[i * 4]) } /* 3: X <-- H(X \xor B_i) */ /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - SALSA20_8_XOR(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4]) + SALSA20_8_XOR(&Bin[i * 8 + 4]) + OUT(&Bout[(r + i) * 4 + 4]) } #define XOR4(in) \ @@ -151,8 +161,32 @@ blockmix_salsa8(__m128i * Bin, __m128i * X2 = _mm_xor_si128(X2, (in)[2]); \ X3 = _mm_xor_si128(X3, (in)[3]); +#define CONDOUT(out, do_xor, xor) \ + if (do_xor) { \ + (xor)[0] = (out)[0] = _mm_xor_si128(X0, (xor)[0]); \ + (xor)[1] = (out)[1] = _mm_xor_si128(X1, (xor)[1]); \ + (xor)[2] = (out)[2] = _mm_xor_si128(X2, (xor)[2]); \ + (xor)[3] = (out)[3] = _mm_xor_si128(X3, (xor)[3]); \ + } else { \ + OUT(out) \ + } + +#define CONDOUT_RET(out, do_xor, xor) \ + if (do_xor) { \ + uint32_t ret = _mm_cvtsi128_si32( \ + (xor)[0] = (out)[0] = _mm_xor_si128(X0, (xor)[0])); \ + (xor)[1] = (out)[1] = _mm_xor_si128(X1, (xor)[1]); \ + (xor)[2] = (out)[2] = _mm_xor_si128(X2, (xor)[2]); \ + (xor)[3] = (out)[3] = _mm_xor_si128(X3, (xor)[3]); \ + return ret; \ + } else { \ + OUT(out) \ + return _mm_cvtsi128_si32(X0); \ + } + static inline uint32_t -blockmix_salsa8_xor(__m128i * Bin1, __m128i * Bin2, __m128i * Bout, size_t r) +blockmix_salsa8_xor(__m128i * Bin1, __m128i * Bin2, __m128i * Bout, + __m128i * Bxor, size_t r) { __m128i X0, X1, X2, X3; size_t i; @@ -167,7 +201,8 @@ blockmix_salsa8_xor(__m128i * Bin1, __m1 /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ XOR4(Bin1) - SALSA20_8_XOR(Bin2, Bout) + SALSA20_8_XOR(Bin2) + CONDOUT(Bout, Bxor, Bxor) /* 2: for i = 0 to 2r - 1 do */ r--; @@ -176,7 +211,8 @@ blockmix_salsa8_xor(__m128i * Bin1, __m1 /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ XOR4(&Bin1[i * 8 + 4]) - SALSA20_8_XOR(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4]) + SALSA20_8_XOR(&Bin2[i * 8 + 4]) + CONDOUT(&Bout[(r + i) * 4 + 4], Bxor, &Bxor[(r + i) * 4 + 4]) i++; @@ -184,22 +220,25 @@ blockmix_salsa8_xor(__m128i * Bin1, __m1 /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ XOR4(&Bin1[i * 8]) - SALSA20_8_XOR(&Bin2[i * 8], &Bout[i * 4]) + SALSA20_8_XOR(&Bin2[i * 8]) + CONDOUT(&Bout[i * 4], Bxor, &Bxor[i * 4]) } /* 3: X <-- H(X \xor B_i) */ /* 4: Y_i <-- X */ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ XOR4(&Bin1[i * 8 + 4]) - SALSA20_8_XOR(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4]) - - return _mm_cvtsi128_si32(X0); + SALSA20_8_XOR(&Bin2[i * 8 + 4]) + CONDOUT_RET(&Bout[(r + i) * 4 + 4], Bxor, &Bxor[(r + i) * 4 + 4]) } #undef ARX #undef SALSA20_2ROUNDS #undef SALSA20_8_XOR +#undef OUT #undef XOR4 +#undef CONDOUT +#undef CONDOUT_RET /** * integerify(B, r): @@ -212,7 +251,7 @@ integerify(void * B, size_t r) } /** - * smix(B, r, N, V, XY): + * smix(B, r, N, V, XY, defeat_tmto): * Compute B = SMix_r(B, N). The input B must be 128r bytes in length; * the temporary storage V must be 128rN bytes in length; the temporary * storage XY must be 256r + 64 bytes in length. The value N must be a @@ -220,7 +259,7 @@ integerify(void * B, size_t r) * multiple of 64 bytes. */ static void -smix(uint8_t * B, size_t r, uint32_t N, void * V, void * XY) +smix(uint8_t * B, size_t r, uint32_t N, void * V, void * XY, int defeat_tmto) { __m128i * X = V, * Y, * V_j; uint32_t * X32 = V; @@ -264,19 +303,27 @@ smix(uint8_t * B, size_t r, uint32_t N, /* 7: j <-- Integerify(X) mod N */ j = integerify(X, r) & (N - 1); - V_j = (void *)((uintptr_t)(V) + j * 128 * r); /* 6: for i = 0 to N - 1 do */ for (i = 0; i < N; i += 2) { + __m128i * V_nj = NULL; + + V_j = (void *)((uintptr_t)(V) + j * 128 * r); + + if (defeat_tmto) + V_nj = (void *)((uintptr_t)(V) + (j ^ 1) * 128 * r); + /* 8: X <-- H(X \xor V_j) */ /* 7: j <-- Integerify(X) mod N */ - j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1); + j = blockmix_salsa8_xor(X, V_j, Y, V_nj, r) & (N - 1); V_j = (void *)((uintptr_t)(V) + j * 128 * r); + if (defeat_tmto) + V_nj = (void *)((uintptr_t)(V) + (j ^ 1) * 128 * r); + /* 8: X <-- H(X \xor V_j) */ /* 7: j <-- Integerify(X) mod N */ - j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1); - V_j = (void *)((uintptr_t)(V) + j * 128 * r); + j = blockmix_salsa8_xor(Y, V_j, X, V_nj, r) & (N - 1); } /* 10: B' <-- X */ @@ -289,7 +336,8 @@ smix(uint8_t * B, size_t r, uint32_t N, } /** - * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): + * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen, + * defeat_tmto): * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, * p, buflen) and write the result into buf. The parameters r, p, and buflen * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N @@ -298,9 +346,9 @@ smix(uint8_t * B, size_t r, uint32_t N, * Return 0 on success; or -1 on error. */ int -crypto_scrypt(const uint8_t * passwd, size_t passwdlen, +crypto_escrypt(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, - uint8_t * buf, size_t buflen) + uint8_t * buf, size_t buflen, int defeat_tmto) { void * B0, * V0, * XY0; uint8_t * B; @@ -323,7 +371,7 @@ crypto_scrypt(const uint8_t * passwd, si errno = EFBIG; goto err0; } - if (((N & (N - 1)) != 0) || (N == 0)) { + if (((N & (N - 1)) != 0) || (N < 2)) { errno = EINVAL; goto err0; } @@ -380,7 +428,7 @@ crypto_scrypt(const uint8_t * passwd, si /* 2: for i = 0 to p - 1 do */ for (i = 0; i < p; i++) { /* 3: B_i <-- MF(B_i, N) */ - smix(&B[i * 128 * r], r, N, V, XY); + smix(&B[i * 128 * r], r, N, V, XY, defeat_tmto); } /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ diff -urp escrypt-23/crypto_scrypt.h escrypt-26/crypto_scrypt.h --- escrypt-23/crypto_scrypt.h 2010-01-16 20:48:20 +0000 +++ escrypt-26/crypto_scrypt.h 2013-03-17 01:29:31 +0000 @@ -32,7 +32,8 @@ #include /** - * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): + * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen, + * defeat_tmto): * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, * p, buflen) and write the result into buf. The parameters r, p, and buflen * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N @@ -40,7 +41,7 @@ * * Return 0 on success; or -1 on error. */ -int crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t, uint64_t, - uint32_t, uint32_t, uint8_t *, size_t); +int crypto_escrypt(const uint8_t *, size_t, const uint8_t *, size_t, uint64_t, + uint32_t, uint32_t, uint8_t *, size_t, int); #endif /* !_CRYPTO_SCRYPT_H_ */ diff -urp escrypt-23/tests.c escrypt-26/tests.c --- escrypt-23/tests.c 2012-11-15 08:57:58 +0000 +++ escrypt-26/tests.c 2013-03-17 21:02:38 +0000 @@ -1,6 +1,8 @@ #include #include +#define defeat_tmto 0 + #undef TEST_PBKDF2_SHA256 #define TEST_SCRYPT @@ -52,8 +54,9 @@ print_scrypt(const char * passwd, const printf("scrypt(\"%s\", \"%s\", %llu, %u, %u) =", passwd, salt, (unsigned long long)N, r, p); - if (crypto_scrypt((const uint8_t *) passwd, strlen(passwd), - (const uint8_t *) salt, strlen(salt), N, r, p, dk, sizeof(dk))) { + if (crypto_escrypt((const uint8_t *) passwd, strlen(passwd), + (const uint8_t *) salt, strlen(salt), N, r, p, dk, sizeof(dk), + defeat_tmto)) { puts(" FAILED"); return; }