Date: Mon, 18 Mar 2013 10:40:16 +0400
From: Solar Designer <solar@...nwall.com>
To: crypt-dev@...ts.openwall.com
Subject: Re: scrypt TMTO defeater

On Sun, Mar 17, 2013 at 08:03:21AM +0400, Solar Designer wrote:
> Unfortunately, it turns out that BlockMix's shuffling(*) gets in the way
> of efficient implementations, and _possibly_ the inefficiency mostly hurts
> cleaner CPU code, as opposed to hacks and ASICs.  Specifically, when the
> V element we write into is or might be the same as V_j, in a clean
> implementation we have to postpone those writes until the BlockMix
> completes, and this means re-reading the same values from memory
> (hopefully, from L1 cache) rather than simply storing register values
> into two locations at once (such as X and V_j).

The above applies when trying to store (or XOR) BlockMix output into
a V element, but that approach is problematic in itself: if V_j on
the very next iteration of the SMix loop happens to be the same
element we just wrote into, the XORs may cancel each other out, in
some cases with really nasty consequences.  The TMTO defeater patches
I posted over the last two days suffer from this problem.  Oops.  I
hope this goes without saying, but to be on the safe side: none of
these early/experimental TMTO defeater patches are meant for actual
use.
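
To make the cancellation concrete, here is a toy standalone
illustration of mine, with single 32-bit words standing in for the
128r-byte blocks (this is not code from any of the patches):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t V_old = 0xdeadbeef;	/* old V_j (one word stands in) */
	uint32_t X_new = 0x12345678;	/* BlockMix output mixed into V_j */
	uint32_t V_j = V_old ^ X_new;	/* the flawed update: V_j ^= X */
	uint32_t next_in = X_new ^ V_j;	/* next BlockMix input if j repeats */

	/* Prints 1: next_in == V_old, so the fresh X_new has cancelled
	 * out entirely.  With a plain store (V_j = X_new) the next
	 * input would even be all zeroes. */
	printf("%d\n", next_in == V_old);
	return 0;
}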

Now, I could fix this by adding even more complexity, such as
avoiding repeated j's (detect a repeat and XOR j with 1 if so, which
can be done in a branchless fashion; see the sketch below).  However,
I chose to rethink the whole approach instead, and realized that
Anthony's original example did not have this problem because it
stores the block value from just prior to a BlockMix.
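
For the record, the branchless repeat avoidance could look like this
(my untested sketch, not in the attached patch; j_prev is a
hypothetical variable tracking the previous index):

	/* (j == j_prev) evaluates to 0 or 1, so a repeated index gets
	 * its low bit flipped, selecting the adjacent V element with no
	 * branch. */
	j ^= (j == j_prev);
	j_prev = j;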

One reason I did not implement Anthony's original example right away
was that it was incompatible with an optimization I had made in
crypto_scrypt-sse.c, where the XORs were reordered.  In this version
of the code I accepted the extra implementation complexity and added
a separate BlockMix function with the original order of XORs, so that
the right value can be written back into V_j.  This appears to work
well, and is fast.  -ref and -nosse implement Anthony's original
example without such extra complexity.
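
For clarity, the core of the change as it appears in -ref's second
loop (excerpted from the attached diff) is just:

	/* 8: X <-- H(X \xor V_j) */
	blkxor(X, &V[j * (128 * r)], 128 * r);
	if (defeat_tmto)
		blkcpy(&V[j * (128 * r)], X, 128 * r);
	blockmix_salsa8(X, Y, r);

that is, V_j is overwritten with the post-XOR, pre-BlockMix value,
exactly as in Anthony's example.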

New revision is attached.  Comments are welcome.

New MD5 of output of "tests" with defeat_tmto = 1:
2843045848a204727c8dd7677bd6b8e3

Because of the partial bypasses of this kind of TMTO defeater (which
I've documented in separate postings), I am likely to also implement
a defeater via changes to the first SMix loop.  The question then
will be whether to keep the defeater in the second loop as well.

Alexander

diff -urp escrypt-23/crypto_scrypt-nosse.c escrypt-30/crypto_scrypt-nosse.c
--- escrypt-23/crypto_scrypt-nosse.c	2010-01-16 20:48:20 +0000
+++ escrypt-30/crypto_scrypt-nosse.c	2013-03-18 05:03:10 +0000
@@ -1,5 +1,6 @@
 /*-
  * Copyright 2009 Colin Percival
+ * Copyright 2013 Alexander Peslyak
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -46,7 +47,7 @@ static void blkxor(void *, void *, size_
 static void salsa20_8(uint32_t[16]);
 static void blockmix_salsa8(uint32_t *, uint32_t *, uint32_t *, size_t);
 static uint64_t integerify(void *, size_t);
-static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *);
+static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *, int);
 
 static void
 blkcpy(void * dest, void * src, size_t len)
@@ -163,7 +164,7 @@ integerify(void * B, size_t r)
 }
 
 /**
- * smix(B, r, N, V, XY):
+ * smix(B, r, N, V, XY, defeat_tmto):
  * Compute B = SMix_r(B, N).  The input B must be 128r bytes in length;
  * the temporary storage V must be 128rN bytes in length; the temporary
  * storage XY must be 256r + 64 bytes in length.  The value N must be a
@@ -171,7 +172,8 @@ integerify(void * B, size_t r)
  * multiple of 64 bytes.
  */
 static void
-smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY)
+smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY,
+    int defeat_tmto)
 {
 	uint32_t * X = XY;
 	uint32_t * Y = &XY[32 * r];
@@ -206,6 +208,8 @@ smix(uint8_t * B, size_t r, uint64_t N,
 
 		/* 8: X <-- H(X \xor V_j) */
 		blkxor(X, &V[j * (32 * r)], 128 * r);
+		if (defeat_tmto)
+			blkcpy(&V[j * (32 * r)], X, 128 * r);
 		blockmix_salsa8(X, Y, Z, r);
 
 		/* 7: j <-- Integerify(X) mod N */
@@ -213,6 +217,8 @@ smix(uint8_t * B, size_t r, uint64_t N,
 
 		/* 8: X <-- H(X \xor V_j) */
 		blkxor(Y, &V[j * (32 * r)], 128 * r);
+		if (defeat_tmto)
+			blkcpy(&V[j * (32 * r)], Y, 128 * r);
 		blockmix_salsa8(Y, X, Z, r);
 	}
 
@@ -222,7 +228,8 @@ smix(uint8_t * B, size_t r, uint64_t N,
 }
 
 /**
- * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
+ * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen,
+ *     defeat_tmto):
  * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
  * p, buflen) and write the result into buf.  The parameters r, p, and buflen
  * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
@@ -231,9 +238,9 @@ smix(uint8_t * B, size_t r, uint64_t N,
  * Return 0 on success; or -1 on error.
  */
 int
-crypto_scrypt(const uint8_t * passwd, size_t passwdlen,
+crypto_escrypt(const uint8_t * passwd, size_t passwdlen,
     const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
-    uint8_t * buf, size_t buflen)
+    uint8_t * buf, size_t buflen, int defeat_tmto)
 {
 	void * B0, * V0, * XY0;
 	uint8_t * B;
@@ -309,7 +316,7 @@ crypto_scrypt(const uint8_t * passwd, si
 	/* 2: for i = 0 to p - 1 do */
 	for (i = 0; i < p; i++) {
 		/* 3: B_i <-- MF(B_i, N) */
-		smix(&B[i * 128 * r], r, N, V, XY);
+		smix(&B[i * 128 * r], r, N, V, XY, defeat_tmto);
 	}
 
 	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
diff -urp escrypt-23/crypto_scrypt-ref.c escrypt-30/crypto_scrypt-ref.c
--- escrypt-23/crypto_scrypt-ref.c	2010-01-16 20:48:20 +0000
+++ escrypt-30/crypto_scrypt-ref.c	2013-03-18 05:03:01 +0000
@@ -1,5 +1,6 @@
 /*-
  * Copyright 2009 Colin Percival
+ * Copyright 2013 Alexander Peslyak
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,7 +44,7 @@ static void blkxor(uint8_t *, uint8_t *,
 static void salsa20_8(uint8_t[64]);
 static void blockmix_salsa8(uint8_t *, uint8_t *, size_t);
 static uint64_t integerify(uint8_t *, size_t);
-static void smix(uint8_t *, size_t, uint64_t, uint8_t *, uint8_t *);
+static void smix(uint8_t *, size_t, uint64_t, uint8_t *, uint8_t *, int);
 
 static void
 blkcpy(uint8_t * dest, uint8_t * src, size_t len)
@@ -170,7 +171,8 @@ integerify(uint8_t * B, size_t r)
  * XY must be 256r bytes in length.  The value N must be a power of 2.
  */
 static void
-smix(uint8_t * B, size_t r, uint64_t N, uint8_t * V, uint8_t * XY)
+smix(uint8_t * B, size_t r, uint64_t N, uint8_t * V, uint8_t * XY,
+    int defeat_tmto)
 {
 	uint8_t * X = XY;
 	uint8_t * Y = &XY[128 * r];
@@ -196,6 +198,8 @@ smix(uint8_t * B, size_t r, uint64_t N,
 
 		/* 8: X <-- H(X \xor V_j) */
 		blkxor(X, &V[j * (128 * r)], 128 * r);
+		if (defeat_tmto)
+			blkcpy(&V[j * (128 * r)], X, 128 * r);
 		blockmix_salsa8(X, Y, r);
 	}
 
@@ -204,7 +208,8 @@ smix(uint8_t * B, size_t r, uint64_t N,
 }
 
 /**
- * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
+ * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen,
+ *     defeat_tmto):
  * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
  * p, buflen) and write the result into buf.  The parameters r, p, and buflen
  * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
@@ -213,9 +218,9 @@ smix(uint8_t * B, size_t r, uint64_t N,
  * Return 0 on success; or -1 on error.
  */
 int
-crypto_scrypt(const uint8_t * passwd, size_t passwdlen,
+crypto_escrypt(const uint8_t * passwd, size_t passwdlen,
     const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
-    uint8_t * buf, size_t buflen)
+    uint8_t * buf, size_t buflen, int defeat_tmto)
 {
 	uint8_t * B;
 	uint8_t * V;
@@ -260,7 +265,7 @@ crypto_scrypt(const uint8_t * passwd, si
 	/* 2: for i = 0 to p - 1 do */
 	for (i = 0; i < p; i++) {
 		/* 3: B_i <-- MF(B_i, N) */
-		smix(&B[i * 128 * r], r, N, V, XY);
+		smix(&B[i * 128 * r], r, N, V, XY, defeat_tmto);
 	}
 
 	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
diff -urp escrypt-23/crypto_scrypt-sse.c escrypt-30/crypto_scrypt-sse.c
--- escrypt-23/crypto_scrypt-sse.c	2013-03-17 00:05:35 +0000
+++ escrypt-30/crypto_scrypt-sse.c	2013-03-18 05:58:25 +0000
@@ -1,6 +1,6 @@
 /*-
  * Copyright 2009 Colin Percival
- * Copyright 2012 Alexander Peslyak
+ * Copyright 2012,2013 Alexander Peslyak
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -83,14 +83,14 @@
 	X3 = _mm_shuffle_epi32(X3, 0x93);
 
 /**
- * Apply the salsa20/8 core to the block provided in (X0 ... X3) ^ in.
+ * Apply the salsa20/8 core to the block provided in (X0 ... X3) ^ (Z0 ... Z3).
  */
-#define SALSA20_8_XOR(in, out) \
+#define SALSA20_8_XOR_ANY(maybe_decl, Z0, Z1, Z2, Z3, out) \
 	{ \
-		__m128i Y0 = X0 = _mm_xor_si128(X0, (in)[0]); \
-		__m128i Y1 = X1 = _mm_xor_si128(X1, (in)[1]); \
-		__m128i Y2 = X2 = _mm_xor_si128(X2, (in)[2]); \
-		__m128i Y3 = X3 = _mm_xor_si128(X3, (in)[3]); \
+		maybe_decl Y0 = X0 = _mm_xor_si128(X0, Z0); \
+		maybe_decl Y1 = X1 = _mm_xor_si128(X1, Z1); \
+		maybe_decl Y2 = X2 = _mm_xor_si128(X2, Z2); \
+		maybe_decl Y3 = X3 = _mm_xor_si128(X3, Z3); \
 		SALSA20_2ROUNDS \
 		SALSA20_2ROUNDS \
 		SALSA20_2ROUNDS \
@@ -101,6 +101,12 @@
 		(out)[3] = X3 = _mm_add_epi32(X3, Y3); \
 	}
 
+#define SALSA20_8_XOR_MEM(in, out) \
+	SALSA20_8_XOR_ANY(__m128i, (in)[0], (in)[1], (in)[2], (in)[3], out)
+
+#define SALSA20_8_XOR_REG(out) \
+	SALSA20_8_XOR_ANY(/* empty */, Y0, Y1, Y2, Y3, out)
+
 /**
  * blockmix_salsa8(Bin, Bout, r):
  * Compute Bout = BlockMix_{salsa20/8, r}(Bin).  The input Bin must be 128r
@@ -121,7 +127,7 @@ blockmix_salsa8(__m128i * Bin, __m128i *
 	/* 3: X <-- H(X \xor B_i) */
 	/* 4: Y_i <-- X */
 	/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-	SALSA20_8_XOR(Bin, Bout)
+	SALSA20_8_XOR_MEM(Bin, Bout)
 
 	/* 2: for i = 0 to 2r - 1 do */
 	r--;
@@ -129,20 +135,20 @@ blockmix_salsa8(__m128i * Bin, __m128i *
 		/* 3: X <-- H(X \xor B_i) */
 		/* 4: Y_i <-- X */
 		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-		SALSA20_8_XOR(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4])
+		SALSA20_8_XOR_MEM(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4])
 
 		i++;
 
 		/* 3: X <-- H(X \xor B_i) */
 		/* 4: Y_i <-- X */
 		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-		SALSA20_8_XOR(&Bin[i * 8], &Bout[i * 4])
+		SALSA20_8_XOR_MEM(&Bin[i * 8], &Bout[i * 4])
 	}
 
 	/* 3: X <-- H(X \xor B_i) */
 	/* 4: Y_i <-- X */
 	/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-	SALSA20_8_XOR(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4])
+	SALSA20_8_XOR_MEM(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4])
 }
 
 #define XOR4(in) \
@@ -151,6 +157,12 @@ blockmix_salsa8(__m128i * Bin, __m128i *
 	X2 = _mm_xor_si128(X2, (in)[2]); \
 	X3 = _mm_xor_si128(X3, (in)[3]);
 
+#define XOR4_2(in1, in2) \
+	X0 = _mm_xor_si128((in1)[0], (in2)[0]); \
+	X1 = _mm_xor_si128((in1)[1], (in2)[1]); \
+	X2 = _mm_xor_si128((in1)[2], (in2)[2]); \
+	X3 = _mm_xor_si128((in1)[3], (in2)[3]);
+
 static inline uint32_t
 blockmix_salsa8_xor(__m128i * Bin1, __m128i * Bin2, __m128i * Bout, size_t r)
 {
@@ -158,16 +170,13 @@ blockmix_salsa8_xor(__m128i * Bin1, __m1
 	size_t i;
 
 	/* 1: X <-- B_{2r - 1} */
-	X0 = _mm_xor_si128(Bin1[8 * r - 4], Bin2[8 * r - 4]);
-	X1 = _mm_xor_si128(Bin1[8 * r - 3], Bin2[8 * r - 3]);
-	X2 = _mm_xor_si128(Bin1[8 * r - 2], Bin2[8 * r - 2]);
-	X3 = _mm_xor_si128(Bin1[8 * r - 1], Bin2[8 * r - 1]);
+	XOR4_2(&Bin1[8 * r - 4], &Bin2[8 * r - 4])
 
 	/* 3: X <-- H(X \xor B_i) */
 	/* 4: Y_i <-- X */
 	/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
 	XOR4(Bin1)
-	SALSA20_8_XOR(Bin2, Bout)
+	SALSA20_8_XOR_MEM(Bin2, Bout)
 
 	/* 2: for i = 0 to 2r - 1 do */
 	r--;
@@ -176,7 +185,7 @@ blockmix_salsa8_xor(__m128i * Bin1, __m1
 		/* 4: Y_i <-- X */
 		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
 		XOR4(&Bin1[i * 8 + 4])
-		SALSA20_8_XOR(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4])
+		SALSA20_8_XOR_MEM(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4])
 
 		i++;
 
@@ -184,22 +193,75 @@ blockmix_salsa8_xor(__m128i * Bin1, __m1
 		/* 4: Y_i <-- X */
 		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
 		XOR4(&Bin1[i * 8])
-		SALSA20_8_XOR(&Bin2[i * 8], &Bout[i * 4])
+		SALSA20_8_XOR_MEM(&Bin2[i * 8], &Bout[i * 4])
 	}
 
 	/* 3: X <-- H(X \xor B_i) */
 	/* 4: Y_i <-- X */
 	/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
 	XOR4(&Bin1[i * 8 + 4])
-	SALSA20_8_XOR(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4])
+	SALSA20_8_XOR_MEM(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4])
+
+	return _mm_cvtsi128_si32(X0);
+}
+
+#undef XOR4
+#define XOR4(in, out) \
+	(out)[0] = Y0 = _mm_xor_si128((in)[0], (out)[0]); \
+	(out)[1] = Y1 = _mm_xor_si128((in)[1], (out)[1]); \
+	(out)[2] = Y2 = _mm_xor_si128((in)[2], (out)[2]); \
+	(out)[3] = Y3 = _mm_xor_si128((in)[3], (out)[3]);
+
+static inline uint32_t
+blockmix_salsa8_xor_save(__m128i * Bin1, __m128i * Bin2, __m128i * Bout,
+    size_t r)
+{
+	__m128i X0, X1, X2, X3, Y0, Y1, Y2, Y3;
+	size_t i;
+
+	/* 1: X <-- B_{2r - 1} */
+	XOR4_2(&Bin1[8 * r - 4], &Bin2[8 * r - 4])
+
+	/* 3: X <-- H(X \xor B_i) */
+	/* 4: Y_i <-- X */
+	/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
+	XOR4(Bin1, Bin2)
+	SALSA20_8_XOR_REG(Bout)
+
+	/* 2: for i = 0 to 2r - 1 do */
+	r--;
+	for (i = 0; i < r;) {
+		/* 3: X <-- H(X \xor B_i) */
+		/* 4: Y_i <-- X */
+		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
+		XOR4(&Bin1[i * 8 + 4], &Bin2[i * 8 + 4])
+		SALSA20_8_XOR_REG(&Bout[(r + i) * 4 + 4])
+
+		i++;
+
+		/* 3: X <-- H(X \xor B_i) */
+		/* 4: Y_i <-- X */
+		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
+		XOR4(&Bin1[i * 8], &Bin2[i * 8])
+		SALSA20_8_XOR_REG(&Bout[i * 4])
+	}
+
+	/* 3: X <-- H(X \xor B_i) */
+	/* 4: Y_i <-- X */
+	/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
+	XOR4(&Bin1[i * 8 + 4], &Bin2[i * 8 + 4])
+	SALSA20_8_XOR_REG(&Bout[(r + i) * 4 + 4])
 
 	return _mm_cvtsi128_si32(X0);
 }
 
 #undef ARX
 #undef SALSA20_2ROUNDS
-#undef SALSA20_8_XOR
+#undef SALSA20_8_XOR_ANY
+#undef SALSA20_8_XOR_MEM
+#undef SALSA20_8_XOR_REG
 #undef XOR4
+#undef XOR4_2
 
 /**
  * integerify(B, r):
@@ -212,7 +274,7 @@ integerify(void * B, size_t r)
 }
 
 /**
- * smix(B, r, N, V, XY):
+ * smix(B, r, N, V, XY, defeat_tmto):
  * Compute B = SMix_r(B, N).  The input B must be 128r bytes in length;
  * the temporary storage V must be 128rN bytes in length; the temporary
  * storage XY must be 256r + 64 bytes in length.  The value N must be a
@@ -220,9 +282,9 @@ integerify(void * B, size_t r)
  * multiple of 64 bytes.
  */
 static void
-smix(uint8_t * B, size_t r, uint32_t N, void * V, void * XY)
+smix(uint8_t * B, size_t r, uint32_t N, void * V, void * XY, int defeat_tmto)
 {
-	__m128i * X = V, * Y, * V_j;
+	__m128i * X = V, * Y;
 	uint32_t * X32 = V;
 	uint32_t i, j;
 	size_t k;
@@ -264,19 +326,35 @@ smix(uint8_t * B, size_t r, uint32_t N,
 
 	/* 7: j <-- Integerify(X) mod N */
 	j = integerify(X, r) & (N - 1);
-	V_j = (void *)((uintptr_t)(V) + j * 128 * r);
 
-	/* 6: for i = 0 to N - 1 do */
-	for (i = 0; i < N; i += 2) {
-		/* 8: X <-- H(X \xor V_j) */
-		/* 7: j <-- Integerify(X) mod N */
-		j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1);
-		V_j = (void *)((uintptr_t)(V) + j * 128 * r);
-
-		/* 8: X <-- H(X \xor V_j) */
-		/* 7: j <-- Integerify(X) mod N */
-		j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1);
-		V_j = (void *)((uintptr_t)(V) + j * 128 * r);
+	if (defeat_tmto) {
+		/* 6: for i = 0 to N - 1 do */
+		for (i = 0; i < N; i += 2) {
+			__m128i * V_j = (void *)((uintptr_t)(V) + j * 128 * r);
+
+			/* 8: X <-- H(X \xor V_j) */
+			/* 7: j <-- Integerify(X) mod N */
+			j = blockmix_salsa8_xor_save(X, V_j, Y, r) & (N - 1);
+			V_j = (void *)((uintptr_t)(V) + j * 128 * r);
+
+			/* 8: X <-- H(X \xor V_j) */
+			/* 7: j <-- Integerify(X) mod N */
+			j = blockmix_salsa8_xor_save(Y, V_j, X, r) & (N - 1);
+		}
+	} else {
+		/* 6: for i = 0 to N - 1 do */
+		for (i = 0; i < N; i += 2) {
+			__m128i * V_j = (void *)((uintptr_t)(V) + j * 128 * r);
+
+			/* 8: X <-- H(X \xor V_j) */
+			/* 7: j <-- Integerify(X) mod N */
+			j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1);
+			V_j = (void *)((uintptr_t)(V) + j * 128 * r);
+
+			/* 8: X <-- H(X \xor V_j) */
+			/* 7: j <-- Integerify(X) mod N */
+			j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1);
+		}
 	}
 
 	/* 10: B' <-- X */
@@ -289,7 +367,8 @@ smix(uint8_t * B, size_t r, uint32_t N,
 }
 
 /**
- * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
+ * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen,
+ *     defeat_tmto):
  * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
  * p, buflen) and write the result into buf.  The parameters r, p, and buflen
  * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
@@ -298,9 +377,9 @@ smix(uint8_t * B, size_t r, uint32_t N,
  * Return 0 on success; or -1 on error.
  */
 int
-crypto_scrypt(const uint8_t * passwd, size_t passwdlen,
+crypto_escrypt(const uint8_t * passwd, size_t passwdlen,
     const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
-    uint8_t * buf, size_t buflen)
+    uint8_t * buf, size_t buflen, int defeat_tmto)
 {
 	void * B0, * V0, * XY0;
 	uint8_t * B;
@@ -380,7 +459,7 @@ crypto_scrypt(const uint8_t * passwd, si
 	/* 2: for i = 0 to p - 1 do */
 	for (i = 0; i < p; i++) {
 		/* 3: B_i <-- MF(B_i, N) */
-		smix(&B[i * 128 * r], r, N, V, XY);
+		smix(&B[i * 128 * r], r, N, V, XY, defeat_tmto);
 	}
 
 	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
diff -urp escrypt-23/crypto_scrypt.h escrypt-30/crypto_scrypt.h
--- escrypt-23/crypto_scrypt.h	2010-01-16 20:48:20 +0000
+++ escrypt-30/crypto_scrypt.h	2013-03-17 01:29:31 +0000
@@ -32,7 +32,8 @@
 #include <stdint.h>
 
 /**
- * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
+ * crypto_escrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen,
+ *     defeat_tmto):
  * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
  * p, buflen) and write the result into buf.  The parameters r, p, and buflen
  * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
@@ -40,7 +41,7 @@
  *
  * Return 0 on success; or -1 on error.
  */
-int crypto_scrypt(const uint8_t *, size_t, const uint8_t *, size_t, uint64_t,
-    uint32_t, uint32_t, uint8_t *, size_t);
+int crypto_escrypt(const uint8_t *, size_t, const uint8_t *, size_t, uint64_t,
+    uint32_t, uint32_t, uint8_t *, size_t, int);
 
 #endif /* !_CRYPTO_SCRYPT_H_ */
diff -urp escrypt-23/tests.c escrypt-30/tests.c
--- escrypt-23/tests.c	2012-11-15 08:57:58 +0000
+++ escrypt-30/tests.c	2013-03-18 06:07:48 +0000
@@ -1,6 +1,8 @@
 #include <stdio.h>
 #include <string.h>
 
+#define defeat_tmto 0
+
 #undef TEST_PBKDF2_SHA256
 #define TEST_SCRYPT
 
@@ -52,8 +54,9 @@ print_scrypt(const char * passwd, const
 	printf("scrypt(\"%s\", \"%s\", %llu, %u, %u) =",
 	    passwd, salt, (unsigned long long)N, r, p);
 
-	if (crypto_scrypt((const uint8_t *) passwd, strlen(passwd),
-	    (const uint8_t *) salt, strlen(salt), N, r, p, dk, sizeof(dk))) {
+	if (crypto_escrypt((const uint8_t *) passwd, strlen(passwd),
+	    (const uint8_t *) salt, strlen(salt), N, r, p, dk, sizeof(dk),
+	    defeat_tmto)) {
 		puts(" FAILED");
 		return;
 	}
