Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Sun, 27 May 2012 03:11:48 -0700 (PDT)
From: deepika dutta <deepikadutta_19@...oo.com>
To: "john-dev@...ts.openwall.com" <john-dev@...ts.openwall.com>
Subject: mschapv2 conversion

Hi,
I don't know why my earlier mail to john-dev is not visible on the mailing list, so I am resending the email along with updated patch.

>I have completed the bitsliced conversion for mschap. I tested using the samples provided in mschapv2_fmt_plug.c and everything is working fine. I have attached >the patch, I hope so I created the patch correctly. I have also attached appropriate source files.

 
Cheers,
Deepika
[ CONTENT OF TYPE text/html SKIPPED ]

/*
 * This file is part of John the Ripper password cracker,
 * Copyright (c) 1996-2001,2005,2010,2011 by Solar Designer
 */

/*
 * Bitslice DES implementation.
 */

#ifndef _JOHN_DES_BS_H
#define _JOHN_DES_BS_H

#include "arch.h"

#ifndef DES_BS_ALGORITHM_NAME
#define DES_BS_ALGORITHM_NAME		ARCH_BITS_STR "/" ARCH_BITS_STR " BS"
#endif

#if DES_BS_VECTOR
#define DES_BS_DEPTH			(ARCH_BITS * DES_BS_VECTOR)
#else
#define DES_BS_DEPTH			ARCH_BITS
#endif

#if DES_BS_VECTOR
#ifndef DES_BS_VECTOR_SIZE
#define DES_BS_VECTOR_SIZE		DES_BS_VECTOR
#endif
typedef ARCH_WORD DES_bs_vector[DES_BS_VECTOR_SIZE];
#else
#define DES_bs_vector			ARCH_WORD
#endif

/*
 * All bitslice DES parameters combined into one struct for more efficient
 * cache usage. Don't re-order unless you know what you're doing, as there
 * is an optimization that would produce undefined results if you did.
 *
 * This must match the definition in x86-mmx.S.
 */
typedef struct {
#if DES_BS_EXPAND
	ARCH_WORD *KSp[0x300];	/* Initial key schedule (key bit pointers) */
#endif
	union {
		ARCH_WORD *p[0x300];	/* Key bit pointers */
#if DES_BS_EXPAND
		DES_bs_vector v[0x300];	/* Key bit values */
#endif
	} KS;			/* Current key schedule */
	union {
		ARCH_WORD *E[96];	/* Expansion function (data bit ptrs) */
		unsigned char u[0x100];	/* Uppercase (for LM) */
	} E;
	DES_bs_vector K[56];	/* Keys */
	DES_bs_vector B[64];	/* Data blocks */
	#if DES_BS_ASM
	DES_bs_vector tmp[16];	/* Miscellaneous temporary storage */
#else
	DES_bs_vector zero;	/* All 0 bits */
	DES_bs_vector ones;	/* All 1 bits */
	DES_bs_vector masks[8];	/* Each byte set to 0x01 ... 0x80 */
#endif
	union {
		unsigned char c[8][8][sizeof(DES_bs_vector)];
		DES_bs_vector v[8][8];
	} xkeys;		/* Partially transposed key bits matrix */
	unsigned char *pxkeys[DES_BS_DEPTH]; /* Pointers into xkeys.c */
	int keys_changed;	/* If keys have changed */
	unsigned int salt;	/* Salt value corresponding to E[] contents */
	DES_bs_vector *Ens[48];	/* Pointers into B[] for non-salted E */
} DES_bs_combined;

//store plaintext//
extern DES_bs_vector Plaintext[64];


#if defined(_OPENMP) && !DES_BS_ASM
#define DES_bs_mt			1
#define DES_bs_cpt			32
#define DES_bs_mt_max			(DES_bs_cpt * 24)
extern int DES_bs_min_kpc, DES_bs_max_kpc;
extern int DES_bs_nt;
extern DES_bs_combined *DES_bs_all_p;
#define DES_bs_all_align		64
#define DES_bs_all_size \
	((sizeof(DES_bs_combined) + (DES_bs_all_align - 1)) & \
	    ~(DES_bs_all_align - 1))
#define DES_bs_all_by_tnum(tnum) \
	(*(DES_bs_combined *)((char *)DES_bs_all_p + (tnum) * DES_bs_all_size))
#ifdef __GNUC__
#define DES_bs_all \
	(*(DES_bs_combined *)((char *)DES_bs_all_p + t))
#define for_each_t(n) \
	for (t = 0; t < (n) * DES_bs_all_size; t += DES_bs_all_size)
#define init_t() \
	int t = (unsigned int)index / DES_BS_DEPTH * DES_bs_all_size; \
	index = (unsigned int)index % DES_BS_DEPTH;
#else
/*
 * For compilers that complain about the above e.g. with "iteration expression
 * of omp for loop does not have a canonical shape".
 */
#define DES_bs_all \
	DES_bs_all_by_tnum(t)
#define for_each_t(n) \
	for (t = 0; t < (n); t++)
#define init_t() \
	int t = (unsigned int)index / DES_BS_DEPTH; \
	index = (unsigned int)index % DES_BS_DEPTH;
#endif
#else
#define DES_bs_mt			0
#define DES_bs_cpt			1
extern DES_bs_combined DES_bs_all;
#define for_each_t(n)
#define init_t()
#endif

/*
 * Initializes the internal structures.
 */
extern void DES_bs_init(int LM, int cpt);

/*
 * Sets a salt for DES_bs_crypt().
 */
extern void DES_bs_set_salt(ARCH_WORD salt);
#if DES_bs_mt
extern void DES_bs_set_salt_for_thread(int t, unsigned int salt);
#endif

/*
 * Set a key for DES_bs_crypt() or DES_bs_crypt_LM(), respectively.
 */
extern void DES_bs_set_key(char *key, int index);
extern void DES_bs_set_key_LM(char *key, int index);

/*
 * Almost generic implementation: 24-bit salts, variable iteration count.
 */
extern void DES_bs_crypt(int count, int keys_count);

/*
 * A simplified special-case implementation: 12-bit salts, 25 iterations.
 */
extern void DES_bs_crypt_25(int keys_count);

/*
 * Another special-case version: a non-zero IV, no salts, no iterations.
 */
extern void DES_bs_crypt_LM(int keys_count);

/*
 * Converts an ASCII ciphertext to binary to be used with one of the
 * comparison functions.
 */
extern ARCH_WORD *DES_bs_get_binary(char *ciphertext);

/*
 * Similarly, for LM hashes.
 */
extern ARCH_WORD *DES_bs_get_binary_LM(char *ciphertext);

/*
 * Calculate a hash for a DES_bs_crypt() output.
 */
extern int DES_bs_get_hash_0(int index);
extern int DES_bs_get_hash_1(int index);
extern int DES_bs_get_hash_2(int index);
extern int DES_bs_get_hash_3(int index);
extern int DES_bs_get_hash_4(int index);
extern int DES_bs_get_hash_5(int index);
extern int DES_bs_get_hash_6(int index);

/*
 * Compares 32 bits of a given ciphertext against at least the first count of
 * the DES_bs_crypt*() outputs and returns zero if no matches detected.
 */
extern int DES_bs_cmp_all(ARCH_WORD *binary, int count);

/*
 * Compares count bits of a given ciphertext against one of the outputs.
 */
extern int DES_bs_cmp_one(ARCH_WORD *binary, int count, int index);

extern void DES_bs_crypt_one(int keys_count);
#endif

/*
 * This file is part of John the Ripper password cracker,
 * Copyright (c) 1996-2001,2003,2010,2011 by Solar Designer
 */

#include "arch.h"
#include "common.h"
#include "DES_bs.h"

#if DES_BS_ASM && defined(_OPENMP) && defined(__GNUC__)
#warning Assembly code and OpenMP are both requested - will provide the former, but not the latter (for DES-based hashes).  This may likely be corrected by enabling SIMD intrinsics with the C compiler (try adding -msse2 to OMPFLAGS).
#endif

#if !DES_BS_ASM

#define vzero (*(vtype *)&DES_bs_all.zero)
#if DES_bs_mt
#define vones (*(vtype *)&DES_bs_all_by_tnum(-1).ones)
#else
#define vones (*(vtype *)&DES_bs_all.ones)
#endif

#define DES_BS_VECTOR_LOOPS 0

#if defined(__ALTIVEC__) && DES_BS_DEPTH == 128
#ifdef __linux__
#include <altivec.h>
#endif

typedef vector signed int vtype;

#define vst(dst, ofs, src) \
	vec_st((src), (ofs) * sizeof(DES_bs_vector), &(dst))

#define vxorf(a, b) \
	vec_xor((a), (b))

#define vnot(dst, a) \
	(dst) = vec_nor((a), (a))
#define vand(dst, a, b) \
	(dst) = vec_and((a), (b))
#define vor(dst, a, b) \
	(dst) = vec_or((a), (b))
#define vandn(dst, a, b) \
	(dst) = vec_andc((a), (b))
#define vsel(dst, a, b, c) \
	(dst) = vec_sel((a), (b), (c))

#elif defined(__ALTIVEC__) && \
    ((ARCH_BITS == 64 && DES_BS_DEPTH == 192) || \
    (ARCH_BITS == 32 && DES_BS_DEPTH == 160))
#ifdef __linux__
#include <altivec.h>
#endif

typedef struct {
	vector signed int f;
	unsigned ARCH_WORD g;
} vtype;

#define vst(dst, ofs, src) \
	vec_st((src).f, (ofs) * sizeof(DES_bs_vector), &((vtype *)&(dst))->f); \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g

#define vxor(dst, a, b) \
	(dst).f = vec_xor((a).f, (b).f); \
	(dst).g = (a).g ^ (b).g

#define vnot(dst, a) \
	(dst).f = vec_nor((a).f, (a).f); \
	(dst).g = ~(a).g
#define vand(dst, a, b) \
	(dst).f = vec_and((a).f, (b).f); \
	(dst).g = (a).g & (b).g
#define vor(dst, a, b) \
	(dst).f = vec_or((a).f, (b).f); \
	(dst).g = (a).g | (b).g
#define vandn(dst, a, b) \
	(dst).f = vec_andc((a).f, (b).f); \
	(dst).g = (a).g & ~(b).g
#define vsel(dst, a, b, c) \
	(dst).f = vec_sel((a).f, (b).f, (c).f); \
	(dst).g = (((a).g & ~(c).g) ^ ((b).g & (c).g))

#elif defined(__ALTIVEC__) && DES_BS_DEPTH == 256
#ifdef __linux__
#include <altivec.h>
#endif

typedef struct {
	vector signed int f, g;
} vtype;

#define vst(dst, ofs, src) \
	vec_st((src).f, (ofs) * sizeof(DES_bs_vector), &((vtype *)&(dst))->f); \
	vec_st((src).g, (ofs) * sizeof(DES_bs_vector), &((vtype *)&(dst))->g)

#define vxor(dst, a, b) \
	(dst).f = vec_xor((a).f, (b).f); \
	(dst).g = vec_xor((a).g, (b).g)

#define vnot(dst, a) \
	(dst).f = vec_nor((a).f, (a).f); \
	(dst).g = vec_nor((a).g, (a).g)
#define vand(dst, a, b) \
	(dst).f = vec_and((a).f, (b).f); \
	(dst).g = vec_and((a).g, (b).g)
#define vor(dst, a, b) \
	(dst).f = vec_or((a).f, (b).f); \
	(dst).g = vec_or((a).g, (b).g)
#define vandn(dst, a, b) \
	(dst).f = vec_andc((a).f, (b).f); \
	(dst).g = vec_andc((a).g, (b).g)
#define vsel(dst, a, b, c) \
	(dst).f = vec_sel((a).f, (b).f, (c).f); \
	(dst).g = vec_sel((a).g, (b).g, (c).g)

#elif defined(__AVX__) && DES_BS_DEPTH == 256 && !defined(DES_BS_NO_AVX256)
#include <immintrin.h>

/* Not __m256i because bitwise ops are "floating-point" with AVX */
typedef __m256 vtype;

#define vst(dst, ofs, src) \
	_mm256_store_ps((float *)((DES_bs_vector *)&(dst) + (ofs)), (src))

#define vxorf(a, b) \
	_mm256_xor_ps((a), (b))

#define vand(dst, a, b) \
	(dst) = _mm256_and_ps((a), (b))
#define vor(dst, a, b) \
	(dst) = _mm256_or_ps((a), (b))
#define vandn(dst, a, b) \
	(dst) = _mm256_andnot_ps((b), (a))

#ifdef __XOP__
/* This could be _mm256_cmov_ps(), but it does not exist (yet?) */
#define vsel(dst, a, b, c) \
	(dst) = __builtin_ia32_vpcmov_v8sf256((b), (a), (c))
#endif

/*
 * We should be able to do 256-bit shifts with one instruction with AVX2, but
 * for plain AVX let's use pairs of 128-bit instructions (and likely incur
 * extra memory stores/loads because the rest of our AVX code is 256-bit). :-(
 */
#define vshl(dst, src, shift) \
	((__m128i *)&(dst))[0] = \
	    _mm_slli_epi64(((__m128i *)&(src))[0], (shift)); \
	((__m128i *)&(dst))[1] = \
	    _mm_slli_epi64(((__m128i *)&(src))[1], (shift))
#define vshr(dst, src, shift) \
	((__m128i *)&(dst))[0] = \
	    _mm_srli_epi64(((__m128i *)&(src))[0], (shift)); \
	((__m128i *)&(dst))[1] = \
	    _mm_srli_epi64(((__m128i *)&(src))[1], (shift))

#elif defined(__AVX__) && DES_BS_DEPTH == 384 && !defined(DES_BS_NO_AVX128)
#include <immintrin.h>
#ifdef __XOP__
#include <x86intrin.h>
#endif

typedef struct {
/* Not __m256i because bitwise ops are "floating-point" with AVX */
	__m256 f;
	__m128i g;
} vtype;

#define vst(dst, ofs, src) \
	_mm256_store_ps( \
	    (float *)&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	_mm_store_si128(&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g, \
	    (src).g)

#define vxor(dst, a, b) \
	(dst).f = _mm256_xor_ps((a).f, (b).f); \
	(dst).g = _mm_xor_si128((a).g, (b).g)

#define vand(dst, a, b) \
	(dst).f = _mm256_and_ps((a).f, (b).f); \
	(dst).g = _mm_and_si128((a).g, (b).g)
#define vor(dst, a, b) \
	(dst).f = _mm256_or_ps((a).f, (b).f); \
	(dst).g = _mm_or_si128((a).g, (b).g)
#define vandn(dst, a, b) \
	(dst).f = _mm256_andnot_ps((b).f, (a).f); \
	(dst).g = _mm_andnot_si128((b).g, (a).g)

#ifdef __XOP__
/* This could be _mm256_cmov_ps(), but it does not exist (yet?) */
#define vsel(dst, a, b, c) \
	(dst).f = __builtin_ia32_vpcmov_v8sf256((b).f, (a).f, (c).f); \
	(dst).g = _mm_cmov_si128((b).g, (a).g, (c).g)
#endif

#define vshl(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = _mm_slli_epi64((src).g, (shift))
#define vshr(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = _mm_srli_epi64((src).g, (shift))

#elif defined(__AVX__) && DES_BS_DEPTH == 512
#include <immintrin.h>

typedef struct {
/* Not __m256i because bitwise ops are "floating-point" with AVX */
	__m256 f, g;
} vtype;

#define vst(dst, ofs, src) \
	_mm256_store_ps( \
	    (float *)&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	_mm256_store_ps( \
	    (float *)&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g, \
	    (src).g)

#define vxor(dst, a, b) \
	(dst).f = _mm256_xor_ps((a).f, (b).f); \
	(dst).g = _mm256_xor_ps((a).g, (b).g)

#define vand(dst, a, b) \
	(dst).f = _mm256_and_ps((a).f, (b).f); \
	(dst).g = _mm256_and_ps((a).g, (b).g)
#define vor(dst, a, b) \
	(dst).f = _mm256_or_ps((a).f, (b).f); \
	(dst).g = _mm256_or_ps((a).g, (b).g)
#define vandn(dst, a, b) \
	(dst).f = _mm256_andnot_ps((b).f, (a).f); \
	(dst).g = _mm256_andnot_ps((b).g, (a).g)

#ifdef __XOP__
/* This could be _mm256_cmov_ps(), but it does not exist (yet?) */
#define vsel(dst, a, b, c) \
	(dst).f = __builtin_ia32_vpcmov_v8sf256((b).f, (a).f, (c).f); \
	(dst).g = __builtin_ia32_vpcmov_v8sf256((b).g, (a).g, (c).g)
#endif

#define vshl(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	((__m128i *)&(dst).g)[0] = \
	    _mm_slli_epi64(((__m128i *)&(src).g)[0], (shift)); \
	((__m128i *)&(dst).g)[1] = \
	    _mm_slli_epi64(((__m128i *)&(src).g)[1], (shift))
#define vshr(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	((__m128i *)&(dst).g)[0] = \
	    _mm_srli_epi64(((__m128i *)&(src).g)[0], (shift)); \
	((__m128i *)&(dst).g)[1] = \
	    _mm_srli_epi64(((__m128i *)&(src).g)[1], (shift))

#elif defined(__AVX__) && defined(__MMX__) && DES_BS_DEPTH == 320 && \
    !defined(DES_BS_NO_MMX)
#include <immintrin.h>
#include <mmintrin.h>

typedef struct {
/* Not __m256i because bitwise ops are "floating-point" with AVX */
	__m256 f;
	__m64 g;
} vtype;

#define vst(dst, ofs, src) \
	_mm256_store_ps( \
	    (float *)&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g

#define vxor(dst, a, b) \
	(dst).f = _mm256_xor_ps((a).f, (b).f); \
	(dst).g = _mm_xor_si64((a).g, (b).g)

#define vand(dst, a, b) \
	(dst).f = _mm256_and_ps((a).f, (b).f); \
	(dst).g = _mm_and_si64((a).g, (b).g)
#define vor(dst, a, b) \
	(dst).f = _mm256_or_ps((a).f, (b).f); \
	(dst).g = _mm_or_si64((a).g, (b).g)
#define vandn(dst, a, b) \
	(dst).f = _mm256_andnot_ps((b).f, (a).f); \
	(dst).g = _mm_andnot_si64((b).g, (a).g)

#define vshl(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = _mm_slli_si64((src).g, (shift))
#define vshr(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = _mm_srli_si64((src).g, (shift))

#elif defined(__AVX__) && \
    ((ARCH_BITS == 64 && DES_BS_DEPTH == 320) || \
    (ARCH_BITS == 32 && DES_BS_DEPTH == 288))
#include <immintrin.h>
#include <mmintrin.h>

typedef struct {
/* Not __m256i because bitwise ops are "floating-point" with AVX */
	__m256 f;
	unsigned ARCH_WORD g;
} vtype;

#define vst(dst, ofs, src) \
	_mm256_store_ps( \
	    (float *)&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g

#define vxor(dst, a, b) \
	(dst).f = _mm256_xor_ps((a).f, (b).f); \
	(dst).g = (a).g ^ (b).g

#define vnot(dst, a) \
	(dst).f = _mm256_xor_ps((a).f, vones.f); \
	(dst).g = ~(a).g
#define vand(dst, a, b) \
	(dst).f = _mm256_and_ps((a).f, (b).f); \
	(dst).g = (a).g & (b).g
#define vor(dst, a, b) \
	(dst).f = _mm256_or_ps((a).f, (b).f); \
	(dst).g = (a).g | (b).g
#define vandn(dst, a, b) \
	(dst).f = _mm256_andnot_ps((b).f, (a).f); \
	(dst).g = (a).g & ~(b).g

#define vshl(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = (src).g << (shift)
#define vshr(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = (src).g >> (shift)

#elif defined(__AVX__) && defined(__MMX__) && \
    ((ARCH_BITS == 64 && DES_BS_DEPTH == 384) || \
    (ARCH_BITS == 32 && DES_BS_DEPTH == 352))
#include <immintrin.h>
#include <mmintrin.h>

typedef struct {
/* Not __m256i because bitwise ops are "floating-point" with AVX */
	__m256 f;
	__m64 g;
	unsigned ARCH_WORD h;
} vtype;

#define vst(dst, ofs, src) \
	_mm256_store_ps( \
	    (float *)&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g; \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->h = (src).h

#define vxor(dst, a, b) \
	(dst).f = _mm256_xor_ps((a).f, (b).f); \
	(dst).g = _mm_xor_si64((a).g, (b).g); \
	(dst).h = (a).h ^ (b).h

#define vnot(dst, a) \
	(dst).f = _mm256_xor_ps((a).f, vones.f); \
	(dst).g = _mm_xor_si64((a).g, vones.g); \
	(dst).h = ~(a).h
#define vand(dst, a, b) \
	(dst).f = _mm256_and_ps((a).f, (b).f); \
	(dst).g = _mm_and_si64((a).g, (b).g); \
	(dst).h = (a).h & (b).h
#define vor(dst, a, b) \
	(dst).f = _mm256_or_ps((a).f, (b).f); \
	(dst).g = _mm_or_si64((a).g, (b).g); \
	(dst).h = (a).h | (b).h
#define vandn(dst, a, b) \
	(dst).f = _mm256_andnot_ps((b).f, (a).f); \
	(dst).g = _mm_andnot_si64((b).g, (a).g); \
	(dst).h = (a).h & ~(b).h

#define vshl(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_slli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = _mm_slli_si64((src).g, (shift)); \
	(dst).h = (src).h << (shift)
#define vshr(dst, src, shift) \
	((__m128i *)&(dst).f)[0] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[0], (shift)); \
	((__m128i *)&(dst).f)[1] = \
	    _mm_srli_epi64(((__m128i *)&(src).f)[1], (shift)); \
	(dst).g = _mm_srli_si64((src).g, (shift)); \
	(dst).h = (src).h >> (shift)

#elif defined(__SSE2__) && DES_BS_DEPTH == 128
#ifdef __AVX__
#include <immintrin.h>
#ifdef __XOP__
#include <x86intrin.h>
#endif
#else
#include <emmintrin.h>
#endif

typedef __m128i vtype;

#define vst(dst, ofs, src) \
	_mm_store_si128((vtype *)((DES_bs_vector *)&(dst) + (ofs)), (src))

#define vxorf(a, b) \
	_mm_xor_si128((a), (b))

#define vand(dst, a, b) \
	(dst) = _mm_and_si128((a), (b))
#define vor(dst, a, b) \
	(dst) = _mm_or_si128((a), (b))
#define vandn(dst, a, b) \
	(dst) = _mm_andnot_si128((b), (a))

#ifdef __XOP__
#define vsel(dst, a, b, c) \
	(dst) = _mm_cmov_si128((b), (a), (c))
#else
#define vsel(dst, a, b, c) \
	(dst) = _mm_xor_si128(_mm_andnot_si128((c), (a)), \
	    _mm_and_si128((c), (b)))
#endif

#define vshl1(dst, src) \
	(dst) = _mm_add_epi8((src), (src))
#define vshl(dst, src, shift) \
	(dst) = _mm_slli_epi64((src), (shift))
#define vshr(dst, src, shift) \
	(dst) = _mm_srli_epi64((src), (shift))

#elif defined(__SSE2__) && DES_BS_DEPTH == 256 && defined(DES_BS_NO_MMX)
#ifdef __AVX__
#include <immintrin.h>
#ifdef __XOP__
#include <x86intrin.h>
#endif
#else
#include <emmintrin.h>
#endif

typedef struct {
	__m128i f, g;
} vtype;

#define vst(dst, ofs, src) \
	_mm_store_si128(&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	_mm_store_si128(&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g, \
	    (src).g)

#define vxor(dst, a, b) \
	(dst).f = _mm_xor_si128((a).f, (b).f); \
	(dst).g = _mm_xor_si128((a).g, (b).g)

#define vand(dst, a, b) \
	(dst).f = _mm_and_si128((a).f, (b).f); \
	(dst).g = _mm_and_si128((a).g, (b).g)
#define vor(dst, a, b) \
	(dst).f = _mm_or_si128((a).f, (b).f); \
	(dst).g = _mm_or_si128((a).g, (b).g)
#define vandn(dst, a, b) \
	(dst).f = _mm_andnot_si128((b).f, (a).f); \
	(dst).g = _mm_andnot_si128((b).g, (a).g)

#ifdef __XOP__
#define vsel(dst, a, b, c) \
	(dst).f = _mm_cmov_si128((b).f, (a).f, (c).f); \
	(dst).g = _mm_cmov_si128((b).g, (a).g, (c).g)
#endif

#define vshl1(dst, src) \
	(dst).f = _mm_add_epi8((src).f, (src).f); \
	(dst).g = _mm_add_epi8((src).g, (src).g)
#define vshl(dst, src, shift) \
	(dst).f = _mm_slli_epi64((src).f, (shift)); \
	(dst).g = _mm_slli_epi64((src).g, (shift))
#define vshr(dst, src, shift) \
	(dst).f = _mm_srli_epi64((src).f, (shift)); \
	(dst).g = _mm_srli_epi64((src).g, (shift))

#elif defined(__SSE2__) && defined(__MMX__) && DES_BS_DEPTH == 192 && \
    !defined(DES_BS_NO_MMX)
#include <emmintrin.h>
#include <mmintrin.h>

typedef struct {
	__m128i f;
	__m64 g;
} vtype;

#define vst(dst, ofs, src) \
	_mm_store_si128(&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g

#define vxor(dst, a, b) \
	(dst).f = _mm_xor_si128((a).f, (b).f); \
	(dst).g = _mm_xor_si64((a).g, (b).g)

#define vand(dst, a, b) \
	(dst).f = _mm_and_si128((a).f, (b).f); \
	(dst).g = _mm_and_si64((a).g, (b).g)
#define vor(dst, a, b) \
	(dst).f = _mm_or_si128((a).f, (b).f); \
	(dst).g = _mm_or_si64((a).g, (b).g)
#define vandn(dst, a, b) \
	(dst).f = _mm_andnot_si128((b).f, (a).f); \
	(dst).g = _mm_andnot_si64((b).g, (a).g)

#define vshl1(dst, src) \
	(dst).f = _mm_add_epi8((src).f, (src).f); \
	(dst).g = _mm_add_pi8((src).g, (src).g)
#define vshl(dst, src, shift) \
	(dst).f = _mm_slli_epi64((src).f, (shift)); \
	(dst).g = _mm_slli_si64((src).g, (shift))
#define vshr(dst, src, shift) \
	(dst).f = _mm_srli_epi64((src).f, (shift)); \
	(dst).g = _mm_srli_si64((src).g, (shift))

#elif defined(__SSE2__) && \
    ((ARCH_BITS == 64 && DES_BS_DEPTH == 192) || \
    (ARCH_BITS == 32 && DES_BS_DEPTH == 160))
#include <emmintrin.h>

typedef struct {
	__m128i f;
	unsigned ARCH_WORD g;
} vtype;

#define vst(dst, ofs, src) \
	_mm_store_si128(&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g

#define vxor(dst, a, b) \
	(dst).f = _mm_xor_si128((a).f, (b).f); \
	(dst).g = (a).g ^ (b).g

#define vnot(dst, a) \
	(dst).f = _mm_xor_si128((a).f, vones.f); \
	(dst).g = ~(a).g
#define vand(dst, a, b) \
	(dst).f = _mm_and_si128((a).f, (b).f); \
	(dst).g = (a).g & (b).g
#define vor(dst, a, b) \
	(dst).f = _mm_or_si128((a).f, (b).f); \
	(dst).g = (a).g | (b).g
#define vandn(dst, a, b) \
	(dst).f = _mm_andnot_si128((b).f, (a).f); \
	(dst).g = (a).g & ~(b).g

#define vshl1(dst, src) \
	(dst).f = _mm_add_epi8((src).f, (src).f); \
	(dst).g = (src).g << 1
#define vshl(dst, src, shift) \
	(dst).f = _mm_slli_epi64((src).f, (shift)); \
	(dst).g = (src).g << (shift)
#define vshr(dst, src, shift) \
	(dst).f = _mm_srli_epi64((src).f, (shift)); \
	(dst).g = (src).g >> (shift)

#elif defined(__SSE2__) && defined(__MMX__) && \
    ((ARCH_BITS == 64 && DES_BS_DEPTH == 256) || \
    (ARCH_BITS == 32 && DES_BS_DEPTH == 224))
#include <emmintrin.h>
#include <mmintrin.h>

typedef struct {
	__m128i f;
	__m64 g;
	unsigned ARCH_WORD h;
} vtype;

#define vst(dst, ofs, src) \
	_mm_store_si128(&((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f, \
	    (src).f); \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g; \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->h = (src).h

#define vxor(dst, a, b) \
	(dst).f = _mm_xor_si128((a).f, (b).f); \
	(dst).g = _mm_xor_si64((a).g, (b).g); \
	(dst).h = (a).h ^ (b).h

#define vnot(dst, a) \
	(dst).f = _mm_xor_si128((a).f, vones.f); \
	(dst).g = _mm_xor_si64((a).g, vones.g); \
	(dst).h = ~(a).h
#define vand(dst, a, b) \
	(dst).f = _mm_and_si128((a).f, (b).f); \
	(dst).g = _mm_and_si64((a).g, (b).g); \
	(dst).h = (a).h & (b).h
#define vor(dst, a, b) \
	(dst).f = _mm_or_si128((a).f, (b).f); \
	(dst).g = _mm_or_si64((a).g, (b).g); \
	(dst).h = (a).h | (b).h
#define vandn(dst, a, b) \
	(dst).f = _mm_andnot_si128((b).f, (a).f); \
	(dst).g = _mm_andnot_si64((b).g, (a).g); \
	(dst).h = (a).h & ~(b).h

#define vshl1(dst, src) \
	(dst).f = _mm_add_epi8((src).f, (src).f); \
	(dst).g = _mm_add_pi8((src).g, (src).g); \
	(dst).h = (src).h << 1
#define vshl(dst, src, shift) \
	(dst).f = _mm_slli_epi64((src).f, (shift)); \
	(dst).g = _mm_slli_si64((src).g, (shift)); \
	(dst).h = (src).h << (shift)
#define vshr(dst, src, shift) \
	(dst).f = _mm_srli_epi64((src).f, (shift)); \
	(dst).g = _mm_srli_si64((src).g, (shift)); \
	(dst).h = (src).h >> (shift)

#elif defined(__MMX__) && ARCH_BITS != 64 && DES_BS_DEPTH == 64
#include <mmintrin.h>

typedef __m64 vtype;

#define vxorf(a, b) \
	_mm_xor_si64((a), (b))

#define vand(dst, a, b) \
	(dst) = _mm_and_si64((a), (b))
#define vor(dst, a, b) \
	(dst) = _mm_or_si64((a), (b))
#define vandn(dst, a, b) \
	(dst) = _mm_andnot_si64((b), (a))

#define vshl1(dst, src) \
	(dst) = _mm_add_pi8((src), (src))
#define vshl(dst, src, shift) \
	(dst) = _mm_slli_si64((src), (shift))
#define vshr(dst, src, shift) \
	(dst) = _mm_srli_si64((src), (shift))

#elif defined(__MMX__) && ARCH_BITS == 32 && DES_BS_DEPTH == 96
#include <mmintrin.h>

typedef struct {
	__m64 f;
	unsigned ARCH_WORD g;
} vtype;

#define vst(dst, ofs, src) \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->f = (src).f; \
	((vtype *)((DES_bs_vector *)&(dst) + (ofs)))->g = (src).g

#define vxor(dst, a, b) \
	(dst).f = _mm_xor_si64((a).f, (b).f); \
	(dst).g = (a).g ^ (b).g

#define vnot(dst, a) \
	(dst).f = _mm_xor_si64((a).f, vones.f); \
	(dst).g = ~(a).g
#define vand(dst, a, b) \
	(dst).f = _mm_and_si64((a).f, (b).f); \
	(dst).g = (a).g & (b).g
#define vor(dst, a, b) \
	(dst).f = _mm_or_si64((a).f, (b).f); \
	(dst).g = (a).g | (b).g
#define vandn(dst, a, b) \
	(dst).f = _mm_andnot_si64((b).f, (a).f); \
	(dst).g = (a).g & ~(b).g

#define vshl1(dst, src) \
	(dst).f = _mm_add_pi8((src).f, (src).f); \
	(dst).g = (src).g << 1
#define vshl(dst, src, shift) \
	(dst).f = _mm_slli_si64((src).f, (shift)); \
	(dst).g = (src).g << (shift)
#define vshr(dst, src, shift) \
	(dst).f = _mm_srli_si64((src).f, (shift)); \
	(dst).g = (src).g >> (shift)

#else

#if DES_BS_VECTOR
#undef DES_BS_VECTOR_LOOPS
#define DES_BS_VECTOR_LOOPS 1
#endif

typedef unsigned ARCH_WORD vtype;

#define vxorf(a, b) \
	((a) ^ (b))

#define vnot(dst, a) \
	(dst) = ~(a)
#define vand(dst, a, b) \
	(dst) = (a) & (b)
#define vor(dst, a, b) \
	(dst) = (a) | (b)
#define vandn(dst, a, b) \
	(dst) = (a) & ~(b)
#define vsel(dst, a, b, c) \
	(dst) = (((a) & ~(c)) ^ ((b) & (c)))

#define vshl(dst, src, shift) \
	(dst) = (src) << (shift)
#define vshr(dst, src, shift) \
	(dst) = (src) >> (shift)

/* Assume that 0 always fits in one load immediate instruction */
#undef vzero
#define vzero 0

/* Archs friendly to use of immediate values */
#if defined(__x86_64__) || defined(__i386__)
#undef vones
#define vones (~(vtype)0)
#endif

#endif

#ifndef vst
#define vst(dst, ofs, src) \
	*((vtype *)((DES_bs_vector *)&(dst) + (ofs))) = (src)
#endif

#if !defined(vxor) && defined(vxorf)
#define vxor(dst, a, b) \
	(dst) = vxorf((a), (b))
#endif
#if !defined(vxorf) && defined(vxor)
/*
 * This requires gcc's "Statement Exprs" extension (also supported by a number
 * of other C compilers).
 */
#define vxorf(a, b) \
	({ vtype tmp; vxor(tmp, (a), (b)); tmp; })
#endif

#ifndef vnot
#define vnot(dst, a) \
	vxor((dst), (a), vones)
#endif

#ifndef vshl1
#define vshl1(dst, src) \
	vshl((dst), (src), 1)
#endif

#if !DES_BS_VECTOR_LOOPS && defined(vshl) && defined(vshr)
#define DES_BS_VECTOR_LOOPS_K 0
#define DEPTH_K
#define for_each_depth_k()

#define kvtype vtype
#define kvand vand
#define kvor vor
#define kvshl1 vshl1
#define kvshl vshl
#define kvshr vshr
#else
#if DES_BS_VECTOR
#define DES_BS_VECTOR_LOOPS_K 1
#define DEPTH_K				[depth]
#define for_each_depth_k() \
	for (depth = 0; depth < DES_BS_VECTOR; depth++)
#else
#define DES_BS_VECTOR_LOOPS_K 0
#endif

typedef unsigned ARCH_WORD kvtype;
#define kvand(dst, a, b) \
	(dst) = (a) & (b)
#define kvor(dst, a, b) \
	(dst) = (a) | (b)
#define kvshl1(dst, src) \
	(dst) = (src) << 1
#define kvshl(dst, src, shift) \
	(dst) = (src) << (shift)
#define kvshr(dst, src, shift) \
	(dst) = (src) >> (shift)
#endif

#if !DES_BS_VECTOR || DES_BS_VECTOR_LOOPS_K
#ifdef __x86_64__
#define mask01 0x0101010101010101UL
#elif __i386__
#define mask01 0x01010101UL
#else
#undef mask01
#endif
#ifdef mask01
#define mask02 (mask01 << 1)
#define mask04 (mask01 << 2)
#define mask08 (mask01 << 3)
#define mask10 (mask01 << 4)
#define mask20 (mask01 << 5)
#define mask40 (mask01 << 6)
#define mask80 (mask01 << 7)
#endif
#endif

#ifndef mask01
#define mask01 (*(kvtype *)&DES_bs_all.masks[0])
#define mask02 (*(kvtype *)&DES_bs_all.masks[1])
#define mask04 (*(kvtype *)&DES_bs_all.masks[2])
#define mask08 (*(kvtype *)&DES_bs_all.masks[3])
#define mask10 (*(kvtype *)&DES_bs_all.masks[4])
#define mask20 (*(kvtype *)&DES_bs_all.masks[5])
#define mask40 (*(kvtype *)&DES_bs_all.masks[6])
#define mask80 (*(kvtype *)&DES_bs_all.masks[7])
#endif

#ifdef __i386__
/* register-starved */
#define LOAD_V \
	kvtype v0 = *(kvtype *)&vp[0]; \
	kvtype v4 = *(kvtype *)&vp[4];
#define v1 *(kvtype *)&vp[1]
#define v2 *(kvtype *)&vp[2]
#define v3 *(kvtype *)&vp[3]
#define v5 *(kvtype *)&vp[5]
#define v6 *(kvtype *)&vp[6]
#define v7 *(kvtype *)&vp[7]
#else
#define LOAD_V \
	kvtype v0 = *(kvtype *)&vp[0]; \
	kvtype v1 = *(kvtype *)&vp[1]; \
	kvtype v2 = *(kvtype *)&vp[2]; \
	kvtype v3 = *(kvtype *)&vp[3]; \
	kvtype v4 = *(kvtype *)&vp[4]; \
	kvtype v5 = *(kvtype *)&vp[5]; \
	kvtype v6 = *(kvtype *)&vp[6]; \
	kvtype v7 = *(kvtype *)&vp[7];
#endif

#define kvand_shl1_or(dst, src, mask) \
	kvand(tmp, src, mask); \
	kvshl1(tmp, tmp); \
	kvor(dst, dst, tmp)

#define kvand_shl_or(dst, src, mask, shift) \
	kvand(tmp, src, mask); \
	kvshl(tmp, tmp, shift); \
	kvor(dst, dst, tmp)

#define kvand_shl1(dst, src, mask) \
	kvand(tmp, src, mask); \
	kvshl1(dst, tmp)

#define kvand_or(dst, src, mask) \
	kvand(tmp, src, mask); \
	kvor(dst, dst, tmp)

#define kvand_shr_or(dst, src, mask, shift) \
	kvand(tmp, src, mask); \
	kvshr(tmp, tmp, shift); \
	kvor(dst, dst, tmp)

#define kvand_shr(dst, src, mask, shift) \
	kvand(tmp, src, mask); \
	kvshr(dst, tmp, shift)

#define FINALIZE_NEXT_KEY_BIT_0 { \
	kvtype m = mask01, va, vb, tmp; \
	kvand(va, v0, m); \
	kvand_shl1(vb, v1, m); \
	kvand_shl_or(va, v2, m, 2); \
	kvand_shl_or(vb, v3, m, 3); \
	kvand_shl_or(va, v4, m, 4); \
	kvand_shl_or(vb, v5, m, 5); \
	kvand_shl_or(va, v6, m, 6); \
	kvand_shl_or(vb, v7, m, 7); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#define FINALIZE_NEXT_KEY_BIT_1 { \
	kvtype m = mask02, va, vb, tmp; \
	kvand_shr(va, v0, m, 1); \
	kvand(vb, v1, m); \
	kvand_shl1_or(va, v2, m); \
	kvand_shl_or(vb, v3, m, 2); \
	kvand_shl_or(va, v4, m, 3); \
	kvand_shl_or(vb, v5, m, 4); \
	kvand_shl_or(va, v6, m, 5); \
	kvand_shl_or(vb, v7, m, 6); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#define FINALIZE_NEXT_KEY_BIT_2 { \
	kvtype m = mask04, va, vb, tmp; \
	kvand_shr(va, v0, m, 2); \
	kvand_shr(vb, v1, m, 1); \
	kvand_or(va, v2, m); \
	kvand_shl1_or(vb, v3, m); \
	kvand_shl_or(va, v4, m, 2); \
	kvand_shl_or(vb, v5, m, 3); \
	kvand_shl_or(va, v6, m, 4); \
	kvand_shl_or(vb, v7, m, 5); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#define FINALIZE_NEXT_KEY_BIT_3 { \
	kvtype m = mask08, va, vb, tmp; \
	kvand_shr(va, v0, m, 3); \
	kvand_shr(vb, v1, m, 2); \
	kvand_shr_or(va, v2, m, 1); \
	kvand_or(vb, v3, m); \
	kvand_shl1_or(va, v4, m); \
	kvand_shl_or(vb, v5, m, 2); \
	kvand_shl_or(va, v6, m, 3); \
	kvand_shl_or(vb, v7, m, 4); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#define FINALIZE_NEXT_KEY_BIT_4 { \
	kvtype m = mask10, va, vb, tmp; \
	kvand_shr(va, v0, m, 4); \
	kvand_shr(vb, v1, m, 3); \
	kvand_shr_or(va, v2, m, 2); \
	kvand_shr_or(vb, v3, m, 1); \
	kvand_or(va, v4, m); \
	kvand_shl1_or(vb, v5, m); \
	kvand_shl_or(va, v6, m, 2); \
	kvand_shl_or(vb, v7, m, 3); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#define FINALIZE_NEXT_KEY_BIT_5 { \
	kvtype m = mask20, va, vb, tmp; \
	kvand_shr(va, v0, m, 5); \
	kvand_shr(vb, v1, m, 4); \
	kvand_shr_or(va, v2, m, 3); \
	kvand_shr_or(vb, v3, m, 2); \
	kvand_shr_or(va, v4, m, 1); \
	kvand_or(vb, v5, m); \
	kvand_shl1_or(va, v6, m); \
	kvand_shl_or(vb, v7, m, 2); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#define FINALIZE_NEXT_KEY_BIT_6 { \
	kvtype m = mask40, va, vb, tmp; \
	kvand_shr(va, v0, m, 6); \
	kvand_shr(vb, v1, m, 5); \
	kvand_shr_or(va, v2, m, 4); \
	kvand_shr_or(vb, v3, m, 3); \
	kvand_shr_or(va, v4, m, 2); \
	kvand_shr_or(vb, v5, m, 1); \
	kvand_or(va, v6, m); \
	kvand_shl1_or(vb, v7, m); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#define FINALIZE_NEXT_KEY_BIT_7 { \
	kvtype m = mask80, va, vb, tmp; \
	kvand_shr(va, v0, m, 7); \
	kvand_shr(vb, v1, m, 6); \
	kvand_shr_or(va, v2, m, 5); \
	kvand_shr_or(vb, v3, m, 4); \
	kvand_shr_or(va, v4, m, 3); \
	kvand_shr_or(vb, v5, m, 2); \
	kvand_shr_or(va, v6, m, 1); \
	kvand_or(vb, v7, m); \
	kvor(*(kvtype *)kp, va, vb); \
	kp++; \
}

#if DES_bs_mt
static MAYBE_INLINE void DES_bs_finalize_keys(int t)
#else
static MAYBE_INLINE void DES_bs_finalize_keys(void)
#endif
{
#if DES_BS_VECTOR_LOOPS_K
	int depth;
#endif

	for_each_depth_k() {
		DES_bs_vector *kp = (DES_bs_vector *)&DES_bs_all.K[0] DEPTH_K;
		int ic;
		for (ic = 0; ic < 8; ic++) {
			DES_bs_vector *vp =
			    (DES_bs_vector *)&DES_bs_all.xkeys.v[ic][0] DEPTH_K;
			LOAD_V
			FINALIZE_NEXT_KEY_BIT_0
			FINALIZE_NEXT_KEY_BIT_1
			FINALIZE_NEXT_KEY_BIT_2
			FINALIZE_NEXT_KEY_BIT_3
			FINALIZE_NEXT_KEY_BIT_4
			FINALIZE_NEXT_KEY_BIT_5
			FINALIZE_NEXT_KEY_BIT_6
		}
	}

#if DES_BS_EXPAND
	{
		int index;
		for (index = 0; index < 0x300; index++)
		for_each_depth_k() {
#if DES_BS_VECTOR_LOOPS_K
			DES_bs_all.KS.v[index] DEPTH_K =
			    DES_bs_all.KSp[index] DEPTH_K;
#else
			vst(*(kvtype *)&DES_bs_all.KS.v[index], 0,
			    *(kvtype *)DES_bs_all.KSp[index]);
#endif
		}
	}
#endif
}

#endif

#if DES_bs_mt
MAYBE_INLINE void DES_bs_set_salt_for_thread(int t, unsigned int salt)
#else
void DES_bs_set_salt(ARCH_WORD salt)
#endif
{
	unsigned int new = salt;
	unsigned int old = DES_bs_all.salt;
	int dst;

	DES_bs_all.salt = new;

	for (dst = 0; dst < 24; dst++) {
		if ((new ^ old) & 1) {
			DES_bs_vector *sp1, *sp2;
			int src1 = dst;
			int src2 = dst + 24;
			if (new & 1) {
				src1 = src2;
				src2 = dst;
			}
			sp1 = DES_bs_all.Ens[src1];
			sp2 = DES_bs_all.Ens[src2];
			DES_bs_all.E.E[dst] = (ARCH_WORD *)sp1;
			DES_bs_all.E.E[dst + 24] = (ARCH_WORD *)sp2;
			DES_bs_all.E.E[dst + 48] = (ARCH_WORD *)(sp1 + 32);
			DES_bs_all.E.E[dst + 72] = (ARCH_WORD *)(sp2 + 32);
		}
		new >>= 1;
		old >>= 1;
		if (new == old)
			break;
	}
}

#if !DES_BS_ASM

/* Include the S-boxes here so that the compiler can inline them */
#if DES_BS == 3
#include "sboxes-s.c"
#elif DES_BS == 2
#include "sboxes.c"
#else
#undef andn
#include "nonstd.c"
#endif

#define b				DES_bs_all.B
#define e				DES_bs_all.E.E

#if DES_BS_VECTOR_LOOPS
#define kd				[depth]
#define bd				[depth]
#define ed				[depth]
#define DEPTH				[depth]
#define for_each_depth() \
	for (depth = 0; depth < DES_BS_VECTOR; depth++)
#else
#if DES_BS_EXPAND
#define kd
#else
#define kd				[0]
#endif
#define bd
#define ed				[0]
#define DEPTH
#define for_each_depth()
#endif

#define DES_bs_clear_block_8(i) \
	for_each_depth() { \
		vst(b[i] bd, 0, zero); \
		vst(b[i] bd, 1, zero); \
		vst(b[i] bd, 2, zero); \
		vst(b[i] bd, 3, zero); \
		vst(b[i] bd, 4, zero); \
		vst(b[i] bd, 5, zero); \
		vst(b[i] bd, 6, zero); \
		vst(b[i] bd, 7, zero); \
	}

#define DES_bs_clear_block \
	DES_bs_clear_block_8(0); \
	DES_bs_clear_block_8(8); \
	DES_bs_clear_block_8(16); \
	DES_bs_clear_block_8(24); \
	DES_bs_clear_block_8(32); \
	DES_bs_clear_block_8(40); \
	DES_bs_clear_block_8(48); \
	DES_bs_clear_block_8(56);

#define DES_bs_set_block_8(i, v0, v1, v2, v3, v4, v5, v6, v7) \
	for_each_depth() { \
		vst(b[i] bd, 0, v0); \
		vst(b[i] bd, 1, v1); \
		vst(b[i] bd, 2, v2); \
		vst(b[i] bd, 3, v3); \
		vst(b[i] bd, 4, v4); \
		vst(b[i] bd, 5, v5); \
		vst(b[i] bd, 6, v6); \
		vst(b[i] bd, 7, v7); \
	}

#define x(p) vxorf(*(vtype *)&e[p] ed, *(vtype *)&k[p] kd)
#define y(p, q) vxorf(*(vtype *)&b[p] bd, *(vtype *)&k[q] kd)
#define z(r) ((vtype *)&b[r] bd)

void DES_bs_crypt_25(int keys_count)
{

#if DES_bs_mt
	int t, n = (keys_count + (DES_BS_DEPTH - 1)) / DES_BS_DEPTH;
#endif

#ifdef _OPENMP
#pragma omp parallel for default(none) private(t) shared(n, DES_bs_all_p, keys_count)
#endif
	for_each_t(n) {
#if DES_BS_EXPAND
		DES_bs_vector *k;
#else
		ARCH_WORD **k;
#endif
		int iterations, rounds_and_swapped;
#if DES_BS_VECTOR_LOOPS
		int depth;
#endif

		if (DES_bs_all.keys_changed)
			goto finalize_keys;

body:
#if DES_bs_mt
		DES_bs_set_salt_for_thread(t, DES_bs_all_by_tnum(-1).salt);
#endif




		{
			vtype zero = vzero;
			DES_bs_clear_block
		}

#if DES_BS_EXPAND
		k = DES_bs_all.KS.v;
#else
		k = DES_bs_all.KS.p;
#endif
		rounds_and_swapped = 8;
		iterations = 25;

start:
		for_each_depth()
		s1(x(0), x(1), x(2), x(3), x(4), x(5),
			z(40), z(48), z(54), z(62));
		for_each_depth()
		s2(x(6), x(7), x(8), x(9), x(10), x(11),
			z(44), z(59), z(33), z(49));
		for_each_depth()
		s3(y(7, 12), y(8, 13), y(9, 14),
			y(10, 15), y(11, 16), y(12, 17),
			z(55), z(47), z(61), z(37));
		for_each_depth()
		s4(y(11, 18), y(12, 19), y(13, 20),
			y(14, 21), y(15, 22), y(16, 23),
			z(57), z(51), z(41), z(32));
		for_each_depth()
		s5(x(24), x(25), x(26), x(27), x(28), x(29),
			z(39), z(45), z(56), z(34));
		for_each_depth()
		s6(x(30), x(31), x(32), x(33), x(34), x(35),
			z(35), z(60), z(42), z(50));
		for_each_depth()
		s7(y(23, 36), y(24, 37), y(25, 38),
			y(26, 39), y(27, 40), y(28, 41),
			z(63), z(43), z(53), z(38));
		for_each_depth()
		s8(y(27, 42), y(28, 43), y(29, 44),
			y(30, 45), y(31, 46), y(0, 47),
			z(36), z(58), z(46), z(52));

		if (rounds_and_swapped == 0x100) goto next;

swap:
		for_each_depth()
		s1(x(48), x(49), x(50), x(51), x(52), x(53),
			z(8), z(16), z(22), z(30));
		for_each_depth()
		s2(x(54), x(55), x(56), x(57), x(58), x(59),
			z(12), z(27), z(1), z(17));
		for_each_depth()
		s3(y(39, 60), y(40, 61), y(41, 62),
			y(42, 63), y(43, 64), y(44, 65),
			z(23), z(15), z(29), z(5));
		for_each_depth()
		s4(y(43, 66), y(44, 67), y(45, 68),
			y(46, 69), y(47, 70), y(48, 71),
			z(25), z(19), z(9), z(0));
		for_each_depth()
		s5(x(72), x(73), x(74), x(75), x(76), x(77),
			z(7), z(13), z(24), z(2));
		for_each_depth()
		s6(x(78), x(79), x(80), x(81), x(82), x(83),
			z(3), z(28), z(10), z(18));
		for_each_depth()
		s7(y(55, 84), y(56, 85), y(57, 86),
			y(58, 87), y(59, 88), y(60, 89),
			z(31), z(11), z(21), z(6));
		for_each_depth()
		s8(y(59, 90), y(60, 91), y(61, 92),
			y(62, 93), y(63, 94), y(32, 95),
			z(4), z(26), z(14), z(20));

		k += 96;

		if (--rounds_and_swapped) goto start;
		k -= (0x300 + 48);
		rounds_and_swapped = 0x108;
		if (--iterations) goto swap;
#if DES_bs_mt
		continue;
#else
		return;
#endif

next:
		k -= (0x300 - 48);
		rounds_and_swapped = 8;
		iterations--;
		goto start;

finalize_keys:
		DES_bs_all.keys_changed = 0;
#if DES_bs_mt
		DES_bs_finalize_keys(t);
#else
		DES_bs_finalize_keys();
#endif
		goto body;
	}

}

void DES_bs_crypt(int count, int keys_count)
{
#if DES_bs_mt
	int t, n = (keys_count + (DES_BS_DEPTH - 1)) / DES_BS_DEPTH;
#endif

#ifdef _OPENMP
#pragma omp parallel for default(none) private(t) shared(n, DES_bs_all_p, count, keys_count)
#endif
	for_each_t(n) {
#if DES_BS_EXPAND
		DES_bs_vector *k;
#else
		ARCH_WORD **k;
#endif
		int iterations, rounds_and_swapped;
#if DES_BS_VECTOR_LOOPS
		int depth;
#endif

		if (DES_bs_all.keys_changed)
			goto finalize_keys;

body:
#if DES_bs_mt
		DES_bs_set_salt_for_thread(t, DES_bs_all_by_tnum(-1).salt);
#endif

		{
			vtype zero = vzero;
			DES_bs_clear_block
		}

#if DES_BS_EXPAND
		k = DES_bs_all.KS.v;
#else
		k = DES_bs_all.KS.p;
#endif
		rounds_and_swapped = 8;
		iterations = count;

start:
		for_each_depth()
		s1(x(0), x(1), x(2), x(3), x(4), x(5),
			z(40), z(48), z(54), z(62));
		for_each_depth()
		s2(x(6), x(7), x(8), x(9), x(10), x(11),
			z(44), z(59), z(33), z(49));
		for_each_depth()
		s3(x(12), x(13), x(14), x(15), x(16), x(17),
			z(55), z(47), z(61), z(37));
		for_each_depth()
		s4(x(18), x(19), x(20), x(21), x(22), x(23),
			z(57), z(51), z(41), z(32));
		for_each_depth()
		s5(x(24), x(25), x(26), x(27), x(28), x(29),
			z(39), z(45), z(56), z(34));
		for_each_depth()
		s6(x(30), x(31), x(32), x(33), x(34), x(35),
			z(35), z(60), z(42), z(50));
		for_each_depth()
		s7(x(36), x(37), x(38), x(39), x(40), x(41),
			z(63), z(43), z(53), z(38));
		for_each_depth()
		s8(x(42), x(43), x(44), x(45), x(46), x(47),
			z(36), z(58), z(46), z(52));

		if (rounds_and_swapped == 0x100) goto next;

swap:
		for_each_depth()
		s1(x(48), x(49), x(50), x(51), x(52), x(53),
			z(8), z(16), z(22), z(30));
		for_each_depth()
		s2(x(54), x(55), x(56), x(57), x(58), x(59),
			z(12), z(27), z(1), z(17));
		for_each_depth()
		s3(x(60), x(61), x(62), x(63), x(64), x(65),
			z(23), z(15), z(29), z(5));
		for_each_depth()
		s4(x(66), x(67), x(68), x(69), x(70), x(71),
			z(25), z(19), z(9), z(0));
		for_each_depth()
		s5(x(72), x(73), x(74), x(75), x(76), x(77),
			z(7), z(13), z(24), z(2));
		for_each_depth()
		s6(x(78), x(79), x(80), x(81), x(82), x(83),
			z(3), z(28), z(10), z(18));
		for_each_depth()
		s7(x(84), x(85), x(86), x(87), x(88), x(89),
			z(31), z(11), z(21), z(6));
		for_each_depth()
		s8(x(90), x(91), x(92), x(93), x(94), x(95),
			z(4), z(26), z(14), z(20));

		k += 96;

		if (--rounds_and_swapped) goto start;
		k -= (0x300 + 48);
		rounds_and_swapped = 0x108;
		if (--iterations) goto swap;
#if DES_bs_mt
		continue;
#else
		return;
#endif

next:
		k -= (0x300 - 48);
		rounds_and_swapped = 8;
		if (--iterations) goto start;
#if DES_bs_mt
		continue;
#else
		return;
#endif

finalize_keys:
		DES_bs_all.keys_changed = 0;
#if DES_bs_mt
		DES_bs_finalize_keys(t);
#else
		DES_bs_finalize_keys();
#endif
		goto body;
	}
}

#undef x

#if DES_bs_mt
static MAYBE_INLINE void DES_bs_finalize_keys_LM(int t)
#else
static MAYBE_INLINE void DES_bs_finalize_keys_LM(void)
#endif
{
#if DES_BS_VECTOR_LOOPS_K
	int depth;
#endif

	for_each_depth_k() {
		DES_bs_vector *kp = (DES_bs_vector *)&DES_bs_all.K[0] DEPTH_K;
		int ic;
		for (ic = 0; ic < 7; ic++) {
			DES_bs_vector *vp =
			    (DES_bs_vector *)&DES_bs_all.xkeys.v[ic][0] DEPTH_K;
			LOAD_V
			FINALIZE_NEXT_KEY_BIT_0
			FINALIZE_NEXT_KEY_BIT_1
			FINALIZE_NEXT_KEY_BIT_2
			FINALIZE_NEXT_KEY_BIT_3
			FINALIZE_NEXT_KEY_BIT_4
			FINALIZE_NEXT_KEY_BIT_5
			FINALIZE_NEXT_KEY_BIT_6
			FINALIZE_NEXT_KEY_BIT_7
		}
	}
}

#undef v1
#undef v2
#undef v3
#undef v5
#undef v6
#undef v7

#undef kd
#if DES_BS_VECTOR_LOOPS
#define kd				[depth]
#else
#define kd				[0]
#endif

void DES_bs_crypt_LM(int keys_count)
{
#if DES_bs_mt
	int t, n = (keys_count + (DES_BS_DEPTH - 1)) / DES_BS_DEPTH;
#endif

#ifdef _OPENMP
#pragma omp parallel for default(none) private(t) shared(n, DES_bs_all_p, keys_count)
#endif
	for_each_t(n) {
		ARCH_WORD **k;
		int rounds;
#if DES_BS_VECTOR_LOOPS
		int depth;
#endif

		{
			vtype z = vzero, o = vones;
			DES_bs_set_block_8(0, z, z, z, z, z, z, z, z);
			DES_bs_set_block_8(8, o, o, o, z, o, z, z, z);
			DES_bs_set_block_8(16, z, z, z, z, z, z, z, o);
			DES_bs_set_block_8(24, z, z, o, z, z, o, o, o);
			DES_bs_set_block_8(32, z, z, z, o, z, o, o, o);
			DES_bs_set_block_8(40, z, z, z, z, z, o, z, z);
			DES_bs_set_block_8(48, o, o, z, z, z, z, o, z);
			DES_bs_set_block_8(56, o, z, o, z, o, o, o, o);
		}

#if DES_bs_mt
		DES_bs_finalize_keys_LM(t);
#else
		DES_bs_finalize_keys_LM();
#endif

		k = DES_bs_all.KS.p;
		rounds = 8;

		do {
			for_each_depth()
			s1(y(31, 0), y(0, 1), y(1, 2),
				y(2, 3), y(3, 4), y(4, 5),
				z(40), z(48), z(54), z(62));
			for_each_depth()
			s2(y(3, 6), y(4, 7), y(5, 8),
				y(6, 9), y(7, 10), y(8, 11),
				z(44), z(59), z(33), z(49));
			for_each_depth()
			s3(y(7, 12), y(8, 13), y(9, 14),
				y(10, 15), y(11, 16), y(12, 17),
				z(55), z(47), z(61), z(37));
			for_each_depth()
			s4(y(11, 18), y(12, 19), y(13, 20),
				y(14, 21), y(15, 22), y(16, 23),
				z(57), z(51), z(41), z(32));
			for_each_depth()
			s5(y(15, 24), y(16, 25), y(17, 26),
				y(18, 27), y(19, 28), y(20, 29),
				z(39), z(45), z(56), z(34));
			for_each_depth()
			s6(y(19, 30), y(20, 31), y(21, 32),
				y(22, 33), y(23, 34), y(24, 35),
				z(35), z(60), z(42), z(50));
			for_each_depth()
			s7(y(23, 36), y(24, 37), y(25, 38),
				y(26, 39), y(27, 40), y(28, 41),
				z(63), z(43), z(53), z(38));
			for_each_depth()
			s8(y(27, 42), y(28, 43), y(29, 44),
				y(30, 45), y(31, 46), y(0, 47),
				z(36), z(58), z(46), z(52));

			for_each_depth()
			s1(y(63, 48), y(32, 49), y(33, 50),
				y(34, 51), y(35, 52), y(36, 53),
				z(8), z(16), z(22), z(30));
			for_each_depth()
			s2(y(35, 54), y(36, 55), y(37, 56),
				y(38, 57), y(39, 58), y(40, 59),
				z(12), z(27), z(1), z(17));
			for_each_depth()
			s3(y(39, 60), y(40, 61), y(41, 62),
				y(42, 63), y(43, 64), y(44, 65),
				z(23), z(15), z(29), z(5));
			for_each_depth()
			s4(y(43, 66), y(44, 67), y(45, 68),
				y(46, 69), y(47, 70), y(48, 71),
				z(25), z(19), z(9), z(0));
			for_each_depth()
			s5(y(47, 72), y(48, 73), y(49, 74),
				y(50, 75), y(51, 76), y(52, 77),
				z(7), z(13), z(24), z(2));
			for_each_depth()
			s6(y(51, 78), y(52, 79), y(53, 80),
				y(54, 81), y(55, 82), y(56, 83),
				z(3), z(28), z(10), z(18));
			for_each_depth()
			s7(y(55, 84), y(56, 85), y(57, 86),
				y(58, 87), y(59, 88), y(60, 89),
				z(31), z(11), z(21), z(6));
			for_each_depth()
			s8(y(59, 90), y(60, 91), y(61, 92),
				y(62, 93), y(63, 94), y(32, 95),
				z(4), z(26), z(14), z(20));

			k += 96;
		} while (--rounds);
	}
}
#endif

// one des encryption//
#undef kd
#define kd

void DES_bs_crypt_one(int keys_count)
{
for_each_t(n) {
	DES_bs_vector *k;
	int rounds;
	int i;

	DES_bs_finalize_keys();
#if DES_BS_EXPAND
		k = DES_bs_all.KS.v;
#else
		k = DES_bs_all.KS.p;
#endif

	rounds = 8;

	for(i=0; i<64; i++)
		DES_bs_all.B[i] = Plaintext[i];

	do {
		for_each_depth()
		s1(y(31, 0), y(0, 1), y(1, 2),
			y(2, 3), y(3, 4), y(4, 5),
			z(40), z(48), z(54), z(62));
		for_each_depth()
		s2(y(3, 6), y(4, 7), y(5, 8),
			y(6, 9), y(7, 10), y(8, 11),
			z(44), z(59), z(33), z(49));
		for_each_depth()
		s3(y(7, 12), y(8, 13), y(9, 14),
			y(10, 15), y(11, 16), y(12, 17),
			z(55), z(47), z(61), z(37));
		for_each_depth()
		s4(y(11, 18), y(12, 19), y(13, 20),
			y(14, 21), y(15, 22), y(16, 23),
			z(57), z(51), z(41), z(32));
		for_each_depth()
		s5(y(15, 24), y(16, 25), y(17, 26),
			y(18, 27), y(19, 28), y(20, 29),
			z(39), z(45), z(56), z(34));
		for_each_depth()
		s6(y(19, 30), y(20, 31), y(21, 32),
			y(22, 33), y(23, 34), y(24, 35),
			z(35), z(60), z(42), z(50));
		for_each_depth()
		s7(y(23, 36), y(24, 37), y(25, 38),
			y(26, 39), y(27, 40), y(28, 41),
			z(63), z(43), z(53), z(38));
		for_each_depth()
		s8(y(27, 42), y(28, 43), y(29, 44),
			y(30, 45), y(31, 46), y(0, 47),
			z(36), z(58), z(46), z(52));

		for_each_depth()
		s1(y(63, 48), y(32, 49), y(33, 50),
			y(34, 51), y(35, 52), y(36, 53),
			z(8), z(16), z(22), z(30));
		for_each_depth()
		s2(y(35, 54), y(36, 55), y(37, 56),
			y(38, 57), y(39, 58), y(40, 59),
			z(12), z(27), z(1), z(17));
		for_each_depth()
		s3(y(39, 60), y(40, 61), y(41, 62),
			y(42, 63), y(43, 64), y(44, 65),
			z(23), z(15), z(29), z(5));
		for_each_depth()
		s4(y(43, 66), y(44, 67), y(45, 68),
			y(46, 69), y(47, 70), y(48, 71),
			z(25), z(19), z(9), z(0));
		for_each_depth()
		s5(y(47, 72), y(48, 73), y(49, 74),
			y(50, 75), y(51, 76), y(52, 77),
			z(7), z(13), z(24), z(2));
		for_each_depth()
		s6(y(51, 78), y(52, 79), y(53, 80),
			y(54, 81), y(55, 82), y(56, 83),
			z(3), z(28), z(10), z(18));
		for_each_depth()
		s7(y(55, 84), y(56, 85), y(57, 86),
			y(58, 87), y(59, 88), y(60, 89),
			z(31), z(11), z(21), z(6));
		for_each_depth()
		s8(y(59, 90), y(60, 91), y(61, 92),
			y(62, 93), y(63, 94), y(32, 95),
			z(4), z(26), z(14), z(20));

		k += 96;
	} while (--rounds);
}
}


/*
 * MSCHAPv2_fmt.c -- Microsoft PPP CHAP Extensions, Version 2
 *
 * Written by JoMo-Kun <jmk at foofus.net> in 2010
 * and placed in the public domain.
 *
 * Modified for performance, OMP and utf-8 support
 * by magnum 2010-2011, no rights reserved
 *
 * This algorithm is designed for performing brute-force cracking of the
 * MSCHAPv2 challenge/response sets exchanged during network-based
 * authentication attempts. The captured challenge/response set from these
 * attempts should be stored using the following format:
 *
 * USERNAME:::AUTHENTICATOR CHALLENGE:MSCHAPv2 RESPONSE:PEER CHALLENGE
 * USERNAME::DOMAIN:AUTHENTICATOR CHALLENGE:MSCHAPv2 RESPONSE:PEER CHALLENGE
 * DOMAIN\USERNAME:::AUTHENTICATOR CHALLENGE:MSCHAPv2 RESPONSE:PEER CHALLENGE
 *
 * For example:
 * User:::5B5D7C7D7B3F2F3E3C2C602132262628:82309ECD8D708B5EA08FAA3981CD83544233114A3D85D6DF:21402324255E262A28295F2B3A337C7E
 * domain\fred:::56d64cbe7bad61349a0b752335100eaf:d7d829d9545cef1d631b4e568ffb7586050fa3a4d02dbc0b:7f8a466cff2a6bf0c80218bbf56d76bc
 *
 * http://freeradius.org/rfc/rfc2759.txt
 *
 */
#include "DES_bs.h"
#include <string.h>
#ifdef _OPENMP
#include <omp.h>
#endif

#include "misc.h"
#include "common.h"
#include "formats.h"
#include "options.h"
#include "memory.h"

#include "sha.h"

//Initial permutation//
extern unsigned char DES_IP[64];

#ifndef uchar
#define uchar unsigned char
#endif

#define FORMAT_LABEL         "mschapv2"
#define FORMAT_NAME          "MSCHAPv2 C/R MD4 DES"
#define ALGORITHM_NAME       "mschapv2"
#define BENCHMARK_COMMENT    ""
#define BENCHMARK_LENGTH     0
#define PLAINTEXT_LENGTH     125 /* lmcons.h - PWLEN (256) ? 127 ? */
#define USERNAME_LENGTH      256 /* lmcons.h - UNLEN (256) / LM20_UNLEN (20) */
#define DOMAIN_LENGTH        15  /* lmcons.h - CNLEN / DNLEN */
#define PARTIAL_BINARY_SIZE  8
#define BINARY_SIZE          24
#define CHALLENGE_LENGTH     64
#define SALT_SIZE            8
#define CIPHERTEXT_LENGTH    48
#define TOTAL_LENGTH         13 + USERNAME_LENGTH + CHALLENGE_LENGTH + CIPHERTEXT_LENGTH

// these may be altered in init() if running OMP
#define MIN_KEYS_PER_CRYPT	1
#define THREAD_RATIO		256
#ifdef _OPENMP
#define MAX_KEYS_PER_CRYPT	0x10000
#else
//#define MAX_KEYS_PER_CRYPT	THREAD_RATIO

//set to DES_BS_DEPTH for bitsliced des//
#define MAX_KEYS_PER_CRYPT      DES_BS_DEPTH
#endif

static struct fmt_tests tests[] = {
  {"$MSCHAPv2$4c092fd3fd98236502e8591100046326$b912ce522524d33123a982cf330a57f8e953fa7974042b5d$6a4915d0ce61d42be533640a75391925$1111", "2222"},
  {"$MSCHAPv2$5B5D7C7D7B3F2F3E3C2C602132262628$82309ECD8D708B5EA08FAA3981CD83544233114A3D85D6DF$21402324255E262A28295F2B3A337C7E$User", "clientPass"},
  {"$MSCHAPv2$d07054459a1fdbc266a006f0220e6fac$33c8331a9b03b7e003f09dd253d740a2bead544143cc8bde$3545cb1d89b507a5de104435e81b14a4$testuser1", "Cricket8"},
  {"$MSCHAPv2$56d64cbe7bad61349a0b752335100eaf$d7d829d9545cef1d631b4e568ffb7586050fa3a4d02dbc0b$7f8a466cff2a6bf0c80218bbf56d76bc$fred", "OMG!BBQ!11!one"}, /* domain\fred */
  {"$MSCHAPv2$b3c42db475b881d3c52ff3923d7b3bf8$f07c7a4eb391f5debe32d814679a5a69661b86b33227c4f8$6321f8649b971bd11ce8d5cb22a4a738$bOb", "asdblahblahblahblahblahblahblahblah"}, /* WorkGroup\bOb */
  {"$MSCHAPv2$d94e7c7972b2376b28c268583e162de7$eba25a3b04d2c7085d01f842e2befc91745c40db0f792356$0677ca7318fd7f65ae1b4f58c9f4f400$lameuser", ""}, /* no password */

  {"", "clientPass",     {"User",        "", "",    "5B5D7C7D7B3F2F3E3C2C602132262628", "82309ECD8D708B5EA08FAA3981CD83544233114A3D85D6DF", "21402324255E262A28295F2B3A337C7E"} },
  {"", "Cricket8",       {"testuser1",   "", "",    "d07054459a1fdbc266a006f0220e6fac", "33c8331a9b03b7e003f09dd253d740a2bead544143cc8bde", "3545cb1d89b507a5de104435e81b14a4"} },
  {"", "OMG!BBQ!11!one", {"domain\\fred", "", "",   "56d64cbe7bad61349a0b752335100eaf", "d7d829d9545cef1d631b4e568ffb7586050fa3a4d02dbc0b", "7f8a466cff2a6bf0c80218bbf56d76bc"} }, /* domain\fred */
  {"", "",               {"lameuser", "", "domain", "d94e7c7972b2376b28c268583e162de7", "eba25a3b04d2c7085d01f842e2befc91745c40db0f792356", "0677ca7318fd7f65ae1b4f58c9f4f400"} }, /* no password */
  {"", "asdblahblahblahblahblahblahblahblah", {"WorkGroup\\bOb", "", "", "b3c42db475b881d3c52ff3923d7b3bf8", "f07c7a4eb391f5debe32d814679a5a69661b86b33227c4f8", "6321f8649b971bd11ce8d5cb22a4a738"} }, /* WorkGroup\bOb */

  {NULL}
};

static uchar (*saved_plain)[PLAINTEXT_LENGTH + 1];
static int (*saved_len);
static uchar (*saved_key)[21];
static uchar (*output)[PARTIAL_BINARY_SIZE];
static uchar *challenge;
static int keys_prepared;

static void mschapv2_set_salt(void *salt);

#include "unicode.h"

static void init(struct fmt_main *pFmt)
{
#ifdef _OPENMP
	int n = MIN_KEYS_PER_CRYPT * omp_get_max_threads();
	if (n < MIN_KEYS_PER_CRYPT)
		n = MIN_KEYS_PER_CRYPT;
	if (n > MAX_KEYS_PER_CRYPT)
		n = MAX_KEYS_PER_CRYPT;
	pFmt->params.min_keys_per_crypt = n;
	n = n * (n << 1) * THREAD_RATIO;
	if (n > MAX_KEYS_PER_CRYPT)
		n = MAX_KEYS_PER_CRYPT;
	pFmt->params.max_keys_per_crypt = n;
#endif
	saved_plain = mem_calloc_tiny(sizeof(*saved_plain) * pFmt->params.max_keys_per_crypt, MEM_ALIGN_NONE);
	saved_len = mem_calloc_tiny(sizeof(*saved_len) * pFmt->params.max_keys_per_crypt, MEM_ALIGN_WORD);
	saved_key = mem_calloc_tiny(sizeof(*saved_key) * pFmt->params.max_keys_per_crypt, MEM_ALIGN_NONE);
	output = mem_alloc_tiny(sizeof(*output) * pFmt->params.max_keys_per_crypt, MEM_ALIGN_WORD);

	//LM =2 for one DES encryption//
	DES_bs_init(2, DES_bs_cpt);

}
static int mschapv2_valid(char *ciphertext, struct fmt_main *pFmt)
{
  char *pos, *pos2;

  if (ciphertext == NULL) return 0;
  else if (strncmp(ciphertext, "$MSCHAPv2$", 10)!=0) return 0;

  /* Validate Authenticator/Server Challenge Length */
  pos = &ciphertext[10];
  for (pos2 = pos; strncmp(pos2, "$", 1) != 0; pos2++)
    if (atoi16[ARCH_INDEX(*pos2)] == 0x7F)
      return 0;

  if ( !(*pos2 && (pos2 - pos == CHALLENGE_LENGTH / 2)) )
    return 0;

  /* Validate MSCHAPv2 Response Length */
  pos2++; pos = pos2;
  for (; strncmp(pos2, "$", 1) != 0; pos2++)
    if (atoi16[ARCH_INDEX(*pos2)] == 0x7F)
      return 0;

  if ( !(*pos2 && (pos2 - pos == CIPHERTEXT_LENGTH)) )
    return 0;

  /* Validate Peer/Client Challenge Length */
  pos2++; pos = pos2;
  for (; strncmp(pos2, "$", 1) != 0; pos2++)
    if (atoi16[ARCH_INDEX(*pos2)] == 0x7F)
      return 0;

  if ( !(*pos2 && (pos2 - pos == CHALLENGE_LENGTH / 2)) )
    return 0;

  /* Validate Username Length */
  if (strlen(++pos2) > USERNAME_LENGTH)
    return 0;

  return 1;
}

static char *mschapv2_prepare(char *split_fields[10], struct fmt_main *pFmt)
{
	char *username, *cp;

	if (!strncmp(split_fields[1], "$MSCHAPv2$", 10))
		return split_fields[1];
	if (!split_fields[0]||!split_fields[3]||!split_fields[4]||!split_fields[5])
		return split_fields[1];
	if (strlen(split_fields[3]) != CHALLENGE_LENGTH/2)
		return split_fields[1];
	if (strlen(split_fields[4]) != CIPHERTEXT_LENGTH)
		return split_fields[1];
	if (strlen(split_fields[5]) != CHALLENGE_LENGTH/2)
		return split_fields[1];

    /* DOMAIN\USERNAME -or - USERNAME -- ignore DOMAIN */
    if ((username = strstr(split_fields[0], "\\")) == NULL)
      username = split_fields[0];
    else
      username++;
	cp = mem_alloc(1+8+1+strlen(split_fields[3])+1+strlen(split_fields[4])+1+strlen(split_fields[5])+1+strlen(username)+1);
	sprintf(cp, "$MSCHAPv2$%s$%s$%s$%s", split_fields[3], split_fields[4], split_fields[5], username);
	if (mschapv2_valid(cp,pFmt)) {
		char *cp2 = str_alloc_copy(cp);
		MEM_FREE(cp);
		return cp2;
	}
	MEM_FREE(cp);
	return split_fields[1];
}

static char *mschapv2_split(char *ciphertext, int index)
{
  static char *out;
  int i;

  if (!out) out = mem_alloc_tiny(TOTAL_LENGTH + 1, MEM_ALIGN_WORD);

  memset(out, 0, TOTAL_LENGTH + 1);
  memcpy(out, ciphertext, strlen(ciphertext));

  /* convert hashes to lower-case - exclude $MSCHAPv2 and USERNAME */
  for (i = 10; i < 10 + 16*2 + 1 + 24*2 + 1 + 16*2; i++)
    if (out[i] >= 'A' && out[i] <= 'Z')
      out[i] |= 0x20;

  return out;
}

static void *mschapv2_get_binary(char *ciphertext)
{
  static uchar *binary;
  int i;

  if (!binary) binary = mem_alloc_tiny(BINARY_SIZE, MEM_ALIGN_WORD);

  ciphertext += 10 + 16*2 + 1; /* Skip - $MSCHAPv2$, Authenticator Challenge */

  for (i=0; i<BINARY_SIZE; i++)
  {
    binary[i] = (atoi16[ARCH_INDEX(ciphertext[i*2])])<<4;
    binary[i] |= (atoi16[ARCH_INDEX(ciphertext[i*2+1])]);
  }
  return binary;
}

static inline void setup_des_key(unsigned char key_56[], int index)
{
  char key[8];

  //left shifted by one to bring key in openssl format//
  key[0] = (key_56[0])>>1;
  key[1] = ((key_56[0] << 7) | (key_56[1] >> 1)) >>1;
  key[2] = ((key_56[1] << 6) | (key_56[2] >> 2)) >>1;
  key[3] = ((key_56[2] << 5) | (key_56[3] >> 3)) >>1;
  key[4] = ((key_56[3] << 4) | (key_56[4] >> 4)) >>1;
  key[5] = ((key_56[4] << 3) | (key_56[5] >> 5)) >>1;
  key[6] = ((key_56[5] << 2) | (key_56[6] >> 6)) >>1;
  key[7] = ((key_56[6] << 1)) >>1;

  DES_bs_set_key((char*)key, index);
}

//generates output buffer//
void generate_output(int count)
{
	int i, j;
	char *cipher;
	char temp;

	unsigned char inv_ip[64] = {
		39, 7, 47, 15, 55, 23, 63, 31,
		38, 6, 46, 14, 54, 22, 62, 30,
		37, 5, 45, 13, 53, 21, 61, 29,
		36, 4, 44, 12, 52, 20, 60, 28,
		35, 3, 43, 11, 51, 19, 59, 27,
		34, 2, 42, 10, 50, 18, 58, 26,
		33, 1, 41, 9,  49, 17, 57, 25,
		32, 0, 40, 8,  48, 16, 56, 24,
	};

	for(i=0; i<count; i++)
	{
		cipher = output[i];
		memset(cipher, 0, 8);
		for(j=0 ;j<64; j++)
		{
			temp = (unsigned char)((DES_bs_all.B[inv_ip[j]] >> i) & 0x01);
			cipher[j>>3] |= temp << (7 - j%8);
		}

	}

}

/* Calculate the MSCHAPv2 response for the given challenge, using the
   specified authentication identity (username), password and client
   nonce.
*/

static void mschapv2_crypt_all(int count)
{

	int i;

	if (!keys_prepared) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
		for(i=0; i<count; i++) {
			int len;
			/* Generate 16-byte NTLM hash */
			len = E_md4hash((uchar *) saved_plain[i], saved_len[i], saved_key[i]);

			if (len <= 0)
				saved_plain[i][-len] = 0; // match if it was truncated

			/* NULL-padding the 16-byte hash to 21-bytes is made in cmp_exact if needed */
		}
		keys_prepared = 1;
	}

#ifdef _OPENMP
#pragma omp parallel for default(none) private(i, ks) shared(count, output, challenge, saved_key)
#endif
	//bitsliced des encryption//

	for(i=0; i<count; i++)
    	setup_des_key(saved_key[i], i);

	DES_bs_crypt_one(count);
	generate_output(count);

}


static int mschapv2_cmp_all(void *binary, int count)
{
	int index = 0;
	for(; index<count; index++)
		if (!memcmp(output[index], binary, PARTIAL_BINARY_SIZE))
			return 1;
	return 0;
}

static int mschapv2_cmp_one(void *binary, int index)
{
	return (!memcmp(output[index], binary, PARTIAL_BINARY_SIZE));
}

static int mschapv2_cmp_exact(char *source, int index)
{
	uchar binary[24];
	void *salt;
	int i;
	salt = challenge;

	/* NULL-pad 16-byte NTLM hash to 21-bytes (postponed until now) */
	memset(&saved_key[index][16], 0, 5);

	/* Split resultant value into three 7-byte thirds
	   DES-encrypt challenge using each third as a key
	   Concatenate three 8-byte resulting values to form 24-byte LM response
	*/
	//bitsliced des//
	mschapv2_set_salt(salt);
	setup_des_key(saved_key[index], 0);
	DES_bs_crypt_one(0);
	generate_output(1);
	for(i=0;i<8;i++)
		binary[i] = output[0][i];

	mschapv2_set_salt(salt);
	setup_des_key(&saved_key[index][7], 0);
	DES_bs_crypt_one(0);
	generate_output(1);
	for(i=0;i<8;i++)
		binary[8 + i] = output[0][i];

	mschapv2_set_salt(salt);
	setup_des_key(&saved_key[index][14], 0);
	DES_bs_crypt_one(0);
	generate_output(1);
	for(i=0;i<8;i++)
		binary[16 + i] = output[0][i];

	return !memcmp(binary, mschapv2_get_binary(source), BINARY_SIZE);
}

/* We're essentially using three salts, but we're going to generate a single value here for later use.
   |Peer/Client Challenge (8 Bytes)|Authenticator/Server Challenge (8 Bytes)|Username (<=256)|
*/
static void *mschapv2_get_salt(char *ciphertext)
{
  static unsigned char binary_salt[SALT_SIZE];
  static SHA_CTX ctx;
  unsigned char tmp[16];
  int i;
  char *pos = NULL;
  unsigned char digest[20];

  memset(binary_salt, 0, SALT_SIZE);
  memset(digest, 0, 20);
  SHA1_Init(&ctx);

  /* Peer Challenge */
  pos = ciphertext + 10 + 16*2 + 1 + 24*2 + 1; /* Skip $MSCHAPv2$, Authenticator Challenge and Response Hash */

  memset(tmp, 0, 16);
  for (i = 0; i < 16; i++)
    tmp[i] = (atoi16[ARCH_INDEX(pos[i*2])] << 4) + atoi16[ARCH_INDEX(pos[i*2+1])];

  SHA1_Update(&ctx, tmp, 16);

  /* Authenticator Challenge */
  pos = ciphertext + 10; /* Skip $MSCHAPv2$ */

  memset(tmp, 0, 16);
  for (i = 0; i < 16; i++)
    tmp[i] = (atoi16[ARCH_INDEX(pos[i*2])] << 4) + atoi16[ARCH_INDEX(pos[i*2+1])];

  SHA1_Update(&ctx, tmp, 16);

  /* Username - Only the user name (as presented by the peer and
     excluding any prepended domain name) is used as input to SHAUpdate()
  */
  pos = ciphertext + 10 + 16*2 + 1 + 24*2 + 1 + 16*2 + 1; /* Skip $MSCHAPv2$, Authenticator, Response and Peer */
  SHA1_Update(&ctx, pos, strlen(pos));

  SHA1_Final(digest, &ctx);
  memcpy(binary_salt, digest, SALT_SIZE);

  return (void*)binary_salt;
}

static void mschapv2_set_salt(void *salt)
{
	int i,j,cnt, temp;
	challenge = salt; 

	//sets plaintext, plaintext is same for all keys due to brute force//
	for (i = 0; i < 64; i++) {
			cnt = DES_IP[i ^ 0x20];
			j = (int)((challenge[cnt >> 3] >> (7 - (cnt & 7))) & 1);
			if(j==0)
				Plaintext[i] = 0;
			else
				Plaintext[i] = -1;
		}

}

static void mschapv2_set_key(char *key, int index)
{
	saved_len[index] = strlen(key);
	memcpy(saved_plain[index], key, saved_len[index] + 1);
	keys_prepared = 0;
}

static char *mschapv2_get_key(int index)
{
	return (char *)saved_plain[index];
}

static int salt_hash(void *salt)
{
	return *(ARCH_WORD_32 *)salt & (SALT_HASH_SIZE - 1);
}

static int binary_hash_0(void *binary)
{
	return *(ARCH_WORD_32 *)binary & 0xF;
}

static int binary_hash_1(void *binary)
{
	return *(ARCH_WORD_32 *)binary & 0xFF;
}

static int binary_hash_2(void *binary)
{
	return *(ARCH_WORD_32 *)binary & 0xFFF;
}

static int binary_hash_3(void *binary)
{
	return *(ARCH_WORD_32 *)binary & 0xFFFF;
}

static int binary_hash_4(void *binary)
{
	return *(ARCH_WORD_32 *)binary & 0xFFFFF;
}

static int get_hash_0(int index)
{
	return *(ARCH_WORD_32 *)output[index] & 0xF;
}

static int get_hash_1(int index)
{
	return *(ARCH_WORD_32 *)output[index] & 0xFF;
}

static int get_hash_2(int index)
{
	return *(ARCH_WORD_32 *)output[index] & 0xFFF;
}

static int get_hash_3(int index)
{
	return *(ARCH_WORD_32 *)output[index] & 0xFFFF;
}

static int get_hash_4(int index)
{
	return *(ARCH_WORD_32 *)output[index] & 0xFFFFF;
}

struct fmt_main fmt_MSCHAPv2 = {
  {
    FORMAT_LABEL,
    FORMAT_NAME,
    ALGORITHM_NAME,
    BENCHMARK_COMMENT,
    BENCHMARK_LENGTH,
    PLAINTEXT_LENGTH,
    BINARY_SIZE,
    SALT_SIZE,
    MIN_KEYS_PER_CRYPT,
    MAX_KEYS_PER_CRYPT,
    FMT_CASE | FMT_8_BIT | FMT_SPLIT_UNIFIES_CASE | FMT_OMP | FMT_UNICODE | FMT_UTF8,
    tests
  }, {
    init,
	mschapv2_prepare,
    mschapv2_valid,
    mschapv2_split,
    mschapv2_get_binary,
    mschapv2_get_salt,
    {
	    binary_hash_0,
	    binary_hash_1,
	    binary_hash_2,
	    binary_hash_3,
	    binary_hash_4
    },
    salt_hash,
    mschapv2_set_salt,
    mschapv2_set_key,
    mschapv2_get_key,
    fmt_default_clear_keys,
    mschapv2_crypt_all,
    {
	    get_hash_0,
	    get_hash_1,
	    get_hash_2,
	    get_hash_3,
	    get_hash_4
    },
    mschapv2_cmp_all,
    mschapv2_cmp_one,
    mschapv2_cmp_exact
  }
};

[ CONTENT OF TYPE application/octet-stream SKIPPED ]

[ CONTENT OF TYPE application/octet-stream SKIPPED ]

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ