diff -urpN john-1.7.8-jumbo-6a/src/pkzip_fmt_plug.c john-1.7.8-jumbo-6b/src/pkzip_fmt_plug.c --- john-1.7.8-jumbo-6a/src/pkzip_fmt_plug.c 2011-09-01 01:24:51.703125000 +0000 +++ john-1.7.8-jumbo-6b/src/pkzip_fmt_plug.c 2011-09-01 04:46:09.718750000 +0000 @@ -710,6 +710,7 @@ static int decomp_validate(u8 *inp, int if (salt->compType == 0) { /* file is stored, so simply CRC the data) */ if (inplen != outlen) + return 0; for (i = 0; i < inplen; ++i) crc = pkzip_crc32(crc,inp[i]); return ~crc == crcExpect; @@ -771,6 +772,46 @@ static int decomp_validate(u8 *inp, int return 1; } +const char exBytesUTF8[64] = { /* number of trailing bytes for lead bytes 0xC0..0xFF, indexed by (lead & 0x3f) */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; +/* Validate the UTF-8 sequence starting at source, where length is the number of bytes the caller allows to be examined. Returns 1 when the first byte is below 0xC0, len+1 (lead byte plus trailing bytes) when the range checks below pass, and -1 when the sequence would need more than length bytes or a check fails. */ +static int isLegalUTF8_char(const u8 *source, int length) { + u8 a; + int len; + const u8 *srcptr; + + if (*source < 0xC0) + return 1; + len = exBytesUTF8[*source&0x3f]; + srcptr = source+len+1; /* one past the final trailing byte; the cases below walk backwards from it, so the last byte is checked too */ + if (len+1 > length) + return -1; + + switch (len+1) { /* dispatch on the total sequence length (lead byte + trailing bytes); 5 and 6 byte forms hit default */ + default: return -1; + /* Everything else falls through when "true"...
*/ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return -1; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return -1; + case 2: if ((a = (*--srcptr)) > 0xBF) return -1; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return -1; break; + case 0xED: if (a > 0x9F) return -1; break; + case 0xF0: if (a < 0x90) return -1; break; + case 0xF4: if (a > 0x8F) return -1; break; + default: if (a < 0x80) return -1; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return -1; + } + if (*source > 0xF4) return -1; + return len+1; +} + + static int decomp_validate_ascii(u8 *inp, u8 *out, int inplen, int outlen) { z_stream strm; @@ -809,18 +850,30 @@ static int decomp_validate_ascii(u8 *inp inflateEnd(&strm); - if (ret != Z_OK || strm.total_out < (inplen/2 + (inplen>>2))) + if (ret != Z_OK || strm.total_out < 30) return 0; for (i = 0; i < strm.total_out-1; ++i) { - if (out[i] > 0x7E) - return 0; - if (out[i] < 0x20) { + if (out[i] > 0x7E) { + // first check to 'see' if this is a valid utf8 character. If so, let it 'pass'.
+ if (out[i] > 0xC0) { + int len; + if(i > strm.total_out-4) + return 1; + len = isLegalUTF8_char(&out[i], 5); + if (len < 0) return 0; + i += (len-1); + } + else if (out[i] != 0xA9) + return 0; + } else if (out[i] < 0x20) { /* we do not need to deal with DOS EOF char 0x1a, since we will never have the 'end' of the file */ if (out[i]!='\n' && out[i]!='\r' && out[i]!='\t') return 0; } } + +//printf ("strm.total_out=%03d ", strm.total_out); #ifdef ZIP_DEBUG salt->ascii_ucmp[strm.total_out-1] = 0; printf ("ASCII find: decoded to: len=%ld [%s]\n", strm.total_out, salt->ascii_ucmp); diff -urpN john-1.7.8-jumbo-6a/src/zip2john.c john-1.7.8-jumbo-6b/src/zip2john.c --- john-1.7.8-jumbo-6a/src/zip2john.c 2011-09-01 01:24:51.703125000 +0000 +++ john-1.7.8-jumbo-6b/src/zip2john.c 2011-09-01 02:44:15.093750000 +0000 @@ -294,7 +294,7 @@ static void process_old_zip(const char * filename[filename_length] = 0; // we only handle implode or store. - if (version == 0x14 && (flags & 1) && (compression_method == 8 || compression_method == 0)) { + if ( (version == 0x14||version==0xA) && (flags & 1) && (compression_method == 8 || compression_method == 0)) { uint16_t extra_len_used = 0; if (flags & 8) { while (extra_len_used < extrafield_length) { @@ -326,7 +326,7 @@ static void process_old_zip(const char * "%s->%s PKZIP Encr:%s%s cmplen=%d, decmplen=%d, crc=%X\n", fname, filename, two_byte_check?" 
2b chk,":"", check_in_crc?"":" TS_chk,", compressed_size, uncompressed_size, crc); - if (!checksum_only && uncompressed_size > 48 && uncompressed_size < best_size) { + if (!checksum_only && uncompressed_size > 24 && uncompressed_size < best_size) { best_size=uncompressed_size; best_off=ftell(fp); best_crc=crc; @@ -368,7 +368,7 @@ static void process_old_zip(const char * fseek(fp, compressed_size, SEEK_CUR); } else { - printf("%s->%s is not encrypted, or stored with non-handled compression type\n", fname, filename); + fprintf(stderr, "%s->%s is not encrypted, or stored with non-handled compression type\n", fname, filename); fseek(fp, extrafield_length, SEEK_CUR); fseek(fp, compressed_size, SEEK_CUR); } @@ -403,9 +403,9 @@ print_and_cleanup:; *best_fname = 0; *best_str_off = 0; - if (best_cmp_size < 90) // in this case, simply skip doing ANY ascii checking, and run right to the final checking. + if (best_cmp_size < 128+12) // in this case, simply skip doing ANY ascii checking, and run right to the final checking. { - Flag_bits &= (~4); + Flag_bits &= (~2); AD = ""; *sAL = 0; *sAT = 0; @@ -420,17 +420,17 @@ print_and_cleanup:; } int usage() { - printf("Usage: zip2john [options] [zip files]\n"); - printf("\tOptions (for 'old' PKZIP encrypted files):\n"); - printf("\t -a=filename This is a 'known' ASCII file\n"); - printf("\t Using 'ascii' mode is a serious speedup, IF all files are larger, and\n"); - printf("\t you KNOW that at least one of them starts out as 'pure' ASCII data\n"); - printf("\t -co This will create a 'checksum only' hash. If there are many encrypted\n"); - printf("\t files in the .zip file, then this may be an option, and there will be\n"); - printf("\t enough data that false possitives will not be seen. If the .zip is 2\n"); - printf("\t byte checksums, and there are 8 of them, then only 1 out of 2^128 chance\n"); - printf("\t that a bad password could sneak through. 
If there are 8 1-byte hashes,\n"); - printf("\t there is only 1 in 2^64 chance of a false positive.\n"); + fprintf(stderr, "Usage: zip2john [options] [zip files]\n"); + fprintf(stderr, "\tOptions (for 'old' PKZIP encrypted files):\n"); + fprintf(stderr, "\t -a=filename This is a 'known' ASCII file\n"); + fprintf(stderr, "\t Using 'ascii' mode is a serious speedup, IF all files are larger, and\n"); + fprintf(stderr, "\t you KNOW that at least one of them starts out as 'pure' ASCII data\n"); + fprintf(stderr, "\t -co This will create a 'checksum only' hash. If there are many encrypted\n"); + fprintf(stderr, "\t files in the .zip file, then this may be an option, and there will be\n"); + fprintf(stderr, "\t enough data that false possitives will not be seen. If the .zip is 2\n"); + fprintf(stderr, "\t byte checksums, and there are 8 of them, then only 1 out of 2^128 chance\n"); + fprintf(stderr, "\t that a bad password could sneak through. If there are 8 1-byte hashes,\n"); + fprintf(stderr, "\t there is only 1 in 2^64 chance of a false positive.\n"); return 0; } int zip2john(int argc, char **argv)