/* Draft implementation of library for bitslice based on macros */
/* Targets only 32 and 64 bit numbers for bitslice */
/* Copyright © 2015 Aleksey Cherepanov */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted. */

/* TODO: make definitions for non-bitslice variant too */
/* TODO: implement 64 bit variant */
/* TODO: implement various ops */

/*
 * Data layout: a bitsliced 32-bit variable `x` is 32 separate 64-bit words
 * named x_bit0 .. x_bit31.  Bit j of x_bitN holds bit N of the j-th packed
 * value ("lane" j), so each word-wide operation below acts on all 64 lanes.
 */

#include <stdio.h>
#include <assert.h>

/*
 * Apply op(r_bitN, a_bitN, b_bitN); for N = 0 .. 31.
 * The names are built by token pasting, so r, a and b must be plain
 * identifiers.  The expansion already ends with ';'.
 */
/* perl -e 'print "op(r ## _bit$_, a ## _bit$_, b ## _bit$_); \\\n" for 0 .. 31' */
#define f_regular_op32(r, a, b, op) \
    op(r ## _bit0, a ## _bit0, b ## _bit0); \
    op(r ## _bit1, a ## _bit1, b ## _bit1); \
    op(r ## _bit2, a ## _bit2, b ## _bit2); \
    op(r ## _bit3, a ## _bit3, b ## _bit3); \
    op(r ## _bit4, a ## _bit4, b ## _bit4); \
    op(r ## _bit5, a ## _bit5, b ## _bit5); \
    op(r ## _bit6, a ## _bit6, b ## _bit6); \
    op(r ## _bit7, a ## _bit7, b ## _bit7); \
    op(r ## _bit8, a ## _bit8, b ## _bit8); \
    op(r ## _bit9, a ## _bit9, b ## _bit9); \
    op(r ## _bit10, a ## _bit10, b ## _bit10); \
    op(r ## _bit11, a ## _bit11, b ## _bit11); \
    op(r ## _bit12, a ## _bit12, b ## _bit12); \
    op(r ## _bit13, a ## _bit13, b ## _bit13); \
    op(r ## _bit14, a ## _bit14, b ## _bit14); \
    op(r ## _bit15, a ## _bit15, b ## _bit15); \
    op(r ## _bit16, a ## _bit16, b ## _bit16); \
    op(r ## _bit17, a ## _bit17, b ## _bit17); \
    op(r ## _bit18, a ## _bit18, b ## _bit18); \
    op(r ## _bit19, a ## _bit19, b ## _bit19); \
    op(r ## _bit20, a ## _bit20, b ## _bit20); \
    op(r ## _bit21, a ## _bit21, b ## _bit21); \
    op(r ## _bit22, a ## _bit22, b ## _bit22); \
    op(r ## _bit23, a ## _bit23, b ## _bit23); \
    op(r ## _bit24, a ## _bit24, b ## _bit24); \
    op(r ## _bit25, a ## _bit25, b ## _bit25); \
    op(r ## _bit26, a ## _bit26, b ## _bit26); \
    op(r ## _bit27, a ## _bit27, b ## _bit27); \
    op(r ## _bit28, a ## _bit28, b ## _bit28); \
    op(r ## _bit29, a ## _bit29, b ## _bit29); \
    op(r ## _bit30, a ## _bit30, b ## _bit30); \
    op(r ## _bit31, a ## _bit31, b ## _bit31);

/* Target: regular ops and unsigned long long (64 bit) */

/* Per-word building blocks handed to f_regular_op32 as `op`. */
#define f_single_xor(r, a, b) (r) = (a) ^ (b)
#define f_single_declare(r, unused1, unused2) unsigned long long r

/* r = a ^ b on bitsliced variables (all lanes at once). */
#define fxor32(r, a, b) f_regular_op32(r, a, b, f_single_xor)

/* Declare the 32 words var_bit0 .. var_bit31 of one bitsliced variable. */
/* We abuse f_regular_op32: the two `unused` names are pasted but ignored. */
#define f_declare32(var) f_regular_op32(var, unused, unused, f_single_declare)

/*
 * Take bit `from` of `word` and place it at bit position `to` of a 64-bit
 * result.  The cast happens before the shift so that narrower (or signed)
 * arguments are widened first; every argument is parenthesized so that
 * compound expressions can be passed safely.
 */
#define f_move_bit(word, from, to) \
    ((((unsigned long long)(word) >> (from)) & 1ULL) << (to))

/*
 * Build one word of a bitsliced variable: var_bit<bit_number> collects bit
 * `bit_number` of each of the 64 values, value vK landing in bit K.
 * No trailing ';' in the expansion.
 */
/* perl -e 'print "v$_, " for 0 .. 63' */
/* perl -e 'print "f_move_bit(v$_, bit_number, $_) | " for 0 .. 63' */
/* NOTE: bit_number should be a constant number (not even constant expression), */
/* because it is token-pasted into the variable name. */
#define f_single_pack32(var, bit_number, \
        v0, v1, v2, v3, v4, v5, v6, v7, \
        v8, v9, v10, v11, v12, v13, v14, v15, \
        v16, v17, v18, v19, v20, v21, v22, v23, \
        v24, v25, v26, v27, v28, v29, v30, v31, \
        v32, v33, v34, v35, v36, v37, v38, v39, \
        v40, v41, v42, v43, v44, v45, v46, v47, \
        v48, v49, v50, v51, v52, v53, v54, v55, \
        v56, v57, v58, v59, v60, v61, v62, v63) \
    var ## _bit ## bit_number = \
        f_move_bit(v0, bit_number, 0) | f_move_bit(v1, bit_number, 1) | \
        f_move_bit(v2, bit_number, 2) | f_move_bit(v3, bit_number, 3) | \
        f_move_bit(v4, bit_number, 4) | f_move_bit(v5, bit_number, 5) | \
        f_move_bit(v6, bit_number, 6) | f_move_bit(v7, bit_number, 7) | \
        f_move_bit(v8, bit_number, 8) | f_move_bit(v9, bit_number, 9) | \
        f_move_bit(v10, bit_number, 10) | f_move_bit(v11, bit_number, 11) | \
        f_move_bit(v12, bit_number, 12) | f_move_bit(v13, bit_number, 13) | \
        f_move_bit(v14, bit_number, 14) | f_move_bit(v15, bit_number, 15) | \
        f_move_bit(v16, bit_number, 16) | f_move_bit(v17, bit_number, 17) | \
        f_move_bit(v18, bit_number, 18) | f_move_bit(v19, bit_number, 19) | \
        f_move_bit(v20, bit_number, 20) | f_move_bit(v21, bit_number, 21) | \
        f_move_bit(v22, bit_number, 22) | f_move_bit(v23, bit_number, 23) | \
        f_move_bit(v24, bit_number, 24) | f_move_bit(v25, bit_number, 25) | \
        f_move_bit(v26, bit_number, 26) | f_move_bit(v27, bit_number, 27) | \
        f_move_bit(v28, bit_number, 28) | f_move_bit(v29, bit_number, 29) | \
        f_move_bit(v30, bit_number, 30) | f_move_bit(v31, bit_number, 31) | \
        f_move_bit(v32, bit_number, 32) | f_move_bit(v33, bit_number, 33) | \
        f_move_bit(v34, bit_number, 34) | f_move_bit(v35, bit_number, 35) | \
        f_move_bit(v36, bit_number, 36) | f_move_bit(v37, bit_number, 37) | \
        f_move_bit(v38, bit_number, 38) | f_move_bit(v39, bit_number, 39) | \
        f_move_bit(v40, bit_number, 40) | f_move_bit(v41, bit_number, 41) | \
        f_move_bit(v42, bit_number, 42) | f_move_bit(v43, bit_number, 43) | \
        f_move_bit(v44, bit_number, 44) | f_move_bit(v45, bit_number, 45) | \
        f_move_bit(v46, bit_number, 46) | f_move_bit(v47, bit_number, 47) | \
        f_move_bit(v48, bit_number, 48) | f_move_bit(v49, bit_number, 49) | \
        f_move_bit(v50, bit_number, 50) | f_move_bit(v51, bit_number, 51) | \
        f_move_bit(v52, bit_number, 52) | f_move_bit(v53, bit_number, 53) | \
        f_move_bit(v54, bit_number, 54) | f_move_bit(v55, bit_number, 55) | \
        f_move_bit(v56, bit_number, 56) | f_move_bit(v57, bit_number, 57) | \
        f_move_bit(v58, bit_number, 58) | f_move_bit(v59, bit_number, 59) | \
        f_move_bit(v60, bit_number, 60) | f_move_bit(v61, bit_number, 61) | \
        f_move_bit(v62, bit_number, 62) | f_move_bit(v63, bit_number, 63)

/*
 * Pack 64 values into the bitsliced variable `var` (all 32 words).
 * Usage: f_pack32(var, v0, v1, ..., v63);
 * The values are forwarded through __VA_ARGS__, which is macro-expanded
 * before f_single_pack32 is rescanned.  This lets a macro that expands to a
 * 64-element comma-separated list (such as V1 below) be passed directly.
 * The expansion already ends with ';'.
 */
/* perl -e 'print "f_single_pack32(var, $_, __VA_ARGS__); \\\n" for 0 .. 31' */
#define f_pack32(var, ...) \
    f_single_pack32(var, 0, __VA_ARGS__); \
    f_single_pack32(var, 1, __VA_ARGS__); \
    f_single_pack32(var, 2, __VA_ARGS__); \
    f_single_pack32(var, 3, __VA_ARGS__); \
    f_single_pack32(var, 4, __VA_ARGS__); \
    f_single_pack32(var, 5, __VA_ARGS__); \
    f_single_pack32(var, 6, __VA_ARGS__); \
    f_single_pack32(var, 7, __VA_ARGS__); \
    f_single_pack32(var, 8, __VA_ARGS__); \
    f_single_pack32(var, 9, __VA_ARGS__); \
    f_single_pack32(var, 10, __VA_ARGS__); \
    f_single_pack32(var, 11, __VA_ARGS__); \
    f_single_pack32(var, 12, __VA_ARGS__); \
    f_single_pack32(var, 13, __VA_ARGS__); \
    f_single_pack32(var, 14, __VA_ARGS__); \
    f_single_pack32(var, 15, __VA_ARGS__); \
    f_single_pack32(var, 16, __VA_ARGS__); \
    f_single_pack32(var, 17, __VA_ARGS__); \
    f_single_pack32(var, 18, __VA_ARGS__); \
    f_single_pack32(var, 19, __VA_ARGS__); \
    f_single_pack32(var, 20, __VA_ARGS__); \
    f_single_pack32(var, 21, __VA_ARGS__); \
    f_single_pack32(var, 22, __VA_ARGS__); \
    f_single_pack32(var, 23, __VA_ARGS__); \
    f_single_pack32(var, 24, __VA_ARGS__); \
    f_single_pack32(var, 25, __VA_ARGS__); \
    f_single_pack32(var, 26, __VA_ARGS__); \
    f_single_pack32(var, 27, __VA_ARGS__); \
    f_single_pack32(var, 28, __VA_ARGS__); \
    f_single_pack32(var, 29, __VA_ARGS__); \
    f_single_pack32(var, 30, __VA_ARGS__); \
    f_single_pack32(var, 31, __VA_ARGS__);

/*
 * Extract the 32-bit value stored in lane `lane` (0 .. 63) of the bitsliced
 * variable `var`.  This is an expression, not a statement.  `lane` may be a
 * run-time value but is evaluated 32 times, so it must have no side effects.
 */
/* perl -e 'print "f_move_bit(var ## _bit$_, lane, $_) | \\\n" for 0 .. 31' */
#define f_unpack32(var, lane) \
    ((unsigned int)( \
        f_move_bit(var ## _bit0, lane, 0) | \
        f_move_bit(var ## _bit1, lane, 1) | \
        f_move_bit(var ## _bit2, lane, 2) | \
        f_move_bit(var ## _bit3, lane, 3) | \
        f_move_bit(var ## _bit4, lane, 4) | \
        f_move_bit(var ## _bit5, lane, 5) | \
        f_move_bit(var ## _bit6, lane, 6) | \
        f_move_bit(var ## _bit7, lane, 7) | \
        f_move_bit(var ## _bit8, lane, 8) | \
        f_move_bit(var ## _bit9, lane, 9) | \
        f_move_bit(var ## _bit10, lane, 10) | \
        f_move_bit(var ## _bit11, lane, 11) | \
        f_move_bit(var ## _bit12, lane, 12) | \
        f_move_bit(var ## _bit13, lane, 13) | \
        f_move_bit(var ## _bit14, lane, 14) | \
        f_move_bit(var ## _bit15, lane, 15) | \
        f_move_bit(var ## _bit16, lane, 16) | \
        f_move_bit(var ## _bit17, lane, 17) | \
        f_move_bit(var ## _bit18, lane, 18) | \
        f_move_bit(var ## _bit19, lane, 19) | \
        f_move_bit(var ## _bit20, lane, 20) | \
        f_move_bit(var ## _bit21, lane, 21) | \
        f_move_bit(var ## _bit22, lane, 22) | \
        f_move_bit(var ## _bit23, lane, 23) | \
        f_move_bit(var ## _bit24, lane, 24) | \
        f_move_bit(var ## _bit25, lane, 25) | \
        f_move_bit(var ## _bit26, lane, 26) | \
        f_move_bit(var ## _bit27, lane, 27) | \
        f_move_bit(var ## _bit28, lane, 28) | \
        f_move_bit(var ## _bit29, lane, 29) | \
        f_move_bit(var ## _bit30, lane, 30) | \
        f_move_bit(var ## _bit31, lane, 31)))

/*
 * r = (var << 3) | (var >> (32 - 3)) on bitsliced variables: a rotation is
 * only a renaming of words, so no bit operations are needed.
 * r and var must be different variables: r_bit3 is overwritten before
 * var_bit3 is read.  The expansion already ends with ';'.
 */
/* perl -e 'for (0 .. 31) { $a = ($_ + 3) % 32; print "r ## _bit$a = var ## _bit$_; \\\n" }' */
#define frotate3left32(r, var) \
    r ## _bit3 = var ## _bit0; \
    r ## _bit4 = var ## _bit1; \
    r ## _bit5 = var ## _bit2; \
    r ## _bit6 = var ## _bit3; \
    r ## _bit7 = var ## _bit4; \
    r ## _bit8 = var ## _bit5; \
    r ## _bit9 = var ## _bit6; \
    r ## _bit10 = var ## _bit7; \
    r ## _bit11 = var ## _bit8; \
    r ## _bit12 = var ## _bit9; \
    r ## _bit13 = var ## _bit10; \
    r ## _bit14 = var ## _bit11; \
    r ## _bit15 = var ## _bit12; \
    r ## _bit16 = var ## _bit13; \
    r ## _bit17 = var ## _bit14; \
    r ## _bit18 = var ## _bit15; \
    r ## _bit19 = var ## _bit16; \
    r ## _bit20 = var ## _bit17; \
    r ## _bit21 = var ## _bit18; \
    r ## _bit22 = var ## _bit19; \
    r ## _bit23 = var ## _bit20; \
    r ## _bit24 = var ## _bit21; \
    r ## _bit25 = var ## _bit22; \
    r ## _bit26 = var ## _bit23; \
    r ## _bit27 = var ## _bit24; \
    r ## _bit28 = var ## _bit25; \
    r ## _bit29 = var ## _bit26; \
    r ## _bit30 = var ## _bit27; \
    r ## _bit31 = var ## _bit28; \
    r ## _bit0 = var ## _bit29; \
    r ## _bit1 = var ## _bit30; \
    r ## _bit2 = var ## _bit31;

/* Test vectors: 64 random 32-bit values each. */
/* perl -e 'print int(rand(2 ** 32)) . "U, " for 0 .. 63' */
#define V1 \
    4243526649U, 2966948570U, 3131741946U, 2954837155U, \
    2465116046U, 948312014U, 2705969928U, 3993345006U, \
    2392065330U, 2120824545U, 2343104277U, 1133259544U, \
    3702185636U, 2980253889U, 40276393U, 3672734333U, \
    2130340708U, 3978680987U, 3593458614U, 3354563563U, \
    359059409U, 4294588828U, 1704638408U, 153646028U, \
    1353978226U, 536959668U, 4224999546U, 666988682U, \
    2442851475U, 22692508U, 139967019U, 2530685998U, \
    416653076U, 2575145498U, 1912114162U, 961031242U, \
    1892881532U, 479427203U, 2346329038U, 627414041U, \
    2904416084U, 700120705U, 2829586977U, 3010821348U, \
    3748887474U, 1211341590U, 316319839U, 2728556710U, \
    2890880787U, 3084158176U, 1731735298U, 224971507U, \
    1099266414U, 2604574110U, 1012860609U, 2368422933U, \
    1991650181U, 3724142066U, 1714694400U, 983050871U, \
    2183474813U, 611766120U, 1909767049U, 3223901766U

#define V2 \
    14136537U, 3924675130U, 1063080666U, 931429123U, \
    4019486318U, 2575068462U, 794848488U, 3596902478U, \
    3957056018U, 2767422017U, 4039329525U, 1027003768U, \
    3488065924U, 659206689U, 3124922249U, 2901961437U, \
    869073988U, 1319776763U, 243877782U, 916728395U, \
    2761823921U, 984655612U, 1611368872U, 3259043884U, \
    2771183186U, 624246292U, 2914283610U, 350974698U, \
    3139450739U, 1336736764U, 3634161675U, 4127390862U, \
    3976212468U, 4232481146U, 4111906770U, 3675003562U, \
    1508496220U, 20971491U, 497233326U, 828246137U, \
    2264247348U, 3945691617U, 3291208193U, 1386534724U, \
    3259971218U, 3492244598U, 1212616255U, 4023458054U, \
    2243944435U, 3593997376U, 3485012706U, 2877624659U, \
    457814094U, 1939292926U, 1274955937U, 1135186037U, \
    1268445285U, 3401775442U, 4223248608U, 4157791447U, \
    2162516317U, 1887549640U, 78010729U, 1639992998U

/* Scalar copies of the vectors, used to compute the reference result. */
static const unsigned int test_v1[] = { V1 };
static const unsigned int test_v2[] = { V2 };

/*
 * Self-test: computes rotl(v1, 3) ^ v2 for all 64 lanes with the bitslice
 * macros, prints every lane as "r = <value>", and compares it with the same
 * formula evaluated on plain unsigned ints.
 * Returns 0 when every lane matches, 1 otherwise.
 */
int main(void)
{
    /* One lane per bit of an unsigned long long. */
    enum { LANES = 64 };
    int failures = 0;
    int i;

    /* Enforce our assumptions about types and bits. */
    assert(sizeof(unsigned long long) == 8);
    assert(sizeof(unsigned int) == 4);
    assert(sizeof test_v1 / sizeof test_v1[0] == LANES);
    assert(sizeof test_v2 / sizeof test_v2[0] == LANES);

    /* Declare some vars */
    f_declare32(myvar1);
    f_declare32(myvar2);
    f_declare32(myvar3);
    f_declare32(myvar4);

    /* Put some values into vars; same lists as test_v1 / test_v2. */
    f_pack32(myvar1, V1);
    f_pack32(myvar2, V2);

    /* Do some ops */
    frotate3left32(myvar3, myvar1);
    fxor32(myvar4, myvar3, myvar2);

    /* Get and check results, for all places in vector / packed values */
    for (i = 0; i < LANES; i++) {
        unsigned int expected =
            ((test_v1[i] << 3) | (test_v1[i] >> (32 - 3))) ^ test_v2[i];
        unsigned int r = f_unpack32(myvar4, i);

        printf("r = %u\n", r);
        if (r != expected) {
            /* Diagnostics go to stderr so the stdout listing stays as before. */
            fprintf(stderr, "lane #%d mismatch: e = %u r = %u\n",
                    i, expected, r);
            failures++;
        }
    }

    return failures == 0 ? 0 : 1;
}