/*
 * sboxes-alti.c
 * Bitslice DES faster implementation for PowerPC AltiVec(VMX)
 */

/*
 * Copyright 2008 DumplingerBoy (Dango-Chu). All Right Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Reading notes for the eight routines below (s1 .. s8).
 *
 * Common shape:
 *   - a1..a6 are six input vectors, passed by value.
 *   - out1..out4 point at four vectors that are READ and then overwritten:
 *     each result is XORed into the value already stored there
 *     (*outN = vec_xor(*outN, ...)), so the caller must have initialised
 *     all four before the call.
 *   - The body is straight-line code: every xN is assigned exactly once
 *     from earlier values; there are no loops or branches.
 *
 * Intrinsics used (semantics per the AltiVec programming interface):
 *   vec_sel(a, b, m)  bitwise select: bit from b where m is 1, from a where 0
 *   vec_nor(a, b)     ~(a | b); vec_nor(x, x) is therefore plain NOT
 *   vec_andc(a, b)    a & ~b
 *   vec_and / vec_or / vec_xor   the usual bitwise operations
 *
 * NOTE(review): the numbering s1..s8 presumably corresponds to the eight
 * DES S-boxes named in the file header - confirm against the caller.
 * NOTE(review): no #include <altivec.h> is visible here; presumably the
 * including translation unit or compiler flags provide the vec_* names.
 */

/*
 * s1: 48 intermediate values.
 * Results are accumulated in the order out4, out2, out3, out1.
 * The last select feeding every output is keyed on a6.
 */
static void s1(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33, x34, x35, x36;
    vector unsigned int x37, x38, x39, x40, x41, x42;
    vector unsigned int x43, x44, x45, x46, x47, x48;

    /* --- terms leading to out4 --- */
    x1 = vec_sel(a6, a4, a2);
    x2 = vec_xor(a2, a4);
    x3 = vec_sel(x2, x1, a1);
    x4 = vec_nor(x3, x3);               /* ~x3 */
    x5 = vec_xor(x4, a3);
    x6 = vec_sel(a4, x2, x3);
    x7 = vec_or(a1, a2);
    x8 = vec_xor(x7, x6);
    x9 = vec_sel(x8, x6, a3);
    x10 = vec_sel(x9, x5, a5);
    x11 = vec_nor(x8, x8);              /* ~x8 */
    x12 = vec_xor(a1, x2);
    x13 = vec_and(x12, x8);
    x14 = vec_sel(x13, x11, a3);
    x15 = vec_andc(x8, x14);
    x16 = vec_or(x15, x3);
    x17 = vec_sel(x16, x14, a5);
    x18 = vec_sel(x17, x10, a6);
    *out4 = vec_xor(*out4, x18);

    /* --- terms leading to out2 (reuses x2, x3, x5, x7, x8, x11, x13, x14) --- */
    x19 = vec_nor(a2, x14);
    x20 = vec_xor(x19, x3);
    x21 = vec_sel(a6, x13, x7);
    x22 = vec_xor(a1, a4);
    x23 = vec_sel(x22, x21, a3);
    x24 = vec_sel(x23, x20, a5);
    x25 = vec_nor(x2, x3);
    x26 = vec_sel(a2, x25, x11);
    x27 = vec_sel(x26, x25, a3);
    x28 = vec_sel(x5, x23, x8);
    x29 = vec_sel(x28, x27, a5);
    x30 = vec_sel(x29, x24, a6);
    *out2 = vec_xor(*out2, x30);

    /* --- terms leading to out3 --- */
    x31 = vec_xor(a3, a2);
    x32 = vec_xor(x31, x19);
    x33 = vec_sel(x12, x26, a3);
    x34 = vec_sel(x33, x32, a5);
    x35 = vec_sel(a3, x32, x3);
    x36 = vec_andc(x8, x23);
    x37 = vec_xor(x36, x32);
    x38 = vec_sel(x37, x35, a5);
    x39 = vec_sel(x38, x34, a6);
    *out3 = vec_xor(*out3, x39);

    /* --- terms leading to out1 --- */
    x40 = vec_sel(x27, x7, x23);
    x41 = vec_or(x14, x33);
    x42 = vec_xor(x41, a4);
    x43 = vec_sel(x42, x40, a5);
    x44 = vec_sel(x37, x42, x20);
    x45 = vec_nor(a3, x42);
    x46 = vec_or(x45, x19);
    x47 = vec_sel(x46, x44, a5);
    x48 = vec_sel(x47, x43, a6);
    *out1 = vec_xor(*out1, x48);
}

/*
 * s2: 45 intermediate values.
 * Results are accumulated in the order out4, out2, out1, out3.
 * The last select feeding every output is keyed on a6.
 */
static void s2(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33, x34, x35, x36;
    vector unsigned int x37, x38, x39, x40, x41, x42;
    vector unsigned int x43, x44, x45;

    /* --- terms leading to out4 --- */
    x1 = vec_andc(a6, a3);              /* a6 & ~a3 */
    x2 = vec_xor(x1, a5);
    x3 = vec_sel(a4, a3, a5);
    x4 = vec_sel(x3, x2, a1);
    x5 = vec_or(a3, x2);
    x6 = vec_xor(x5, a4);
    x7 = vec_nor(x3, x3);               /* ~x3 */
    x8 = vec_sel(x7, x6, a1);
    x9 = vec_sel(x8, x4, a2);
    x10 = vec_xor(a5, x7);
    x11 = vec_xor(x5, x10);
    x12 = vec_sel(x11, x10, a1);
    x13 = vec_sel(x11, x6, a5);
    x14 = vec_sel(x13, a4, a1);
    x15 = vec_sel(x14, x12, a2);
    x16 = vec_sel(x15, x9, a6);
    *out4 = vec_xor(*out4, x16);

    /* --- terms leading to out2 --- */
    x17 = vec_andc(x5, x13);
    x18 = vec_xor(x17, a1);
    x19 = vec_xor(x1, x18);
    x20 = vec_sel(x19, x18, a2);
    x21 = vec_xor(a3, a4);
    x22 = vec_xor(x21, x10);
    x23 = vec_xor(a3, a5);
    x24 = vec_sel(x23, x22, a1);
    x25 = vec_sel(x8, x18, x5);
    x26 = vec_sel(x25, x24, a2);
    x27 = vec_sel(x26, x20, a6);
    *out2 = vec_xor(*out2, x27);

    /* --- terms leading to out1 --- */
    x28 = vec_sel(x6, x14, a3);
    x29 = vec_xor(a4, x12);
    x30 = vec_xor(x29, x23);
    x31 = vec_sel(x30, x28, a2);
    x32 = vec_sel(x25, x17, a3);
    x33 = vec_sel(x21, x11, x25);
    x34 = vec_sel(x33, x32, a2);
    x35 = vec_sel(x34, x31, a6);
    *out1 = vec_xor(*out1, x35);

    /* --- terms leading to out3 --- */
    x36 = vec_sel(x1, x11, a4);
    x37 = vec_sel(a4, x6, x19);
    x38 = vec_sel(x37, x36, a1);
    x39 = vec_sel(x10, x21, x38);
    x40 = vec_sel(x39, x38, a2);
    x41 = vec_sel(a5, x38, x33);
    x42 = vec_or(x3, x8);
    x43 = vec_xor(x42, x41);
    x44 = vec_sel(x43, x41, a2);
    x45 = vec_sel(x44, x40, a6);
    *out3 = vec_xor(*out3, x45);
}

/*
 * s3: 45 intermediate values.
 * Results are accumulated in the order out1, out4, out3, out2.
 * The last select feeding every output is keyed on a1.
 */
static void s3(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33, x34, x35, x36;
    vector unsigned int x37, x38, x39, x40, x41, x42;
    vector unsigned int x43, x44, x45;

    /* --- terms leading to out1 --- */
    x1 = vec_xor(a3, a5);
    x2 = vec_xor(x1, a6);
    x3 = vec_nor(x2, x2);               /* ~x2 */
    x4 = vec_xor(x3, a4);
    x5 = vec_nor(a5, x2);
    x6 = vec_sel(x4, x5, a3);
    x7 = vec_xor(a3, x6);
    x8 = vec_sel(x7, x6, a4);
    x9 = vec_sel(x8, x4, a2);
    x10 = vec_and(a3, a6);
    x11 = vec_or(x10, x1);
    x12 = vec_sel(x11, x3, a4);
    x13 = vec_xor(x5, x11);
    x14 = vec_nor(a3, a5);
    x15 = vec_sel(x14, x13, a4);
    x16 = vec_sel(x15, x12, a2);
    x17 = vec_sel(x16, x9, a1);
    *out1 = vec_xor(*out1, x17);

    /* --- terms leading to out4 --- */
    x18 = vec_sel(x12, x4, x5);
    x19 = vec_sel(x4, x2, a6);
    x20 = vec_sel(x19, x18, a2);
    x21 = vec_sel(x8, x2, a5);
    x22 = vec_nor(x21, x21);            /* ~x21 */
    x23 = vec_xor(x22, a2);
    x24 = vec_sel(x23, x20, a1);
    *out4 = vec_xor(*out4, x24);

    /* --- terms leading to out3 --- */
    x25 = vec_andc(a4, a5);             /* a4 & ~a5 */
    x26 = vec_or(x25, x3);
    x27 = vec_nor(a3, x22);
    x28 = vec_xor(x27, x4);
    x29 = vec_sel(x28, x26, a2);
    x30 = vec_andc(x2, x27);
    x31 = vec_sel(x30, x1, a4);
    x32 = vec_andc(x11, x18);
    x33 = vec_xor(x7, x10);
    x34 = vec_sel(x33, x32, a4);
    x35 = vec_sel(x34, x31, a2);
    x36 = vec_sel(x35, x29, a1);
    *out3 = vec_xor(*out3, x36);

    /* --- terms leading to out2 --- */
    x37 = vec_sel(x2, x7, x12);
    x38 = vec_and(x3, x37);
    x39 = vec_nor(x38, x31);
    x40 = vec_sel(x39, x37, a2);
    x41 = vec_xor(a4, x28);
    x42 = vec_andc(x41, x38);
    x43 = vec_sel(x31, x37, x18);
    x44 = vec_sel(x43, x42, a2);
    x45 = vec_sel(x44, x40, a1);
    *out2 = vec_xor(*out2, x45);
}

/*
 * s4: 33 intermediate values.
 * Results are accumulated in the order out2, out1, out3, out4.
 * The last select feeding every output is keyed on a6.
 * out1 is built from the same two terms as out2 (x10, x17) with x10
 * complemented and the select operands swapped; out4 is related to out3
 * (x25, x30) in the same way.
 */
static void s4(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33;

    /* --- terms leading to out2 --- */
    x1 = vec_xor(a5, a3);
    x2 = vec_xor(x1, a2);
    x3 = vec_sel(a5, a2, a3);
    x4 = vec_sel(x3, x2, a4);
    x5 = vec_and(x1, x3);
    x6 = vec_nor(x2, x2);               /* ~x2 */
    x7 = vec_nor(x3, x3);               /* ~x3 */
    x8 = vec_sel(x7, x6, a5);
    x9 = vec_sel(x8, x5, a4);
    x10 = vec_sel(x9, x4, a1);
    x11 = vec_sel(a5, a3, x2);
    x12 = vec_sel(x1, x2, x8);
    x13 = vec_sel(x12, x11, a4);
    x14 = vec_sel(x6, a2, a5);
    x15 = vec_xor(x2, x7);
    x16 = vec_sel(x15, x14, a4);
    x17 = vec_sel(x16, x13, a1);
    x18 = vec_sel(x17, x10, a6);
    *out2 = vec_xor(*out2, x18);

    /* --- out1: ~x10 where a6 is 0, x17 where a6 is 1 --- */
    x19 = vec_nor(x10, x10);            /* ~x10 */
    x20 = vec_sel(x19, x17, a6);
    *out1 = vec_xor(*out1, x20);

    /* --- terms leading to out3 --- */
    x21 = vec_xor(x1, x14);
    x22 = vec_sel(x21, x3, a4);
    x23 = vec_sel(x6, x12, a2);
    x24 = vec_sel(x11, x23, a4);
    x25 = vec_sel(x24, x22, a1);
    x26 = vec_xor(x5, x12);
    x27 = vec_sel(a1, x15, x1);
    x28 = vec_sel(x27, x26, a4);
    x29 = vec_sel(x6, x15, a4);
    x30 = vec_sel(x29, x28, a1);
    x31 = vec_sel(x30, x25, a6);
    *out3 = vec_xor(*out3, x31);

    /* --- out4: ~x25 where a6 is 0, x30 where a6 is 1 --- */
    x32 = vec_nor(x25, x25);            /* ~x25 */
    x33 = vec_sel(x32, x30, a6);
    *out4 = vec_xor(*out4, x33);
}

/*
 * s5: 49 intermediate values.
 * Results are accumulated in the order out2, out3, out1, out4.
 * The last select feeding every output is keyed on a1.
 */
static void s5(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33, x34, x35, x36;
    vector unsigned int x37, x38, x39, x40, x41, x42;
    vector unsigned int x43, x44, x45, x46, x47, x48;
    vector unsigned int x49;

    /* --- terms leading to out2 --- */
    x1 = vec_nor(a2, a2);               /* ~a2 */
    x2 = vec_xor(a3, x1);
    x3 = vec_sel(x2, x1, a5);
    x4 = vec_nor(x3, x3);               /* ~x3 */
    x5 = vec_xor(x4, a6);
    x6 = vec_sel(a2, a5, a3);
    x7 = vec_xor(a5, x3);
    x8 = vec_xor(x7, x6);
    x9 = vec_sel(x8, x6, a6);
    x10 = vec_sel(x9, x5, a4);
    x11 = vec_xor(a6, a5);
    x12 = vec_xor(x11, x2);
    x13 = vec_sel(x1, x11, x12);
    x14 = vec_xor(a5, a3);
    x15 = vec_sel(x14, x13, a6);
    x16 = vec_sel(x15, x12, a4);
    x17 = vec_sel(x16, x10, a1);
    *out2 = vec_xor(*out2, x17);

    /* --- terms leading to out3 --- */
    x18 = vec_xor(a6, x12);
    x19 = vec_nor(x18, x8);
    x20 = vec_or(x18, x8);
    x21 = vec_sel(a2, x19, x13);
    x22 = vec_sel(x21, x20, a6);
    x23 = vec_sel(x22, x19, a4);
    x24 = vec_xor(x11, x21);
    x25 = vec_sel(a3, x1, x7);
    x26 = vec_sel(x25, x5, a6);
    x27 = vec_sel(x26, x24, a4);
    x28 = vec_sel(x27, x23, a1);
    *out3 = vec_xor(*out3, x28);

    /* --- terms leading to out1 --- */
    x29 = vec_sel(a5, x13, x2);
    x30 = vec_xor(x2, x8);
    x31 = vec_sel(x30, x29, a6);
    x32 = vec_sel(x5, x20, x31);
    x33 = vec_sel(x32, x31, a4);
    x34 = vec_xor(a2, x19);
    x35 = vec_sel(a2, a3, x21);
    x36 = vec_sel(x35, x34, a6);
    x37 = vec_or(x4, x11);
    x38 = vec_xor(x37, x36);
    x39 = vec_sel(x38, x36, a4);
    x40 = vec_sel(x39, x33, a1);
    *out1 = vec_xor(*out1, x40);

    /* --- terms leading to out4 --- */
    x41 = vec_or(x3, x8);
    x42 = vec_xor(x41, x24);
    x43 = vec_xor(x6, x21);
    x44 = vec_xor(x43, x22);
    x45 = vec_sel(x44, x42, a4);
    x46 = vec_sel(x4, x13, x42);
    x47 = vec_sel(x32, x35, x24);
    x48 = vec_sel(x47, x46, a4);
    x49 = vec_sel(x48, x45, a1);
    *out4 = vec_xor(*out4, x49);
}

/*
 * s6: 45 intermediate values.
 * Results are accumulated in the order out1, out4, out2, out3.
 * The last select feeding every output is keyed on a3.
 */
static void s6(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33, x34, x35, x36;
    vector unsigned int x37, x38, x39, x40, x41, x42;
    vector unsigned int x43, x44, x45;

    /* --- terms leading to out1 --- */
    x1 = vec_sel(a5, a4, a1);
    x2 = vec_sel(x1, a1, a4);
    x3 = vec_or(a1, a4);
    x4 = vec_sel(x3, x2, a5);
    x5 = vec_sel(x4, x1, a2);
    x6 = vec_nor(x2, x2);               /* ~x2 */
    x7 = vec_xor(x3, x6);
    x8 = vec_sel(x7, x6, a5);
    x9 = vec_sel(x8, x5, a6);
    x10 = vec_xor(a1, a4);
    x11 = vec_sel(a5, x7, x6);
    x12 = vec_sel(x11, x10, a2);
    x13 = vec_or(x6, x8);
    x14 = vec_xor(x13, a2);
    x15 = vec_sel(x14, x12, a6);
    x16 = vec_sel(x15, x9, a3);
    *out1 = vec_xor(*out1, x16);

    /* --- terms leading to out4 --- */
    x17 = vec_andc(a4, x1);             /* a4 & ~x1 */
    x18 = vec_or(x17, x14);
    x19 = vec_xor(x3, x14);
    x20 = vec_sel(x19, x18, a6);
    x21 = vec_andc(a5, a4);             /* a5 & ~a4 */
    x22 = vec_sel(x21, x4, a2);
    x23 = vec_andc(x4, x7);
    x24 = vec_xor(a4, x23);
    x25 = vec_sel(x24, x23, a2);
    x26 = vec_sel(x25, x22, a6);
    x27 = vec_sel(x26, x20, a3);
    *out4 = vec_xor(*out4, x27);

    /* --- terms leading to out2 --- */
    x28 = vec_xor(x18, x23);
    x29 = vec_xor(x13, x24);
    x30 = vec_andc(a4, a5);             /* a4 & ~a5 */
    x31 = vec_sel(x30, x29, a2);
    x32 = vec_sel(x31, x28, a6);
    x33 = vec_xor(x6, x28);
    x34 = vec_nor(x8, x8);              /* ~x8 */
    x35 = vec_sel(x34, x33, a2);
    x36 = vec_sel(x1, x13, x28);
    x37 = vec_sel(x36, x35, a6);
    x38 = vec_sel(x37, x32, a3);
    *out2 = vec_xor(*out2, x38);

    /* --- terms leading to out3 --- */
    x39 = vec_sel(x7, x23, a2);
    x40 = vec_sel(x35, x1, x19);
    x41 = vec_sel(x40, x39, a6);
    x42 = vec_sel(a6, x28, x4);
    x43 = vec_sel(x17, x3, x28);
    x44 = vec_sel(x43, x42, a6);
    x45 = vec_sel(x44, x41, a3);
    *out3 = vec_xor(*out3, x45);
}

/*
 * s7: 45 intermediate values.
 * Results are accumulated in the order out3, out1, out2, out4.
 * The last select feeding every output is keyed on a1.
 */
static void s7(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33, x34, x35, x36;
    vector unsigned int x37, x38, x39, x40, x41, x42;
    vector unsigned int x43, x44, x45;

    /* --- terms leading to out3 --- */
    x1 = vec_and(a4, a5);
    x2 = vec_xor(x1, a3);
    x3 = vec_nor(x2, x2);               /* ~x2 */
    x4 = vec_xor(x3, a6);
    x5 = vec_sel(x3, a3, a4);
    x6 = vec_sel(a4, a3, a5);
    x7 = vec_sel(x6, x5, a6);
    x8 = vec_sel(x7, x4, a2);
    x9 = vec_sel(a3, x3, x5);
    x10 = vec_xor(a5, x5);
    x11 = vec_sel(x10, x9, a6);
    x12 = vec_nor(x10, x11);
    x13 = vec_xor(x12, x1);
    x14 = vec_sel(x13, x11, a2);
    x15 = vec_sel(x14, x8, a1);
    *out3 = vec_xor(*out3, x15);

    /* --- terms leading to out1 --- */
    x16 = vec_sel(x11, x13, a5);
    x17 = vec_xor(a5, x6);
    x18 = vec_xor(x2, x6);
    x19 = vec_sel(x18, x17, a6);
    x20 = vec_sel(x19, x16, a2);
    x21 = vec_sel(x6, x13, x5);
    x22 = vec_sel(x21, x10, a6);
    x23 = vec_xor(a4, x10);
    x24 = vec_xor(x9, x10);
    x25 = vec_sel(x24, x23, a6);
    x26 = vec_sel(x25, x22, a2);
    x27 = vec_sel(x26, x20, a1);
    *out1 = vec_xor(*out1, x27);

    /* --- terms leading to out2 --- */
    x28 = vec_sel(a5, x16, a3);
    x29 = vec_sel(x26, x28, a6);
    x30 = vec_sel(x26, x29, a2);
    x31 = vec_sel(x25, x13, x1);
    x32 = vec_xor(a3, x10);
    x33 = vec_andc(x32, x21);
    x34 = vec_sel(x33, x31, a2);
    x35 = vec_sel(x34, x30, a1);
    *out2 = vec_xor(*out2, x35);

    /* --- terms leading to out4 --- */
    x36 = vec_xor(a4, x3);
    x37 = vec_nor(x18, x32);
    x38 = vec_sel(x37, x36, a6);
    x39 = vec_or(a6, x16);
    x40 = vec_xor(x39, x22);
    x41 = vec_sel(x40, x38, a2);
    x42 = vec_sel(x19, x23, x4);
    x43 = vec_sel(x12, x39, x22);
    x44 = vec_sel(x43, x42, a2);
    x45 = vec_sel(x44, x41, a1);
    *out4 = vec_xor(*out4, x45);
}

/*
 * s8: 42 intermediate values.
 * Results are accumulated in the order out1, out3, out2, out4.
 * The last select feeding every output is keyed on a6.
 * out4 reuses x10 from the out1 computation in complemented form (x41).
 */
static void s8(
    vector unsigned int a1,
    vector unsigned int a2,
    vector unsigned int a3,
    vector unsigned int a4,
    vector unsigned int a5,
    vector unsigned int a6,
    vector unsigned int *out1,
    vector unsigned int *out2,
    vector unsigned int *out3,
    vector unsigned int *out4
) {
    vector unsigned int x1, x2, x3, x4, x5, x6;
    vector unsigned int x7, x8, x9, x10, x11, x12;
    vector unsigned int x13, x14, x15, x16, x17, x18;
    vector unsigned int x19, x20, x21, x22, x23, x24;
    vector unsigned int x25, x26, x27, x28, x29, x30;
    vector unsigned int x31, x32, x33, x34, x35, x36;
    vector unsigned int x37, x38, x39, x40, x41, x42;

    /* --- terms leading to out1 --- */
    x1 = vec_and(a4, a5);
    x2 = vec_nor(x1, x1);               /* ~x1 */
    x3 = vec_xor(x2, a3);
    x4 = vec_sel(a4, a3, a5);
    x5 = vec_sel(x4, x3, a2);
    x6 = vec_or(a4, a5);
    x7 = vec_xor(x6, x3);
    x8 = vec_xor(a3, x6);
    x9 = vec_sel(x8, x7, a2);
    x10 = vec_sel(x9, x5, a1);
    x11 = vec_sel(x8, a4, a5);
    x12 = vec_xor(x6, x11);
    x13 = vec_sel(x12, x11, a2);
    x14 = vec_xor(a4, x3);
    x15 = vec_sel(x2, x4, x8);
    x16 = vec_sel(x15, x14, a2);
    x17 = vec_sel(x16, x13, a1);
    x18 = vec_sel(x17, x10, a6);
    *out1 = vec_xor(*out1, x18);

    /* --- terms leading to out3 --- */
    x19 = vec_sel(a3, x14, x15);
    x20 = vec_xor(a5, x3);
    x21 = vec_sel(x20, x19, a2);
    x22 = vec_sel(a4, a5, a3);
    x23 = vec_sel(x13, x22, a2);
    x24 = vec_sel(x23, x21, a1);
    x25 = vec_sel(x7, x11, a2);
    x26 = vec_xor(a5, x25);
    x27 = vec_xor(a2, x12);
    x28 = vec_sel(x27, x26, a1);
    x29 = vec_sel(x28, x24, a6);
    *out3 = vec_xor(*out3, x29);

    /* --- terms leading to out2 --- */
    x30 = vec_xor(a2, x11);
    x31 = vec_sel(x5, x26, a5);
    x32 = vec_sel(x31, x30, a1);
    x33 = vec_sel(x26, x16, x6);
    x34 = vec_nor(x31, x31);            /* ~x31 */
    x35 = vec_sel(x34, x33, a1);
    x36 = vec_sel(x35, x32, a6);
    *out2 = vec_xor(*out2, x36);

    /* --- terms leading to out4 --- */
    x37 = vec_or(a5, x15);
    x38 = vec_xor(x37, x33);
    x39 = vec_sel(x14, x33, x20);
    x40 = vec_sel(x39, x38, a1);
    x41 = vec_nor(x10, x10);            /* ~x10 */
    x42 = vec_sel(x41, x40, a6);
    *out4 = vec_xor(*out4, x42);
}