#include #include "bs.h" /* TEMPORARY */ #define TERNARY(x, v0, v1) ((x) ? (v1) : (v0)) #if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||\ defined(__amd64__) || defined(__amd32__)|| defined(__amd16__) #define bs2le(x) (x) #define bs2be(x) (x) #elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||\ (defined(__sparc__)) #define bs2le(x) __builtin_bswap_wordsize(x) #define bs2be(x) __builtin_bswap_wordsize(x) #else #error "endianness not supported" #endif void bs_addroundkey(word_t * B, word_t * rk) { int i; for (i = 0; i < BLOCK_SIZE; i++) B[i] ^= rk[i]; } void bs_apply_sbox(word_t * input) { int i; for(i=0; i < BLOCK_SIZE; i+=8) { bs_sbox(input+i); } } void bs_apply_sbox_rev(word_t * input) { int i; for(i=0; i < BLOCK_SIZE; i+=8) { bs_sbox_rev(input+i); } } /*July 2011*/ /*Straight-line program for AES s box*/ /*Input is U[0], U[1],...,U[7]*/ /*Output is S[0], S[1],...,S[7]*/ // http://cs-www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html void bs_sbox_rev(word_t U[8]) { word_t W[8]; word_t T1,T2,T3,T4,T5,T6,T8, T9,T10,T13,T14,T15,T16, T17,T18,T19,T20,T22,T23,T24, T25, T26, T27; word_t M1,M2,M3,M4,M5,M6,M7,M8, M9,M10,M11,M12,M13,M14,M15, M16,M17,M18,M19,M20,M21,M22, M23,M24,M25,M26,M27,M28,M29, M30,M31,M32,M33,M34,M35,M36, M37,M38,M39,M40,M41,M42,M43, M44,M45,M46,M47,M48,M49,M50, M51,M52,M53,M54,M55,M56,M57, M58,M59,M60,M61,M62,M63; word_t P0,P1,P2,P3,P4,P5,P6,P7,P8, P9,P10,P11,P12,P13,P14, P15,P16,P17,P18,P19,P20, P21,P22,P23,P24,P25,P26, P27,P28,P29; word_t Y5, R5, R13, R17, R18, R19; T23 = U[7] ^ U[4]; T22 = ~(U[6] ^ U[4]); T2 = ~(U[7] ^ U[6]); T1 = U[4] ^ U[3]; T24 = ~(U[3] ^ U[0]); R5 = U[1] ^ U[0]; T8 = ~(U[6] ^ T23); T19 = T22 ^ R5; T9 = ~(U[0] ^ T1); T10 = T2 ^ T24; T13 = T2 ^ R5; T3 = T1 ^ R5; T25 = ~(U[5] ^ T1); R13 = U[6] ^ U[1]; T17 = ~(U[5] ^ T19); T20 = T24 ^ R13; T4 = U[3] ^ T8; R17 = ~(U[5] ^ U[2]); R18 = ~(U[2] ^ U[1]); R19 = ~(U[5] ^ U[3]); Y5 = U[7] ^ R17; T6 = T22 ^ R17; T16 = R13 ^ R19; T27 = T1 ^ R18; T15 = T10 ^ T27; T14 = T10 ^ R18; T26 = T3 ^ T16; M1 = T13 & T6; M2 = T23 & T8; M3 = T14 ^ M1; M4 = T19 & Y5; M5 = M4 ^ M1; M6 = T3 & T16; M7 = T22 & T9; M8 = T26 ^ M6; M9 = T20 & T17; M10 = M9 ^ M6; M11 = T1 & T15; M12 = T4 & T27; M13 = M12 ^ M11; M14 = T2 & T10; M15 = M14 ^ M11; M16 = M3 ^ M2; M17 = M5 ^ T24; M18 = M8 ^ M7; M19 = M10 ^ M15; M20 = M16 ^ M13; M21 = M17 ^ M15; M22 = M18 ^ M13; M23 = M19 ^ T25; M24 = M22 ^ M23; M25 = M22 & M20; M26 = M21 ^ M25; M27 = M20 ^ M21; M28 = M23 ^ M25; M29 = M28 & M27; M30 = M26 & M24; M31 = M20 & M23; M32 = M27 & M31; M33 = M27 ^ M25; M34 = M21 & M22; M35 = M24 & M34; M36 = M24 ^ M25; M37 = M21 ^ M29; M38 = M32 ^ M33; M39 = M23 ^ M30; M40 = M35 ^ M36; M41 = M38 ^ M40; M42 = M37 ^ M39; M43 = M37 ^ M38; M44 = M39 ^ M40; M45 = M42 ^ M41; M46 = M44 & T6; M47 = M40 & T8; M48 = M39 & Y5; M49 = M43 & T16; M50 = M38 & T9; M51 = M37 & T17; M52 = M42 & T15; M53 = M45 & T27; M54 = M41 & T10; M55 = M44 & T13; M56 = M40 & T23; M57 = M39 & T19; M58 = M43 & T3; M59 = M38 & T22; M60 = M37 & T20; M61 = M42 & T1; M62 = M45 & T4; M63 = M41 & T2; P0 = M52 ^ M61; P1 = M58 ^ M59; P2 = M54 ^ M62; P3 = M47 ^ M50; P4 = M48 ^ M56; P5 = M46 ^ M51; P6 = M49 ^ M60; P7 = P0 ^ P1; P8 = M50 ^ M53; P9 = M55 ^ M63; P10 = M57 ^ P4; P11 = P0 ^ P3; P12 = M46 ^ M48; P13 = M49 ^ M51; P14 = M49 ^ M62; P15 = M54 ^ M59; P16 = M57 ^ M61; P17 = M58 ^ P2; P18 = M63 ^ P5; P19 = P2 ^ P3; P20 = P4 ^ P6; P22 = P2 ^ P7; P23 = P7 ^ P8; P24 = P5 ^ P7; P25 = P6 ^ P10; P26 = P9 ^ P11; P27 = P10 ^ P18; P28 = P11 ^ P25; P29 = P15 ^ P20; W[7] = P13 ^ P22; W[6] = P26 ^ P29; W[5] = P17 ^ P28; W[4] = P12 ^ P22; W[3] = P23 ^ P27; W[2] = P19 ^ P24; W[1] = P14 ^ P23; W[0] = P9 ^ P16; memmove(U,W,sizeof(W)); } void bs_sbox(word_t U[8]) { word_t S[8]; word_t T1,T2,T3,T4,T5,T6,T7,T8, T9,T10,T11,T12,T13,T14,T15,T16, T17,T18,T19,T20,T21,T22,T23,T24, T25, T26, T27; word_t M1,M2,M3,M4,M5,M6,M7,M8, M9,M10,M11,M12,M13,M14,M15, M16,M17,M18,M19,M20,M21,M22, M23,M24,M25,M26,M27,M28,M29, M30,M31,M32,M33,M34,M35,M36, M37,M38,M39,M40,M41,M42,M43, M44,M45,M46,M47,M48,M49,M50, M51,M52,M53,M54,M55,M56,M57, M58,M59,M60,M61,M62,M63; word_t L0,L1,L2,L3,L4,L5,L6,L7,L8, L9,L10,L11,L12,L13,L14, L15,L16,L17,L18,L19,L20, L21,L22,L23,L24,L25,L26, L27,L28,L29; T1 = U[7] ^ U[4]; T2 = U[7] ^ U[2]; T3 = U[7] ^ U[1]; T4 = U[4] ^ U[2]; T5 = U[3] ^ U[1]; T6 = T1 ^ T5; T7 = U[6] ^ U[5]; T8 = U[0] ^ T6; T9 = U[0] ^ T7; T10 = T6 ^ T7; T11 = U[6] ^ U[2]; T12 = U[5] ^ U[2]; T13 = T3 ^ T4; T14 = T6 ^ T11; T15 = T5 ^ T11; T16 = T5 ^ T12; T17 = T9 ^ T16; T18 = U[4] ^ U[0]; T19 = T7 ^ T18; T20 = T1 ^ T19; T21 = U[1] ^ U[0]; T22 = T7 ^ T21; T23 = T2 ^ T22; T24 = T2 ^ T10; T25 = T20 ^ T17; T26 = T3 ^ T16; T27 = T1 ^ T12; M1 = T13 & T6; M2 = T23 & T8; M3 = T14 ^ M1; M4 = T19 & U[0]; M5 = M4 ^ M1; M6 = T3 & T16; M7 = T22 & T9; M8 = T26 ^ M6; M9 = T20 & T17; M10 = M9 ^ M6; M11 = T1 & T15; M12 = T4 & T27; M13 = M12 ^ M11; M14 = T2 & T10; M15 = M14 ^ M11; M16 = M3 ^ M2; M17 = M5 ^ T24; M18 = M8 ^ M7; M19 = M10 ^ M15; M20 = M16 ^ M13; M21 = M17 ^ M15; M22 = M18 ^ M13; M23 = M19 ^ T25; M24 = M22 ^ M23; M25 = M22 & M20; M26 = M21 ^ M25; M27 = M20 ^ M21; M28 = M23 ^ M25; M29 = M28 & M27; M30 = M26 & M24; M31 = M20 & M23; M32 = M27 & M31; M33 = M27 ^ M25; M34 = M21 & M22; M35 = M24 & M34; M36 = M24 ^ M25; M37 = M21 ^ M29; M38 = M32 ^ M33; M39 = M23 ^ M30; M40 = M35 ^ M36; M41 = M38 ^ M40; M42 = M37 ^ M39; M43 = M37 ^ M38; M44 = M39 ^ M40; M45 = M42 ^ M41; M46 = M44 & T6; M47 = M40 & T8; M48 = M39 & U[0]; M49 = M43 & T16; M50 = M38 & T9; M51 = M37 & T17; M52 = M42 & T15; M53 = M45 & T27; M54 = M41 & T10; M55 = M44 & T13; M56 = M40 & T23; M57 = M39 & T19; M58 = M43 & T3; M59 = M38 & T22; M60 = M37 & T20; M61 = M42 & T1; M62 = M45 & T4; M63 = M41 & T2; L0 = M61 ^ M62; L1 = M50 ^ M56; L2 = M46 ^ M48; L3 = M47 ^ M55; L4 = M54 ^ M58; L5 = M49 ^ M61; L6 = M62 ^ L5; L7 = M46 ^ L3; L8 = M51 ^ M59; L9 = M52 ^ M53; L10 = M53 ^ L4; L11 = M60 ^ L2; L12 = M48 ^ M51; L13 = M50 ^ L0; L14 = M52 ^ M61; L15 = M55 ^ L1; L16 = M56 ^ L0; L17 = M57 ^ L1; L18 = M58 ^ L8; L19 = M63 ^ L4; L20 = L0 ^ L1; L21 = L1 ^ L7; L22 = L3 ^ L12; L23 = L18 ^ L2; L24 = L15 ^ L9; L25 = L6 ^ L10; L26 = L7 ^ L9; L27 = L8 ^ L10; L28 = L11 ^ L14; L29 = L11 ^ L17; S[7] = L6 ^ L24; S[6] = ~(L16 ^ L26); S[5] = ~(L19 ^ L28); S[4] = L6 ^ L21; S[3] = L20 ^ L22; S[2] = L25 ^ L29; S[1] = ~(L13 ^ L27); S[0] = ~(L6 ^ L23); memmove(U,S,sizeof(S)); } void bs_transpose(word_t * blocks) { word_t transpose[BLOCK_SIZE]; memset(transpose, 0, sizeof(transpose)); bs_transpose_dst(transpose,blocks); memmove(blocks,transpose,sizeof(transpose)); } void bs_transpose_dst(word_t * transpose, word_t * blocks) { int i,k; word_t w; for(k=0; k < WORD_SIZE; k++) { int bitpos = ONE << k; for (i=0; i < WORDS_PER_BLOCK; i++) { w = bs2le(blocks[k * WORDS_PER_BLOCK + i]); int offset = i << MUL_SHIFT; #ifndef UNROLL_TRANSPOSE int j; #ifdef __COMPCERT__ word_t *transptr = transpose+offset; word_t bitmask = ONE; for(j=0; j < WORD_SIZE; j++) { word_t old = *transptr; *(transptr++) = TERNARY(w & bitmask, old, old|bitpos); bitmask <<= 1; } #else for(j=0; j < WORD_SIZE; j++) { // TODO make const time transpose[offset + j] |= (w & (ONE << j)) ? bitpos : 0; } #endif #else transpose[(offset)+ 0 ] |= (w & (ONE << 0 )) ? (bitpos) : 0; transpose[(offset)+ 1 ] |= (w & (ONE << 1 )) ? (bitpos) : 0; transpose[(offset)+ 2 ] |= (w & (ONE << 2 )) ? (bitpos) : 0; transpose[(offset)+ 3 ] |= (w & (ONE << 3 )) ? (bitpos) : 0; transpose[(offset)+ 4 ] |= (w & (ONE << 4 )) ? (bitpos) : 0; transpose[(offset)+ 5 ] |= (w & (ONE << 5 )) ? (bitpos) : 0; transpose[(offset)+ 6 ] |= (w & (ONE << 6 )) ? (bitpos) : 0; transpose[(offset)+ 7 ] |= (w & (ONE << 7 )) ? (bitpos) : 0; #if WORD_SIZE > 8 transpose[(offset)+ 8 ] |= (w & (ONE << 8 )) ? (bitpos) : 0; transpose[(offset)+ 9 ] |= (w & (ONE << 9 )) ? (bitpos) : 0; transpose[(offset)+ 10] |= (w & (ONE << 10)) ? (bitpos) : 0; transpose[(offset)+ 11] |= (w & (ONE << 11)) ? (bitpos) : 0; transpose[(offset)+ 12] |= (w & (ONE << 12)) ? (bitpos) : 0; transpose[(offset)+ 13] |= (w & (ONE << 13)) ? (bitpos) : 0; transpose[(offset)+ 14] |= (w & (ONE << 14)) ? (bitpos) : 0; transpose[(offset)+ 15] |= (w & (ONE << 15)) ? (bitpos) : 0; #endif #if WORD_SIZE > 16 transpose[(offset)+ 16] |= (w & (ONE << 16)) ? (bitpos) : 0; transpose[(offset)+ 17] |= (w & (ONE << 17)) ? (bitpos) : 0; transpose[(offset)+ 18] |= (w & (ONE << 18)) ? (bitpos) : 0; transpose[(offset)+ 19] |= (w & (ONE << 19)) ? (bitpos) : 0; transpose[(offset)+ 20] |= (w & (ONE << 20)) ? (bitpos) : 0; transpose[(offset)+ 21] |= (w & (ONE << 21)) ? (bitpos) : 0; transpose[(offset)+ 22] |= (w & (ONE << 22)) ? (bitpos) : 0; transpose[(offset)+ 23] |= (w & (ONE << 23)) ? (bitpos) : 0; transpose[(offset)+ 24] |= (w & (ONE << 24)) ? (bitpos) : 0; transpose[(offset)+ 25] |= (w & (ONE << 25)) ? (bitpos) : 0; transpose[(offset)+ 26] |= (w & (ONE << 26)) ? (bitpos) : 0; transpose[(offset)+ 27] |= (w & (ONE << 27)) ? (bitpos) : 0; transpose[(offset)+ 28] |= (w & (ONE << 28)) ? (bitpos) : 0; transpose[(offset)+ 29] |= (w & (ONE << 29)) ? (bitpos) : 0; transpose[(offset)+ 30] |= (w & (ONE << 30)) ? (bitpos) : 0; transpose[(offset)+ 31] |= (w & (ONE << 31)) ? (bitpos) : 0; #endif #if WORD_SIZE > 32 transpose[(offset)+ 32] |= (w & (ONE << 32)) ? (bitpos) : 0; transpose[(offset)+ 33] |= (w & (ONE << 33)) ? (bitpos) : 0; transpose[(offset)+ 34] |= (w & (ONE << 34)) ? (bitpos) : 0; transpose[(offset)+ 35] |= (w & (ONE << 35)) ? (bitpos) : 0; transpose[(offset)+ 36] |= (w & (ONE << 36)) ? (bitpos) : 0; transpose[(offset)+ 37] |= (w & (ONE << 37)) ? (bitpos) : 0; transpose[(offset)+ 38] |= (w & (ONE << 38)) ? (bitpos) : 0; transpose[(offset)+ 39] |= (w & (ONE << 39)) ? (bitpos) : 0; transpose[(offset)+ 40] |= (w & (ONE << 40)) ? (bitpos) : 0; transpose[(offset)+ 41] |= (w & (ONE << 41)) ? (bitpos) : 0; transpose[(offset)+ 42] |= (w & (ONE << 42)) ? (bitpos) : 0; transpose[(offset)+ 43] |= (w & (ONE << 43)) ? (bitpos) : 0; transpose[(offset)+ 44] |= (w & (ONE << 44)) ? (bitpos) : 0; transpose[(offset)+ 45] |= (w & (ONE << 45)) ? (bitpos) : 0; transpose[(offset)+ 46] |= (w & (ONE << 46)) ? (bitpos) : 0; transpose[(offset)+ 47] |= (w & (ONE << 47)) ? (bitpos) : 0; transpose[(offset)+ 48] |= (w & (ONE << 48)) ? (bitpos) : 0; transpose[(offset)+ 49] |= (w & (ONE << 49)) ? (bitpos) : 0; transpose[(offset)+ 50] |= (w & (ONE << 50)) ? (bitpos) : 0; transpose[(offset)+ 51] |= (w & (ONE << 51)) ? (bitpos) : 0; transpose[(offset)+ 52] |= (w & (ONE << 52)) ? (bitpos) : 0; transpose[(offset)+ 53] |= (w & (ONE << 53)) ? (bitpos) : 0; transpose[(offset)+ 54] |= (w & (ONE << 54)) ? (bitpos) : 0; transpose[(offset)+ 55] |= (w & (ONE << 55)) ? (bitpos) : 0; transpose[(offset)+ 56] |= (w & (ONE << 56)) ? (bitpos) : 0; transpose[(offset)+ 57] |= (w & (ONE << 57)) ? (bitpos) : 0; transpose[(offset)+ 58] |= (w & (ONE << 58)) ? (bitpos) : 0; transpose[(offset)+ 59] |= (w & (ONE << 59)) ? (bitpos) : 0; transpose[(offset)+ 60] |= (w & (ONE << 60)) ? (bitpos) : 0; transpose[(offset)+ 61] |= (w & (ONE << 61)) ? (bitpos) : 0; transpose[(offset)+ 62] |= (w & (ONE << 62)) ? (bitpos) : 0; transpose[(offset)+ 63] |= (w & (ONE << 63)) ? (bitpos) : 0; #endif #endif // constant time: //transpose[(i<>(WORD_SIZE-1)) & (ONE< 8 transpose[8 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 8 )) ? bitpos : 0; transpose[9 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 9 )) ? bitpos : 0; transpose[10 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 10)) ? bitpos : 0; transpose[11 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 11)) ? bitpos : 0; transpose[12 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 12)) ? bitpos : 0; transpose[13 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 13)) ? bitpos : 0; transpose[14 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 14)) ? bitpos : 0; transpose[15 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 15)) ? bitpos : 0; #endif #if WORD_SIZE > 16 transpose[16 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 16)) ? bitpos : 0; transpose[17 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 17)) ? bitpos : 0; transpose[18 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 18)) ? bitpos : 0; transpose[19 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 19)) ? bitpos : 0; transpose[20 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 20)) ? bitpos : 0; transpose[21 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 21)) ? bitpos : 0; transpose[22 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 22)) ? bitpos : 0; transpose[23 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 23)) ? bitpos : 0; transpose[24 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 24)) ? bitpos : 0; transpose[25 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 25)) ? bitpos : 0; transpose[26 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 26)) ? bitpos : 0; transpose[27 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 27)) ? bitpos : 0; transpose[28 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 28)) ? bitpos : 0; transpose[29 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 29)) ? bitpos : 0; transpose[30 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 30)) ? bitpos : 0; transpose[31 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 31)) ? bitpos : 0; #endif #if WORD_SIZE > 32 transpose[32 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 32)) ? bitpos : 0; transpose[33 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 33)) ? bitpos : 0; transpose[34 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 34)) ? bitpos : 0; transpose[35 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 35)) ? bitpos : 0; transpose[36 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 36)) ? bitpos : 0; transpose[37 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 37)) ? bitpos : 0; transpose[38 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 38)) ? bitpos : 0; transpose[39 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 39)) ? bitpos : 0; transpose[40 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 40)) ? bitpos : 0; transpose[41 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 41)) ? bitpos : 0; transpose[42 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 42)) ? bitpos : 0; transpose[43 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 43)) ? bitpos : 0; transpose[44 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 44)) ? bitpos : 0; transpose[45 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 45)) ? bitpos : 0; transpose[46 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 46)) ? bitpos : 0; transpose[47 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 47)) ? bitpos : 0; transpose[48 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 48)) ? bitpos : 0; transpose[49 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 49)) ? bitpos : 0; transpose[50 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 50)) ? bitpos : 0; transpose[51 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 51)) ? bitpos : 0; transpose[52 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 52)) ? bitpos : 0; transpose[53 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 53)) ? bitpos : 0; transpose[54 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 54)) ? bitpos : 0; transpose[55 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 55)) ? bitpos : 0; transpose[56 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 56)) ? bitpos : 0; transpose[57 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 57)) ? bitpos : 0; transpose[58 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 58)) ? bitpos : 0; transpose[59 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 59)) ? bitpos : 0; transpose[60 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 60)) ? bitpos : 0; transpose[61 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 61)) ? bitpos : 0; transpose[62 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 62)) ? bitpos : 0; transpose[63 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 63)) ? bitpos : 0; #endif #endif } memmove(blocks,transpose,sizeof(transpose)); } #define R0 0 #define R1 8 #define R2 16 #define R3 24 #define B0 0 #define B1 32 #define B2 64 #define B3 96 #define R0_shift (BLOCK_SIZE/4)*0 #define R1_shift (BLOCK_SIZE/4)*1 #define R2_shift (BLOCK_SIZE/4)*2 #define R3_shift (BLOCK_SIZE/4)*3 #define B_MOD (BLOCK_SIZE) void bs_shiftrows(word_t * B) { word_t Bp_space[BLOCK_SIZE]; word_t * Bp = Bp_space; word_t * Br0 = B + 0; word_t * Br1 = B + 32; word_t * Br2 = B + 64; word_t * Br3 = B + 96; uint8_t offsetr0 = 0; uint8_t offsetr1 = 32; uint8_t offsetr2 = 64; uint8_t offsetr3 = 96; int i; for(i=0; i<4; i++) { Bp[B0 + 0] = Br0[0]; Bp[B0 + 1] = Br0[1]; Bp[B0 + 2] = Br0[2]; Bp[B0 + 3] = Br0[3]; Bp[B0 + 4] = Br0[4]; Bp[B0 + 5] = Br0[5]; Bp[B0 + 6] = Br0[6]; Bp[B0 + 7] = Br0[7]; Bp[B1 + 0] = Br1[0]; Bp[B1 + 1] = Br1[1]; Bp[B1 + 2] = Br1[2]; Bp[B1 + 3] = Br1[3]; Bp[B1 + 4] = Br1[4]; Bp[B1 + 5] = Br1[5]; Bp[B1 + 6] = Br1[6]; Bp[B1 + 7] = Br1[7]; Bp[B2 + 0] = Br2[0]; Bp[B2 + 1] = Br2[1]; Bp[B2 + 2] = Br2[2]; Bp[B2 + 3] = Br2[3]; Bp[B2 + 4] = Br2[4]; Bp[B2 + 5] = Br2[5]; Bp[B2 + 6] = Br2[6]; Bp[B2 + 7] = Br2[7]; Bp[B3 + 0] = Br3[0]; Bp[B3 + 1] = Br3[1]; Bp[B3 + 2] = Br3[2]; Bp[B3 + 3] = Br3[3]; Bp[B3 + 4] = Br3[4]; Bp[B3 + 5] = Br3[5]; Bp[B3 + 6] = Br3[6]; Bp[B3 + 7] = Br3[7]; offsetr0 = (offsetr0 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; offsetr1 = (offsetr1 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; offsetr2 = (offsetr2 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; offsetr3 = (offsetr3 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; Br0 = B + offsetr0; Br1 = B + offsetr1; Br2 = B + offsetr2; Br3 = B + offsetr3; Bp += 8; } memmove(B,Bp_space,sizeof(Bp_space)); } void bs_shiftrows_rev(word_t * B) { word_t Bp_space[BLOCK_SIZE]; word_t * Bp = Bp_space; word_t * Br0 = Bp + 0; word_t * Br1 = Bp + 32; word_t * Br2 = Bp + 64; word_t * Br3 = Bp + 96; uint8_t offsetr0 = 0; uint8_t offsetr1 = 32; uint8_t offsetr2 = 64; uint8_t offsetr3 = 96; int i; for(i=0; i<4; i++) { Br0[0] = B[B0 + 0]; Br0[1] = B[B0 + 1]; Br0[2] = B[B0 + 2]; Br0[3] = B[B0 + 3]; Br0[4] = B[B0 + 4]; Br0[5] = B[B0 + 5]; Br0[6] = B[B0 + 6]; Br0[7] = B[B0 + 7]; Br1[0] = B[B1 + 0]; Br1[1] = B[B1 + 1]; Br1[2] = B[B1 + 2]; Br1[3] = B[B1 + 3]; Br1[4] = B[B1 + 4]; Br1[5] = B[B1 + 5]; Br1[6] = B[B1 + 6]; Br1[7] = B[B1 + 7]; Br2[0] = B[B2 + 0]; Br2[1] = B[B2 + 1]; Br2[2] = B[B2 + 2]; Br2[3] = B[B2 + 3]; Br2[4] = B[B2 + 4]; Br2[5] = B[B2 + 5]; Br2[6] = B[B2 + 6]; Br2[7] = B[B2 + 7]; Br3[0] = B[B3 + 0]; Br3[1] = B[B3 + 1]; Br3[2] = B[B3 + 2]; Br3[3] = B[B3 + 3]; Br3[4] = B[B3 + 4]; Br3[5] = B[B3 + 5]; Br3[6] = B[B3 + 6]; Br3[7] = B[B3 + 7]; offsetr0 = (offsetr0 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; offsetr1 = (offsetr1 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; offsetr2 = (offsetr2 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; offsetr3 = (offsetr3 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f; Br0 = Bp + offsetr0; Br1 = Bp + offsetr1; Br2 = Bp + offsetr2; Br3 = Bp + offsetr3; B += 8; } memmove(B - 8 * 4,Bp_space,sizeof(Bp_space)); } #define A0 0 #define A1 8 #define A2 16 #define A3 24 // Does shift rows and mix columns in same step void bs_shiftmix(word_t * B) { word_t Bp_space[BLOCK_SIZE]; word_t * Bp = Bp_space; word_t * Br0 = B + 0; word_t * Br1 = B + 32; word_t * Br2 = B + 64; word_t * Br3 = B + 96; uint8_t offsetr0 = 0; uint8_t offsetr1 = 32; uint8_t offsetr2 = 64; uint8_t offsetr3 = 96; Br0 = B + offsetr0; Br1 = B + offsetr1; Br2 = B + offsetr2; Br3 = B + offsetr3; int i; for (i = 0; i < 4; i++) { // B0 // 2*A0 2*A1 A1 A2 A3 word_t of =Br0[R0+7]^ Br1[R1+7]; Bp[A0+0] = Br1[R1+0] ^ Br2[R2+0] ^ Br3[R3+0] ^ of; Bp[A0+1] = Br0[R0+0] ^ Br1[R1+0] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br3[R3+1] ^ of; Bp[A0+2] = Br0[R0+1] ^ Br1[R1+1] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br3[R3+2]; Bp[A0+3] = Br0[R0+2] ^ Br1[R1+2] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br3[R3+3] ^ of; Bp[A0+4] = Br0[R0+3] ^ Br1[R1+3] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br3[R3+4] ^ of; Bp[A0+5] = Br0[R0+4] ^ Br1[R1+4] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br3[R3+5]; Bp[A0+6] = Br0[R0+5] ^ Br1[R1+5] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br3[R3+6]; Bp[A0+7] = Br0[R0+6] ^ Br1[R1+6] ^ Br1[R1+7] ^ Br2[R2+7] ^ Br3[R3+7]; // A0 2*A1 2*A2 A2 A3 of = Br1[R1+7] ^ Br2[R2+7]; Bp[A1+0] = Br0[R0+0] ^ Br2[R2+0] ^ Br3[R3+0] ^ of; Bp[A1+1] = Br0[R0+1] ^ Br1[R1+0] ^ Br2[R2+0] ^ Br2[R2+1] ^ Br3[R3+1] ^ of; Bp[A1+2] = Br0[R0+2] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br2[R2+2] ^ Br3[R3+2]; Bp[A1+3] = Br0[R0+3] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br2[R2+3] ^ Br3[R3+3] ^ of; Bp[A1+4] = Br0[R0+4] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br2[R2+4] ^ Br3[R3+4] ^ of; Bp[A1+5] = Br0[R0+5] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br2[R2+5] ^ Br3[R3+5]; Bp[A1+6] = Br0[R0+6] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br2[R2+6] ^ Br3[R3+6]; Bp[A1+7] = Br0[R0+7] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br2[R2+7] ^ Br3[R3+7]; // A0 A1 2*A2 2*A3 A3 of = Br2[R2+7] ^ Br3[R3+7]; Bp[A2+0] = Br0[R0+0] ^ Br1[R1+0] ^ Br3[R3+0] ^ of; Bp[A2+1] = Br0[R0+1] ^ Br1[R1+1] ^ Br2[R2+0] ^ Br3[R3+0] ^ Br3[R3+1] ^ of; Bp[A2+2] = Br0[R0+2] ^ Br1[R1+2] ^ Br2[R2+1] ^ Br3[R3+1] ^ Br3[R3+2]; Bp[A2+3] = Br0[R0+3] ^ Br1[R1+3] ^ Br2[R2+2] ^ Br3[R3+2] ^ Br3[R3+3] ^ of; Bp[A2+4] = Br0[R0+4] ^ Br1[R1+4] ^ Br2[R2+3] ^ Br3[R3+3] ^ Br3[R3+4] ^ of; Bp[A2+5] = Br0[R0+5] ^ Br1[R1+5] ^ Br2[R2+4] ^ Br3[R3+4] ^ Br3[R3+5]; Bp[A2+6] = Br0[R0+6] ^ Br1[R1+6] ^ Br2[R2+5] ^ Br3[R3+5] ^ Br3[R3+6]; Bp[A2+7] = Br0[R0+7] ^ Br1[R1+7] ^ Br2[R2+6] ^ Br3[R3+6] ^ Br3[R3+7]; // A0 2*A0 A1 A2 2*A3 of = Br0[R0+7] ^ Br3[R3+7]; Bp[A3+0] = Br0[R0+0] ^ Br1[R1+0] ^ Br2[R2+0] ^ of; Bp[A3+1] = Br0[R0+1] ^ Br0[R0+0] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br3[R3+0] ^ of; Bp[A3+2] = Br0[R0+2] ^ Br0[R0+1] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br3[R3+1]; Bp[A3+3] = Br0[R0+3] ^ Br0[R0+2] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br3[R3+2] ^ of; Bp[A3+4] = Br0[R0+4] ^ Br0[R0+3] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br3[R3+3] ^ of; Bp[A3+5] = Br0[R0+5] ^ Br0[R0+4] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br3[R3+4]; Bp[A3+6] = Br0[R0+6] ^ Br0[R0+5] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br3[R3+5]; Bp[A3+7] = Br0[R0+7] ^ Br0[R0+6] ^ Br1[R1+7] ^ Br2[R2+7] ^ Br3[R3+6]; Bp += BLOCK_SIZE/4; offsetr0 = (offsetr0 + BLOCK_SIZE/4) & 0x7f; offsetr1 = (offsetr1 + BLOCK_SIZE/4) & 0x7f; offsetr2 = (offsetr2 + BLOCK_SIZE/4) & 0x7f; offsetr3 = (offsetr3 + BLOCK_SIZE/4) & 0x7f; Br0 = B + offsetr0; Br1 = B + offsetr1; Br2 = B + offsetr2; Br3 = B + offsetr3; } memmove(B,Bp_space,sizeof(Bp_space)); } void bs_mixcolumns(word_t * B) { word_t Bp_space[BLOCK_SIZE]; word_t * Bp = Bp_space; // to understand this, see // https://en.wikipedia.org/wiki/Rijndael_mix_columns int i = 0; for (; i < 4; i++) { // of = A0 ^ A1; // A0 = A0 ^ (0x1b & ((signed char)of>>7)); //// 2 * A0 // A0 = A0 ^ (A0 << 1) //// + 3 * A1 // A0 = A0 ^ (A1) // A0 = A0 ^ (A1<<1) //// + A2 + A3 // A0 = A0 ^ (A2) // A0 = A0 ^ (A3) // A0.7 A1.7 word_t of = B[A0+7] ^ B[A1+7]; // 2*A0 2*A1 A1 A2 A3 Bp[A0+0] = B[A1+0] ^ B[A2+0] ^ B[A3+0] ^ of; Bp[A0+1] = B[A0+0] ^ B[A1+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+1] ^ of; Bp[A0+2] = B[A0+1] ^ B[A1+1] ^ B[A1+2] ^ B[A2+2] ^ B[A3+2]; Bp[A0+3] = B[A0+2] ^ B[A1+2] ^ B[A1+3] ^ B[A2+3] ^ B[A3+3] ^ of; Bp[A0+4] = B[A0+3] ^ B[A1+3] ^ B[A1+4] ^ B[A2+4] ^ B[A3+4] ^ of; Bp[A0+5] = B[A0+4] ^ B[A1+4] ^ B[A1+5] ^ B[A2+5] ^ B[A3+5]; Bp[A0+6] = B[A0+5] ^ B[A1+5] ^ B[A1+6] ^ B[A2+6] ^ B[A3+6]; Bp[A0+7] = B[A0+6] ^ B[A1+6] ^ B[A1+7] ^ B[A2+7] ^ B[A3+7]; // of = A1 ^ A2 // A1 = A1 ^ (0x1b & ((signed char)of>>7)); //// A0 // A1 = A1 ^ (A0) //// + 2 * A1 // A1 = A1 ^ (A1 << 1) //// + 3 * A2 // A1 = A1 ^ (A2) // A1 = A1 ^ (A2<<1) //// + A3 // A1 = A1 ^ (A3) of = B[A1+7] ^ B[A2+7]; // A0 2*A1 2*A2 A2 A3 Bp[A1+0] = B[A0+0] ^ B[A2+0] ^ B[A3+0] ^ of; Bp[A1+1] = B[A0+1] ^ B[A1+0] ^ B[A2+0] ^ B[A2+1] ^ B[A3+1] ^ of; Bp[A1+2] = B[A0+2] ^ B[A1+1] ^ B[A2+1] ^ B[A2+2] ^ B[A3+2]; Bp[A1+3] = B[A0+3] ^ B[A1+2] ^ B[A2+2] ^ B[A2+3] ^ B[A3+3] ^ of; Bp[A1+4] = B[A0+4] ^ B[A1+3] ^ B[A2+3] ^ B[A2+4] ^ B[A3+4] ^ of; Bp[A1+5] = B[A0+5] ^ B[A1+4] ^ B[A2+4] ^ B[A2+5] ^ B[A3+5]; Bp[A1+6] = B[A0+6] ^ B[A1+5] ^ B[A2+5] ^ B[A2+6] ^ B[A3+6]; Bp[A1+7] = B[A0+7] ^ B[A1+6] ^ B[A2+6] ^ B[A2+7] ^ B[A3+7]; // of = A2 ^ A3 // A2 = A2 ^ (0x1b & ((signed char)of>>7)); //// A0 + A1 // A2 = A2 ^ (A0) // A2 = A2 ^ (A1) //// + 2 * A2 // A2 = A2 ^ (A2 << 1) //// + 3 * A3 // A2 = A2 ^ (A3) // A2 = A2 ^ (A3<<1) of = B[A2+7] ^ B[A3+7]; // A0 A1 2*A2 2*A3 A3 Bp[A2+0] = B[A0+0] ^ B[A1+0] ^ B[A3+0] ^ of; Bp[A2+1] = B[A0+1] ^ B[A1+1] ^ B[A2+0] ^ B[A3+0] ^ B[A3+1] ^ of; Bp[A2+2] = B[A0+2] ^ B[A1+2] ^ B[A2+1] ^ B[A3+1] ^ B[A3+2]; Bp[A2+3] = B[A0+3] ^ B[A1+3] ^ B[A2+2] ^ B[A3+2] ^ B[A3+3] ^ of; Bp[A2+4] = B[A0+4] ^ B[A1+4] ^ B[A2+3] ^ B[A3+3] ^ B[A3+4] ^ of; Bp[A2+5] = B[A0+5] ^ B[A1+5] ^ B[A2+4] ^ B[A3+4] ^ B[A3+5]; Bp[A2+6] = B[A0+6] ^ B[A1+6] ^ B[A2+5] ^ B[A3+5] ^ B[A3+6]; Bp[A2+7] = B[A0+7] ^ B[A1+7] ^ B[A2+6] ^ B[A3+6] ^ B[A3+7]; // A3 = A0 ^ A3 // A3 = A3 ^ (0x1b & ((signed char)of>>7)); //// 3 * A0 // A3 = A3 ^ (A0) // A3 = A3 ^ (A0 << 1) //// + A1 + A2 // A3 = A3 ^ A1 // A3 = A3 ^ A2 //// + 2 * A3 // A3 = A3 ^ (A3<<1) of = B[A0+7] ^ B[A3+7]; // 2*A0 A0 A1 A2 2*A3 Bp[A3+0] = B[A0+0] ^ B[A1+0] ^ B[A2+0] ^ of; Bp[A3+1] = B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+0] ^ of; Bp[A3+2] = B[A0+2] ^ B[A0+1] ^ B[A1+2] ^ B[A2+2] ^ B[A3+1]; Bp[A3+3] = B[A0+3] ^ B[A0+2] ^ B[A1+3] ^ B[A2+3] ^ B[A3+2] ^ of; Bp[A3+4] = B[A0+4] ^ B[A0+3] ^ B[A1+4] ^ B[A2+4] ^ B[A3+3] ^ of; Bp[A3+5] = B[A0+5] ^ B[A0+4] ^ B[A1+5] ^ B[A2+5] ^ B[A3+4]; Bp[A3+6] = B[A0+6] ^ B[A0+5] ^ B[A1+6] ^ B[A2+6] ^ B[A3+5]; Bp[A3+7] = B[A0+7] ^ B[A0+6] ^ B[A1+7] ^ B[A2+7] ^ B[A3+6]; // Bp += BLOCK_SIZE/4; B += BLOCK_SIZE/4; } memmove(B - BLOCK_SIZE,Bp - BLOCK_SIZE,sizeof(Bp_space)); } void bs_mixcolumns_rev(word_t * B) { // to understand this, see // https://en.wikipedia.org/wiki/Rijndael_mix_columns // TODO combine with shiftrows for performance on decryption word_t Bp_space[BLOCK_SIZE]; word_t * Bp = Bp_space; int i = 0; for (; i < BLOCK_SIZE / 4; i += BLOCK_SIZE / 16) { //// state[i][0] = A0*0x0e + A1*0x0b + A2*0x0d + A3*0x09 // overflow: /* A0 * 0b1110 */ /* A1 * 0b1011 */ /* A2 * 0b1101 */ /* A3 * 0b1001 */ word_t of0 = ( (B[A0+7] ^ B[A0+6] ^ B[A0+5]) ^ (B[A1 + 7] ^ B[A1+5]) ^ (B[A2+6] ^ B[A2+5]) ^ ( B[A3+5] )); // 2 bit word_t of1 = ( (B[A0+7] ^ B[A0+6]) ^ ( B[A1+6]) ^ (B[A2+7] ^ B[A2+6]) ^ ( B[A3+6] )); // 3 bit word_t of2 = ( (B[A0+7]) ^ ( B[A1+7]) ^ ( B[A2+7]) ^ ( B[A3+7] )); // 4 bit // inverse: // 1110 1011 1101 1001 // A0 = A0 * 14 + A1 * 11 + A2 * 13 + A3 * 9 // A0 = A0 * (2+4+8) + A1 * (1+2+8) + A2 * (1+4+8) + A3 * (1+8) // (2*A0 + 4*A0 + 8*A0 ) + (8*A1 + 2*A1 + A1 ) + ( A2 + 4*A2 + 8*A2 ) + ( A3 + 8*A3) Bp[A0+0] = B[A1+0] ^ B[A2+0] ^ B[A3+0] ^ of0; Bp[A0+1] = B[A0+0] ^ B[A1+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+1] ^ of0 ^ of1; Bp[A0+2] = B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ B[A1+2] ^ B[A2+2] ^ B[A2+0] ^ B[A3+2] ^ of1 ^ of2; Bp[A0+3] = B[A0+2] ^ B[A0+1] ^ B[A0+0] ^ B[A1+0] ^ B[A1+2] ^ B[A1+3] ^ B[A2+3] ^ B[A2+1] ^ B[A2+0] ^ B[A3+3] ^ B[A3+0] ^ of0 ^ of2; Bp[A0+4] = B[A0+3] ^ B[A0+2] ^ B[A0+1] ^ B[A1+1] ^ B[A1+3] ^ B[A1+4] ^ B[A2+4] ^ B[A2+2] ^ B[A2+1] ^ B[A3+4] ^ B[A3+1] ^ of0 ^ of1; Bp[A0+5] = B[A0+4] ^ B[A0+3] ^ B[A0+2] ^ B[A1+2] ^ B[A1+4] ^ B[A1+5] ^ B[A2+5] ^ B[A2+3] ^ B[A2+2] ^ B[A3+5] ^ B[A3+2] ^ of1 ^ of2; Bp[A0+6] = B[A0+5] ^ B[A0+4] ^ B[A0+3] ^ B[A1+3] ^ B[A1+5] ^ B[A1+6] ^ B[A2+6] ^ B[A2+4] ^ B[A2+3] ^ B[A3+6] ^ B[A3+3] ^ of2; Bp[A0+7] = B[A0+6] ^ B[A0+5] ^ B[A0+4] ^ B[A1+4] ^ B[A1+6] ^ B[A1+7] ^ B[A2+7] ^ B[A2+5] ^ B[A2+4] ^ B[A3+7] ^ B[A3+4]; //// state[i][1] = A0*0x09 + A1*0xe + A2*0x0b + A3*0x0d // overflow: /* A0 * 0b1001 */ /* A1 * 0b1110 */ /* A2 * 0b101 1 */ /* A3 * 0b1101 */ of0 = ( (B[A0+5]) ^ (B[A1+7] ^ B[A1+6] ^ B[A1+5]) ^ (B[A2 + 7] ^ B[A2+5]) ^ (B[A3+6] ^ B[A3+5])); // 2 bit of1 = ( (B[A0+6]) ^ (B[A1+7] ^ B[A1+6]) ^ ( B[A2+6]) ^ (B[A3+7] ^ B[A3+6])); // 3 bit of2 = ( (B[A0+7]) ^ (B[A1+7]) ^ ( B[A2+7]) ^ ( B[A3+7])); // 4 bit // inverse: // 1001 1110 1011 1101 // A1 = A0 * 9 + A1 * 14 + A2 * 11 + A3 * 13 // A1 = A0 * (1+8) + A1 * (2+4+8) + A2 * (1+2+8) + A3 * (1+4+8) // (1*A0 + 8*A0 ) +(2*A1 + 4*A1 + 8*A1 ) + (1*A2 + 2*A2 + 8*A2 ) + (1*A3 + 4*A3 + 8*A3) Bp[A1+0] = B[A0+0] ^ B[A2+0] ^ B[A3+0] ^ of0; Bp[A1+1] = B[A0+1] ^ B[A1+0] ^ B[A2+1] ^ B[A2+0] ^ B[A3+1] ^ of0 ^ of1; Bp[A1+2] = B[A0+2] ^ B[A1+1] ^ B[A1+0] ^ B[A2+2] ^ B[A2+1] ^ B[A3+2] ^ B[A3+0] ^ of1 ^ of2; Bp[A1+3] = B[A0+3] ^ B[A0+0] ^ B[A1+2] ^ B[A1+1] ^ B[A1+0] ^ B[A2+3] ^ B[A2+2] ^ B[A2+0] ^ B[A3+3] ^ B[A3+1] ^ B[A3+0] ^ of0 ^ of2; Bp[A1+4] = B[A0+4] ^ B[A0+1] ^ B[A1+3] ^ B[A1+2] ^ B[A1+1] ^ B[A2+4] ^ B[A2+3] ^ B[A2+1] ^ B[A3+4] ^ B[A3+2] ^ B[A3+1] ^ of0 ^ of1; Bp[A1+5] = B[A0+5] ^ B[A0+2] ^ B[A1+4] ^ B[A1+3] ^ B[A1+2] ^ B[A2+5] ^ B[A2+4] ^ B[A2+2] ^ B[A3+5] ^ B[A3+3] ^ B[A3+2] ^ of1 ^ of2; Bp[A1+6] = B[A0+6] ^ B[A0+3] ^ B[A1+5] ^ B[A1+4] ^ B[A1+3] ^ B[A2+6] ^ B[A2+5] ^ B[A2+3] ^ B[A3+6] ^ B[A3+4] ^ B[A3+3] ^ of2; Bp[A1+7] = B[A0+7] ^ B[A0+4] ^ B[A1+6] ^ B[A1+5] ^ B[A1+4] ^ B[A2+7] ^ B[A2+6] ^ B[A2+4] ^ B[A3+7] ^ B[A3+5] ^ B[A3+4]; //// state[i][2] = A0*0x0d + A1*0x09 + A2*0x0e + A3*0x0b // overflow: /* A1 * 0b1001 */ /* A2 * 0b1110 */ /* A3 * 0b1011 */ /* A0 * 0b1101 */ of0 = ( (B[A1+5]) ^ (B[A2+7] ^ B[A2+6] ^ B[A2+5]) ^ (B[A3 + 7] ^ B[A3+5]) ^ (B[A0+6] ^ B[A0+5])); // 2 bit of1 = ( (B[A1+6]) ^ (B[A2+7] ^ B[A2+6]) ^ ( B[A3+6]) ^ (B[A0+7] ^ B[A0+6])); // 3 bit of2 = ( (B[A1+7]) ^ (B[A2+7]) ^ ( B[A3+7]) ^ ( B[A0+7])); // 4 bit // inverse: // 1001 1110 1011 1101 // A2 = A1 * 9 + A2 * 14 + A3 * 11 + A0 * 13 // A2 = A1 * (1+8) + A2 * (2+4+8) + A3 * (1+2+8) + A0 * (1+4+8) // (1*A1 + 8*A1) + ( 2*A2 + 4*A2 + 8*A2) + (1*A3 2*A2 + 8*A2) + (1*A0 + 4*A0 + 8*A0) Bp[A2+0] = B[A1+0] ^ B[A3+0] ^ B[A0+0] ^ of0; Bp[A2+1] = B[A1+1] ^ B[A2+0] ^ B[A3+1] ^ B[A3+0] ^ B[A0+1] ^ of0 ^ of1; Bp[A2+2] = B[A1+2] ^ B[A2+1] ^ B[A2+0] ^ B[A3+2] ^ B[A3+1] ^ B[A0+2] ^ B[A0+0] ^ of1 ^ of2; Bp[A2+3] = B[A1+3] ^ B[A1+0] ^ B[A2+2] ^ B[A2+1] ^ B[A2+0] ^ B[A3+3] ^ B[A3+2] ^ B[A3+0] ^ B[A0+3] ^ B[A0+1] ^ B[A0+0] ^ of0 ^ of2; Bp[A2+4] = B[A1+4] ^ B[A1+1] ^ B[A2+3] ^ B[A2+2] ^ B[A2+1] ^ B[A3+4] ^ B[A3+3] ^ B[A3+1] ^ B[A0+4] ^ B[A0+2] ^ B[A0+1] ^ of0 ^ of1; Bp[A2+5] = B[A1+5] ^ B[A1+2] ^ B[A2+4] ^ B[A2+3] ^ B[A2+2] ^ B[A3+5] ^ B[A3+4] ^ B[A3+2] ^ B[A0+5] ^ B[A0+3] ^ B[A0+2] ^ of1 ^ of2; Bp[A2+6] = B[A1+6] ^ B[A1+3] ^ B[A2+5] ^ B[A2+4] ^ B[A2+3] ^ B[A3+6] ^ B[A3+5] ^ B[A3+3] ^ B[A0+6] ^ B[A0+4] ^ B[A0+3] ^ of2; Bp[A2+7] = B[A1+7] ^ B[A1+4] ^ B[A2+6] ^ B[A2+5] ^ B[A2+4] ^ B[A3+7] ^ B[A3+6] ^ B[A3+4] ^ B[A0+7] ^ B[A0+5] ^ B[A0+4]; //// state[i][3] = A0*0x0b + A1*0x0d + A2*0x09 + A3*0x0e // overflow: /* A2 * 0b1001 */ /* A3 * 0b1110 */ /* A0 * 0b1011 */ /* A1 * 0b1101 */ of0 = ( (B[A2+5]) ^ (B[A3+7] ^ B[A3+6] ^ B[A3+5]) ^ (B[A0 + 7] ^ B[A0+5]) ^ (B[A1+6] ^ B[A1+5])); // 2 bit of1 = ( (B[A2+6]) ^ (B[A3+7] ^ B[A3+6]) ^ ( B[A0+6]) ^ (B[A1+7] ^ B[A1+6])); // 3 bit of2 = ( (B[A2+7]) ^ (B[A3+7]) ^ ( B[A0+7]) ^ ( B[A1+7])); // 4 bit // inverse: // 1001 1110 1011 1101 // A2 = A2 * 9 + A3 * 14 + A0 * 11 + A1 * 13 // A2 = A2 * (1+8) + A3 * (2+4+8) + A0 * (1+2+8) + A1 * (1+4+8) // (1*A2 + 8*A2) + ( 2*A3 + 4*A3 + 8*A3) + (1*A0 2*A0 + 8*A0) + (1*A1 + 4*A1 + 8*A1) Bp[A3+0] = B[A2+0] ^ B[A0+0] ^ B[A1+0] ^ of0; Bp[A3+1] = B[A2+1] ^ B[A3+0] ^ B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ of0 ^ of1; Bp[A3+2] = B[A2+2] ^ B[A3+1] ^ B[A3+0] ^ B[A0+2] ^ B[A0+1] ^ B[A1+2] ^ B[A1+0] ^ of1 ^ of2; Bp[A3+3] = B[A2+3] ^ B[A2+0] ^ B[A3+2] ^ B[A3+1] ^ B[A3+0] ^ B[A0+3] ^ B[A0+2] ^ B[A0+0] ^ B[A1+3] ^ B[A1+1] ^ B[A1+0] ^ of0 ^ of2; Bp[A3+4] = B[A2+4] ^ B[A2+1] ^ B[A3+3] ^ B[A3+2] ^ B[A3+1] ^ B[A0+4] ^ B[A0+3] ^ B[A0+1] ^ B[A1+4] ^ B[A1+2] ^ B[A1+1] ^ of0 ^ of1; Bp[A3+5] = B[A2+5] ^ B[A2+2] ^ B[A3+4] ^ B[A3+3] ^ B[A3+2] ^ B[A0+5] ^ B[A0+4] ^ B[A0+2] ^ B[A1+5] ^ B[A1+3] ^ B[A1+2] ^ of1 ^ of2; Bp[A3+6] = B[A2+6] ^ B[A2+3] ^ B[A3+5] ^ B[A3+4] ^ B[A3+3] ^ B[A0+6] ^ B[A0+5] ^ B[A0+3] ^ B[A1+6] ^ B[A1+4] ^ B[A1+3] ^ of2; Bp[A3+7] = B[A2+7] ^ B[A2+4] ^ B[A3+6] ^ B[A3+5] ^ B[A3+4] ^ B[A0+7] ^ B[A0+6] ^ B[A0+4] ^ B[A1+7] ^ B[A1+5] ^ B[A1+4]; Bp += BLOCK_SIZE/4; B += BLOCK_SIZE/4; } memmove(B - BLOCK_SIZE, Bp - BLOCK_SIZE,sizeof(Bp_space)); } void bs_expand_key(word_t (* rk)[BLOCK_SIZE], uint8_t * _key) { // TODO integrate this better uint8_t key[KEY_SCHEDULE_SIZE]; memmove(key,_key,BLOCK_SIZE/8); expand_key(key); int i, j = 0, k, l; for (i = 0; i < KEY_SCHEDULE_SIZE; i += (BLOCK_SIZE/8)) { memmove(rk[j], key + i, BLOCK_SIZE / 8); for (k = WORDS_PER_BLOCK; k < 128; k += WORDS_PER_BLOCK) { for (l = 0; l < WORDS_PER_BLOCK; l++) { rk[j][k + l] = rk[j][l]; } } bs_transpose(rk[j]); j++; } } void bs_cipher(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE]) { int round; bs_transpose(state); bs_addroundkey(state,rk[0]); for (round = 1; round < 10; round++) { bs_apply_sbox(state); /*bs_shiftrows(state);*/ /*bs_mixcolumns(state);*/ bs_shiftmix(state); bs_addroundkey(state,rk[round]); } bs_apply_sbox(state); bs_shiftrows(state); bs_addroundkey(state,rk[10]); bs_transpose_rev(state); } void bs_cipher_rev(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE]) { int round; bs_transpose(state); bs_addroundkey(state,rk[10]); for (round = 9; round > 0; round--) { bs_shiftrows_rev(state); bs_apply_sbox_rev(state); bs_addroundkey(state,rk[round]); bs_mixcolumns_rev(state); } bs_shiftrows_rev(state); bs_apply_sbox_rev(state); bs_addroundkey(state,rk[0]); bs_transpose_rev(state); }