aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-03-29 18:37:49 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-03-29 18:37:49 +0100
commitab50e8fd5baf3692fd6de00c71cf01f0c9e0ce4b (patch)
treebdebe68f4eea5035667c923d85b62e646e4478a8
parent0fdfe307defb3af858e0016e9d3d0883524aec20 (diff)
parent522d7e88ef611de8edde6ae49cb985da58b8963c (diff)
downloadcompcert-kvx-ab50e8fd5baf3692fd6de00c71cf01f0c9e0ce4b.tar.gz
compcert-kvx-ab50e8fd5baf3692fd6de00c71cf01f0c9e0ce4b.zip
Merge branch 'mppa-mul' into mppa_k0c
-rw-r--r--test/monniaux/bitsliced-aes/bs.c12
-rw-r--r--test/monniaux/bitsliced-aes/one_file/bitsliced-aes.c1542
-rwxr-xr-xtest/monniaux/bitsliced-aes/one_file/compare.sh12
-rw-r--r--test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c32
-rw-r--r--test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c20
-rwxr-xr-xtest/monniaux/bitsliced-aes/one_file/reduce/compare.sh16
-rw-r--r--test/monniaux/ocaml/Makefile32
-rw-r--r--test/monniaux/ocaml/byterun/caml/finalise.h2
-rw-r--r--test/monniaux/ocaml/byterun/caml/version.h6
-rw-r--r--test/monniaux/ocaml/byterun/compact.c1
-rw-r--r--test/monniaux/ocaml/byterun/main.c13
-rw-r--r--test/monniaux/ocaml/byterun/win32.c1019
-rw-r--r--test/monniaux/picosat-965/Makefile34
-rw-r--r--test/monniaux/picosat-965/app.c4
-rw-r--r--test/monniaux/picosat-965/main.c20
-rw-r--r--test/monniaux/picosat-965/picosat.c94
-rw-r--r--test/monniaux/rules.mk14
-rw-r--r--test/monniaux/ternary/Makefile26
-rw-r--r--test/monniaux/ternary/ternary.c29
-rw-r--r--test/monniaux/too_slow/Makefile27
-rw-r--r--test/monniaux/too_slow/memset_from_bitsliced-aes.c43
21 files changed, 1916 insertions, 1082 deletions
diff --git a/test/monniaux/bitsliced-aes/bs.c b/test/monniaux/bitsliced-aes/bs.c
index df5c1f6b..4a9df4aa 100644
--- a/test/monniaux/bitsliced-aes/bs.c
+++ b/test/monniaux/bitsliced-aes/bs.c
@@ -14,6 +14,11 @@
#error "endianness not supported"
#endif
+/* TERNARY_XY0(t, x): evaluates to x when t is non-zero and to 0 otherwise.
+ * The enabled (#if 1) variant negates the 0/1 result of the comparison to
+ * obtain an all-zeros/all-ones mask and ANDs it with x, avoiding the ?:
+ * operator; the #else variant is the equivalent plain ternary form. */
+#if 1
+#define TERNARY_XY0(t, x) ((-((t) != 0)) & (x))
+#else
+#define TERNARY_XY0(t, x) (((t) != 0) ? (x) : (0))
+#endif
void bs_addroundkey(word_t * B, word_t * rk)
{
@@ -388,11 +393,14 @@ void bs_transpose_dst(word_t * transpose, word_t * blocks)
int offset = i << MUL_SHIFT;
#ifndef UNROLL_TRANSPOSE
+ /* DM experiments */
+ /* The normal ternary operator costs us a lot!
+ from 10145951 to 7995063 */
int j;
for(j=0; j < WORD_SIZE; j++)
{
// TODO make const time
- transpose[offset + j] |= (w & (ONE << j)) ? bitpos : 0;
+ transpose[offset + j] |= TERNARY_XY0(w & (ONE << j), bitpos);
}
#else
@@ -488,7 +496,7 @@ void bs_transpose_rev(word_t * blocks)
int j;
for(j=0; j < WORD_SIZE; j++)
{
- word_t bit = (w & (ONE << j)) ? (ONE << (k % WORD_SIZE)) : 0;
+ word_t bit = TERNARY_XY0((w & (ONE << j)), (ONE << (k % WORD_SIZE)));
transpose[j * WORDS_PER_BLOCK + (offset)] |= bit;
}
#else
diff --git a/test/monniaux/bitsliced-aes/one_file/bitsliced-aes.c b/test/monniaux/bitsliced-aes/one_file/bitsliced-aes.c
new file mode 100644
index 00000000..bfa9dba8
--- /dev/null
+++ b/test/monniaux/bitsliced-aes/one_file/bitsliced-aes.c
@@ -0,0 +1,1542 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "/home/monniaux/work/Kalray/CompCert/test/monniaux/clock.h"
+
+#define EXIT1
+
+void aes_ecb_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key);
+void aes_ecb_decrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key);
+
+void aes_ctr_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key, uint8_t * iv);
+#define aes_ctr_decrypt(outputb,inputb,size,key,iv) aes_ctr_encrypt(outputb,inputb,size,key,iv)
+
+/* Size of one cipher block in bits (BLOCK_SIZE/8 is used as its byte count). */
+#define BLOCK_SIZE 128
+/* Byte length of the expanded key: expand_key() fills 11 sets of 16 bytes. */
+#define KEY_SCHEDULE_SIZE 176
+/* Width of word_t in bits; selects one of the configurations below. */
+#define WORD_SIZE 64
+/* Byte size of an array of BLOCK_SIZE words, i.e. of one bitsliced batch. */
+#define BS_BLOCK_SIZE (BLOCK_SIZE * WORD_SIZE / 8)
+/* Number of word_t needed to hold one BLOCK_SIZE-bit block. */
+#define WORDS_PER_BLOCK (BLOCK_SIZE / WORD_SIZE)
+
+/* Per-width settings:
+ *   word_t                   - unsigned integer type of WORD_SIZE bits
+ *   ONE                      - the constant 1 used to build single-bit masks
+ *   MUL_SHIFT                - log2(WORD_SIZE), so (i << MUL_SHIFT) == i * WORD_SIZE
+ *   WFMT / WPAD              - printf length+conversion and zero-padded width
+ *                              used when dumping a word_t in hex
+ *   __builtin_bswap_wordsize - byte swap of matching width (identity for 8 bits)
+ * NOTE(review): "lx" assumes uint64_t is unsigned long on the target - confirm. */
+#if (WORD_SIZE==64)
+ typedef uint64_t word_t;
+ #define ONE 1ULL
+ #define MUL_SHIFT 6
+ #define WFMT "lx"
+ #define WPAD "016"
+ #define __builtin_bswap_wordsize(x) __builtin_bswap64(x)
+#elif (WORD_SIZE==32)
+ typedef uint32_t word_t;
+ #define ONE 1UL
+ #define MUL_SHIFT 5
+ #define WFMT "x"
+ #define WPAD "08"
+ #define __builtin_bswap_wordsize(x) __builtin_bswap32(x)
+#elif (WORD_SIZE==16)
+ typedef uint16_t word_t;
+ #define ONE 1
+ #define MUL_SHIFT 4
+ #define WFMT "hx"
+ #define WPAD "04"
+ #define __builtin_bswap_wordsize(x) __builtin_bswap16(x)
+#elif (WORD_SIZE==8)
+ typedef uint8_t word_t;
+ #define ONE 1
+ #define MUL_SHIFT 3
+ #define WFMT "hhx"
+ #define WPAD "02"
+ #define __builtin_bswap_wordsize(x) (x)
+#else
+#error "invalid word size"
+#endif
+
+void bs_transpose(word_t * blocks);
+void bs_transpose_rev(word_t * blocks);
+void bs_transpose_dst(word_t * transpose, word_t * blocks);
+
+void bs_sbox(word_t U[8]);
+void bs_sbox_rev(word_t U[8]);
+
+void bs_shiftrows(word_t * B);
+void bs_shiftrows_rev(word_t * B);
+
+void bs_mixcolumns(word_t * B);
+void bs_mixcolumns_rev(word_t * B);
+
+void bs_shiftmix(word_t * B);
+
+void bs_addroundkey(word_t * B, word_t * rk);
+void bs_apply_sbox(word_t * input);
+void bs_apply_sbox_rev(word_t * input);
+
+
+void expand_key(unsigned char *in);
+void bs_expand_key(word_t (* rk)[BLOCK_SIZE], uint8_t * key);
+
+void bs_cipher(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE]);
+void bs_cipher_rev(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE]);
+
+
+void dump_hex(uint8_t * h, int len);
+void dump_word(word_t * h, int len);
+void dump_block(word_t * h, int len);
+
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
+
+/*
+ * Encrypts `size` bytes of `inputb` into `outputb` in ECB mode.
+ *
+ * The data is processed in chunks of at most BS_BLOCK_SIZE bytes. Each chunk
+ * is copied into a zero-filled, word-aligned work buffer, run through
+ * bs_cipher() with the round keys produced by bs_expand_key(key), and the
+ * first `chunk` bytes of the result are written to the output.
+ *
+ * Compared with the previous version this
+ *  - advances both the input and the output cursor by the number of bytes
+ *    actually consumed (the old code never advanced `inputb` and advanced a
+ *    word_t pointer by a byte count),
+ *  - writes a trailing partial chunk at the current output position instead
+ *    of at the start of `outputb`,
+ *  - no longer clears `outputb` up front, so `outputb == inputb` works,
+ *  - no longer runs the cipher through a word_t pointer cast from the
+ *    byte-aligned `outputb`.
+ *
+ * outputb: destination, at least `size` bytes.
+ * inputb:  source, at least `size` bytes.
+ * size:    number of bytes to process; 0 does nothing.
+ * key:     key material handed unchanged to bs_expand_key().
+ */
+void aes_ecb_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key)
+{
+    word_t work[BLOCK_SIZE];
+    word_t rk[11][BLOCK_SIZE];
+
+    bs_expand_key(rk, key);
+
+    while (size > 0)
+    {
+        size_t chunk = (size < (size_t)BS_BLOCK_SIZE) ? size : (size_t)BS_BLOCK_SIZE;
+
+        /* Zero first so a short final chunk is padded with zeros. */
+        memset(work, 0, sizeof(work));
+        memcpy(work, inputb, chunk);
+        bs_cipher(work, rk);
+        memcpy(outputb, work, chunk);
+
+        inputb += chunk;
+        outputb += chunk;
+        size -= chunk;
+    }
+}
+
+/*
+ * Decrypts `size` bytes of `inputb` into `outputb` in ECB mode.
+ *
+ * The data is processed in chunks of at most BS_BLOCK_SIZE bytes. Each chunk
+ * is copied into a zero-filled, word-aligned work buffer, run through
+ * bs_cipher_rev() with the round keys produced by bs_expand_key(key), and the
+ * first `chunk` bytes of the result are written to the output.
+ *
+ * Compared with the previous version this
+ *  - advances both the input and the output cursor by the number of bytes
+ *    actually consumed (the old code never advanced `inputb` and advanced a
+ *    word_t pointer by a byte count),
+ *  - writes a trailing partial chunk at the current output position instead
+ *    of at the start of `outputb`,
+ *  - no longer clears `outputb` up front, so `outputb == inputb` works,
+ *  - no longer runs the cipher through a word_t pointer cast from the
+ *    byte-aligned `outputb`.
+ *
+ * outputb: destination, at least `size` bytes.
+ * inputb:  source, at least `size` bytes.
+ * size:    number of bytes to process; 0 does nothing.
+ * key:     key material handed unchanged to bs_expand_key().
+ */
+void aes_ecb_decrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key)
+{
+    word_t work[BLOCK_SIZE];
+    word_t rk[11][BLOCK_SIZE];
+
+    bs_expand_key(rk, key);
+
+    while (size > 0)
+    {
+        size_t chunk = (size < (size_t)BS_BLOCK_SIZE) ? size : (size_t)BS_BLOCK_SIZE;
+
+        /* Zero first so a short final chunk is padded with zeros. */
+        memset(work, 0, sizeof(work));
+        memcpy(work, inputb, chunk);
+        bs_cipher_rev(work, rk);
+        memcpy(outputb, work, chunk);
+
+        inputb += chunk;
+        outputb += chunk;
+        size -= chunk;
+    }
+}
+
+/*
+ * Adds `i` to the BLOCK_SIZE/8-byte counter at `ctr`, treating the last byte
+ * as the least significant one and carrying towards ctr[0].
+ *
+ * The carry stops at ctr[0]: when the whole counter overflows it wraps around
+ * instead of stepping to the byte in front of the buffer, which the previous
+ * unbounded loop did.
+ */
+static void INC_CTR(uint8_t * ctr, uint8_t i)
+{
+    int pos = BLOCK_SIZE/8 - 1;
+    uint8_t before = ctr[pos];
+
+    ctr[pos] += i;
+    /* A byte that became smaller than it was has wrapped: carry one upward. */
+    while (ctr[pos] < before && pos > 0)
+    {
+        pos--;
+        before = ctr[pos];
+        ctr[pos]++;
+    }
+}
+
+/*
+ * CTR-mode transform of `size` bytes from `inputb` into `outputb`.
+ *
+ * A private copy of `iv` serves as the running counter. For every chunk of at
+ * most BS_BLOCK_SIZE bytes, one counter value per (possibly partial)
+ * BLOCK_SIZE/8-byte block is laid out in `ctr`, the buffer is run through
+ * bs_cipher(), and the result is XORed byte by byte with the input.
+ * `outputb` is cleared before anything is read from `inputb`.
+ */
+void aes_ctr_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key, uint8_t * iv)
+{
+    word_t rk[11][BLOCK_SIZE];
+    word_t ctr[BLOCK_SIZE];
+    uint8_t iv_copy[BLOCK_SIZE/8];
+
+    memset(outputb,0,size);
+    memset(ctr,0,sizeof(ctr));
+    memmove(iv_copy,iv,BLOCK_SIZE/8);
+
+    bs_expand_key(rk, key);
+
+    do
+    {
+        int chunk = MIN(size, BS_BLOCK_SIZE);
+        /* Number of counter blocks, rounding a partial block up. */
+        int blocks = chunk / (BLOCK_SIZE/8) + ((chunk % (BLOCK_SIZE/8)) ? 1 : 0);
+        int b;
+        int n;
+        const uint8_t * stream;
+
+        for (b = 0; b < blocks; b++)
+        {
+            memmove(&ctr[b * WORDS_PER_BLOCK], iv_copy, BLOCK_SIZE/8);
+            INC_CTR(iv_copy,1);
+        }
+
+        bs_cipher(ctr, rk);
+        size -= chunk;
+
+        stream = (const uint8_t *) ctr;
+        for (n = 0; n < chunk; n++)
+        {
+            outputb[n] = stream[n] ^ inputb[n];
+        }
+        outputb += chunk;
+        inputb += chunk;
+    }
+    while(size);
+}
+
+/* Prints `len` bytes starting at `h` as two-digit hex, followed by a newline. */
+void dump_hex(uint8_t * h, int len)
+{
+    for (; len != 0; len--)
+    {
+        printf("%02hhx",*h);
+        h++;
+    }
+    printf("\n");
+}
+
+/*
+ * Prints `len` words starting at `h`, one zero-padded hex word per line.
+ * Whenever the number of words still to print (including the current one) is
+ * a multiple of 8, the word is preceded by a "<128 - remaining>:" label line.
+ * A blank line terminates the dump.
+ */
+void dump_word(word_t * h, int len)
+{
+    for (; len != 0; h++)
+    {
+        len--;
+        if ((len + 1) % 8 != 0)
+        {
+            printf("%" WPAD WFMT "\n",*h);
+        }
+        else
+        {
+            printf("%d:\n%" WPAD WFMT "\n",128-len-1,*h);
+        }
+    }
+
+    printf("\n");
+}
+
+/*
+ * Prints `len` words starting at `h` two per line (h[0]h[1], h[2]h[3], ...)
+ * as zero-padded hex, followed by a blank line. A trailing unpaired word is
+ * not printed, so the function never reads past h[len - 1].
+ *
+ * Fixes two defects of the previous version: `len-=2 >= 0` parsed as
+ * `len -= (2 >= 0)`, and passing `*h++, *h++` as two arguments modified `h`
+ * twice without sequencing, which is undefined behaviour.
+ */
+void dump_block(word_t * h, int len)
+{
+    for (; len >= 2; len -= 2, h += 2)
+    {
+        printf("%" WPAD WFMT"%" WPAD WFMT "\n",h[0],h[1]);
+    }
+    printf("\n");
+}
+
+/* 256-entry byte substitution table, indexed by the input byte.
+ * schedule_core() applies it to each of the four bytes it processes during
+ * key expansion. The column header below gives the low nibble of the index;
+ * each row covers one value of the high nibble. */
+static const uint8_t sbox[256] = {
+ //0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
+
+/* Rotates the four bytes at `in` one position towards index 0:
+ * {a, b, c, d} becomes {b, c, d, a}. */
+static void rotate(unsigned char *in) {
+    const unsigned char first = in[0];
+
+    in[0] = in[1];
+    in[1] = in[2];
+    in[2] = in[3];
+    in[3] = first;
+}
+
+/* Round constant for key expansion: returns 0 for in == 0; otherwise starts
+ * from 1 and doubles it (in - 1) times, XORing in 0x1b each time the top bit
+ * is shifted out of the byte. */
+static unsigned char rcon(unsigned char in) {
+    unsigned char value = 1;
+
+    if (!in)
+        return 0;
+
+    for (; in > 1; in--) {
+        const int top_bit_set = (value & 0x80) == 0x80;
+
+        value <<= 1;
+        if (top_bit_set)
+            value ^= 0x1b;
+    }
+    return value;
+}
+
+/* Core step of the key expansion on a 4-byte value: rotate the bytes with
+ * rotate(), substitute each one through sbox[], then XOR rcon(i) into the
+ * first byte. */
+static void schedule_core(unsigned char *in, unsigned char i) {
+    int pos;
+
+    rotate(in);
+
+    /* The four substitutions are independent of each other. */
+    for (pos = 3; pos >= 0; pos--)
+        in[pos] = sbox[in[pos]];
+
+    in[0] ^= rcon(i);
+}
+
+/*
+ * Expands the key stored in the first 16 bytes of `in` to 176 bytes in place
+ * (11 sets of sixteen bytes; the first set is the user-supplied key).
+ *
+ * Each step derives 4 new bytes from the previous 4 bytes and the 4 bytes
+ * located 16 positions earlier; at every 16-byte boundary the previous 4
+ * bytes first go through schedule_core() with an incrementing round index.
+ * `in` must therefore provide room for 176 bytes.
+ */
+void expand_key(unsigned char *in) {
+    unsigned char round = 1;
+    unsigned int pos;
+
+    for (pos = 16; pos < 176; pos += 4) {
+        unsigned char t[4];
+        unsigned int k;
+
+        /* Start from the last 4 bytes produced so far. */
+        for (k = 0; k < 4; k++)
+            t[k] = in[pos - 4 + k];
+
+        if (pos % 16 == 0) {
+            schedule_core(t, round);
+            round++;
+        }
+
+        for (k = 0; k < 4; k++)
+            in[pos + k] = in[pos + k - 16] ^ t[k];
+    }
+}
+
+/*
+ * bs2le()/bs2be(): byte-order adjustment applied to words by the transpose
+ * routines. On little-endian targets both are the identity; on the other
+ * recognised targets both byte-swap the word.
+ *
+ * The second branch used to repeat the little-endian __BYTE_ORDER__ test, so
+ * that clause could never be true once the first branch had failed; it now
+ * tests for big-endian as intended.
+ */
+#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||\
+ defined(__amd64__) || defined(__amd32__)|| defined(__amd16__)
+#define bs2le(x) (x)
+#define bs2be(x) (x)
+#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) ||\
+ (defined(__sparc__))
+#define bs2le(x) __builtin_bswap_wordsize(x)
+#define bs2be(x) __builtin_bswap_wordsize(x)
+#else
+#error "endianness not supported"
+#endif
+
+
+/* XORs the BLOCK_SIZE words of round key `rk` into the state `B`, in place. */
+void bs_addroundkey(word_t * B, word_t * rk)
+{
+    word_t * const end = B + BLOCK_SIZE;
+
+    while (B != end)
+    {
+        *B ^= *rk;
+        B++;
+        rk++;
+    }
+}
+
+/* Runs bs_sbox() on each consecutive group of 8 words of the
+ * BLOCK_SIZE-word state `input`. */
+void bs_apply_sbox(word_t * input)
+{
+    word_t * group = input;
+    word_t * const end = input + BLOCK_SIZE;
+
+    while (group < end)
+    {
+        bs_sbox(group);
+        group += 8;
+    }
+}
+
+/* Runs bs_sbox_rev() on each consecutive group of 8 words of the
+ * BLOCK_SIZE-word state `input`. */
+void bs_apply_sbox_rev(word_t * input)
+{
+    word_t * group = input;
+    word_t * const end = input + BLOCK_SIZE;
+
+    while (group < end)
+    {
+        bs_sbox_rev(group);
+        group += 8;
+    }
+}
+
+/*July 2011*/
+/*Straight-line program for AES s box*/
+
+/*Input is U[0], U[1],...,U[7]*/
+/*Output is S[0], S[1],...,S[7]*/
+// http://cs-www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html
+/*
+ * bs_sbox_rev: in-place substitution on the eight words U[0..7], written as a
+ * fixed sequence of whole-word XOR, AND and NOT operations with no branches
+ * and no table lookups. The result is assembled in W[0..7] and copied over U
+ * at the end, so every U[] read below sees the original input.
+ * Presumably this is the inverse of bs_sbox() (it is what bs_apply_sbox_rev()
+ * calls) - confirm against the referenced circuit.
+ * NOTE(review): T5, T18 and P21 are declared but never used in this function.
+ */
+void bs_sbox_rev(word_t U[8])
+{
+ word_t W[8];
+ word_t
+ T1,T2,T3,T4,T5,T6,T8,
+ T9,T10,T13,T14,T15,T16,
+ T17,T18,T19,T20,T22,T23,T24,
+ T25, T26, T27;
+
+ word_t
+ M1,M2,M3,M4,M5,M6,M7,M8,
+ M9,M10,M11,M12,M13,M14,M15,
+ M16,M17,M18,M19,M20,M21,M22,
+ M23,M24,M25,M26,M27,M28,M29,
+ M30,M31,M32,M33,M34,M35,M36,
+ M37,M38,M39,M40,M41,M42,M43,
+ M44,M45,M46,M47,M48,M49,M50,
+ M51,M52,M53,M54,M55,M56,M57,
+ M58,M59,M60,M61,M62,M63;
+
+ word_t
+ P0,P1,P2,P3,P4,P5,P6,P7,P8,
+ P9,P10,P11,P12,P13,P14,
+ P15,P16,P17,P18,P19,P20,
+ P21,P22,P23,P24,P25,P26,
+ P27,P28,P29;
+
+ word_t Y5,
+ R5, R13, R17, R18, R19;
+
+
+ /* Input stage: T*, R* and Y5 are XOR/NOT combinations of the inputs. */
+ T23 = U[7] ^ U[4];
+ T22 = ~(U[6] ^ U[4]);
+ T2 = ~(U[7] ^ U[6]);
+ T1 = U[4] ^ U[3];
+ T24 = ~(U[3] ^ U[0]);
+ R5 = U[1] ^ U[0];
+ T8 = ~(U[6] ^ T23);
+ T19 = T22 ^ R5;
+ T9 = ~(U[0] ^ T1);
+ T10 = T2 ^ T24;
+ T13 = T2 ^ R5;
+ T3 = T1 ^ R5;
+ T25 = ~(U[5] ^ T1);
+ R13 = U[6] ^ U[1];
+ T17 = ~(U[5] ^ T19);
+ T20 = T24 ^ R13;
+ T4 = U[3] ^ T8;
+ R17 = ~(U[5] ^ U[2]);
+ R18 = ~(U[2] ^ U[1]);
+ R19 = ~(U[5] ^ U[3]);
+ Y5 = U[7] ^ R17;
+ T6 = T22 ^ R17;
+ T16 = R13 ^ R19;
+ T27 = T1 ^ R18;
+ T15 = T10 ^ T27;
+ T14 = T10 ^ R18;
+ T26 = T3 ^ T16;
+ /* Middle stage: M1..M63 mix AND and XOR over the values above. */
+ M1 = T13 & T6;
+ M2 = T23 & T8;
+ M3 = T14 ^ M1;
+ M4 = T19 & Y5;
+ M5 = M4 ^ M1;
+ M6 = T3 & T16;
+ M7 = T22 & T9;
+ M8 = T26 ^ M6;
+ M9 = T20 & T17;
+ M10 = M9 ^ M6;
+ M11 = T1 & T15;
+ M12 = T4 & T27;
+ M13 = M12 ^ M11;
+ M14 = T2 & T10;
+ M15 = M14 ^ M11;
+ M16 = M3 ^ M2;
+ M17 = M5 ^ T24;
+ M18 = M8 ^ M7;
+ M19 = M10 ^ M15;
+ M20 = M16 ^ M13;
+ M21 = M17 ^ M15;
+ M22 = M18 ^ M13;
+ M23 = M19 ^ T25;
+ M24 = M22 ^ M23;
+ M25 = M22 & M20;
+ M26 = M21 ^ M25;
+ M27 = M20 ^ M21;
+ M28 = M23 ^ M25;
+ M29 = M28 & M27;
+ M30 = M26 & M24;
+ M31 = M20 & M23;
+ M32 = M27 & M31;
+ M33 = M27 ^ M25;
+ M34 = M21 & M22;
+ M35 = M24 & M34;
+ M36 = M24 ^ M25;
+ M37 = M21 ^ M29;
+ M38 = M32 ^ M33;
+ M39 = M23 ^ M30;
+ M40 = M35 ^ M36;
+ M41 = M38 ^ M40;
+ M42 = M37 ^ M39;
+ M43 = M37 ^ M38;
+ M44 = M39 ^ M40;
+ M45 = M42 ^ M41;
+ M46 = M44 & T6;
+ M47 = M40 & T8;
+ M48 = M39 & Y5;
+ M49 = M43 & T16;
+ M50 = M38 & T9;
+ M51 = M37 & T17;
+ M52 = M42 & T15;
+ M53 = M45 & T27;
+ M54 = M41 & T10;
+ M55 = M44 & T13;
+ M56 = M40 & T23;
+ M57 = M39 & T19;
+ M58 = M43 & T3;
+ M59 = M38 & T22;
+ M60 = M37 & T20;
+ M61 = M42 & T1;
+ M62 = M45 & T4;
+ M63 = M41 & T2;
+ /* Output stage: P* are XOR-only combinations of M46..M63. */
+ P0 = M52 ^ M61;
+ P1 = M58 ^ M59;
+ P2 = M54 ^ M62;
+ P3 = M47 ^ M50;
+ P4 = M48 ^ M56;
+ P5 = M46 ^ M51;
+ P6 = M49 ^ M60;
+ P7 = P0 ^ P1;
+ P8 = M50 ^ M53;
+ P9 = M55 ^ M63;
+ P10 = M57 ^ P4;
+ P11 = P0 ^ P3;
+ P12 = M46 ^ M48;
+ P13 = M49 ^ M51;
+ P14 = M49 ^ M62;
+ P15 = M54 ^ M59;
+ P16 = M57 ^ M61;
+ P17 = M58 ^ P2;
+ P18 = M63 ^ P5;
+ P19 = P2 ^ P3;
+ P20 = P4 ^ P6;
+ P22 = P2 ^ P7;
+ P23 = P7 ^ P8;
+ P24 = P5 ^ P7;
+ P25 = P6 ^ P10;
+ P26 = P9 ^ P11;
+ P27 = P10 ^ P18;
+ P28 = P11 ^ P25;
+ P29 = P15 ^ P20;
+ W[7] = P13 ^ P22;
+ W[6] = P26 ^ P29;
+ W[5] = P17 ^ P28;
+ W[4] = P12 ^ P22;
+ W[3] = P23 ^ P27;
+ W[2] = P19 ^ P24;
+ W[1] = P14 ^ P23;
+ W[0] = P9 ^ P16;
+
+ /* Publish the result only now that all reads of U[] are done. */
+ memmove(U,W,sizeof(W));
+}
+
+/*
+ * bs_sbox: in-place substitution on the eight words U[0..7], written as a
+ * fixed sequence of whole-word XOR, AND and NOT operations with no branches
+ * and no table lookups. The result is assembled in S[0..7] and copied over U
+ * at the end, so every U[] read below sees the original input.
+ * bs_apply_sbox() calls it once per group of 8 state words.
+ */
+void bs_sbox(word_t U[8])
+{
+ word_t S[8];
+ word_t
+ T1,T2,T3,T4,T5,T6,T7,T8,
+ T9,T10,T11,T12,T13,T14,T15,T16,
+ T17,T18,T19,T20,T21,T22,T23,T24,
+ T25, T26, T27;
+
+ word_t
+ M1,M2,M3,M4,M5,M6,M7,M8,
+ M9,M10,M11,M12,M13,M14,M15,
+ M16,M17,M18,M19,M20,M21,M22,
+ M23,M24,M25,M26,M27,M28,M29,
+ M30,M31,M32,M33,M34,M35,M36,
+ M37,M38,M39,M40,M41,M42,M43,
+ M44,M45,M46,M47,M48,M49,M50,
+ M51,M52,M53,M54,M55,M56,M57,
+ M58,M59,M60,M61,M62,M63;
+
+ word_t
+ L0,L1,L2,L3,L4,L5,L6,L7,L8,
+ L9,L10,L11,L12,L13,L14,
+ L15,L16,L17,L18,L19,L20,
+ L21,L22,L23,L24,L25,L26,
+ L27,L28,L29;
+
+ /* Input stage: T1..T27 are XOR-only combinations of the inputs. */
+ T1 = U[7] ^ U[4];
+ T2 = U[7] ^ U[2];
+ T3 = U[7] ^ U[1];
+ T4 = U[4] ^ U[2];
+ T5 = U[3] ^ U[1];
+ T6 = T1 ^ T5;
+ T7 = U[6] ^ U[5];
+ T8 = U[0] ^ T6;
+ T9 = U[0] ^ T7;
+ T10 = T6 ^ T7;
+ T11 = U[6] ^ U[2];
+ T12 = U[5] ^ U[2];
+ T13 = T3 ^ T4;
+ T14 = T6 ^ T11;
+ T15 = T5 ^ T11;
+ T16 = T5 ^ T12;
+ T17 = T9 ^ T16;
+ T18 = U[4] ^ U[0];
+ T19 = T7 ^ T18;
+ T20 = T1 ^ T19;
+ T21 = U[1] ^ U[0];
+ T22 = T7 ^ T21;
+ T23 = T2 ^ T22;
+ T24 = T2 ^ T10;
+ T25 = T20 ^ T17;
+ T26 = T3 ^ T16;
+ T27 = T1 ^ T12;
+ /* Middle stage: M1..M63 mix AND and XOR over the values above. */
+ M1 = T13 & T6;
+ M2 = T23 & T8;
+ M3 = T14 ^ M1;
+ M4 = T19 & U[0];
+ M5 = M4 ^ M1;
+ M6 = T3 & T16;
+ M7 = T22 & T9;
+ M8 = T26 ^ M6;
+ M9 = T20 & T17;
+ M10 = M9 ^ M6;
+ M11 = T1 & T15;
+ M12 = T4 & T27;
+ M13 = M12 ^ M11;
+ M14 = T2 & T10;
+ M15 = M14 ^ M11;
+ M16 = M3 ^ M2;
+ M17 = M5 ^ T24;
+ M18 = M8 ^ M7;
+ M19 = M10 ^ M15;
+ M20 = M16 ^ M13;
+ M21 = M17 ^ M15;
+ M22 = M18 ^ M13;
+ M23 = M19 ^ T25;
+ M24 = M22 ^ M23;
+ M25 = M22 & M20;
+ M26 = M21 ^ M25;
+ M27 = M20 ^ M21;
+ M28 = M23 ^ M25;
+ M29 = M28 & M27;
+ M30 = M26 & M24;
+ M31 = M20 & M23;
+ M32 = M27 & M31;
+ M33 = M27 ^ M25;
+ M34 = M21 & M22;
+ M35 = M24 & M34;
+ M36 = M24 ^ M25;
+ M37 = M21 ^ M29;
+ M38 = M32 ^ M33;
+ M39 = M23 ^ M30;
+ M40 = M35 ^ M36;
+ M41 = M38 ^ M40;
+ M42 = M37 ^ M39;
+ M43 = M37 ^ M38;
+ M44 = M39 ^ M40;
+ M45 = M42 ^ M41;
+ M46 = M44 & T6;
+ M47 = M40 & T8;
+ M48 = M39 & U[0];
+ M49 = M43 & T16;
+ M50 = M38 & T9;
+ M51 = M37 & T17;
+ M52 = M42 & T15;
+ M53 = M45 & T27;
+ M54 = M41 & T10;
+ M55 = M44 & T13;
+ M56 = M40 & T23;
+ M57 = M39 & T19;
+ M58 = M43 & T3;
+ M59 = M38 & T22;
+ M60 = M37 & T20;
+ M61 = M42 & T1;
+ M62 = M45 & T4;
+ M63 = M41 & T2;
+ /* Output stage: L0..L29 are XOR-only combinations of M46..M63. */
+ L0 = M61 ^ M62;
+ L1 = M50 ^ M56;
+ L2 = M46 ^ M48;
+ L3 = M47 ^ M55;
+ L4 = M54 ^ M58;
+ L5 = M49 ^ M61;
+ L6 = M62 ^ L5;
+ L7 = M46 ^ L3;
+ L8 = M51 ^ M59;
+ L9 = M52 ^ M53;
+ L10 = M53 ^ L4;
+ L11 = M60 ^ L2;
+ L12 = M48 ^ M51;
+ L13 = M50 ^ L0;
+ L14 = M52 ^ M61;
+ L15 = M55 ^ L1;
+ L16 = M56 ^ L0;
+ L17 = M57 ^ L1;
+ L18 = M58 ^ L8;
+ L19 = M63 ^ L4;
+ L20 = L0 ^ L1;
+ L21 = L1 ^ L7;
+ L22 = L3 ^ L12;
+ L23 = L18 ^ L2;
+ L24 = L15 ^ L9;
+ L25 = L6 ^ L10;
+ L26 = L7 ^ L9;
+ L27 = L8 ^ L10;
+ L28 = L11 ^ L14;
+ L29 = L11 ^ L17;
+ /* Four of the eight outputs (S[6], S[5], S[1], S[0]) are complemented. */
+ S[7] = L6 ^ L24;
+ S[6] = ~(L16 ^ L26);
+ S[5] = ~(L19 ^ L28);
+ S[4] = L6 ^ L21;
+ S[3] = L20 ^ L22;
+ S[2] = L25 ^ L29;
+ S[1] = ~(L13 ^ L27);
+ S[0] = ~(L6 ^ L23);
+
+ /* Publish the result only now that all reads of U[] are done. */
+ memmove(U,S,sizeof(S));
+}
+
+/* Transposes `blocks` in place: bs_transpose_dst() accumulates the result in
+ * a zero-initialised scratch array, which is then copied back over `blocks`. */
+void bs_transpose(word_t * blocks)
+{
+    word_t scratch[BLOCK_SIZE] = {0};
+
+    bs_transpose_dst(scratch, blocks);
+    memmove(blocks, scratch, sizeof(scratch));
+}
+
+/* Helpers for the UNROLL_TRANSPOSE variant of bs_transpose_dst(); they rely
+ * on its locals `transpose`, `offset`, `w` and `bitpos`. */
+/* Sets `bitpos` in transpose[offset + n] when bit n of w is set. */
+#define BS_TRANSPOSE_DST_BIT(n) \
+    transpose[offset + (n)] |= (w & (ONE << (n))) ? bitpos : 0
+/* Eight consecutive BS_TRANSPOSE_DST_BIT steps starting at bit n. */
+#define BS_TRANSPOSE_DST_BYTE(n) \
+    do { \
+        BS_TRANSPOSE_DST_BIT((n) + 0); \
+        BS_TRANSPOSE_DST_BIT((n) + 1); \
+        BS_TRANSPOSE_DST_BIT((n) + 2); \
+        BS_TRANSPOSE_DST_BIT((n) + 3); \
+        BS_TRANSPOSE_DST_BIT((n) + 4); \
+        BS_TRANSPOSE_DST_BIT((n) + 5); \
+        BS_TRANSPOSE_DST_BIT((n) + 6); \
+        BS_TRANSPOSE_DST_BIT((n) + 7); \
+    } while (0)
+
+/*
+ * Bit-matrix transpose from `blocks` into `transpose`.
+ *
+ * For every k < WORD_SIZE, i < WORDS_PER_BLOCK and j < WORD_SIZE: when bit j
+ * of bs2le(blocks[k * WORDS_PER_BLOCK + i]) is set, bit k of
+ * transpose[i * WORD_SIZE + j] is set. Bits are only ever ORed in, so the
+ * caller must pass a zeroed `transpose` (bs_transpose() does).
+ *
+ * `bitpos` is a word_t: it used to be an int, which cannot hold ONE << k for
+ * the upper half of a 64-bit word, so those blocks were lost (and k == 31
+ * produced a sign-extended mask).
+ *
+ * Defining UNROLL_TRANSPOSE selects a fully unrolled inner loop that performs
+ * the same updates.
+ */
+void bs_transpose_dst(word_t * transpose, word_t * blocks)
+{
+    int i,k;
+    for(k=0; k < WORD_SIZE; k++)
+    {
+        const word_t bitpos = ONE << k;
+        for (i=0; i < WORDS_PER_BLOCK; i++)
+        {
+            const word_t w = bs2le(blocks[k * WORDS_PER_BLOCK + i]);
+            const int offset = i << MUL_SHIFT;
+
+#ifndef UNROLL_TRANSPOSE
+            int j;
+            for(j=0; j < WORD_SIZE; j++)
+            {
+                // TODO make const time
+                transpose[offset + j] |= (w & (ONE << j)) ? bitpos : 0;
+            }
+#else
+            BS_TRANSPOSE_DST_BYTE(0);
+#if WORD_SIZE > 8
+            BS_TRANSPOSE_DST_BYTE(8);
+#endif
+#if WORD_SIZE > 16
+            BS_TRANSPOSE_DST_BYTE(16);
+            BS_TRANSPOSE_DST_BYTE(24);
+#endif
+#if WORD_SIZE > 32
+            BS_TRANSPOSE_DST_BYTE(32);
+            BS_TRANSPOSE_DST_BYTE(40);
+            BS_TRANSPOSE_DST_BYTE(48);
+            BS_TRANSPOSE_DST_BYTE(56);
+#endif
+#endif
+            // constant time:
+            //transpose[(i<<MUL_SHIFT)+ j] |= (((int64_t)((w & (ONE << j)) << (WORD_SIZE-1-j)))>>(WORD_SIZE-1)) & (ONE<<k);
+        }
+    }
+}
+
+#undef BS_TRANSPOSE_DST_BYTE
+#undef BS_TRANSPOSE_DST_BIT
+
+/* Helpers for the UNROLL_TRANSPOSE variant of bs_transpose_rev(); they rely
+ * on its locals `transpose`, `offset`, `w` and `bitpos`. */
+/* Sets `bitpos` in transpose[n * WORDS_PER_BLOCK + offset] when bit n of w is set. */
+#define BS_TRANSPOSE_REV_BIT(n) \
+    transpose[(n) * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << (n))) ? bitpos : 0
+/* Eight consecutive BS_TRANSPOSE_REV_BIT steps starting at bit n. */
+#define BS_TRANSPOSE_REV_BYTE(n) \
+    do { \
+        BS_TRANSPOSE_REV_BIT((n) + 0); \
+        BS_TRANSPOSE_REV_BIT((n) + 1); \
+        BS_TRANSPOSE_REV_BIT((n) + 2); \
+        BS_TRANSPOSE_REV_BIT((n) + 3); \
+        BS_TRANSPOSE_REV_BIT((n) + 4); \
+        BS_TRANSPOSE_REV_BIT((n) + 5); \
+        BS_TRANSPOSE_REV_BIT((n) + 6); \
+        BS_TRANSPOSE_REV_BIT((n) + 7); \
+    } while (0)
+
+/*
+ * In-place reverse bit-matrix transpose of the BLOCK_SIZE words in `blocks`.
+ *
+ * For every k < BLOCK_SIZE and j < WORD_SIZE: when bit j of blocks[k] is set,
+ * bit (k % WORD_SIZE) of transpose[j * WORDS_PER_BLOCK + k / WORD_SIZE] is
+ * set. The result is built in a zeroed local array and copied back.
+ *
+ * NOTE(review): the unrolled variant ORs in bs2be(ONE << (k % WORD_SIZE))
+ * while the loop variant ORs in the unconverted mask; the two only agree
+ * where bs2be() is the identity - confirm which is intended elsewhere.
+ */
+void bs_transpose_rev(word_t * blocks)
+{
+    int k;
+    word_t transpose[BLOCK_SIZE];
+
+    memset(transpose, 0, sizeof(transpose));
+    for(k=0; k < BLOCK_SIZE; k++)
+    {
+        word_t w = blocks[k];
+        word_t bitpos = bs2be(ONE << (k % WORD_SIZE));
+        word_t offset = k / WORD_SIZE;
+#ifndef UNROLL_TRANSPOSE
+        int j;
+        for(j=0; j < WORD_SIZE; j++)
+        {
+            if (w & (ONE << j))
+            {
+                transpose[j * WORDS_PER_BLOCK + (offset)] |= (ONE << (k % WORD_SIZE));
+            }
+        }
+#else
+        BS_TRANSPOSE_REV_BYTE(0);
+#if WORD_SIZE > 8
+        BS_TRANSPOSE_REV_BYTE(8);
+#endif
+#if WORD_SIZE > 16
+        BS_TRANSPOSE_REV_BYTE(16);
+        BS_TRANSPOSE_REV_BYTE(24);
+#endif
+#if WORD_SIZE > 32
+        BS_TRANSPOSE_REV_BYTE(32);
+        BS_TRANSPOSE_REV_BYTE(40);
+        BS_TRANSPOSE_REV_BYTE(48);
+        BS_TRANSPOSE_REV_BYTE(56);
+#endif
+#endif
+    }
+    memmove(blocks,transpose,sizeof(transpose));
+}
+
+#undef BS_TRANSPOSE_REV_BYTE
+#undef BS_TRANSPOSE_REV_BIT
+
+
+/* Word offsets in steps of 8 (used as the R* terms by bs_shiftmix). */
+#define R0 0
+#define R1 8
+#define R2 16
+#define R3 24
+
+/* Word offsets of the four quarters of a BLOCK_SIZE-word state. */
+#define B0 0
+#define B1 32
+#define B2 64
+#define B3 96
+
+/* Multiples of a quarter state. Each expansion is fully parenthesized so it
+ * keeps its value when used inside a larger expression (e.g. x % R2_shift). */
+#define R0_shift ((BLOCK_SIZE/4)*0)
+#define R1_shift ((BLOCK_SIZE/4)*1)
+#define R2_shift ((BLOCK_SIZE/4)*2)
+#define R3_shift ((BLOCK_SIZE/4)*3)
+#define B_MOD (BLOCK_SIZE)
+
+
+/*
+ * Permutes the BLOCK_SIZE-word state `B` in place, in groups of 8 words.
+ *
+ * The output is filled in four steps. In each step, output group
+ * (step * 8 + quarter * 32) receives the 8 words at the current source
+ * offset of that quarter; the source offsets start at 0, 32, 64 and 96 and
+ * advance by BLOCK_SIZE/16 + BLOCK_SIZE/4 (= 40) words modulo 128 after each
+ * use. The result is built in a temporary array and copied back.
+ */
+void bs_shiftrows(word_t * B)
+{
+    word_t shifted[BLOCK_SIZE];
+    uint8_t src[4] = { 0, 32, 64, 96 };
+    int step, quarter, bit;
+
+    for (step = 0; step < 4; step++)
+    {
+        for (quarter = 0; quarter < 4; quarter++)
+        {
+            const word_t * from = B + src[quarter];
+            word_t * to = shifted + step * 8 + quarter * 32;
+
+            for (bit = 0; bit < 8; bit++)
+            {
+                to[bit] = from[bit];
+            }
+            src[quarter] = (src[quarter] + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
+        }
+    }
+    memmove(B,shifted,sizeof(shifted));
+}
+
+
+/*
+ * Undoes the group permutation of bs_shiftrows() on the BLOCK_SIZE-word
+ * state `B`, in place.
+ *
+ * The input is consumed in four steps. In each step, the 8 words of input
+ * group (step * 8 + quarter * 32) are stored at the current destination
+ * offset of that quarter; the destination offsets start at 0, 32, 64 and 96
+ * and advance by BLOCK_SIZE/16 + BLOCK_SIZE/4 (= 40) words modulo 128 after
+ * each use. The result is built in a temporary array and copied back.
+ */
+void bs_shiftrows_rev(word_t * B)
+{
+    word_t unshifted[BLOCK_SIZE];
+    uint8_t dst[4] = { 0, 32, 64, 96 };
+    int step, quarter, bit;
+
+    for (step = 0; step < 4; step++)
+    {
+        for (quarter = 0; quarter < 4; quarter++)
+        {
+            const word_t * from = B + step * 8 + quarter * 32;
+            word_t * to = unshifted + dst[quarter];
+
+            for (bit = 0; bit < 8; bit++)
+            {
+                to[bit] = from[bit];
+            }
+            dst[quarter] = (dst[quarter] + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
+        }
+    }
+    memmove(B,unshifted,sizeof(unshifted));
+}
+
+
+#define A0 0
+#define A1 8
+#define A2 16
+#define A3 24
+
+/* Does shift rows and mix columns in same step.
+ *
+ * B is read through four row pointers Br0..Br3 that start at word offsets
+ * 0, 32, 64 and 96.  Each of the 4 loop iterations writes 32 words (byte
+ * slots A0..A3, bit-planes 0..7) into a local scratch array and then moves
+ * every row pointer on by BLOCK_SIZE/4 words, wrapped with & 0x7f.  The
+ * scratch array is copied back over B at the end, so B is never updated in
+ * place while it is still being read.
+ *
+ * In every output slot the temporary "of" (xor of the bit-7 planes of the
+ * two inputs that are shifted by one bit) is xored into bit-planes 0, 1, 3
+ * and 4.
+ *
+ * NOTE(review): the & 0x7f wrap presumably relies on BLOCK_SIZE == 128, and
+ * R0..R3 are defined outside this excerpt - confirm both.
+ */
+void bs_shiftmix(word_t * B)
+{
+ word_t Bp_space[BLOCK_SIZE];
+ word_t * Bp = Bp_space;
+
+ word_t * Br0 = B + 0;
+ word_t * Br1 = B + 32;
+ word_t * Br2 = B + 64;
+ word_t * Br3 = B + 96;
+
+ uint8_t offsetr0 = 0;
+ uint8_t offsetr1 = 32;
+ uint8_t offsetr2 = 64;
+ uint8_t offsetr3 = 96;
+ /* Re-derives the same four pointers from the offsets; the values do not change. */
+ Br0 = B + offsetr0;
+ Br1 = B + offsetr1;
+ Br2 = B + offsetr2;
+ Br3 = B + offsetr3;
+
+ /* Each pass fills the next BLOCK_SIZE/4 words of the scratch array. */
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ // Output slot A0 (originally labelled "B0")
+ // 2*A0 2*A1 A1 A2 A3
+ word_t of =Br0[R0+7]^ Br1[R1+7];
+ Bp[A0+0] = Br1[R1+0] ^ Br2[R2+0] ^ Br3[R3+0] ^ of;
+ Bp[A0+1] = Br0[R0+0] ^ Br1[R1+0] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br3[R3+1] ^ of;
+ Bp[A0+2] = Br0[R0+1] ^ Br1[R1+1] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br3[R3+2];
+ Bp[A0+3] = Br0[R0+2] ^ Br1[R1+2] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br3[R3+3] ^ of;
+ Bp[A0+4] = Br0[R0+3] ^ Br1[R1+3] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br3[R3+4] ^ of;
+ Bp[A0+5] = Br0[R0+4] ^ Br1[R1+4] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br3[R3+5];
+ Bp[A0+6] = Br0[R0+5] ^ Br1[R1+5] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br3[R3+6];
+ Bp[A0+7] = Br0[R0+6] ^ Br1[R1+6] ^ Br1[R1+7] ^ Br2[R2+7] ^ Br3[R3+7];
+
+ // Output slot A1: A0 2*A1 2*A2 A2 A3
+ of = Br1[R1+7] ^ Br2[R2+7];
+ Bp[A1+0] = Br0[R0+0] ^ Br2[R2+0] ^ Br3[R3+0] ^ of;
+ Bp[A1+1] = Br0[R0+1] ^ Br1[R1+0] ^ Br2[R2+0] ^ Br2[R2+1] ^ Br3[R3+1] ^ of;
+ Bp[A1+2] = Br0[R0+2] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br2[R2+2] ^ Br3[R3+2];
+ Bp[A1+3] = Br0[R0+3] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br2[R2+3] ^ Br3[R3+3] ^ of;
+ Bp[A1+4] = Br0[R0+4] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br2[R2+4] ^ Br3[R3+4] ^ of;
+ Bp[A1+5] = Br0[R0+5] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br2[R2+5] ^ Br3[R3+5];
+ Bp[A1+6] = Br0[R0+6] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br2[R2+6] ^ Br3[R3+6];
+ Bp[A1+7] = Br0[R0+7] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br2[R2+7] ^ Br3[R3+7];
+
+ // Output slot A2: A0 A1 2*A2 2*A3 A3
+ of = Br2[R2+7] ^ Br3[R3+7];
+ Bp[A2+0] = Br0[R0+0] ^ Br1[R1+0] ^ Br3[R3+0] ^ of;
+ Bp[A2+1] = Br0[R0+1] ^ Br1[R1+1] ^ Br2[R2+0] ^ Br3[R3+0] ^ Br3[R3+1] ^ of;
+ Bp[A2+2] = Br0[R0+2] ^ Br1[R1+2] ^ Br2[R2+1] ^ Br3[R3+1] ^ Br3[R3+2];
+ Bp[A2+3] = Br0[R0+3] ^ Br1[R1+3] ^ Br2[R2+2] ^ Br3[R3+2] ^ Br3[R3+3] ^ of;
+ Bp[A2+4] = Br0[R0+4] ^ Br1[R1+4] ^ Br2[R2+3] ^ Br3[R3+3] ^ Br3[R3+4] ^ of;
+ Bp[A2+5] = Br0[R0+5] ^ Br1[R1+5] ^ Br2[R2+4] ^ Br3[R3+4] ^ Br3[R3+5];
+ Bp[A2+6] = Br0[R0+6] ^ Br1[R1+6] ^ Br2[R2+5] ^ Br3[R3+5] ^ Br3[R3+6];
+ Bp[A2+7] = Br0[R0+7] ^ Br1[R1+7] ^ Br2[R2+6] ^ Br3[R3+6] ^ Br3[R3+7];
+
+ // Output slot A3: A0 2*A0 A1 A2 2*A3
+ of = Br0[R0+7] ^ Br3[R3+7];
+ Bp[A3+0] = Br0[R0+0] ^ Br1[R1+0] ^ Br2[R2+0] ^ of;
+ Bp[A3+1] = Br0[R0+1] ^ Br0[R0+0] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br3[R3+0] ^ of;
+ Bp[A3+2] = Br0[R0+2] ^ Br0[R0+1] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br3[R3+1];
+ Bp[A3+3] = Br0[R0+3] ^ Br0[R0+2] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br3[R3+2] ^ of;
+ Bp[A3+4] = Br0[R0+4] ^ Br0[R0+3] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br3[R3+3] ^ of;
+ Bp[A3+5] = Br0[R0+5] ^ Br0[R0+4] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br3[R3+4];
+ Bp[A3+6] = Br0[R0+6] ^ Br0[R0+5] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br3[R3+5];
+ Bp[A3+7] = Br0[R0+7] ^ Br0[R0+6] ^ Br1[R1+7] ^ Br2[R2+7] ^ Br3[R3+6];
+
+ Bp += BLOCK_SIZE/4;
+
+ offsetr0 = (offsetr0 + BLOCK_SIZE/4) & 0x7f;
+ offsetr1 = (offsetr1 + BLOCK_SIZE/4) & 0x7f;
+ offsetr2 = (offsetr2 + BLOCK_SIZE/4) & 0x7f;
+ offsetr3 = (offsetr3 + BLOCK_SIZE/4) & 0x7f;
+
+ Br0 = B + offsetr0;
+ Br1 = B + offsetr1;
+ Br2 = B + offsetr2;
+ Br3 = B + offsetr3;
+ }
+
+ memmove(B,Bp_space,sizeof(Bp_space));
+}
+
+
+/* Bitsliced MixColumns applied to the whole state in B.
+ *
+ * Each of the 4 loop iterations reads 32 words of B (byte slots A0..A3,
+ * bit-planes 0..7) and writes the mixed result to the same positions of a
+ * local scratch array.  Both B and Bp advance by BLOCK_SIZE/4 per
+ * iteration, so the final memmove steps each pointer back by BLOCK_SIZE
+ * in order to copy the scratch array over the caller's buffer.
+ *
+ * The "//" notes inside the loop give the byte-wise formula that the
+ * following group of bit-plane assignments implements.
+ */
+void bs_mixcolumns(word_t * B)
+{
+ word_t Bp_space[BLOCK_SIZE];
+ word_t * Bp = Bp_space;
+ // to understand this, see
+ // https://en.wikipedia.org/wiki/Rijndael_mix_columns
+
+ int i = 0;
+ for (; i < 4; i++)
+ {
+ // of = A0 ^ A1;
+ // A0 = A0 ^ (0x1b & ((signed char)of>>7));
+
+ //// 2 * A0
+ // A0 = A0 ^ (A0 << 1)
+
+ //// + 3 * A1
+ // A0 = A0 ^ (A1)
+ // A0 = A0 ^ (A1<<1)
+
+ //// + A2 + A3
+ // A0 = A0 ^ (A2)
+ // A0 = A0 ^ (A3)
+ // A0.7 A1.7
+ word_t of = B[A0+7] ^ B[A1+7];
+
+ // 2*A0 2*A1 A1 A2 A3
+ Bp[A0+0] = B[A1+0] ^ B[A2+0] ^ B[A3+0] ^ of;
+ Bp[A0+1] = B[A0+0] ^ B[A1+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+1] ^ of;
+ Bp[A0+2] = B[A0+1] ^ B[A1+1] ^ B[A1+2] ^ B[A2+2] ^ B[A3+2];
+ Bp[A0+3] = B[A0+2] ^ B[A1+2] ^ B[A1+3] ^ B[A2+3] ^ B[A3+3] ^ of;
+ Bp[A0+4] = B[A0+3] ^ B[A1+3] ^ B[A1+4] ^ B[A2+4] ^ B[A3+4] ^ of;
+ Bp[A0+5] = B[A0+4] ^ B[A1+4] ^ B[A1+5] ^ B[A2+5] ^ B[A3+5];
+ Bp[A0+6] = B[A0+5] ^ B[A1+5] ^ B[A1+6] ^ B[A2+6] ^ B[A3+6];
+ Bp[A0+7] = B[A0+6] ^ B[A1+6] ^ B[A1+7] ^ B[A2+7] ^ B[A3+7];
+
+
+
+ // of = A1 ^ A2
+ // A1 = A1 ^ (0x1b & ((signed char)of>>7));
+
+ //// A0
+ // A1 = A1 ^ (A0)
+
+ //// + 2 * A1
+ // A1 = A1 ^ (A1 << 1)
+
+ //// + 3 * A2
+ // A1 = A1 ^ (A2)
+ // A1 = A1 ^ (A2<<1)
+
+ //// + A3
+ // A1 = A1 ^ (A3)
+
+ of = B[A1+7] ^ B[A2+7];
+
+ // A0 2*A1 2*A2 A2 A3
+ Bp[A1+0] = B[A0+0] ^ B[A2+0] ^ B[A3+0] ^ of;
+ Bp[A1+1] = B[A0+1] ^ B[A1+0] ^ B[A2+0] ^ B[A2+1] ^ B[A3+1] ^ of;
+ Bp[A1+2] = B[A0+2] ^ B[A1+1] ^ B[A2+1] ^ B[A2+2] ^ B[A3+2];
+ Bp[A1+3] = B[A0+3] ^ B[A1+2] ^ B[A2+2] ^ B[A2+3] ^ B[A3+3] ^ of;
+ Bp[A1+4] = B[A0+4] ^ B[A1+3] ^ B[A2+3] ^ B[A2+4] ^ B[A3+4] ^ of;
+ Bp[A1+5] = B[A0+5] ^ B[A1+4] ^ B[A2+4] ^ B[A2+5] ^ B[A3+5];
+ Bp[A1+6] = B[A0+6] ^ B[A1+5] ^ B[A2+5] ^ B[A2+6] ^ B[A3+6];
+ Bp[A1+7] = B[A0+7] ^ B[A1+6] ^ B[A2+6] ^ B[A2+7] ^ B[A3+7];
+
+
+ // of = A2 ^ A3
+ // A2 = A2 ^ (0x1b & ((signed char)of>>7));
+
+ //// A0 + A1
+ // A2 = A2 ^ (A0)
+ // A2 = A2 ^ (A1)
+
+ //// + 2 * A2
+ // A2 = A2 ^ (A2 << 1)
+
+ //// + 3 * A3
+ // A2 = A2 ^ (A3)
+ // A2 = A2 ^ (A3<<1)
+
+
+ of = B[A2+7] ^ B[A3+7];
+
+ // A0 A1 2*A2 2*A3 A3
+ Bp[A2+0] = B[A0+0] ^ B[A1+0] ^ B[A3+0] ^ of;
+ Bp[A2+1] = B[A0+1] ^ B[A1+1] ^ B[A2+0] ^ B[A3+0] ^ B[A3+1] ^ of;
+ Bp[A2+2] = B[A0+2] ^ B[A1+2] ^ B[A2+1] ^ B[A3+1] ^ B[A3+2];
+ Bp[A2+3] = B[A0+3] ^ B[A1+3] ^ B[A2+2] ^ B[A3+2] ^ B[A3+3] ^ of;
+ Bp[A2+4] = B[A0+4] ^ B[A1+4] ^ B[A2+3] ^ B[A3+3] ^ B[A3+4] ^ of;
+ Bp[A2+5] = B[A0+5] ^ B[A1+5] ^ B[A2+4] ^ B[A3+4] ^ B[A3+5];
+ Bp[A2+6] = B[A0+6] ^ B[A1+6] ^ B[A2+5] ^ B[A3+5] ^ B[A3+6];
+ Bp[A2+7] = B[A0+7] ^ B[A1+7] ^ B[A2+6] ^ B[A3+6] ^ B[A3+7];
+
+
+ // of = A0 ^ A3
+ // A3 = A3 ^ (0x1b & ((signed char)of>>7));
+
+ //// 3 * A0
+ // A3 = A3 ^ (A0)
+ // A3 = A3 ^ (A0 << 1)
+
+ //// + A1 + A2
+ // A3 = A3 ^ A1
+ // A3 = A3 ^ A2
+
+ //// + 2 * A3
+ // A3 = A3 ^ (A3<<1)
+
+ of = B[A0+7] ^ B[A3+7];
+
+ // 2*A0 A0 A1 A2 2*A3
+ Bp[A3+0] = B[A0+0] ^ B[A1+0] ^ B[A2+0] ^ of;
+ Bp[A3+1] = B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+0] ^ of;
+ Bp[A3+2] = B[A0+2] ^ B[A0+1] ^ B[A1+2] ^ B[A2+2] ^ B[A3+1];
+ Bp[A3+3] = B[A0+3] ^ B[A0+2] ^ B[A1+3] ^ B[A2+3] ^ B[A3+2] ^ of;
+ Bp[A3+4] = B[A0+4] ^ B[A0+3] ^ B[A1+4] ^ B[A2+4] ^ B[A3+3] ^ of;
+ Bp[A3+5] = B[A0+5] ^ B[A0+4] ^ B[A1+5] ^ B[A2+5] ^ B[A3+4];
+ Bp[A3+6] = B[A0+6] ^ B[A0+5] ^ B[A1+6] ^ B[A2+6] ^ B[A3+5];
+ Bp[A3+7] = B[A0+7] ^ B[A0+6] ^ B[A1+7] ^ B[A2+7] ^ B[A3+6];
+
+
+ // move both cursors on to the next group of BLOCK_SIZE/4 words
+ Bp += BLOCK_SIZE/4;
+ B += BLOCK_SIZE/4;
+ }
+
+
+ memmove(B - BLOCK_SIZE,Bp - BLOCK_SIZE,sizeof(Bp_space));
+}
+/* Bitsliced inverse MixColumns applied to the whole state in B.
+ *
+ * The loop counter i is never used as an index; it steps by BLOCK_SIZE/16
+ * up to BLOCK_SIZE/4, i.e. the body runs 4 times.  Each pass reads 32 words
+ * of B (byte slots A0..A3, bit-planes 0..7) and writes the result to the
+ * same positions of a local scratch array.  B and Bp both advance by
+ * BLOCK_SIZE/4 per pass, so the final memmove steps each back by BLOCK_SIZE
+ * to copy the scratch array over the caller's buffer.
+ *
+ * For every output byte, of0/of1/of2 collect the high bit-planes that
+ * overflow out of the multiplications; the "2 bit"/"3 bit"/"4 bit" notes
+ * are the original author's labels for them.
+ */
+void bs_mixcolumns_rev(word_t * B)
+{
+ // to understand this, see
+ // https://en.wikipedia.org/wiki/Rijndael_mix_columns
+ // TODO combine with shiftrows for performance on decryption
+ word_t Bp_space[BLOCK_SIZE];
+ word_t * Bp = Bp_space;
+
+
+ int i = 0;
+ for (; i < BLOCK_SIZE / 4; i += BLOCK_SIZE / 16)
+ {
+
+ //// state[i][0] = A0*0x0e + A1*0x0b + A2*0x0d + A3*0x09
+ // overflow:
+ /* A0 * 0b1110 */ /* A1 * 0b1011 */ /* A2 * 0b1101 */ /* A3 * 0b1001 */
+ word_t of0 = ( (B[A0+7] ^ B[A0+6] ^ B[A0+5]) ^ (B[A1 + 7] ^ B[A1+5]) ^ (B[A2+6] ^ B[A2+5]) ^ ( B[A3+5] )); // 2 bit
+ word_t of1 = ( (B[A0+7] ^ B[A0+6]) ^ ( B[A1+6]) ^ (B[A2+7] ^ B[A2+6]) ^ ( B[A3+6] )); // 3 bit
+ word_t of2 = ( (B[A0+7]) ^ ( B[A1+7]) ^ ( B[A2+7]) ^ ( B[A3+7] )); // 4 bit
+
+ // inverse:
+ // 1110 1011 1101 1001
+ // A0 = A0 * 14 + A1 * 11 + A2 * 13 + A3 * 9
+ // A0 = A0 * (2+4+8) + A1 * (1+2+8) + A2 * (1+4+8) + A3 * (1+8)
+
+ // (2*A0 + 4*A0 + 8*A0 ) + (8*A1 + 2*A1 + A1 ) + ( A2 + 4*A2 + 8*A2 ) + ( A3 + 8*A3)
+ Bp[A0+0] = B[A1+0] ^ B[A2+0] ^ B[A3+0] ^ of0;
+ Bp[A0+1] = B[A0+0] ^ B[A1+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+1] ^ of0 ^ of1;
+ Bp[A0+2] = B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ B[A1+2] ^ B[A2+2] ^ B[A2+0] ^ B[A3+2] ^ of1 ^ of2;
+ Bp[A0+3] = B[A0+2] ^ B[A0+1] ^ B[A0+0] ^ B[A1+0] ^ B[A1+2] ^ B[A1+3] ^ B[A2+3] ^ B[A2+1] ^ B[A2+0] ^ B[A3+3] ^ B[A3+0] ^ of0 ^ of2;
+ Bp[A0+4] = B[A0+3] ^ B[A0+2] ^ B[A0+1] ^ B[A1+1] ^ B[A1+3] ^ B[A1+4] ^ B[A2+4] ^ B[A2+2] ^ B[A2+1] ^ B[A3+4] ^ B[A3+1] ^ of0 ^ of1;
+ Bp[A0+5] = B[A0+4] ^ B[A0+3] ^ B[A0+2] ^ B[A1+2] ^ B[A1+4] ^ B[A1+5] ^ B[A2+5] ^ B[A2+3] ^ B[A2+2] ^ B[A3+5] ^ B[A3+2] ^ of1 ^ of2;
+ Bp[A0+6] = B[A0+5] ^ B[A0+4] ^ B[A0+3] ^ B[A1+3] ^ B[A1+5] ^ B[A1+6] ^ B[A2+6] ^ B[A2+4] ^ B[A2+3] ^ B[A3+6] ^ B[A3+3] ^ of2;
+ Bp[A0+7] = B[A0+6] ^ B[A0+5] ^ B[A0+4] ^ B[A1+4] ^ B[A1+6] ^ B[A1+7] ^ B[A2+7] ^ B[A2+5] ^ B[A2+4] ^ B[A3+7] ^ B[A3+4];
+
+
+
+ //// state[i][1] = A0*0x09 + A1*0xe + A2*0x0b + A3*0x0d
+ // overflow:
+ /* A0 * 0b1001 */ /* A1 * 0b1110 */ /* A2 * 0b101 1 */ /* A3 * 0b1101 */
+ of0 = ( (B[A0+5]) ^ (B[A1+7] ^ B[A1+6] ^ B[A1+5]) ^ (B[A2 + 7] ^ B[A2+5]) ^ (B[A3+6] ^ B[A3+5])); // 2 bit
+ of1 = ( (B[A0+6]) ^ (B[A1+7] ^ B[A1+6]) ^ ( B[A2+6]) ^ (B[A3+7] ^ B[A3+6])); // 3 bit
+ of2 = ( (B[A0+7]) ^ (B[A1+7]) ^ ( B[A2+7]) ^ ( B[A3+7])); // 4 bit
+
+ // inverse:
+ // 1001 1110 1011 1101
+ // A1 = A0 * 9 + A1 * 14 + A2 * 11 + A3 * 13
+ // A1 = A0 * (1+8) + A1 * (2+4+8) + A2 * (1+2+8) + A3 * (1+4+8)
+
+ // (1*A0 + 8*A0 ) +(2*A1 + 4*A1 + 8*A1 ) + (1*A2 + 2*A2 + 8*A2 ) + (1*A3 + 4*A3 + 8*A3)
+ Bp[A1+0] = B[A0+0] ^ B[A2+0] ^ B[A3+0] ^ of0;
+ Bp[A1+1] = B[A0+1] ^ B[A1+0] ^ B[A2+1] ^ B[A2+0] ^ B[A3+1] ^ of0 ^ of1;
+ Bp[A1+2] = B[A0+2] ^ B[A1+1] ^ B[A1+0] ^ B[A2+2] ^ B[A2+1] ^ B[A3+2] ^ B[A3+0] ^ of1 ^ of2;
+ Bp[A1+3] = B[A0+3] ^ B[A0+0] ^ B[A1+2] ^ B[A1+1] ^ B[A1+0] ^ B[A2+3] ^ B[A2+2] ^ B[A2+0] ^ B[A3+3] ^ B[A3+1] ^ B[A3+0] ^ of0 ^ of2;
+ Bp[A1+4] = B[A0+4] ^ B[A0+1] ^ B[A1+3] ^ B[A1+2] ^ B[A1+1] ^ B[A2+4] ^ B[A2+3] ^ B[A2+1] ^ B[A3+4] ^ B[A3+2] ^ B[A3+1] ^ of0 ^ of1;
+ Bp[A1+5] = B[A0+5] ^ B[A0+2] ^ B[A1+4] ^ B[A1+3] ^ B[A1+2] ^ B[A2+5] ^ B[A2+4] ^ B[A2+2] ^ B[A3+5] ^ B[A3+3] ^ B[A3+2] ^ of1 ^ of2;
+ Bp[A1+6] = B[A0+6] ^ B[A0+3] ^ B[A1+5] ^ B[A1+4] ^ B[A1+3] ^ B[A2+6] ^ B[A2+5] ^ B[A2+3] ^ B[A3+6] ^ B[A3+4] ^ B[A3+3] ^ of2;
+ Bp[A1+7] = B[A0+7] ^ B[A0+4] ^ B[A1+6] ^ B[A1+5] ^ B[A1+4] ^ B[A2+7] ^ B[A2+6] ^ B[A2+4] ^ B[A3+7] ^ B[A3+5] ^ B[A3+4];
+
+
+ //// state[i][2] = A0*0x0d + A1*0x09 + A2*0x0e + A3*0x0b
+ // overflow:
+ /* A1 * 0b1001 */ /* A2 * 0b1110 */ /* A3 * 0b1011 */ /* A0 * 0b1101 */
+ of0 = ( (B[A1+5]) ^ (B[A2+7] ^ B[A2+6] ^ B[A2+5]) ^ (B[A3 + 7] ^ B[A3+5]) ^ (B[A0+6] ^ B[A0+5])); // 2 bit
+ of1 = ( (B[A1+6]) ^ (B[A2+7] ^ B[A2+6]) ^ ( B[A3+6]) ^ (B[A0+7] ^ B[A0+6])); // 3 bit
+ of2 = ( (B[A1+7]) ^ (B[A2+7]) ^ ( B[A3+7]) ^ ( B[A0+7])); // 4 bit
+
+ // inverse (terms listed in the order A1, A2, A3, A0):
+ // 1001 1110 1011 1101
+ // A2 = A1 * 9 + A2 * 14 + A3 * 11 + A0 * 13
+ // A2 = A1 * (1+8) + A2 * (2+4+8) + A3 * (1+2+8) + A0 * (1+4+8)
+
+ // (1*A1 + 8*A1) + ( 2*A2 + 4*A2 + 8*A2) + (1*A3 + 2*A3 + 8*A3) + (1*A0 + 4*A0 + 8*A0)
+ Bp[A2+0] = B[A1+0] ^ B[A3+0] ^ B[A0+0] ^ of0;
+ Bp[A2+1] = B[A1+1] ^ B[A2+0] ^ B[A3+1] ^ B[A3+0] ^ B[A0+1] ^ of0 ^ of1;
+ Bp[A2+2] = B[A1+2] ^ B[A2+1] ^ B[A2+0] ^ B[A3+2] ^ B[A3+1] ^ B[A0+2] ^ B[A0+0] ^ of1 ^ of2;
+ Bp[A2+3] = B[A1+3] ^ B[A1+0] ^ B[A2+2] ^ B[A2+1] ^ B[A2+0] ^ B[A3+3] ^ B[A3+2] ^ B[A3+0] ^ B[A0+3] ^ B[A0+1] ^ B[A0+0] ^ of0 ^ of2;
+ Bp[A2+4] = B[A1+4] ^ B[A1+1] ^ B[A2+3] ^ B[A2+2] ^ B[A2+1] ^ B[A3+4] ^ B[A3+3] ^ B[A3+1] ^ B[A0+4] ^ B[A0+2] ^ B[A0+1] ^ of0 ^ of1;
+ Bp[A2+5] = B[A1+5] ^ B[A1+2] ^ B[A2+4] ^ B[A2+3] ^ B[A2+2] ^ B[A3+5] ^ B[A3+4] ^ B[A3+2] ^ B[A0+5] ^ B[A0+3] ^ B[A0+2] ^ of1 ^ of2;
+ Bp[A2+6] = B[A1+6] ^ B[A1+3] ^ B[A2+5] ^ B[A2+4] ^ B[A2+3] ^ B[A3+6] ^ B[A3+5] ^ B[A3+3] ^ B[A0+6] ^ B[A0+4] ^ B[A0+3] ^ of2;
+ Bp[A2+7] = B[A1+7] ^ B[A1+4] ^ B[A2+6] ^ B[A2+5] ^ B[A2+4] ^ B[A3+7] ^ B[A3+6] ^ B[A3+4] ^ B[A0+7] ^ B[A0+5] ^ B[A0+4];
+
+
+
+ //// state[i][3] = A0*0x0b + A1*0x0d + A2*0x09 + A3*0x0e
+ // overflow:
+ /* A2 * 0b1001 */ /* A3 * 0b1110 */ /* A0 * 0b1011 */ /* A1 * 0b1101 */
+ of0 = ( (B[A2+5]) ^ (B[A3+7] ^ B[A3+6] ^ B[A3+5]) ^ (B[A0 + 7] ^ B[A0+5]) ^ (B[A1+6] ^ B[A1+5])); // 2 bit
+ of1 = ( (B[A2+6]) ^ (B[A3+7] ^ B[A3+6]) ^ ( B[A0+6]) ^ (B[A1+7] ^ B[A1+6])); // 3 bit
+ of2 = ( (B[A2+7]) ^ (B[A3+7]) ^ ( B[A0+7]) ^ ( B[A1+7])); // 4 bit
+
+ // inverse (terms listed in the order A2, A3, A0, A1):
+ // 1001 1110 1011 1101
+ // A3 = A2 * 9 + A3 * 14 + A0 * 11 + A1 * 13
+ // A3 = A2 * (1+8) + A3 * (2+4+8) + A0 * (1+2+8) + A1 * (1+4+8)
+
+ // (1*A2 + 8*A2) + ( 2*A3 + 4*A3 + 8*A3) + (1*A0 + 2*A0 + 8*A0) + (1*A1 + 4*A1 + 8*A1)
+ Bp[A3+0] = B[A2+0] ^ B[A0+0] ^ B[A1+0] ^ of0;
+ Bp[A3+1] = B[A2+1] ^ B[A3+0] ^ B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ of0 ^ of1;
+ Bp[A3+2] = B[A2+2] ^ B[A3+1] ^ B[A3+0] ^ B[A0+2] ^ B[A0+1] ^ B[A1+2] ^ B[A1+0] ^ of1 ^ of2;
+ Bp[A3+3] = B[A2+3] ^ B[A2+0] ^ B[A3+2] ^ B[A3+1] ^ B[A3+0] ^ B[A0+3] ^ B[A0+2] ^ B[A0+0] ^ B[A1+3] ^ B[A1+1] ^ B[A1+0] ^ of0 ^ of2;
+ Bp[A3+4] = B[A2+4] ^ B[A2+1] ^ B[A3+3] ^ B[A3+2] ^ B[A3+1] ^ B[A0+4] ^ B[A0+3] ^ B[A0+1] ^ B[A1+4] ^ B[A1+2] ^ B[A1+1] ^ of0 ^ of1;
+ Bp[A3+5] = B[A2+5] ^ B[A2+2] ^ B[A3+4] ^ B[A3+3] ^ B[A3+2] ^ B[A0+5] ^ B[A0+4] ^ B[A0+2] ^ B[A1+5] ^ B[A1+3] ^ B[A1+2] ^ of1 ^ of2;
+ Bp[A3+6] = B[A2+6] ^ B[A2+3] ^ B[A3+5] ^ B[A3+4] ^ B[A3+3] ^ B[A0+6] ^ B[A0+5] ^ B[A0+3] ^ B[A1+6] ^ B[A1+4] ^ B[A1+3] ^ of2;
+ Bp[A3+7] = B[A2+7] ^ B[A2+4] ^ B[A3+6] ^ B[A3+5] ^ B[A3+4] ^ B[A0+7] ^ B[A0+6] ^ B[A0+4] ^ B[A1+7] ^ B[A1+5] ^ B[A1+4];
+
+ Bp += BLOCK_SIZE/4;
+ B += BLOCK_SIZE/4;
+ }
+
+ memmove(B - BLOCK_SIZE, Bp - BLOCK_SIZE,sizeof(Bp_space));
+
+}
+
+void bs_expand_key(word_t (* rk)[BLOCK_SIZE], uint8_t * _key)
+{
+    /*
+     * Builds the bitsliced round keys.
+     *
+     * rk   : output, one row of BLOCK_SIZE words per round key.
+     * _key : input key; BLOCK_SIZE/8 bytes are read from it.
+     *
+     * The key is copied into a local KEY_SCHEDULE_SIZE-byte buffer and
+     * expanded there with expand_key().  Each BLOCK_SIZE/8-byte slice of
+     * the schedule is then copied to the start of its row, the first
+     * WORDS_PER_BLOCK words are repeated until the row is full, and the
+     * row is passed to bs_transpose().
+     */
+    // TODO integrate this better
+    uint8_t key[KEY_SCHEDULE_SIZE];
+    int offset, round = 0;
+
+    memmove(key, _key, BLOCK_SIZE/8);
+    expand_key(key);
+
+    for (offset = 0; offset < KEY_SCHEDULE_SIZE; offset += (BLOCK_SIZE/8), round++)
+    {
+        int k, l;
+
+        memmove(rk[round], key + offset, BLOCK_SIZE / 8);
+
+        /* The row holds BLOCK_SIZE words, so that is the replication
+           bound (previously a literal 128). */
+        for (k = WORDS_PER_BLOCK; k < BLOCK_SIZE; k += WORDS_PER_BLOCK)
+        {
+            for (l = 0; l < WORDS_PER_BLOCK; l++)
+            {
+                rk[round][k + l] = rk[round][l];
+            }
+        }
+        bs_transpose(rk[round]);
+    }
+}
+
+void bs_cipher(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE])
+{
+    /*
+     * Encrypts state in place with the round keys rk[0]..rk[10]:
+     * transpose, add rk[0], nine full rounds, one final round without
+     * the column mix, add rk[10], transpose back.
+     */
+    int r = 1;
+
+    bs_transpose(state);
+    bs_addroundkey(state, rk[0]);
+
+    while (r < 10)
+    {
+        bs_apply_sbox(state);
+        /* bs_shiftmix stands in for bs_shiftrows + bs_mixcolumns */
+        bs_shiftmix(state);
+        bs_addroundkey(state, rk[r]);
+        r++;
+    }
+
+    /* last round: row shift only */
+    bs_apply_sbox(state);
+    bs_shiftrows(state);
+    bs_addroundkey(state, rk[10]);
+
+    bs_transpose_rev(state);
+}
+
+void bs_cipher_rev(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE])
+{
+    /*
+     * Decrypts state in place, consuming the round keys from rk[10]
+     * down to rk[0].
+     */
+    int r = 9;
+
+    bs_transpose(state);
+    bs_addroundkey(state, rk[10]);
+
+    /* rounds 9 down to 1, each ending with the inverse column mix */
+    while (r > 0)
+    {
+        bs_shiftrows_rev(state);
+        bs_apply_sbox_rev(state);
+        bs_addroundkey(state, rk[r]);
+        bs_mixcolumns_rev(state);
+        r--;
+    }
+
+    /* final step has no column mix */
+    bs_shiftrows_rev(state);
+    bs_apply_sbox_rev(state);
+    bs_addroundkey(state, rk[0]);
+
+    bs_transpose_rev(state);
+}
+
+void aes_ecb_test()
+{
+    /*
+     * Encrypts one 16-byte block, decrypts the result again, prints both
+     * buffers, and checks the round trip and the expected ciphertext.
+     */
+    uint8_t key_vector[16] = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c";
+    uint8_t pt_vector[16] = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a";
+    uint8_t ct_vector[16] = "\x3a\xd7\x7b\xb4\x0d\x7a\x36\x60\xa8\x9e\xca\xf3\x24\x66\xef\x97";
+    uint8_t encrypted[16];
+    uint8_t decrypted[16];
+
+    printf("AES ECB\n");
+
+    aes_ecb_encrypt(encrypted, pt_vector, 16, key_vector);
+    printf("cipher text: \n");
+    dump_hex(encrypted, 16);
+
+    aes_ecb_decrypt(decrypted, encrypted, 16, key_vector);
+    printf("plain text: \n");
+    dump_hex(decrypted, 16);
+
+    /* Both comparisons are side-effect free, so they can be done up front. */
+    const int roundtrip_differs = (memcmp(pt_vector, decrypted, 16) != 0);
+    const int reference_differs = (memcmp(ct_vector, encrypted, 16) != 0);
+
+    if (roundtrip_differs)
+    {
+        fprintf(stderr,"error: decrypted ciphertext is not the same as the input plaintext\n");
+        EXIT1;
+    }
+    else if (reference_differs)
+    {
+        fprintf(stderr,"error: ciphertext is not the same as the test vector\n");
+        EXIT1;
+    }
+    else
+    {
+        printf("ECB passes test vector\n\n");
+    }
+}
+
+void aes_ctr_test()
+{
+// Test vector from NIST for 4 input blocks
+#define AES_CTR_TESTS_BYTES 64
+
+    /*
+     * Encrypts AES_CTR_TESTS_BYTES bytes in CTR mode, decrypts the result
+     * again, prints both buffers, and checks the round trip and the
+     * expected ciphertext.
+     */
+    uint8_t key_vector[16] =
+        "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c";
+
+    uint8_t iv_vector[16] =
+        "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
+
+    uint8_t pt_vector[AES_CTR_TESTS_BYTES] =
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+        "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+        "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10"
+        ;
+
+    uint8_t ct_vector[AES_CTR_TESTS_BYTES] =
+        "\x87\x4d\x61\x91\xb6\x20\xe3\x26\x1b\xef\x68\x64\x99\x0d\xb6\xce"
+        "\x98\x06\xf6\x6b\x79\x70\xfd\xff\x86\x17\x18\x7b\xb9\xff\xfd\xff"
+        "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
+        "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1\x79\x21\x70\xa0\xf3\x00\x9c\xee"
+        ;
+
+    uint8_t encrypted[AES_CTR_TESTS_BYTES];
+    uint8_t decrypted[AES_CTR_TESTS_BYTES];
+
+    printf("AES CTR\n");
+
+    aes_ctr_encrypt(encrypted, pt_vector, AES_CTR_TESTS_BYTES, key_vector, iv_vector);
+    printf("cipher text: \n");
+    dump_hex(encrypted, AES_CTR_TESTS_BYTES);
+
+    aes_ctr_decrypt(decrypted, encrypted, AES_CTR_TESTS_BYTES, key_vector, iv_vector);
+    printf("plain text: \n");
+    dump_hex(decrypted, AES_CTR_TESTS_BYTES);
+
+    /* Both comparisons are side-effect free, so they can be done up front. */
+    const int roundtrip_differs = (memcmp(pt_vector, decrypted, AES_CTR_TESTS_BYTES) != 0);
+    const int reference_differs = (memcmp(ct_vector, encrypted, AES_CTR_TESTS_BYTES) != 0);
+
+    if (roundtrip_differs)
+    {
+        fprintf(stderr,"error: decrypted ciphertext is not the same as the input plaintext\n");
+        EXIT1;
+    }
+    else if (reference_differs)
+    {
+        fprintf(stderr,"error: ciphertext is not the same as the test vector\n");
+        EXIT1;
+    }
+    else
+    {
+        printf("CTR passes test vector\n\n");
+    }
+
+}
+
+
+int main(int argc, char * argv[])
+{
+    /* The command line is not used. */
+    (void)argc;
+    (void)argv;
+
+    /* Time both self-tests together and report the total. */
+    clock_prepare();
+    clock_start();
+
+    aes_ecb_test();
+    aes_ctr_test();
+
+    clock_stop();
+    print_total_clock();
+
+    return 0;
+}
diff --git a/test/monniaux/bitsliced-aes/one_file/compare.sh b/test/monniaux/bitsliced-aes/one_file/compare.sh
new file mode 100755
index 00000000..276a95ee
--- /dev/null
+++ b/test/monniaux/bitsliced-aes/one_file/compare.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+ROOT=/home/monniaux/work/Kalray/CompCert
+SRC=bitsliced-aes.c
+k1-mbr-gcc -Werror=implicit -Werror=uninitialized -O3 $SRC $ROOT/test/monniaux/clock.gcc.k1c.o -o bitsliced-aes.gcc.k1c &&
+$ROOT/ccomp -O3 -fno-unprototyped -O3 $SRC $ROOT/test/monniaux/clock.gcc.k1c.o -o bitsliced-aes.ccomp.k1c &&
+gcc -Werror=implicit -Werror=uninitialized -O3 $SRC $ROOT/test/monniaux/clock.gcc.host.o -o bitsliced-aes.gcc.host &&
+valgrind ./bitsliced-aes.gcc.host &&
+k1-cluster -- ./bitsliced-aes.gcc.k1c > ./bitsliced-aes.gcc.k1c.out &&
+k1-cluster -- ./bitsliced-aes.ccomp.k1c > ./bitsliced-aes.ccomp.k1c.out &&
+grep cycles ./bitsliced-aes.gcc.k1c.out | sed -e 's/cycles: //' > ./bitsliced-aes.gcc.k1c.cycles &&
+grep cycles ./bitsliced-aes.ccomp.k1c.out | sed -e 's/cycles: //' > ./bitsliced-aes.ccomp.k1c.cycles &&
+test $(cat ./bitsliced-aes.ccomp.k1c.cycles) -gt $(expr 2 '*' $(cat ./bitsliced-aes.gcc.k1c.cycles))
diff --git a/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c
new file mode 100644
index 00000000..5294ff1d
--- /dev/null
+++ b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c
@@ -0,0 +1,32 @@
+#include <stdint.h>
+#include <string.h>
+int i[1];
+int j, bs_transpose_dst_k, k, s, o;
+void a(int (*)[], uint8_t *);
+void b(uint8_t c, uint8_t d, size_t e, uint8_t f, uint8_t g) { /* Calls a() with a local one-element array and f; c, d, e and g are not used. */
+ int l[1];
+ a(l, f); /* NOTE(review): the argument types do not match a()'s prototype - presumably an artifact of automatic test-case reduction; confirm before reuse */
+}
+void a(int (*l)[], uint8_t *m) { /* Neither l nor m is used in the body; all work is done on file-scope variables. */
+ for (; o < 76; o += 8) { /* o is a file-scope counter and is not reset here */
+ {
+ int *n = i; /* n points at the file-scope one-element array i */
+ bs_transpose_dst_k = 0;
+ for (; bs_transpose_dst_k < 64; bs_transpose_dst_k++) {
+ j = 0;
+ for (; j < 64; j++) {
+ k = &s; /* stores the address of s in the int k */
+ n[j] = k & 1; /* NOTE(review): for j >= 1 this indexes past the single element of i - presumably a reduction artifact; confirm before reuse */
+ }
+ }
+ }
+ }
+}
+void aes_ecb_test() {} /* Empty definition: does nothing when called. */
+void aes_ctr_test() { /* Calls b() once with a local 4-byte buffer and three locals initialised from "". */
+ uint8_t p = ""; /* NOTE(review): a string literal initialising a uint8_t - presumably a reduction artifact; confirm */
+ uint8_t q = "";
+ uint8_t r = "";
+ uint8_t output[4];
+ b(output, r, 4, p, q);
+}
diff --git a/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c
new file mode 100644
index 00000000..0d48b3b8
--- /dev/null
+++ b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c
@@ -0,0 +1,20 @@
+#include "/home/monniaux/work/Kalray/CompCert/test/monniaux/clock.h"
+
+void aes_ecb_test(void);
+void aes_ctr_test(void);
+
+int main(int argc, char * argv[])
+{
+    /* The command line is not used. */
+    (void)argc;
+    (void)argv;
+
+    /* Time both test entry points together and report the total. */
+    clock_prepare();
+    clock_start();
+
+    aes_ecb_test();
+    aes_ctr_test();
+
+    clock_stop();
+    print_total_clock();
+
+    return 0;
+}
diff --git a/test/monniaux/bitsliced-aes/one_file/reduce/compare.sh b/test/monniaux/bitsliced-aes/one_file/reduce/compare.sh
new file mode 100755
index 00000000..a21bb465
--- /dev/null
+++ b/test/monniaux/bitsliced-aes/one_file/reduce/compare.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+ROOT=/home/monniaux/work/Kalray/CompCert
+SRC=bitsliced-aes_compute.c
+MAIN=/home/monniaux/work/Kalray/CompCert/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main
+k1-mbr-gcc -Werror=implicit -Werror=uninitialized -O3 $SRC $ROOT/test/monniaux/clock.gcc.k1c.o $MAIN.gcc.k1c.o -o bitsliced-aes.gcc.k1c &&
+$ROOT/ccomp -O3 -fno-unprototyped -O3 $SRC $ROOT/test/monniaux/clock.gcc.k1c.o $MAIN.gcc.k1c.o -o bitsliced-aes.ccomp.k1c &&
+gcc -Werror=implicit -Werror=uninitialized -O3 $SRC $ROOT/test/monniaux/clock.gcc.host.o $MAIN.c -o bitsliced-aes.gcc.host &&
+valgrind ./bitsliced-aes.gcc.host &&
+k1-cluster --cycle-based -- ./bitsliced-aes.gcc.k1c > ./bitsliced-aes.gcc.k1c.out &&
+k1-cluster --cycle-based -- ./bitsliced-aes.ccomp.k1c > ./bitsliced-aes.ccomp.k1c.out &&
+grep cycles ./bitsliced-aes.gcc.k1c.out > ./bitsliced-aes.gcc.k1c.cycles &&
+grep cycles ./bitsliced-aes.ccomp.k1c.out > ./bitsliced-aes.ccomp.k1c.cycles &&
+sed -i -e 's/cycles: //' ./bitsliced-aes.gcc.k1c.cycles &&
+sed -i -e 's/cycles: //' ./bitsliced-aes.ccomp.k1c.cycles &&
+test $(cat ./bitsliced-aes.gcc.k1c.cycles) -gt 100000 &&
+test $(cat ./bitsliced-aes.ccomp.k1c.cycles) -gt $(expr 2 '*' $(cat ./bitsliced-aes.gcc.k1c.cycles))
diff --git a/test/monniaux/ocaml/Makefile b/test/monniaux/ocaml/Makefile
index 46ce8994..0ae7c22f 100644
--- a/test/monniaux/ocaml/Makefile
+++ b/test/monniaux/ocaml/Makefile
@@ -1,7 +1,29 @@
-test: byterun/ocamlrun
- k1-cluster --syscall=libstd_scalls.so -- byterun/ocamlrun examples/quicksort
+ALL_CFLAGS=-Ibyterun
+EXECUTE_ARGS=examples/quicksort
-byterun/ocamlrun:
- (cd byterun ; $(MAKE))
+include ../rules.mk
-.PHONY: test
+ALL_CCOMPFLAGS=
+LDLIBS=-lm
+
+CFILES=$(wildcard byterun/*.c)
+
+CCOMP_K1C_S=$(patsubst %.c,%.ccomp.k1c.s,$(CFILES))
+CCOMP_HOST_S=$(patsubst %.c,%.ccomp.host.s,$(CFILES))
+
+GCC_K1C_S=$(patsubst %.c,%.gcc.k1c.s,$(CFILES))
+GCC_HOST_S=$(patsubst %.c,%.gcc.host.s,$(CFILES))
+
+all: $(CCOMP_K1C_S) $(GCC_K1C_S) ocamlrun.ccomp.k1c.out ocamlrun.gcc.k1c.out
+
+ocamlrun.ccomp.k1c : $(CCOMP_K1C_S) ../clock.gcc.k1c.o
+ $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@ $(LDLIBS)
+
+ocamlrun.ccomp.host : $(CCOMP_HOST_S) ../clock.gcc.host.o
+ $(CCOMP) $(CCOMPFLAGS) $+ -o $@ $(LDLIBS)
+
+ocamlrun.gcc.k1c : $(GCC_K1C_S) ../clock.gcc.k1c.o
+ $(K1C_CC) $(K1C_CFLAGS) $+ -o $@ $(LDLIBS)
+
+ocamlrun.gcc.host : $(GCC_HOST_S) ../clock.gcc.host.o
+ $(CC) $(CFLAGS) $+ -o $@ $(LDLIBS)
diff --git a/test/monniaux/ocaml/byterun/caml/finalise.h b/test/monniaux/ocaml/byterun/caml/finalise.h
index 5315ac21..b2052c21 100644
--- a/test/monniaux/ocaml/byterun/caml/finalise.h
+++ b/test/monniaux/ocaml/byterun/caml/finalise.h
@@ -25,7 +25,7 @@ void caml_final_update_clean_phase (void);
void caml_final_do_calls (void);
void caml_final_do_roots (scanning_action f);
void caml_final_invert_finalisable_values ();
-void caml_final_oldify_young_roots ();
+void caml_final_oldify_young_roots (void);
void caml_final_empty_young (void);
void caml_final_update_minor_roots(void);
value caml_final_register (value f, value v);
diff --git a/test/monniaux/ocaml/byterun/caml/version.h b/test/monniaux/ocaml/byterun/caml/version.h
new file mode 100644
index 00000000..68d7000e
--- /dev/null
+++ b/test/monniaux/ocaml/byterun/caml/version.h
@@ -0,0 +1,6 @@
+#define OCAML_VERSION_MAJOR 4
+#define OCAML_VERSION_MINOR 7
+#define OCAML_VERSION_PATCHLEVEL 1
+#undef OCAML_VERSION_ADDITIONAL
+#define OCAML_VERSION 40701
+#define OCAML_VERSION_STRING "4.07.1"
diff --git a/test/monniaux/ocaml/byterun/compact.c b/test/monniaux/ocaml/byterun/compact.c
index 7b7188ab..83e7ed0a 100644
--- a/test/monniaux/ocaml/byterun/compact.c
+++ b/test/monniaux/ocaml/byterun/compact.c
@@ -32,6 +32,7 @@
extern uintnat caml_percent_free; /* major_gc.c */
extern void caml_shrink_heap (char *); /* memory.c */
+extern void caml_final_invert_finalisable_values (void);
/* Encoded headers: the color is stored in the 2 least significant bits.
(For pointer inversion, we need to distinguish headers from pointers.)
diff --git a/test/monniaux/ocaml/byterun/main.c b/test/monniaux/ocaml/byterun/main.c
index 5e5839ff..498f3d18 100644
--- a/test/monniaux/ocaml/byterun/main.c
+++ b/test/monniaux/ocaml/byterun/main.c
@@ -13,6 +13,7 @@
/* */
/**************************************************************************/
+#define VERIMAG_MEASUREMENTS
#define CAML_INTERNALS
/* Main entry point (can be overridden by a user-provided main()
@@ -26,6 +27,10 @@
#include <windows.h>
#endif
+#ifdef VERIMAG_MEASUREMENTS
+#include "../../clock.h"
+#endif
+
CAMLextern void caml_main (char_os **);
#ifdef _WIN32
@@ -41,7 +46,15 @@ int main(int argc, char **argv)
caml_expand_command_line(&argc, &argv);
#endif
+#ifdef VERIMAG_MEASUREMENTS
+ clock_prepare();
+ clock_start();
+#endif
caml_main(argv);
+#ifdef VERIMAG_MEASUREMENTS
+ clock_stop();
+ print_total_clock();
+#endif
caml_sys_exit(Val_int(0));
return 0; /* not reached */
}
diff --git a/test/monniaux/ocaml/byterun/win32.c b/test/monniaux/ocaml/byterun/win32.c
deleted file mode 100644
index 1ce8ad5e..00000000
--- a/test/monniaux/ocaml/byterun/win32.c
+++ /dev/null
@@ -1,1019 +0,0 @@
-/**************************************************************************/
-/* */
-/* OCaml */
-/* */
-/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
-/* */
-/* Copyright 1996 Institut National de Recherche en Informatique et */
-/* en Automatique. */
-/* */
-/* All rights reserved. This file is distributed under the terms of */
-/* the GNU Lesser General Public License version 2.1, with the */
-/* special exception on linking described in the file LICENSE. */
-/* */
-/**************************************************************************/
-
-#define CAML_INTERNALS
-
-/* Win32-specific stuff */
-
-/* FILE_INFO_BY_HANDLE_CLASS and FILE_NAME_INFO are only available from Windows
- Vista onwards */
-#undef _WIN32_WINNT
-#define _WIN32_WINNT 0x0600
-
-#define WIN32_LEAN_AND_MEAN
-#include <wtypes.h>
-#include <winbase.h>
-#include <winsock2.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <io.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <ctype.h>
-#include <errno.h>
-#include <string.h>
-#include <signal.h>
-#include "caml/alloc.h"
-#include "caml/address_class.h"
-#include "caml/fail.h"
-#include "caml/io.h"
-#include "caml/memory.h"
-#include "caml/misc.h"
-#include "caml/osdeps.h"
-#include "caml/signals.h"
-#include "caml/sys.h"
-
-#include "caml/config.h"
-#ifdef SUPPORT_DYNAMIC_LINKING
-#include <flexdll.h>
-#endif
-
-#ifndef S_ISREG
-#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
-#endif
-
-unsigned short caml_win32_major = 0;
-unsigned short caml_win32_minor = 0;
-unsigned short caml_win32_build = 0;
-unsigned short caml_win32_revision = 0;
-
-CAMLnoreturn_start
-static void caml_win32_sys_error (int errnum)
-CAMLnoreturn_end;
-
-static void caml_win32_sys_error(int errnum)
-{
- wchar_t buffer[512];
- value msg;
- if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL,
- errnum,
- 0,
- buffer,
- sizeof(buffer)/sizeof(wchar_t),
- NULL)) {
- msg = caml_copy_string_of_utf16(buffer);
- } else {
- msg = caml_alloc_sprintf("unknown error #%d", errnum);
- }
- caml_raise_sys_error(msg);
-}
-
-int caml_read_fd(int fd, int flags, void * buf, int n)
-{
- int retcode;
- if ((flags & CHANNEL_FLAG_FROM_SOCKET) == 0) {
- caml_enter_blocking_section();
- retcode = read(fd, buf, n);
- /* Large reads from console can fail with ENOMEM. Reduce requested size
- and try again. */
- if (retcode == -1 && errno == ENOMEM && n > 16384) {
- retcode = read(fd, buf, 16384);
- }
- caml_leave_blocking_section();
- if (retcode == -1) caml_sys_io_error(NO_ARG);
- } else {
- caml_enter_blocking_section();
- retcode = recv((SOCKET) _get_osfhandle(fd), buf, n, 0);
- caml_leave_blocking_section();
- if (retcode == -1) caml_win32_sys_error(WSAGetLastError());
- }
- return retcode;
-}
-
-int caml_write_fd(int fd, int flags, void * buf, int n)
-{
- int retcode;
- if ((flags & CHANNEL_FLAG_FROM_SOCKET) == 0) {
-#if defined(NATIVE_CODE) && defined(WITH_SPACETIME)
- if (flags & CHANNEL_FLAG_BLOCKING_WRITE) {
- retcode = write(fd, buf, n);
- } else {
-#endif
- caml_enter_blocking_section();
- retcode = write(fd, buf, n);
- caml_leave_blocking_section();
-#if defined(NATIVE_CODE) && defined(WITH_SPACETIME)
- }
-#endif
- if (retcode == -1) caml_sys_io_error(NO_ARG);
- } else {
- caml_enter_blocking_section();
- retcode = send((SOCKET) _get_osfhandle(fd), buf, n, 0);
- caml_leave_blocking_section();
- if (retcode == -1) caml_win32_sys_error(WSAGetLastError());
- }
- CAMLassert (retcode > 0);
- return retcode;
-}
-
-wchar_t * caml_decompose_path(struct ext_table * tbl, wchar_t * path)
-{
- wchar_t * p, * q;
- int n;
-
- if (path == NULL) return NULL;
- p = caml_stat_wcsdup(path);
- q = p;
- while (1) {
- for (n = 0; q[n] != 0 && q[n] != L';'; n++) /*nothing*/;
- caml_ext_table_add(tbl, q);
- q = q + n;
- if (*q == 0) break;
- *q = 0;
- q += 1;
- }
- return p;
-}
-
-wchar_t * caml_search_in_path(struct ext_table * path, const wchar_t * name)
-{
- wchar_t * dir, * fullname;
- char * u8;
- const wchar_t * p;
- int i;
- struct _stati64 st;
-
- for (p = name; *p != 0; p++) {
- if (*p == '/' || *p == '\\') goto not_found;
- }
- for (i = 0; i < path->size; i++) {
- dir = path->contents[i];
- if (dir[0] == 0) continue;
- /* not sure what empty path components mean under Windows */
- fullname = caml_stat_wcsconcat(3, dir, L"\\", name);
- u8 = caml_stat_strdup_of_utf16(fullname);
- caml_gc_message(0x100, "Searching %s\n", u8);
- caml_stat_free(u8);
- if (_wstati64(fullname, &st) == 0 && S_ISREG(st.st_mode))
- return fullname;
- caml_stat_free(fullname);
- }
- not_found:
- u8 = caml_stat_strdup_of_utf16(name);
- caml_gc_message(0x100, "%s not found in search path\n", u8);
- caml_stat_free(u8);
- return caml_stat_wcsdup(name);
-}
-
-CAMLexport wchar_t * caml_search_exe_in_path(const wchar_t * name)
-{
- wchar_t * fullname, * filepart;
- char * u8;
- size_t fullnamelen;
- DWORD retcode;
-
- fullnamelen = wcslen(name) + 1;
- if (fullnamelen < 256) fullnamelen = 256;
- while (1) {
- fullname = caml_stat_alloc(fullnamelen*sizeof(wchar_t));
- retcode = SearchPath(NULL, /* use system search path */
- name,
- L".exe", /* add .exe extension if needed */
- fullnamelen,
- fullname,
- &filepart);
- if (retcode == 0) {
- u8 = caml_stat_strdup_of_utf16(name);
- caml_gc_message(0x100, "%s not found in search path\n", u8);
- caml_stat_free(u8);
- caml_stat_free(fullname);
- return caml_stat_strdup_os(name);
- }
- if (retcode < fullnamelen)
- return fullname;
- caml_stat_free(fullname);
- fullnamelen = retcode + 1;
- }
-}
-
-wchar_t * caml_search_dll_in_path(struct ext_table * path, const wchar_t * name)
-{
- wchar_t * dllname;
- wchar_t * res;
-
- dllname = caml_stat_wcsconcat(2, name, L".dll");
- res = caml_search_in_path(path, dllname);
- caml_stat_free(dllname);
- return res;
-}
-
-#ifdef SUPPORT_DYNAMIC_LINKING
-
-void * caml_dlopen(wchar_t * libname, int for_execution, int global)
-{
- void *handle;
- int flags = (global ? FLEXDLL_RTLD_GLOBAL : 0);
- if (!for_execution) flags |= FLEXDLL_RTLD_NOEXEC;
- handle = flexdll_wdlopen(libname, flags);
- if ((handle != NULL) && ((caml_verb_gc & 0x100) != 0)) {
- flexdll_dump_exports(handle);
- fflush(stdout);
- }
- return handle;
-}
-
-void caml_dlclose(void * handle)
-{
- flexdll_dlclose(handle);
-}
-
-void * caml_dlsym(void * handle, const char * name)
-{
- return flexdll_dlsym(handle, name);
-}
-
-void * caml_globalsym(const char * name)
-{
- return flexdll_dlsym(flexdll_dlopen(NULL,0), name);
-}
-
-char * caml_dlerror(void)
-{
- return flexdll_dlerror();
-}
-
-#else
-
-void * caml_dlopen(wchar_t * libname, int for_execution, int global)
-{
- return NULL;
-}
-
-void caml_dlclose(void * handle)
-{
-}
-
-void * caml_dlsym(void * handle, const char * name)
-{
- return NULL;
-}
-
-void * caml_globalsym(const char * name)
-{
- return NULL;
-}
-
-char * caml_dlerror(void)
-{
- return "dynamic loading not supported on this platform";
-}
-
-#endif
-
-/* Proper emulation of signal(), including ctrl-C and ctrl-break */
-
-typedef void (*sighandler)(int sig);
-static int ctrl_handler_installed = 0;
-static volatile sighandler ctrl_handler_action = SIG_DFL;
-
-static BOOL WINAPI ctrl_handler(DWORD event)
-{
- /* Only ctrl-C and ctrl-Break are handled */
- if (event != CTRL_C_EVENT && event != CTRL_BREAK_EVENT) return FALSE;
- /* Default behavior is to exit, which we get by not handling the event */
- if (ctrl_handler_action == SIG_DFL) return FALSE;
- /* Ignore behavior is to do nothing, which we get by claiming that we
- have handled the event */
- if (ctrl_handler_action == SIG_IGN) return TRUE;
- /* Win32 doesn't like it when we do a longjmp() at this point
- (it looks like we're running in a different thread than
- the main program!). So, just record the signal. */
- caml_record_signal(SIGINT);
- /* We have handled the event */
- return TRUE;
-}
-
-sighandler caml_win32_signal(int sig, sighandler action)
-{
- sighandler oldaction;
-
- if (sig != SIGINT) return signal(sig, action);
- if (! ctrl_handler_installed) {
- SetConsoleCtrlHandler(ctrl_handler, TRUE);
- ctrl_handler_installed = 1;
- }
- oldaction = ctrl_handler_action;
- ctrl_handler_action = action;
- return oldaction;
-}
-
-/* Expansion of @responsefile and *? file patterns in the command line */
-
-static int argc;
-static wchar_t ** argv;
-static int argvsize;
-
-static void store_argument(wchar_t * arg);
-static void expand_argument(wchar_t * arg);
-static void expand_pattern(wchar_t * arg);
-
-static void out_of_memory(void)
-{
- fprintf(stderr, "Out of memory while expanding command line\n");
- exit(2);
-}
-
-static void store_argument(wchar_t * arg)
-{
- if (argc + 1 >= argvsize) {
- argvsize *= 2;
- argv = (wchar_t **) caml_stat_resize_noexc(argv, argvsize * sizeof(wchar_t *));
- if (argv == NULL) out_of_memory();
- }
- argv[argc++] = arg;
-}
-
-static void expand_argument(wchar_t * arg)
-{
- wchar_t * p;
-
- for (p = arg; *p != 0; p++) {
- if (*p == L'*' || *p == L'?') {
- expand_pattern(arg);
- return;
- }
- }
- store_argument(arg);
-}
-
-static void expand_pattern(wchar_t * pat)
-{
- wchar_t * prefix, * p, * name;
- intptr_t handle;
- struct _wfinddata_t ffblk;
- size_t i;
-
- handle = _wfindfirst(pat, &ffblk);
- if (handle == -1) {
- store_argument(pat); /* a la Bourne shell */
- return;
- }
- prefix = caml_stat_wcsdup(pat);
- /* We need to stop at the first directory or drive boundary, because the
- * _findata_t structure contains the filename, not the leading directory. */
- for (i = wcslen(prefix); i > 0; i--) {
- wchar_t c = prefix[i - 1];
- if (c == L'\\' || c == L'/' || c == L':') { prefix[i] = 0; break; }
- }
- /* No separator was found, it's a filename pattern without a leading directory. */
- if (i == 0)
- prefix[0] = 0;
- do {
- name = caml_stat_wcsconcat(2, prefix, ffblk.name);
- store_argument(name);
- } while (_wfindnext(handle, &ffblk) != -1);
- _findclose(handle);
- caml_stat_free(prefix);
-}
-
-
-CAMLexport void caml_expand_command_line(int * argcp, wchar_t *** argvp)
-{
- int i;
- argc = 0;
- argvsize = 16;
- argv = (wchar_t **) caml_stat_alloc_noexc(argvsize * sizeof(wchar_t *));
- if (argv == NULL) out_of_memory();
- for (i = 0; i < *argcp; i++) expand_argument((*argvp)[i]);
- argv[argc] = NULL;
- *argcp = argc;
- *argvp = argv;
-}
-
-/* Add to [contents] the (short) names of the files contained in
- the directory named [dirname]. No entries are added for [.] and [..].
- Return 0 on success, -1 on error; set errno in the case of error. */
-
-int caml_read_directory(wchar_t * dirname, struct ext_table * contents)
-{
- size_t dirnamelen;
- wchar_t * template;
- intptr_t h;
- struct _wfinddata_t fileinfo;
-
- dirnamelen = wcslen(dirname);
- if (dirnamelen > 0 &&
- (dirname[dirnamelen - 1] == L'/'
- || dirname[dirnamelen - 1] == L'\\'
- || dirname[dirnamelen - 1] == L':'))
- template = caml_stat_wcsconcat(2, dirname, L"*.*");
- else
- template = caml_stat_wcsconcat(2, dirname, L"\\*.*");
- h = _wfindfirst(template, &fileinfo);
- if (h == -1) {
- caml_stat_free(template);
- return errno == ENOENT ? 0 : -1;
- }
- do {
- if (wcscmp(fileinfo.name, L".") != 0 && wcscmp(fileinfo.name, L"..") != 0) {
- caml_ext_table_add(contents, caml_stat_strdup_of_utf16(fileinfo.name));
- }
- } while (_wfindnext(h, &fileinfo) == 0);
- _findclose(h);
- caml_stat_free(template);
- return 0;
-}
-
-#ifndef NATIVE_CODE
-
-/* Set up a new thread for control-C emulation and termination */
-
-void caml_signal_thread(void * lpParam)
-{
- wchar_t *endptr;
- HANDLE h;
- /* Get an hexa-code raw handle through the environment */
- h = (HANDLE) (uintptr_t)
- wcstol(caml_secure_getenv(_T("CAMLSIGPIPE")), &endptr, 16);
- while (1) {
- DWORD numread;
- BOOL ret;
- char iobuf[2];
- /* This shall always return a single character */
- ret = ReadFile(h, iobuf, 1, &numread, NULL);
- if (!ret || numread != 1) caml_sys_exit(Val_int(2));
- switch (iobuf[0]) {
- case 'C':
- caml_record_signal(SIGINT);
- break;
- case 'T':
- raise(SIGTERM);
- return;
- }
- }
-}
-
-#endif /* NATIVE_CODE */
-
-#if defined(NATIVE_CODE)
-
-/* Handling of system stack overflow.
- * Based on code provided by Olivier Andrieu.
-
- * An EXCEPTION_STACK_OVERFLOW is signaled when the guard page at the
- * end of the stack has been accessed. Windows clears the PAGE_GUARD
- * protection (making it a regular PAGE_READWRITE) and then calls our
- * exception handler. This means that although we're handling an "out
- * of stack" condition, there is a bit of stack available to call
- * functions and allocate temporaries.
- *
- * PAGE_GUARD is a one-shot access protection mechanism: we need to
- * restore the PAGE_GUARD protection on this page otherwise the next
- * stack overflow won't be detected and the program will abruptly exit
- * with STATUS_ACCESS_VIOLATION.
- *
- * Visual Studio 2003 and later (_MSC_VER >= 1300) have a
- * _resetstkoflw() function that resets this protection.
- * Unfortunately, it cannot work when called directly from the
- * exception handler because at this point we are using the page that
- * is to be protected.
- *
- * A solution is to use an alternate stack when restoring the
- * protection. However it's not possible to use _resetstkoflw() then
- * since it determines the stack pointer by calling alloca(): it would
- * try to protect the alternate stack.
- *
- * Finally, we call caml_raise_stack_overflow; it will either call
- * caml_raise_exception which switches back to the normal stack, or
- * call caml_fatal_uncaught_exception which terminates the program
- * quickly.
- */
-
-static uintnat win32_alt_stack[0x100];
-
-static void caml_reset_stack (void *faulting_address)
-{
- SYSTEM_INFO si;
- DWORD page_size;
- MEMORY_BASIC_INFORMATION mbi;
- DWORD oldprot;
-
- /* get the system's page size. */
- GetSystemInfo (&si);
- page_size = si.dwPageSize;
-
- /* get some information on the page the fault occurred */
- if (! VirtualQuery (faulting_address, &mbi, sizeof mbi))
- goto failed;
-
- VirtualProtect (mbi.BaseAddress, page_size,
- mbi.Protect | PAGE_GUARD, &oldprot);
-
- failed:
- caml_raise_stack_overflow();
-}
-
-
-#ifndef _WIN64
-static LONG CALLBACK
- caml_stack_overflow_VEH (EXCEPTION_POINTERS* exn_info)
-{
- DWORD code = exn_info->ExceptionRecord->ExceptionCode;
- CONTEXT *ctx = exn_info->ContextRecord;
- DWORD *ctx_ip = &(ctx->Eip);
- DWORD *ctx_sp = &(ctx->Esp);
-
- if (code == EXCEPTION_STACK_OVERFLOW && Is_in_code_area (*ctx_ip))
- {
- uintnat faulting_address;
- uintnat * alt_esp;
-
- /* grab the address that caused the fault */
- faulting_address = exn_info->ExceptionRecord->ExceptionInformation[1];
-
- /* call caml_reset_stack(faulting_address) using the alternate stack */
- alt_esp = win32_alt_stack + sizeof(win32_alt_stack) / sizeof(uintnat);
- *--alt_esp = faulting_address;
- *ctx_sp = (uintnat) (alt_esp - 1);
- *ctx_ip = (uintnat) &caml_reset_stack;
-
- return EXCEPTION_CONTINUE_EXECUTION;
- }
-
- return EXCEPTION_CONTINUE_SEARCH;
-}
-
-#else
-extern char *caml_exception_pointer;
-extern value *caml_young_ptr;
-
-/* Do not use the macro from address_class.h here. */
-#undef Is_in_code_area
-#define Is_in_code_area(pc) \
- ( ((char *)(pc) >= caml_code_area_start && \
- (char *)(pc) <= caml_code_area_end) \
-|| ((char *)(pc) >= &caml_system__code_begin && \
- (char *)(pc) <= &caml_system__code_end) \
-|| (Classify_addr(pc) & In_code_area) )
-extern char caml_system__code_begin, caml_system__code_end;
-
-
-static LONG CALLBACK
- caml_stack_overflow_VEH (EXCEPTION_POINTERS* exn_info)
-{
- DWORD code = exn_info->ExceptionRecord->ExceptionCode;
- CONTEXT *ctx = exn_info->ContextRecord;
-
- if (code == EXCEPTION_STACK_OVERFLOW && Is_in_code_area (ctx->Rip))
- {
- uintnat faulting_address;
- uintnat * alt_rsp;
-
- /* grab the address that caused the fault */
- faulting_address = exn_info->ExceptionRecord->ExceptionInformation[1];
-
- /* refresh runtime parameters from registers */
- caml_exception_pointer = (char *) ctx->R14;
- caml_young_ptr = (value *) ctx->R15;
-
- /* call caml_reset_stack(faulting_address) using the alternate stack */
- alt_rsp = win32_alt_stack + sizeof(win32_alt_stack) / sizeof(uintnat);
- ctx->Rcx = faulting_address;
- ctx->Rsp = (uintnat) (alt_rsp - 4 - 1);
- ctx->Rip = (uintnat) &caml_reset_stack;
-
- return EXCEPTION_CONTINUE_EXECUTION;
- }
-
- return EXCEPTION_CONTINUE_SEARCH;
-}
-#endif /* _WIN64 */
-
-void caml_win32_overflow_detection(void)
-{
- AddVectoredExceptionHandler(1, caml_stack_overflow_VEH);
-}
-
-#endif /* NATIVE_CODE */
-
-/* Seeding of pseudo-random number generators */
-
-int caml_win32_random_seed (intnat data[16])
-{
- /* For better randomness, consider:
- http://msdn.microsoft.com/library/en-us/seccrypto/security/rtlgenrandom.asp
- http://blogs.msdn.com/b/michael_howard/archive/2005/01/14/353379.aspx
- */
- FILETIME t;
- LARGE_INTEGER pc;
- GetSystemTimeAsFileTime(&t);
- QueryPerformanceCounter(&pc); /* PR#6032 */
- data[0] = t.dwLowDateTime;
- data[1] = t.dwHighDateTime;
- data[2] = GetCurrentProcessId();
- data[3] = pc.LowPart;
- data[4] = pc.HighPart;
- return 5;
-}
-
-
-#if defined(_MSC_VER) && __STDC_SECURE_LIB__ >= 200411L
-
-static void invalid_parameter_handler(const wchar_t* expression,
- const wchar_t* function,
- const wchar_t* file,
- unsigned int line,
- uintptr_t pReserved)
-{
- /* no crash box */
-}
-
-
-void caml_install_invalid_parameter_handler()
-{
- _set_invalid_parameter_handler(invalid_parameter_handler);
-}
-
-#endif
-
-
-/* Recover executable name */
-
-wchar_t * caml_executable_name(void)
-{
- wchar_t * name;
- DWORD namelen, ret;
-
- namelen = 256;
- while (1) {
- name = caml_stat_alloc(namelen*sizeof(wchar_t));
- ret = GetModuleFileName(NULL, name, namelen);
- if (ret == 0) { caml_stat_free(name); return NULL; }
- if (ret < namelen) break;
- caml_stat_free(name);
- if (namelen >= 1024*1024) return NULL; /* avoid runaway and overflow */
- namelen *= 2;
- }
- return name;
-}
-
-/* snprintf emulation */
-
-#ifdef LACKS_VSCPRINTF
-/* No _vscprintf until Visual Studio .NET 2002 and sadly no version number
- in the CRT headers until Visual Studio 2005 so forced to predicate this
- on the compiler version instead */
-int _vscprintf(const char * format, va_list args)
-{
- int n;
- int sz = 5;
- char* buf = (char*)malloc(sz);
- n = _vsnprintf(buf, sz, format, args);
- while (n < 0 || n > sz) {
- sz += 512;
- buf = (char*)realloc(buf, sz);
- n = _vsnprintf(buf, sz, format, args);
- }
- free(buf);
- return n;
-}
-#endif
-
-#if defined(_WIN32) && !defined(_UCRT)
-int caml_snprintf(char * buf, size_t size, const char * format, ...)
-{
- int len;
- va_list args;
-
- if (size > 0) {
- va_start(args, format);
- len = _vsnprintf(buf, size, format, args);
- va_end(args);
- if (len >= 0 && len < size) {
- /* [len] characters were stored in [buf],
- a null-terminator was appended. */
- return len;
- }
- /* [size] characters were stored in [buf], without null termination.
- Put a null terminator, truncating the output. */
- buf[size - 1] = 0;
- }
- /* Compute the actual length of output, excluding null terminator */
- va_start(args, format);
- len = _vscprintf(format, args);
- va_end(args);
- return len;
-}
-#endif
-
-wchar_t *caml_secure_getenv (wchar_t const *var)
-{
- /* Win32 doesn't have a notion of setuid bit, so getenv is safe. */
- return _wgetenv(var);
-}
-
-/* caml_win32_getenv is used to implement Sys.getenv and Unix.getenv in such a
- way that they get direct access to the Win32 environment rather than to the
- copy that is cached by the C runtime system. The result of caml_win32_getenv
- is dynamically allocated and must be explicitly deallocated.
-
- In contrast, the OCaml runtime system still calls _wgetenv from the C runtime
- system, via caml_secure_getenv. The result is statically allocated and needs
- no deallocation. */
-CAMLexport wchar_t *caml_win32_getenv(wchar_t const *lpName)
-{
- wchar_t * lpBuffer;
- DWORD nSize = 256, res;
-
- lpBuffer = caml_stat_alloc_noexc(nSize * sizeof(wchar_t));
-
- if (lpBuffer == NULL)
- return NULL;
-
- res = GetEnvironmentVariable(lpName, lpBuffer, nSize);
-
- if (res == 0) {
- caml_stat_free(lpBuffer);
- return NULL;
- }
-
- if (res < nSize)
- return lpBuffer;
-
- nSize = res;
- lpBuffer = caml_stat_resize_noexc(lpBuffer, nSize * sizeof(wchar_t));
-
- if (lpBuffer == NULL)
- return NULL;
-
- res = GetEnvironmentVariable(lpName, lpBuffer, nSize);
-
- if (res == 0 || res >= nSize) {
- caml_stat_free(lpBuffer);
- return NULL;
- }
-
- return lpBuffer;
-}
-
-/* The rename() implementation in MSVC's CRT is based on MoveFile()
- and therefore fails if the new name exists. This is inconsistent
- with POSIX and a problem in practice. Here we reimplement
- rename() using MoveFileEx() to make it more POSIX-like.
- There are no official guarantee that the rename operation is atomic,
- but it is widely believed to be atomic on NTFS. */
-
-int caml_win32_rename(const wchar_t * oldpath, const wchar_t * newpath)
-{
- /* MOVEFILE_REPLACE_EXISTING: to be closer to POSIX
- MOVEFILE_COPY_ALLOWED: MoveFile performs a copy if old and new
- paths are on different devices, so we do the same here for
- compatibility with the old rename()-based implementation.
- MOVEFILE_WRITE_THROUGH: not sure it's useful; affects only
- the case where a copy is done. */
- if (MoveFileEx(oldpath, newpath,
- MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH |
- MOVEFILE_COPY_ALLOWED)) {
- return 0;
- }
- /* Modest attempt at mapping Win32 error codes to POSIX error codes.
- The __dosmaperr() function from the CRT does a better job but is
- generally not accessible. */
- switch (GetLastError()) {
- case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND:
- errno = ENOENT; break;
- case ERROR_ACCESS_DENIED: case ERROR_WRITE_PROTECT: case ERROR_CANNOT_MAKE:
- errno = EACCES; break;
- case ERROR_CURRENT_DIRECTORY: case ERROR_BUSY:
- errno = EBUSY; break;
- case ERROR_NOT_SAME_DEVICE:
- errno = EXDEV; break;
- case ERROR_ALREADY_EXISTS:
- errno = EEXIST; break;
- default:
- errno = EINVAL;
- }
- return -1;
-}
-
-/* Windows Unicode support */
-static uintnat windows_unicode_enabled = WINDOWS_UNICODE;
-
-/* If [windows_unicode_strict] is non-zero, then illegal UTF-8 characters (on
- the OCaml side) or illegal UTF-16 characters (on the Windows side) cause an
- error to be signaled. What happens then depends on the variable
- [windows_unicode_fallback].
-
- If [windows_unicode_strict] is zero, then illegal characters are silently
- dropped. */
-static uintnat windows_unicode_strict = 1;
-
-/* If [windows_unicode_fallback] is non-zero, then if an error is signaled when
- translating to UTF-16, the translation is re-done under the assumption that
- the argument string is encoded in the local codepage. */
-static uintnat windows_unicode_fallback = 1;
-
-CAMLexport int win_multi_byte_to_wide_char(const char *s, int slen, wchar_t *out, int outlen)
-{
- int retcode;
-
- CAMLassert (s != NULL);
-
- if (slen == 0)
- return 0;
-
- if (windows_unicode_enabled != 0) {
- retcode = MultiByteToWideChar(CP_UTF8, windows_unicode_strict ? MB_ERR_INVALID_CHARS : 0, s, slen, out, outlen);
- if (retcode == 0 && windows_unicode_fallback != 0)
- retcode = MultiByteToWideChar(CP_THREAD_ACP, 0, s, slen, out, outlen);
- } else {
- retcode = MultiByteToWideChar(CP_THREAD_ACP, 0, s, slen, out, outlen);
- }
-
- if (retcode == 0)
- caml_win32_sys_error(GetLastError());
-
- return retcode;
-}
-
-#ifndef WC_ERR_INVALID_CHARS /* For old versions of Windows we simply ignore the flag */
-#define WC_ERR_INVALID_CHARS 0
-#endif
-
-CAMLexport int win_wide_char_to_multi_byte(const wchar_t *s, int slen, char *out, int outlen)
-{
- int retcode;
-
- CAMLassert(s != NULL);
-
- if (slen == 0)
- return 0;
-
- if (windows_unicode_enabled != 0)
- retcode = WideCharToMultiByte(CP_UTF8, windows_unicode_strict ? WC_ERR_INVALID_CHARS : 0, s, slen, out, outlen, NULL, NULL);
- else
- retcode = WideCharToMultiByte(CP_THREAD_ACP, 0, s, slen, out, outlen, NULL, NULL);
-
- if (retcode == 0)
- caml_win32_sys_error(GetLastError());
-
- return retcode;
-}
-
-CAMLexport value caml_copy_string_of_utf16(const wchar_t *s)
-{
- int retcode, slen;
- value v;
-
- slen = wcslen(s);
- retcode = win_wide_char_to_multi_byte(s, slen, NULL, 0); /* Do not include final NULL */
- v = caml_alloc_string(retcode);
- win_wide_char_to_multi_byte(s, slen, String_val(v), retcode);
-
- return v;
-}
-
-CAMLexport inline wchar_t* caml_stat_strdup_to_utf16(const char *s)
-{
- wchar_t * ws;
- int retcode;
-
- retcode = win_multi_byte_to_wide_char(s, -1, NULL, 0);
- ws = malloc(retcode * sizeof(*ws));
- win_multi_byte_to_wide_char(s, -1, ws, retcode);
-
- return ws;
-}
-
-CAMLexport caml_stat_string caml_stat_strdup_of_utf16(const wchar_t *s)
-{
- caml_stat_string out;
- int retcode;
-
- retcode = win_wide_char_to_multi_byte(s, -1, NULL, 0);
- out = caml_stat_alloc(retcode);
- win_wide_char_to_multi_byte(s, -1, out, retcode);
-
- return out;
-}
-
-void caml_probe_win32_version(void)
-{
- /* Determine the version of Windows we're running, and cache it */
- WCHAR fileName[MAX_PATH];
- DWORD size =
- GetModuleFileName(GetModuleHandle(L"kernel32"), fileName, MAX_PATH);
- DWORD dwHandle = 0;
- BYTE* versionInfo;
- fileName[size] = 0;
- size = GetFileVersionInfoSize(fileName, &dwHandle);
- versionInfo = (BYTE*)malloc(size * sizeof(BYTE));
- if (GetFileVersionInfo(fileName, 0, size, versionInfo)) {
- UINT len = 0;
- VS_FIXEDFILEINFO* vsfi = NULL;
- VerQueryValue(versionInfo, L"\\", (void**)&vsfi, &len);
- caml_win32_major = HIWORD(vsfi->dwProductVersionMS);
- caml_win32_minor = LOWORD(vsfi->dwProductVersionMS);
- caml_win32_build = HIWORD(vsfi->dwProductVersionLS);
- caml_win32_revision = LOWORD(vsfi->dwProductVersionLS);
- }
- free(versionInfo);
-}
-
-static UINT startup_codepage = 0;
-
-void caml_setup_win32_terminal(void)
-{
- if (caml_win32_major >= 10) {
- startup_codepage = GetConsoleOutputCP();
- if (startup_codepage != CP_UTF8)
- SetConsoleOutputCP(CP_UTF8);
- }
-}
-
-void caml_restore_win32_terminal(void)
-{
- if (startup_codepage != 0)
- SetConsoleOutputCP(startup_codepage);
-}
-
-/* Detect if a named pipe corresponds to a Cygwin/MSYS pty: see
- https://github.com/mirror/newlib-cygwin/blob/00e9bf2/winsup/cygwin/dtable.cc#L932
-*/
-typedef
-BOOL (WINAPI *tGetFileInformationByHandleEx)(HANDLE, FILE_INFO_BY_HANDLE_CLASS,
- LPVOID, DWORD);
-
-static int caml_win32_is_cygwin_pty(HANDLE hFile)
-{
- char buffer[1024];
- FILE_NAME_INFO * nameinfo = (FILE_NAME_INFO *) buffer;
- static tGetFileInformationByHandleEx pGetFileInformationByHandleEx = INVALID_HANDLE_VALUE;
-
- if (pGetFileInformationByHandleEx == INVALID_HANDLE_VALUE)
- pGetFileInformationByHandleEx =
- (tGetFileInformationByHandleEx)GetProcAddress(GetModuleHandle(L"KERNEL32.DLL"),
- "GetFileInformationByHandleEx");
-
- if (pGetFileInformationByHandleEx == NULL)
- return 0;
-
- /* Get pipe name. GetFileInformationByHandleEx does not NULL-terminate the string, so reduce
- the buffer size to allow for adding one. */
- if (! pGetFileInformationByHandleEx(hFile, FileNameInfo, buffer, sizeof(buffer) - sizeof(WCHAR)))
- return 0;
-
- nameinfo->FileName[nameinfo->FileNameLength / sizeof(WCHAR)] = L'\0';
-
- /* check if this could be a msys pty pipe ('msys-XXXX-ptyN-XX')
- or a cygwin pty pipe ('cygwin-XXXX-ptyN-XX') */
- if ((wcsstr(nameinfo->FileName, L"msys-") ||
- wcsstr(nameinfo->FileName, L"cygwin-")) && wcsstr(nameinfo->FileName, L"-pty"))
- return 1;
-
- return 0;
-}
-
-CAMLexport int caml_win32_isatty(int fd)
-{
- DWORD lpMode;
- HANDLE hFile = (HANDLE)_get_osfhandle(fd);
-
- if (hFile == INVALID_HANDLE_VALUE)
- return 0;
-
- switch (GetFileType(hFile)) {
- case FILE_TYPE_CHAR:
- /* Both console handles and the NUL device are FILE_TYPE_CHAR. The NUL
- device returns FALSE for a GetConsoleMode call. _isatty incorrectly
- only uses GetFileType (see GPR#1321). */
- return GetConsoleMode(hFile, &lpMode);
- case FILE_TYPE_PIPE:
- /* Cygwin PTYs are implemented using named pipes */
- return caml_win32_is_cygwin_pty(hFile);
- default:
- break;
- }
-
- return 0;
-}
-
-int caml_num_rows_fd(int fd)
-{
- return -1;
-}
diff --git a/test/monniaux/picosat-965/Makefile b/test/monniaux/picosat-965/Makefile
new file mode 100644
index 00000000..69613a79
--- /dev/null
+++ b/test/monniaux/picosat-965/Makefile
@@ -0,0 +1,34 @@
+EXECUTE_ARGS=sudoku.sat
+
+include ../rules.mk
+
+ALL_CFLAGS = -DNDEBUG
+EMBEDDED_CFLAGS = -DNALARM -DNZIP -DNGETRUSAGE
+K1C_CFLAGS += $(EMBEDDED_CFLAGS)
+K1C_CCOMPFLAGS += $(EMBEDDED_CFLAGS)
+CCOMPFLAGS += -fbitfields
+K1C_CCOMPFLAGS += -fbitfields
+
+K1C_CFLAGS += $(ALL_CFLAGS)
+K1C_CCOMPFLAGS += $(ALL_CFLAGS)
+CCOMPFLAGS += $(ALL_CFLAGS)
+CFLAGS += $(ALL_CFLAGS)
+
+all: picosat.ccomp.k1c.s version.ccomp.k1c.s app.ccomp.k1c.s main.ccomp.k1c.s picosat.gcc.k1c.s version.gcc.k1c.s app.gcc.k1c.s main.gcc.k1c.s picosat.ccomp.k1c.out picosat.gcc.k1c.out picosat.ccomp.host.out picosat.gcc.host.out
+
+picosat.ccomp.k1c : picosat.ccomp.k1c.s version.ccomp.k1c.s app.ccomp.k1c.s main.ccomp.k1c.s ../clock.gcc.k1c.o
+ $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
+
+picosat.gcc.k1c : picosat.gcc.k1c.s version.gcc.k1c.s app.gcc.k1c.s main.gcc.k1c.s ../clock.gcc.k1c.o
+ $(K1C_CC) $(K1C_CFLAGS) $+ -o $@
+
+picosat.ccomp.host : picosat.ccomp.host.s version.ccomp.host.s app.ccomp.host.s main.ccomp.host.s ../clock.gcc.host.o
+ $(CCOMP) $(CCOMPFLAGS) $+ -o $@
+
+picosat.gcc.host : picosat.gcc.host.s version.gcc.host.s app.gcc.host.s main.gcc.host.s ../clock.gcc.host.o
+ $(CC) $(CFLAGS) $+ -o $@
+
+clean:
+ -rm -f *.s *.k1c *.host *.out
+
+.PHONY: all clean
diff --git a/test/monniaux/picosat-965/app.c b/test/monniaux/picosat-965/app.c
index d817cf21..64ebdbd0 100644
--- a/test/monniaux/picosat-965/app.c
+++ b/test/monniaux/picosat-965/app.c
@@ -12,7 +12,7 @@
#define BUNZIP2 "bzcat %s"
#define GZIP "gzip -c -f > %s"
-#if 0
+#ifndef NZIP
FILE * popen (const char *, const char*);
int pclose (FILE *);
#endif
@@ -542,7 +542,7 @@ picosat_main (int argc, char **argv)
unsigned seed;
FILE *file;
int trace;
-
+
start_time = picosat_time_stamp ();
sargc = argc;
diff --git a/test/monniaux/picosat-965/main.c b/test/monniaux/picosat-965/main.c
index 03fad79f..13d7b0e5 100644
--- a/test/monniaux/picosat-965/main.c
+++ b/test/monniaux/picosat-965/main.c
@@ -1,7 +1,25 @@
+#define VERIMAG_MEASUREMENTS
+#ifdef VERIMAG_MEASUREMENTS
+#include "../clock.h"
+#endif
+
int picosat_main (int, char **);
int
main (int argc, char **argv)
{
- return picosat_main (argc, argv);
+
+#ifdef VERIMAG_MEASUREMENTS
+ clock_prepare();
+ clock_start();
+#endif
+
+ int ret= picosat_main (argc, argv);
+
+#ifdef VERIMAG_MEASUREMENTS
+ clock_stop();
+ print_total_clock();
+#endif
+
+ return ret;
}
diff --git a/test/monniaux/picosat-965/picosat.c b/test/monniaux/picosat-965/picosat.c
index aca9d962..21442f44 100644
--- a/test/monniaux/picosat-965/picosat.c
+++ b/test/monniaux/picosat-965/picosat.c
@@ -31,6 +31,8 @@ IN THE SOFTWARE.
#include "picosat.h"
+#define INLINE inline
+
/* By default code for 'all different constraints' is disabled, since 'NADC'
* is defined.
*/
@@ -730,7 +732,7 @@ struct PicoSAT
typedef PicoSAT PS;
-static Flt
+static INLINE Flt
packflt (unsigned m, int e)
{
Flt res;
@@ -942,13 +944,13 @@ flt2double (Flt f)
#endif
-static int
+static INLINE int
log2flt (Flt a)
{
return FLTEXPONENT (a) + 24;
}
-static int
+static INLINE int
cmpflt (Flt a, Flt b)
{
if (a < b)
@@ -1058,19 +1060,19 @@ resize (PS * ps, void *void_ptr, size_t old_size, size_t new_size)
return b->data;
}
-static unsigned
+static INLINE unsigned
int2unsigned (int l)
{
return (l < 0) ? 1 + 2 * -l : 2 * l;
}
-static Lit *
+static INLINE Lit *
int2lit (PS * ps, int l)
{
return ps->lits + int2unsigned (l);
}
-static Lit **
+static INLINE Lit **
end_of_lits (Cls * c)
{
return (Lit**)c->lits + c->size;
@@ -1153,7 +1155,7 @@ dumpcnf (PS * ps)
#endif
-static void
+static INLINE void
delete_prefix (PS * ps)
{
if (!ps->prefix)
@@ -1437,7 +1439,7 @@ lrelease (PS * ps, Ltk * stk)
#ifndef NADC
-static unsigned
+static INLINE unsigned
llength (Lit ** a)
{
Lit ** p;
@@ -1446,7 +1448,7 @@ llength (Lit ** a)
return p - a;
}
-static void
+static INLINE void
resetadoconflict (PS * ps)
{
assert (ps->adoconflict);
@@ -1454,7 +1456,7 @@ resetadoconflict (PS * ps)
ps->adoconflict = 0;
}
-static void
+static INLINE void
reset_ados (PS * ps)
{
Lit *** p;
@@ -1565,7 +1567,7 @@ tpush (PS * ps, Lit * lit)
*ps->thead++ = lit;
}
-static void
+static INLINE void
assign_reason (PS * ps, Var * v, Cls * reason)
{
#if defined(NO_BINARY_CLAUSES) && !defined(NDEBUG)
@@ -1665,7 +1667,7 @@ cmp_added (PS * ps, Lit * k, Lit * l)
return u - v; /* smaller index first */
}
-static void
+static INLINE void
sorttwolits (Lit ** v)
{
Lit * a = v[0], * b = v[1];
@@ -1689,7 +1691,7 @@ sortlits (PS * ps, Lit ** v, unsigned size)
}
#ifdef NO_BINARY_CLAUSES
-static Cls *
+static INLINE Cls *
setimpl (PS * ps, Lit * a, Lit * b)
{
assert (!ps->implvalid);
@@ -1704,7 +1706,7 @@ setimpl (PS * ps, Lit * a, Lit * b)
return &ps->impl;
}
-static void
+static INLINE void
resetimpl (PS * ps)
{
ps->implvalid = 0;
@@ -1725,7 +1727,7 @@ setcimpl (PS * ps, Lit * a, Lit * b)
return &ps->cimpl;
}
-static void
+static INLINE void
resetcimpl (PS * ps)
{
assert (ps->cimplvalid);
@@ -1734,7 +1736,7 @@ resetcimpl (PS * ps)
#endif
-static int
+static INLINE int
cmp_ptr (PS * ps, void *l, void *k)
{
(void) ps;
@@ -1831,7 +1833,7 @@ add_antecedent (PS * ps, Cls * c)
#endif /* TRACE */
-static void
+static INLINE void
add_lit (PS * ps, Lit * lit)
{
assert (lit);
@@ -1842,7 +1844,7 @@ add_lit (PS * ps, Lit * lit)
*ps->ahead++ = lit;
}
-static void
+static INLINE void
push_var_as_marked (PS * ps, Var * v)
{
if (ps->mhead == ps->eom)
@@ -1851,7 +1853,7 @@ push_var_as_marked (PS * ps, Var * v)
*ps->mhead++ = v;
}
-static void
+static INLINE void
mark_var (PS * ps, Var * v)
{
assert (!v->mark);
@@ -1960,7 +1962,7 @@ fixvar (PS * ps, Var * v)
hup (ps, r);
}
-static void
+static INLINE void
use_var (PS * ps, Var * v)
{
if (v->used)
@@ -2104,7 +2106,7 @@ zpush (PS * ps, Zhn * zhain)
*ps->zhead++ = zhain;
}
-static int
+static INLINE int
cmp_resolved (PS * ps, Cls * c, Cls * d)
{
#ifndef NDEBUG
@@ -2115,7 +2117,7 @@ cmp_resolved (PS * ps, Cls * c, Cls * d)
return CLS2IDX (c) - CLS2IDX (d);
}
-static void
+static INLINE void
bpushc (PS * ps, unsigned char ch)
{
if (ps->bhead == ps->eob)
@@ -2124,7 +2126,7 @@ bpushc (PS * ps, unsigned char ch)
*ps->bhead++ = ch;
}
-static void
+static INLINE void
bpushu (PS * ps, unsigned u)
{
while (u & ~0x7f)
@@ -2136,7 +2138,7 @@ bpushu (PS * ps, unsigned u)
bpushc (ps, u);
}
-static void
+static INLINE void
bpushd (PS * ps, unsigned prev, unsigned this)
{
unsigned delta;
@@ -2802,7 +2804,7 @@ hpush (PS * ps, Rnk * r)
hup (ps, r);
}
-static void
+static INLINE void
fix_trail_lits (PS * ps, long delta)
{
Lit **p;
@@ -2847,7 +2849,7 @@ fix_clause_lits (PS * ps, long delta)
}
}
-static void
+static INLINE void
fix_added_lits (PS * ps, long delta)
{
Lit **p;
@@ -2855,7 +2857,7 @@ fix_added_lits (PS * ps, long delta)
*p += delta;
}
-static void
+static INLINE void
fix_assumed_lits (PS * ps, long delta)
{
Lit **p;
@@ -2863,7 +2865,7 @@ fix_assumed_lits (PS * ps, long delta)
*p += delta;
}
-static void
+static INLINE void
fix_cls_lits (PS * ps, long delta)
{
Lit **p;
@@ -2871,7 +2873,7 @@ fix_cls_lits (PS * ps, long delta)
*p += delta;
}
-static void
+static INLINE void
fix_heap_rnks (PS * ps, long delta)
{
Rnk **p;
@@ -2882,7 +2884,7 @@ fix_heap_rnks (PS * ps, long delta)
#ifndef NADC
-static void
+static INLINE void
fix_ado (long delta, Lit ** ado)
{
Lit ** p;
@@ -2890,7 +2892,7 @@ fix_ado (long delta, Lit ** ado)
*p += delta;
}
-static void
+static INLINE void
fix_ados (PS * ps, long delta)
{
Lit *** p;
@@ -3051,7 +3053,7 @@ var2reason (PS * ps, Var * var)
return res;
}
-static void
+static INLINE void
mark_clause_to_be_collected (Cls * c)
{
assert (!c->collect);
@@ -3171,7 +3173,7 @@ mb (PS * ps)
return ps->current_bytes / (double) (1 << 20);
}
-static double
+static INLINE double
avglevel (PS * ps)
{
return ps->decisions ? ps->levelsum / ps->decisions : 0.0;
@@ -3497,13 +3499,13 @@ inc_activity (PS * ps, Cls * c)
*p = addflt (*p, ps->cinc);
}
-static unsigned
+static INLINE unsigned
hashlevel (unsigned l)
{
return 1u << (l & 31);
}
-static void
+static INLINE void
push (PS * ps, Var * v)
{
if (ps->dhead == ps->eod)
@@ -3512,7 +3514,7 @@ push (PS * ps, Var * v)
*ps->dhead++ = v;
}
-static Var *
+static INLINE Var *
pop (PS * ps)
{
assert (ps->dfs < ps->dhead);
@@ -4551,7 +4553,7 @@ force (PS * ps, Cls * c)
assign_forced (ps, forced, reason);
}
-static void
+static INLINE void
inc_lreduce (PS * ps)
{
#ifdef STATS
@@ -4811,7 +4813,7 @@ collect_clauses (PS * ps)
return res;
}
-static int
+static INLINE int
need_to_reduce (PS * ps)
{
return ps->nlclauses >= reduce_limit_on_lclauses (ps);
@@ -4975,7 +4977,7 @@ assign_decision (PS * ps, Lit * lit)
#ifndef NFL
-static int
+static INLINE int
lit_has_binary_clauses (PS * ps, Lit * lit)
{
#ifdef NO_BINARY_CLAUSES
@@ -4998,7 +5000,7 @@ flbcp (PS * ps)
#endif
}
-inline static int
+inline static INLINE int
cmp_inverse_rnk (PS * ps, Rnk * a, Rnk * b)
{
(void) ps;
@@ -5635,7 +5637,7 @@ init_reduce (PS * ps)
ps->prefix, ps->prefix, ps->lreduce, ps->prefix);
}
-static unsigned
+static INLINE unsigned
rng (PS * ps)
{
unsigned res = ps->srng;
@@ -6429,25 +6431,25 @@ reset_assumptions (PS * ps)
ps->adecidelevel = 0;
}
-static void
+static INLINE void
check_ready (PS * ps)
{
ABORTIF (!ps || ps->state == RESET, "API usage: uninitialized");
}
-static void
+static INLINE void
check_sat_state (PS * ps)
{
ABORTIF (ps->state != SAT, "API usage: expected to be in SAT state");
}
-static void
+static INLINE void
check_unsat_state (PS * ps)
{
ABORTIF (ps->state != UNSAT, "API usage: expected to be in UNSAT state");
}
-static void
+static INLINE void
check_sat_or_unsat_or_unknown_state (PS * ps)
{
ABORTIF (ps->state != SAT && ps->state != UNSAT && ps->state != UNKNOWN,
@@ -6525,7 +6527,7 @@ enter (PS * ps)
ps->entered = picosat_time_stamp ();
}
-static void
+static INLINE void
leave (PS * ps)
{
assert (ps->nentered);
diff --git a/test/monniaux/rules.mk b/test/monniaux/rules.mk
index c8412479..09b845e5 100644
--- a/test/monniaux/rules.mk
+++ b/test/monniaux/rules.mk
@@ -1,15 +1,17 @@
+ALL_CCOMPFLAGS=-fno-unprototyped
CCOMP=ccomp
-CCOMPFLAGS=-g -O3 -Wall -fno-unprototyped
+CCOMPFLAGS=-g -O3 -Wall $(ALL_CCOMPFLAGS) $(ALL_CFLAGS)
-CFLAGS=-g -std=c99 -O3 -Wall -Wextra -Werror=implicit
+CFLAGS=-g -std=c99 -O3 -Wall -Wextra -Werror=implicit $(ALL_CFLAGS)
K1C_CC=k1-mbr-gcc
-K1C_CFLAGS =-g -std=c99 -O2 -Wall -Wextra -Werror=implicit
+K1C_CFLAGS =-g -std=c99 -O2 -Wall -Wextra -Werror=implicit $(ALL_CFLAGS)
K1C_CCOMP = ../../../ccomp
-K1C_CCOMPFLAGS=-O3 -Wall -Wno-c11-extensions -fno-unprototyped # -fpostpass-ilp
+K1C_CCOMPFLAGS=-O3 -Wall -Wno-c11-extensions $(ALL_CCOMPFLAGS) $(ALL_CFLAGS) # -fpostpass-ilp
EXECUTE=k1-cluster --syscall=libstd_scalls.so --
+EXECUTE_CYCLES=k1-cluster --syscall=libstd_scalls.so --cycle-based --
%.gcc.host.o : %.gcc.host.s
$(CC) $(CFLAGS) -c -o $@ $<
@@ -48,7 +50,7 @@ EXECUTE=k1-cluster --syscall=libstd_scalls.so --
# $(CCOMP) $(CCOMPFLAGS) $+ -o $@
%.k1c.out : %.k1c
- k1-cluster --cycle-based -- $< |tee $@
+ $(EXECUTE_CYCLES) $< $(EXECUTE_ARGS) |tee $@
%.host.out : %.host
- ./$< |tee $@
+ ./$< $(EXECUTE_ARGS) |tee $@
diff --git a/test/monniaux/ternary/Makefile b/test/monniaux/ternary/Makefile
new file mode 100644
index 00000000..b051b397
--- /dev/null
+++ b/test/monniaux/ternary/Makefile
@@ -0,0 +1,31 @@
+include ../rules.mk
+
+# Captured run outputs for the gcc and ccomp builds on the host and on the
+# k1c target, plus the k1c assembly files of both compilers.
+PRODUCTS=ternary.gcc.host.out ternary.ccomp.host.out \
+	ternary.gcc.k1c.out ternary.ccomp.k1c.out \
+	ternary.gcc.k1c.s ternary.ccomp.k1c.s
+
+all: $(PRODUCTS)
+
+# Every generated assembly file also depends on ../clock.h.
+ternary.gcc.host.s ternary.ccomp.host.s ternary.gcc.k1c.s ternary.ccomp.k1c.s : ../clock.h
+
+ternary.ccomp.host: ternary.ccomp.host.o ../clock.gcc.host.o
+	$(CCOMP) $(CCOMPFLAGS) $+ -o $@
+
+ternary.gcc.host: ternary.gcc.host.o ../clock.gcc.host.o
+	$(CC) $(CFLAGS) $+ -o $@
+
+ternary.gcc.k1c: ternary.gcc.k1c.o ../clock.gcc.k1c.o
+	$(K1C_CC) $(K1C_CFLAGS) $+ -o $@
+
+ternary.ccomp.k1c: ternary.ccomp.k1c.o ../clock.gcc.k1c.o
+	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
+
+# Remove everything this Makefile generates: objects, assembly, the k1c and
+# host executables (*.k1c, *.host) and the captured run outputs (*.out).
+clean:
+	-rm -f *.o *.s *.k1c *.host *.out
+
+.PHONY: all clean
diff --git a/test/monniaux/ternary/ternary.c b/test/monniaux/ternary/ternary.c
new file mode 100644
index 00000000..79025639
--- /dev/null
+++ b/test/monniaux/ternary/ternary.c
@@ -0,0 +1,45 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include "../clock.h"
+
+typedef uint32_t data;
+
+/*
+ * TERNARY(a, b, c) selects b when a is 1 and c when a is 0.
+ * The enabled variant is branch-free: -(a) is an all-ones mask for a == 1
+ * and zero for a == 0, while -1+(a) is the complementary mask.  It is
+ * only correct when a is exactly 0 or 1.  Flip the #if to use ?: instead.
+ */
+#if 0
+#define TERNARY(a, b, c) ((a) ? (b) : (c))
+#else
+#define TERNARY(a, b, c) (((-(a)) & (b)) | ((-1+(a)) & (c)))
+#endif
+
+/*
+ * Benchmark kernel: starting from 1, multiply x 10000 times by 45561 when
+ * bit 8 of x is set and by 337777 otherwise (the result wraps modulo 2^32).
+ * Returns the final x.
+ */
+data silly_computation(void) {
+  data x = 1;
+  for(int i=0; i<10000; i++) {
+    x = x * TERNARY(((x & 0x100) != 0), 45561U, 337777U);
+  }
+  return x;
+}
+
+/*
+ * Brackets one call of silly_computation() with the clock_* helpers and
+ * prints the computed value together with get_total_clock().
+ */
+int main(void) {
+  clock_prepare();
+  clock_start();
+  data result = silly_computation();
+  clock_stop();
+  /* NOTE(review): PRIu64 assumes get_total_clock() returns a 64-bit unsigned value; confirm in ../clock.h */
+  printf("result=%" PRIu32 "\ncycles=%" PRIu64 "\n", result, get_total_clock());
+  return 0;
+}
diff --git a/test/monniaux/too_slow/Makefile b/test/monniaux/too_slow/Makefile
new file mode 100644
index 00000000..bdc23def
--- /dev/null
+++ b/test/monniaux/too_slow/Makefile
@@ -0,0 +1,27 @@
+include ../rules.mk
+
+PRODUCTS=memset_from_bitsliced-aes.gcc.host.out memset_from_bitsliced-aes.ccomp.host.out \
+ memset_from_bitsliced-aes.gcc.k1c.out memset_from_bitsliced-aes.ccomp.k1c.out \
+ memset_from_bitsliced-aes.gcc.k1c.s memset_from_bitsliced-aes.ccomp.k1c.s
+
+all: $(PRODUCTS)
+
+memset_from_bitsliced-aes.gcc.host.s memset_from_bitsliced-aes.ccomp.host.s memset_from_bitsliced-aes.gcc.k1c.s memset_from_bitsliced-aes.ccomp.k1c.s : ../clock.h
+
+memset_from_bitsliced-aes.ccomp.host: memset_from_bitsliced-aes.ccomp.host.o ../clock.gcc.host.o
+ $(CCOMP) $(CCOMPFLAGS) $+ -o $@
+
+memset_from_bitsliced-aes.gcc.host: memset_from_bitsliced-aes.gcc.host.o ../clock.gcc.host.o
+ $(CC) $(CFLAGS) $+ -o $@
+
+memset_from_bitsliced-aes.gcc.k1c: memset_from_bitsliced-aes.gcc.k1c.o ../clock.gcc.k1c.o
+ $(K1C_CC) $(K1C_CFLAGS) $+ -o $@
+
+memset_from_bitsliced-aes.ccomp.k1c: memset_from_bitsliced-aes.ccomp.k1c.o ../clock.gcc.k1c.o
+ $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
+
+clean:
+ -rm -f *.o *.s *.k1c
+
+.PHONY: clean
+
diff --git a/test/monniaux/too_slow/memset_from_bitsliced-aes.c b/test/monniaux/too_slow/memset_from_bitsliced-aes.c
new file mode 100644
index 00000000..32137b55
--- /dev/null
+++ b/test/monniaux/too_slow/memset_from_bitsliced-aes.c
@@ -0,0 +1,43 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "../clock.h"
+
+typedef uint64_t a; /* element type of the global buffer n */
+a n[128]; /* file-scope buffer of 128 elements; b() clears it with memset */
+int o, bs_expand_key_k; /* loop counters used by b(); file-scope, so both start at 0 */
+void b(a (*)[], uint8_t *); /* forward declaration: c() calls b() before b() is defined */
+void c(uint8_t d, uint8_t e, size_t f, uint8_t g, uint8_t iv) { /* calls b(); d, e, f and iv are unused */
+ a i[1]; /* one-element local array, never initialized */
+ b(i, g); /* NOTE(review): i decays to a * and g is a uint8_t, but b() is declared with a (*)[] and uint8_t *; presumably residue of test-case reduction, confirm before "fixing" */
+}
+
+void b(a (*i)[], uint8_t *j) { /* repeatedly zeroes the global n; neither i nor j is read */
+ for (; o < 176; o += 8) { /* global o is never reset: 22 iterations on the first call, none on later calls */
+ bs_expand_key_k = 4;
+ for (; bs_expand_key_k < 128; bs_expand_key_k += 128 / 64) /* counts 4, 6, ... up to 128; the body is empty */
+ ;
+ memset(n, 0, sizeof(n)); /* clears all 128 elements of n on every outer iteration */
+ }
+}
+
+void aes_ctr_test() { /* calls c() once with placeholder arguments */
+ uint8_t k = ""; /* NOTE(review): string literal used to initialize a uint8_t (also l and m below); looks like reduction residue, confirm */
+ uint8_t l = "";
+ uint8_t m = "";
+ uint8_t output[4]; /* never initialized */
+ c(output, m, 4, k, l); /* output is an array passed where c() declares a plain uint8_t parameter */
+}
+
+int main(int argc, char * argv[]) /* argc and argv are not used */
+{
+ clock_prepare(); /* clock_* and print_total_clock are presumably declared in ../clock.h; confirm their semantics there */
+
+ clock_start();
+
+ aes_ctr_test(); /* the only work between clock_start() and clock_stop() */
+ clock_stop();
+ print_total_clock();
+
+ return 0;
+}