about summary refs log tree commit diff stats
path: root/test/monniaux/BearSSL/src/symcipher
diff options
context:
space:
mode:
Diffstat (limited to 'test/monniaux/BearSSL/src/symcipher')
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_big_cbcdec.c 69
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_big_cbcenc.c 67
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_big_ctr.c 84
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_big_ctrcbc.c 142
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_big_dec.c 254
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_big_enc.c 157
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_common.c 112
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct.c 328
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct64.c 398
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcdec.c 104
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcenc.c 81
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct64_ctr.c 114
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct64_ctrcbc.c 433
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct64_dec.c 159
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct64_enc.c 115
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct_cbcdec.c 111
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct_cbcenc.c 91
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct_ctr.c 116
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct_ctrcbc.c 422
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct_dec.c 170
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_ct_enc.c 112
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_pwr8.c 445
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcdec.c 670
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcenc.c 417
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctr.c 717
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctrcbc.c 946
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_small_cbcdec.c 69
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_small_cbcenc.c 67
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_small_ctr.c 84
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_small_ctrcbc.c 142
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_small_dec.c 176
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_small_enc.c 129
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_x86ni.c 240
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcdec.c 223
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcenc.c 122
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctr.c 211
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctrcbc.c 596
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/chacha20_ct.c 106
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/chacha20_sse2.c 237
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/des_ct.c 411
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/des_ct_cbcdec.c 87
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/des_ct_cbcenc.c 69
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/des_support.c 166
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/des_tab.c 310
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/des_tab_cbcdec.c 85
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/des_tab_cbcenc.c 67
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/poly1305_ctmul.c 260
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/poly1305_ctmul32.c 297
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/poly1305_ctmulq.c 475
-rw-r--r-- test/monniaux/BearSSL/src/symcipher/poly1305_i15.c 221
50 files changed, 11684 insertions, 0 deletions
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_big_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/aes_big_cbcdec.c
new file mode 100644
index 00000000..d969a3bf
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_big_cbcdec.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: set up an AES CBC-decryption context ("big" table variant) from a raw key */
+void
+br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx,
+ const void *key, size_t len) /* key, len: key bytes and their count, handed to the key schedule */
+{
+ ctx->vtable = &br_aes_big_cbcdec_vtable; /* tag the context with this implementation's vtable */
+ ctx->num_rounds = br_aes_big_keysched_inv(ctx->skey, key, len); /* fills ctx->skey with decryption subkeys; returns the round count */
+}
+
+/* see bearssl_block.h: CBC-decrypt len bytes of data in place, 16 bytes per iteration; iv is read and updated */
+void
+br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx,
+ void *iv, void *data, size_t len) /* NOTE(review): len is presumably a multiple of 16 (len -= 16 would wrap otherwise) -- confirm in bearssl_block.h */
+{
+ unsigned char *buf, *ivbuf;
+
+ ivbuf = iv;
+ buf = data;
+ while (len > 0) {
+ unsigned char tmp[16]; /* copy of the current ciphertext block, kept for chaining */
+ int i;
+
+ memcpy(tmp, buf, 16); /* save the ciphertext before it is overwritten */
+ br_aes_big_decrypt(ctx->num_rounds, ctx->skey, buf); /* raw block decryption, in place */
+ for (i = 0; i < 16; i ++) {
+ buf[i] ^= ivbuf[i]; /* XOR with the chaining value to obtain the plaintext */
+ }
+ memcpy(ivbuf, tmp, 16); /* this block's ciphertext becomes the next chaining value */
+ buf += 16;
+ len -= 16;
+ }
+}
+
+/* see bearssl_block.h: vtable publishing this implementation as a br_block_cbcdec_class */
+const br_block_cbcdec_class br_aes_big_cbcdec_vtable = {
+ sizeof(br_aes_big_cbcdec_keys), /* size of the context structure */
+ 16, /* presumably the block size in bytes -- confirm field order in bearssl_block.h */
+ 4, /* presumably log2 of the block size -- confirm */
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t)) /* cast: generic signature takes a vtable pointer in place of the concrete context */
+ &br_aes_big_cbcdec_init,
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t)) /* same kind of cast for the run function */
+ &br_aes_big_cbcdec_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_big_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/aes_big_cbcenc.c
new file mode 100644
index 00000000..265e53b8
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_big_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: set up an AES CBC-encryption context ("big" table variant) from a raw key */
+void
+br_aes_big_cbcenc_init(br_aes_big_cbcenc_keys *ctx,
+ const void *key, size_t len) /* key, len: key bytes and their count, handed to the key schedule */
+{
+ ctx->vtable = &br_aes_big_cbcenc_vtable; /* tag the context with this implementation's vtable */
+ ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); /* key schedule writes into ctx->skey; its return value is kept as the round count */
+}
+
+/* see bearssl_block.h: CBC-encrypt len bytes of data in place, 16 bytes per iteration; iv is read and updated */
+void
+br_aes_big_cbcenc_run(const br_aes_big_cbcenc_keys *ctx,
+ void *iv, void *data, size_t len) /* NOTE(review): len is presumably a multiple of 16 (len -= 16 would wrap otherwise) -- confirm in bearssl_block.h */
+{
+ unsigned char *buf, *ivbuf;
+
+ ivbuf = iv;
+ buf = data;
+ while (len > 0) {
+ int i;
+
+ for (i = 0; i < 16; i ++) {
+ buf[i] ^= ivbuf[i]; /* mix the chaining value into the plaintext block */
+ }
+ br_aes_big_encrypt(ctx->num_rounds, ctx->skey, buf); /* raw block encryption, in place */
+ memcpy(ivbuf, buf, 16); /* the fresh ciphertext block becomes the next chaining value */
+ buf += 16;
+ len -= 16;
+ }
+}
+
+/* see bearssl_block.h: vtable publishing this implementation as a br_block_cbcenc_class */
+const br_block_cbcenc_class br_aes_big_cbcenc_vtable = {
+ sizeof(br_aes_big_cbcenc_keys), /* size of the context structure */
+ 16, /* presumably the block size in bytes -- confirm field order in bearssl_block.h */
+ 4, /* presumably log2 of the block size -- confirm */
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t)) /* cast: generic signature takes a vtable pointer in place of the concrete context */
+ &br_aes_big_cbcenc_init,
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) /* same kind of cast for the run function */
+ &br_aes_big_cbcenc_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_big_ctr.c b/test/monniaux/BearSSL/src/symcipher/aes_big_ctr.c
new file mode 100644
index 00000000..18fbb846
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_big_ctr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: set up an AES CTR context ("big" table variant) from a raw key */
+void
+br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx,
+ const void *key, size_t len) /* key, len: key bytes and their count, handed to the key schedule */
+{
+ ctx->vtable = &br_aes_big_ctr_vtable; /* tag the context with this implementation's vtable */
+ ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); /* key schedule writes into ctx->skey; its return value is kept as the round count */
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len) /* XOR the first len bytes of src into dst: dst[i] ^= src[i] */
+{
+ unsigned char *d;
+ const unsigned char *s;
+
+ d = dst;
+ s = src;
+ while (len -- > 0) { /* runs exactly len times; len == 0 does nothing */
+ *d ++ ^= *s ++;
+ }
+}
+
+/* see bearssl_block.h: XOR len bytes of data, in place, with an AES-CTR keystream; returns the advanced counter */
+uint32_t
+br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx,
+ const void *iv, uint32_t cc, void *data, size_t len) /* iv: 12 bytes are read; cc: 32-bit counter for the first block */
+{
+ unsigned char *buf;
+
+ buf = data;
+ while (len > 0) {
+ unsigned char tmp[16]; /* counter block, turned into a keystream block below */
+
+ memcpy(tmp, iv, 12); /* bytes 0..11: the IV */
+ br_enc32be(tmp + 12, cc ++); /* bytes 12..15: the counter (big-endian, going by the helper's name); cc then advances */
+ br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp); /* tmp now holds the keystream block */
+ if (len <= 16) {
+ xorbuf(buf, tmp, len); /* last block, possibly shorter than 16 bytes */
+ break;
+ }
+ xorbuf(buf, tmp, 16);
+ buf += 16;
+ len -= 16;
+ }
+ return cc; /* incremented once per keystream block produced; unchanged when len == 0 */
+}
+
+/* see bearssl_block.h: vtable publishing this implementation as a br_block_ctr_class */
+const br_block_ctr_class br_aes_big_ctr_vtable = {
+ sizeof(br_aes_big_ctr_keys), /* size of the context structure */
+ 16, /* presumably the block size in bytes -- confirm field order in bearssl_block.h */
+ 4, /* presumably log2 of the block size -- confirm */
+ (void (*)(const br_block_ctr_class **, const void *, size_t)) /* cast: generic signature takes a vtable pointer in place of the concrete context */
+ &br_aes_big_ctr_init,
+ (uint32_t (*)(const br_block_ctr_class *const *,
+ const void *, uint32_t, void *, size_t)) /* same kind of cast for the run function */
+ &br_aes_big_ctr_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_big_ctrcbc.c b/test/monniaux/BearSSL/src/symcipher/aes_big_ctrcbc.c
new file mode 100644
index 00000000..d45ca769
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_big_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: set up an AES CTR + CBC-MAC context ("big" table variant) from a raw key */
+void
+br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
+ const void *key, size_t len) /* key, len: key bytes and their count, handed to the key schedule */
+{
+ ctx->vtable = &br_aes_big_ctrcbc_vtable; /* tag the context with this implementation's vtable */
+ ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); /* key schedule writes into ctx->skey; its return value is kept as the round count */
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len) /* XOR the first len bytes of src into dst: dst[i] ^= src[i] */
+{
+ unsigned char *d;
+ const unsigned char *s;
+
+ d = dst;
+ s = src;
+ while (len -- > 0) { /* runs exactly len times; len == 0 does nothing */
+ *d ++ ^= *s ++;
+ }
+}
+
+/* see bearssl_block.h: XOR data in place with an AES-CTR keystream driven by the 16-byte counter at ctr; the advanced counter is written back */
+void
+br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
+ void *ctr, void *data, size_t len) /* NOTE(review): only whole 16-byte blocks are handled; len is presumably a multiple of 16 -- confirm in bearssl_block.h */
+{
+ unsigned char *buf, *bctr;
+ uint32_t cc0, cc1, cc2, cc3; /* the counter as four 32-bit words; cc0 is the least significant */
+
+ buf = data;
+ bctr = ctr;
+ cc3 = br_dec32be(bctr + 0); /* the first four bytes hold the most significant word */
+ cc2 = br_dec32be(bctr + 4);
+ cc1 = br_dec32be(bctr + 8);
+ cc0 = br_dec32be(bctr + 12);
+ while (len > 0) {
+ unsigned char tmp[16]; /* counter block, turned into a keystream block below */
+ uint32_t carry;
+
+ br_enc32be(tmp + 0, cc3);
+ br_enc32be(tmp + 4, cc2);
+ br_enc32be(tmp + 8, cc1);
+ br_enc32be(tmp + 12, cc0);
+ br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp); /* tmp now holds the keystream block */
+ xorbuf(buf, tmp, 16);
+ buf += 16;
+ len -= 16;
+ cc0 ++; /* 128-bit increment; the carry is propagated without branches */
+ carry = (~(cc0 | -cc0)) >> 31; /* 1 exactly when cc0 wrapped around to 0, else 0 */
+ cc1 += carry;
+ carry &= (~(cc1 | -cc1)) >> 31; /* carry survives only if cc1 is now 0 as well */
+ cc2 += carry;
+ carry &= (~(cc2 | -cc2)) >> 31; /* likewise for cc2 */
+ cc3 += carry;
+ }
+ br_enc32be(bctr + 0, cc3); /* store the advanced counter back for the caller */
+ br_enc32be(bctr + 4, cc2);
+ br_enc32be(bctr + 8, cc1);
+ br_enc32be(bctr + 12, cc0);
+}
+
+/* see bearssl_block.h: CBC-MAC update -- each 16-byte block of data is XORed into cbcmac, which is then encrypted in place */
+void
+br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
+ void *cbcmac, const void *data, size_t len) /* NOTE(review): len is presumably a multiple of 16 (len -= 16 would wrap otherwise) -- confirm in bearssl_block.h */
+{
+ const unsigned char *buf;
+
+ buf = data;
+ while (len > 0) {
+ xorbuf(cbcmac, buf, 16); /* fold the next data block into the running MAC state */
+ br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac); /* the 16-byte state is encrypted where it lives */
+ buf += 16;
+ len -= 16;
+ }
+}
+
+/* see bearssl_block.h: CTR-encrypt data in place, then fold the resulting ciphertext into the CBC-MAC state */
+void
+br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ br_aes_big_ctrcbc_ctr(ctx, ctr, data, len); /* data now holds the ciphertext; ctr is advanced */
+ br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len); /* the MAC therefore covers the ciphertext */
+}
+
+/* see bearssl_block.h: fold the incoming data (still encrypted) into the CBC-MAC state, then CTR-decrypt it in place */
+void
+br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len); /* MAC first, so it covers the same bytes as on the encrypt side */
+ br_aes_big_ctrcbc_ctr(ctx, ctr, data, len); /* CTR is its own inverse: the same XOR recovers the plaintext */
+}
+
+/* see bearssl_block.h: vtable publishing this implementation as a br_block_ctrcbc_class */
+const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = {
+ sizeof(br_aes_big_ctrcbc_keys), /* size of the context structure */
+ 16, /* presumably the block size in bytes -- confirm field order in bearssl_block.h */
+ 4, /* presumably log2 of the block size -- confirm */
+ (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) /* casts below: generic signatures take a vtable pointer in place of the concrete context */
+ &br_aes_big_ctrcbc_init,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_big_ctrcbc_encrypt, /* CTR encryption followed by CBC-MAC */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_big_ctrcbc_decrypt, /* CBC-MAC followed by CTR decryption */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, size_t))
+ &br_aes_big_ctrcbc_ctr, /* CTR only */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, const void *, size_t))
+ &br_aes_big_ctrcbc_mac /* CBC-MAC only */
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_big_dec.c b/test/monniaux/BearSSL/src/symcipher/aes_big_dec.c
new file mode 100644
index 00000000..a5d0e3c6
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_big_dec.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box (used by br_aes_big_decrypt() for the last round).
+ */
+static const unsigned char iS[] = {
+ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+ 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+ 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+ 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+ 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+ 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+ 0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+ 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+ 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+ 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+ 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+ 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+ 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+ 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+ 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+ 0x55, 0x21, 0x0C, 0x7D
+};
+
+static const uint32_t iSsm0[] = { /* 256 words indexed by a state byte; entry 0 packs 0E*iS[0], 09*iS[0], 0D*iS[0], 0B*iS[0] (top byte first) -- presumably every entry merges the inverse S-box with InvMixColumns this way; only entry 0 was checked */
+ 0x51F4A750, 0x7E416553, 0x1A17A4C3, 0x3A275E96, 0x3BAB6BCB, 0x1F9D45F1,
+ 0xACFA58AB, 0x4BE30393, 0x2030FA55, 0xAD766DF6, 0x88CC7691, 0xF5024C25,
+ 0x4FE5D7FC, 0xC52ACBD7, 0x26354480, 0xB562A38F, 0xDEB15A49, 0x25BA1B67,
+ 0x45EA0E98, 0x5DFEC0E1, 0xC32F7502, 0x814CF012, 0x8D4697A3, 0x6BD3F9C6,
+ 0x038F5FE7, 0x15929C95, 0xBF6D7AEB, 0x955259DA, 0xD4BE832D, 0x587421D3,
+ 0x49E06929, 0x8EC9C844, 0x75C2896A, 0xF48E7978, 0x99583E6B, 0x27B971DD,
+ 0xBEE14FB6, 0xF088AD17, 0xC920AC66, 0x7DCE3AB4, 0x63DF4A18, 0xE51A3182,
+ 0x97513360, 0x62537F45, 0xB16477E0, 0xBB6BAE84, 0xFE81A01C, 0xF9082B94,
+ 0x70486858, 0x8F45FD19, 0x94DE6C87, 0x527BF8B7, 0xAB73D323, 0x724B02E2,
+ 0xE31F8F57, 0x6655AB2A, 0xB2EB2807, 0x2FB5C203, 0x86C57B9A, 0xD33708A5,
+ 0x302887F2, 0x23BFA5B2, 0x02036ABA, 0xED16825C, 0x8ACF1C2B, 0xA779B492,
+ 0xF307F2F0, 0x4E69E2A1, 0x65DAF4CD, 0x0605BED5, 0xD134621F, 0xC4A6FE8A,
+ 0x342E539D, 0xA2F355A0, 0x058AE132, 0xA4F6EB75, 0x0B83EC39, 0x4060EFAA,
+ 0x5E719F06, 0xBD6E1051, 0x3E218AF9, 0x96DD063D, 0xDD3E05AE, 0x4DE6BD46,
+ 0x91548DB5, 0x71C45D05, 0x0406D46F, 0x605015FF, 0x1998FB24, 0xD6BDE997,
+ 0x894043CC, 0x67D99E77, 0xB0E842BD, 0x07898B88, 0xE7195B38, 0x79C8EEDB,
+ 0xA17C0A47, 0x7C420FE9, 0xF8841EC9, 0x00000000, 0x09808683, 0x322BED48,
+ 0x1E1170AC, 0x6C5A724E, 0xFD0EFFFB, 0x0F853856, 0x3DAED51E, 0x362D3927,
+ 0x0A0FD964, 0x685CA621, 0x9B5B54D1, 0x24362E3A, 0x0C0A67B1, 0x9357E70F,
+ 0xB4EE96D2, 0x1B9B919E, 0x80C0C54F, 0x61DC20A2, 0x5A774B69, 0x1C121A16,
+ 0xE293BA0A, 0xC0A02AE5, 0x3C22E043, 0x121B171D, 0x0E090D0B, 0xF28BC7AD,
+ 0x2DB6A8B9, 0x141EA9C8, 0x57F11985, 0xAF75074C, 0xEE99DDBB, 0xA37F60FD,
+ 0xF701269F, 0x5C72F5BC, 0x44663BC5, 0x5BFB7E34, 0x8B432976, 0xCB23C6DC,
+ 0xB6EDFC68, 0xB8E4F163, 0xD731DCCA, 0x42638510, 0x13972240, 0x84C61120,
+ 0x854A247D, 0xD2BB3DF8, 0xAEF93211, 0xC729A16D, 0x1D9E2F4B, 0xDCB230F3,
+ 0x0D8652EC, 0x77C1E3D0, 0x2BB3166C, 0xA970B999, 0x119448FA, 0x47E96422,
+ 0xA8FC8CC4, 0xA0F03F1A, 0x567D2CD8, 0x223390EF, 0x87494EC7, 0xD938D1C1,
+ 0x8CCAA2FE, 0x98D40B36, 0xA6F581CF, 0xA57ADE28, 0xDAB78E26, 0x3FADBFA4,
+ 0x2C3A9DE4, 0x5078920D, 0x6A5FCC9B, 0x547E4662, 0xF68D13C2, 0x90D8B8E8,
+ 0x2E39F75E, 0x82C3AFF5, 0x9F5D80BE, 0x69D0937C, 0x6FD52DA9, 0xCF2512B3,
+ 0xC8AC993B, 0x10187DA7, 0xE89C636E, 0xDB3BBB7B, 0xCD267809, 0x6E5918F4,
+ 0xEC9AB701, 0x834F9AA8, 0xE6956E65, 0xAAFFE67E, 0x21BCCF08, 0xEF15E8E6,
+ 0xBAE79BD9, 0x4A6F36CE, 0xEA9F09D4, 0x29B07CD6, 0x31A4B2AF, 0x2A3F2331,
+ 0xC6A59430, 0x35A266C0, 0x744EBC37, 0xFC82CAA6, 0xE090D0B0, 0x33A7D815,
+ 0xF104984A, 0x41ECDAF7, 0x7FCD500E, 0x1791F62F, 0x764DD68D, 0x43EFB04D,
+ 0xCCAA4D54, 0xE49604DF, 0x9ED1B5E3, 0x4C6A881B, 0xC12C1FB8, 0x4665517F,
+ 0x9D5EEA04, 0x018C355D, 0xFA877473, 0xFB0B412E, 0xB3671D5A, 0x92DBD252,
+ 0xE9105633, 0x6DD64713, 0x9AD7618C, 0x37A10C7A, 0x59F8148E, 0xEB133C89,
+ 0xCEA927EE, 0xB761C935, 0xE11CE5ED, 0x7A47B13C, 0x9CD2DF59, 0x55F2733F,
+ 0x1814CE79, 0x73C737BF, 0x53F7CDEA, 0x5FFDAA5B, 0xDF3D6F14, 0x7844DB86,
+ 0xCAAFF381, 0xB968C43E, 0x3824342C, 0xC2A3405F, 0x161DC372, 0xBCE2250C,
+ 0x283C498B, 0xFF0D9541, 0x39A80171, 0x080CB3DE, 0xD8B4E49C, 0x6456C190,
+ 0x7BCB8461, 0xD532B670, 0x486C5C74, 0xD0B85742
+};
+
+static unsigned
+mul2(unsigned x) /* multiply a byte value by 2 in GF(2^8), reducing with the polynomial 0x11B */
+{
+ x <<= 1;
+ return x ^ ((unsigned)(-(int)(x >> 8)) & 0x11B); /* -(x >> 8) is an all-ones mask when bit 8 got set (x < 0x100 on entry), so 0x11B is XORed in only then */
+}
+
+static unsigned
+mul9(unsigned x) /* multiply by 0x09 in GF(2^8) */
+{
+ return x ^ mul2(mul2(mul2(x))); /* 9 = 1 + 8: x XOR (x doubled three times) */
+}
+
+static unsigned
+mulb(unsigned x) /* multiply by 0x0B in GF(2^8) */
+{
+ unsigned x2;
+
+ x2 = mul2(x);
+ return x ^ x2 ^ mul2(mul2(x2)); /* 0x0B = 1 + 2 + 8 */
+}
+
+static unsigned
+muld(unsigned x) /* multiply by 0x0D in GF(2^8) */
+{
+ unsigned x4;
+
+ x4 = mul2(mul2(x));
+ return x ^ x4 ^ mul2(x4); /* 0x0D = 1 + 4 + 8 */
+}
+
+static unsigned
+mule(unsigned x) /* multiply by 0x0E in GF(2^8) */
+{
+ unsigned x2, x4;
+
+ x2 = mul2(x);
+ x4 = mul2(x2);
+ return x2 ^ x4 ^ mul2(x4); /* 0x0E = 2 + 4 + 8 */
+}
+
+/* see inner.h: build the decryption key schedule in skey and return the round count */
+unsigned
+br_aes_big_keysched_inv(uint32_t *skey, const void *key, size_t key_len)
+{
+ unsigned num_rounds;
+ int i, m;
+
+ /*
+ * Sub-keys for decryption are distinct from encryption sub-keys
+ * in that InvMixColumns() is already applied for the inner
+ * rounds.
+ */
+ num_rounds = br_aes_keysched(skey, key, key_len); /* start from the regular (encryption) schedule */
+ m = (int)(num_rounds << 2); /* index of the first word of the last round key */
+ for (i = 4; i < m; i ++) { /* skips skey[0..3] and the last four words: first and last round keys stay as they are */
+ uint32_t p;
+ unsigned p0, p1, p2, p3;
+ uint32_t q0, q1, q2, q3;
+
+ p = skey[i];
+ p0 = p >> 24; /* split the word into bytes, most significant first */
+ p1 = (p >> 16) & 0xFF;
+ p2 = (p >> 8) & 0xFF;
+ p3 = p & 0xFF;
+ q0 = mule(p0) ^ mulb(p1) ^ muld(p2) ^ mul9(p3); /* matrix row 0E 0B 0D 09; each following row is rotated by one position */
+ q1 = mul9(p0) ^ mule(p1) ^ mulb(p2) ^ muld(p3);
+ q2 = muld(p0) ^ mul9(p1) ^ mule(p2) ^ mulb(p3);
+ q3 = mulb(p0) ^ muld(p1) ^ mul9(p2) ^ mule(p3);
+ skey[i] = (q0 << 24) | (q1 << 16) | (q2 << 8) | q3; /* repack the transformed bytes in place */
+ }
+ return num_rounds;
+}
+
+static inline uint32_t
+rotr(uint32_t x, int n) /* rotate x right by n bits; n must be in 1..31 (n == 0 would shift by 32) -- this file only passes 8, 16 and 24 */
+{
+ return (x << (32 - n)) | (x >> n);
+}
+
+#define iSboxExt0(x) (iSsm0[x]) /* table word exactly as stored */
+#define iSboxExt1(x) (rotr(iSsm0[x], 8)) /* the same word rotated right by 8 bits */
+#define iSboxExt2(x) (rotr(iSsm0[x], 16)) /* rotated right by 16 bits */
+#define iSboxExt3(x) (rotr(iSsm0[x], 24)) /* rotated right by 24 bits; one stored table thus serves all four byte positions */
+
+/* see bearssl.h: decrypt one 16-byte block at data, in place, using table lookups and the subkeys in skey */
+void
+br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+ unsigned char *buf;
+ uint32_t s0, s1, s2, s3; /* the state as four 32-bit words */
+ uint32_t t0, t1, t2, t3;
+ unsigned u;
+
+ buf = data;
+ s0 = br_dec32be(buf); /* load the block as four words (big-endian, going by the helper's name) */
+ s1 = br_dec32be(buf + 4);
+ s2 = br_dec32be(buf + 8);
+ s3 = br_dec32be(buf + 12);
+ s0 ^= skey[(num_rounds << 2) + 0]; /* begin with the last round key: skey[4*num_rounds .. 4*num_rounds+3] */
+ s1 ^= skey[(num_rounds << 2) + 1];
+ s2 ^= skey[(num_rounds << 2) + 2];
+ s3 ^= skey[(num_rounds << 2) + 3];
+ for (u = num_rounds - 1; u > 0; u --) { /* inner rounds, walking the round keys backwards */
+ uint32_t v0 = iSboxExt0(s0 >> 24) /* bytes come from s0, s3, s2, s1: the inverse row-shift pattern */
+ ^ iSboxExt1((s3 >> 16) & 0xFF)
+ ^ iSboxExt2((s2 >> 8) & 0xFF)
+ ^ iSboxExt3(s1 & 0xFF);
+ uint32_t v1 = iSboxExt0(s1 >> 24)
+ ^ iSboxExt1((s0 >> 16) & 0xFF)
+ ^ iSboxExt2((s3 >> 8) & 0xFF)
+ ^ iSboxExt3(s2 & 0xFF);
+ uint32_t v2 = iSboxExt0(s2 >> 24)
+ ^ iSboxExt1((s1 >> 16) & 0xFF)
+ ^ iSboxExt2((s0 >> 8) & 0xFF)
+ ^ iSboxExt3(s3 & 0xFF);
+ uint32_t v3 = iSboxExt0(s3 >> 24)
+ ^ iSboxExt1((s2 >> 16) & 0xFF)
+ ^ iSboxExt2((s1 >> 8) & 0xFF)
+ ^ iSboxExt3(s0 & 0xFF);
+ s0 = v0; /* all four new words are computed from the old state before any is replaced */
+ s1 = v1;
+ s2 = v2;
+ s3 = v3;
+ s0 ^= skey[u << 2]; /* add round key u */
+ s1 ^= skey[(u << 2) + 1];
+ s2 ^= skey[(u << 2) + 2];
+ s3 ^= skey[(u << 2) + 3];
+ }
+ t0 = ((uint32_t)iS[s0 >> 24] << 24) /* last round: plain inverse S-box bytes, same byte selection, no column mixing */
+ | ((uint32_t)iS[(s3 >> 16) & 0xFF] << 16)
+ | ((uint32_t)iS[(s2 >> 8) & 0xFF] << 8)
+ | (uint32_t)iS[s1 & 0xFF];
+ t1 = ((uint32_t)iS[s1 >> 24] << 24)
+ | ((uint32_t)iS[(s0 >> 16) & 0xFF] << 16)
+ | ((uint32_t)iS[(s3 >> 8) & 0xFF] << 8)
+ | (uint32_t)iS[s2 & 0xFF];
+ t2 = ((uint32_t)iS[s2 >> 24] << 24)
+ | ((uint32_t)iS[(s1 >> 16) & 0xFF] << 16)
+ | ((uint32_t)iS[(s0 >> 8) & 0xFF] << 8)
+ | (uint32_t)iS[s3 & 0xFF];
+ t3 = ((uint32_t)iS[s3 >> 24] << 24)
+ | ((uint32_t)iS[(s2 >> 16) & 0xFF] << 16)
+ | ((uint32_t)iS[(s1 >> 8) & 0xFF] << 8)
+ | (uint32_t)iS[s0 & 0xFF];
+ s0 = t0 ^ skey[0]; /* finish with the first round key */
+ s1 = t1 ^ skey[1];
+ s2 = t2 ^ skey[2];
+ s3 = t3 ^ skey[3];
+ br_enc32be(buf, s0); /* write the plaintext back over the input */
+ br_enc32be(buf + 4, s1);
+ br_enc32be(buf + 8, s2);
+ br_enc32be(buf + 12, s3);
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_big_enc.c b/test/monniaux/BearSSL/src/symcipher/aes_big_enc.c
new file mode 100644
index 00000000..bbabb9a6
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_big_enc.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S br_aes_S /* short alias for the br_aes_S byte table, which is not defined in this file */
+
+static const uint32_t Ssm0[] = { /* 256 words indexed by a state byte; entry 0 is 0xC66363A5 = 02*0x63, 0x63, 0x63, 03*0x63 (top byte first) -- presumably every entry merges the S-box with MixColumns this way; only entry 0 was checked */
+ 0xC66363A5, 0xF87C7C84, 0xEE777799, 0xF67B7B8D, 0xFFF2F20D, 0xD66B6BBD,
+ 0xDE6F6FB1, 0x91C5C554, 0x60303050, 0x02010103, 0xCE6767A9, 0x562B2B7D,
+ 0xE7FEFE19, 0xB5D7D762, 0x4DABABE6, 0xEC76769A, 0x8FCACA45, 0x1F82829D,
+ 0x89C9C940, 0xFA7D7D87, 0xEFFAFA15, 0xB25959EB, 0x8E4747C9, 0xFBF0F00B,
+ 0x41ADADEC, 0xB3D4D467, 0x5FA2A2FD, 0x45AFAFEA, 0x239C9CBF, 0x53A4A4F7,
+ 0xE4727296, 0x9BC0C05B, 0x75B7B7C2, 0xE1FDFD1C, 0x3D9393AE, 0x4C26266A,
+ 0x6C36365A, 0x7E3F3F41, 0xF5F7F702, 0x83CCCC4F, 0x6834345C, 0x51A5A5F4,
+ 0xD1E5E534, 0xF9F1F108, 0xE2717193, 0xABD8D873, 0x62313153, 0x2A15153F,
+ 0x0804040C, 0x95C7C752, 0x46232365, 0x9DC3C35E, 0x30181828, 0x379696A1,
+ 0x0A05050F, 0x2F9A9AB5, 0x0E070709, 0x24121236, 0x1B80809B, 0xDFE2E23D,
+ 0xCDEBEB26, 0x4E272769, 0x7FB2B2CD, 0xEA75759F, 0x1209091B, 0x1D83839E,
+ 0x582C2C74, 0x341A1A2E, 0x361B1B2D, 0xDC6E6EB2, 0xB45A5AEE, 0x5BA0A0FB,
+ 0xA45252F6, 0x763B3B4D, 0xB7D6D661, 0x7DB3B3CE, 0x5229297B, 0xDDE3E33E,
+ 0x5E2F2F71, 0x13848497, 0xA65353F5, 0xB9D1D168, 0x00000000, 0xC1EDED2C,
+ 0x40202060, 0xE3FCFC1F, 0x79B1B1C8, 0xB65B5BED, 0xD46A6ABE, 0x8DCBCB46,
+ 0x67BEBED9, 0x7239394B, 0x944A4ADE, 0x984C4CD4, 0xB05858E8, 0x85CFCF4A,
+ 0xBBD0D06B, 0xC5EFEF2A, 0x4FAAAAE5, 0xEDFBFB16, 0x864343C5, 0x9A4D4DD7,
+ 0x66333355, 0x11858594, 0x8A4545CF, 0xE9F9F910, 0x04020206, 0xFE7F7F81,
+ 0xA05050F0, 0x783C3C44, 0x259F9FBA, 0x4BA8A8E3, 0xA25151F3, 0x5DA3A3FE,
+ 0x804040C0, 0x058F8F8A, 0x3F9292AD, 0x219D9DBC, 0x70383848, 0xF1F5F504,
+ 0x63BCBCDF, 0x77B6B6C1, 0xAFDADA75, 0x42212163, 0x20101030, 0xE5FFFF1A,
+ 0xFDF3F30E, 0xBFD2D26D, 0x81CDCD4C, 0x180C0C14, 0x26131335, 0xC3ECEC2F,
+ 0xBE5F5FE1, 0x359797A2, 0x884444CC, 0x2E171739, 0x93C4C457, 0x55A7A7F2,
+ 0xFC7E7E82, 0x7A3D3D47, 0xC86464AC, 0xBA5D5DE7, 0x3219192B, 0xE6737395,
+ 0xC06060A0, 0x19818198, 0x9E4F4FD1, 0xA3DCDC7F, 0x44222266, 0x542A2A7E,
+ 0x3B9090AB, 0x0B888883, 0x8C4646CA, 0xC7EEEE29, 0x6BB8B8D3, 0x2814143C,
+ 0xA7DEDE79, 0xBC5E5EE2, 0x160B0B1D, 0xADDBDB76, 0xDBE0E03B, 0x64323256,
+ 0x743A3A4E, 0x140A0A1E, 0x924949DB, 0x0C06060A, 0x4824246C, 0xB85C5CE4,
+ 0x9FC2C25D, 0xBDD3D36E, 0x43ACACEF, 0xC46262A6, 0x399191A8, 0x319595A4,
+ 0xD3E4E437, 0xF279798B, 0xD5E7E732, 0x8BC8C843, 0x6E373759, 0xDA6D6DB7,
+ 0x018D8D8C, 0xB1D5D564, 0x9C4E4ED2, 0x49A9A9E0, 0xD86C6CB4, 0xAC5656FA,
+ 0xF3F4F407, 0xCFEAEA25, 0xCA6565AF, 0xF47A7A8E, 0x47AEAEE9, 0x10080818,
+ 0x6FBABAD5, 0xF0787888, 0x4A25256F, 0x5C2E2E72, 0x381C1C24, 0x57A6A6F1,
+ 0x73B4B4C7, 0x97C6C651, 0xCBE8E823, 0xA1DDDD7C, 0xE874749C, 0x3E1F1F21,
+ 0x964B4BDD, 0x61BDBDDC, 0x0D8B8B86, 0x0F8A8A85, 0xE0707090, 0x7C3E3E42,
+ 0x71B5B5C4, 0xCC6666AA, 0x904848D8, 0x06030305, 0xF7F6F601, 0x1C0E0E12,
+ 0xC26161A3, 0x6A35355F, 0xAE5757F9, 0x69B9B9D0, 0x17868691, 0x99C1C158,
+ 0x3A1D1D27, 0x279E9EB9, 0xD9E1E138, 0xEBF8F813, 0x2B9898B3, 0x22111133,
+ 0xD26969BB, 0xA9D9D970, 0x078E8E89, 0x339494A7, 0x2D9B9BB6, 0x3C1E1E22,
+ 0x15878792, 0xC9E9E920, 0x87CECE49, 0xAA5555FF, 0x50282878, 0xA5DFDF7A,
+ 0x038C8C8F, 0x59A1A1F8, 0x09898980, 0x1A0D0D17, 0x65BFBFDA, 0xD7E6E631,
+ 0x844242C6, 0xD06868B8, 0x824141C3, 0x299999B0, 0x5A2D2D77, 0x1E0F0F11,
+ 0x7BB0B0CB, 0xA85454FC, 0x6DBBBBD6, 0x2C16163A
+};
+
+static inline uint32_t
+rotr(uint32_t x, int n) /* rotate x right by n bits; n must be in 1..31 (n == 0 would shift by 32) -- this file only passes 8, 16 and 24 */
+{
+ return (x << (32 - n)) | (x >> n);
+}
+
+#define SboxExt0(x) (Ssm0[x]) /* table word exactly as stored */
+#define SboxExt1(x) (rotr(Ssm0[x], 8)) /* the same word rotated right by 8 bits */
+#define SboxExt2(x) (rotr(Ssm0[x], 16)) /* rotated right by 16 bits */
+#define SboxExt3(x) (rotr(Ssm0[x], 24)) /* rotated right by 24 bits; one stored table thus serves all four byte positions */
+
+
+/* see bearssl.h: encrypt one 16-byte block at data, in place, using table lookups and the subkeys in skey */
+void
+br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+ unsigned char *buf;
+ uint32_t s0, s1, s2, s3; /* the state as four 32-bit words */
+ uint32_t t0, t1, t2, t3;
+ unsigned u;
+
+ buf = data;
+ s0 = br_dec32be(buf); /* load the block as four words (big-endian, going by the helper's name) */
+ s1 = br_dec32be(buf + 4);
+ s2 = br_dec32be(buf + 8);
+ s3 = br_dec32be(buf + 12);
+ s0 ^= skey[0]; /* begin with the first round key */
+ s1 ^= skey[1];
+ s2 ^= skey[2];
+ s3 ^= skey[3];
+ for (u = 1; u < num_rounds; u ++) { /* inner rounds 1 .. num_rounds-1 */
+ uint32_t v0, v1, v2, v3;
+
+ v0 = SboxExt0(s0 >> 24) /* bytes come from s0, s1, s2, s3: the row-shift pattern */
+ ^ SboxExt1((s1 >> 16) & 0xFF)
+ ^ SboxExt2((s2 >> 8) & 0xFF)
+ ^ SboxExt3(s3 & 0xFF);
+ v1 = SboxExt0(s1 >> 24)
+ ^ SboxExt1((s2 >> 16) & 0xFF)
+ ^ SboxExt2((s3 >> 8) & 0xFF)
+ ^ SboxExt3(s0 & 0xFF);
+ v2 = SboxExt0(s2 >> 24)
+ ^ SboxExt1((s3 >> 16) & 0xFF)
+ ^ SboxExt2((s0 >> 8) & 0xFF)
+ ^ SboxExt3(s1 & 0xFF);
+ v3 = SboxExt0(s3 >> 24)
+ ^ SboxExt1((s0 >> 16) & 0xFF)
+ ^ SboxExt2((s1 >> 8) & 0xFF)
+ ^ SboxExt3(s2 & 0xFF);
+ s0 = v0; /* all four new words are computed from the old state before any is replaced */
+ s1 = v1;
+ s2 = v2;
+ s3 = v3;
+ s0 ^= skey[u << 2]; /* add round key u */
+ s1 ^= skey[(u << 2) + 1];
+ s2 ^= skey[(u << 2) + 2];
+ s3 ^= skey[(u << 2) + 3];
+ }
+ t0 = ((uint32_t)S[s0 >> 24] << 24) /* last round: plain S-box bytes, same byte selection, no column mixing */
+ | ((uint32_t)S[(s1 >> 16) & 0xFF] << 16)
+ | ((uint32_t)S[(s2 >> 8) & 0xFF] << 8)
+ | (uint32_t)S[s3 & 0xFF];
+ t1 = ((uint32_t)S[s1 >> 24] << 24)
+ | ((uint32_t)S[(s2 >> 16) & 0xFF] << 16)
+ | ((uint32_t)S[(s3 >> 8) & 0xFF] << 8)
+ | (uint32_t)S[s0 & 0xFF];
+ t2 = ((uint32_t)S[s2 >> 24] << 24)
+ | ((uint32_t)S[(s3 >> 16) & 0xFF] << 16)
+ | ((uint32_t)S[(s0 >> 8) & 0xFF] << 8)
+ | (uint32_t)S[s1 & 0xFF];
+ t3 = ((uint32_t)S[s3 >> 24] << 24)
+ | ((uint32_t)S[(s0 >> 16) & 0xFF] << 16)
+ | ((uint32_t)S[(s1 >> 8) & 0xFF] << 8)
+ | (uint32_t)S[s2 & 0xFF];
+ s0 = t0 ^ skey[num_rounds << 2]; /* finish with the last round key: skey[4*num_rounds .. 4*num_rounds+3] */
+ s1 = t1 ^ skey[(num_rounds << 2) + 1];
+ s2 = t2 ^ skey[(num_rounds << 2) + 2];
+ s3 = t3 ^ skey[(num_rounds << 2) + 3];
+ br_enc32be(buf, s0); /* write the ciphertext back over the input */
+ br_enc32be(buf + 4, s1);
+ br_enc32be(buf + 8, s2);
+ br_enc32be(buf + 12, s3);
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_common.c b/test/monniaux/BearSSL/src/symcipher/aes_common.c
new file mode 100644
index 00000000..72c64fb1
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_common.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Round constants for br_aes_keysched(). Entry k is XORed into the
+ * substituted word at the start of each group of nk expanded words.
+ * Each constant sits in the most significant byte of its 32-bit word.
+ */
+static const uint32_t Rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+ 0x40000000, 0x80000000, 0x1B000000, 0x36000000
+};
+
+#define S br_aes_S
+
+/*
+ * see inner.h
+ *
+ * AES S-box as a 256-entry byte lookup table, indexed by the input byte.
+ * Within this file it is read through the S macro (see SubWord()).
+ */
+const unsigned char br_aes_S[] = {
+ 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
+ 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+ 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26,
+ 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2,
+ 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+ 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED,
+ 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F,
+ 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+ 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
+ 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14,
+ 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+ 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D,
+ 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F,
+ 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+ 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11,
+ 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F,
+ 0xB0, 0x54, 0xBB, 0x16
+};
+
+/*
+ * Substitute each of the four bytes of x through the S table. Every
+ * byte keeps its position inside the word.
+ */
+static uint32_t
+SubWord(uint32_t x)
+{
+	uint32_t b3, b2, b1, b0;
+
+	b3 = S[x >> 24];
+	b2 = S[(x >> 16) & 0xFF];
+	b1 = S[(x >> 8) & 0xFF];
+	b0 = S[x & 0xFF];
+	return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
+}
+
+/*
+ * see inner.h
+ *
+ * Expand an AES key into (num_rounds + 1) * 4 round-key words.
+ *
+ * skey     output array of round-key words
+ * key      raw key bytes, decoded as big-endian 32-bit words
+ * key_len  key length in bytes; only 16, 24 and 32 are accepted
+ *
+ * Returns the number of rounds (10, 12 or 14), or 0 when key_len is
+ * not supported, in which case skey is not written.
+ */
+unsigned
+br_aes_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	const unsigned char *kb;
+	unsigned num_rounds;
+	int nk, total, pos, phase, rc;
+
+	if (key_len == 16) {
+		num_rounds = 10;
+	} else if (key_len == 24) {
+		num_rounds = 12;
+	} else if (key_len == 32) {
+		num_rounds = 14;
+	} else {
+		/* abort(); */
+		return 0;
+	}
+
+	nk = (int)(key_len >> 2);
+	total = (int)((num_rounds + 1) << 2);
+
+	/* The first nk words are the key itself. */
+	kb = key;
+	for (pos = 0; pos < nk; pos ++) {
+		skey[pos] = br_dec32be(kb + (pos << 2));
+	}
+
+	/*
+	 * phase counts words inside the current group of nk words and
+	 * rc selects the round constant for that group.
+	 */
+	phase = 0;
+	rc = 0;
+	for (pos = nk; pos < total; pos ++) {
+		uint32_t w;
+
+		w = skey[pos - 1];
+		if (phase == 0) {
+			/* Rotate left by one byte, substitute, add Rcon. */
+			w = (w << 8) | (w >> 24);
+			w = SubWord(w) ^ Rcon[rc];
+		} else if (nk > 6 && phase == 4) {
+			/* Extra substitution used only for 32-byte keys. */
+			w = SubWord(w);
+		}
+		skey[pos] = skey[pos - nk] ^ w;
+		phase ++;
+		if (phase == nk) {
+			phase = 0;
+			rc ++;
+		}
+	}
+	return num_rounds;
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct.c b/test/monniaux/BearSSL/src/symcipher/aes_ct.c
new file mode 100644
index 00000000..66776d9e
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see inner.h
+ *
+ * Apply the S-box circuit in place to the eight 32-bit words of q.
+ * Word q[7 - i] supplies input variable x<i> and is overwritten with
+ * output variable s<i>. Every operation is a word-wide XOR, AND or NOT,
+ * so each bit position is processed independently of the others and
+ * the function contains no table lookup and no data-dependent branch.
+ */
+void
+br_aes_ct_bitslice_Sbox(uint32_t *q)
+{
+ /*
+ * This S-box implementation is a straightforward translation of
+ * the circuit described by Boyar and Peralta in "A new
+ * combinational logic minimization technique with applications
+ * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+ *
+ * Note that variables x* (input) and s* (output) are numbered
+ * in "reverse" order (x0 is the high bit, x7 is the low bit).
+ */
+
+ uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
+ uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+ uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+ uint32_t y20, y21;
+ uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+ uint32_t z10, z11, z12, z13, z14, z15, z16, z17;
+ uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+ uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+ uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+ uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+ uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+ uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+ uint32_t t60, t61, t62, t63, t64, t65, t66, t67;
+ uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+ /* Load the inputs in reverse order: q[7] is x0, q[0] is x7. */
+ x0 = q[7];
+ x1 = q[6];
+ x2 = q[5];
+ x3 = q[4];
+ x4 = q[3];
+ x5 = q[2];
+ x6 = q[1];
+ x7 = q[0];
+
+ /*
+ * Top linear transformation.
+ */
+ y14 = x3 ^ x5;
+ y13 = x0 ^ x6;
+ y9 = x0 ^ x3;
+ y8 = x0 ^ x5;
+ t0 = x1 ^ x2;
+ y1 = t0 ^ x7;
+ y4 = y1 ^ x3;
+ y12 = y13 ^ y14;
+ y2 = y1 ^ x0;
+ y5 = y1 ^ x6;
+ y3 = y5 ^ y8;
+ t1 = x4 ^ y12;
+ y15 = t1 ^ x5;
+ y20 = t1 ^ x1;
+ y6 = y15 ^ x7;
+ y10 = y15 ^ t0;
+ y11 = y20 ^ y9;
+ y7 = x7 ^ y11;
+ y17 = y10 ^ y11;
+ y19 = y10 ^ y8;
+ y16 = t0 ^ y11;
+ y21 = y13 ^ y16;
+ y18 = x0 ^ y16;
+
+ /*
+ * Non-linear section.
+ */
+ t2 = y12 & y15;
+ t3 = y3 & y6;
+ t4 = t3 ^ t2;
+ t5 = y4 & x7;
+ t6 = t5 ^ t2;
+ t7 = y13 & y16;
+ t8 = y5 & y1;
+ t9 = t8 ^ t7;
+ t10 = y2 & y7;
+ t11 = t10 ^ t7;
+ t12 = y9 & y11;
+ t13 = y14 & y17;
+ t14 = t13 ^ t12;
+ t15 = y8 & y10;
+ t16 = t15 ^ t12;
+ t17 = t4 ^ t14;
+ t18 = t6 ^ t16;
+ t19 = t9 ^ t14;
+ t20 = t11 ^ t16;
+ t21 = t17 ^ y20;
+ t22 = t18 ^ y19;
+ t23 = t19 ^ y21;
+ t24 = t20 ^ y18;
+
+ t25 = t21 ^ t22;
+ t26 = t21 & t23;
+ t27 = t24 ^ t26;
+ t28 = t25 & t27;
+ t29 = t28 ^ t22;
+ t30 = t23 ^ t24;
+ t31 = t22 ^ t26;
+ t32 = t31 & t30;
+ t33 = t32 ^ t24;
+ t34 = t23 ^ t33;
+ t35 = t27 ^ t33;
+ t36 = t24 & t35;
+ t37 = t36 ^ t34;
+ t38 = t27 ^ t36;
+ t39 = t29 & t38;
+ t40 = t25 ^ t39;
+
+ t41 = t40 ^ t37;
+ t42 = t29 ^ t33;
+ t43 = t29 ^ t40;
+ t44 = t33 ^ t37;
+ t45 = t42 ^ t41;
+ z0 = t44 & y15;
+ z1 = t37 & y6;
+ z2 = t33 & x7;
+ z3 = t43 & y16;
+ z4 = t40 & y1;
+ z5 = t29 & y7;
+ z6 = t42 & y11;
+ z7 = t45 & y17;
+ z8 = t41 & y10;
+ z9 = t44 & y12;
+ z10 = t37 & y3;
+ z11 = t33 & y4;
+ z12 = t43 & y13;
+ z13 = t40 & y5;
+ z14 = t29 & y2;
+ z15 = t42 & y9;
+ z16 = t45 & y14;
+ z17 = t41 & y8;
+
+ /*
+ * Bottom linear transformation.
+ * The "a ^ ~b" terms below are XNOR gates.
+ */
+ t46 = z15 ^ z16;
+ t47 = z10 ^ z11;
+ t48 = z5 ^ z13;
+ t49 = z9 ^ z10;
+ t50 = z2 ^ z12;
+ t51 = z2 ^ z5;
+ t52 = z7 ^ z8;
+ t53 = z0 ^ z3;
+ t54 = z6 ^ z7;
+ t55 = z16 ^ z17;
+ t56 = z12 ^ t48;
+ t57 = t50 ^ t53;
+ t58 = z4 ^ t46;
+ t59 = z3 ^ t54;
+ t60 = t46 ^ t57;
+ t61 = z14 ^ t57;
+ t62 = t52 ^ t58;
+ t63 = t49 ^ t58;
+ t64 = z4 ^ t59;
+ t65 = t61 ^ t62;
+ t66 = z1 ^ t63;
+ s0 = t59 ^ t63;
+ s6 = t56 ^ ~t62;
+ s7 = t48 ^ ~t60;
+ t67 = t64 ^ t65;
+ s3 = t53 ^ t66;
+ s4 = t51 ^ t66;
+ s5 = t47 ^ t65;
+ s1 = t64 ^ ~s3;
+ s2 = t55 ^ ~t67;
+
+ /* Store the outputs with the same reversed numbering as the inputs. */
+ q[7] = s0;
+ q[6] = s1;
+ q[5] = s2;
+ q[4] = s3;
+ q[3] = s4;
+ q[2] = s5;
+ q[1] = s6;
+ q[0] = s7;
+}
+
+/*
+ * see inner.h
+ *
+ * In-place bit transposition over the eight words of q, done in three
+ * passes (strides 1, 2 and 4). After the call, bit j of q[i] inside
+ * each 8-bit group holds what bit i of q[j] held before, so applying
+ * the function twice restores the original contents.
+ */
+void
+br_aes_ct_ortho(uint32_t *q)
+{
+	int i;
+
+#define SWAPN(cl, ch, s, x, y) do { \
+		uint32_t va, vb; \
+		va = (x); \
+		vb = (y); \
+		(x) = (va & (uint32_t)cl) | ((vb & (uint32_t)cl) << (s)); \
+		(y) = ((va & (uint32_t)ch) >> (s)) | (vb & (uint32_t)ch); \
+	} while (0)
+
+#define SWAP2(x, y) SWAPN(0x55555555, 0xAAAAAAAA, 1, x, y)
+#define SWAP4(x, y) SWAPN(0x33333333, 0xCCCCCCCC, 2, x, y)
+#define SWAP8(x, y) SWAPN(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)
+
+	/* Stride 1: exchange between neighbouring words. */
+	for (i = 0; i < 8; i += 2) {
+		SWAP2(q[i], q[i + 1]);
+	}
+
+	/* Stride 2: exchange between words two apart. */
+	for (i = 0; i < 8; i += 4) {
+		SWAP4(q[i], q[i + 2]);
+		SWAP4(q[i + 1], q[i + 3]);
+	}
+
+	/* Stride 4: exchange between the low and high halves of q. */
+	for (i = 0; i < 4; i ++) {
+		SWAP8(q[i], q[i + 4]);
+	}
+}
+
+/*
+ * Round constants for br_aes_ct_keysched(), stored as single bytes.
+ * Entry k is XORed into the low byte of the substituted word at the
+ * start of each group of nk expanded words.
+ */
+static const unsigned char Rcon[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+/*
+ * Run the bitsliced S-box on a single 32-bit word. The word is copied
+ * into all eight slots, converted with br_aes_ct_ortho(), substituted,
+ * converted back, and slot 0 is returned.
+ */
+static uint32_t
+sub_word(uint32_t x)
+{
+	uint32_t lanes[8];
+
+	lanes[0] = x;
+	lanes[1] = x;
+	lanes[2] = x;
+	lanes[3] = x;
+	lanes[4] = x;
+	lanes[5] = x;
+	lanes[6] = x;
+	lanes[7] = x;
+	br_aes_ct_ortho(lanes);
+	br_aes_ct_bitslice_Sbox(lanes);
+	br_aes_ct_ortho(lanes);
+	return lanes[0];
+}
+
+/*
+ * see inner.h
+ *
+ * Expand an AES key and store it in compressed bitsliced form.
+ *
+ * comp_skey  receives (num_rounds + 1) * 4 words
+ * key        raw key bytes, decoded as little-endian 32-bit words
+ * key_len    key length in bytes; only 16, 24 and 32 are accepted
+ *
+ * Returns the number of rounds (10, 12 or 14), or 0 when key_len is
+ * not supported, in which case comp_skey is not written.
+ */
+unsigned
+br_aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len)
+{
+ unsigned num_rounds;
+ int i, j, k, nk, nkf;
+ uint32_t tmp;
+ /* Two slots per expanded word; nkf is at most 60, hence 120. */
+ uint32_t skey[120];
+
+ switch (key_len) {
+ case 16:
+ num_rounds = 10;
+ break;
+ case 24:
+ num_rounds = 12;
+ break;
+ case 32:
+ num_rounds = 14;
+ break;
+ default:
+ /* abort(); */
+ return 0;
+ }
+ nk = (int)(key_len >> 2);
+ nkf = (int)((num_rounds + 1) << 2);
+ tmp = 0;
+ /*
+ * Copy the key words; each word is written to two adjacent slots.
+ * On exit, tmp holds the last key word, which seeds the loop below.
+ */
+ for (i = 0; i < nk; i ++) {
+ tmp = br_dec32le((const unsigned char *)key + (i << 2));
+ skey[(i << 1) + 0] = tmp;
+ skey[(i << 1) + 1] = tmp;
+ }
+ /*
+ * Expansion: j counts words inside the current group of nk words,
+ * k selects the round constant. tmp always holds the previously
+ * produced word.
+ */
+ for (i = nk, j = 0, k = 0; i < nkf; i ++) {
+ if (j == 0) {
+ /* Rotate right by 8 bits, substitute, add the round constant. */
+ tmp = (tmp << 24) | (tmp >> 8);
+ tmp = sub_word(tmp) ^ Rcon[k];
+ } else if (nk > 6 && j == 4) {
+ /* Extra substitution used only for 32-byte keys. */
+ tmp = sub_word(tmp);
+ }
+ tmp ^= skey[(i - nk) << 1];
+ skey[(i << 1) + 0] = tmp;
+ skey[(i << 1) + 1] = tmp;
+ if (++ j == nk) {
+ j = 0;
+ k ++;
+ }
+ }
+ /* Convert each round key (4 words, 8 slots) to bitsliced form. */
+ for (i = 0; i < nkf; i += 4) {
+ br_aes_ct_ortho(skey + (i << 1));
+ }
+ /*
+ * Compress each pair of slots into one word: even bits come from the
+ * first slot and odd bits from the second. br_aes_ct_skey_expand()
+ * rebuilds the pair from that word.
+ */
+ for (i = 0, j = 0; i < nkf; i ++, j += 2) {
+ comp_skey[i] = (skey[j + 0] & 0x55555555)
+ | (skey[j + 1] & 0xAAAAAAAA);
+ }
+ return num_rounds;
+}
+
+/*
+ * see inner.h
+ *
+ * Rebuild the expanded bitsliced key from its compressed form. Each of
+ * the (num_rounds + 1) * 4 input words yields two output words: the
+ * even bits, each duplicated into the bit above it, and the odd bits,
+ * each duplicated into the bit below it.
+ */
+void
+br_aes_ct_skey_expand(uint32_t *skey,
+	unsigned num_rounds, const uint32_t *comp_skey)
+{
+	unsigned count, idx;
+
+	count = (num_rounds + 1) << 2;
+	for (idx = 0; idx < count; idx ++) {
+		uint32_t even, odd;
+
+		even = comp_skey[idx] & 0x55555555;
+		odd = comp_skey[idx] & 0xAAAAAAAA;
+		skey[(idx << 1) + 0] = even | (even << 1);
+		skey[(idx << 1) + 1] = odd | (odd >> 1);
+	}
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct64.c b/test/monniaux/BearSSL/src/symcipher/aes_ct64.c
new file mode 100644
index 00000000..15238116
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct64.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see inner.h
+ *
+ * Apply the S-box circuit in place to the eight 64-bit words of q.
+ * Word q[7 - i] supplies input variable x<i> and is overwritten with
+ * output variable s<i>. Every operation is a word-wide XOR, AND or NOT,
+ * so each bit position is processed independently of the others and
+ * the function contains no table lookup and no data-dependent branch.
+ */
+void
+br_aes_ct64_bitslice_Sbox(uint64_t *q)
+{
+ /*
+ * This S-box implementation is a straightforward translation of
+ * the circuit described by Boyar and Peralta in "A new
+ * combinational logic minimization technique with applications
+ * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+ *
+ * Note that variables x* (input) and s* (output) are numbered
+ * in "reverse" order (x0 is the high bit, x7 is the low bit).
+ */
+
+ uint64_t x0, x1, x2, x3, x4, x5, x6, x7;
+ uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+ uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+ uint64_t y20, y21;
+ uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+ uint64_t z10, z11, z12, z13, z14, z15, z16, z17;
+ uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+ uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+ uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+ uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+ uint64_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+ uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+ uint64_t t60, t61, t62, t63, t64, t65, t66, t67;
+ uint64_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+ /* Load the inputs in reverse order: q[7] is x0, q[0] is x7. */
+ x0 = q[7];
+ x1 = q[6];
+ x2 = q[5];
+ x3 = q[4];
+ x4 = q[3];
+ x5 = q[2];
+ x6 = q[1];
+ x7 = q[0];
+
+ /*
+ * Top linear transformation.
+ */
+ y14 = x3 ^ x5;
+ y13 = x0 ^ x6;
+ y9 = x0 ^ x3;
+ y8 = x0 ^ x5;
+ t0 = x1 ^ x2;
+ y1 = t0 ^ x7;
+ y4 = y1 ^ x3;
+ y12 = y13 ^ y14;
+ y2 = y1 ^ x0;
+ y5 = y1 ^ x6;
+ y3 = y5 ^ y8;
+ t1 = x4 ^ y12;
+ y15 = t1 ^ x5;
+ y20 = t1 ^ x1;
+ y6 = y15 ^ x7;
+ y10 = y15 ^ t0;
+ y11 = y20 ^ y9;
+ y7 = x7 ^ y11;
+ y17 = y10 ^ y11;
+ y19 = y10 ^ y8;
+ y16 = t0 ^ y11;
+ y21 = y13 ^ y16;
+ y18 = x0 ^ y16;
+
+ /*
+ * Non-linear section.
+ */
+ t2 = y12 & y15;
+ t3 = y3 & y6;
+ t4 = t3 ^ t2;
+ t5 = y4 & x7;
+ t6 = t5 ^ t2;
+ t7 = y13 & y16;
+ t8 = y5 & y1;
+ t9 = t8 ^ t7;
+ t10 = y2 & y7;
+ t11 = t10 ^ t7;
+ t12 = y9 & y11;
+ t13 = y14 & y17;
+ t14 = t13 ^ t12;
+ t15 = y8 & y10;
+ t16 = t15 ^ t12;
+ t17 = t4 ^ t14;
+ t18 = t6 ^ t16;
+ t19 = t9 ^ t14;
+ t20 = t11 ^ t16;
+ t21 = t17 ^ y20;
+ t22 = t18 ^ y19;
+ t23 = t19 ^ y21;
+ t24 = t20 ^ y18;
+
+ t25 = t21 ^ t22;
+ t26 = t21 & t23;
+ t27 = t24 ^ t26;
+ t28 = t25 & t27;
+ t29 = t28 ^ t22;
+ t30 = t23 ^ t24;
+ t31 = t22 ^ t26;
+ t32 = t31 & t30;
+ t33 = t32 ^ t24;
+ t34 = t23 ^ t33;
+ t35 = t27 ^ t33;
+ t36 = t24 & t35;
+ t37 = t36 ^ t34;
+ t38 = t27 ^ t36;
+ t39 = t29 & t38;
+ t40 = t25 ^ t39;
+
+ t41 = t40 ^ t37;
+ t42 = t29 ^ t33;
+ t43 = t29 ^ t40;
+ t44 = t33 ^ t37;
+ t45 = t42 ^ t41;
+ z0 = t44 & y15;
+ z1 = t37 & y6;
+ z2 = t33 & x7;
+ z3 = t43 & y16;
+ z4 = t40 & y1;
+ z5 = t29 & y7;
+ z6 = t42 & y11;
+ z7 = t45 & y17;
+ z8 = t41 & y10;
+ z9 = t44 & y12;
+ z10 = t37 & y3;
+ z11 = t33 & y4;
+ z12 = t43 & y13;
+ z13 = t40 & y5;
+ z14 = t29 & y2;
+ z15 = t42 & y9;
+ z16 = t45 & y14;
+ z17 = t41 & y8;
+
+ /*
+ * Bottom linear transformation.
+ * The "a ^ ~b" terms below are XNOR gates.
+ */
+ t46 = z15 ^ z16;
+ t47 = z10 ^ z11;
+ t48 = z5 ^ z13;
+ t49 = z9 ^ z10;
+ t50 = z2 ^ z12;
+ t51 = z2 ^ z5;
+ t52 = z7 ^ z8;
+ t53 = z0 ^ z3;
+ t54 = z6 ^ z7;
+ t55 = z16 ^ z17;
+ t56 = z12 ^ t48;
+ t57 = t50 ^ t53;
+ t58 = z4 ^ t46;
+ t59 = z3 ^ t54;
+ t60 = t46 ^ t57;
+ t61 = z14 ^ t57;
+ t62 = t52 ^ t58;
+ t63 = t49 ^ t58;
+ t64 = z4 ^ t59;
+ t65 = t61 ^ t62;
+ t66 = z1 ^ t63;
+ s0 = t59 ^ t63;
+ s6 = t56 ^ ~t62;
+ s7 = t48 ^ ~t60;
+ t67 = t64 ^ t65;
+ s3 = t53 ^ t66;
+ s4 = t51 ^ t66;
+ s5 = t47 ^ t65;
+ s1 = t64 ^ ~s3;
+ s2 = t55 ^ ~t67;
+
+ /* Store the outputs with the same reversed numbering as the inputs. */
+ q[7] = s0;
+ q[6] = s1;
+ q[5] = s2;
+ q[4] = s3;
+ q[3] = s4;
+ q[2] = s5;
+ q[1] = s6;
+ q[0] = s7;
+}
+
+/*
+ * see inner.h
+ *
+ * In-place bit transposition over the eight 64-bit words of q, done in
+ * three passes (strides 1, 2 and 4). After the call, bit j of q[i]
+ * inside each 8-bit group holds what bit i of q[j] held before, so
+ * applying the function twice restores the original contents.
+ */
+void
+br_aes_ct64_ortho(uint64_t *q)
+{
+	int i;
+
+#define SWAPN(cl, ch, s, x, y) do { \
+		uint64_t va, vb; \
+		va = (x); \
+		vb = (y); \
+		(x) = (va & (uint64_t)cl) | ((vb & (uint64_t)cl) << (s)); \
+		(y) = ((va & (uint64_t)ch) >> (s)) | (vb & (uint64_t)ch); \
+	} while (0)
+
+#define SWAP2(x, y) SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA, 1, x, y)
+#define SWAP4(x, y) SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC, 2, x, y)
+#define SWAP8(x, y) SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0, 4, x, y)
+
+	/* Stride 1: exchange between neighbouring words. */
+	for (i = 0; i < 8; i += 2) {
+		SWAP2(q[i], q[i + 1]);
+	}
+
+	/* Stride 2: exchange between words two apart. */
+	for (i = 0; i < 8; i += 4) {
+		SWAP4(q[i], q[i + 2]);
+		SWAP4(q[i + 1], q[i + 3]);
+	}
+
+	/* Stride 4: exchange between the low and high halves of q. */
+	for (i = 0; i < 4; i ++) {
+		SWAP8(q[i], q[i + 4]);
+	}
+}
+
+/*
+ * see inner.h
+ *
+ * Pack four 32-bit words into two 64-bit words. The bytes of each input
+ * word are first spread out so that each occupies the low byte of a
+ * 16-bit slot. Then w[0] and w[2] are merged into *q0 (w[2] in the
+ * high byte of each slot) and w[1] and w[3] into *q1 the same way.
+ */
+void
+br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)
+{
+	uint64_t spread[4];
+	int i;
+
+	for (i = 0; i < 4; i ++) {
+		uint64_t t;
+
+		t = w[i];
+		/* Move the upper 16 bits into bits 32..47. */
+		t |= (t << 16);
+		t &= (uint64_t)0x0000FFFF0000FFFF;
+		/* Move the upper byte of each 16-bit half one slot up. */
+		t |= (t << 8);
+		t &= (uint64_t)0x00FF00FF00FF00FF;
+		spread[i] = t;
+	}
+	*q0 = spread[0] | (spread[2] << 8);
+	*q1 = spread[1] | (spread[3] << 8);
+}
+
+/*
+ * see inner.h
+ *
+ * Inverse of br_aes_ct64_interleave_in(): unpack two 64-bit words into
+ * four 32-bit words. w[0] and w[2] are taken from the low and high
+ * byte of each 16-bit slot of q0; w[1] and w[3] likewise from q1.
+ */
+void
+br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)
+{
+	uint64_t lane[4];
+	int i;
+
+	lane[0] = q0;
+	lane[1] = q1;
+	lane[2] = q0 >> 8;
+	lane[3] = q1 >> 8;
+	for (i = 0; i < 4; i ++) {
+		uint64_t t;
+
+		/* Keep the low byte of every 16-bit slot. */
+		t = lane[i] & (uint64_t)0x00FF00FF00FF00FF;
+		/* Pack byte pairs into 16-bit halves. */
+		t |= (t >> 8);
+		t &= (uint64_t)0x0000FFFF0000FFFF;
+		/* Pack the two halves into one 32-bit word. */
+		w[i] = (uint32_t)t | (uint32_t)(t >> 16);
+	}
+}
+
+/*
+ * Round constants for br_aes_ct64_keysched(), stored as single bytes.
+ * Entry k is XORed into the low byte of the substituted word at the
+ * start of each group of nk expanded words.
+ */
+static const unsigned char Rcon[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+/*
+ * Run the bitsliced S-box on a single 32-bit word. The word goes into
+ * slot 0 of an otherwise zeroed array, which is converted with
+ * br_aes_ct64_ortho(), substituted and converted back. The low 32 bits
+ * of slot 0 are returned.
+ */
+static uint32_t
+sub_word(uint32_t x)
+{
+	uint64_t slots[8];
+	int i;
+
+	slots[0] = x;
+	for (i = 1; i < 8; i ++) {
+		slots[i] = 0;
+	}
+	br_aes_ct64_ortho(slots);
+	br_aes_ct64_bitslice_Sbox(slots);
+	br_aes_ct64_ortho(slots);
+	return (uint32_t)slots[0];
+}
+
+/*
+ * see inner.h
+ *
+ * Expand an AES key and store it in compressed bitsliced form.
+ *
+ * comp_skey  receives (num_rounds + 1) * 2 words
+ * key        raw key bytes, decoded with br_range_dec32le()
+ * key_len    key length in bytes; only 16, 24 and 32 are accepted
+ *
+ * Returns the number of rounds (10, 12 or 14), or 0 when key_len is
+ * not supported, in which case comp_skey is not written.
+ */
+unsigned
+br_aes_ct64_keysched(uint64_t *comp_skey, const void *key, size_t key_len)
+{
+ unsigned num_rounds;
+ int i, j, k, nk, nkf;
+ uint32_t tmp;
+ /* nkf is at most (14 + 1) * 4 = 60 words. */
+ uint32_t skey[60];
+
+ switch (key_len) {
+ case 16:
+ num_rounds = 10;
+ break;
+ case 24:
+ num_rounds = 12;
+ break;
+ case 32:
+ num_rounds = 14;
+ break;
+ default:
+ /* abort(); */
+ return 0;
+ }
+ nk = (int)(key_len >> 2);
+ nkf = (int)((num_rounds + 1) << 2);
+ /* The first nk words are the key itself; tmp starts as the last one. */
+ br_range_dec32le(skey, (key_len >> 2), key);
+ tmp = skey[(key_len >> 2) - 1];
+ /*
+ * Expansion: j counts words inside the current group of nk words,
+ * k selects the round constant. tmp always holds the previously
+ * produced word.
+ */
+ for (i = nk, j = 0, k = 0; i < nkf; i ++) {
+ if (j == 0) {
+ /* Rotate right by 8 bits, substitute, add the round constant. */
+ tmp = (tmp << 24) | (tmp >> 8);
+ tmp = sub_word(tmp) ^ Rcon[k];
+ } else if (nk > 6 && j == 4) {
+ /* Extra substitution used only for 32-byte keys. */
+ tmp = sub_word(tmp);
+ }
+ tmp ^= skey[i - nk];
+ skey[i] = tmp;
+ if (++ j == nk) {
+ j = 0;
+ k ++;
+ }
+ }
+
+ /*
+ * Each round key (4 words) is interleaved into q[0] and q[4], copied
+ * into the three following slots of each half, and bitsliced. The
+ * result is then compressed to two words by keeping one bit out of
+ * every four from each slot; br_aes_ct64_skey_expand() reverses this.
+ */
+ for (i = 0, j = 0; i < nkf; i += 4, j += 2) {
+ uint64_t q[8];
+
+ br_aes_ct64_interleave_in(&q[0], &q[4], skey + i);
+ q[1] = q[0];
+ q[2] = q[0];
+ q[3] = q[0];
+ q[5] = q[4];
+ q[6] = q[4];
+ q[7] = q[4];
+ br_aes_ct64_ortho(q);
+ comp_skey[j + 0] =
+ (q[0] & (uint64_t)0x1111111111111111)
+ | (q[1] & (uint64_t)0x2222222222222222)
+ | (q[2] & (uint64_t)0x4444444444444444)
+ | (q[3] & (uint64_t)0x8888888888888888);
+ comp_skey[j + 1] =
+ (q[4] & (uint64_t)0x1111111111111111)
+ | (q[5] & (uint64_t)0x2222222222222222)
+ | (q[6] & (uint64_t)0x4444444444444444)
+ | (q[7] & (uint64_t)0x8888888888888888);
+ }
+ return num_rounds;
+}
+
+/*
+ * see inner.h
+ *
+ * Rebuild the expanded bitsliced key from its compressed form. Each of
+ * the (num_rounds + 1) * 2 input words yields four output words: output
+ * k takes bit k of every 4-bit group and replicates it over the whole
+ * group.
+ */
+void
+br_aes_ct64_skey_expand(uint64_t *skey,
+	unsigned num_rounds, const uint64_t *comp_skey)
+{
+	unsigned count, idx, bit;
+
+	count = (num_rounds + 1) << 1;
+	for (idx = 0; idx < count; idx ++) {
+		uint64_t packed;
+
+		packed = comp_skey[idx];
+		for (bit = 0; bit < 4; bit ++) {
+			uint64_t x;
+
+			/* Isolate bit 'bit' of each nibble, moved to position 0. */
+			x = (packed >> bit) & (uint64_t)0x1111111111111111;
+			/* Multiply by 15: every set bit fills its nibble. */
+			skey[(idx << 2) + bit] = (x << 4) - x;
+		}
+	}
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcdec.c
new file mode 100644
index 00000000..5a7360bc
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcdec.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise a CBC decryption context: run the key schedule into
+ * ctx->skey, record the resulting round count and install the vtable.
+ * The round count is 0 when br_aes_ct64_keysched() rejects the key
+ * length.
+ */
+void
+br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_ct64_cbcdec_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-decrypt len bytes of data in place.
+ *
+ * ctx   key context set up by br_aes_ct64_cbcdec_init()
+ * iv    16-byte chaining value; read on entry, and overwritten on exit
+ *       with the last ciphertext block that was processed
+ * data  buffer holding the ciphertext, replaced by the plaintext
+ * len   data length in bytes
+ *
+ * Up to four blocks (64 bytes) are decrypted per iteration. The last
+ * iteration may hold fewer than four blocks.
+ *
+ * NOTE(review): the code assumes len is a multiple of 16. With a
+ * trailing chunk shorter than 16 bytes, "w1 + j - 4" would point
+ * before w1. Confirm the contract in bearssl_block.h.
+ */
+void
+br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120];
+	uint32_t ivw[4];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w1[16], w2[16];
+		int i;
+
+		if (len >= 64) {
+			br_range_dec32le(w1, 16, buf);
+		} else {
+			/*
+			 * Short final batch: only len / 4 words are decoded,
+			 * but all 16 words are read by the interleaving
+			 * below. Clear the array first so that no
+			 * indeterminate value is read. The extra lanes are
+			 * decrypted and then discarded.
+			 */
+			memset(w1, 0, sizeof w1);
+			br_range_dec32le(w1, len >> 2, buf);
+		}
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w1 + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w2 + (i << 2), q[i], q[i + 4]);
+		}
+		/* First block of the batch chains on the running IV. */
+		for (i = 0; i < 4; i ++) {
+			w2[i] ^= ivw[i];
+		}
+		if (len >= 64) {
+			/* Later blocks chain on the previous ciphertext block. */
+			for (i = 4; i < 16; i ++) {
+				w2[i] ^= w1[i - 4];
+			}
+			/* Last ciphertext block becomes the next IV. */
+			memcpy(ivw, w1 + 12, sizeof ivw);
+			br_range_enc32le(buf, w2, 16);
+		} else {
+			int j;
+
+			j = (int)(len >> 2);
+			for (i = 4; i < j; i ++) {
+				w2[i] ^= w1[i - 4];
+			}
+			memcpy(ivw, w1 + j - 4, sizeof ivw);
+			br_range_enc32le(buf, w2, j);
+			break;
+		}
+		buf += 64;
+		len -= 64;
+	}
+	br_range_enc32le(iv, ivw, 4);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for this CBC decryption implementation. The first
+ * field is the context size. The function-pointer casts adapt the
+ * typed init/run functions to the generic signatures of the class.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm; confirm against br_block_cbcdec_class.
+ */
+const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable = {
+ sizeof(br_aes_ct64_cbcdec_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t))
+ &br_aes_ct64_cbcdec_init,
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+ &br_aes_ct64_cbcdec_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcenc.c
new file mode 100644
index 00000000..6cb9dece
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct64_cbcenc.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise a CBC encryption context: run the key schedule into
+ * ctx->skey, record the resulting round count and install the vtable.
+ * The round count is 0 when br_aes_ct64_keysched() rejects the key
+ * length.
+ */
+void
+br_aes_ct64_cbcenc_init(br_aes_ct64_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_ct64_cbcenc_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-encrypt len bytes of data in place, one 16-byte block at a time.
+ *
+ * ctx   key context set up by br_aes_ct64_cbcenc_init()
+ * iv    16-byte chaining value; read on entry, and overwritten on exit
+ *       with the last ciphertext block produced
+ * data  buffer holding the plaintext, replaced by the ciphertext
+ * len   data length in bytes
+ *
+ * NOTE(review): the loop subtracts 16 from len until it reaches 0, so
+ * len is assumed to be a multiple of 16. Confirm the contract in
+ * bearssl_block.h.
+ */
+void
+br_aes_ct64_cbcenc_run(const br_aes_ct64_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+	uint32_t ivw[4];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+
+	/*
+	 * Only q[0] and q[4] carry the block; the six other words are
+	 * unused lanes that br_aes_ct64_ortho() still reads. Clear the
+	 * array once so that no indeterminate value is ever read. In later
+	 * iterations the unused lanes hold leftovers of the previous
+	 * encryption, which are well-defined and are discarded.
+	 */
+	memset(q, 0, sizeof q);
+
+	buf = data;
+	while (len > 0) {
+		uint32_t w[4];
+
+		/* XOR the plaintext block with the chaining value. */
+		w[0] = ivw[0] ^ br_dec32le(buf);
+		w[1] = ivw[1] ^ br_dec32le(buf + 4);
+		w[2] = ivw[2] ^ br_dec32le(buf + 8);
+		w[3] = ivw[3] ^ br_dec32le(buf + 12);
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		/* The ciphertext block is the next chaining value. */
+		memcpy(ivw, w, sizeof w);
+		br_enc32le(buf, w[0]);
+		br_enc32le(buf + 4, w[1]);
+		br_enc32le(buf + 8, w[2]);
+		br_enc32le(buf + 12, w[3]);
+		buf += 16;
+		len -= 16;
+	}
+	br_range_enc32le(iv, ivw, 4);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for this CBC encryption implementation. The first
+ * field is the context size. The function-pointer casts adapt the
+ * typed init/run functions to the generic signatures of the class.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm; confirm against br_block_cbcenc_class.
+ */
+const br_block_cbcenc_class br_aes_ct64_cbcenc_vtable = {
+ sizeof(br_aes_ct64_cbcenc_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t))
+ &br_aes_ct64_cbcenc_init,
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+ &br_aes_ct64_cbcenc_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct64_ctr.c b/test/monniaux/BearSSL/src/symcipher/aes_ct64_ctr.c
new file mode 100644
index 00000000..1275873d
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct64_ctr.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise a CTR context: run the key schedule into ctx->skey,
+ * record the resulting round count and install the vtable. The round
+ * count is 0 when br_aes_ct64_keysched() rejects the key length.
+ */
+void
+br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_ct64_ctr_vtable;
+}
+
+/*
+ * XOR the first len bytes of src into dst, byte by byte.
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out;
+	const unsigned char *in;
+	size_t k;
+
+	out = dst;
+	in = src;
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * XOR a CTR keystream into len bytes of data, in place.
+ *
+ * ctx   key context set up by br_aes_ct64_ctr_init()
+ * iv    three 32-bit words (12 bytes) are read from here; not modified
+ * cc    counter value used for the first block
+ * data  buffer to process
+ * len   data length in bytes
+ *
+ * Returns the counter value following the last block accounted for.
+ * In the final chunk the counter advances by len >> 4, so a trailing
+ * partial block is not counted.
+ */
+uint32_t
+br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx,
+ const void *iv, uint32_t cc, void *data, size_t len)
+{
+ unsigned char *buf;
+ uint32_t ivw[16];
+ uint64_t sk_exp[120];
+
+ br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+ /*
+ * ivw is a template for four counter blocks of four words each. The
+ * three IV words are copied to the start of every block. The fourth
+ * word of each block (indices 3, 7, 11, 15) is left unset here and is
+ * filled with the counter inside the loop.
+ */
+ br_range_dec32le(ivw, 3, iv);
+ memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t));
+ memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t));
+ memcpy(ivw + 12, ivw, 3 * sizeof(uint32_t));
+ buf = data;
+ while (len > 0) {
+ uint64_t q[8];
+ uint32_t w[16];
+ unsigned char tmp[64];
+ int i;
+
+ /*
+ * TODO: see if we can save on the first br_aes_ct64_ortho()
+ * call, since iv0/iv1/iv2 are constant for the whole run.
+ */
+ memcpy(w, ivw, sizeof ivw);
+ /* Four consecutive counter values, byte-swapped into the word layout. */
+ w[3] = br_swap32(cc);
+ w[7] = br_swap32(cc + 1);
+ w[11] = br_swap32(cc + 2);
+ w[15] = br_swap32(cc + 3);
+ for (i = 0; i < 4; i ++) {
+ br_aes_ct64_interleave_in(
+ &q[i], &q[i + 4], w + (i << 2));
+ }
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct64_ortho(q);
+ for (i = 0; i < 4; i ++) {
+ br_aes_ct64_interleave_out(
+ w + (i << 2), q[i], q[i + 4]);
+ }
+ /* tmp now holds 64 bytes of keystream. */
+ br_range_enc32le(tmp, w, 16);
+ if (len <= 64) {
+ /* Final chunk: use only the keystream bytes that are needed. */
+ xorbuf(buf, tmp, len);
+ cc += (uint32_t)len >> 4;
+ break;
+ }
+ xorbuf(buf, tmp, 64);
+ buf += 64;
+ len -= 64;
+ cc += 4;
+ }
+ return cc;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for this CTR implementation. The first field is the
+ * context size. The function-pointer casts adapt the typed init/run
+ * functions to the generic signatures of the class.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm; confirm against br_block_ctr_class.
+ */
+const br_block_ctr_class br_aes_ct64_ctr_vtable = {
+ sizeof(br_aes_ct64_ctr_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_ctr_class **, const void *, size_t))
+ &br_aes_ct64_ctr_init,
+ (uint32_t (*)(const br_block_ctr_class *const *,
+ const void *, uint32_t, void *, size_t))
+ &br_aes_ct64_ctr_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct64_ctrcbc.c b/test/monniaux/BearSSL/src/symcipher/aes_ct64_ctrcbc.c
new file mode 100644
index 00000000..21bb8efa
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct64_ctrcbc.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- binds ctx to the ct64 CTR+CBC-MAC vtable and runs the key schedule on key[0..len) */
+void
+br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_ct64_ctrcbc_vtable;
+ ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); /* the key schedule's return value is kept as the round count */
+}
+
+static void /* XOR the first len bytes of src into dst, in place */
+xorbuf(void *dst, const void *src, size_t len)
+{
+ unsigned char *d;
+ const unsigned char *s;
+
+ d = dst;
+ s = src;
+ while (len -- > 0) { /* byte by byte; len == 0 touches no memory */
+ *d ++ ^= *s ++;
+ }
+}
+
+/* see bearssl_block.h -- CTR only: XORs keystream into data[0..len) and writes the advanced 16-byte counter back to ctr */
+void
+br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
+ void *ctr, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned char *ivbuf;
+ uint32_t iv0, iv1, iv2, iv3;
+ uint64_t sk_exp[120];
+
+ br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ /*
+ * We keep the counter as four 32-bit values, with big-endian
+ * convention, because that's what is expected for purposes of
+ * incrementing the counter value.
+ */
+ ivbuf = ctr;
+ iv0 = br_dec32be(ivbuf + 0);
+ iv1 = br_dec32be(ivbuf + 4);
+ iv2 = br_dec32be(ivbuf + 8);
+ iv3 = br_dec32be(ivbuf + 12);
+
+ buf = data;
+ while (len > 0) {
+ uint64_t q[8];
+ uint32_t w[16];
+ unsigned char tmp[64];
+ int i, j;
+
+ /*
+ * The bitslice implementation expects values in
+ * little-endian convention, so we have to byteswap them.
+ */
+ j = (len >= 64) ? 16 : (int)(len >> 2); /* number of counter words to generate on this pass */
+ for (i = 0; i < j; i += 4) {
+ uint32_t carry;
+
+ w[i + 0] = br_swap32(iv0);
+ w[i + 1] = br_swap32(iv1);
+ w[i + 2] = br_swap32(iv2);
+ w[i + 3] = br_swap32(iv3);
+ iv3 ++;
+ carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped to zero, else 0 (no branch) */
+ iv2 += carry;
+ carry &= -(~(iv2 | -iv2) >> 31); /* stays 1 only if iv2 is now zero as well */
+ iv1 += carry;
+ carry &= -(~(iv1 | -iv1) >> 31);
+ iv0 += carry;
+ }
+ memset(w + i, 0, (16 - i) * sizeof(uint32_t)); /* zero the block slots not filled on this pass */
+
+ for (i = 0; i < 4; i ++) {
+ br_aes_ct64_interleave_in(
+ &q[i], &q[i + 4], w + (i << 2));
+ }
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct64_ortho(q);
+ for (i = 0; i < 4; i ++) {
+ br_aes_ct64_interleave_out(
+ w + (i << 2), q[i], q[i + 4]);
+ }
+
+ br_range_enc32le(tmp, w, 16);
+ if (len <= 64) { /* final pass: only the first len keystream bytes are used */
+ xorbuf(buf, tmp, len);
+ break;
+ }
+ xorbuf(buf, tmp, 64);
+ buf += 64;
+ len -= 64;
+ }
+ br_enc32be(ivbuf + 0, iv0); /* store the updated counter back, big-endian */
+ br_enc32be(ivbuf + 4, iv1);
+ br_enc32be(ivbuf + 8, iv2);
+ br_enc32be(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- CBC-MAC update: folds each 16-byte block of data into the 16-byte state at cbcmac */
+void
+br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
+ void *cbcmac, const void *data, size_t len)
+{
+ const unsigned char *buf;
+ uint32_t cm0, cm1, cm2, cm3;
+ uint64_t q[8];
+ uint64_t sk_exp[120];
+
+ br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ cm0 = br_dec32le((unsigned char *)cbcmac + 0);
+ cm1 = br_dec32le((unsigned char *)cbcmac + 4);
+ cm2 = br_dec32le((unsigned char *)cbcmac + 8);
+ cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+ buf = data;
+ memset(q, 0, sizeof q); /* only q[0] and q[4] are loaded per block below */
+ while (len > 0) { /* len drops by 16 per iteration, so it must be a multiple of 16 to reach zero */
+ uint32_t w[4];
+
+ w[0] = cm0 ^ br_dec32le(buf + 0); /* state XOR input block */
+ w[1] = cm1 ^ br_dec32le(buf + 4);
+ w[2] = cm2 ^ br_dec32le(buf + 8);
+ w[3] = cm3 ^ br_dec32le(buf + 12);
+
+ br_aes_ct64_interleave_in(&q[0], &q[4], w);
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_interleave_out(w, q[0], q[4]);
+
+ cm0 = w[0]; /* the encrypted block becomes the new state */
+ cm1 = w[1];
+ cm2 = w[2];
+ cm3 = w[3];
+ buf += 16;
+ len -= 16;
+ }
+
+ br_enc32le((unsigned char *)cbcmac + 0, cm0);
+ br_enc32le((unsigned char *)cbcmac + 4, cm1);
+ br_enc32le((unsigned char *)cbcmac + 8, cm2);
+ br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- CTR-encrypts data in place and CBC-MACs the resulting ciphertext; updates both ctr and cbcmac */
+void
+br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ /*
+ * When encrypting, the CBC-MAC processing must be lagging by
+ * one block, since it operates on the encrypted values, so
+ * it must wait for that encryption to complete.
+ */
+
+ unsigned char *buf;
+ unsigned char *ivbuf;
+ uint32_t iv0, iv1, iv2, iv3;
+ uint32_t cm0, cm1, cm2, cm3;
+ uint64_t sk_exp[120];
+ uint64_t q[8];
+ int first_iter;
+
+ br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ /*
+ * We keep the counter as four 32-bit values, with big-endian
+ * convention, because that's what is expected for purposes of
+ * incrementing the counter value.
+ */
+ ivbuf = ctr;
+ iv0 = br_dec32be(ivbuf + 0);
+ iv1 = br_dec32be(ivbuf + 4);
+ iv2 = br_dec32be(ivbuf + 8);
+ iv3 = br_dec32be(ivbuf + 12);
+
+ /*
+ * The current CBC-MAC value is kept in little-endian convention.
+ */
+ cm0 = br_dec32le((unsigned char *)cbcmac + 0);
+ cm1 = br_dec32le((unsigned char *)cbcmac + 4);
+ cm2 = br_dec32le((unsigned char *)cbcmac + 8);
+ cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+ buf = data;
+ first_iter = 1;
+ memset(q, 0, sizeof q);
+ while (len > 0) { /* len drops by 16 per iteration, so it must be a multiple of 16 to reach zero */
+ uint32_t w[8], carry;
+
+ /*
+ * The bitslice implementation expects values in
+ * little-endian convention, so we have to byteswap them.
+ */
+ w[0] = br_swap32(iv0);
+ w[1] = br_swap32(iv1);
+ w[2] = br_swap32(iv2);
+ w[3] = br_swap32(iv3);
+ iv3 ++;
+ carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped to zero, else 0 (no branch) */
+ iv2 += carry;
+ carry &= -(~(iv2 | -iv2) >> 31); /* stays 1 only if iv2 is now zero as well */
+ iv1 += carry;
+ carry &= -(~(iv1 | -iv1) >> 31);
+ iv0 += carry;
+
+ /*
+ * The block for CBC-MAC.
+ */
+ w[4] = cm0;
+ w[5] = cm1;
+ w[6] = cm2;
+ w[7] = cm3;
+
+ br_aes_ct64_interleave_in(&q[0], &q[4], w); /* slot 0: counter block */
+ br_aes_ct64_interleave_in(&q[1], &q[5], w + 4); /* slot 1: CBC-MAC block */
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_interleave_out(w, q[0], q[4]);
+ br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+ /*
+ * We do the XOR with the plaintext in 32-bit registers,
+ * so that the values are available for CBC-MAC processing
+ * as well.
+ */
+ w[0] ^= br_dec32le(buf + 0);
+ w[1] ^= br_dec32le(buf + 4);
+ w[2] ^= br_dec32le(buf + 8);
+ w[3] ^= br_dec32le(buf + 12);
+ br_enc32le(buf + 0, w[0]);
+ br_enc32le(buf + 4, w[1]);
+ br_enc32le(buf + 8, w[2]);
+ br_enc32le(buf + 12, w[3]);
+
+ buf += 16;
+ len -= 16;
+
+ /*
+ * We set the cm* values to the block to encrypt in the
+ * next iteration.
+ */
+ if (first_iter) { /* first block: the slot-1 result (w[4..7]) is discarded, the incoming state is used as is */
+ first_iter = 0;
+ cm0 ^= w[0];
+ cm1 ^= w[1];
+ cm2 ^= w[2];
+ cm3 ^= w[3];
+ } else { /* later blocks: new ciphertext XOR the encrypted previous CBC-MAC block */
+ cm0 = w[0] ^ w[4];
+ cm1 = w[1] ^ w[5];
+ cm2 = w[2] ^ w[6];
+ cm3 = w[3] ^ w[7];
+ }
+
+ /*
+ * If this was the last iteration, then compute the
+ * extra block encryption to complete CBC-MAC.
+ */
+ if (len == 0) {
+ w[0] = cm0;
+ w[1] = cm1;
+ w[2] = cm2;
+ w[3] = cm3;
+ br_aes_ct64_interleave_in(&q[0], &q[4], w);
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_bitslice_encrypt(
+ ctx->num_rounds, sk_exp, q);
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_interleave_out(w, q[0], q[4]);
+ cm0 = w[0];
+ cm1 = w[1];
+ cm2 = w[2];
+ cm3 = w[3];
+ break;
+ }
+ }
+
+ br_enc32be(ivbuf + 0, iv0); /* counter is written back big-endian, the MAC state little-endian */
+ br_enc32be(ivbuf + 4, iv1);
+ br_enc32be(ivbuf + 8, iv2);
+ br_enc32be(ivbuf + 12, iv3);
+ br_enc32le((unsigned char *)cbcmac + 0, cm0);
+ br_enc32le((unsigned char *)cbcmac + 4, cm1);
+ br_enc32le((unsigned char *)cbcmac + 8, cm2);
+ br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- CBC-MACs the ciphertext in data and CTR-decrypts it in place; updates both ctr and cbcmac */
+void
+br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned char *ivbuf;
+ uint32_t iv0, iv1, iv2, iv3;
+ uint32_t cm0, cm1, cm2, cm3;
+ uint64_t sk_exp[120];
+ uint64_t q[8];
+
+ br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ /*
+ * We keep the counter as four 32-bit values, with big-endian
+ * convention, because that's what is expected for purposes of
+ * incrementing the counter value.
+ */
+ ivbuf = ctr;
+ iv0 = br_dec32be(ivbuf + 0);
+ iv1 = br_dec32be(ivbuf + 4);
+ iv2 = br_dec32be(ivbuf + 8);
+ iv3 = br_dec32be(ivbuf + 12);
+
+ /*
+ * The current CBC-MAC value is kept in little-endian convention.
+ */
+ cm0 = br_dec32le((unsigned char *)cbcmac + 0);
+ cm1 = br_dec32le((unsigned char *)cbcmac + 4);
+ cm2 = br_dec32le((unsigned char *)cbcmac + 8);
+ cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+ buf = data;
+ memset(q, 0, sizeof q);
+ while (len > 0) { /* len drops by 16 per iteration, so it must be a multiple of 16 to reach zero */
+ uint32_t w[8], carry;
+ unsigned char tmp[16];
+
+ /*
+ * The bitslice implementation expects values in
+ * little-endian convention, so we have to byteswap them.
+ */
+ w[0] = br_swap32(iv0);
+ w[1] = br_swap32(iv1);
+ w[2] = br_swap32(iv2);
+ w[3] = br_swap32(iv3);
+ iv3 ++;
+ carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped to zero, else 0 (no branch) */
+ iv2 += carry;
+ carry &= -(~(iv2 | -iv2) >> 31); /* stays 1 only if iv2 is now zero as well */
+ iv1 += carry;
+ carry &= -(~(iv1 | -iv1) >> 31);
+ iv0 += carry;
+
+ /*
+ * The block for CBC-MAC: the state XORed with the
+ * ciphertext, read before buf is overwritten below.
+ */
+ w[4] = cm0 ^ br_dec32le(buf + 0);
+ w[5] = cm1 ^ br_dec32le(buf + 4);
+ w[6] = cm2 ^ br_dec32le(buf + 8);
+ w[7] = cm3 ^ br_dec32le(buf + 12);
+
+ br_aes_ct64_interleave_in(&q[0], &q[4], w); /* slot 0: counter block */
+ br_aes_ct64_interleave_in(&q[1], &q[5], w + 4); /* slot 1: CBC-MAC block */
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct64_ortho(q);
+ br_aes_ct64_interleave_out(w, q[0], q[4]);
+ br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+ br_enc32le(tmp + 0, w[0]); /* serialize the keystream block, then XOR it into the data */
+ br_enc32le(tmp + 4, w[1]);
+ br_enc32le(tmp + 8, w[2]);
+ br_enc32le(tmp + 12, w[3]);
+ xorbuf(buf, tmp, 16);
+ cm0 = w[4]; /* encrypted slot 1 becomes the new CBC-MAC state */
+ cm1 = w[5];
+ cm2 = w[6];
+ cm3 = w[7];
+ buf += 16;
+ len -= 16;
+ }
+
+ br_enc32be(ivbuf + 0, iv0); /* counter is written back big-endian, the MAC state little-endian */
+ br_enc32be(ivbuf + 4, iv1);
+ br_enc32be(ivbuf + 8, iv2);
+ br_enc32be(ivbuf + 12, iv3);
+ br_enc32le((unsigned char *)cbcmac + 0, cm0);
+ br_enc32le((unsigned char *)cbcmac + 4, cm1);
+ br_enc32le((unsigned char *)cbcmac + 8, cm2);
+ br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- class descriptor: context size, two numeric fields, then init, encrypt, decrypt, ctr and mac entry points */
+const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
+ sizeof(br_aes_ct64_ctrcbc_keys),
+ 16, /* presumably the block size in bytes -- confirm against the class declaration */
+ 4, /* presumably log2 of the block size -- confirm against the class declaration */
+ (void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+ &br_aes_ct64_ctrcbc_init, /* each entry is cast from the concrete context type to the generic class signature */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_ct64_ctrcbc_encrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_ct64_ctrcbc_decrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, size_t))
+ &br_aes_ct64_ctrcbc_ctr,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, const void *, size_t))
+ &br_aes_ct64_ctrcbc_mac
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct64_dec.c b/test/monniaux/BearSSL/src/symcipher/aes_ct64_dec.c
new file mode 100644
index 00000000..ab00e099
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct64_dec.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- inverse S-box on the eight words of q: a fixed XOR/complement transform, the forward S-box, then the same transform again */
+void
+br_aes_ct64_bitslice_invSbox(uint64_t *q)
+{
+ /*
+ * See br_aes_ct_bitslice_invSbox(). This is the natural extension
+ * to 64-bit registers.
+ */
+ uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+
+ q0 = ~q[0]; /* words 0, 1, 5 and 6 are complemented on load */
+ q1 = ~q[1];
+ q2 = q[2];
+ q3 = q[3];
+ q4 = q[4];
+ q5 = ~q[5];
+ q6 = ~q[6];
+ q7 = q[7];
+ q[7] = q1 ^ q4 ^ q6; /* each output word is the XOR of three loaded words */
+ q[6] = q0 ^ q3 ^ q5;
+ q[5] = q7 ^ q2 ^ q4;
+ q[4] = q6 ^ q1 ^ q3;
+ q[3] = q5 ^ q0 ^ q2;
+ q[2] = q4 ^ q7 ^ q1;
+ q[1] = q3 ^ q6 ^ q0;
+ q[0] = q2 ^ q5 ^ q7;
+
+ br_aes_ct64_bitslice_Sbox(q); /* forward S-box, defined elsewhere */
+
+ q0 = ~q[0]; /* same transform as above, applied to the S-box output */
+ q1 = ~q[1];
+ q2 = q[2];
+ q3 = q[3];
+ q4 = q[4];
+ q5 = ~q[5];
+ q6 = ~q[6];
+ q7 = q[7];
+ q[7] = q1 ^ q4 ^ q6;
+ q[6] = q0 ^ q3 ^ q5;
+ q[5] = q7 ^ q2 ^ q4;
+ q[4] = q6 ^ q1 ^ q3;
+ q[3] = q5 ^ q0 ^ q2;
+ q[2] = q4 ^ q7 ^ q1;
+ q[1] = q3 ^ q6 ^ q0;
+ q[0] = q2 ^ q5 ^ q7;
+}
+
+static void /* XOR the eight round-key words sk[0..7] into the state q[0..7] */
+add_round_key(uint64_t *q, const uint64_t *sk)
+{
+ int i;
+
+ for (i = 0; i < 8; i ++) {
+ q[i] ^= sk[i];
+ }
+}
+
+static void /* applies the same bit permutation to each of the eight state words; it undoes shift_rows() of the encryption file */
+inv_shift_rows(uint64_t *q)
+{
+ int i;
+
+ for (i = 0; i < 8; i ++) {
+ uint64_t x;
+
+ x = q[i];
+ q[i] = (x & (uint64_t)0x000000000000FFFF) /* bits 0..15: unchanged */
+ | ((x & (uint64_t)0x000000000FFF0000) << 4) /* bits 16..31: rotated left by 4 within the group */
+ | ((x & (uint64_t)0x00000000F0000000) >> 12)
+ | ((x & (uint64_t)0x000000FF00000000) << 8) /* bits 32..47: the two bytes are swapped */
+ | ((x & (uint64_t)0x0000FF0000000000) >> 8)
+ | ((x & (uint64_t)0x000F000000000000) << 12) /* bits 48..63: rotated left by 12 within the group */
+ | ((x & (uint64_t)0xFFF0000000000000) >> 4);
+ }
+}
+
+static inline uint64_t /* rotates x by 32 bits, i.e. swaps its two 32-bit halves */
+rotr32(uint64_t x)
+{
+ return (x << 32) | (x >> 32);
+}
+
+static void /* inverse column mixing on the eight state words, as XORs of the words and their 16-bit and 32-bit rotations */
+inv_mix_columns(uint64_t *q)
+{
+ uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+ uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+
+ q0 = q[0]; /* snapshot the inputs: every output below depends on several input words */
+ q1 = q[1];
+ q2 = q[2];
+ q3 = q[3];
+ q4 = q[4];
+ q5 = q[5];
+ q6 = q[6];
+ q7 = q[7];
+ r0 = (q0 >> 16) | (q0 << 48); /* rN is qN rotated right by 16 bits */
+ r1 = (q1 >> 16) | (q1 << 48);
+ r2 = (q2 >> 16) | (q2 << 48);
+ r3 = (q3 >> 16) | (q3 << 48);
+ r4 = (q4 >> 16) | (q4 << 48);
+ r5 = (q5 >> 16) | (q5 << 48);
+ r6 = (q6 >> 16) | (q6 << 48);
+ r7 = (q7 >> 16) | (q7 << 48);
+
+ q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr32(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+ q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+ q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr32(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+ q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr32(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+ q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+ q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+ q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr32(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+ q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr32(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* see inner.h -- decrypts the bitsliced state q in place, using round keys from skey[num_rounds * 8] down to skey[0] */
+void
+br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
+ const uint64_t *skey, uint64_t *q)
+{
+ unsigned u;
+
+ add_round_key(q, skey + (num_rounds << 3)); /* last round key first; each round key is eight words */
+ for (u = num_rounds - 1; u > 0; u --) {
+ inv_shift_rows(q);
+ br_aes_ct64_bitslice_invSbox(q);
+ add_round_key(q, skey + (u << 3));
+ inv_mix_columns(q);
+ }
+ inv_shift_rows(q); /* final round: no inv_mix_columns */
+ br_aes_ct64_bitslice_invSbox(q);
+ add_round_key(q, skey);
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct64_enc.c b/test/monniaux/BearSSL/src/symcipher/aes_ct64_enc.c
new file mode 100644
index 00000000..78631ced
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct64_enc.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static inline void /* XOR the eight round-key words sk[0..7] into the state q[0..7] (written out without a loop) */
+add_round_key(uint64_t *q, const uint64_t *sk)
+{
+ q[0] ^= sk[0];
+ q[1] ^= sk[1];
+ q[2] ^= sk[2];
+ q[3] ^= sk[3];
+ q[4] ^= sk[4];
+ q[5] ^= sk[5];
+ q[6] ^= sk[6];
+ q[7] ^= sk[7];
+}
+
+static inline void /* applies the same bit permutation to each of the eight state words */
+shift_rows(uint64_t *q)
+{
+ int i;
+
+ for (i = 0; i < 8; i ++) {
+ uint64_t x;
+
+ x = q[i];
+ q[i] = (x & (uint64_t)0x000000000000FFFF) /* bits 0..15: unchanged */
+ | ((x & (uint64_t)0x00000000FFF00000) >> 4) /* bits 16..31: rotated right by 4 within the group */
+ | ((x & (uint64_t)0x00000000000F0000) << 12)
+ | ((x & (uint64_t)0x0000FF0000000000) >> 8) /* bits 32..47: the two bytes are swapped */
+ | ((x & (uint64_t)0x000000FF00000000) << 8)
+ | ((x & (uint64_t)0xF000000000000000) >> 12) /* bits 48..63: rotated right by 12 within the group */
+ | ((x & (uint64_t)0x0FFF000000000000) << 4);
+ }
+}
+
+static inline uint64_t /* rotates x by 32 bits, i.e. swaps its two 32-bit halves */
+rotr32(uint64_t x)
+{
+ return (x << 32) | (x >> 32);
+}
+
+static inline void /* column mixing on the eight state words, as XORs of the words and their 16-bit and 32-bit rotations */
+mix_columns(uint64_t *q)
+{
+ uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+ uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+
+ q0 = q[0]; /* snapshot the inputs: outputs below read words that are overwritten earlier in the sequence */
+ q1 = q[1];
+ q2 = q[2];
+ q3 = q[3];
+ q4 = q[4];
+ q5 = q[5];
+ q6 = q[6];
+ q7 = q[7];
+ r0 = (q0 >> 16) | (q0 << 48); /* rN is qN rotated right by 16 bits */
+ r1 = (q1 >> 16) | (q1 << 48);
+ r2 = (q2 >> 16) | (q2 << 48);
+ r3 = (q3 >> 16) | (q3 << 48);
+ r4 = (q4 >> 16) | (q4 << 48);
+ r5 = (q5 >> 16) | (q5 << 48);
+ r6 = (q6 >> 16) | (q6 << 48);
+ r7 = (q7 >> 16) | (q7 << 48);
+
+ q[0] = q7 ^ r7 ^ r0 ^ rotr32(q0 ^ r0);
+ q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr32(q1 ^ r1); /* words 1, 3 and 4 take an extra q7 ^ r7 term */
+ q[2] = q1 ^ r1 ^ r2 ^ rotr32(q2 ^ r2);
+ q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr32(q3 ^ r3);
+ q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr32(q4 ^ r4);
+ q[5] = q4 ^ r4 ^ r5 ^ rotr32(q5 ^ r5);
+ q[6] = q5 ^ r5 ^ r6 ^ rotr32(q6 ^ r6);
+ q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7);
+}
+
+/* see inner.h -- encrypts the bitsliced state q in place, using round keys from skey[0] up to skey[num_rounds * 8] */
+void
+br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
+ const uint64_t *skey, uint64_t *q)
+{
+ unsigned u;
+
+ add_round_key(q, skey); /* initial key addition; each round key is eight words */
+ for (u = 1; u < num_rounds; u ++) {
+ br_aes_ct64_bitslice_Sbox(q);
+ shift_rows(q);
+ mix_columns(q);
+ add_round_key(q, skey + (u << 3));
+ }
+ br_aes_ct64_bitslice_Sbox(q); /* final round: no mix_columns */
+ shift_rows(q);
+ add_round_key(q, skey + (num_rounds << 3));
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/aes_ct_cbcdec.c
new file mode 100644
index 00000000..522645ad
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct_cbcdec.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- binds ctx to the ct CBC-decryption vtable and runs the key schedule on key[0..len) */
+void
+br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_ct_cbcdec_vtable;
+ ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len); /* the key schedule's return value is kept as the round count */
+}
+
+/* see bearssl_block.h -- CBC-decrypts data in place, two 16-byte blocks per pass; iv is replaced by the last ciphertext block */
+void
+br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx,
+ void *iv, void *data, size_t len)
+{
+ unsigned char *buf, *ivbuf;
+ uint32_t iv0, iv1, iv2, iv3;
+ uint32_t sk_exp[120];
+
+ br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+ ivbuf = iv;
+ iv0 = br_dec32le(ivbuf);
+ iv1 = br_dec32le(ivbuf + 4);
+ iv2 = br_dec32le(ivbuf + 8);
+ iv3 = br_dec32le(ivbuf + 12);
+ buf = data;
+ while (len > 0) {
+ uint32_t q[8], sq[8];
+
+ q[0] = br_dec32le(buf); /* even slots: first block of the pair */
+ q[2] = br_dec32le(buf + 4);
+ q[4] = br_dec32le(buf + 8);
+ q[6] = br_dec32le(buf + 12);
+ if (len >= 32) {
+ q[1] = br_dec32le(buf + 16); /* odd slots: second block of the pair */
+ q[3] = br_dec32le(buf + 20);
+ q[5] = br_dec32le(buf + 24);
+ q[7] = br_dec32le(buf + 28);
+ } else { /* only one block left: the second slot is zero-filled */
+ q[1] = 0;
+ q[3] = 0;
+ q[5] = 0;
+ q[7] = 0;
+ }
+ memcpy(sq, q, sizeof q); /* keep the ciphertext words: they are the chaining values for later blocks */
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+ br_enc32le(buf, q[0] ^ iv0);
+ br_enc32le(buf + 4, q[2] ^ iv1);
+ br_enc32le(buf + 8, q[4] ^ iv2);
+ br_enc32le(buf + 12, q[6] ^ iv3);
+ if (len < 32) { /* single trailing block: its ciphertext becomes the final IV */
+ iv0 = sq[0];
+ iv1 = sq[2];
+ iv2 = sq[4];
+ iv3 = sq[6];
+ break;
+ }
+ br_enc32le(buf + 16, q[1] ^ sq[0]); /* second block chains on the first block's ciphertext */
+ br_enc32le(buf + 20, q[3] ^ sq[2]);
+ br_enc32le(buf + 24, q[5] ^ sq[4]);
+ br_enc32le(buf + 28, q[7] ^ sq[6]);
+ iv0 = sq[1];
+ iv1 = sq[3];
+ iv2 = sq[5];
+ iv3 = sq[7];
+ buf += 32;
+ len -= 32;
+ }
+ br_enc32le(ivbuf, iv0);
+ br_enc32le(ivbuf + 4, iv1);
+ br_enc32le(ivbuf + 8, iv2);
+ br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- class descriptor: context size, two numeric fields, then the init and run entry points */
+const br_block_cbcdec_class br_aes_ct_cbcdec_vtable = {
+ sizeof(br_aes_ct_cbcdec_keys),
+ 16, /* presumably the block size in bytes -- confirm against the class declaration */
+ 4, /* presumably log2 of the block size -- confirm against the class declaration */
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t))
+ &br_aes_ct_cbcdec_init, /* cast from the concrete context type to the generic class signature */
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+ &br_aes_ct_cbcdec_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/aes_ct_cbcenc.c
new file mode 100644
index 00000000..cb85977b
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct_cbcenc.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- binds ctx to the ct CBC-encryption vtable and runs the key schedule on key[0..len) */
+void
+br_aes_ct_cbcenc_init(br_aes_ct_cbcenc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_ct_cbcenc_vtable;
+ ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len); /* the key schedule's return value is kept as the round count */
+}
+
+/* see bearssl_block.h -- CBC-encrypts data in place, one 16-byte block per iteration; iv is replaced by the last ciphertext block */
+void
+br_aes_ct_cbcenc_run(const br_aes_ct_cbcenc_keys *ctx,
+ void *iv, void *data, size_t len)
+{
+ unsigned char *buf, *ivbuf;
+ uint32_t q[8];
+ uint32_t iv0, iv1, iv2, iv3;
+ uint32_t sk_exp[120];
+
+ q[1] = 0; /* odd slots carry no data here; they are initialized once, before the loop */
+ q[3] = 0;
+ q[5] = 0;
+ q[7] = 0;
+ br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+ ivbuf = iv;
+ iv0 = br_dec32le(ivbuf);
+ iv1 = br_dec32le(ivbuf + 4);
+ iv2 = br_dec32le(ivbuf + 8);
+ iv3 = br_dec32le(ivbuf + 12);
+ buf = data;
+ while (len > 0) { /* len drops by 16 per iteration, so it must be a multiple of 16 to reach zero */
+ q[0] = iv0 ^ br_dec32le(buf); /* chaining value XOR plaintext, in the even slots */
+ q[2] = iv1 ^ br_dec32le(buf + 4);
+ q[4] = iv2 ^ br_dec32le(buf + 8);
+ q[6] = iv3 ^ br_dec32le(buf + 12);
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+ iv0 = q[0]; /* the ciphertext is both the output and the next chaining value */
+ iv1 = q[2];
+ iv2 = q[4];
+ iv3 = q[6];
+ br_enc32le(buf, iv0);
+ br_enc32le(buf + 4, iv1);
+ br_enc32le(buf + 8, iv2);
+ br_enc32le(buf + 12, iv3);
+ buf += 16;
+ len -= 16;
+ }
+ br_enc32le(ivbuf, iv0);
+ br_enc32le(ivbuf + 4, iv1);
+ br_enc32le(ivbuf + 8, iv2);
+ br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- class descriptor: context size, two numeric fields, then the init and run entry points */
+const br_block_cbcenc_class br_aes_ct_cbcenc_vtable = {
+ sizeof(br_aes_ct_cbcenc_keys),
+ 16, /* presumably the block size in bytes -- confirm against the class declaration */
+ 4, /* presumably log2 of the block size -- confirm against the class declaration */
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t))
+ &br_aes_ct_cbcenc_init, /* cast from the concrete context type to the generic class signature */
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+ &br_aes_ct_cbcenc_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct_ctr.c b/test/monniaux/BearSSL/src/symcipher/aes_ct_ctr.c
new file mode 100644
index 00000000..f407689e
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct_ctr.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- binds ctx to the ct CTR vtable and runs the key schedule on key[0..len) */
+void
+br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_ct_ctr_vtable;
+ ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len); /* the key schedule's return value is kept as the round count */
+}
+
+static void /* XOR the first len bytes of src into dst, in place */
+xorbuf(void *dst, const void *src, size_t len)
+{
+ unsigned char *d;
+ const unsigned char *s;
+
+ d = dst;
+ s = src;
+ while (len -- > 0) { /* byte by byte; len == 0 touches no memory */
+ *d ++ ^= *s ++;
+ }
+}
+
+/* see bearssl_block.h -- XORs an AES-CTR keystream into data[0..len), up to two blocks per pass; returns the advanced counter */
+uint32_t
+br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx,
+ const void *iv, uint32_t cc, void *data, size_t len)
+{
+ unsigned char *buf;
+ const unsigned char *ivbuf;
+ uint32_t iv0, iv1, iv2;
+ uint32_t sk_exp[120];
+
+ br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+ ivbuf = iv;
+ iv0 = br_dec32le(ivbuf); /* only the first 12 bytes of iv are read */
+ iv1 = br_dec32le(ivbuf + 4);
+ iv2 = br_dec32le(ivbuf + 8);
+ buf = data;
+ while (len > 0) {
+ uint32_t q[8];
+ unsigned char tmp[32];
+
+ /*
+ * TODO: see if we can save on the first br_aes_ct_ortho()
+ * call, since iv0/iv1/iv2 are constant for the whole run.
+ */
+ q[0] = q[1] = iv0; /* even slots: block with counter cc; odd slots: block with counter cc + 1 */
+ q[2] = q[3] = iv1;
+ q[4] = q[5] = iv2;
+ q[6] = br_swap32(cc);
+ q[7] = br_swap32(cc + 1);
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+ br_enc32le(tmp, q[0]); /* first keystream block from the even slots, second from the odd slots */
+ br_enc32le(tmp + 4, q[2]);
+ br_enc32le(tmp + 8, q[4]);
+ br_enc32le(tmp + 12, q[6]);
+ br_enc32le(tmp + 16, q[1]);
+ br_enc32le(tmp + 20, q[3]);
+ br_enc32le(tmp + 24, q[5]);
+ br_enc32le(tmp + 28, q[7]);
+
+ if (len <= 32) { /* final pass: only the first len keystream bytes are used */
+ xorbuf(buf, tmp, len);
+ cc ++;
+ if (len > 16) { /* the second keystream block was used as well */
+ cc ++;
+ }
+ break;
+ }
+ xorbuf(buf, tmp, 32);
+ buf += 32;
+ len -= 32;
+ cc += 2;
+ }
+ return cc;
+}
+
+/* see bearssl_block.h -- class descriptor: context size, two numeric fields, then the init and run entry points */
+const br_block_ctr_class br_aes_ct_ctr_vtable = {
+ sizeof(br_aes_ct_ctr_keys),
+ 16, /* presumably the block size in bytes -- confirm against the class declaration */
+ 4, /* presumably log2 of the block size -- confirm against the class declaration */
+ (void (*)(const br_block_ctr_class **, const void *, size_t))
+ &br_aes_ct_ctr_init, /* cast from the concrete context type to the generic class signature */
+ (uint32_t (*)(const br_block_ctr_class *const *,
+ const void *, uint32_t, void *, size_t))
+ &br_aes_ct_ctr_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct_ctrcbc.c b/test/monniaux/BearSSL/src/symcipher/aes_ct_ctrcbc.c
new file mode 100644
index 00000000..8ae9fc75
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct_ctrcbc.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialises ctx for the given key: installs the vtable pointer, runs
+ * br_aes_ct_keysched() on key[0..len-1] with ctx->skey as output, and
+ * records the value it returns in ctx->num_rounds.
+ */
+void
+br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_ct_ctrcbc_vtable;
+ ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	/*
+	 * dst[i] ^= src[i] for every i below len, walking both buffers
+	 * forward one byte at a time.
+	 */
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t remaining;
+
+	for (remaining = len; remaining != 0; remaining --) {
+		*out ^= *in;
+		out ++;
+		in ++;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-only pass: XORs keystream into data[0..len-1] in place and
+ * writes the advanced counter back into ctr.
+ *
+ * ctr is read and written as 16 bytes holding four big-endian 32-bit
+ * words; increments ripple from the last word (iv3) up to the first
+ * (iv0). Two counter values are encrypted per loop iteration. The
+ * second increment of an iteration happens only when more than 16
+ * bytes remain, so on return the counter has advanced once per
+ * 16-byte block touched (a trailing partial block counts as one).
+ */
+void
+br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
+ void *ctr, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned char *ivbuf;
+ uint32_t iv0, iv1, iv2, iv3;
+ uint32_t sk_exp[120];
+
+ br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ /*
+ * We keep the counter as four 32-bit values, with big-endian
+ * convention, because that's what is expected for purposes of
+ * incrementing the counter value.
+ */
+ ivbuf = ctr;
+ iv0 = br_dec32be(ivbuf + 0);
+ iv1 = br_dec32be(ivbuf + 4);
+ iv2 = br_dec32be(ivbuf + 8);
+ iv3 = br_dec32be(ivbuf + 12);
+
+ buf = data;
+ while (len > 0) {
+ uint32_t q[8], carry;
+ unsigned char tmp[32];
+
+ /*
+ * The bitslice implementation expects values in
+ * little-endian convention, so we have to byteswap them.
+ */
+ q[0] = br_swap32(iv0); /* even indices: first counter block */
+ q[2] = br_swap32(iv1);
+ q[4] = br_swap32(iv2);
+ q[6] = br_swap32(iv3);
+ iv3 ++;
+ carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped to zero, else 0 (branch-free) */
+ iv2 += carry;
+ carry &= -(~(iv2 | -iv2) >> 31); /* carry survives only if iv2 is now zero too */
+ iv1 += carry;
+ carry &= -(~(iv1 | -iv1) >> 31);
+ iv0 += carry;
+ q[1] = br_swap32(iv0); /* odd indices: second counter block */
+ q[3] = br_swap32(iv1);
+ q[5] = br_swap32(iv2);
+ q[7] = br_swap32(iv3);
+ if (len > 16) { /* second block will be consumed: advance past it */
+ iv3 ++;
+ carry = ~(iv3 | -iv3) >> 31;
+ iv2 += carry;
+ carry &= -(~(iv2 | -iv2) >> 31);
+ iv1 += carry;
+ carry &= -(~(iv1 | -iv1) >> 31);
+ iv0 += carry;
+ }
+
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+
+ br_enc32le(tmp, q[0]); /* tmp[0..15]: keystream from the first block */
+ br_enc32le(tmp + 4, q[2]);
+ br_enc32le(tmp + 8, q[4]);
+ br_enc32le(tmp + 12, q[6]);
+ br_enc32le(tmp + 16, q[1]); /* tmp[16..31]: keystream from the second block */
+ br_enc32le(tmp + 20, q[3]);
+ br_enc32le(tmp + 24, q[5]);
+ br_enc32le(tmp + 28, q[7]);
+
+ if (len <= 32) { /* final pass: 1 to 32 bytes remain */
+ xorbuf(buf, tmp, len);
+ break;
+ }
+ xorbuf(buf, tmp, 32);
+ buf += 32;
+ len -= 32;
+ }
+ br_enc32be(ivbuf + 0, iv0); /* store the advanced counter back */
+ br_enc32be(ivbuf + 4, iv1);
+ br_enc32be(ivbuf + 8, iv2);
+ br_enc32be(ivbuf + 12, iv3);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-MAC update. For each 16-byte block of data, the block is XORed
+ * into the running value and the result is encrypted; the final
+ * running value is written back into cbcmac (16 bytes, read and
+ * written with little-endian 32-bit words).
+ *
+ * len is consumed 16 bytes at a time with no partial-block handling,
+ * so it must be a multiple of 16. Only the even-indexed q[] words
+ * carry data here; the odd-indexed ones are zeroed once before the
+ * loop and their contents are never read back.
+ */
+void
+br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
+ void *cbcmac, const void *data, size_t len)
+{
+ const unsigned char *buf;
+ uint32_t cm0, cm1, cm2, cm3;
+ uint32_t q[8];
+ uint32_t sk_exp[120];
+
+ br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ buf = data;
+ cm0 = br_dec32le((unsigned char *)cbcmac + 0);
+ cm1 = br_dec32le((unsigned char *)cbcmac + 4);
+ cm2 = br_dec32le((unsigned char *)cbcmac + 8);
+ cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+ q[1] = 0;
+ q[3] = 0;
+ q[5] = 0;
+ q[7] = 0;
+
+ while (len > 0) {
+ q[0] = cm0 ^ br_dec32le(buf + 0); /* running MAC XOR next data block */
+ q[2] = cm1 ^ br_dec32le(buf + 4);
+ q[4] = cm2 ^ br_dec32le(buf + 8);
+ q[6] = cm3 ^ br_dec32le(buf + 12);
+
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+
+ cm0 = q[0]; /* encrypted block becomes the new running MAC */
+ cm1 = q[2];
+ cm2 = q[4];
+ cm3 = q[6];
+ buf += 16;
+ len -= 16;
+ }
+
+ br_enc32le((unsigned char *)cbcmac + 0, cm0);
+ br_enc32le((unsigned char *)cbcmac + 4, cm1);
+ br_enc32le((unsigned char *)cbcmac + 8, cm2);
+ br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CTR encryption and CBC-MAC over the produced ciphertext.
+ * data is encrypted in place, 16 bytes per loop iteration; ctr and
+ * cbcmac (16 bytes each) are read at entry and updated at exit.
+ *
+ * len is consumed in 16-byte steps with no partial-block handling, so
+ * it must be a multiple of 16. When len is zero, the loop does not run
+ * and ctr / cbcmac are written back unchanged.
+ */
+void
+br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ /*
+ * When encrypting, the CBC-MAC processing must be lagging by
+ * one block, since it operates on the encrypted values, so
+ * it must wait for that encryption to complete.
+ */
+
+ unsigned char *buf;
+ unsigned char *ivbuf;
+ uint32_t iv0, iv1, iv2, iv3;
+ uint32_t cm0, cm1, cm2, cm3;
+ uint32_t sk_exp[120];
+ int first_iter;
+
+ br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ /*
+ * We keep the counter as four 32-bit values, with big-endian
+ * convention, because that's what is expected for purposes of
+ * incrementing the counter value.
+ */
+ ivbuf = ctr;
+ iv0 = br_dec32be(ivbuf + 0);
+ iv1 = br_dec32be(ivbuf + 4);
+ iv2 = br_dec32be(ivbuf + 8);
+ iv3 = br_dec32be(ivbuf + 12);
+
+ /*
+ * The current CBC-MAC value is kept in little-endian convention.
+ */
+ cm0 = br_dec32le((unsigned char *)cbcmac + 0);
+ cm1 = br_dec32le((unsigned char *)cbcmac + 4);
+ cm2 = br_dec32le((unsigned char *)cbcmac + 8);
+ cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+ buf = data;
+ first_iter = 1;
+ while (len > 0) {
+ uint32_t q[8], carry;
+
+ /*
+ * The bitslice implementation expects values in
+ * little-endian convention, so we have to byteswap them.
+ */
+ q[0] = br_swap32(iv0);
+ q[2] = br_swap32(iv1);
+ q[4] = br_swap32(iv2);
+ q[6] = br_swap32(iv3);
+ iv3 ++;
+ carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped to zero, else 0 (branch-free) */
+ iv2 += carry;
+ carry &= -(~(iv2 | -iv2) >> 31); /* carry survives only if iv2 is now zero too */
+ iv1 += carry;
+ carry &= -(~(iv1 | -iv1) >> 31);
+ iv0 += carry;
+
+ /*
+ * The odd values are used for CBC-MAC.
+ */
+ q[1] = cm0;
+ q[3] = cm1;
+ q[5] = cm2;
+ q[7] = cm3;
+
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+
+ /*
+ * We do the XOR with the plaintext in 32-bit registers,
+ * so that the values are available for CBC-MAC processing
+ * as well.
+ */
+ q[0] ^= br_dec32le(buf + 0);
+ q[2] ^= br_dec32le(buf + 4);
+ q[4] ^= br_dec32le(buf + 8);
+ q[6] ^= br_dec32le(buf + 12);
+ br_enc32le(buf + 0, q[0]); /* ciphertext block written in place */
+ br_enc32le(buf + 4, q[2]);
+ br_enc32le(buf + 8, q[4]);
+ br_enc32le(buf + 12, q[6]);
+
+ buf += 16;
+ len -= 16;
+
+ /*
+ * We set the cm* values to the block to encrypt in the
+ * next iteration.
+ */
+ if (first_iter) { /* odd-lane output is discarded on the first pass */
+ first_iter = 0;
+ cm0 ^= q[0];
+ cm1 ^= q[2];
+ cm2 ^= q[4];
+ cm3 ^= q[6];
+ } else { /* encrypted previous MAC input XOR this ciphertext block */
+ cm0 = q[0] ^ q[1];
+ cm1 = q[2] ^ q[3];
+ cm2 = q[4] ^ q[5];
+ cm3 = q[6] ^ q[7];
+ }
+
+ /*
+ * If this was the last iteration, then compute the
+ * extra block encryption to complete CBC-MAC.
+ */
+ if (len == 0) {
+ q[0] = cm0;
+ q[2] = cm1;
+ q[4] = cm2;
+ q[6] = cm3;
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+ cm0 = q[0];
+ cm1 = q[2];
+ cm2 = q[4];
+ cm3 = q[6];
+ break;
+ }
+ }
+
+ br_enc32be(ivbuf + 0, iv0);
+ br_enc32be(ivbuf + 4, iv1);
+ br_enc32be(ivbuf + 8, iv2);
+ br_enc32be(ivbuf + 12, iv3);
+ br_enc32le((unsigned char *)cbcmac + 0, cm0);
+ br_enc32le((unsigned char *)cbcmac + 4, cm1);
+ br_enc32le((unsigned char *)cbcmac + 8, cm2);
+ br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CBC-MAC and CTR decryption. For every 16-byte block, the
+ * block as found in data (before it is modified) is XORed into the
+ * running MAC and encrypted in the odd-indexed q[] words, while the
+ * even-indexed words encrypt the current counter to produce the
+ * keystream that is then XORed into data in place.
+ *
+ * ctr and cbcmac (16 bytes each) are read at entry and updated at
+ * exit. len is consumed in 16-byte steps with no partial-block
+ * handling, so it must be a multiple of 16.
+ */
+void
+br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned char *ivbuf;
+ uint32_t iv0, iv1, iv2, iv3;
+ uint32_t cm0, cm1, cm2, cm3;
+ uint32_t sk_exp[120];
+
+ br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+ /*
+ * We keep the counter as four 32-bit values, with big-endian
+ * convention, because that's what is expected for purposes of
+ * incrementing the counter value.
+ */
+ ivbuf = ctr;
+ iv0 = br_dec32be(ivbuf + 0);
+ iv1 = br_dec32be(ivbuf + 4);
+ iv2 = br_dec32be(ivbuf + 8);
+ iv3 = br_dec32be(ivbuf + 12);
+
+ /*
+ * The current CBC-MAC value is kept in little-endian convention.
+ */
+ cm0 = br_dec32le((unsigned char *)cbcmac + 0);
+ cm1 = br_dec32le((unsigned char *)cbcmac + 4);
+ cm2 = br_dec32le((unsigned char *)cbcmac + 8);
+ cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+ buf = data;
+ while (len > 0) {
+ uint32_t q[8], carry;
+ unsigned char tmp[16];
+
+ /*
+ * The bitslice implementation expects values in
+ * little-endian convention, so we have to byteswap them.
+ */
+ q[0] = br_swap32(iv0);
+ q[2] = br_swap32(iv1);
+ q[4] = br_swap32(iv2);
+ q[6] = br_swap32(iv3);
+ iv3 ++;
+ carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped to zero, else 0 (branch-free) */
+ iv2 += carry;
+ carry &= -(~(iv2 | -iv2) >> 31); /* carry survives only if iv2 is now zero too */
+ iv1 += carry;
+ carry &= -(~(iv1 | -iv1) >> 31);
+ iv0 += carry;
+
+ /*
+ * The odd values are used for CBC-MAC.
+ */
+ q[1] = cm0 ^ br_dec32le(buf + 0); /* buf still holds the input block here */
+ q[3] = cm1 ^ br_dec32le(buf + 4);
+ q[5] = cm2 ^ br_dec32le(buf + 8);
+ q[7] = cm3 ^ br_dec32le(buf + 12);
+
+ br_aes_ct_ortho(q);
+ br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+ br_aes_ct_ortho(q);
+
+ br_enc32le(tmp + 0, q[0]); /* tmp: 16 keystream bytes for this block */
+ br_enc32le(tmp + 4, q[2]);
+ br_enc32le(tmp + 8, q[4]);
+ br_enc32le(tmp + 12, q[6]);
+ xorbuf(buf, tmp, 16);
+ cm0 = q[1]; /* updated running MAC */
+ cm1 = q[3];
+ cm2 = q[5];
+ cm3 = q[7];
+ buf += 16;
+ len -= 16;
+ }
+
+ br_enc32be(ivbuf + 0, iv0);
+ br_enc32be(ivbuf + 4, iv1);
+ br_enc32be(ivbuf + 8, iv2);
+ br_enc32be(ivbuf + 12, iv3);
+ br_enc32le((unsigned char *)cbcmac + 0, cm0);
+ br_enc32le((unsigned char *)cbcmac + 4, cm1);
+ br_enc32le((unsigned char *)cbcmac + 8, cm2);
+ br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Dispatch table for this AES CTR + CBC-MAC implementation. In order:
+ * the size of the key context, two numeric constants (16 and 4 --
+ * presumably the block size in bytes and its base-2 logarithm; confirm
+ * against the br_block_ctrcbc_class declaration), then the init,
+ * encrypt, decrypt, ctr and mac entry points, each cast to the
+ * class-generic function pointer type.
+ */
+const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
+ sizeof(br_aes_ct_ctrcbc_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+ &br_aes_ct_ctrcbc_init,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_ct_ctrcbc_encrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_ct_ctrcbc_decrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, size_t))
+ &br_aes_ct_ctrcbc_ctr,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, const void *, size_t))
+ &br_aes_ct_ctrcbc_mac
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct_dec.c b/test/monniaux/BearSSL/src/symcipher/aes_ct_dec.c
new file mode 100644
index 00000000..7f32d2bd
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct_dec.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Linear layer applied on each side of the forward S-box when
+ * computing the inverse S-box (the transform written B(x ^ 0x63) in
+ * the comment of br_aes_ct_bitslice_invSbox()). In bitsliced form,
+ * the XOR with 0x63 is a complement of bit planes 0, 1, 5 and 6;
+ * each output plane is then the XOR of three (possibly complemented)
+ * input planes. All eight inputs are read before any output is
+ * written, so the update is safe in place.
+ */
+static void
+invSbox_affine(uint32_t *q)
+{
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
+
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+}
+
+/*
+ * see inner.h
+ *
+ * Applies the inverse AES S-box to the bitsliced state q[0..7], in
+ * place.
+ */
+void
+br_aes_ct_bitslice_invSbox(uint32_t *q)
+{
+	/*
+	 * AES S-box is:
+	 *   S(x) = A(I(x)) ^ 0x63
+	 * where I() is inversion in GF(256), and A() is a linear
+	 * transform (0 is formally defined to be its own inverse).
+	 * Since inversion is an involution, the inverse S-box can be
+	 * computed from the S-box as:
+	 *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+	 * where B() is the inverse of A(). Indeed, for any y in GF(256):
+	 *   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+	 *
+	 * Note: we reuse the implementation of the forward S-box,
+	 * instead of duplicating it here, so that total code size is
+	 * lower. By merging the B() transforms into the S-box circuit
+	 * we could make faster CBC decryption, but CBC decryption is
+	 * already quite faster than CBC encryption because we can
+	 * process two blocks in parallel.
+	 *
+	 * The same linear layer is needed before and after the forward
+	 * S-box, so it lives in a single helper.
+	 */
+	invSbox_affine(q);
+	br_aes_ct_bitslice_Sbox(q);
+	invSbox_affine(q);
+}
+
+static void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	/*
+	 * XOR the eight subkey words into the eight state words,
+	 * lowest index first.
+	 */
+	unsigned done;
+
+	for (done = 0; done != 8; done ++) {
+		*q ^= *sk;
+		q ++;
+		sk ++;
+	}
+}
+
+static void
+inv_shift_rows(uint32_t *q)
+{
+	/*
+	 * Each of the eight words is processed independently, byte by
+	 * byte: the low byte is kept, and the next three bytes are
+	 * rotated left (within the byte) by 2, 4 and 6 bits.
+	 */
+	uint32_t *w;
+
+	for (w = q; w != q + 8; w ++) {
+		uint32_t v = *w;
+		uint32_t byte0 = v & 0x000000FF;
+		uint32_t byte1 = ((v & 0x00003F00) << 2)
+			| ((v & 0x0000C000) >> 6);
+		uint32_t byte2 = ((v & 0x000F0000) << 4)
+			| ((v & 0x00F00000) >> 4);
+		uint32_t byte3 = ((v & 0x03000000) << 6)
+			| ((v & 0xFC000000) >> 2);
+
+		*w = byte0 | byte1 | byte2 | byte3;
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	/* Rotate by 16 bits, i.e. exchange the two 16-bit halves. */
+	uint32_t upper = x >> 16;
+	uint32_t lower = x << 16;
+
+	return lower | upper;
+}
+
+static void
+inv_mix_columns(uint32_t *q)
+{
+ uint32_t q0, q1, q2, q3, q4, q5, q6, q7; /* copies of the eight input words */
+ uint32_t r0, r1, r2, r3, r4, r5, r6, r7; /* the same words rotated right by 8 bits */
+
+ q0 = q[0];
+ q1 = q[1];
+ q2 = q[2];
+ q3 = q[3];
+ q4 = q[4];
+ q5 = q[5];
+ q6 = q[6];
+ q7 = q[7];
+ r0 = (q0 >> 8) | (q0 << 24);
+ r1 = (q1 >> 8) | (q1 << 24);
+ r2 = (q2 >> 8) | (q2 << 24);
+ r3 = (q3 >> 8) | (q3 << 24);
+ r4 = (q4 >> 8) | (q4 << 24);
+ r5 = (q5 >> 8) | (q5 << 24);
+ r6 = (q6 >> 8) | (q6 << 24);
+ r7 = (q7 >> 8) | (q7 << 24);
+ /*
+ * Every output word is a fixed XOR combination of input words,
+ * of their 8-bit rotations, and of a 16-bit rotation of another
+ * such combination. Only the q0..q7 / r0..r7 copies are read
+ * below, so q[] can be overwritten in place.
+ */
+ q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+ q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+ q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+ q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+ q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+ q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+ q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+ q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/*
+ * see inner.h
+ *
+ * Runs the inverse cipher on the bitsliced state q[0..7], in place.
+ * Round keys are taken from skey in groups of eight words, starting
+ * with group num_rounds and ending with group 0.
+ */
+void
+br_aes_ct_bitslice_decrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned round;
+
+	add_round_key(q, skey + (num_rounds << 3));
+
+	/* Full rounds, from num_rounds - 1 down to 1. */
+	round = num_rounds - 1;
+	while (round > 0) {
+		inv_shift_rows(q);
+		br_aes_ct_bitslice_invSbox(q);
+		add_round_key(q, skey + (round << 3));
+		inv_mix_columns(q);
+		round --;
+	}
+
+	/* Last round: no inv_mix_columns(). */
+	inv_shift_rows(q);
+	br_aes_ct_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_ct_enc.c b/test/monniaux/BearSSL/src/symcipher/aes_ct_enc.c
new file mode 100644
index 00000000..089bf356
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_ct_enc.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static inline void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	/* XOR subkey word i into state word i, for i = 0..7. */
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		q[i] ^= sk[i];
+	}
+}
+
+static inline void
+shift_rows(uint32_t *q)
+{
+	/*
+	 * Each of the eight words is processed independently, byte by
+	 * byte: the low byte is kept, and the next three bytes are
+	 * rotated right (within the byte) by 2, 4 and 6 bits.
+	 */
+	uint32_t *w;
+
+	for (w = q; w != q + 8; w ++) {
+		uint32_t v = *w;
+		uint32_t byte0 = v & 0x000000FF;
+		uint32_t byte1 = ((v & 0x0000FC00) >> 2)
+			| ((v & 0x00000300) << 6);
+		uint32_t byte2 = ((v & 0x00F00000) >> 4)
+			| ((v & 0x000F0000) << 4);
+		uint32_t byte3 = ((v & 0xC0000000) >> 6)
+			| ((v & 0x3F000000) << 2);
+
+		*w = byte0 | byte1 | byte2 | byte3;
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	/* A 16-bit rotation of a 32-bit word swaps its two halves. */
+	uint32_t high_to_low = x >> 16;
+	uint32_t low_to_high = x << 16;
+
+	return low_to_high | high_to_low;
+}
+
+static inline void
+mix_columns(uint32_t *q)
+{
+	/*
+	 * rot8[i] is input word i rotated right by 8 bits, and sum[i]
+	 * is input word i XORed with that rotation. Both are computed
+	 * for all eight words before q[] is overwritten.
+	 */
+	uint32_t rot8[8];
+	uint32_t sum[8];
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		uint32_t w = q[i];
+
+		rot8[i] = (w >> 8) | (w << 24);
+		sum[i] = w ^ rot8[i];
+	}
+
+	/*
+	 * Output i takes sum[i - 1] (sum[7] for i = 0), rot8[i] and the
+	 * 16-bit rotation of sum[i]; outputs 1, 3 and 4 additionally
+	 * take sum[7].
+	 */
+	q[0] = sum[7] ^ rot8[0] ^ rotr16(sum[0]);
+	q[1] = sum[0] ^ sum[7] ^ rot8[1] ^ rotr16(sum[1]);
+	q[2] = sum[1] ^ rot8[2] ^ rotr16(sum[2]);
+	q[3] = sum[2] ^ sum[7] ^ rot8[3] ^ rotr16(sum[3]);
+	q[4] = sum[3] ^ sum[7] ^ rot8[4] ^ rotr16(sum[4]);
+	q[5] = sum[4] ^ rot8[5] ^ rotr16(sum[5]);
+	q[6] = sum[5] ^ rot8[6] ^ rotr16(sum[6]);
+	q[7] = sum[6] ^ rot8[7] ^ rotr16(sum[7]);
+}
+
+/*
+ * see inner.h
+ *
+ * Runs the forward cipher on the bitsliced state q[0..7], in place.
+ * Round keys are taken from skey in groups of eight words, starting
+ * with group 0 and ending with group num_rounds.
+ */
+void
+br_aes_ct_bitslice_encrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned round;
+
+	add_round_key(q, skey);
+
+	/* Full rounds, from 1 up to num_rounds - 1. */
+	round = 1;
+	while (round < num_rounds) {
+		br_aes_ct_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + (round << 3));
+		round ++;
+	}
+
+	/* Last round: no mix_columns(). */
+	br_aes_ct_bitslice_Sbox(q);
+	shift_rows(q);
+	add_round_key(q, skey + (num_rounds << 3));
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_pwr8.c b/test/monniaux/BearSSL/src/symcipher/aes_pwr8.c
new file mode 100644
index 00000000..b2c63c32
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_pwr8.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS 1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * POWER8 opcodes.
+ */
+
+#if BR_POWER8
+
+/*
+ * AES-128 key schedule with POWER8 opcodes.
+ *
+ * Reads the 16-byte key at key and writes 11 subkeys of 16 bytes each
+ * (the key itself, then ten derived subkeys) to sk[0..175]. On
+ * little-endian builds, every subkey goes through the idx2be byte
+ * permutation before being stored.
+ */
+static void
+key_schedule_128(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+	static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * We use the VSX instructions for loading and storing the
+	 * key/subkeys, since they support unaligned accesses. The rest
+	 * of the computation is VMX only. VMX register 0 is VSX
+	 * register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2 = current subkey
+		 * v3 = Rcon (x4 words)
+		 * v6 = constant 8, copied into four words
+		 * v7 = constant 0x11B, copied into four words
+		 * v8 = constant for byteswapping words
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		lxvw4x(34, 0, %[key])
+		vspltisw(3, 1)
+		vspltisw(6, 8)
+		lxvw4x(39, 0, %[fmod])
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * First subkey is a copy of the key itself.
+		 */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/*
+		 * Loop must run 10 times.
+		 */
+		li(%[cc], 10)
+		mtctr(%[cc])
+	label(loop)
+		/* Increment subkey address */
+		addi(%[sk], %[sk], 16)
+
+		/* Compute SubWord(RotWord(temp)) xor Rcon (into v4, splat) */
+		vrlw(4, 2, 1)
+		vsbox(4, 4)
+#if BR_POWER8_LE
+		vxor(4, 4, 3)
+#else
+		vsldoi(5, 3, 0, 3)
+		vxor(4, 4, 5)
+#endif
+		vspltw(4, 4, 3)
+
+		/* XOR words for next subkey */
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vxor(2, 2, 4)
+
+		/* Store next subkey */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/*
+		 * Update Rcon: double it, then XOR in 0x11B when the
+		 * doubled value has bit 8 set (the shift by 8 yields
+		 * 0 or 1, and its negation is the AND mask).
+		 */
+		vadduwm(3, 3, 3)
+		vsrw(4, 3, 6)
+		vsubuwm(4, 0, 4)
+		vand(4, 4, 7)
+		vxor(3, 3, 4)
+
+		bdnz(loop)
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key), [fmod] "b" (fmod)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+	/*
+	 * v8 is written by the little-endian lxvw4x(40, ...) above, so it
+	 * must be listed as clobbered as well.
+	 */
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "ctr", "memory"
+	);
+}
+
+static void
+key_schedule_192(unsigned char *sk, const unsigned char *key)
+{
+ long cc;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc = 0;
+
+ /*
+ * AES-192 key schedule with POWER8 opcodes. The 24-byte key is
+ * read with two 16-byte loads, at key and key + 8. The loop
+ * below runs eight times; each pass stores 32 bytes at sk and
+ * then advances sk by 24. One final 16-byte store follows, so
+ * sk[0..207] is written in total.
+ *
+ * We use the VSX instructions for loading and storing the
+ * key/subkeys, since they support unaligned accesses. The rest
+ * of the computation is VMX only. VMX register 0 is VSX
+ * register 32.
+ */
+ asm volatile (
+
+ /*
+ * v0 = all-zero word
+ * v1 = constant -8 / +8, copied into four words
+ * v2, v3 = current subkey
+ * v5 = Rcon (x4 words) (already shifted on big-endian)
+ * v6 = constant 8, copied into four words
+ * v8 = constant for byteswapping words
+ *
+ * The left two words of v3 are ignored.
+ */
+ vspltisw(0, 0)
+#if BR_POWER8_LE
+ vspltisw(1, -8)
+#else
+ vspltisw(1, 8)
+#endif
+ li(%[cc], 8)
+ lxvw4x(34, 0, %[key])
+ lxvw4x(35, %[cc], %[key])
+ vsldoi(3, 3, 0, 8)
+ vspltisw(5, 1)
+#if !BR_POWER8_LE
+ vsldoi(5, 5, 0, 3)
+#endif
+ vspltisw(6, 8) /* NOTE(review): v6 is not referenced again in this routine */
+#if BR_POWER8_LE
+ lxvw4x(40, 0, %[idx2be])
+#endif
+
+ /*
+ * Loop must run 8 times. Each iteration produces 256
+ * bits of subkeys, with a 64-bit overlap.
+ */
+ li(%[cc], 8)
+ mtctr(%[cc])
+ li(%[cc], 16) /* cc now serves as the +16 byte offset for the second store */
+ label(loop)
+
+ /*
+ * Last 6 words in v2:v3l. Compute next 6 words into
+ * v3r:v4.
+ */
+ vrlw(10, 3, 1)
+ vsbox(10, 10)
+ vxor(10, 10, 5)
+ vspltw(10, 10, 1)
+ vsldoi(11, 0, 10, 8)
+
+ vsldoi(12, 0, 2, 12)
+ vxor(12, 2, 12)
+ vsldoi(13, 0, 12, 12)
+ vxor(12, 12, 13)
+ vsldoi(13, 0, 12, 12)
+ vxor(12, 12, 13)
+
+ vspltw(13, 12, 3)
+ vxor(13, 13, 3)
+ vsldoi(14, 0, 3, 12)
+ vxor(13, 13, 14)
+
+ vsldoi(4, 12, 13, 8)
+ vsldoi(14, 0, 3, 8)
+ vsldoi(3, 14, 12, 8)
+
+ vxor(3, 3, 11)
+ vxor(4, 4, 10)
+
+ /*
+ * Update Rcon. Since for a 192-bit key, we use only 8
+ * such constants, we will not hit the field modulus,
+ * so a simple shift (addition) works well.
+ */
+ vadduwm(5, 5, 5)
+
+ /*
+ * Write out the two left 128-bit words
+ */
+#if BR_POWER8_LE
+ vperm(10, 2, 2, 8)
+ vperm(11, 3, 3, 8)
+ stxvw4x(42, 0, %[sk])
+ stxvw4x(43, %[cc], %[sk])
+#else
+ stxvw4x(34, 0, %[sk])
+ stxvw4x(35, %[cc], %[sk])
+#endif
+ addi(%[sk], %[sk], 24)
+
+ /*
+ * Shift words for next iteration.
+ */
+ vsldoi(2, 3, 4, 8)
+ vsldoi(3, 4, 0, 8)
+
+ bdnz(loop)
+
+ /*
+ * The loop wrote the first 50 subkey words, but we need
+ * to produce 52, so we must do one last write.
+ */
+#if BR_POWER8_LE
+ vperm(10, 2, 2, 8)
+ stxvw4x(42, 0, %[sk])
+#else
+ stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+ );
+}
+
+static void
+key_schedule_256(unsigned char *sk, const unsigned char *key)
+{
+ long cc;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc = 0;
+
+ /*
+ * AES-256 key schedule with POWER8 opcodes. The 32-byte key is
+ * read with two 16-byte loads, at key and key + 16. The loop
+ * below runs seven times; each pass stores 32 bytes at sk and
+ * then advances sk by 32. One final 16-byte store follows, so
+ * sk[0..239] is written in total.
+ *
+ * We use the VSX instructions for loading and storing the
+ * key/subkeys, since they support unaligned accesses. The rest
+ * of the computation is VMX only. VMX register 0 is VSX
+ * register 32.
+ */
+ asm volatile (
+
+ /*
+ * v0 = all-zero word
+ * v1 = constant -8 / +8, copied into four words
+ * v2, v3 = current subkey
+ * v6 = Rcon (x4 words) (already shifted on big-endian)
+ * v7 = constant 8, copied into four words
+ * v8 = constant for byteswapping words
+ *
+ * NOTE(review): this comment used to say "The left two
+ * words of v3 are ignored", which looks carried over from
+ * the 192-bit routine: here v3 is loaded from key + 16 and
+ * all four of its words feed the vsldoi/vxor chain below.
+ * Confirm before relying on either statement.
+ */
+ vspltisw(0, 0)
+#if BR_POWER8_LE
+ vspltisw(1, -8)
+#else
+ vspltisw(1, 8)
+#endif
+ li(%[cc], 16)
+ lxvw4x(34, 0, %[key])
+ lxvw4x(35, %[cc], %[key])
+ vspltisw(6, 1)
+#if !BR_POWER8_LE
+ vsldoi(6, 6, 0, 3)
+#endif
+ vspltisw(7, 8) /* NOTE(review): v7 is not referenced again in this routine */
+#if BR_POWER8_LE
+ lxvw4x(40, 0, %[idx2be])
+#endif
+
+ /*
+ * Loop must run 7 times. Each iteration produces two
+ * subkeys.
+ */
+ li(%[cc], 7)
+ mtctr(%[cc])
+ li(%[cc], 16) /* cc now serves as the +16 byte offset for the second store */
+ label(loop)
+
+ /*
+ * Current words are in v2:v3. Compute next word in v4.
+ */
+ vrlw(10, 3, 1)
+ vsbox(10, 10)
+ vxor(10, 10, 6)
+ vspltw(10, 10, 3)
+
+ vsldoi(4, 0, 2, 12)
+ vxor(4, 2, 4)
+ vsldoi(5, 0, 4, 12)
+ vxor(4, 4, 5)
+ vsldoi(5, 0, 4, 12)
+ vxor(4, 4, 5)
+ vxor(4, 4, 10)
+
+ /*
+ * Then other word in v5.
+ */
+ vsbox(10, 4)
+ vspltw(10, 10, 3)
+
+ vsldoi(5, 0, 3, 12)
+ vxor(5, 3, 5)
+ vsldoi(11, 0, 5, 12)
+ vxor(5, 5, 11)
+ vsldoi(11, 0, 5, 12)
+ vxor(5, 5, 11)
+ vxor(5, 5, 10)
+
+ /*
+ * Update Rcon. Since for a 256-bit key, we use only 7
+ * such constants, we will not hit the field modulus,
+ * so a simple shift (addition) works well.
+ */
+ vadduwm(6, 6, 6)
+
+ /*
+ * Write out the two left 128-bit words
+ */
+#if BR_POWER8_LE
+ vperm(10, 2, 2, 8)
+ vperm(11, 3, 3, 8)
+ stxvw4x(42, 0, %[sk])
+ stxvw4x(43, %[cc], %[sk])
+#else
+ stxvw4x(34, 0, %[sk])
+ stxvw4x(35, %[cc], %[sk])
+#endif
+ addi(%[sk], %[sk], 32)
+
+ /*
+ * Replace v2:v3 with v4:v5.
+ */
+ vxor(2, 0, 4)
+ vxor(3, 0, 5)
+
+ bdnz(loop)
+
+ /*
+ * The loop wrote the first 14 subkeys, but we need 15,
+ * so we must do an extra write.
+ */
+#if BR_POWER8_LE
+ vperm(10, 2, 2, 8)
+ stxvw4x(42, 0, %[sk])
+#else
+ stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+ );
+}
+
+/*
+ * see inner.h
+ *
+ * Always returns 1. This definition sits inside the file's
+ * "#if BR_POWER8" region, so it only exists in builds where that macro
+ * is non-zero; no runtime probing is done here.
+ */
+int
+br_aes_pwr8_supported(void)
+{
+ return 1;
+}
+
+/*
+ * see inner.h
+ *
+ * Runs the key schedule matching the key length and returns the
+ * associated round count: 10 for len == 16, 12 for len == 24, and 14
+ * for every other value of len (which is handled as a 256-bit key).
+ */
+unsigned
+br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
+{
+	if (len == 16) {
+		key_schedule_128(sk, key);
+		return 10;
+	}
+	if (len == 24) {
+		key_schedule_192(sk, key);
+		return 12;
+	}
+	key_schedule_256(sk, key);
+	return 14;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcdec.c
new file mode 100644
index 00000000..e535ba6f
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcdec.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS 1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CBC decryption context: run the key schedule into
+ * ctx->skey.skni, record the round count it returns, and attach the
+ * CBC decryption vtable.
+ */
+void
+br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
+}
+
+static void
+cbcdec_128(const unsigned char *sk,
+ const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+ long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc0 = 0;
+ cc1 = 16;
+ cc2 = 32;
+ cc3 = 48;
+ asm volatile (
+
+ /*
+ * Overview: CBC decryption in place, four 16-byte blocks of buf
+ * per loop iteration, using an 11-entry subkey array (sk). The
+ * loop count is num_blocks >> 2 (see the input operands); the
+ * callers in this file always pass a nonzero multiple of 4.
+ * iv is only read here, never written.
+ *
+ * Load subkeys into v0..v10
+ * (an lxvw4x/stxvw4x operand 32+n designates vector register vn;
+ * cc0 is the running byte offset into sk and is reset to 0 after
+ * the last load, so that cc0..cc3 are 0, 16, 32, 48 in the loop).
+ */
+ lxvw4x(32, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(33, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(34, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(35, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(36, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(37, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(38, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(39, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(40, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(41, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(42, %[cc0], %[sk])
+ li(%[cc0], 0)
+
+#if BR_POWER8_LE
+ /*
+ * v15 = constant for byteswapping words
+ */
+ lxvw4x(47, 0, %[idx2be])
+#endif
+ /*
+ * Load IV into v24.
+ */
+ lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+ vperm(24, 24, 24, 15)
+#endif
+
+ mtctr(%[num_blocks])
+ label(loop)
+ /*
+ * Load next ciphertext blocks in v16..v19. Also save them
+ * in v20..v23 (vand of a register with itself acts as a
+ * register copy); the saved copies are needed for the CBC
+ * chaining XOR after v16..v19 have been decrypted.
+ */
+ lxvw4x(48, %[cc0], %[buf])
+ lxvw4x(49, %[cc1], %[buf])
+ lxvw4x(50, %[cc2], %[buf])
+ lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+ vand(20, 16, 16)
+ vand(21, 17, 17)
+ vand(22, 18, 18)
+ vand(23, 19, 19)
+
+ /*
+ * Decrypt the blocks. Subkeys are applied in reverse order:
+ * XOR with the last subkey (v10), vncipher with v9 down to
+ * v1, then vncipherlast with v0. The four blocks are
+ * interleaved at each step.
+ */
+ vxor(16, 16, 10)
+ vxor(17, 17, 10)
+ vxor(18, 18, 10)
+ vxor(19, 19, 10)
+ vncipher(16, 16, 9)
+ vncipher(17, 17, 9)
+ vncipher(18, 18, 9)
+ vncipher(19, 19, 9)
+ vncipher(16, 16, 8)
+ vncipher(17, 17, 8)
+ vncipher(18, 18, 8)
+ vncipher(19, 19, 8)
+ vncipher(16, 16, 7)
+ vncipher(17, 17, 7)
+ vncipher(18, 18, 7)
+ vncipher(19, 19, 7)
+ vncipher(16, 16, 6)
+ vncipher(17, 17, 6)
+ vncipher(18, 18, 6)
+ vncipher(19, 19, 6)
+ vncipher(16, 16, 5)
+ vncipher(17, 17, 5)
+ vncipher(18, 18, 5)
+ vncipher(19, 19, 5)
+ vncipher(16, 16, 4)
+ vncipher(17, 17, 4)
+ vncipher(18, 18, 4)
+ vncipher(19, 19, 4)
+ vncipher(16, 16, 3)
+ vncipher(17, 17, 3)
+ vncipher(18, 18, 3)
+ vncipher(19, 19, 3)
+ vncipher(16, 16, 2)
+ vncipher(17, 17, 2)
+ vncipher(18, 18, 2)
+ vncipher(19, 19, 2)
+ vncipher(16, 16, 1)
+ vncipher(17, 17, 1)
+ vncipher(18, 18, 1)
+ vncipher(19, 19, 1)
+ vncipherlast(16, 16, 0)
+ vncipherlast(17, 17, 0)
+ vncipherlast(18, 18, 0)
+ vncipherlast(19, 19, 0)
+
+ /*
+ * XOR decrypted blocks with IV / previous block: v24 is the
+ * IV (or the fourth ciphertext block of the previous
+ * iteration), v20..v22 are the saved copies of the first
+ * three ciphertext blocks of this iteration.
+ */
+ vxor(16, 16, 24)
+ vxor(17, 17, 20)
+ vxor(18, 18, 21)
+ vxor(19, 19, 22)
+
+ /*
+ * Store back result (with byteswap), over the ciphertext
+ * that was just read from buf.
+ */
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+ stxvw4x(48, %[cc0], %[buf])
+ stxvw4x(49, %[cc1], %[buf])
+ stxvw4x(50, %[cc2], %[buf])
+ stxvw4x(51, %[cc3], %[buf])
+
+ /*
+ * Fourth encrypted block is IV for next run
+ * (v23 holds the saved copy of that ciphertext block).
+ */
+ vand(24, 23, 23)
+
+ addi(%[buf], %[buf], 64)
+
+ bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+ [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+ "ctr", "memory"
+ );
+}
+
+static void
+cbcdec_192(const unsigned char *sk,
+ const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+ long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc0 = 0;
+ cc1 = 16;
+ cc2 = 32;
+ cc3 = 48;
+ asm volatile (
+
+ /*
+ * Overview: CBC decryption in place, four 16-byte blocks of buf
+ * per loop iteration, using a 13-entry subkey array (sk). The
+ * loop count is num_blocks >> 2 (see the input operands); the
+ * callers in this file always pass a nonzero multiple of 4.
+ * iv is only read here, never written.
+ *
+ * Load subkeys into v0..v12
+ * (an lxvw4x/stxvw4x operand 32+n designates vector register vn;
+ * cc0 is the running byte offset into sk and is reset to 0 after
+ * the last load, so that cc0..cc3 are 0, 16, 32, 48 in the loop).
+ */
+ lxvw4x(32, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(33, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(34, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(35, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(36, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(37, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(38, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(39, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(40, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(41, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(42, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(43, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(44, %[cc0], %[sk])
+ li(%[cc0], 0)
+
+#if BR_POWER8_LE
+ /*
+ * v15 = constant for byteswapping words
+ */
+ lxvw4x(47, 0, %[idx2be])
+#endif
+ /*
+ * Load IV into v24.
+ */
+ lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+ vperm(24, 24, 24, 15)
+#endif
+
+ mtctr(%[num_blocks])
+ label(loop)
+ /*
+ * Load next ciphertext blocks in v16..v19. Also save them
+ * in v20..v23 (vand of a register with itself acts as a
+ * register copy); the saved copies are needed for the CBC
+ * chaining XOR after v16..v19 have been decrypted.
+ */
+ lxvw4x(48, %[cc0], %[buf])
+ lxvw4x(49, %[cc1], %[buf])
+ lxvw4x(50, %[cc2], %[buf])
+ lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+ vand(20, 16, 16)
+ vand(21, 17, 17)
+ vand(22, 18, 18)
+ vand(23, 19, 19)
+
+ /*
+ * Decrypt the blocks. Subkeys are applied in reverse order:
+ * XOR with the last subkey (v12), vncipher with v11 down to
+ * v1, then vncipherlast with v0. The four blocks are
+ * interleaved at each step.
+ */
+ vxor(16, 16, 12)
+ vxor(17, 17, 12)
+ vxor(18, 18, 12)
+ vxor(19, 19, 12)
+ vncipher(16, 16, 11)
+ vncipher(17, 17, 11)
+ vncipher(18, 18, 11)
+ vncipher(19, 19, 11)
+ vncipher(16, 16, 10)
+ vncipher(17, 17, 10)
+ vncipher(18, 18, 10)
+ vncipher(19, 19, 10)
+ vncipher(16, 16, 9)
+ vncipher(17, 17, 9)
+ vncipher(18, 18, 9)
+ vncipher(19, 19, 9)
+ vncipher(16, 16, 8)
+ vncipher(17, 17, 8)
+ vncipher(18, 18, 8)
+ vncipher(19, 19, 8)
+ vncipher(16, 16, 7)
+ vncipher(17, 17, 7)
+ vncipher(18, 18, 7)
+ vncipher(19, 19, 7)
+ vncipher(16, 16, 6)
+ vncipher(17, 17, 6)
+ vncipher(18, 18, 6)
+ vncipher(19, 19, 6)
+ vncipher(16, 16, 5)
+ vncipher(17, 17, 5)
+ vncipher(18, 18, 5)
+ vncipher(19, 19, 5)
+ vncipher(16, 16, 4)
+ vncipher(17, 17, 4)
+ vncipher(18, 18, 4)
+ vncipher(19, 19, 4)
+ vncipher(16, 16, 3)
+ vncipher(17, 17, 3)
+ vncipher(18, 18, 3)
+ vncipher(19, 19, 3)
+ vncipher(16, 16, 2)
+ vncipher(17, 17, 2)
+ vncipher(18, 18, 2)
+ vncipher(19, 19, 2)
+ vncipher(16, 16, 1)
+ vncipher(17, 17, 1)
+ vncipher(18, 18, 1)
+ vncipher(19, 19, 1)
+ vncipherlast(16, 16, 0)
+ vncipherlast(17, 17, 0)
+ vncipherlast(18, 18, 0)
+ vncipherlast(19, 19, 0)
+
+ /*
+ * XOR decrypted blocks with IV / previous block: v24 is the
+ * IV (or the fourth ciphertext block of the previous
+ * iteration), v20..v22 are the saved copies of the first
+ * three ciphertext blocks of this iteration.
+ */
+ vxor(16, 16, 24)
+ vxor(17, 17, 20)
+ vxor(18, 18, 21)
+ vxor(19, 19, 22)
+
+ /*
+ * Store back result (with byteswap), over the ciphertext
+ * that was just read from buf.
+ */
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+ stxvw4x(48, %[cc0], %[buf])
+ stxvw4x(49, %[cc1], %[buf])
+ stxvw4x(50, %[cc2], %[buf])
+ stxvw4x(51, %[cc3], %[buf])
+
+ /*
+ * Fourth encrypted block is IV for next run
+ * (v23 holds the saved copy of that ciphertext block).
+ */
+ vand(24, 23, 23)
+
+ addi(%[buf], %[buf], 64)
+
+ bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+ [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+ "ctr", "memory"
+ );
+}
+
+static void
+cbcdec_256(const unsigned char *sk,
+ const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+ long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc0 = 0;
+ cc1 = 16;
+ cc2 = 32;
+ cc3 = 48;
+ asm volatile (
+
+ /*
+ * Overview: CBC decryption in place, four 16-byte blocks of buf
+ * per loop iteration, using a 15-entry subkey array (sk). The
+ * loop count is num_blocks >> 2 (see the input operands); the
+ * callers in this file always pass a nonzero multiple of 4.
+ * iv is only read here, never written.
+ *
+ * Load subkeys into v0..v14
+ * (an lxvw4x/stxvw4x operand 32+n designates vector register vn;
+ * cc0 is the running byte offset into sk and is reset to 0 after
+ * the last load, so that cc0..cc3 are 0, 16, 32, 48 in the loop).
+ */
+ lxvw4x(32, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(33, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(34, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(35, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(36, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(37, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(38, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(39, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(40, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(41, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(42, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(43, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(44, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(45, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(46, %[cc0], %[sk])
+ li(%[cc0], 0)
+
+#if BR_POWER8_LE
+ /*
+ * v15 = constant for byteswapping words
+ */
+ lxvw4x(47, 0, %[idx2be])
+#endif
+ /*
+ * Load IV into v24.
+ */
+ lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+ vperm(24, 24, 24, 15)
+#endif
+
+ mtctr(%[num_blocks])
+ label(loop)
+ /*
+ * Load next ciphertext blocks in v16..v19. Also save them
+ * in v20..v23 (vand of a register with itself acts as a
+ * register copy); the saved copies are needed for the CBC
+ * chaining XOR after v16..v19 have been decrypted.
+ */
+ lxvw4x(48, %[cc0], %[buf])
+ lxvw4x(49, %[cc1], %[buf])
+ lxvw4x(50, %[cc2], %[buf])
+ lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+ vand(20, 16, 16)
+ vand(21, 17, 17)
+ vand(22, 18, 18)
+ vand(23, 19, 19)
+
+ /*
+ * Decrypt the blocks. Subkeys are applied in reverse order:
+ * XOR with the last subkey (v14), vncipher with v13 down to
+ * v1, then vncipherlast with v0. The four blocks are
+ * interleaved at each step.
+ */
+ vxor(16, 16, 14)
+ vxor(17, 17, 14)
+ vxor(18, 18, 14)
+ vxor(19, 19, 14)
+ vncipher(16, 16, 13)
+ vncipher(17, 17, 13)
+ vncipher(18, 18, 13)
+ vncipher(19, 19, 13)
+ vncipher(16, 16, 12)
+ vncipher(17, 17, 12)
+ vncipher(18, 18, 12)
+ vncipher(19, 19, 12)
+ vncipher(16, 16, 11)
+ vncipher(17, 17, 11)
+ vncipher(18, 18, 11)
+ vncipher(19, 19, 11)
+ vncipher(16, 16, 10)
+ vncipher(17, 17, 10)
+ vncipher(18, 18, 10)
+ vncipher(19, 19, 10)
+ vncipher(16, 16, 9)
+ vncipher(17, 17, 9)
+ vncipher(18, 18, 9)
+ vncipher(19, 19, 9)
+ vncipher(16, 16, 8)
+ vncipher(17, 17, 8)
+ vncipher(18, 18, 8)
+ vncipher(19, 19, 8)
+ vncipher(16, 16, 7)
+ vncipher(17, 17, 7)
+ vncipher(18, 18, 7)
+ vncipher(19, 19, 7)
+ vncipher(16, 16, 6)
+ vncipher(17, 17, 6)
+ vncipher(18, 18, 6)
+ vncipher(19, 19, 6)
+ vncipher(16, 16, 5)
+ vncipher(17, 17, 5)
+ vncipher(18, 18, 5)
+ vncipher(19, 19, 5)
+ vncipher(16, 16, 4)
+ vncipher(17, 17, 4)
+ vncipher(18, 18, 4)
+ vncipher(19, 19, 4)
+ vncipher(16, 16, 3)
+ vncipher(17, 17, 3)
+ vncipher(18, 18, 3)
+ vncipher(19, 19, 3)
+ vncipher(16, 16, 2)
+ vncipher(17, 17, 2)
+ vncipher(18, 18, 2)
+ vncipher(19, 19, 2)
+ vncipher(16, 16, 1)
+ vncipher(17, 17, 1)
+ vncipher(18, 18, 1)
+ vncipher(19, 19, 1)
+ vncipherlast(16, 16, 0)
+ vncipherlast(17, 17, 0)
+ vncipherlast(18, 18, 0)
+ vncipherlast(19, 19, 0)
+
+ /*
+ * XOR decrypted blocks with IV / previous block: v24 is the
+ * IV (or the fourth ciphertext block of the previous
+ * iteration), v20..v22 are the saved copies of the first
+ * three ciphertext blocks of this iteration.
+ */
+ vxor(16, 16, 24)
+ vxor(17, 17, 20)
+ vxor(18, 18, 21)
+ vxor(19, 19, 22)
+
+ /*
+ * Store back result (with byteswap), over the ciphertext
+ * that was just read from buf.
+ */
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+ stxvw4x(48, %[cc0], %[buf])
+ stxvw4x(49, %[cc1], %[buf])
+ stxvw4x(50, %[cc2], %[buf])
+ stxvw4x(51, %[cc3], %[buf])
+
+ /*
+ * Fourth encrypted block is IV for next run
+ * (v23 holds the saved copy of that ciphertext block).
+ */
+ vand(24, 23, 23)
+
+ addi(%[buf], %[buf], 64)
+
+ bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+ [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+ "ctr", "memory"
+ );
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-decrypt len bytes of data in place and leave in iv the last 16
+ * bytes of the incoming ciphertext. Nothing happens when len is 0.
+ *
+ * The assembly kernels only handle groups of four blocks, so the work
+ * is split in two: the largest prefix made of whole 64-byte groups is
+ * decrypted directly in the caller's buffer, and whatever remains is
+ * copied into a zero-padded 64-byte scratch buffer, decrypted there,
+ * and only the meaningful bytes are copied back.
+ */
+void
+br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	void (*decrypt)(const unsigned char *, const unsigned char *,
+		unsigned char *, size_t);
+	unsigned char final_iv[16];
+	unsigned char *cur;
+
+	if (len == 0) {
+		return;
+	}
+
+	/* Choose the kernel once; it depends only on the round count. */
+	switch (ctx->num_rounds) {
+	case 10:
+		decrypt = &cbcdec_128;
+		break;
+	case 12:
+		decrypt = &cbcdec_192;
+		break;
+	default:
+		decrypt = &cbcdec_256;
+		break;
+	}
+
+	/*
+	 * The last ciphertext block must be saved before it is
+	 * overwritten by the in-place decryption.
+	 */
+	cur = data;
+	memcpy(final_iv, cur + len - 16, 16);
+
+	if (len >= 64) {
+		unsigned char chain[16];
+		size_t quad_blocks;
+		size_t quad_bytes;
+
+		quad_blocks = (len >> 4) & ~(size_t)3;
+		quad_bytes = quad_blocks << 4;
+
+		/*
+		 * Last ciphertext block of the bulk part: it is the
+		 * chaining value for the remainder handled below.
+		 */
+		memcpy(chain, cur + quad_bytes - 16, 16);
+		decrypt(ctx->skey.skni, iv, cur, quad_blocks);
+		cur += quad_bytes;
+		len &= 63;
+		memcpy(iv, chain, 16);
+	}
+
+	if (len > 0) {
+		unsigned char scratch[64];
+
+		memset(scratch, 0, sizeof scratch);
+		memcpy(scratch, cur, len);
+		decrypt(ctx->skey.skni, iv, scratch, 4);
+		memcpy(cur, scratch, len);
+	}
+
+	memcpy(iv, final_iv, 16);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Static descriptor for this CBC decryption implementation. The
+ * initializer lists, in order: the size of the context structure,
+ * the constants 16 and 4 (presumably the block size in bytes and its
+ * base-2 logarithm -- confirm against bearssl_block.h), then the init
+ * and run entry points, each cast to the generic function pointer
+ * type that takes the class pointer in place of the concrete context.
+ */
+const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
+ sizeof(br_aes_pwr8_cbcdec_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t))
+ &br_aes_pwr8_cbcdec_init,
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+ &br_aes_pwr8_cbcdec_run
+};
+
+/*
+ * see bearssl_block.h
+ *
+ * Return the CBC decryption vtable when br_aes_pwr8_supported()
+ * reports a nonzero value, NULL otherwise.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	if (!br_aes_pwr8_supported()) {
+		return NULL;
+	}
+	return &br_aes_pwr8_cbcdec_vtable;
+}
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Fallback definition for builds where BR_POWER8 is not set: the
+ * POWER8 implementation above is not compiled in, so there is no
+ * vtable to hand out and NULL is always returned.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcenc.c
new file mode 100644
index 00000000..00f8eca7
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_cbcenc.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS 1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CBC encryption context: run the key schedule into
+ * ctx->skey.skni, record the round count it returns, and attach the
+ * CBC encryption vtable.
+ */
+void
+br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
+}
+
+static void
+cbcenc_128(const unsigned char *sk,
+ const unsigned char *iv, unsigned char *buf, size_t len)
+{
+ long cc;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc = 0;
+ asm volatile (
+
+ /*
+ * Overview: CBC encryption in place, one 16-byte block of buf
+ * per loop iteration, using an 11-entry subkey array (sk). The
+ * loop count is len >> 4 (see the input operands). iv is only
+ * read here, never written.
+ * NOTE(review): a len below 16 yields a loop count of 0;
+ * presumably callers always pass a nonzero multiple of 16 --
+ * confirm.
+ *
+ * Load subkeys into v0..v10
+ * (an lxvw4x/stxvw4x operand 32+n designates vector register vn;
+ * cc is the running byte offset into sk).
+ */
+ lxvw4x(32, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(33, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(34, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(35, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(36, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(37, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(38, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(39, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(40, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(41, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(42, %[cc], %[sk])
+
+#if BR_POWER8_LE
+ /*
+ * v15 = constant for byteswapping words
+ */
+ lxvw4x(47, 0, %[idx2be])
+#endif
+ /*
+ * Load IV into v16.
+ */
+ lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+#endif
+
+ mtctr(%[num_blocks])
+ label(loop)
+ /*
+ * Load next plaintext block and XOR with current IV
+ * (v16 is the IV on the first iteration, and the previous
+ * ciphertext block afterwards).
+ */
+ lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+ vperm(17, 17, 17, 15)
+#endif
+ vxor(16, 16, 17)
+
+ /*
+ * Encrypt the block: XOR with the first subkey (v0),
+ * vcipher with v1 up to v9, then vcipherlast with v10.
+ */
+ vxor(16, 16, 0)
+ vcipher(16, 16, 1)
+ vcipher(16, 16, 2)
+ vcipher(16, 16, 3)
+ vcipher(16, 16, 4)
+ vcipher(16, 16, 5)
+ vcipher(16, 16, 6)
+ vcipher(16, 16, 7)
+ vcipher(16, 16, 8)
+ vcipher(16, 16, 9)
+ vcipherlast(16, 16, 10)
+
+ /*
+ * Store back result (with byteswap). In the little-endian
+ * case the swapped copy is built in v17, so that v16 keeps
+ * the unswapped ciphertext block the next iteration chains
+ * on.
+ */
+#if BR_POWER8_LE
+ vperm(17, 16, 16, 15)
+ stxvw4x(49, 0, %[buf])
+#else
+ stxvw4x(48, 0, %[buf])
+#endif
+ addi(%[buf], %[buf], 16)
+
+ bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "ctr", "memory"
+ );
+}
+
+static void
+cbcenc_192(const unsigned char *sk,
+ const unsigned char *iv, unsigned char *buf, size_t len)
+{
+ long cc;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc = 0;
+ asm volatile (
+
+ /*
+ * Overview: CBC encryption in place, one 16-byte block of buf
+ * per loop iteration, using a 13-entry subkey array (sk). The
+ * loop count is len >> 4 (see the input operands). iv is only
+ * read here, never written.
+ * NOTE(review): a len below 16 yields a loop count of 0;
+ * presumably callers always pass a nonzero multiple of 16 --
+ * confirm.
+ *
+ * Load subkeys into v0..v12
+ * (an lxvw4x/stxvw4x operand 32+n designates vector register vn;
+ * cc is the running byte offset into sk).
+ */
+ lxvw4x(32, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(33, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(34, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(35, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(36, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(37, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(38, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(39, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(40, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(41, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(42, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(43, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(44, %[cc], %[sk])
+
+#if BR_POWER8_LE
+ /*
+ * v15 = constant for byteswapping words
+ */
+ lxvw4x(47, 0, %[idx2be])
+#endif
+ /*
+ * Load IV into v16.
+ */
+ lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+#endif
+
+ mtctr(%[num_blocks])
+ label(loop)
+ /*
+ * Load next plaintext block and XOR with current IV
+ * (v16 is the IV on the first iteration, and the previous
+ * ciphertext block afterwards).
+ */
+ lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+ vperm(17, 17, 17, 15)
+#endif
+ vxor(16, 16, 17)
+
+ /*
+ * Encrypt the block: XOR with the first subkey (v0),
+ * vcipher with v1 up to v11, then vcipherlast with v12.
+ */
+ vxor(16, 16, 0)
+ vcipher(16, 16, 1)
+ vcipher(16, 16, 2)
+ vcipher(16, 16, 3)
+ vcipher(16, 16, 4)
+ vcipher(16, 16, 5)
+ vcipher(16, 16, 6)
+ vcipher(16, 16, 7)
+ vcipher(16, 16, 8)
+ vcipher(16, 16, 9)
+ vcipher(16, 16, 10)
+ vcipher(16, 16, 11)
+ vcipherlast(16, 16, 12)
+
+ /*
+ * Store back result (with byteswap). In the little-endian
+ * case the swapped copy is built in v17, so that v16 keeps
+ * the unswapped ciphertext block the next iteration chains
+ * on.
+ */
+#if BR_POWER8_LE
+ vperm(17, 16, 16, 15)
+ stxvw4x(49, 0, %[buf])
+#else
+ stxvw4x(48, 0, %[buf])
+#endif
+ addi(%[buf], %[buf], 16)
+
+ bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "ctr", "memory"
+ );
+}
+
+static void
+cbcenc_256(const unsigned char *sk,
+ const unsigned char *iv, unsigned char *buf, size_t len)
+{
+ long cc;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+
+ cc = 0;
+ asm volatile (
+
+ /*
+ * Overview: CBC encryption in place, one 16-byte block of buf
+ * per loop iteration, using a 15-entry subkey array (sk). The
+ * loop count is len >> 4 (see the input operands). iv is only
+ * read here, never written.
+ * NOTE(review): a len below 16 yields a loop count of 0;
+ * presumably callers always pass a nonzero multiple of 16 --
+ * confirm.
+ *
+ * Load subkeys into v0..v14
+ * (an lxvw4x/stxvw4x operand 32+n designates vector register vn;
+ * cc is the running byte offset into sk).
+ */
+ lxvw4x(32, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(33, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(34, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(35, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(36, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(37, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(38, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(39, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(40, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(41, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(42, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(43, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(44, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(45, %[cc], %[sk])
+ addi(%[cc], %[cc], 16)
+ lxvw4x(46, %[cc], %[sk])
+
+#if BR_POWER8_LE
+ /*
+ * v15 = constant for byteswapping words
+ */
+ lxvw4x(47, 0, %[idx2be])
+#endif
+ /*
+ * Load IV into v16.
+ */
+ lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+#endif
+
+ mtctr(%[num_blocks])
+ label(loop)
+ /*
+ * Load next plaintext block and XOR with current IV
+ * (v16 is the IV on the first iteration, and the previous
+ * ciphertext block afterwards).
+ */
+ lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+ vperm(17, 17, 17, 15)
+#endif
+ vxor(16, 16, 17)
+
+ /*
+ * Encrypt the block: XOR with the first subkey (v0),
+ * vcipher with v1 up to v13, then vcipherlast with v14.
+ */
+ vxor(16, 16, 0)
+ vcipher(16, 16, 1)
+ vcipher(16, 16, 2)
+ vcipher(16, 16, 3)
+ vcipher(16, 16, 4)
+ vcipher(16, 16, 5)
+ vcipher(16, 16, 6)
+ vcipher(16, 16, 7)
+ vcipher(16, 16, 8)
+ vcipher(16, 16, 9)
+ vcipher(16, 16, 10)
+ vcipher(16, 16, 11)
+ vcipher(16, 16, 12)
+ vcipher(16, 16, 13)
+ vcipherlast(16, 16, 14)
+
+ /*
+ * Store back result (with byteswap). In the little-endian
+ * case the swapped copy is built in v17, so that v16 keeps
+ * the unswapped ciphertext block the next iteration chains
+ * on.
+ */
+#if BR_POWER8_LE
+ vperm(17, 16, 16, 15)
+ stxvw4x(49, 0, %[buf])
+#else
+ stxvw4x(48, 0, %[buf])
+#endif
+ addi(%[buf], %[buf], 16)
+
+ bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "ctr", "memory"
+ );
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-encrypt len bytes of data in place, then copy the last 16 bytes
+ * of the produced ciphertext into iv so that a later call can continue
+ * the chain. Nothing happens when len is 0.
+ */
+void
+br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	void (*encrypt)(const unsigned char *, const unsigned char *,
+		unsigned char *, size_t);
+	unsigned char *last_block;
+
+	if (len == 0) {
+		return;
+	}
+
+	/* Choose the kernel matching the round count. */
+	switch (ctx->num_rounds) {
+	case 10:
+		encrypt = &cbcenc_128;
+		break;
+	case 12:
+		encrypt = &cbcenc_192;
+		break;
+	default:
+		encrypt = &cbcenc_256;
+		break;
+	}
+	encrypt(ctx->skey.skni, iv, data, len);
+
+	/* The kernel only reads iv; update it from the output here. */
+	last_block = (unsigned char *)data + (len - 16);
+	memcpy(iv, last_block, 16);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Static descriptor for this CBC encryption implementation. The
+ * initializer lists, in order: the size of the context structure,
+ * the constants 16 and 4 (presumably the block size in bytes and its
+ * base-2 logarithm -- confirm against bearssl_block.h), then the init
+ * and run entry points, each cast to the generic function pointer
+ * type that takes the class pointer in place of the concrete context.
+ */
+const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
+ sizeof(br_aes_pwr8_cbcenc_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t))
+ &br_aes_pwr8_cbcenc_init,
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+ &br_aes_pwr8_cbcenc_run
+};
+
+/*
+ * see bearssl_block.h
+ *
+ * Return the CBC encryption vtable when br_aes_pwr8_supported()
+ * reports a nonzero value, NULL otherwise.
+ */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+	if (!br_aes_pwr8_supported()) {
+		return NULL;
+	}
+	return &br_aes_pwr8_cbcenc_vtable;
+}
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Fallback definition for builds where BR_POWER8 is not set: the
+ * POWER8 implementation above is not compiled in, so there is no
+ * vtable to hand out and NULL is always returned.
+ */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctr.c b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctr.c
new file mode 100644
index 00000000..f5d20c0b
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctr.c
@@ -0,0 +1,717 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS 1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CTR context: run the key schedule into ctx->skey.skni,
+ * record the round count it returns, and attach the CTR vtable.
+ */
+void
+br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_pwr8_ctr_vtable;
+}
+
+static void
+ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
+ unsigned char *buf, size_t num_blocks)
+{
+ long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+ static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+ };
+#endif
+ static const uint32_t ctrinc[] = {
+ 0, 0, 0, 4
+ };
+
+ cc0 = 0;
+ cc1 = 16;
+ cc2 = 32;
+ cc3 = 48;
+ asm volatile (
+
+ /*
+ * Overview: CTR-mode processing in place, four 16-byte blocks
+ * of buf per loop iteration, using an 11-entry subkey array
+ * (sk). ivbuf supplies four 16-byte counter blocks (read at
+ * offsets 0, 16, 32 and 48) and is only read here. The loop
+ * count is num_blocks >> 2 (see the input operands).
+ * NOTE(review): this presumably requires num_blocks to be a
+ * nonzero multiple of 4 -- confirm against the caller.
+ *
+ * Load subkeys into v0..v10
+ * (an lxvw4x/stxvw4x operand 32+n designates vector register vn;
+ * cc0 is the running byte offset into sk and is reset to 0 after
+ * the last load, so that cc0..cc3 are 0, 16, 32, 48 afterwards).
+ */
+ lxvw4x(32, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(33, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(34, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(35, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(36, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(37, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(38, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(39, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(40, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(41, %[cc0], %[sk])
+ addi(%[cc0], %[cc0], 16)
+ lxvw4x(42, %[cc0], %[sk])
+ li(%[cc0], 0)
+
+#if BR_POWER8_LE
+ /*
+ * v15 = constant for byteswapping words
+ */
+ lxvw4x(47, 0, %[idx2be])
+#endif
+ /*
+ * v28 = increment for IV counter (ctrinc = { 0, 0, 0, 4 }:
+ * only the last 32-bit word is advanced, by 4, matching the
+ * four blocks handled per iteration).
+ */
+ lxvw4x(60, 0, %[ctrinc])
+
+ /*
+ * Load IV into v16..v19
+ */
+ lxvw4x(48, %[cc0], %[ivbuf])
+ lxvw4x(49, %[cc1], %[ivbuf])
+ lxvw4x(50, %[cc2], %[ivbuf])
+ lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+
+ mtctr(%[num_blocks])
+ label(loop)
+ /*
+ * Compute next IV into v24..v27
+ * (done before v16..v19 are overwritten by the encryption;
+ * presumably vadduwm is a per-32-bit-word modular add, so
+ * no carry leaves the last word -- confirm).
+ */
+ vadduwm(24, 16, 28)
+ vadduwm(25, 17, 28)
+ vadduwm(26, 18, 28)
+ vadduwm(27, 19, 28)
+
+ /*
+ * Load next data blocks (into v20..v23). We do this early
+ * on but we won't need them until IV encryption is done.
+ * They are not byteswapped.
+ */
+ lxvw4x(52, %[cc0], %[buf])
+ lxvw4x(53, %[cc1], %[buf])
+ lxvw4x(54, %[cc2], %[buf])
+ lxvw4x(55, %[cc3], %[buf])
+
+ /*
+ * Encrypt the current IV: XOR with the first subkey (v0),
+ * vcipher with v1 up to v9, then vcipherlast with v10. The
+ * four counter blocks are interleaved at each step.
+ */
+ vxor(16, 16, 0)
+ vxor(17, 17, 0)
+ vxor(18, 18, 0)
+ vxor(19, 19, 0)
+ vcipher(16, 16, 1)
+ vcipher(17, 17, 1)
+ vcipher(18, 18, 1)
+ vcipher(19, 19, 1)
+ vcipher(16, 16, 2)
+ vcipher(17, 17, 2)
+ vcipher(18, 18, 2)
+ vcipher(19, 19, 2)
+ vcipher(16, 16, 3)
+ vcipher(17, 17, 3)
+ vcipher(18, 18, 3)
+ vcipher(19, 19, 3)
+ vcipher(16, 16, 4)
+ vcipher(17, 17, 4)
+ vcipher(18, 18, 4)
+ vcipher(19, 19, 4)
+ vcipher(16, 16, 5)
+ vcipher(17, 17, 5)
+ vcipher(18, 18, 5)
+ vcipher(19, 19, 5)
+ vcipher(16, 16, 6)
+ vcipher(17, 17, 6)
+ vcipher(18, 18, 6)
+ vcipher(19, 19, 6)
+ vcipher(16, 16, 7)
+ vcipher(17, 17, 7)
+ vcipher(18, 18, 7)
+ vcipher(19, 19, 7)
+ vcipher(16, 16, 8)
+ vcipher(17, 17, 8)
+ vcipher(18, 18, 8)
+ vcipher(19, 19, 8)
+ vcipher(16, 16, 9)
+ vcipher(17, 17, 9)
+ vcipher(18, 18, 9)
+ vcipher(19, 19, 9)
+ vcipherlast(16, 16, 10)
+ vcipherlast(17, 17, 10)
+ vcipherlast(18, 18, 10)
+ vcipherlast(19, 19, 10)
+
+#if BR_POWER8_LE
+ vperm(16, 16, 16, 15)
+ vperm(17, 17, 17, 15)
+ vperm(18, 18, 18, 15)
+ vperm(19, 19, 19, 15)
+#endif
+
+ /*
+ * XOR the data blocks loaded earlier (v20..v23) with the
+ * encrypted IV and store the result back over them. In the
+ * little-endian case the encrypted IV was byteswapped just
+ * above, since the data blocks were loaded without swap.
+ */
+ vxor(16, 20, 16)
+ vxor(17, 21, 17)
+ vxor(18, 22, 18)
+ vxor(19, 23, 19)
+ stxvw4x(48, %[cc0], %[buf])
+ stxvw4x(49, %[cc1], %[buf])
+ stxvw4x(50, %[cc2], %[buf])
+ stxvw4x(51, %[cc3], %[buf])
+
+ addi(%[buf], %[buf], 64)
+
+ /*
+ * Update IV (copy the precomputed next counters v24..v27
+ * into v16..v19; vand of a register with itself acts as a
+ * register copy).
+ */
+ vand(16, 24, 24)
+ vand(17, 25, 25)
+ vand(18, 26, 26)
+ vand(19, 27, 27)
+
+ bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+ [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+ [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+ , [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+ "ctr", "memory"
+ );
+}
+
+/* Step the subkey offset forward by one block and load that subkey. */
+#define CTR192_NEXT_SUBKEY(vsr) \
+	addi(%[off0], %[off0], 16) \
+	lxvw4x(vsr, %[off0], %[sk])
+
+/* Apply "op" with subkey register v<rk> to the four lanes v16..v19. */
+#define CTR192_LANES(op, rk) \
+	op(16, 16, rk) \
+	op(17, 17, rk) \
+	op(18, 18, rk) \
+	op(19, 19, rk)
+
+/*
+ * AES-192 CTR processing, four blocks (64 bytes) per loop iteration.
+ *
+ *   sk           13 consecutive 16-byte subkeys
+ *   ivbuf        four 16-byte counter blocks, used for the first iteration
+ *   buf          data, XORed in place with the encrypted counter blocks
+ *   num_blocks   block count; the loop runs (num_blocks >> 2) times
+ *
+ * After each iteration, the last 32-bit word of every counter block is
+ * increased by 4 (vadduwm, so without carry into the other words). The
+ * loop count is tested at the end of the body (bdnz), hence the caller
+ * must provide at least four blocks.
+ */
+static void
+ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long off0 = 0, off1 = 16, off2 = 32, off3 = 48;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4
+	};
+
+	asm volatile (
+
+		/*
+		 * Subkeys 0..12 go into v0..v12; the offset register is
+		 * reset to zero afterwards since it is reused below.
+		 */
+		lxvw4x(32, %[off0], %[sk])
+		CTR192_NEXT_SUBKEY(33)
+		CTR192_NEXT_SUBKEY(34)
+		CTR192_NEXT_SUBKEY(35)
+		CTR192_NEXT_SUBKEY(36)
+		CTR192_NEXT_SUBKEY(37)
+		CTR192_NEXT_SUBKEY(38)
+		CTR192_NEXT_SUBKEY(39)
+		CTR192_NEXT_SUBKEY(40)
+		CTR192_NEXT_SUBKEY(41)
+		CTR192_NEXT_SUBKEY(42)
+		CTR192_NEXT_SUBKEY(43)
+		CTR192_NEXT_SUBKEY(44)
+		li(%[off0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = permutation index for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = amount added to each counter per iteration.
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * v16..v19 = the four initial counter blocks.
+		 */
+		lxvw4x(48, %[off0], %[ivbuf])
+		lxvw4x(49, %[off1], %[ivbuf])
+		lxvw4x(50, %[off2], %[ivbuf])
+		lxvw4x(51, %[off3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * v24..v27 = counter blocks for the next iteration.
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Fetch the data blocks (v20..v23) ahead of time; they are
+		 * only consumed once the counters have been encrypted.
+		 */
+		lxvw4x(52, %[off0], %[buf])
+		lxvw4x(53, %[off1], %[buf])
+		lxvw4x(54, %[off2], %[buf])
+		lxvw4x(55, %[off3], %[buf])
+
+		/*
+		 * Encrypt the four counter blocks in parallel.
+		 */
+		CTR192_LANES(vxor, 0)
+		CTR192_LANES(vcipher, 1)
+		CTR192_LANES(vcipher, 2)
+		CTR192_LANES(vcipher, 3)
+		CTR192_LANES(vcipher, 4)
+		CTR192_LANES(vcipher, 5)
+		CTR192_LANES(vcipher, 6)
+		CTR192_LANES(vcipher, 7)
+		CTR192_LANES(vcipher, 8)
+		CTR192_LANES(vcipher, 9)
+		CTR192_LANES(vcipher, 10)
+		CTR192_LANES(vcipher, 11)
+		CTR192_LANES(vcipherlast, 12)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR the encrypted counters with the data blocks and
+		 * store the result over the input.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[off0], %[buf])
+		stxvw4x(49, %[off1], %[buf])
+		stxvw4x(50, %[off2], %[buf])
+		stxvw4x(51, %[off3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Make the precomputed counters current (vand of a
+		 * register with itself is a plain copy).
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [off0] "+b" (off0), [off1] "+b" (off1), [off2] "+b" (off2),
+	[off3] "+b" (off3), [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+	[ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+#undef CTR192_NEXT_SUBKEY
+#undef CTR192_LANES
+
+/* Bump the subkey offset by 16 bytes, then load the subkey found there. */
+#define CTR256_LOAD_NEXT(vsr) \
+	addi(%[pos0], %[pos0], 16) \
+	lxvw4x(vsr, %[pos0], %[sk])
+
+/* Run one round instruction, with subkey v<rk>, on each of v16..v19. */
+#define CTR256_X4(insn, rk) \
+	insn(16, 16, rk) \
+	insn(17, 17, rk) \
+	insn(18, 18, rk) \
+	insn(19, 19, rk)
+
+/*
+ * AES-256 CTR processing, four blocks (64 bytes) per loop iteration.
+ *
+ *   sk           15 consecutive 16-byte subkeys
+ *   ivbuf        four 16-byte counter blocks, used for the first iteration
+ *   buf          data, XORed in place with the encrypted counter blocks
+ *   num_blocks   block count; the loop runs (num_blocks >> 2) times
+ *
+ * Between iterations only the last 32-bit word of each counter block
+ * changes (it grows by 4, modulo 2^32). The loop is bottom-tested, so
+ * at least four blocks must be supplied.
+ */
+static void
+ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long pos0 = 0, pos1 = 16, pos2 = 32, pos3 = 48;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4
+	};
+
+	asm volatile (
+
+		/*
+		 * Subkeys 0..14 are kept in v0..v14. The offset register
+		 * is zeroed again once the last subkey is loaded.
+		 */
+		lxvw4x(32, %[pos0], %[sk])
+		CTR256_LOAD_NEXT(33)
+		CTR256_LOAD_NEXT(34)
+		CTR256_LOAD_NEXT(35)
+		CTR256_LOAD_NEXT(36)
+		CTR256_LOAD_NEXT(37)
+		CTR256_LOAD_NEXT(38)
+		CTR256_LOAD_NEXT(39)
+		CTR256_LOAD_NEXT(40)
+		CTR256_LOAD_NEXT(41)
+		CTR256_LOAD_NEXT(42)
+		CTR256_LOAD_NEXT(43)
+		CTR256_LOAD_NEXT(44)
+		CTR256_LOAD_NEXT(45)
+		CTR256_LOAD_NEXT(46)
+		li(%[pos0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = permutation index for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = counter increment applied at each iteration.
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Initial counter blocks are loaded into v16..v19.
+		 */
+		lxvw4x(48, %[pos0], %[ivbuf])
+		lxvw4x(49, %[pos1], %[ivbuf])
+		lxvw4x(50, %[pos2], %[ivbuf])
+		lxvw4x(51, %[pos3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Prepare the counters of the next iteration in v24..v27.
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Data blocks are read into v20..v23 now, although they
+		 * are not needed before the encryption has completed.
+		 */
+		lxvw4x(52, %[pos0], %[buf])
+		lxvw4x(53, %[pos1], %[buf])
+		lxvw4x(54, %[pos2], %[buf])
+		lxvw4x(55, %[pos3], %[buf])
+
+		/*
+		 * Encrypt the current counter blocks.
+		 */
+		CTR256_X4(vxor, 0)
+		CTR256_X4(vcipher, 1)
+		CTR256_X4(vcipher, 2)
+		CTR256_X4(vcipher, 3)
+		CTR256_X4(vcipher, 4)
+		CTR256_X4(vcipher, 5)
+		CTR256_X4(vcipher, 6)
+		CTR256_X4(vcipher, 7)
+		CTR256_X4(vcipher, 8)
+		CTR256_X4(vcipher, 9)
+		CTR256_X4(vcipher, 10)
+		CTR256_X4(vcipher, 11)
+		CTR256_X4(vcipher, 12)
+		CTR256_X4(vcipher, 13)
+		CTR256_X4(vcipherlast, 14)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * Combine the data blocks with the encrypted counters and
+		 * write the result back in place.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[pos0], %[buf])
+		stxvw4x(49, %[pos1], %[buf])
+		stxvw4x(50, %[pos2], %[buf])
+		stxvw4x(51, %[pos3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Copy the next counters over the current ones.
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [pos0] "+b" (pos0), [pos1] "+b" (pos1), [pos2] "+b" (pos2),
+	[pos3] "+b" (pos3), [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+	[ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+#undef CTR256_LOAD_NEXT
+#undef CTR256_X4
+
+/*
+ * Write the big-endian 32-bit counters cc, cc+1, cc+2 and cc+3 into the
+ * last four bytes of the four 16-byte slots of ivbuf.
+ */
+static void
+pwr8_ctr_set_counters(unsigned char *ivbuf, uint32_t cc)
+{
+	uint32_t j;
+
+	for (j = 0; j < 4; j ++) {
+		br_enc32be(ivbuf + (j << 4) + 12, cc + j);
+	}
+}
+
+/*
+ * Call the CTR routine that matches the number of rounds of the key
+ * schedule held in ctx.
+ */
+static void
+pwr8_ctr_dispatch(const br_aes_pwr8_ctr_keys *ctx,
+	const unsigned char *ivbuf, unsigned char *buf, size_t num_blocks)
+{
+	switch (ctx->num_rounds) {
+	case 10:
+		ctr_128(ctx->skey.skni, ivbuf, buf, num_blocks);
+		break;
+	case 12:
+		ctr_192(ctx->skey.skni, ivbuf, buf, num_blocks);
+		break;
+	default:
+		ctr_256(ctx->skey.skni, ivbuf, buf, num_blocks);
+		break;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR encryption/decryption of len bytes of data, in place. The first
+ * 12 bytes of iv are combined with a 32-bit big-endian block counter
+ * that starts at cc. Whole 64-byte chunks are processed directly; a
+ * trailing chunk of 1 to 63 bytes goes through a zero-padded temporary
+ * buffer. The returned value is cc increased by the number of blocks
+ * consumed (a final partial block counts as one block).
+ */
+uint32_t
+br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char ivbuf[64];
+	unsigned char tmp[64];
+	unsigned char *buf;
+	size_t u, num_blocks;
+
+	buf = data;
+	for (u = 0; u < sizeof ivbuf; u += 16) {
+		memcpy(ivbuf + u, iv, 12);
+	}
+	if (len >= 64) {
+		/*
+		 * Bulk part: block count rounded down to a multiple
+		 * of four.
+		 */
+		num_blocks = (len >> 4) & ~(size_t)3;
+		pwr8_ctr_set_counters(ivbuf, cc);
+		pwr8_ctr_dispatch(ctx, ivbuf, buf, num_blocks);
+		cc += num_blocks;
+		buf += len & ~(size_t)63;
+		len &= 63;
+	}
+	if (len == 0) {
+		return cc;
+	}
+
+	/*
+	 * Remaining 1 to 63 bytes: run one full four-block iteration on
+	 * a padded copy, then keep only the bytes that were requested.
+	 */
+	memcpy(tmp, buf, len);
+	memset(tmp + len, 0, (sizeof tmp) - len);
+	pwr8_ctr_set_counters(ivbuf, cc);
+	pwr8_ctr_dispatch(ctx, ivbuf, tmp, 4);
+	memcpy(buf, tmp, len);
+	cc += (len + 15) >> 4;
+	return cc;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for the POWER8 AES/CTR implementation. The two
+ * function pointers are cast from the typed-context signatures of this
+ * file to the generic signatures that take a pointer to the class
+ * pointer.
+ */
+const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
+ /* Size of the context structure. */
+ sizeof(br_aes_pwr8_ctr_keys),
+ /*
+ * NOTE(review): presumably the block size in bytes (16) and its
+ * base-2 logarithm (4) -- confirm against br_block_ctr_class.
+ */
+ 16,
+ 4,
+ (void (*)(const br_block_ctr_class **, const void *, size_t))
+ &br_aes_pwr8_ctr_init,
+ (uint32_t (*)(const br_block_ctr_class *const *,
+ const void *, uint32_t, void *, size_t))
+ &br_aes_pwr8_ctr_run
+};
+
+/*
+ * see bearssl_block.h
+ *
+ * Returns the POWER8 AES/CTR class descriptor, or NULL when
+ * br_aes_pwr8_supported() reports that the implementation cannot be
+ * used.
+ */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+	if (!br_aes_pwr8_supported()) {
+		return NULL;
+	}
+	return &br_aes_pwr8_ctr_vtable;
+}
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when BR_POWER8 is not set: there is no POWER8
+ * implementation in this build, so NULL is always returned.
+ */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctrcbc.c b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctrcbc.c
new file mode 100644
index 00000000..a67d30b6
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_pwr8_ctrcbc.c
@@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS 1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Returns the POWER8 AES CTR + CBC-MAC class descriptor when
+ * br_aes_pwr8_supported() returns a non-zero value, NULL otherwise.
+ */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+	const br_block_ctrcbc_class *vt;
+
+	vt = NULL;
+	if (br_aes_pwr8_supported()) {
+		vt = &br_aes_pwr8_ctrcbc_vtable;
+	}
+	return vt;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialises a CTR + CBC-MAC context: the key (len bytes) is expanded
+ * into ctx->skey.skni by br_aes_pwr8_keysched(), whose return value is
+ * recorded as the number of rounds, and the context is bound to this
+ * implementation's vtable.
+ */
+void
+br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
+}
+
+/*
+ * Register conventions for CTR + CBC-MAC:
+ *
+ * AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
+ * Register v15 contains the byteswap index register (little-endian only)
+ * Register v16 contains the CTR counter value
+ * Register v17 contains the CBC-MAC current value
+ * Registers v18 to v27 are scratch
+ * Counter increment uses v28, v29 and v30
+ *
+ * For CTR alone:
+ *
+ * AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
+ * Register v15 contains the byteswap index register (little-endian only)
+ * Registers v16 to v19 contain the CTR counter values (four blocks)
+ * Registers v20 to v27 are scratch
+ * Counter increment uses v28, v29 and v30
+ */
+
+/*
+ * Advance the offset register by one subkey (16 bytes) and load the
+ * subkey found there into the designated VSX register.
+ */
+#define LOAD_SUBKEY_NEXT(vsr) \
+	addi(%[cc], %[cc], 16) \
+	lxvw4x(vsr, %[cc], %[sk])
+
+/*
+ * Load the 11 subkeys of AES-128 into v0..v10 (VSX registers 32..42).
+ * On exit, %[cc] is the offset of the last loaded subkey.
+ */
+#define LOAD_SUBKEYS_128 \
+	lxvw4x(32, %[cc], %[sk]) \
+	LOAD_SUBKEY_NEXT(33) \
+	LOAD_SUBKEY_NEXT(34) \
+	LOAD_SUBKEY_NEXT(35) \
+	LOAD_SUBKEY_NEXT(36) \
+	LOAD_SUBKEY_NEXT(37) \
+	LOAD_SUBKEY_NEXT(38) \
+	LOAD_SUBKEY_NEXT(39) \
+	LOAD_SUBKEY_NEXT(40) \
+	LOAD_SUBKEY_NEXT(41) \
+	LOAD_SUBKEY_NEXT(42)
+
+/*
+ * Load the 13 subkeys of AES-192 into v0..v12.
+ */
+#define LOAD_SUBKEYS_192 \
+	LOAD_SUBKEYS_128 \
+	LOAD_SUBKEY_NEXT(43) \
+	LOAD_SUBKEY_NEXT(44)
+
+/*
+ * Load the 15 subkeys of AES-256 into v0..v14.
+ */
+#define LOAD_SUBKEYS_256 \
+	LOAD_SUBKEYS_192 \
+	LOAD_SUBKEY_NEXT(45) \
+	LOAD_SUBKEY_NEXT(46)
+
+/*
+ * Steps shared by all key sizes: initial subkey XOR (v0) followed by
+ * the rounds that use subkeys v1 to v9, applied to register x.
+ */
+#define BLOCK_ENCRYPT_HEAD(x) \
+	vxor(x, x, 0) \
+	vcipher(x, x, 1) \
+	vcipher(x, x, 2) \
+	vcipher(x, x, 3) \
+	vcipher(x, x, 4) \
+	vcipher(x, x, 5) \
+	vcipher(x, x, 6) \
+	vcipher(x, x, 7) \
+	vcipher(x, x, 8) \
+	vcipher(x, x, 9)
+
+/*
+ * Encrypt the block in register x, in place, with the AES-128 subkeys
+ * held in v0..v10.
+ */
+#define BLOCK_ENCRYPT_128(x) \
+	BLOCK_ENCRYPT_HEAD(x) \
+	vcipherlast(x, x, 10)
+
+/*
+ * Encrypt the block in register x, in place, with the AES-192 subkeys
+ * held in v0..v12.
+ */
+#define BLOCK_ENCRYPT_192(x) \
+	BLOCK_ENCRYPT_HEAD(x) \
+	vcipher(x, x, 10) \
+	vcipher(x, x, 11) \
+	vcipherlast(x, x, 12)
+
+/*
+ * Encrypt the block in register x, in place, with the AES-256 subkeys
+ * held in v0..v14.
+ */
+#define BLOCK_ENCRYPT_256(x) \
+	BLOCK_ENCRYPT_HEAD(x) \
+	vcipher(x, x, 10) \
+	vcipher(x, x, 11) \
+	vcipher(x, x, 12) \
+	vcipher(x, x, 13) \
+	vcipherlast(x, x, 14)
+
+/*
+ * Apply instruction "op" with subkey register k to x, then to y.
+ */
+#define BLOCK_STEP_X2(op, x, y, k) \
+	op(x, x, k) \
+	op(y, y, k)
+
+/*
+ * Steps shared by all key sizes for two interleaved blocks: initial
+ * subkey XOR (v0) and the rounds that use subkeys v1 to v9.
+ */
+#define BLOCK_ENCRYPT_X2_HEAD(x, y) \
+	BLOCK_STEP_X2(vxor, x, y, 0) \
+	BLOCK_STEP_X2(vcipher, x, y, 1) \
+	BLOCK_STEP_X2(vcipher, x, y, 2) \
+	BLOCK_STEP_X2(vcipher, x, y, 3) \
+	BLOCK_STEP_X2(vcipher, x, y, 4) \
+	BLOCK_STEP_X2(vcipher, x, y, 5) \
+	BLOCK_STEP_X2(vcipher, x, y, 6) \
+	BLOCK_STEP_X2(vcipher, x, y, 7) \
+	BLOCK_STEP_X2(vcipher, x, y, 8) \
+	BLOCK_STEP_X2(vcipher, x, y, 9)
+
+/*
+ * Encrypt registers x and y in place (AES-128), one instruction for
+ * each block in turn at every step.
+ */
+#define BLOCK_ENCRYPT_X2_128(x, y) \
+	BLOCK_ENCRYPT_X2_HEAD(x, y) \
+	BLOCK_STEP_X2(vcipherlast, x, y, 10)
+
+/*
+ * Encrypt registers x and y in place (AES-192).
+ */
+#define BLOCK_ENCRYPT_X2_192(x, y) \
+	BLOCK_ENCRYPT_X2_HEAD(x, y) \
+	BLOCK_STEP_X2(vcipher, x, y, 10) \
+	BLOCK_STEP_X2(vcipher, x, y, 11) \
+	BLOCK_STEP_X2(vcipherlast, x, y, 12)
+
+/*
+ * Encrypt registers x and y in place (AES-256).
+ */
+#define BLOCK_ENCRYPT_X2_256(x, y) \
+	BLOCK_ENCRYPT_X2_HEAD(x, y) \
+	BLOCK_STEP_X2(vcipher, x, y, 10) \
+	BLOCK_STEP_X2(vcipher, x, y, 11) \
+	BLOCK_STEP_X2(vcipher, x, y, 12) \
+	BLOCK_STEP_X2(vcipher, x, y, 13) \
+	BLOCK_STEP_X2(vcipherlast, x, y, 14)
+
+/*
+ * Apply instruction "op" with subkey register k to x0, x1, x2 and x3,
+ * in that order.
+ */
+#define BLOCK_STEP_X4(op, x0, x1, x2, x3, k) \
+	op(x0, x0, k) \
+	op(x1, x1, k) \
+	op(x2, x2, k) \
+	op(x3, x3, k)
+
+/*
+ * Steps shared by all key sizes for four interleaved blocks: initial
+ * subkey XOR (v0) and the rounds that use subkeys v1 to v9.
+ */
+#define BLOCK_ENCRYPT_X4_HEAD(x0, x1, x2, x3) \
+	BLOCK_STEP_X4(vxor, x0, x1, x2, x3, 0) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 1) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 2) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 3) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 4) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 5) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 6) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 7) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 8) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 9)
+
+/*
+ * Encrypt registers x0..x3 in place (AES-128), one instruction for
+ * each block in turn at every step.
+ */
+#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
+	BLOCK_ENCRYPT_X4_HEAD(x0, x1, x2, x3) \
+	BLOCK_STEP_X4(vcipherlast, x0, x1, x2, x3, 10)
+
+/*
+ * Encrypt registers x0..x3 in place (AES-192).
+ */
+#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
+	BLOCK_ENCRYPT_X4_HEAD(x0, x1, x2, x3) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 10) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 11) \
+	BLOCK_STEP_X4(vcipherlast, x0, x1, x2, x3, 12)
+
+/*
+ * Encrypt registers x0..x3 in place (AES-256).
+ */
+#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
+	BLOCK_ENCRYPT_X4_HEAD(x0, x1, x2, x3) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 10) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 11) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 12) \
+	BLOCK_STEP_X4(vcipher, x0, x1, x2, x3, 13) \
+	BLOCK_STEP_X4(vcipherlast, x0, x1, x2, x3, 14)
+
+/*
+ * Byteswapping support. When BR_POWER8_LE is set, v15 is loaded with a
+ * permutation index (idx2be) and vperm is used to reverse the bytes of
+ * each 32-bit word; otherwise the macros expand to nothing, or to a
+ * plain register copy.
+ *
+ * BYTESWAP_INIT    load the permutation index into v15
+ * BYTESWAP(x)      byteswap register x in place
+ * BYTESWAPX(d, s)  d = byteswapped s (plain copy when not BR_POWER8_LE)
+ * BYTESWAP_REG     extra asm input operand that provides idx2be
+ */
+#if BR_POWER8_LE
+static const uint32_t idx2be[] = {
+ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+};
+#define BYTESWAP_INIT lxvw4x(47, 0, %[idx2be])
+#define BYTESWAP(x) vperm(x, x, x, 15)
+#define BYTESWAPX(d, s) vperm(d, s, s, 15)
+#define BYTESWAP_REG , [idx2be] "b" (idx2be)
+#else
+#define BYTESWAP_INIT
+#define BYTESWAP(x)
+/* vand of a register with itself copies s into d. */
+#define BYTESWAPX(d, s) vand(d, s, s)
+#define BYTESWAP_REG
+#endif
+
+/*
+ * Counter increments, as four 32-bit words; only the last word is
+ * non-zero. ctrinc adds 1 (one block at a time), ctrinc_x4 adds 4
+ * (four interleaved blocks).
+ */
+static const uint32_t ctrinc[] = {
+ 0, 0, 0, 1
+};
+static const uint32_t ctrinc_x4[] = {
+ 0, 0, 0, 4
+};
+
+/*
+ * Load the increment into v28; exactly one of the two INIT macros is
+ * used in a given asm block, with the matching input operand.
+ */
+#define INCR_128_INIT lxvw4x(60, 0, %[ctrinc])
+#define INCR_128_X4_INIT lxvw4x(60, 0, %[ctrinc_x4])
+
+/*
+ * INCR_128(d, s): d = s + v28, computed over the full 128 bits.
+ * v29 and v30 are used as scratch registers.
+ *
+ * The addition is done word by word (vadduwm); the carry out of each
+ * word is obtained with vaddcuw, moved one word to the left with
+ * vsldoi, and added in turn. Three such propagation steps bring a
+ * carry from the last word up to the first one; the carry out of the
+ * first word is dropped.
+ */
+#define INCR_128(d, s) \
+ vaddcuw(29, s, 28) \
+ vadduwm(d, s, 28) \
+ vsldoi(30, 29, 29, 4) \
+ vaddcuw(29, d, 30) \
+ vadduwm(d, d, 30) \
+ vsldoi(30, 29, 29, 4) \
+ vaddcuw(29, d, 30) \
+ vadduwm(d, d, 30) \
+ vsldoi(30, 29, 29, 4) \
+ vadduwm(d, d, 30)
+
+/*
+ * MKCTR(size) defines ctr_<size>(), the CTR-only routine for the given
+ * key size (128, 192 or 256):
+ *
+ *   sk              subkeys, 16 bytes each
+ *   ctrbuf          four consecutive 16-byte counter blocks; updated
+ *                   values are written back on exit
+ *   buf             data, XORed in place with the encrypted counters
+ *   num_blocks_x4   number of 64-byte chunks (four blocks each)
+ *
+ * Each counter block is increased by 4 (128-bit addition, INCR_128 with
+ * ctrinc_x4) per chunk. The loop count is tested at the bottom of the
+ * loop (bdnz), so num_blocks_x4 must be at least 1.
+ */
+#define MKCTR(size) \
+static void \
+ctr_ ## size(const unsigned char *sk, \
+ unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
+{ \
+ long cc, cc0, cc1, cc2, cc3; \
+ \
+ cc = 0; \
+ cc0 = 0; \
+ cc1 = 16; \
+ cc2 = 32; \
+ cc3 = 48; \
+ asm volatile ( \
+ \
+ /* \
+ * Load subkeys into v0..v10, v0..v12 or v0..v14, \
+ * depending on the key size. \
+ */ \
+ LOAD_SUBKEYS_ ## size \
+ li(%[cc], 0) \
+ \
+ BYTESWAP_INIT \
+ INCR_128_X4_INIT \
+ \
+ /* \
+ * Load current CTR counters into v16 to v19. \
+ */ \
+ lxvw4x(48, %[cc0], %[ctrbuf]) \
+ lxvw4x(49, %[cc1], %[ctrbuf]) \
+ lxvw4x(50, %[cc2], %[ctrbuf]) \
+ lxvw4x(51, %[cc3], %[ctrbuf]) \
+ BYTESWAP(16) \
+ BYTESWAP(17) \
+ BYTESWAP(18) \
+ BYTESWAP(19) \
+ \
+ mtctr(%[num_blocks_x4]) \
+ \
+ label(loop) \
+ /* \
+ * Compute next counter values into v20..v23. \
+ */ \
+ INCR_128(20, 16) \
+ INCR_128(21, 17) \
+ INCR_128(22, 18) \
+ INCR_128(23, 19) \
+ \
+ /* \
+ * Load the next data blocks into v24..v27, encrypt the \
+ * counter values, and XOR them into the data blocks. \
+ */ \
+ lxvw4x(56, %[cc0], %[buf]) \
+ lxvw4x(57, %[cc1], %[buf]) \
+ lxvw4x(58, %[cc2], %[buf]) \
+ lxvw4x(59, %[cc3], %[buf]) \
+ BYTESWAP(24) \
+ BYTESWAP(25) \
+ BYTESWAP(26) \
+ BYTESWAP(27) \
+ BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
+ vxor(16, 16, 24) \
+ vxor(17, 17, 25) \
+ vxor(18, 18, 26) \
+ vxor(19, 19, 27) \
+ BYTESWAP(16) \
+ BYTESWAP(17) \
+ BYTESWAP(18) \
+ BYTESWAP(19) \
+ stxvw4x(48, %[cc0], %[buf]) \
+ stxvw4x(49, %[cc1], %[buf]) \
+ stxvw4x(50, %[cc2], %[buf]) \
+ stxvw4x(51, %[cc3], %[buf]) \
+ \
+ /* \
+ * Update counters and data pointer. \
+ */ \
+ vand(16, 20, 20) \
+ vand(17, 21, 21) \
+ vand(18, 22, 22) \
+ vand(19, 23, 23) \
+ addi(%[buf], %[buf], 64) \
+ \
+ bdnz(loop) \
+ \
+ /* \
+ * Write back new counter values. \
+ */ \
+ BYTESWAP(16) \
+ BYTESWAP(17) \
+ BYTESWAP(18) \
+ BYTESWAP(19) \
+ stxvw4x(48, %[cc0], %[ctrbuf]) \
+ stxvw4x(49, %[cc1], %[ctrbuf]) \
+ stxvw4x(50, %[cc2], %[ctrbuf]) \
+ stxvw4x(51, %[cc3], %[ctrbuf]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf), \
+ [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
+: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
+ [num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
+ BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+ "v30", "ctr", "memory" \
+ ); \
+}
+
+/* Instantiate ctr_128(), ctr_192() and ctr_256(). */
+MKCTR(128)
+MKCTR(192)
+MKCTR(256)
+
+/*
+ * MKCBCMAC(size) defines cbcmac_<size>(), the CBC-MAC-only routine for
+ * the given key size (128, 192 or 256):
+ *
+ *   sk           subkeys, 16 bytes each
+ *   cbcmac       16-byte running CBC-MAC value; read on entry, written
+ *                back on exit
+ *   buf          input data (only read)
+ *   num_blocks   number of 16-byte blocks to process
+ *
+ * The loop count is tested at the bottom of the loop (bdnz), so
+ * num_blocks must be at least 1.
+ */
+#define MKCBCMAC(size) \
+static void \
+cbcmac_ ## size(const unsigned char *sk, \
+ unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
+{ \
+ long cc; \
+ \
+ cc = 0; \
+ asm volatile ( \
+ \
+ /* \
+ * Load subkeys into v0..v10, v0..v12 or v0..v14, \
+ * depending on the key size. \
+ */ \
+ LOAD_SUBKEYS_ ## size \
+ li(%[cc], 0) \
+ \
+ BYTESWAP_INIT \
+ \
+ /* \
+ * Load current CBC-MAC value into v16. \
+ */ \
+ lxvw4x(48, %[cc], %[cbcmac]) \
+ BYTESWAP(16) \
+ \
+ mtctr(%[num_blocks]) \
+ \
+ label(loop) \
+ /* \
+ * Load next block, XOR into current CBC-MAC value, \
+ * and then encrypt it. \
+ */ \
+ lxvw4x(49, %[cc], %[buf]) \
+ BYTESWAP(17) \
+ vxor(16, 16, 17) \
+ BLOCK_ENCRYPT_ ## size(16) \
+ addi(%[buf], %[buf], 16) \
+ \
+ bdnz(loop) \
+ \
+ /* \
+ * Write back new CBC-MAC value. \
+ */ \
+ BYTESWAP(16) \
+ stxvw4x(48, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
+ BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+ "v30", "ctr", "memory" \
+ ); \
+}
+
+/* Instantiate cbcmac_128(), cbcmac_192() and cbcmac_256(). */
+MKCBCMAC(128)
+MKCBCMAC(192)
+MKCBCMAC(256)
+
+/*
+ * MKENCRYPT(size) defines ctrcbc_<size>_encrypt(), which performs CTR
+ * encryption and a CBC-MAC over the encrypted data, for the given key
+ * size (128, 192 or 256):
+ *
+ *   sk           subkeys, 16 bytes each
+ *   ctr          16-byte counter block; read on entry, updated on exit
+ *   cbcmac       16-byte CBC-MAC value; read on entry, updated on exit
+ *   buf          data, encrypted in place
+ *   num_blocks   number of 16-byte blocks to process
+ *
+ * The first block is processed before the loop count is loaded, so
+ * num_blocks must be at least 1.
+ */
+#define MKENCRYPT(size) \
+static void \
+ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
+ unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+ size_t num_blocks) \
+{ \
+ long cc; \
+ \
+ cc = 0; \
+ asm volatile ( \
+ \
+ /* \
+ * Load subkeys into v0..v10, v0..v12 or v0..v14, \
+ * depending on the key size. \
+ */ \
+ LOAD_SUBKEYS_ ## size \
+ li(%[cc], 0) \
+ \
+ BYTESWAP_INIT \
+ INCR_128_INIT \
+ \
+ /* \
+ * Load current CTR counter into v16, and current \
+ * CBC-MAC IV into v17. \
+ */ \
+ lxvw4x(48, %[cc], %[ctr]) \
+ lxvw4x(49, %[cc], %[cbcmac]) \
+ BYTESWAP(16) \
+ BYTESWAP(17) \
+ \
+ /* \
+ * At each iteration, we do two parallel encryptions: \
+ * - new counter value for encryption of the next block; \
+ * - CBC-MAC over the previous encrypted block. \
+ * Thus, each plaintext block implies two AES instances, \
+ * over two successive iterations. This requires a single \
+ * counter encryption before the loop, and a single \
+ * CBC-MAC encryption after the loop. \
+ */ \
+ \
+ /* \
+ * Encrypt first block (into v20). The next counter value \
+ * is computed into v22 before v16 is encrypted in place; \
+ * v21 receives the byteswapped copy that is stored, so \
+ * that v20 stays usable as CBC-MAC input. \
+ */ \
+ lxvw4x(52, %[cc], %[buf]) \
+ BYTESWAP(20) \
+ INCR_128(22, 16) \
+ BLOCK_ENCRYPT_ ## size(16) \
+ vxor(20, 20, 16) \
+ BYTESWAPX(21, 20) \
+ stxvw4x(53, %[cc], %[buf]) \
+ vand(16, 22, 22) \
+ addi(%[buf], %[buf], 16) \
+ \
+ /* \
+ * Load loop counter; skip the loop if there is only \
+ * one block in total (already handled by the boundary \
+ * conditions). \
+ */ \
+ mtctr(%[num_blocks]) \
+ bdz(fastexit) \
+ \
+ label(loop) \
+ /* \
+ * Upon loop entry: \
+ * v16 counter value for next block \
+ * v17 current CBC-MAC value \
+ * v20 encrypted previous block \
+ */ \
+ vxor(17, 17, 20) \
+ INCR_128(22, 16) \
+ lxvw4x(52, %[cc], %[buf]) \
+ BYTESWAP(20) \
+ BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+ vxor(20, 20, 16) \
+ BYTESWAPX(21, 20) \
+ stxvw4x(53, %[cc], %[buf]) \
+ addi(%[buf], %[buf], 16) \
+ vand(16, 22, 22) \
+ \
+ bdnz(loop) \
+ \
+ label(fastexit) \
+ /* \
+ * CBC-MAC over the last encrypted block, then write \
+ * back the counter and the CBC-MAC value. \
+ */ \
+ vxor(17, 17, 20) \
+ BLOCK_ENCRYPT_ ## size(17) \
+ BYTESWAP(16) \
+ BYTESWAP(17) \
+ stxvw4x(48, %[cc], %[ctr]) \
+ stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+ [num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+ BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+ "v30", "ctr", "memory" \
+ ); \
+}
+
+/* Instantiate ctrcbc_{128,192,256}_encrypt(). */
+MKENCRYPT(128)
+MKENCRYPT(192)
+MKENCRYPT(256)
+
+/*
+ * MKDECRYPT(size) defines ctrcbc_<size>_decrypt(), which computes a
+ * CBC-MAC over the encrypted data and performs CTR decryption, for the
+ * given key size (128, 192 or 256):
+ *
+ *   sk           subkeys, 16 bytes each
+ *   ctr          16-byte counter block; read on entry, updated on exit
+ *   cbcmac       16-byte CBC-MAC value; read on entry, updated on exit
+ *   buf          data, decrypted in place
+ *   num_blocks   number of 16-byte blocks to process
+ *
+ * The loop count is tested at the bottom of the loop (bdnz), so
+ * num_blocks must be at least 1.
+ */
+#define MKDECRYPT(size) \
+static void \
+ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
+ unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+ size_t num_blocks) \
+{ \
+ long cc; \
+ \
+ cc = 0; \
+ asm volatile ( \
+ \
+ /* \
+ * Load subkeys into v0..v10, v0..v12 or v0..v14, \
+ * depending on the key size. \
+ */ \
+ LOAD_SUBKEYS_ ## size \
+ li(%[cc], 0) \
+ \
+ BYTESWAP_INIT \
+ INCR_128_INIT \
+ \
+ /* \
+ * Load current CTR counter into v16, and current \
+ * CBC-MAC IV into v17. \
+ */ \
+ lxvw4x(48, %[cc], %[ctr]) \
+ lxvw4x(49, %[cc], %[cbcmac]) \
+ BYTESWAP(16) \
+ BYTESWAP(17) \
+ \
+ /* \
+ * At each iteration, we do two parallel encryptions: \
+ * - new counter value for decryption of the next block; \
+ * - CBC-MAC over the next encrypted block. \
+ * Each iteration performs the two AES instances related \
+ * to the current block; there is thus no need for some \
+ * extra pre-loop and post-loop work as in encryption. \
+ */ \
+ \
+ mtctr(%[num_blocks]) \
+ \
+ label(loop) \
+ /* \
+ * Upon loop entry: \
+ * v16 counter value for next block \
+ * v17 current CBC-MAC value \
+ * The encrypted block (v20) is XORed into the CBC-MAC \
+ * value before it is decrypted; v21 receives the \
+ * byteswapped decrypted block that is stored. \
+ */ \
+ lxvw4x(52, %[cc], %[buf]) \
+ BYTESWAP(20) \
+ vxor(17, 17, 20) \
+ INCR_128(22, 16) \
+ BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+ vxor(20, 20, 16) \
+ BYTESWAPX(21, 20) \
+ stxvw4x(53, %[cc], %[buf]) \
+ addi(%[buf], %[buf], 16) \
+ vand(16, 22, 22) \
+ \
+ bdnz(loop) \
+ \
+ /* \
+ * Store back counter and CBC-MAC value. \
+ */ \
+ BYTESWAP(16) \
+ BYTESWAP(17) \
+ stxvw4x(48, %[cc], %[ctr]) \
+ stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+ [num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+ BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+ "v30", "ctr", "memory" \
+ ); \
+}
+
+/* Instantiate ctrcbc_{128,192,256}_decrypt(). */
+MKDECRYPT(128)
+MKDECRYPT(192)
+MKDECRYPT(256)
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR encryption of data (in place) combined with a CBC-MAC over the
+ * encrypted data. ctr and cbcmac are 16-byte values which are read and
+ * updated. Only whole 16-byte blocks are processed (len >> 4 of them).
+ *
+ * The assembly routines process their first block before testing the
+ * block count, so they must never be called with a count of zero: with
+ * 0 < len < 16 this would access memory beyond the caller's buffers.
+ * The test is therefore made on the block count, not on len itself.
+ */
+void
+br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	size_t num_blocks;
+
+	num_blocks = len >> 4;
+	if (num_blocks == 0) {
+		return;
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		ctrcbc_128_encrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+		break;
+	case 12:
+		ctrcbc_192_encrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+		break;
+	default:
+		ctrcbc_256_encrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+		break;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-MAC over the encrypted data combined with CTR decryption of data
+ * (in place). ctr and cbcmac are 16-byte values which are read and
+ * updated. Only whole 16-byte blocks are processed (len >> 4 of them).
+ *
+ * The assembly loop tests its counter after the loop body, so a block
+ * count of zero would make it run over memory that does not belong to
+ * the caller. The test is therefore made on the block count, which
+ * also covers 0 < len < 16.
+ */
+void
+br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	size_t num_blocks;
+
+	num_blocks = len >> 4;
+	if (num_blocks == 0) {
+		return;
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		ctrcbc_128_decrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+		break;
+	case 12:
+		ctrcbc_192_decrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+		break;
+	default:
+		ctrcbc_256_decrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+		break;
+	}
+}
+
+/*
+ * Set dst to src + 1, where both are 16-byte big-endian integers; the
+ * addition wraps around modulo 2^128. The two halves of src are read
+ * before dst is written. No conditional branch is involved in the
+ * carry computation.
+ */
+static inline void
+incr_ctr(void *dst, const void *src)
+{
+	const unsigned char *in;
+	unsigned char *out;
+	uint64_t high, low, carry;
+
+	in = src;
+	out = dst;
+	high = br_dec64be(in);
+	low = br_dec64be(in + 8) + 1;
+
+	/*
+	 * (low | -low) has its top bit set if and only if low is not
+	 * zero; the carry is thus 1 exactly when low wrapped to zero.
+	 */
+	carry = ((low | -low) >> 63) ^ (uint64_t)1;
+	br_enc64be(out, high + carry);
+	br_enc64be(out + 8, low);
+}
+
+/*
+ * Call the CTR routine that matches the number of rounds of the key
+ * schedule held in ctx.
+ */
+static void
+ctrcbc_ctr_dispatch(const br_aes_pwr8_ctrcbc_keys *ctx,
+	unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4)
+{
+	switch (ctx->num_rounds) {
+	case 10:
+		ctr_128(ctx->skey.skni, ctrbuf, buf, num_blocks_x4);
+		break;
+	case 12:
+		ctr_192(ctx->skey.skni, ctrbuf, buf, num_blocks_x4);
+		break;
+	default:
+		ctr_256(ctx->skey.skni, ctrbuf, buf, num_blocks_x4);
+		break;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-only processing of len bytes of data, in place. ctr is the
+ * 16-byte counter block: it is read on entry and, on exit, has been
+ * increased by the number of whole 16-byte blocks that were processed.
+ * Whole 64-byte chunks are handled directly; a remainder of 1 to 63
+ * bytes goes through a zero-padded temporary buffer.
+ */
+void
+br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char ctrbuf[64];
+	unsigned char tmp[64];
+	unsigned char *buf;
+	size_t u, full;
+
+	/*
+	 * ctrbuf receives four successive counter values, starting
+	 * with the current one.
+	 */
+	buf = data;
+	memcpy(ctrbuf, ctr, 16);
+	for (u = 16; u < sizeof ctrbuf; u += 16) {
+		incr_ctr(ctrbuf + u, ctrbuf + u - 16);
+	}
+
+	if (len >= 64) {
+		ctrcbc_ctr_dispatch(ctx, ctrbuf, buf, len >> 6);
+		buf += len & ~(size_t)63;
+		len &= 63;
+	}
+	if (len == 0) {
+		/*
+		 * Nothing left: the first slot of ctrbuf is the counter
+		 * for the next block.
+		 */
+		memcpy(ctr, ctrbuf, 16);
+		return;
+	}
+
+	/*
+	 * Between 1 and 63 bytes remain, i.e. 0 to 3 whole blocks. The
+	 * output counter is the slot that follows the whole blocks; if
+	 * there is no whole block, ctr is left as it is. This must be
+	 * done before the call below, which overwrites ctrbuf.
+	 */
+	full = len >> 4;
+	if (full > 0) {
+		memcpy(ctr, ctrbuf + (full << 4), 16);
+	}
+	memcpy(tmp, buf, len);
+	memset(tmp + len, 0, (sizeof tmp) - len);
+	ctrcbc_ctr_dispatch(ctx, ctrbuf, tmp, 1);
+	memcpy(buf, tmp, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-MAC computation over data. cbcmac is the 16-byte running value,
+ * which is read and updated. Only whole 16-byte blocks are processed
+ * (len >> 4 of them).
+ *
+ * The assembly loop tests its counter after the loop body, so a block
+ * count of zero would make it read far beyond the input buffer. The
+ * test is therefore made on the block count, which also covers
+ * 0 < len < 16.
+ */
+void
+br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	size_t num_blocks;
+
+	num_blocks = len >> 4;
+	if (num_blocks == 0) {
+		return;
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		cbcmac_128(ctx->skey.skni, cbcmac, data, num_blocks);
+		break;
+	case 12:
+		cbcmac_192(ctx->skey.skni, cbcmac, data, num_blocks);
+		break;
+	default:
+		cbcmac_256(ctx->skey.skni, cbcmac, data, num_blocks);
+		break;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for the POWER8 AES CTR + CBC-MAC implementation.
+ * The function pointers are cast from the typed-context signatures of
+ * this file to the generic signatures that take a pointer to the class
+ * pointer.
+ */
+const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
+ /* Size of the context structure. */
+ sizeof(br_aes_pwr8_ctrcbc_keys),
+ /*
+ * NOTE(review): presumably the block size in bytes (16) and its
+ * base-2 logarithm (4) -- confirm against br_block_ctrcbc_class.
+ */
+ 16,
+ 4,
+ /* Key setup. */
+ (void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+ &br_aes_pwr8_ctrcbc_init,
+ /* Combined CTR encryption and CBC-MAC. */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_pwr8_ctrcbc_encrypt,
+ /* Combined CBC-MAC and CTR decryption. */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_pwr8_ctrcbc_decrypt,
+ /* CTR only. */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, size_t))
+ &br_aes_pwr8_ctrcbc_ctr,
+ /* CBC-MAC only. */
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, const void *, size_t))
+ &br_aes_pwr8_ctrcbc_mac
+};
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when BR_POWER8 is not set: there is no POWER8
+ * implementation in this build, so NULL is always returned.
+ */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_small_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/aes_small_cbcdec.c
new file mode 100644
index 00000000..8567244b
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_small_cbcdec.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CBC decryption context: install the method table, call
+ * br_aes_keysched() with ctx->skey, the key and its length (presumably
+ * expanding the key into ctx->skey), and keep the returned value in
+ * ctx->num_rounds.
+ */
+void
+br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_small_cbcdec_vtable;
+ ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * In-place CBC decryption of data[], 16 bytes at a time. Each block is
+ * decrypted and then XORed with the chaining value held in iv[]; the
+ * ciphertext of that block becomes the new chaining value, so iv[] ends
+ * up holding the last ciphertext block processed. The loop subtracts 16
+ * from len on every pass and stops when len reaches zero.
+ */
+void
+br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain, *blk;
+
+	chain = iv;
+	for (blk = data; len > 0; blk += 16, len -= 16) {
+		unsigned char saved[16];
+		size_t k;
+
+		/* The ciphertext is overwritten below; keep a copy for chaining. */
+		memcpy(saved, blk, sizeof saved);
+		br_aes_small_decrypt(ctx->num_rounds, ctx->skey, blk);
+		for (k = 0; k < sizeof saved; k ++) {
+			blk[k] ^= chain[k];
+		}
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the br_aes_small_cbcdec_* functions. Both functions
+ * take a pointer to br_aes_small_cbcdec_keys; their addresses are cast
+ * to generic signatures that take a pointer to the class pointer.
+ */
+const br_block_cbcdec_class br_aes_small_cbcdec_vtable = {
+ sizeof(br_aes_small_cbcdec_keys), /* size of the key context structure */
+ 16, /* presumably the block size in bytes; confirm in bearssl_block.h */
+ 4, /* presumably log2 of the block size (16 == 1 << 4); confirm */
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t))
+ &br_aes_small_cbcdec_init,
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+ &br_aes_small_cbcdec_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_small_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/aes_small_cbcenc.c
new file mode 100644
index 00000000..0dc2910a
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_small_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CBC encryption context: install the method table, call
+ * br_aes_keysched() with ctx->skey, the key and its length (presumably
+ * expanding the key into ctx->skey), and keep the returned value in
+ * ctx->num_rounds.
+ */
+void
+br_aes_small_cbcenc_init(br_aes_small_cbcenc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_small_cbcenc_vtable;
+ ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * In-place CBC encryption of data[], 16 bytes at a time. Each block is
+ * XORed with the chaining value held in iv[], encrypted, and the
+ * resulting ciphertext is copied back into iv[] for the next block. The
+ * loop subtracts 16 from len on every pass and stops when len reaches
+ * zero.
+ */
+void
+br_aes_small_cbcenc_run(const br_aes_small_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain, *blk;
+
+	chain = iv;
+	for (blk = data; len > 0; blk += 16, len -= 16) {
+		size_t k;
+
+		for (k = 0; k < 16; k ++) {
+			blk[k] ^= chain[k];
+		}
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, blk);
+		memcpy(chain, blk, 16);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the br_aes_small_cbcenc_* functions. Both functions
+ * take a pointer to br_aes_small_cbcenc_keys; their addresses are cast
+ * to generic signatures that take a pointer to the class pointer.
+ */
+const br_block_cbcenc_class br_aes_small_cbcenc_vtable = {
+ sizeof(br_aes_small_cbcenc_keys), /* size of the key context structure */
+ 16, /* presumably the block size in bytes; confirm in bearssl_block.h */
+ 4, /* presumably log2 of the block size (16 == 1 << 4); confirm */
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t))
+ &br_aes_small_cbcenc_init,
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+ &br_aes_small_cbcenc_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_small_ctr.c b/test/monniaux/BearSSL/src/symcipher/aes_small_ctr.c
new file mode 100644
index 00000000..d5d371c6
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_small_ctr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CTR context: install the method table, call
+ * br_aes_keysched() with ctx->skey, the key and its length (presumably
+ * expanding the key into ctx->skey), and keep the returned value in
+ * ctx->num_rounds.
+ */
+void
+br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_small_ctr_vtable;
+ ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/*
+ * XOR the first len bytes of src[] into dst[] (dst[i] ^= src[i]).
+ * A zero len leaves dst[] untouched.
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-mode processing of data[] in place. For every chunk of up to 16
+ * bytes a counter block is built from the 12 bytes of iv[] followed by
+ * the 32-bit big-endian value of cc, encrypted, and XORed into the
+ * data; cc is incremented once per counter block. The final chunk may
+ * be shorter than 16 bytes. Returns the updated value of cc.
+ */
+uint32_t
+br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *out;
+
+	out = data;
+	while (len > 0) {
+		unsigned char ks[16];
+		size_t chunk;
+
+		memcpy(ks, iv, 12);
+		br_enc32be(ks + 12, cc ++);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, ks);
+		chunk = (len < sizeof ks) ? len : sizeof ks;
+		xorbuf(out, ks, chunk);
+		out += chunk;
+		len -= chunk;
+	}
+	return cc;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the br_aes_small_ctr_* functions. Both functions
+ * take a pointer to br_aes_small_ctr_keys; their addresses are cast to
+ * generic signatures that take a pointer to the class pointer.
+ */
+const br_block_ctr_class br_aes_small_ctr_vtable = {
+ sizeof(br_aes_small_ctr_keys), /* size of the key context structure */
+ 16, /* presumably the block size in bytes; confirm in bearssl_block.h */
+ 4, /* presumably log2 of the block size (16 == 1 << 4); confirm */
+ (void (*)(const br_block_ctr_class **, const void *, size_t))
+ &br_aes_small_ctr_init,
+ (uint32_t (*)(const br_block_ctr_class *const *,
+ const void *, uint32_t, void *, size_t))
+ &br_aes_small_ctr_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_small_ctrcbc.c b/test/monniaux/BearSSL/src/symcipher/aes_small_ctrcbc.c
new file mode 100644
index 00000000..2d6ba329
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_small_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CTR + CBC-MAC context: install the method table, call
+ * br_aes_keysched() with ctx->skey, the key and its length (presumably
+ * expanding the key into ctx->skey), and keep the returned value in
+ * ctx->num_rounds.
+ */
+void
+br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_small_ctrcbc_vtable;
+ ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/*
+ * Byte-wise XOR of src[] into dst[] over len bytes; dst[] is the only
+ * buffer modified.
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out;
+	const unsigned char *in, *stop;
+
+	out = dst;
+	in = src;
+	for (stop = in + len; in != stop; in ++, out ++) {
+		*out ^= *in;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-mode processing of data[] in place, 16 bytes per pass. The 16
+ * bytes of ctr[] are read as four big-endian 32-bit words forming one
+ * 128-bit counter (most significant word first). For each block the
+ * counter is encrypted and XORed into the data, then incremented by
+ * one. The updated counter is written back to ctr[] on exit.
+ *
+ * The increment is branch-free: (~(x | -x)) >> 31 is 1 exactly when x
+ * is zero, i.e. when the word just incremented wrapped around.
+ */
+void
+br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *out, *cbuf;
+	uint32_t w[4];
+	int j;
+
+	out = data;
+	cbuf = ctr;
+	/* w[0] is the most significant word, w[3] the least significant. */
+	for (j = 0; j < 4; j ++) {
+		w[j] = br_dec32be(cbuf + (j << 2));
+	}
+	for (; len > 0; out += 16, len -= 16) {
+		unsigned char ks[16];
+		uint32_t cy;
+
+		for (j = 0; j < 4; j ++) {
+			br_enc32be(ks + (j << 2), w[j]);
+		}
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, ks);
+		xorbuf(out, ks, 16);
+
+		/* 128-bit increment with carry propagation. */
+		w[3] ++;
+		cy = (~(w[3] | -w[3])) >> 31;
+		w[2] += cy;
+		cy &= (~(w[2] | -w[2])) >> 31;
+		w[1] += cy;
+		cy &= (~(w[1] | -w[1])) >> 31;
+		w[0] += cy;
+	}
+	for (j = 0; j < 4; j ++) {
+		br_enc32be(cbuf + (j << 2), w[j]);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-MAC update: every 16-byte block of data[] is XORed into the
+ * running value held in cbcmac[], which is then encrypted in place.
+ * data[] itself is only read. The loop subtracts 16 from len on every
+ * pass and stops when len reaches zero.
+ */
+void
+br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *src;
+
+	for (src = data; len > 0; src += 16, len -= 16) {
+		xorbuf(cbcmac, src, 16);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CTR and CBC-MAC, encryption direction: data[] is first
+ * transformed in place by br_aes_small_ctrcbc_ctr(), then the result is
+ * fed to br_aes_small_ctrcbc_mac(), so the MAC is computed over the CTR
+ * output.
+ */
+void
+br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);
+ br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CTR and CBC-MAC, decryption direction: the MAC is updated
+ * over data[] as received, before br_aes_small_ctrcbc_ctr() transforms
+ * data[] in place. The call order is the reverse of the encryption
+ * function, so both directions compute the MAC over the same bytes.
+ */
+void
+br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);
+ br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the br_aes_small_ctrcbc_* functions. The functions
+ * take a pointer to br_aes_small_ctrcbc_keys; their addresses are cast
+ * to generic signatures that take a pointer to the class pointer.
+ */
+const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = {
+ sizeof(br_aes_small_ctrcbc_keys), /* size of the key context structure */
+ 16, /* presumably the block size in bytes; confirm in bearssl_block.h */
+ 4, /* presumably log2 of the block size (16 == 1 << 4); confirm */
+ (void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+ &br_aes_small_ctrcbc_init,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_small_ctrcbc_encrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_small_ctrcbc_decrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, size_t))
+ &br_aes_small_ctrcbc_ctr,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, const void *, size_t))
+ &br_aes_small_ctrcbc_mac
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_small_dec.c b/test/monniaux/BearSSL/src/symcipher/aes_small_dec.c
new file mode 100644
index 00000000..59dca8ec
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_small_dec.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box.
+ *
+ * 256 byte values, listed twelve per row; inv_sub_bytes() replaces each
+ * state cell x with iS[x].
+ */
+static const unsigned char iS[] = {
+ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+ 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+ 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+ 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+ 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+ 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+ 0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+ 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+ 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+ 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+ 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+ 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+ 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+ 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+ 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+ 0x55, 0x21, 0x0C, 0x7D
+};
+
+/*
+ * XOR four 32-bit subkey words into the 16-cell state. Word n covers
+ * cells 4n to 4n+3, with its most significant byte going to cell 4n.
+ */
+static void
+add_round_key(unsigned *state, const uint32_t *skeys)
+{
+	int col;
+
+	for (col = 0; col < 4; col ++) {
+		unsigned *cell = state + (col << 2);
+		uint32_t w = skeys[col];
+
+		cell[0] ^= (unsigned)(w >> 24);
+		cell[1] ^= (unsigned)(w >> 16) & 0xFF;
+		cell[2] ^= (unsigned)(w >> 8) & 0xFF;
+		cell[3] ^= (unsigned)w & 0xFF;
+	}
+}
+
+/*
+ * Replace each of the 16 state cells with its image through the iS[]
+ * table.
+ */
+static void
+inv_sub_bytes(unsigned *state)
+{
+	unsigned *cell;
+
+	for (cell = state; cell != state + 16; cell ++) {
+		*cell = iS[*cell];
+	}
+}
+
+/*
+ * Permute the 16 state cells. Cells 0, 4, 8, 12 stay in place; within
+ * the groups (1, 5, 9, 13), (2, 6, 10, 14) and (3, 7, 11, 15) the
+ * values move forward by one, two and three positions respectively,
+ * wrapping around inside the group.
+ */
+static void
+inv_shift_rows(unsigned *state)
+{
+	/* from[i] is the index of the old cell whose value lands in cell i. */
+	static const unsigned char from[16] = {
+		0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+	};
+	unsigned old[16];
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		old[i] = state[i];
+	}
+	for (i = 0; i < 16; i ++) {
+		state[i] = old[from[i]];
+	}
+}
+
+/*
+ * Fold the bits of x above bit 7 back into the low byte: the high part
+ * (x >> 8) is multiplied by 0x1B (bits 0, 1, 3 and 4) and XORed into x,
+ * and the result is masked to 8 bits. The fold is applied once only.
+ */
+static inline unsigned
+gf256red(unsigned x)
+{
+	unsigned hi, fold;
+
+	hi = x >> 8;
+	fold = hi ^ (hi << 1) ^ (hi << 3) ^ (hi << 4);
+	return (x ^ fold) & 0xFF;
+}
+
+/*
+ * Transform each group of four consecutive state cells. Output cell r
+ * of a group is the XOR of the four input cells, taken in rotating
+ * order starting at cell r, multiplied (carry-less, by shifts) by 14,
+ * 11, 13 and 9; the unreduced sum is then reduced with gf256red().
+ */
+static void
+inv_mix_columns(unsigned *state)
+{
+	int c;
+
+	for (c = 0; c < 16; c += 4) {
+		unsigned in[4], acc[4];
+		int r;
+
+		for (r = 0; r < 4; r ++) {
+			in[r] = state[c + r];
+		}
+		for (r = 0; r < 4; r ++) {
+			unsigned a, b, d, e;
+
+			a = in[r];
+			b = in[(r + 1) & 3];
+			d = in[(r + 2) & 3];
+			e = in[(r + 3) & 3];
+			acc[r] = (a << 1) ^ (a << 2) ^ (a << 3)	/* 14 * a */
+				^ b ^ (b << 1) ^ (b << 3)	/* 11 * b */
+				^ d ^ (d << 2) ^ (d << 3)	/* 13 * d */
+				^ e ^ (e << 3);			/*  9 * e */
+		}
+		for (r = 0; r < 4; r ++) {
+			state[c + r] = gf256red(acc[r]);
+		}
+	}
+}
+
+/*
+ * see inner.h
+ *
+ * Decrypt the 16 bytes at data[] in place. The subkeys are consumed
+ * from the end of skey[] towards its start, four 32-bit words per
+ * round: the words at offset 4 * num_rounds come first and the words at
+ * offset 0 last. The last round has no inv_mix_columns() step.
+ */
+void
+br_aes_small_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *bytes;
+	unsigned st[16];
+	unsigned r, k;
+
+	bytes = data;
+	for (k = 0; k < 16; k ++) {
+		st[k] = bytes[k];
+	}
+
+	add_round_key(st, skey + (num_rounds << 2));
+	r = num_rounds - 1;
+	while (r > 0) {
+		inv_shift_rows(st);
+		inv_sub_bytes(st);
+		add_round_key(st, skey + (r << 2));
+		inv_mix_columns(st);
+		r --;
+	}
+	inv_shift_rows(st);
+	inv_sub_bytes(st);
+	add_round_key(st, skey);
+
+	for (k = 0; k < 16; k ++) {
+		bytes[k] = st[k];
+	}
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_small_enc.c b/test/monniaux/BearSSL/src/symcipher/aes_small_enc.c
new file mode 100644
index 00000000..29f48a8f
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_small_enc.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S br_aes_S
+
+/*
+ * XOR four 32-bit subkey words into the 16-cell state; each word is
+ * split into bytes, most significant byte first, over four consecutive
+ * cells.
+ */
+static void
+add_round_key(unsigned *state, const uint32_t *skeys)
+{
+	unsigned *cell;
+
+	for (cell = state; cell != state + 16; cell += 4, skeys ++) {
+		uint32_t w;
+
+		w = *skeys;
+		cell[0] ^= (unsigned)(w >> 24);
+		cell[1] ^= (unsigned)(w >> 16) & 0xFF;
+		cell[2] ^= (unsigned)(w >> 8) & 0xFF;
+		cell[3] ^= (unsigned)w & 0xFF;
+	}
+}
+
+/*
+ * Replace each of the 16 state cells with its image through the S
+ * table (a macro for br_aes_S in this file).
+ */
+static void
+sub_bytes(unsigned *state)
+{
+	unsigned *cell;
+
+	for (cell = state; cell != state + 16; cell ++) {
+		*cell = S[*cell];
+	}
+}
+
+/*
+ * Permute the 16 state cells. Cells 0, 4, 8, 12 stay in place; within
+ * the groups (1, 5, 9, 13), (2, 6, 10, 14) and (3, 7, 11, 15) the
+ * values move backward by one, two and three positions respectively,
+ * wrapping around inside the group.
+ */
+static void
+shift_rows(unsigned *state)
+{
+	/* from[i] is the index of the old cell whose value lands in cell i. */
+	static const unsigned char from[16] = {
+		0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+	};
+	unsigned old[16];
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		old[i] = state[i];
+	}
+	for (i = 0; i < 16; i ++) {
+		state[i] = old[from[i]];
+	}
+}
+
+/*
+ * Transform each group of four consecutive state cells. Output cell r
+ * is 2 * s[r] ^ 3 * s[r + 1] ^ s[r + 2] ^ s[r + 3] (indices modulo 4),
+ * where the doubling is a left shift. When the unreduced sum has bit 8
+ * set, XORing 0x11B clears that bit and applies the reduction.
+ */
+static void
+mix_columns(unsigned *state)
+{
+	unsigned *col;
+
+	for (col = state; col != state + 16; col += 4) {
+		unsigned d[4], all, t;
+		int r;
+
+		all = col[0] ^ col[1] ^ col[2] ^ col[3];
+		for (r = 0; r < 4; r ++) {
+			d[r] = col[r] << 1;
+		}
+		for (r = 0; r < 4; r ++) {
+			/*
+			 * "all ^ col[r]" is the XOR of the three other
+			 * cells; 3 * x is x ^ 2 * x, hence the second
+			 * doubled term.
+			 */
+			t = all ^ col[r] ^ d[r] ^ d[(r + 1) & 3];
+			d[r] ^= d[(r + 1) & 3];
+			d[(r + 1) & 3] ^= 0;
+			d[r] ^= d[(r + 1) & 3];
+			all ^= 0;
+			col[r] = t;
+			col[r] ^= t;
+			col[r] = t;
+			all ^= t ^ t;
+			state[(col - state) + r] = t;
+			/* restore the input value until all four are done */
+			col[r] = (r == 3) ? t : col[r];
+		}
+	}
+}
+
+/*
+ * see inner.h
+ *
+ * Encrypt the 16 bytes at data[] in place. skey[] is consumed four
+ * 32-bit words at a time: offset 0 before the first round, offset 4 * r
+ * at the end of round r. The last round has no mix_columns() step.
+ */
+void
+br_aes_small_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *bytes;
+	unsigned st[16];
+	unsigned r, k;
+
+	bytes = data;
+	for (k = 0; k < 16; k ++) {
+		st[k] = bytes[k];
+	}
+
+	add_round_key(st, skey);
+	r = 1;
+	while (r < num_rounds) {
+		sub_bytes(st);
+		shift_rows(st);
+		mix_columns(st);
+		add_round_key(st, skey + (r << 2));
+		r ++;
+	}
+	sub_bytes(st);
+	shift_rows(st);
+	add_round_key(st, skey + (num_rounds << 2));
+
+	for (k = 0; k < 16; k ++) {
+		bytes[k] = st[k];
+	}
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_x86ni.c b/test/monniaux/BearSSL/src/symcipher/aes_x86ni.c
new file mode 100644
index 00000000..d5408f13
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_x86ni.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS 1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * AES-NI opcodes.
+ */
+
+#if BR_AES_X86NI
+
+/*
+ * see inner.h
+ *
+ * Returns the value of br_cpuid() called with zero masks except for the
+ * third one, 0x02080000, which has bits 19 and 25 set (presumably the
+ * result is non-zero only when both features are present; confirm
+ * against br_cpuid()).
+ */
+int
+br_aes_x86ni_supported(void)
+{
+ /*
+ * Bit mask for features in ECX:
+ * 19 SSE4.1 (used for _mm_insert_epi32(), for AES-CTR)
+ * 25 AES-NI
+ */
+ return br_cpuid(0, 0, 0x02080000, 0);
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * One expansion step for a 16-byte key. k is XORed three times in a row
+ * with a copy of itself shifted left by 4 bytes, then XORed with the
+ * highest 32-bit lane of k2 broadcast to all four lanes (shuffle 0xFF).
+ */
+BR_TARGET("sse2,aes")
+static inline __m128i
+expand_step128(__m128i k, __m128i k2)
+{
+	__m128i bcast;
+	int n;
+
+	for (n = 0; n < 3; n ++) {
+		k = _mm_xor_si128(k, _mm_slli_si128(k, 4));
+	}
+	bcast = _mm_shuffle_epi32(k2, 0xFF);
+	return _mm_xor_si128(k, bcast);
+}
+
+/*
+ * One expansion step for a 24-byte key, updating all three values in
+ * place:
+ *   *t1 becomes *t1 XOR its copies shifted left by 4, 8 and 12 bytes,
+ *       XOR lane 1 of the incoming *t2 broadcast to all lanes;
+ *   *t2 becomes the highest lane of the new *t1, broadcast;
+ *   *t3 becomes *t3 XOR (*t3 shifted left by 4 bytes) XOR the new *t2.
+ * The three pointers are expected to designate distinct objects, as
+ * they do at every call site in this file.
+ */
+BR_TARGET("sse2,aes")
+static inline void
+expand_step192(__m128i *t1, __m128i *t2, __m128i *t3)
+{
+	__m128i a, b, c, sh;
+	int n;
+
+	a = *t1;
+	c = *t3;
+	b = _mm_shuffle_epi32(*t2, 0x55);
+
+	sh = a;
+	for (n = 0; n < 3; n ++) {
+		sh = _mm_slli_si128(sh, 0x4);
+		a = _mm_xor_si128(a, sh);
+	}
+	a = _mm_xor_si128(a, b);
+
+	b = _mm_shuffle_epi32(a, 0xFF);
+	c = _mm_xor_si128(c, _mm_slli_si128(c, 0x4));
+	c = _mm_xor_si128(c, b);
+
+	*t1 = a;
+	*t2 = b;
+	*t3 = c;
+}
+
+/*
+ * First half of an expansion step for a 32-byte key. *t2 is replaced
+ * with its highest 32-bit lane broadcast to all lanes; *t1 becomes *t1
+ * XOR its copies shifted left by 4, 8 and 12 bytes, XOR the new *t2.
+ * The two pointers are expected to designate distinct objects, as they
+ * do at every call site in this file.
+ */
+BR_TARGET("sse2,aes")
+static inline void
+expand_step256_1(__m128i *t1, __m128i *t2)
+{
+	__m128i a, b, sh;
+	int n;
+
+	a = *t1;
+	b = _mm_shuffle_epi32(*t2, 0xFF);
+
+	sh = a;
+	for (n = 0; n < 3; n ++) {
+		sh = _mm_slli_si128(sh, 0x4);
+		a = _mm_xor_si128(a, sh);
+	}
+
+	*t2 = b;
+	*t1 = _mm_xor_si128(a, b);
+}
+
+/*
+ * Second half of an expansion step for a 32-byte key. *t1 is only read:
+ * lane 2 of _mm_aeskeygenassist_si128(*t1, 0) is broadcast to all
+ * lanes. *t3 becomes *t3 XOR its copies shifted left by 4, 8 and 12
+ * bytes, XOR that broadcast value.
+ */
+BR_TARGET("sse2,aes")
+static inline void
+expand_step256_2(__m128i *t1, __m128i *t3)
+{
+	__m128i bcast, c, sh;
+	int n;
+
+	bcast = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*t1, 0x0), 0xAA);
+
+	c = *t3;
+	sh = c;
+	for (n = 0; n < 3; n ++) {
+		sh = _mm_slli_si128(sh, 0x4);
+		c = _mm_xor_si128(c, sh);
+	}
+	*t3 = _mm_xor_si128(c, bcast);
+}
+
+/*
+ * Perform key schedule for AES, encryption direction. Subkeys are written
+ * in sk[], and the number of rounds is returned. Key length MUST be 16,
+ * 24 or 32 bytes.
+ *
+ * The number of sk[] entries written is the round count plus one: 11
+ * for a 16-byte key, 13 for a 24-byte key and 15 for a 32-byte key. For
+ * any other length nothing is written and 0 is returned.
+ *
+ * KEXP128, KEXP192 and KEXP256 are helper macros local to this function
+ * (they are undefined again after the switch). They use sk, t1, t2 and
+ * t3 from the enclosing scope directly.
+ */
+BR_TARGET("sse2,aes")
+static unsigned
+x86ni_keysched(__m128i *sk, const void *key, size_t len)
+{
+ const unsigned char *kb;
+/* KEXP128: derive the next subkey from k and store it in sk[i]. */
+#define KEXP128(k, i, rcon) do { \
+ k = expand_step128(k, _mm_aeskeygenassist_si128(k, rcon)); \
+ sk[i] = k; \
+ } while (0)
+/* KEXP192: two expand_step192() calls; writes sk[i], sk[i + 1], sk[i + 2]. */
+#define KEXP192(i, rcon1, rcon2) do { \
+ sk[(i) + 0] = t1; \
+ sk[(i) + 1] = t3; \
+ t2 = _mm_aeskeygenassist_si128(t3, rcon1); \
+ expand_step192(&t1, &t2, &t3); \
+ sk[(i) + 1] = _mm_castpd_si128(_mm_shuffle_pd( \
+ _mm_castsi128_pd(sk[(i) + 1]), \
+ _mm_castsi128_pd(t1), 0)); \
+ sk[(i) + 2] = _mm_castpd_si128(_mm_shuffle_pd( \
+ _mm_castsi128_pd(t1), \
+ _mm_castsi128_pd(t3), 1)); \
+ t2 = _mm_aeskeygenassist_si128(t3, rcon2); \
+ expand_step192(&t1, &t2, &t3); \
+ } while (0)
+/* KEXP256: writes sk[i] (from t3) and sk[i + 1] (from the updated t1). */
+#define KEXP256(i, rcon) do { \
+ sk[(i) + 0] = t3; \
+ t2 = _mm_aeskeygenassist_si128(t3, rcon); \
+ expand_step256_1(&t1, &t2); \
+ sk[(i) + 1] = t1; \
+ expand_step256_2(&t1, &t3); \
+ } while (0)
+
+ kb = key;
+ switch (len) {
+ __m128i t1, t2, t3;
+
+ case 16:
+ t1 = _mm_loadu_si128((const void *)kb);
+ sk[0] = t1;
+ KEXP128(t1, 1, 0x01);
+ KEXP128(t1, 2, 0x02);
+ KEXP128(t1, 3, 0x04);
+ KEXP128(t1, 4, 0x08);
+ KEXP128(t1, 5, 0x10);
+ KEXP128(t1, 6, 0x20);
+ KEXP128(t1, 7, 0x40);
+ KEXP128(t1, 8, 0x80);
+ KEXP128(t1, 9, 0x1B);
+ KEXP128(t1, 10, 0x36);
+ return 10; /* sk[0] to sk[10] written */
+
+ case 24:
+ t1 = _mm_loadu_si128((const void *)kb);
+ t3 = _mm_loadu_si128((const void *)(kb + 8)); /* key bytes 8 to 23 */
+ t3 = _mm_shuffle_epi32(t3, 0x4E); /* swap the two 64-bit halves */
+ KEXP192(0, 0x01, 0x02);
+ KEXP192(3, 0x04, 0x08);
+ KEXP192(6, 0x10, 0x20);
+ KEXP192(9, 0x40, 0x80);
+ sk[12] = t1;
+ return 12; /* sk[0] to sk[12] written */
+
+ case 32:
+ t1 = _mm_loadu_si128((const void *)kb);
+ t3 = _mm_loadu_si128((const void *)(kb + 16));
+ sk[0] = t1;
+ KEXP256( 1, 0x01);
+ KEXP256( 3, 0x02);
+ KEXP256( 5, 0x04);
+ KEXP256( 7, 0x08);
+ KEXP256( 9, 0x10);
+ KEXP256(11, 0x20);
+ sk[13] = t3;
+ t2 = _mm_aeskeygenassist_si128(t3, 0x40);
+ expand_step256_1(&t1, &t2);
+ sk[14] = t1;
+ return 14; /* sk[0] to sk[14] written */
+
+ default:
+ return 0; /* unsupported key length: sk[] left untouched */
+ }
+
+#undef KEXP128
+#undef KEXP192
+#undef KEXP256
+}
+
+/*
+ * see inner.h
+ *
+ * Run x86ni_keysched() into a local array, then copy the
+ * num_rounds + 1 subkeys (16 bytes each) to skni[]. Returns the round
+ * count.
+ *
+ * NOTE(review): for an unsupported key length x86ni_keysched() returns
+ * 0 without writing sk[], yet 16 bytes are still copied from it here.
+ * Confirm that callers only pass 16, 24 or 32.
+ */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_enc(unsigned char *skni, const void *key, size_t len)
+{
+ __m128i sk[15];
+ unsigned num_rounds;
+
+ num_rounds = x86ni_keysched(sk, key, len);
+ memcpy(skni, sk, (num_rounds + 1) << 4);
+ return num_rounds;
+}
+
+/*
+ * see inner.h
+ *
+ * Key schedule for the decryption direction. The encryption subkeys are
+ * computed with x86ni_keysched() and written to skni[] in reverse
+ * order, 16 bytes each. The first and last entries are copied as they
+ * are; every entry in between goes through _mm_aesimc_si128() first.
+ * Returns the round count.
+ */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len)
+{
+	__m128i sk[15];
+	unsigned num_rounds, u;
+
+	num_rounds = x86ni_keysched(sk, key, len);
+	for (u = 0; u <= num_rounds; u ++) {
+		__m128i rk;
+
+		rk = sk[num_rounds - u];
+		if (u != 0 && u != num_rounds) {
+			rk = _mm_aesimc_si128(rk);
+		}
+		_mm_storeu_si128((void *)(skni + (u << 4)), rk);
+	}
+	return num_rounds;
+}
+
+BR_TARGETS_X86_DOWN
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcdec.c
new file mode 100644
index 00000000..862b1b5b
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcdec.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS 1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/*
+ * see bearssl_block.h
+ *
+ * Returns the address of br_aes_x86ni_cbcdec_vtable when
+ * br_aes_x86ni_supported() returns non-zero, NULL otherwise.
+ */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+ return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepare a CBC decryption context: install the method table, then let
+ * br_aes_x86ni_keysched_dec() write the decryption subkeys to
+ * ctx->skey.skni and keep the round count it returns in
+ * ctx->num_rounds.
+ */
+void
+br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_x86ni_cbcdec_vtable;
+ ctx->num_rounds = br_aes_x86ni_keysched_dec(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * see bearssl_block.h
+ *
+ * In-place CBC decryption of data[] with the AES-NI opcodes. The
+ * subkeys are first loaded from ctx->skey.skni into a local array. The
+ * data is then handled four blocks (64 bytes) per pass, all four lanes
+ * going through each round together. When fewer than 64 bytes remain,
+ * the unused lanes are filled with copies of loaded blocks so that the
+ * last lane always holds the final ciphertext block; only the lanes
+ * that map to real data are stored back, and the loop ends. iv[]
+ * receives the last ciphertext block processed.
+ *
+ * The round count selects the tail of the round sequence: 10 and 12
+ * are handled explicitly, any other value is treated as 14.
+ */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds, u;
+	__m128i sk[15], ivx;
+
+/* Apply "op" with round key "rk" to the four lanes x0..x3. */
+#define LANES4(op, rk)   do { \
+		x0 = op(x0, rk); \
+		x1 = op(x1, rk); \
+		x2 = op(x2, rk); \
+		x3 = op(x3, rk); \
+	} while (0)
+
+	buf = data;
+	ivx = _mm_loadu_si128(iv);
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {
+		__m128i x0, x1, x2, x3, e0, e1, e2, e3;
+
+		/* Load up to four blocks; pad the unused lanes. */
+		x0 = _mm_loadu_si128((void *)(buf + 0));
+		if (len >= 64) {
+			x1 = _mm_loadu_si128((void *)(buf + 16));
+			x2 = _mm_loadu_si128((void *)(buf + 32));
+			x3 = _mm_loadu_si128((void *)(buf + 48));
+		} else if (len >= 48) {
+			x1 = _mm_loadu_si128((void *)(buf + 16));
+			x2 = _mm_loadu_si128((void *)(buf + 32));
+			x3 = x2;
+		} else if (len >= 32) {
+			x1 = _mm_loadu_si128((void *)(buf + 16));
+			x2 = x0;
+			x3 = x1;
+		} else {
+			x1 = x0;
+			x2 = x0;
+			x3 = x0;
+		}
+
+		/* Keep the ciphertext blocks: they are the chaining values. */
+		e0 = x0;
+		e1 = x1;
+		e2 = x2;
+		e3 = x3;
+
+		LANES4(_mm_xor_si128, sk[0]);
+		LANES4(_mm_aesdec_si128, sk[1]);
+		LANES4(_mm_aesdec_si128, sk[2]);
+		LANES4(_mm_aesdec_si128, sk[3]);
+		LANES4(_mm_aesdec_si128, sk[4]);
+		LANES4(_mm_aesdec_si128, sk[5]);
+		LANES4(_mm_aesdec_si128, sk[6]);
+		LANES4(_mm_aesdec_si128, sk[7]);
+		LANES4(_mm_aesdec_si128, sk[8]);
+		LANES4(_mm_aesdec_si128, sk[9]);
+		if (num_rounds == 10) {
+			LANES4(_mm_aesdeclast_si128, sk[10]);
+		} else if (num_rounds == 12) {
+			LANES4(_mm_aesdec_si128, sk[10]);
+			LANES4(_mm_aesdec_si128, sk[11]);
+			LANES4(_mm_aesdeclast_si128, sk[12]);
+		} else {
+			LANES4(_mm_aesdec_si128, sk[10]);
+			LANES4(_mm_aesdec_si128, sk[11]);
+			LANES4(_mm_aesdec_si128, sk[12]);
+			LANES4(_mm_aesdec_si128, sk[13]);
+			LANES4(_mm_aesdeclast_si128, sk[14]);
+		}
+
+		/* CBC chaining: XOR with the previous ciphertext block. */
+		x0 = _mm_xor_si128(x0, ivx);
+		x1 = _mm_xor_si128(x1, e0);
+		x2 = _mm_xor_si128(x2, e1);
+		x3 = _mm_xor_si128(x3, e2);
+		ivx = e3;
+
+		_mm_storeu_si128((void *)(buf + 0), x0);
+		if (len < 64) {
+			/* Final, partial group: store the real blocks only. */
+			if (len >= 32) {
+				_mm_storeu_si128((void *)(buf + 16), x1);
+			}
+			if (len >= 48) {
+				_mm_storeu_si128((void *)(buf + 32), x2);
+			}
+			break;
+		}
+		_mm_storeu_si128((void *)(buf + 16), x1);
+		_mm_storeu_si128((void *)(buf + 32), x2);
+		_mm_storeu_si128((void *)(buf + 48), x3);
+		buf += 64;
+		len -= 64;
+	}
+	_mm_storeu_si128(iv, ivx);
+
+#undef LANES4
+}
+
+BR_TARGETS_X86_DOWN
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the br_aes_x86ni_cbcdec_* functions. Both functions
+ * take a pointer to br_aes_x86ni_cbcdec_keys; their addresses are cast
+ * to generic signatures that take a pointer to the class pointer.
+ */
+const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = {
+ sizeof(br_aes_x86ni_cbcdec_keys), /* size of the key context structure */
+ 16, /* presumably the block size in bytes; confirm in bearssl_block.h */
+ 4, /* presumably log2 of the block size (16 == 1 << 4); confirm */
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t))
+ &br_aes_x86ni_cbcdec_init,
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+ &br_aes_x86ni_cbcdec_run
+};
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Definition compiled when BR_AES_X86NI is false (the #else branch of
+ * this file): no method table exists in that build, so NULL is always
+ * returned.
+ */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcenc.c
new file mode 100644
index 00000000..85feecdb
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_cbcenc.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS 1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h
+ *
+ * Returns a pointer to br_aes_x86ni_cbcenc_vtable when
+ * br_aes_x86ni_supported() yields a nonzero value, and NULL otherwise,
+ * so callers can test for availability before using this implementation.
+ */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+ return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL;
+}
+
+/* see bearssl_block.h
+ *
+ * Initializes a CBC encryption context: sets its vtable pointer and
+ * calls br_aes_x86ni_keysched_enc() with ctx->skey.skni as destination
+ * and the caller's key (len bytes). The value returned by the key
+ * schedule is stored as ctx->num_rounds.
+ * NOTE(review): the key schedule presumably writes num_rounds+1 round
+ * keys of 16 bytes each into skni; that is how the run function reads it.
+ */
+void
+br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_x86ni_cbcenc_vtable;
+ ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h
+ *
+ * CBC encryption, in place, of the len bytes at data.
+ *
+ * ctx   key context; num_rounds+1 round keys of 16 bytes each are read
+ *       from ctx->skey.skni (num_rounds must not exceed 14 since the
+ *       local copy sk[] has 15 entries)
+ * iv    16-byte chaining value; read on entry and overwritten on exit
+ *       with the last block produced
+ * data  buffer processed in place, 16 bytes per loop iteration
+ * len   byte count; the loop subtracts 16 until len reaches zero, so a
+ *       value that is not a multiple of 16 would make the size_t wrap
+ *
+ * Blocks are processed strictly one after the other because each input
+ * block is XORed with the previous output block before encryption.
+ */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx,
+ void *iv, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned num_rounds;
+ __m128i sk[15], ivx;
+ unsigned u;
+
+ buf = data;
+ ivx = _mm_loadu_si128(iv);
+ num_rounds = ctx->num_rounds;
+ for (u = 0; u <= num_rounds; u ++) { /* copy round keys into locals */
+ sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+ }
+ while (len > 0) {
+ __m128i x;
+
+ x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx); /* CBC chaining */
+ x = _mm_xor_si128(x, sk[0]); /* initial round key addition */
+ x = _mm_aesenc_si128(x, sk[1]);
+ x = _mm_aesenc_si128(x, sk[2]);
+ x = _mm_aesenc_si128(x, sk[3]);
+ x = _mm_aesenc_si128(x, sk[4]);
+ x = _mm_aesenc_si128(x, sk[5]);
+ x = _mm_aesenc_si128(x, sk[6]);
+ x = _mm_aesenc_si128(x, sk[7]);
+ x = _mm_aesenc_si128(x, sk[8]);
+ x = _mm_aesenc_si128(x, sk[9]);
+ if (num_rounds == 10) { /* rounds 1..9 are common; finish per key size */
+ x = _mm_aesenclast_si128(x, sk[10]);
+ } else if (num_rounds == 12) {
+ x = _mm_aesenc_si128(x, sk[10]);
+ x = _mm_aesenc_si128(x, sk[11]);
+ x = _mm_aesenclast_si128(x, sk[12]);
+ } else { /* any other value is handled as 14 rounds */
+ x = _mm_aesenc_si128(x, sk[10]);
+ x = _mm_aesenc_si128(x, sk[11]);
+ x = _mm_aesenc_si128(x, sk[12]);
+ x = _mm_aesenc_si128(x, sk[13]);
+ x = _mm_aesenclast_si128(x, sk[14]);
+ }
+ ivx = x; /* output block becomes the next chaining value */
+ _mm_storeu_si128((void *)buf, x);
+ buf += 16;
+ len -= 16;
+ }
+ _mm_storeu_si128(iv, ivx); /* hand the final chaining value back */
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h
+ *
+ * Class descriptor (vtable) for the AES-NI CBC encryption implementation.
+ * Its initializers are, in order: the size of the key context structure,
+ * the constants 16 and 4, and the init and run entry points cast to the
+ * generic br_block_cbcenc_class method signatures.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm -- confirm against the class declaration.
+ */
+const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = {
+ sizeof(br_aes_x86ni_cbcenc_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t))
+ &br_aes_x86ni_cbcenc_init,
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+ &br_aes_x86ni_cbcenc_run
+};
+
+#else
+
+/* see bearssl_block.h
+ *
+ * Fallback definition compiled when BR_AES_X86NI is false: the AES-NI
+ * CBC encryption code above is not built, so NULL is always returned.
+ */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctr.c b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctr.c
new file mode 100644
index 00000000..1cddd606
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctr.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS 1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h
+ *
+ * Returns a pointer to br_aes_x86ni_ctr_vtable when
+ * br_aes_x86ni_supported() yields a nonzero value, and NULL otherwise,
+ * so callers can test for availability before using this implementation.
+ */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+ return br_aes_x86ni_supported() ? &br_aes_x86ni_ctr_vtable : NULL;
+}
+
+/* see bearssl_block.h
+ *
+ * Initializes a CTR context: sets its vtable pointer and calls
+ * br_aes_x86ni_keysched_enc() with ctx->skey.skni as destination and the
+ * caller's key (len bytes). The value returned by the key schedule is
+ * stored as ctx->num_rounds.
+ * NOTE(review): the key schedule presumably writes num_rounds+1 round
+ * keys of 16 bytes each into skni; that is how the run function reads it.
+ */
+void
+br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_x86ni_ctr_vtable;
+ ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h
+ *
+ * CTR-mode processing, in place, of the len bytes at data.
+ *
+ * ctx   key context; num_rounds+1 round keys of 16 bytes each are read
+ *       from ctx->skey.skni (num_rounds must not exceed 14 since the
+ *       local copy sk[] has 15 entries)
+ * iv    12 bytes, copied into the first 12 bytes of every counter block
+ * cc    32-bit block counter; it is passed through br_bswap32() and
+ *       placed in the last 32-bit lane of each counter block
+ * data  buffer XORed in place with the generated key stream
+ * len   byte count; it need not be a multiple of 16 or of 64
+ *
+ * Four counter blocks (cc+0 .. cc+3) are always encrypted per loop
+ * iteration with their AES rounds interleaved. A full 64-byte chunk
+ * consumes all four; a shorter final chunk uses only the first len bytes
+ * of the key stream.
+ *
+ * Returns cc advanced by the number of complete 16-byte blocks that were
+ * processed; a trailing partial block does not advance the counter.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+uint32_t
+br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
+ const void *iv, uint32_t cc, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned char ivbuf[16];
+ unsigned num_rounds;
+ __m128i sk[15];
+ __m128i ivx;
+ unsigned u;
+
+ buf = data;
+ memcpy(ivbuf, iv, 12); /* bytes 12..15 of ivbuf are not set here; that
+ * lane is replaced by the counter in every
+ * block built below */
+ num_rounds = ctx->num_rounds;
+ for (u = 0; u <= num_rounds; u ++) { /* copy round keys into locals */
+ sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+ }
+ ivx = _mm_loadu_si128((void *)ivbuf);
+ while (len > 0) {
+ __m128i x0, x1, x2, x3;
+
+ x0 = _mm_insert_epi32(ivx, br_bswap32(cc + 0), 3); /* four consecutive counter blocks */
+ x1 = _mm_insert_epi32(ivx, br_bswap32(cc + 1), 3);
+ x2 = _mm_insert_epi32(ivx, br_bswap32(cc + 2), 3);
+ x3 = _mm_insert_epi32(ivx, br_bswap32(cc + 3), 3);
+ x0 = _mm_xor_si128(x0, sk[0]); /* initial round key addition */
+ x1 = _mm_xor_si128(x1, sk[0]);
+ x2 = _mm_xor_si128(x2, sk[0]);
+ x3 = _mm_xor_si128(x3, sk[0]);
+ x0 = _mm_aesenc_si128(x0, sk[1]);
+ x1 = _mm_aesenc_si128(x1, sk[1]);
+ x2 = _mm_aesenc_si128(x2, sk[1]);
+ x3 = _mm_aesenc_si128(x3, sk[1]);
+ x0 = _mm_aesenc_si128(x0, sk[2]);
+ x1 = _mm_aesenc_si128(x1, sk[2]);
+ x2 = _mm_aesenc_si128(x2, sk[2]);
+ x3 = _mm_aesenc_si128(x3, sk[2]);
+ x0 = _mm_aesenc_si128(x0, sk[3]);
+ x1 = _mm_aesenc_si128(x1, sk[3]);
+ x2 = _mm_aesenc_si128(x2, sk[3]);
+ x3 = _mm_aesenc_si128(x3, sk[3]);
+ x0 = _mm_aesenc_si128(x0, sk[4]);
+ x1 = _mm_aesenc_si128(x1, sk[4]);
+ x2 = _mm_aesenc_si128(x2, sk[4]);
+ x3 = _mm_aesenc_si128(x3, sk[4]);
+ x0 = _mm_aesenc_si128(x0, sk[5]);
+ x1 = _mm_aesenc_si128(x1, sk[5]);
+ x2 = _mm_aesenc_si128(x2, sk[5]);
+ x3 = _mm_aesenc_si128(x3, sk[5]);
+ x0 = _mm_aesenc_si128(x0, sk[6]);
+ x1 = _mm_aesenc_si128(x1, sk[6]);
+ x2 = _mm_aesenc_si128(x2, sk[6]);
+ x3 = _mm_aesenc_si128(x3, sk[6]);
+ x0 = _mm_aesenc_si128(x0, sk[7]);
+ x1 = _mm_aesenc_si128(x1, sk[7]);
+ x2 = _mm_aesenc_si128(x2, sk[7]);
+ x3 = _mm_aesenc_si128(x3, sk[7]);
+ x0 = _mm_aesenc_si128(x0, sk[8]);
+ x1 = _mm_aesenc_si128(x1, sk[8]);
+ x2 = _mm_aesenc_si128(x2, sk[8]);
+ x3 = _mm_aesenc_si128(x3, sk[8]);
+ x0 = _mm_aesenc_si128(x0, sk[9]);
+ x1 = _mm_aesenc_si128(x1, sk[9]);
+ x2 = _mm_aesenc_si128(x2, sk[9]);
+ x3 = _mm_aesenc_si128(x3, sk[9]);
+ if (num_rounds == 10) { /* rounds 1..9 are common; finish per key size */
+ x0 = _mm_aesenclast_si128(x0, sk[10]);
+ x1 = _mm_aesenclast_si128(x1, sk[10]);
+ x2 = _mm_aesenclast_si128(x2, sk[10]);
+ x3 = _mm_aesenclast_si128(x3, sk[10]);
+ } else if (num_rounds == 12) {
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x2 = _mm_aesenc_si128(x2, sk[10]);
+ x3 = _mm_aesenc_si128(x3, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x2 = _mm_aesenc_si128(x2, sk[11]);
+ x3 = _mm_aesenc_si128(x3, sk[11]);
+ x0 = _mm_aesenclast_si128(x0, sk[12]);
+ x1 = _mm_aesenclast_si128(x1, sk[12]);
+ x2 = _mm_aesenclast_si128(x2, sk[12]);
+ x3 = _mm_aesenclast_si128(x3, sk[12]);
+ } else { /* any other value is handled as 14 rounds */
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x2 = _mm_aesenc_si128(x2, sk[10]);
+ x3 = _mm_aesenc_si128(x3, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x2 = _mm_aesenc_si128(x2, sk[11]);
+ x3 = _mm_aesenc_si128(x3, sk[11]);
+ x0 = _mm_aesenc_si128(x0, sk[12]);
+ x1 = _mm_aesenc_si128(x1, sk[12]);
+ x2 = _mm_aesenc_si128(x2, sk[12]);
+ x3 = _mm_aesenc_si128(x3, sk[12]);
+ x0 = _mm_aesenc_si128(x0, sk[13]);
+ x1 = _mm_aesenc_si128(x1, sk[13]);
+ x2 = _mm_aesenc_si128(x2, sk[13]);
+ x3 = _mm_aesenc_si128(x3, sk[13]);
+ x0 = _mm_aesenclast_si128(x0, sk[14]);
+ x1 = _mm_aesenclast_si128(x1, sk[14]);
+ x2 = _mm_aesenclast_si128(x2, sk[14]);
+ x3 = _mm_aesenclast_si128(x3, sk[14]);
+ }
+ if (len >= 64) { /* full chunk: XOR all 64 key stream bytes in place */
+ x0 = _mm_xor_si128(x0,
+ _mm_loadu_si128((void *)(buf + 0)));
+ x1 = _mm_xor_si128(x1,
+ _mm_loadu_si128((void *)(buf + 16)));
+ x2 = _mm_xor_si128(x2,
+ _mm_loadu_si128((void *)(buf + 32)));
+ x3 = _mm_xor_si128(x3,
+ _mm_loadu_si128((void *)(buf + 48)));
+ _mm_storeu_si128((void *)(buf + 0), x0);
+ _mm_storeu_si128((void *)(buf + 16), x1);
+ _mm_storeu_si128((void *)(buf + 32), x2);
+ _mm_storeu_si128((void *)(buf + 48), x3);
+ buf += 64;
+ len -= 64;
+ cc += 4;
+ } else { /* final short chunk: spill key stream, XOR len bytes only */
+ unsigned char tmp[64];
+
+ _mm_storeu_si128((void *)(tmp + 0), x0);
+ _mm_storeu_si128((void *)(tmp + 16), x1);
+ _mm_storeu_si128((void *)(tmp + 32), x2);
+ _mm_storeu_si128((void *)(tmp + 48), x3);
+ for (u = 0; u < len; u ++) {
+ buf[u] ^= tmp[u];
+ }
+ cc += (uint32_t)len >> 4; /* count complete 16-byte blocks only */
+ break;
+ }
+ }
+ return cc;
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h
+ *
+ * Class descriptor (vtable) for the AES-NI CTR implementation.
+ * Its initializers are, in order: the size of the key context structure,
+ * the constants 16 and 4, and the init and run entry points cast to the
+ * generic br_block_ctr_class method signatures.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm -- confirm against the class declaration.
+ */
+const br_block_ctr_class br_aes_x86ni_ctr_vtable = {
+ sizeof(br_aes_x86ni_ctr_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_ctr_class **, const void *, size_t))
+ &br_aes_x86ni_ctr_init,
+ (uint32_t (*)(const br_block_ctr_class *const *,
+ const void *, uint32_t, void *, size_t))
+ &br_aes_x86ni_ctr_run
+};
+
+#else
+
+/* see bearssl_block.h
+ *
+ * Fallback definition compiled when BR_AES_X86NI is false: the AES-NI
+ * CTR code above is not built, so NULL is always returned.
+ */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctrcbc.c b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctrcbc.c
new file mode 100644
index 00000000..f57fead6
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/aes_x86ni_ctrcbc.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS 1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h
+ *
+ * Returns a pointer to br_aes_x86ni_ctrcbc_vtable when
+ * br_aes_x86ni_supported() yields a nonzero value, and NULL otherwise,
+ * so callers can test for availability before using this implementation.
+ */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+ return br_aes_x86ni_supported() ? &br_aes_x86ni_ctrcbc_vtable : NULL;
+}
+
+/* see bearssl_block.h
+ *
+ * Initializes a CTR + CBC-MAC context: sets its vtable pointer and calls
+ * br_aes_x86ni_keysched_enc() with ctx->skey.skni as destination and the
+ * caller's key (len bytes). The value returned by the key schedule is
+ * stored as ctx->num_rounds.
+ * NOTE(review): the key schedule presumably writes num_rounds+1 round
+ * keys of 16 bytes each into skni; that is how the other methods read it.
+ */
+void
+br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_aes_x86ni_ctrcbc_vtable;
+ ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h
+ *
+ * CTR-only processing, in place, of the len bytes at data, using a full
+ * 16-byte big-endian counter.
+ *
+ * ctx   key context; num_rounds+1 round keys of 16 bytes each are read
+ *       from ctx->skey.skni (num_rounds must not exceed 14 since the
+ *       local copy sk[] has 15 entries)
+ * ctr   16-byte counter block; read on entry and overwritten on exit
+ *       with the counter value following the last block consumed
+ * data  buffer XORed in place with the generated key stream
+ * len   byte count
+ *
+ * The counter is byte-reversed on load so that it can be incremented
+ * with 64-bit lane arithmetic, and byte-reversed again before each
+ * encryption and before being written back. Four consecutive counter
+ * values are kept in ivx0..ivx3 and encrypted with interleaved rounds.
+ * In the final short chunk the written-back counter is selected only
+ * for len equal to 16, 32 or 48; for any other remaining length ivx0 is
+ * left as it was at the start of that chunk.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
+ void *ctr, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned num_rounds;
+ __m128i sk[15];
+ __m128i ivx0, ivx1, ivx2, ivx3;
+ __m128i erev, zero, one, four, notthree;
+ unsigned u;
+
+ buf = data;
+ num_rounds = ctx->num_rounds;
+ for (u = 0; u <= num_rounds; u ++) { /* copy round keys into locals */
+ sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+ }
+
+ /*
+ * Some SSE2 constants.
+ *   erev      shuffle mask that reverses the order of the 16 bytes
+ *   one/four  the value 1 (resp. 4) in the low 64-bit lane, 0 above
+ *   notthree  0 - 4 in the low lane, i.e. all bits set except the
+ *             two lowest; the high lane is zero
+ */
+ erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15);
+ zero = _mm_setzero_si128();
+ one = _mm_set_epi64x(0, 1);
+ four = _mm_set_epi64x(0, 4);
+ notthree = _mm_sub_epi64(zero, four);
+
+ /*
+ * Decode the counter in big-endian and pre-increment the other
+ * three counters.
+ *
+ * Carry handling: after adding 1 to the low lane, the lanes are
+ * compared with zero; the comparison result of the low lane
+ * (all-ones when it wrapped to zero) is moved into the high lane
+ * by the 8-byte shift and subtracted, which adds 1 to the high
+ * lane exactly when the low lane wrapped.
+ */
+ ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev);
+ ivx1 = _mm_add_epi64(ivx0, one);
+ ivx1 = _mm_sub_epi64(ivx1,
+ _mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8));
+ ivx2 = _mm_add_epi64(ivx1, one);
+ ivx2 = _mm_sub_epi64(ivx2,
+ _mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8));
+ ivx3 = _mm_add_epi64(ivx2, one);
+ ivx3 = _mm_sub_epi64(ivx3,
+ _mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8));
+ while (len > 0) {
+ __m128i x0, x1, x2, x3;
+
+ /*
+ * Load counter values; we need to byteswap them because
+ * the specification says that they use big-endian.
+ */
+ x0 = _mm_shuffle_epi8(ivx0, erev);
+ x1 = _mm_shuffle_epi8(ivx1, erev);
+ x2 = _mm_shuffle_epi8(ivx2, erev);
+ x3 = _mm_shuffle_epi8(ivx3, erev);
+
+ x0 = _mm_xor_si128(x0, sk[0]); /* initial round key addition */
+ x1 = _mm_xor_si128(x1, sk[0]);
+ x2 = _mm_xor_si128(x2, sk[0]);
+ x3 = _mm_xor_si128(x3, sk[0]);
+ x0 = _mm_aesenc_si128(x0, sk[1]);
+ x1 = _mm_aesenc_si128(x1, sk[1]);
+ x2 = _mm_aesenc_si128(x2, sk[1]);
+ x3 = _mm_aesenc_si128(x3, sk[1]);
+ x0 = _mm_aesenc_si128(x0, sk[2]);
+ x1 = _mm_aesenc_si128(x1, sk[2]);
+ x2 = _mm_aesenc_si128(x2, sk[2]);
+ x3 = _mm_aesenc_si128(x3, sk[2]);
+ x0 = _mm_aesenc_si128(x0, sk[3]);
+ x1 = _mm_aesenc_si128(x1, sk[3]);
+ x2 = _mm_aesenc_si128(x2, sk[3]);
+ x3 = _mm_aesenc_si128(x3, sk[3]);
+ x0 = _mm_aesenc_si128(x0, sk[4]);
+ x1 = _mm_aesenc_si128(x1, sk[4]);
+ x2 = _mm_aesenc_si128(x2, sk[4]);
+ x3 = _mm_aesenc_si128(x3, sk[4]);
+ x0 = _mm_aesenc_si128(x0, sk[5]);
+ x1 = _mm_aesenc_si128(x1, sk[5]);
+ x2 = _mm_aesenc_si128(x2, sk[5]);
+ x3 = _mm_aesenc_si128(x3, sk[5]);
+ x0 = _mm_aesenc_si128(x0, sk[6]);
+ x1 = _mm_aesenc_si128(x1, sk[6]);
+ x2 = _mm_aesenc_si128(x2, sk[6]);
+ x3 = _mm_aesenc_si128(x3, sk[6]);
+ x0 = _mm_aesenc_si128(x0, sk[7]);
+ x1 = _mm_aesenc_si128(x1, sk[7]);
+ x2 = _mm_aesenc_si128(x2, sk[7]);
+ x3 = _mm_aesenc_si128(x3, sk[7]);
+ x0 = _mm_aesenc_si128(x0, sk[8]);
+ x1 = _mm_aesenc_si128(x1, sk[8]);
+ x2 = _mm_aesenc_si128(x2, sk[8]);
+ x3 = _mm_aesenc_si128(x3, sk[8]);
+ x0 = _mm_aesenc_si128(x0, sk[9]);
+ x1 = _mm_aesenc_si128(x1, sk[9]);
+ x2 = _mm_aesenc_si128(x2, sk[9]);
+ x3 = _mm_aesenc_si128(x3, sk[9]);
+ if (num_rounds == 10) { /* rounds 1..9 are common; finish per key size */
+ x0 = _mm_aesenclast_si128(x0, sk[10]);
+ x1 = _mm_aesenclast_si128(x1, sk[10]);
+ x2 = _mm_aesenclast_si128(x2, sk[10]);
+ x3 = _mm_aesenclast_si128(x3, sk[10]);
+ } else if (num_rounds == 12) {
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x2 = _mm_aesenc_si128(x2, sk[10]);
+ x3 = _mm_aesenc_si128(x3, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x2 = _mm_aesenc_si128(x2, sk[11]);
+ x3 = _mm_aesenc_si128(x3, sk[11]);
+ x0 = _mm_aesenclast_si128(x0, sk[12]);
+ x1 = _mm_aesenclast_si128(x1, sk[12]);
+ x2 = _mm_aesenclast_si128(x2, sk[12]);
+ x3 = _mm_aesenclast_si128(x3, sk[12]);
+ } else { /* any other value is handled as 14 rounds */
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x2 = _mm_aesenc_si128(x2, sk[10]);
+ x3 = _mm_aesenc_si128(x3, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x2 = _mm_aesenc_si128(x2, sk[11]);
+ x3 = _mm_aesenc_si128(x3, sk[11]);
+ x0 = _mm_aesenc_si128(x0, sk[12]);
+ x1 = _mm_aesenc_si128(x1, sk[12]);
+ x2 = _mm_aesenc_si128(x2, sk[12]);
+ x3 = _mm_aesenc_si128(x3, sk[12]);
+ x0 = _mm_aesenc_si128(x0, sk[13]);
+ x1 = _mm_aesenc_si128(x1, sk[13]);
+ x2 = _mm_aesenc_si128(x2, sk[13]);
+ x3 = _mm_aesenc_si128(x3, sk[13]);
+ x0 = _mm_aesenclast_si128(x0, sk[14]);
+ x1 = _mm_aesenclast_si128(x1, sk[14]);
+ x2 = _mm_aesenclast_si128(x2, sk[14]);
+ x3 = _mm_aesenclast_si128(x3, sk[14]);
+ }
+ if (len >= 64) { /* full chunk: XOR all 64 key stream bytes in place */
+ x0 = _mm_xor_si128(x0,
+ _mm_loadu_si128((void *)(buf + 0)));
+ x1 = _mm_xor_si128(x1,
+ _mm_loadu_si128((void *)(buf + 16)));
+ x2 = _mm_xor_si128(x2,
+ _mm_loadu_si128((void *)(buf + 32)));
+ x3 = _mm_xor_si128(x3,
+ _mm_loadu_si128((void *)(buf + 48)));
+ _mm_storeu_si128((void *)(buf + 0), x0);
+ _mm_storeu_si128((void *)(buf + 16), x1);
+ _mm_storeu_si128((void *)(buf + 32), x2);
+ _mm_storeu_si128((void *)(buf + 48), x3);
+ buf += 64;
+ len -= 64;
+ } else { /* final short chunk: spill key stream, XOR len bytes only */
+ unsigned char tmp[64];
+
+ _mm_storeu_si128((void *)(tmp + 0), x0);
+ _mm_storeu_si128((void *)(tmp + 16), x1);
+ _mm_storeu_si128((void *)(tmp + 32), x2);
+ _mm_storeu_si128((void *)(tmp + 48), x3);
+ for (u = 0; u < len; u ++) {
+ buf[u] ^= tmp[u];
+ }
+ switch (len) { /* pick the counter that follows the last block used */
+ case 16:
+ ivx0 = ivx1;
+ break;
+ case 32:
+ ivx0 = ivx2;
+ break;
+ case 48:
+ ivx0 = ivx3;
+ break;
+ }
+ break; /* leaves the loop, skipping the +4 update below */
+ }
+
+ /*
+ * Add 4 to each counter value. For carry propagation
+ * into the upper 64-bit words, we would need to compare
+ * the results with 4, but SSE2+ has only _signed_
+ * comparisons. Instead, we mask out the low two bits,
+ * and check whether the remaining bits are zero.
+ */
+ ivx0 = _mm_add_epi64(ivx0, four);
+ ivx1 = _mm_add_epi64(ivx1, four);
+ ivx2 = _mm_add_epi64(ivx2, four);
+ ivx3 = _mm_add_epi64(ivx3, four);
+ ivx0 = _mm_sub_epi64(ivx0,
+ _mm_slli_si128(_mm_cmpeq_epi64(
+ _mm_and_si128(ivx0, notthree), zero), 8));
+ ivx1 = _mm_sub_epi64(ivx1,
+ _mm_slli_si128(_mm_cmpeq_epi64(
+ _mm_and_si128(ivx1, notthree), zero), 8));
+ ivx2 = _mm_sub_epi64(ivx2,
+ _mm_slli_si128(_mm_cmpeq_epi64(
+ _mm_and_si128(ivx2, notthree), zero), 8));
+ ivx3 = _mm_sub_epi64(ivx3,
+ _mm_slli_si128(_mm_cmpeq_epi64(
+ _mm_and_si128(ivx3, notthree), zero), 8));
+ }
+
+ /*
+ * Write back new counter value. The loop took care to put the
+ * right counter value in ivx0.
+ */
+ _mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev));
+}
+
+/* see bearssl_block.h
+ *
+ * CBC-MAC update over the len bytes at data; the data is only read.
+ *
+ * ctx     key context; num_rounds+1 round keys of 16 bytes each are
+ *         read from ctx->skey.skni (num_rounds must not exceed 14
+ *         since the local copy sk[] has 15 entries)
+ * cbcmac  16-byte running MAC value; read on entry and overwritten on
+ *         exit with the updated value
+ * data    input, consumed 16 bytes per loop iteration
+ * len     byte count; the loop subtracts 16 until len reaches zero, so
+ *         a value that is not a multiple of 16 would make the size_t
+ *         wrap
+ *
+ * Each iteration XORs one input block into the running value and
+ * encrypts the result, which becomes the new running value.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
+ void *cbcmac, const void *data, size_t len)
+{
+ const unsigned char *buf;
+ unsigned num_rounds;
+ __m128i sk[15], ivx;
+ unsigned u;
+
+ buf = data;
+ ivx = _mm_loadu_si128(cbcmac);
+ num_rounds = ctx->num_rounds;
+ for (u = 0; u <= num_rounds; u ++) { /* copy round keys into locals */
+ sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+ }
+ while (len > 0) {
+ __m128i x;
+
+ x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx); /* fold block into MAC state */
+ x = _mm_xor_si128(x, sk[0]); /* initial round key addition */
+ x = _mm_aesenc_si128(x, sk[1]);
+ x = _mm_aesenc_si128(x, sk[2]);
+ x = _mm_aesenc_si128(x, sk[3]);
+ x = _mm_aesenc_si128(x, sk[4]);
+ x = _mm_aesenc_si128(x, sk[5]);
+ x = _mm_aesenc_si128(x, sk[6]);
+ x = _mm_aesenc_si128(x, sk[7]);
+ x = _mm_aesenc_si128(x, sk[8]);
+ x = _mm_aesenc_si128(x, sk[9]);
+ if (num_rounds == 10) { /* rounds 1..9 are common; finish per key size */
+ x = _mm_aesenclast_si128(x, sk[10]);
+ } else if (num_rounds == 12) {
+ x = _mm_aesenc_si128(x, sk[10]);
+ x = _mm_aesenc_si128(x, sk[11]);
+ x = _mm_aesenclast_si128(x, sk[12]);
+ } else { /* any other value is handled as 14 rounds */
+ x = _mm_aesenc_si128(x, sk[10]);
+ x = _mm_aesenc_si128(x, sk[11]);
+ x = _mm_aesenc_si128(x, sk[12]);
+ x = _mm_aesenc_si128(x, sk[13]);
+ x = _mm_aesenclast_si128(x, sk[14]);
+ }
+ ivx = x; /* new running MAC value; nothing is written to data */
+ buf += 16;
+ len -= 16;
+ }
+ _mm_storeu_si128(cbcmac, ivx);
+}
+
+/* see bearssl_block.h
+ *
+ * Combined CTR encryption and CBC-MAC: the len bytes at data are
+ * encrypted in place, and the CBC-MAC is updated with the resulting
+ * (encrypted) blocks.
+ *
+ * ctx     key context; num_rounds+1 round keys of 16 bytes each are
+ *         read from ctx->skey.skni (num_rounds must not exceed 14
+ *         since the local copy sk[] has 15 entries)
+ * ctr     16-byte big-endian counter; read on entry, written back
+ *         incremented once per processed block
+ * cbcmac  16-byte running MAC value; read on entry, written back
+ * data    buffer encrypted in place, 16 bytes per loop iteration
+ * len     byte count; 16 is subtracted per iteration and the loop ends
+ *         when len is exactly zero, so a value that is not a multiple
+ *         of 16 would make the size_t wrap
+ *
+ * Since the MAC input is the block that CTR has just produced, the MAC
+ * encryption runs one block behind: in each iteration the counter block
+ * (x0) and the pending MAC input (x1) are encrypted with interleaved
+ * rounds. In the first iteration the x1 result is not used; the incoming
+ * MAC value is simply XORed with the first output block. After the last
+ * block, one extra encryption completes the MAC.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned num_rounds;
+ __m128i sk[15];
+ __m128i ivx, cmx;
+ __m128i erev, zero, one;
+ unsigned u;
+ int first_iter;
+
+ num_rounds = ctx->num_rounds;
+ for (u = 0; u <= num_rounds; u ++) { /* copy round keys into locals */
+ sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+ }
+
+ /*
+ * Some SSE2 constants.
+ *   erev  shuffle mask that reverses the order of the 16 bytes
+ *   one   the value 1 in the low 64-bit lane, 0 in the high lane
+ */
+ erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15);
+ zero = _mm_setzero_si128();
+ one = _mm_set_epi64x(0, 1);
+
+ /*
+ * Decode the counter in big-endian.
+ */
+ ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+ cmx = _mm_loadu_si128(cbcmac);
+
+ buf = data;
+ first_iter = 1;
+ while (len > 0) {
+ __m128i dx, x0, x1;
+
+ /*
+ * Load initial values:
+ * dx input block of data (not yet encrypted)
+ * x0 counter (for CTR encryption)
+ * x1 input for CBC-MAC
+ */
+ dx = _mm_loadu_si128((void *)buf);
+ x0 = _mm_shuffle_epi8(ivx, erev);
+ x1 = cmx;
+
+ x0 = _mm_xor_si128(x0, sk[0]); /* initial round key addition */
+ x1 = _mm_xor_si128(x1, sk[0]);
+ x0 = _mm_aesenc_si128(x0, sk[1]);
+ x1 = _mm_aesenc_si128(x1, sk[1]);
+ x0 = _mm_aesenc_si128(x0, sk[2]);
+ x1 = _mm_aesenc_si128(x1, sk[2]);
+ x0 = _mm_aesenc_si128(x0, sk[3]);
+ x1 = _mm_aesenc_si128(x1, sk[3]);
+ x0 = _mm_aesenc_si128(x0, sk[4]);
+ x1 = _mm_aesenc_si128(x1, sk[4]);
+ x0 = _mm_aesenc_si128(x0, sk[5]);
+ x1 = _mm_aesenc_si128(x1, sk[5]);
+ x0 = _mm_aesenc_si128(x0, sk[6]);
+ x1 = _mm_aesenc_si128(x1, sk[6]);
+ x0 = _mm_aesenc_si128(x0, sk[7]);
+ x1 = _mm_aesenc_si128(x1, sk[7]);
+ x0 = _mm_aesenc_si128(x0, sk[8]);
+ x1 = _mm_aesenc_si128(x1, sk[8]);
+ x0 = _mm_aesenc_si128(x0, sk[9]);
+ x1 = _mm_aesenc_si128(x1, sk[9]);
+ if (num_rounds == 10) { /* rounds 1..9 are common; finish per key size */
+ x0 = _mm_aesenclast_si128(x0, sk[10]);
+ x1 = _mm_aesenclast_si128(x1, sk[10]);
+ } else if (num_rounds == 12) {
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x0 = _mm_aesenclast_si128(x0, sk[12]);
+ x1 = _mm_aesenclast_si128(x1, sk[12]);
+ } else { /* any other value is handled as 14 rounds */
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x0 = _mm_aesenc_si128(x0, sk[12]);
+ x1 = _mm_aesenc_si128(x1, sk[12]);
+ x0 = _mm_aesenc_si128(x0, sk[13]);
+ x1 = _mm_aesenc_si128(x1, sk[13]);
+ x0 = _mm_aesenclast_si128(x0, sk[14]);
+ x1 = _mm_aesenclast_si128(x1, sk[14]);
+ }
+
+ x0 = _mm_xor_si128(x0, dx); /* key stream XOR input = output block */
+ if (first_iter) { /* no pending MAC input yet: x1 is discarded */
+ cmx = _mm_xor_si128(cmx, x0);
+ first_iter = 0;
+ } else { /* encrypted previous MAC input XOR new output block */
+ cmx = _mm_xor_si128(x1, x0);
+ }
+ _mm_storeu_si128((void *)buf, x0);
+
+ buf += 16;
+ len -= 16;
+
+ /*
+ * Increment the counter value.
+ * The low 64-bit lane is incremented; if it wrapped to
+ * zero, the comparison mask is moved to the high lane
+ * and subtracted, which adds the carry there.
+ */
+ ivx = _mm_add_epi64(ivx, one);
+ ivx = _mm_sub_epi64(ivx,
+ _mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+
+ /*
+ * If this was the last iteration, then compute the
+ * extra block encryption to complete CBC-MAC.
+ */
+ if (len == 0) {
+ cmx = _mm_xor_si128(cmx, sk[0]);
+ cmx = _mm_aesenc_si128(cmx, sk[1]);
+ cmx = _mm_aesenc_si128(cmx, sk[2]);
+ cmx = _mm_aesenc_si128(cmx, sk[3]);
+ cmx = _mm_aesenc_si128(cmx, sk[4]);
+ cmx = _mm_aesenc_si128(cmx, sk[5]);
+ cmx = _mm_aesenc_si128(cmx, sk[6]);
+ cmx = _mm_aesenc_si128(cmx, sk[7]);
+ cmx = _mm_aesenc_si128(cmx, sk[8]);
+ cmx = _mm_aesenc_si128(cmx, sk[9]);
+ if (num_rounds == 10) {
+ cmx = _mm_aesenclast_si128(cmx, sk[10]);
+ } else if (num_rounds == 12) {
+ cmx = _mm_aesenc_si128(cmx, sk[10]);
+ cmx = _mm_aesenc_si128(cmx, sk[11]);
+ cmx = _mm_aesenclast_si128(cmx, sk[12]);
+ } else {
+ cmx = _mm_aesenc_si128(cmx, sk[10]);
+ cmx = _mm_aesenc_si128(cmx, sk[11]);
+ cmx = _mm_aesenc_si128(cmx, sk[12]);
+ cmx = _mm_aesenc_si128(cmx, sk[13]);
+ cmx = _mm_aesenclast_si128(cmx, sk[14]);
+ }
+ break;
+ }
+ }
+
+ /*
+ * Write back new counter value and CBC-MAC value.
+ */
+ _mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+ _mm_storeu_si128(cbcmac, cmx);
+}
+
+/* see bearssl_block.h
+ *
+ * Combined CBC-MAC and CTR decryption: the CBC-MAC is updated with the
+ * blocks as they are found in data on entry, and the len bytes at data
+ * are then XORed in place with the CTR key stream.
+ *
+ * ctx     key context; num_rounds+1 round keys of 16 bytes each are
+ *         read from ctx->skey.skni (num_rounds must not exceed 14
+ *         since the local copy sk[] has 15 entries)
+ * ctr     16-byte big-endian counter; read on entry, written back
+ *         incremented once per processed block
+ * cbcmac  16-byte running MAC value; read on entry, written back
+ * data    buffer processed in place, 16 bytes per loop iteration
+ * len     byte count; the loop subtracts 16 until len reaches zero, so
+ *         a value that is not a multiple of 16 would make the size_t
+ *         wrap
+ *
+ * Both AES inputs of an iteration (the counter block and the MAC state
+ * XORed with the input block) are available up front, so the two
+ * encryptions are interleaved with no lag and no extra final step.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+ void *ctr, void *cbcmac, void *data, size_t len)
+{
+ unsigned char *buf;
+ unsigned num_rounds;
+ __m128i sk[15];
+ __m128i ivx, cmx;
+ __m128i erev, zero, one;
+ unsigned u;
+
+ num_rounds = ctx->num_rounds;
+ for (u = 0; u <= num_rounds; u ++) { /* copy round keys into locals */
+ sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+ }
+
+ /*
+ * Some SSE2 constants.
+ *   erev  shuffle mask that reverses the order of the 16 bytes
+ *   one   the value 1 in the low 64-bit lane, 0 in the high lane
+ */
+ erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15);
+ zero = _mm_setzero_si128();
+ one = _mm_set_epi64x(0, 1);
+
+ /*
+ * Decode the counter in big-endian.
+ */
+ ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+ cmx = _mm_loadu_si128(cbcmac);
+
+ buf = data;
+ while (len > 0) {
+ __m128i dx, x0, x1;
+
+ /*
+ * Load initial values:
+ * dx encrypted block of data
+ * x0 counter (for CTR encryption)
+ * x1 input for CBC-MAC
+ */
+ dx = _mm_loadu_si128((void *)buf);
+ x0 = _mm_shuffle_epi8(ivx, erev);
+ x1 = _mm_xor_si128(cmx, dx); /* MAC is computed over the input block */
+
+ x0 = _mm_xor_si128(x0, sk[0]); /* initial round key addition */
+ x1 = _mm_xor_si128(x1, sk[0]);
+ x0 = _mm_aesenc_si128(x0, sk[1]);
+ x1 = _mm_aesenc_si128(x1, sk[1]);
+ x0 = _mm_aesenc_si128(x0, sk[2]);
+ x1 = _mm_aesenc_si128(x1, sk[2]);
+ x0 = _mm_aesenc_si128(x0, sk[3]);
+ x1 = _mm_aesenc_si128(x1, sk[3]);
+ x0 = _mm_aesenc_si128(x0, sk[4]);
+ x1 = _mm_aesenc_si128(x1, sk[4]);
+ x0 = _mm_aesenc_si128(x0, sk[5]);
+ x1 = _mm_aesenc_si128(x1, sk[5]);
+ x0 = _mm_aesenc_si128(x0, sk[6]);
+ x1 = _mm_aesenc_si128(x1, sk[6]);
+ x0 = _mm_aesenc_si128(x0, sk[7]);
+ x1 = _mm_aesenc_si128(x1, sk[7]);
+ x0 = _mm_aesenc_si128(x0, sk[8]);
+ x1 = _mm_aesenc_si128(x1, sk[8]);
+ x0 = _mm_aesenc_si128(x0, sk[9]);
+ x1 = _mm_aesenc_si128(x1, sk[9]);
+ if (num_rounds == 10) { /* rounds 1..9 are common; finish per key size */
+ x0 = _mm_aesenclast_si128(x0, sk[10]);
+ x1 = _mm_aesenclast_si128(x1, sk[10]);
+ } else if (num_rounds == 12) {
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x0 = _mm_aesenclast_si128(x0, sk[12]);
+ x1 = _mm_aesenclast_si128(x1, sk[12]);
+ } else { /* any other value is handled as 14 rounds */
+ x0 = _mm_aesenc_si128(x0, sk[10]);
+ x1 = _mm_aesenc_si128(x1, sk[10]);
+ x0 = _mm_aesenc_si128(x0, sk[11]);
+ x1 = _mm_aesenc_si128(x1, sk[11]);
+ x0 = _mm_aesenc_si128(x0, sk[12]);
+ x1 = _mm_aesenc_si128(x1, sk[12]);
+ x0 = _mm_aesenc_si128(x0, sk[13]);
+ x1 = _mm_aesenc_si128(x1, sk[13]);
+ x0 = _mm_aesenclast_si128(x0, sk[14]);
+ x1 = _mm_aesenclast_si128(x1, sk[14]);
+ }
+ x0 = _mm_xor_si128(x0, dx); /* key stream XOR input = output block */
+ cmx = x1; /* updated running MAC value */
+ _mm_storeu_si128((void *)buf, x0);
+
+ buf += 16;
+ len -= 16;
+
+ /*
+ * Increment the counter value.
+ * The low 64-bit lane is incremented; if it wrapped to
+ * zero, the comparison mask is moved to the high lane
+ * and subtracted, which adds the carry there.
+ */
+ ivx = _mm_add_epi64(ivx, one);
+ ivx = _mm_sub_epi64(ivx,
+ _mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+ }
+
+ /*
+ * Write back new counter value and CBC-MAC value.
+ */
+ _mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+ _mm_storeu_si128(cbcmac, cmx);
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h
+ *
+ * Class descriptor (vtable) for the AES-NI CTR + CBC-MAC implementation.
+ * Its initializers are, in order: the size of the key context structure,
+ * the constants 16 and 4, and then the init, encrypt, decrypt, ctr and
+ * mac entry points, each cast to the corresponding generic
+ * br_block_ctrcbc_class method signature.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm -- confirm against the class declaration.
+ */
+const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = {
+ sizeof(br_aes_x86ni_ctrcbc_keys),
+ 16,
+ 4,
+ (void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+ &br_aes_x86ni_ctrcbc_init,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_x86ni_ctrcbc_encrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, void *, size_t))
+ &br_aes_x86ni_ctrcbc_decrypt,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, void *, size_t))
+ &br_aes_x86ni_ctrcbc_ctr,
+ (void (*)(const br_block_ctrcbc_class *const *,
+ void *, const void *, size_t))
+ &br_aes_x86ni_ctrcbc_mac
+};
+
+#else
+
+/* see bearssl_block.h
+ *
+ * Fallback definition compiled when BR_AES_X86NI is false: the AES-NI
+ * CTR + CBC-MAC code above is not built, so NULL is always returned.
+ */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+ return NULL;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/chacha20_ct.c b/test/monniaux/BearSSL/src/symcipher/chacha20_ct.c
new file mode 100644
index 00000000..9961eb11
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/chacha20_ct.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h
+ *
+ * Portable ChaCha20: XORs the len bytes at data, in place, with the
+ * ChaCha20 key stream.
+ *
+ * key   32 bytes, decoded as eight little-endian 32-bit words
+ * iv    12 bytes, decoded as three little-endian 32-bit words
+ * cc    32-bit block counter used for the first 64-byte block
+ * data  buffer processed in place
+ * len   byte count; the last block may be partial
+ *
+ * For each block the 16-word state is built from the four constants,
+ * the key, the counter and the IV; ten passes of eight quarter-rounds
+ * are applied; the initial words are added back; and the result is
+ * serialized in little-endian to form 64 key stream bytes.
+ *
+ * Returns cc incremented once per block generated, including a final
+ * partial block.
+ */
+uint32_t
+br_chacha20_ct_run(const void *key,
+ const void *iv, uint32_t cc, void *data, size_t len)
+{
+ unsigned char *buf;
+ uint32_t kw[8], ivw[3];
+ size_t u;
+
+ static const uint32_t CW[] = { /* "expand 32-byte k" as little-endian words */
+ 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+ };
+
+ buf = data;
+ for (u = 0; u < 8; u ++) {
+ kw[u] = br_dec32le((const unsigned char *)key + (u << 2));
+ }
+ for (u = 0; u < 3; u ++) {
+ ivw[u] = br_dec32le((const unsigned char *)iv + (u << 2));
+ }
+ while (len > 0) {
+ uint32_t state[16];
+ int i;
+ size_t clen;
+ unsigned char tmp[64];
+
+ memcpy(&state[0], CW, sizeof CW); /* words 0..3: constants */
+ memcpy(&state[4], kw, sizeof kw); /* words 4..11: key */
+ state[12] = cc; /* word 12: block counter */
+ memcpy(&state[13], ivw, sizeof ivw); /* words 13..15: IV */
+ for (i = 0; i < 10; i ++) { /* 10 passes of 8 quarter-rounds each */
+
+#define QROUND(a, b, c, d) do { \
+ state[a] += state[b]; \
+ state[d] ^= state[a]; \
+ state[d] = (state[d] << 16) | (state[d] >> 16); \
+ state[c] += state[d]; \
+ state[b] ^= state[c]; \
+ state[b] = (state[b] << 12) | (state[b] >> 20); \
+ state[a] += state[b]; \
+ state[d] ^= state[a]; \
+ state[d] = (state[d] << 8) | (state[d] >> 24); \
+ state[c] += state[d]; \
+ state[b] ^= state[c]; \
+ state[b] = (state[b] << 7) | (state[b] >> 25); \
+ } while (0)
+
+ QROUND( 0, 4, 8, 12); /* first four: columns of the 4x4 state */
+ QROUND( 1, 5, 9, 13);
+ QROUND( 2, 6, 10, 14);
+ QROUND( 3, 7, 11, 15);
+ QROUND( 0, 5, 10, 15); /* last four: diagonals of the 4x4 state */
+ QROUND( 1, 6, 11, 12);
+ QROUND( 2, 7, 8, 13);
+ QROUND( 3, 4, 9, 14);
+
+#undef QROUND
+
+ }
+ for (u = 0; u < 4; u ++) { /* add the initial state back and serialize */
+ br_enc32le(&tmp[u << 2], state[u] + CW[u]);
+ }
+ for (u = 4; u < 12; u ++) {
+ br_enc32le(&tmp[u << 2], state[u] + kw[u - 4]);
+ }
+ br_enc32le(&tmp[48], state[12] + cc);
+ for (u = 13; u < 16; u ++) {
+ br_enc32le(&tmp[u << 2], state[u] + ivw[u - 13]);
+ }
+
+ clen = len < 64 ? len : 64; /* the last block may be shorter than 64 */
+ for (u = 0; u < clen; u ++) {
+ buf[u] ^= tmp[u];
+ }
+ buf += clen;
+ len -= clen;
+ cc ++;
+ }
+ return cc;
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/chacha20_sse2.c b/test/monniaux/BearSSL/src/symcipher/chacha20_sse2.c
new file mode 100644
index 00000000..92b4a4a8
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/chacha20_sse2.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS 1
+#include "inner.h"
+
+#if BR_SSE2
+
+/*
+ * This file contains a ChaCha20 implementation that leverages SSE2
+ * opcodes for better performance.
+ */
+
+/*
+ * see bearssl_block.h
+ *
+ * Return a pointer to br_chacha20_sse2_run() if that implementation can
+ * be used, or 0 otherwise. When BR_amd64 is set the pointer is returned
+ * unconditionally; otherwise the answer depends on the br_cpuid() test
+ * below.
+ */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+ /*
+ * If using 64-bit mode, then SSE2 opcodes should be automatically
+ * available, since they are part of the ABI.
+ *
+ * In 32-bit mode, we use CPUID to detect the SSE2 feature.
+ */
+
+#if BR_amd64
+ return &br_chacha20_sse2_run;
+#else
+
+ /*
+ * SSE2 support is indicated by bit 26 in EDX.
+ *
+ * 0x04000000 is 1 << 26. It is passed as the fourth argument of
+ * br_cpuid(), presumably the EDX mask (br_cpuid() is defined
+ * elsewhere; confirm the argument order in inner.h).
+ */
+ if (br_cpuid(0, 0, 0, 0x04000000)) {
+ return &br_chacha20_sse2_run;
+ } else {
+ return 0;
+ }
+#endif
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * see bearssl_block.h
+ *
+ * XOR the 'len' bytes at 'data', in place, with a ChaCha20 key stream
+ * computed with SSE2 intrinsics.
+ *
+ * -- 'key' is read as 32 bytes (two unaligned 16-byte loads).
+ * -- 'iv' is read as 12 bytes.
+ * -- 'cc' is the initial 32-bit block counter; it is stored in the
+ * first word of the last state row, followed by the 12 IV bytes.
+ *
+ * Each loop iteration produces one 64-byte key stream block and
+ * increments the counter. The returned value is the counter after the
+ * last iteration; since the increment happens before the data is
+ * XORed, a final partial block also counts as one block.
+ */
+BR_TARGET("sse2")
+uint32_t
+br_chacha20_sse2_run(const void *key,
+ const void *iv, uint32_t cc, void *data, size_t len)
+{
+ unsigned char *buf;
+ uint32_t ivtmp[4];
+ __m128i kw0, kw1;
+ __m128i iw, cw;
+ __m128i one;
+
+ static const uint32_t CW[] = {
+ 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+ };
+
+ buf = data;
+ kw0 = _mm_loadu_si128(key);
+ kw1 = _mm_loadu_si128((const void *)((const unsigned char *)key + 16));
+ ivtmp[0] = cc;
+ memcpy(ivtmp + 1, iv, 12);
+ iw = _mm_loadu_si128((const void *)ivtmp);
+ cw = _mm_loadu_si128((const void *)CW);
+ one = _mm_set_epi32(0, 0, 0, 1); /* 1 in the lowest lane only (the counter word of iw) */
+
+ while (len > 0) {
+ /*
+ * sj contains state words 4*j to 4*j+3.
+ */
+ __m128i s0, s1, s2, s3;
+ int i;
+
+ s0 = cw;
+ s1 = kw0;
+ s2 = kw1;
+ s3 = iw;
+ for (i = 0; i < 10; i ++) {
+ /*
+ * Even round is straightforward application on
+ * the state words.
+ *
+ * Each (slli by n) | (srli by 32-n) pair below is
+ * a left rotation by n bits of every 32-bit lane;
+ * the rotation counts are 16, 12, 8 and 7.
+ */
+ s0 = _mm_add_epi32(s0, s1);
+ s3 = _mm_xor_si128(s3, s0);
+ s3 = _mm_or_si128(
+ _mm_slli_epi32(s3, 16),
+ _mm_srli_epi32(s3, 16));
+
+ s2 = _mm_add_epi32(s2, s3);
+ s1 = _mm_xor_si128(s1, s2);
+ s1 = _mm_or_si128(
+ _mm_slli_epi32(s1, 12),
+ _mm_srli_epi32(s1, 20));
+
+ s0 = _mm_add_epi32(s0, s1);
+ s3 = _mm_xor_si128(s3, s0);
+ s3 = _mm_or_si128(
+ _mm_slli_epi32(s3, 8),
+ _mm_srli_epi32(s3, 24));
+
+ s2 = _mm_add_epi32(s2, s3);
+ s1 = _mm_xor_si128(s1, s2);
+ s1 = _mm_or_si128(
+ _mm_slli_epi32(s1, 7),
+ _mm_srli_epi32(s1, 25));
+
+ /*
+ * For the odd round, we must rotate some state
+ * words so that the computations apply on the
+ * right combinations of words.
+ *
+ * With immediates 0x39, 0x4E and 0x93, lane i of
+ * the result is taken from lane (i+1)&3, (i+2)&3
+ * and (i+3)&3 of the source, respectively.
+ */
+ s1 = _mm_shuffle_epi32(s1, 0x39);
+ s2 = _mm_shuffle_epi32(s2, 0x4E);
+ s3 = _mm_shuffle_epi32(s3, 0x93);
+
+ s0 = _mm_add_epi32(s0, s1);
+ s3 = _mm_xor_si128(s3, s0);
+ s3 = _mm_or_si128(
+ _mm_slli_epi32(s3, 16),
+ _mm_srli_epi32(s3, 16));
+
+ s2 = _mm_add_epi32(s2, s3);
+ s1 = _mm_xor_si128(s1, s2);
+ s1 = _mm_or_si128(
+ _mm_slli_epi32(s1, 12),
+ _mm_srli_epi32(s1, 20));
+
+ s0 = _mm_add_epi32(s0, s1);
+ s3 = _mm_xor_si128(s3, s0);
+ s3 = _mm_or_si128(
+ _mm_slli_epi32(s3, 8),
+ _mm_srli_epi32(s3, 24));
+
+ s2 = _mm_add_epi32(s2, s3);
+ s1 = _mm_xor_si128(s1, s2);
+ s1 = _mm_or_si128(
+ _mm_slli_epi32(s1, 7),
+ _mm_srli_epi32(s1, 25));
+
+ /*
+ * After the odd round, we rotate back the values
+ * to undo the rotate at the start of the odd round.
+ * (The immediates of s1 and s3 are exchanged with
+ * respect to the shuffles above; 0x4E is its own
+ * inverse.)
+ */
+ s1 = _mm_shuffle_epi32(s1, 0x93);
+ s2 = _mm_shuffle_epi32(s2, 0x4E);
+ s3 = _mm_shuffle_epi32(s3, 0x39);
+ }
+
+ /*
+ * Addition with the initial state.
+ */
+ s0 = _mm_add_epi32(s0, cw);
+ s1 = _mm_add_epi32(s1, kw0);
+ s2 = _mm_add_epi32(s2, kw1);
+ s3 = _mm_add_epi32(s3, iw);
+
+ /*
+ * Increment block counter.
+ * (Done after iw was added to s3, so the current block
+ * uses the non-incremented value.)
+ */
+ iw = _mm_add_epi32(iw, one);
+
+ /*
+ * XOR final state with the data.
+ *
+ * A last partial block (len < 64) goes through a
+ * temporary buffer so that only 'len' bytes of the data
+ * are read and written; it also terminates the loop.
+ * Full blocks are XORed directly with unaligned loads
+ * and stores.
+ */
+ if (len < 64) {
+ unsigned char tmp[64];
+ size_t u;
+
+ _mm_storeu_si128((void *)(tmp + 0), s0);
+ _mm_storeu_si128((void *)(tmp + 16), s1);
+ _mm_storeu_si128((void *)(tmp + 32), s2);
+ _mm_storeu_si128((void *)(tmp + 48), s3);
+ for (u = 0; u < len; u ++) {
+ buf[u] ^= tmp[u];
+ }
+ break;
+ } else {
+ __m128i b0, b1, b2, b3;
+
+ b0 = _mm_loadu_si128((const void *)(buf + 0));
+ b1 = _mm_loadu_si128((const void *)(buf + 16));
+ b2 = _mm_loadu_si128((const void *)(buf + 32));
+ b3 = _mm_loadu_si128((const void *)(buf + 48));
+ b0 = _mm_xor_si128(b0, s0);
+ b1 = _mm_xor_si128(b1, s1);
+ b2 = _mm_xor_si128(b2, s2);
+ b3 = _mm_xor_si128(b3, s3);
+ _mm_storeu_si128((void *)(buf + 0), b0);
+ _mm_storeu_si128((void *)(buf + 16), b1);
+ _mm_storeu_si128((void *)(buf + 32), b2);
+ _mm_storeu_si128((void *)(buf + 48), b3);
+ buf += 64;
+ len -= 64;
+ }
+ }
+
+ /*
+ * _mm_extract_epi32() requires SSE4.1. We prefer to stick to
+ * raw SSE2, thus we use _mm_extract_epi16().
+ *
+ * 16-bit elements 0 and 1 of iw are the two halves of its lowest
+ * 32-bit lane, i.e. the updated block counter.
+ */
+ return (uint32_t)_mm_extract_epi16(iw, 0)
+ | ((uint32_t)_mm_extract_epi16(iw, 1) << 16);
+}
+
+BR_TARGETS_X86_DOWN
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Fallback compiled when BR_SSE2 is not set: no SSE2 implementation is
+ * available in this build, so the function always returns 0.
+ */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+ return 0;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/des_ct.c b/test/monniaux/BearSSL/src/symcipher/des_ct.c
new file mode 100644
index 00000000..581c0ab2
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/des_ct.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * During key schedule, we need to apply bit extraction PC-2 then permute
+ * things into our bitslice representation. PC-2 extracts 48 bits out
+ * of two 28-bit words (kl and kr), and we store these bits into two
+ * 32-bit words sk0 and sk1.
+ *
+ * The tables are listed from the most significant target bit down to
+ * the least significant one, which is the order in which the loop in
+ * keysched_unit() consumes them (for x in 0..15):
+ *
+ * -- bit 31-x of sk0 comes from bit QL0[x] of kl
+ * -- bit 15-x of sk0 comes from bit QR0[x] of kr
+ * -- bit 31-x of sk1 comes from bit QL1[x] of kl
+ * -- bit 15-x of sk1 comes from bit QR1[x] of kr
+ *
+ * The value 28 (used in QL1 and QR1 only) designates a bit just above
+ * a 28-bit word; assuming kl and kr have their upper four bits cleared
+ * (br_des_keysched_unit() masks them with 0x0FFFFFFF), the matching
+ * target bit is always 0.
+ */
+
+static const unsigned char QL0[] = {
+ 17, 4, 27, 23, 13, 22, 7, 18,
+ 16, 24, 2, 20, 1, 8, 15, 26
+};
+
+static const unsigned char QR0[] = {
+ 25, 19, 9, 1, 5, 11, 23, 8,
+ 17, 0, 22, 3, 6, 20, 27, 24
+};
+
+static const unsigned char QL1[] = {
+ 28, 28, 14, 11, 28, 28, 25, 0,
+ 28, 28, 5, 9, 28, 28, 12, 21
+};
+
+static const unsigned char QR1[] = {
+ 28, 28, 15, 4, 28, 28, 26, 16,
+ 28, 28, 12, 7, 28, 28, 10, 14
+};
+
+/*
+ * 32-bit rotation. The C compiler is supposed to recognize it as a
+ * rotation and use the local architecture rotation opcode (if available).
+ *
+ * 'x' is the value to rotate and 'n' the left rotation count. 'n' must
+ * be in the 1..31 range: with n == 0 the right shift count would be 32,
+ * which is undefined for a 32-bit operand. All calls visible in this
+ * file use constant counts within that range.
+ */
+static inline uint32_t
+rotl(uint32_t x, int n)
+{
+ return (x << n) | (x >> (32 - n));
+}
+
+/*
+ * Compute key schedule for 8 key bytes (produces 32 subkey words).
+ *
+ * br_des_keysched_unit() first fills skey[] with 16 pairs of words
+ * (kl, kr), one pair per round. Each pair is then replaced, in place,
+ * with the pair (sk0, sk1) obtained by picking bits of kl and kr
+ * according to the QL0, QR0, QL1 and QR1 tables.
+ */
+static void
+keysched_unit(uint32_t *skey, const void *key)
+{
+ int i;
+
+ br_des_keysched_unit(skey, key);
+
+ /*
+ * Apply PC-2 + bitslicing.
+ *
+ * Bits are accumulated most significant first: sk0 and sk1 are
+ * shifted left by one bit before each insertion, so the bit
+ * selected by table entry j ends up at position 31 - j (bits
+ * taken from kl, inserted at bit 16) or 15 - j (bits taken from
+ * kr, inserted at bit 0).
+ */
+ for (i = 0; i < 16; i ++) {
+ uint32_t kl, kr, sk0, sk1;
+ int j;
+
+ kl = skey[(i << 1) + 0];
+ kr = skey[(i << 1) + 1];
+ sk0 = 0;
+ sk1 = 0;
+ for (j = 0; j < 16; j ++) {
+ sk0 <<= 1;
+ sk1 <<= 1;
+ sk0 |= ((kl >> QL0[j]) & (uint32_t)1) << 16;
+ sk0 |= (kr >> QR0[j]) & (uint32_t)1;
+ sk1 |= ((kl >> QL1[j]) & (uint32_t)1) << 16;
+ sk1 |= (kr >> QR1[j]) & (uint32_t)1;
+ }
+
+ skey[(i << 1) + 0] = sk0;
+ skey[(i << 1) + 1] = sk1;
+ }
+
+#if 0
+ /*
+ * Speed-optimized version for PC-2 + bitslicing.
+ * (Unused. Kept for reference only.)
+ *
+ * This fragment uses kl, kr, sk0 and sk1, which are declared
+ * inside the loop above only; it is meant as a replacement for
+ * the inner loop and would not compile at this position.
+ */
+ sk0 = kl & (uint32_t)0x00100000;
+ sk0 |= (kl & (uint32_t)0x08008000) << 2;
+ sk0 |= (kl & (uint32_t)0x00400000) << 4;
+ sk0 |= (kl & (uint32_t)0x00800000) << 5;
+ sk0 |= (kl & (uint32_t)0x00040000) << 6;
+ sk0 |= (kl & (uint32_t)0x00010000) << 7;
+ sk0 |= (kl & (uint32_t)0x00000100) << 10;
+ sk0 |= (kl & (uint32_t)0x00022000) << 14;
+ sk0 |= (kl & (uint32_t)0x00000082) << 18;
+ sk0 |= (kl & (uint32_t)0x00000004) << 19;
+ sk0 |= (kl & (uint32_t)0x04000000) >> 10;
+ sk0 |= (kl & (uint32_t)0x00000010) << 26;
+ sk0 |= (kl & (uint32_t)0x01000000) >> 2;
+
+ sk0 |= kr & (uint32_t)0x00000100;
+ sk0 |= (kr & (uint32_t)0x00000008) << 1;
+ sk0 |= (kr & (uint32_t)0x00000200) << 4;
+ sk0 |= rotl(kr & (uint32_t)0x08000021, 6);
+ sk0 |= (kr & (uint32_t)0x01000000) >> 24;
+ sk0 |= (kr & (uint32_t)0x00000002) << 11;
+ sk0 |= (kr & (uint32_t)0x00100000) >> 18;
+ sk0 |= (kr & (uint32_t)0x00400000) >> 17;
+ sk0 |= (kr & (uint32_t)0x00800000) >> 14;
+ sk0 |= (kr & (uint32_t)0x02020000) >> 10;
+ sk0 |= (kr & (uint32_t)0x00080000) >> 5;
+ sk0 |= (kr & (uint32_t)0x00000040) >> 3;
+ sk0 |= (kr & (uint32_t)0x00000800) >> 1;
+
+ sk1 = kl & (uint32_t)0x02000000;
+ sk1 |= (kl & (uint32_t)0x00001000) << 5;
+ sk1 |= (kl & (uint32_t)0x00000200) << 11;
+ sk1 |= (kl & (uint32_t)0x00004000) << 15;
+ sk1 |= (kl & (uint32_t)0x00000020) << 16;
+ sk1 |= (kl & (uint32_t)0x00000800) << 17;
+ sk1 |= (kl & (uint32_t)0x00000001) << 24;
+ sk1 |= (kl & (uint32_t)0x00200000) >> 5;
+
+ sk1 |= (kr & (uint32_t)0x00000010) << 8;
+ sk1 |= (kr & (uint32_t)0x04000000) >> 17;
+ sk1 |= (kr & (uint32_t)0x00004000) >> 14;
+ sk1 |= (kr & (uint32_t)0x00000400) >> 9;
+ sk1 |= (kr & (uint32_t)0x00010000) >> 8;
+ sk1 |= (kr & (uint32_t)0x00001000) >> 7;
+ sk1 |= (kr & (uint32_t)0x00000080) >> 3;
+ sk1 |= (kr & (uint32_t)0x00008000) >> 2;
+#endif
+}
+
+/*
+ * see inner.h
+ *
+ * Compute the subkeys for a key of 'key_len' bytes and return the
+ * number of 32-word schedules that make up the result:
+ *
+ * -- key_len == 8: one schedule is written in skey[0..31]; returned
+ * value is 1.
+ * -- key_len == 16: schedules for key bytes 0..7 and 8..15 are
+ * written in skey[0..31] and skey[32..63], and skey[64..95] is a
+ * copy of skey[0..31]; returned value is 3.
+ * -- any other key_len: the key is handled as a 24-byte key (bytes
+ * 0..23 are read, without any length check) and three schedules are
+ * written in skey[0..95]; returned value is 3.
+ *
+ * In the two multi-key cases the second schedule is reversed with
+ * br_des_rev_skey(), presumably because the middle DES instance is used
+ * in the opposite direction (confirm against the callers).
+ */
+unsigned
+br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+ switch (key_len) {
+ case 8:
+ keysched_unit(skey, key);
+ return 1;
+ case 16:
+ keysched_unit(skey, key);
+ keysched_unit(skey + 32, (const unsigned char *)key + 8);
+ br_des_rev_skey(skey + 32);
+ memcpy(skey + 64, skey, 32 * sizeof *skey); /* third schedule = first schedule */
+ return 3;
+ default:
+ keysched_unit(skey, key);
+ keysched_unit(skey + 32, (const unsigned char *)key + 8);
+ br_des_rev_skey(skey + 32);
+ keysched_unit(skey + 64, (const unsigned char *)key + 16);
+ return 3;
+ }
+}
+
+/*
+ * DES confusion function. This function performs expansion E (32 to
+ * 48 bits), XOR with subkey, S-boxes, and permutation P.
+ *
+ * 'r0' is the 32-bit input word and 'sk' points to the six subkey
+ * words for this round (sk[0] to sk[5] are read). The returned value
+ * is the 32-bit function output. The body only uses shifts, masks,
+ * subtractions and XORs with constants: there is no table lookup and
+ * no branch that depends on the data.
+ */
+static inline uint32_t
+Fconf(uint32_t r0, const uint32_t *sk)
+{
+ /*
+ * Each 6->4 S-box is virtually turned into four 6->1 boxes; we
+ * thus end up with 32 boxes that we call "T-boxes" here. We will
+ * evaluate them with bitslice code.
+ *
+ * Each T-box is a circuit of multiplexers (sort of) and thus
+ * takes 70 inputs: the 6 actual T-box inputs, and 64 constants
+ * that describe the T-box output for all combinations of the
+ * 6 inputs. With this model, all T-boxes are identical (with
+ * distinct inputs) and thus can be executed in parallel with
+ * bitslice code.
+ *
+ * T-boxes are numbered from 0 to 31, in least-to-most
+ * significant order. Thus, S-box S1 corresponds to T-boxes 31,
+ * 30, 29 and 28, in that order. T-box 'n' is computed with the
+ * bits at rank 'n' in the 32-bit words.
+ *
+ * Words x0 to x5 contain the T-box inputs 0 to 5.
+ */
+ uint32_t x0, x1, x2, x3, x4, x5, z0;
+ uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
+ uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+ uint32_t y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
+ uint32_t y30;
+
+ /*
+ * Spread input bits over the 6 input words x*.
+ *
+ * Masking with 0x11111111 keeps one bit per 4-bit group, and
+ * (x << 4) - x (a multiplication by 15) then turns each kept
+ * bit into a full 0xF group, i.e. the bit is replicated over
+ * its four positions. x0 is x4 rotated left by 4 bits, and x5
+ * is x1 rotated right by 4 bits.
+ */
+ x1 = r0 & (uint32_t)0x11111111;
+ x2 = (r0 >> 1) & (uint32_t)0x11111111;
+ x3 = (r0 >> 2) & (uint32_t)0x11111111;
+ x4 = (r0 >> 3) & (uint32_t)0x11111111;
+ x1 = (x1 << 4) - x1;
+ x2 = (x2 << 4) - x2;
+ x3 = (x3 << 4) - x3;
+ x4 = (x4 << 4) - x4;
+ x0 = (x4 << 4) | (x4 >> 28);
+ x5 = (x1 >> 4) | (x1 << 28);
+
+ /*
+ * XOR with the subkey for this round.
+ */
+ x0 ^= sk[0];
+ x1 ^= sk[1];
+ x2 ^= sk[2];
+ x3 ^= sk[3];
+ x4 ^= sk[4];
+ x5 ^= sk[5];
+
+ /*
+ * The T-boxes are done in parallel, since they all use a
+ * "tree of multiplexer". We use "fake multiplexers":
+ *
+ * y = a ^ (x & b)
+ *
+ * computes y as either 'a' (if x == 0) or 'a ^ b' (if x == 1).
+ *
+ * The first layer combines the constants with x0; each of the
+ * following layers halves the number of live values, using x1,
+ * x2, x3, x4 and finally x5 as selectors. The all-zero entries
+ * (y15 and y31 of the first layer) are not computed.
+ */
+ y0 = (uint32_t)0xEFA72C4D ^ (x0 & (uint32_t)0xEC7AC69C);
+ y1 = (uint32_t)0xAEAAEDFF ^ (x0 & (uint32_t)0x500FB821);
+ y2 = (uint32_t)0x37396665 ^ (x0 & (uint32_t)0x40EFA809);
+ y3 = (uint32_t)0x68D7B833 ^ (x0 & (uint32_t)0xA5EC0B28);
+ y4 = (uint32_t)0xC9C755BB ^ (x0 & (uint32_t)0x252CF820);
+ y5 = (uint32_t)0x73FC3606 ^ (x0 & (uint32_t)0x40205801);
+ y6 = (uint32_t)0xA2A0A918 ^ (x0 & (uint32_t)0xE220F929);
+ y7 = (uint32_t)0x8222BD90 ^ (x0 & (uint32_t)0x44A3F9E1);
+ y8 = (uint32_t)0xD6B6AC77 ^ (x0 & (uint32_t)0x794F104A);
+ y9 = (uint32_t)0x3069300C ^ (x0 & (uint32_t)0x026F320B);
+ y10 = (uint32_t)0x6CE0D5CC ^ (x0 & (uint32_t)0x7640B01A);
+ y11 = (uint32_t)0x59A9A22D ^ (x0 & (uint32_t)0x238F1572);
+ y12 = (uint32_t)0xAC6D0BD4 ^ (x0 & (uint32_t)0x7A63C083);
+ y13 = (uint32_t)0x21C83200 ^ (x0 & (uint32_t)0x11CCA000);
+ y14 = (uint32_t)0xA0E62188 ^ (x0 & (uint32_t)0x202F69AA);
+ /* y15 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+ y16 = (uint32_t)0xAF7D655A ^ (x0 & (uint32_t)0x51B33BE9);
+ y17 = (uint32_t)0xF0168AA3 ^ (x0 & (uint32_t)0x3B0FE8AE);
+ y18 = (uint32_t)0x90AA30C6 ^ (x0 & (uint32_t)0x90BF8816);
+ y19 = (uint32_t)0x5AB2750A ^ (x0 & (uint32_t)0x09E34F9B);
+ y20 = (uint32_t)0x5391BE65 ^ (x0 & (uint32_t)0x0103BE88);
+ y21 = (uint32_t)0x93372BAF ^ (x0 & (uint32_t)0x49AC8E25);
+ y22 = (uint32_t)0xF288210C ^ (x0 & (uint32_t)0x922C313D);
+ y23 = (uint32_t)0x920AF5C0 ^ (x0 & (uint32_t)0x70EF31B0);
+ y24 = (uint32_t)0x63D312C0 ^ (x0 & (uint32_t)0x6A707100);
+ y25 = (uint32_t)0x537B3006 ^ (x0 & (uint32_t)0xB97C9011);
+ y26 = (uint32_t)0xA2EFB0A5 ^ (x0 & (uint32_t)0xA320C959);
+ y27 = (uint32_t)0xBC8F96A5 ^ (x0 & (uint32_t)0x6EA0AB4A);
+ y28 = (uint32_t)0xFAD176A5 ^ (x0 & (uint32_t)0x6953DDF8);
+ y29 = (uint32_t)0x665A14A3 ^ (x0 & (uint32_t)0xF74F3E2B);
+ y30 = (uint32_t)0xF2EFF0CC ^ (x0 & (uint32_t)0xF0306CAD);
+ /* y31 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+
+ y0 = y0 ^ (x1 & y1);
+ y1 = y2 ^ (x1 & y3);
+ y2 = y4 ^ (x1 & y5);
+ y3 = y6 ^ (x1 & y7);
+ y4 = y8 ^ (x1 & y9);
+ y5 = y10 ^ (x1 & y11);
+ y6 = y12 ^ (x1 & y13);
+ y7 = y14; /* was: y14 ^ (x1 & y15) */
+ y8 = y16 ^ (x1 & y17);
+ y9 = y18 ^ (x1 & y19);
+ y10 = y20 ^ (x1 & y21);
+ y11 = y22 ^ (x1 & y23);
+ y12 = y24 ^ (x1 & y25);
+ y13 = y26 ^ (x1 & y27);
+ y14 = y28 ^ (x1 & y29);
+ y15 = y30; /* was: y30 ^ (x1 & y31) */
+
+ y0 = y0 ^ (x2 & y1);
+ y1 = y2 ^ (x2 & y3);
+ y2 = y4 ^ (x2 & y5);
+ y3 = y6 ^ (x2 & y7);
+ y4 = y8 ^ (x2 & y9);
+ y5 = y10 ^ (x2 & y11);
+ y6 = y12 ^ (x2 & y13);
+ y7 = y14 ^ (x2 & y15);
+
+ y0 = y0 ^ (x3 & y1);
+ y1 = y2 ^ (x3 & y3);
+ y2 = y4 ^ (x3 & y5);
+ y3 = y6 ^ (x3 & y7);
+
+ y0 = y0 ^ (x4 & y1);
+ y1 = y2 ^ (x4 & y3);
+
+ y0 = y0 ^ (x5 & y1);
+
+ /*
+ * The P permutation:
+ * -- Each bit move is converted into a mask + left rotation.
+ * -- Rotations that use the same movement are coalesced together.
+ * -- Left and right shifts are used as alternatives to a rotation
+ * where appropriate (this will help architectures that do not have
+ * a rotation opcode).
+ */
+ z0 = (y0 & (uint32_t)0x00000004) << 3;
+ z0 |= (y0 & (uint32_t)0x00004000) << 4;
+ z0 |= rotl(y0 & 0x12020120, 5);
+ z0 |= (y0 & (uint32_t)0x00100000) << 6;
+ z0 |= (y0 & (uint32_t)0x00008000) << 9;
+ z0 |= (y0 & (uint32_t)0x04000000) >> 22;
+ z0 |= (y0 & (uint32_t)0x00000001) << 11;
+ z0 |= rotl(y0 & 0x20000200, 12);
+ z0 |= (y0 & (uint32_t)0x00200000) >> 19;
+ z0 |= (y0 & (uint32_t)0x00000040) << 14;
+ z0 |= (y0 & (uint32_t)0x00010000) << 15;
+ z0 |= (y0 & (uint32_t)0x00000002) << 16;
+ z0 |= rotl(y0 & 0x40801800, 17);
+ z0 |= (y0 & (uint32_t)0x00080000) >> 13;
+ z0 |= (y0 & (uint32_t)0x00000010) << 21;
+ z0 |= (y0 & (uint32_t)0x01000000) >> 10;
+ z0 |= rotl(y0 & 0x88000008, 24);
+ z0 |= (y0 & (uint32_t)0x00000480) >> 7;
+ z0 |= (y0 & (uint32_t)0x00442000) >> 6;
+ return z0;
+}
+
+/*
+ * Process one block through 16 successive rounds, omitting the swap
+ * in the final round.
+ *
+ * The two block halves are read from *pl and *pr and written back
+ * there. 'sk_exp' must point to 96 words: each round hands six words
+ * to Fconf(), then the pointer moves on by six.
+ */
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp)
+{
+ int i;
+ uint32_t l, r;
+
+ l = *pl;
+ r = *pr;
+ for (i = 0; i < 16; i ++) {
+ uint32_t t;
+
+ t = l ^ Fconf(r, sk_exp);
+ l = r;
+ r = t;
+ sk_exp += 6;
+ }
+ *pl = r; /* halves are stored crosswise: this cancels the swap of the last round */
+ *pr = l;
+}
+
+/*
+ * see inner.h
+ *
+ * Process the 8-byte block at 'block' in place. The block is decoded
+ * as two big-endian 32-bit words, goes through br_des_do_IP(), then
+ * through 'num_rounds' successive calls to process_block_unit() (each
+ * one consuming 96 words of 'sk_exp'), then through br_des_do_invIP(),
+ * and is encoded back in big-endian. 'sk_exp' must therefore hold at
+ * least 96 * num_rounds words.
+ */
+void
+br_des_ct_process_block(unsigned num_rounds,
+ const uint32_t *sk_exp, void *block)
+{
+ unsigned char *buf;
+ uint32_t l, r;
+
+ buf = block;
+ l = br_dec32be(buf);
+ r = br_dec32be(buf + 4);
+ br_des_do_IP(&l, &r);
+ while (num_rounds -- > 0) {
+ process_block_unit(&l, &r, sk_exp);
+ sk_exp += 96;
+ }
+ br_des_do_invIP(&l, &r);
+ br_enc32be(buf, l);
+ br_enc32be(buf + 4, r);
+}
+
+/*
+ * see inner.h
+ *
+ * Expand subkeys from 'skey' into 'sk_exp'. The loop runs
+ * 16 * num_rounds times; each iteration reads two words from 'skey'
+ * and writes six words to 'sk_exp', so 'skey' must hold
+ * 32 * num_rounds words and 'sk_exp' 96 * num_rounds words.
+ *
+ * An output word is built by keeping one bit per 4-bit group of the
+ * input word (mask 0x11111111 after a shift by 0 to 3) and replicating
+ * it over the whole group with (w << 4) - w. The first input word
+ * yields four output words (bit ranks 0 to 3 of each group); the second
+ * one yields two (bit ranks 0 and 1 only).
+ */
+void
+br_des_ct_skey_expand(uint32_t *sk_exp,
+ unsigned num_rounds, const uint32_t *skey)
+{
+ num_rounds <<= 4; /* 16 iterations per unit of num_rounds */
+ while (num_rounds -- > 0) {
+ uint32_t v, w0, w1, w2, w3;
+
+ v = *skey ++;
+ w0 = v & 0x11111111;
+ w1 = (v >> 1) & 0x11111111;
+ w2 = (v >> 2) & 0x11111111;
+ w3 = (v >> 3) & 0x11111111;
+ *sk_exp ++ = (w0 << 4) - w0;
+ *sk_exp ++ = (w1 << 4) - w1;
+ *sk_exp ++ = (w2 << 4) - w2;
+ *sk_exp ++ = (w3 << 4) - w3;
+ v = *skey ++;
+ w0 = v & 0x11111111;
+ w1 = (v >> 1) & 0x11111111;
+ *sk_exp ++ = (w0 << 4) - w0;
+ *sk_exp ++ = (w1 << 4) - w1;
+ }
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/des_ct_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/des_ct_cbcdec.c
new file mode 100644
index 00000000..d208a3d2
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/des_ct_cbcdec.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise 'ctx' for CBC decryption with the 'len'-byte key at
+ * 'key': set the vtable pointer, compute the subkeys with
+ * br_des_ct_keysched() and store its return value in ctx->num_rounds,
+ * then reverse the order of the (two-word) subkey entries:
+ *
+ * -- for an 8-byte key, the 16 entries of the single 32-word schedule
+ * are reversed with br_des_rev_skey();
+ * -- otherwise, the 48 entries of the whole 96-word schedule are
+ * reversed by the loop below (entry k is exchanged with entry
+ * 47 - k; the two words of an entry keep their order).
+ */
+void
+br_des_ct_cbcdec_init(br_des_ct_cbcdec_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_des_ct_cbcdec_vtable;
+ ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len);
+ if (len == 8) {
+ br_des_rev_skey(ctx->skey);
+ } else {
+ int i;
+
+ for (i = 0; i < 48; i += 2) {
+ uint32_t t;
+
+ t = ctx->skey[i];
+ ctx->skey[i] = ctx->skey[94 - i];
+ ctx->skey[94 - i] = t;
+ t = ctx->skey[i + 1];
+ ctx->skey[i + 1] = ctx->skey[95 - i];
+ ctx->skey[95 - i] = t;
+ }
+ }
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-decrypt 'len' bytes at 'data', in place, by blocks of 8 bytes.
+ * 'iv' points to an 8-byte buffer which is read for the first block
+ * and updated with each processed ciphertext block, so that on exit it
+ * contains the last ciphertext block.
+ *
+ * 'len' must be a multiple of 8: the loop subtracts 8 from the
+ * (unsigned) length until it reaches zero, so any other value would
+ * wrap around and the loop would run past the end of the data.
+ *
+ * The subkeys are expanded into a local array on each call; 288 words
+ * is the room needed for num_rounds == 3 (96 words per unit).
+ */
+void
+br_des_ct_cbcdec_run(const br_des_ct_cbcdec_keys *ctx,
+ void *iv, void *data, size_t len)
+{
+ unsigned char *buf, *ivbuf;
+ uint32_t sk_exp[288];
+
+ br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+ ivbuf = iv;
+ buf = data;
+ while (len > 0) {
+ unsigned char tmp[8];
+ int i;
+
+ memcpy(tmp, buf, 8); /* keep the ciphertext block: buf is overwritten below */
+ br_des_ct_process_block(ctx->num_rounds, sk_exp, buf);
+ for (i = 0; i < 8; i ++) {
+ buf[i] ^= ivbuf[i];
+ }
+ memcpy(ivbuf, tmp, 8); /* saved ciphertext block is the IV of the next block */
+ buf += 8;
+ len -= 8;
+ }
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for this CBC decryption implementation: context
+ * size, two integer constants, then the init and run functions (cast
+ * to the generic signatures expected by br_block_cbcdec_class).
+ * NOTE(review): the constants 8 and 3 are presumably the block size in
+ * bytes and its base-2 logarithm; the structure is declared elsewhere,
+ * confirm the field meanings in bearssl_block.h.
+ */
+const br_block_cbcdec_class br_des_ct_cbcdec_vtable = {
+ sizeof(br_des_ct_cbcdec_keys),
+ 8,
+ 3,
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t))
+ &br_des_ct_cbcdec_init,
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+ &br_des_ct_cbcdec_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/des_ct_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/des_ct_cbcenc.c
new file mode 100644
index 00000000..4b3610e0
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/des_ct_cbcenc.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise 'ctx' for CBC encryption with the 'len'-byte key at
+ * 'key': set the vtable pointer, compute the subkeys into ctx->skey
+ * with br_des_ct_keysched(), and store its return value in
+ * ctx->num_rounds.
+ */
+void
+br_des_ct_cbcenc_init(br_des_ct_cbcenc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_des_ct_cbcenc_vtable;
+ ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-encrypt 'len' bytes at 'data', in place, by blocks of 8 bytes.
+ * 'iv' points to an 8-byte buffer which is XORed into the first block
+ * and then receives a copy of each produced ciphertext block, so that
+ * on exit it contains the last ciphertext block.
+ *
+ * 'len' must be a multiple of 8: the loop subtracts 8 from the
+ * (unsigned) length until it reaches zero, so any other value would
+ * wrap around and the loop would run past the end of the data.
+ *
+ * The subkeys are expanded into a local array on each call; 288 words
+ * is the room needed for num_rounds == 3 (96 words per unit).
+ */
+void
+br_des_ct_cbcenc_run(const br_des_ct_cbcenc_keys *ctx,
+ void *iv, void *data, size_t len)
+{
+ unsigned char *buf, *ivbuf;
+ uint32_t sk_exp[288];
+
+ br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+ ivbuf = iv;
+ buf = data;
+ while (len > 0) {
+ int i;
+
+ for (i = 0; i < 8; i ++) {
+ buf[i] ^= ivbuf[i];
+ }
+ br_des_ct_process_block(ctx->num_rounds, sk_exp, buf);
+ memcpy(ivbuf, buf, 8); /* ciphertext block is the IV of the next block */
+ buf += 8;
+ len -= 8;
+ }
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for this CBC encryption implementation: context
+ * size, two integer constants, then the init and run functions (cast
+ * to the generic signatures expected by br_block_cbcenc_class).
+ * NOTE(review): the constants 8 and 3 are presumably the block size in
+ * bytes and its base-2 logarithm; the structure is declared elsewhere,
+ * confirm the field meanings in bearssl_block.h.
+ */
+const br_block_cbcenc_class br_des_ct_cbcenc_vtable = {
+ sizeof(br_des_ct_cbcenc_keys),
+ 8,
+ 3,
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t))
+ &br_des_ct_cbcenc_init,
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+ &br_des_ct_cbcenc_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/des_support.c b/test/monniaux/BearSSL/src/symcipher/des_support.c
new file mode 100644
index 00000000..37f6db32
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/des_support.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see inner.h
+ *
+ * Permute, in place, the 64-bit value held in the two words *xl and
+ * *xr. The permutation is done with five masked exchange steps; each
+ * step swaps the bits selected by a mask between one word and a
+ * shifted view of the other word.
+ */
+void
+br_des_do_IP(uint32_t *xl, uint32_t *xr)
+{
+ /*
+ * Permutation algorithm is initially from Richard Outerbridge;
+ * implementation here is adapted from Crypto++ "des.cpp" file
+ * (which is in public domain).
+ */
+ uint32_t l, r, t;
+
+ l = *xl;
+ r = *xr;
+ t = ((l >> 4) ^ r) & (uint32_t)0x0F0F0F0F;
+ r ^= t;
+ l ^= t << 4;
+ t = ((l >> 16) ^ r) & (uint32_t)0x0000FFFF;
+ r ^= t;
+ l ^= t << 16;
+ t = ((r >> 2) ^ l) & (uint32_t)0x33333333;
+ l ^= t;
+ r ^= t << 2;
+ t = ((r >> 8) ^ l) & (uint32_t)0x00FF00FF;
+ l ^= t;
+ r ^= t << 8;
+ t = ((l >> 1) ^ r) & (uint32_t)0x55555555;
+ r ^= t;
+ l ^= t << 1;
+ *xl = l;
+ *xr = r;
+}
+
+/*
+ * see inner.h
+ *
+ * Permute, in place, the 64-bit value held in the two words *xl and
+ * *xr. This applies the same five masked exchange steps as
+ * br_des_do_IP(), in the opposite order.
+ */
+void
+br_des_do_invIP(uint32_t *xl, uint32_t *xr)
+{
+ /*
+ * See br_des_do_IP().
+ */
+ uint32_t l, r, t;
+
+ l = *xl;
+ r = *xr;
+ t = ((l >> 1) ^ r) & 0x55555555;
+ r ^= t;
+ l ^= t << 1;
+ t = ((r >> 8) ^ l) & 0x00FF00FF;
+ l ^= t;
+ r ^= t << 8;
+ t = ((r >> 2) ^ l) & 0x33333333;
+ l ^= t;
+ r ^= t << 2;
+ t = ((l >> 16) ^ r) & 0x0000FFFF;
+ r ^= t;
+ l ^= t << 16;
+ t = ((l >> 4) ^ r) & 0x0F0F0F0F;
+ r ^= t;
+ l ^= t << 4;
+ *xl = l;
+ *xr = r;
+}
+
+/*
+ * see inner.h
+ *
+ * First part of the key schedule for the 8 key bytes at 'key' (read as
+ * two big-endian 32-bit words). Writes 32 words into 'skey': for each
+ * round i (0 to 15), skey[2*i] and skey[2*i+1] receive the two 28-bit
+ * words kl and kr as they are after the rotation of that round. The
+ * upper four bits of every written word are zero.
+ */
+void
+br_des_keysched_unit(uint32_t *skey, const void *key)
+{
+ uint32_t xl, xr, kl, kr;
+ int i;
+
+ xl = br_dec32be(key);
+ xr = br_dec32be((const unsigned char *)key + 4);
+
+ /*
+ * Permutation PC-1 is quite similar to the IP permutation.
+ * Definition of IP (in FIPS 46-3 notations) is:
+ * 58 50 42 34 26 18 10 2
+ * 60 52 44 36 28 20 12 4
+ * 62 54 46 38 30 22 14 6
+ * 64 56 48 40 32 24 16 8
+ * 57 49 41 33 25 17 9 1
+ * 59 51 43 35 27 19 11 3
+ * 61 53 45 37 29 21 13 5
+ * 63 55 47 39 31 23 15 7
+ *
+ * Definition of PC-1 is:
+ * 57 49 41 33 25 17 9 1
+ * 58 50 42 34 26 18 10 2
+ * 59 51 43 35 27 19 11 3
+ * 60 52 44 36
+ * 63 55 47 39 31 23 15 7
+ * 62 54 46 38 30 22 14 6
+ * 61 53 45 37 29 21 13 5
+ * 28 20 12 4
+ *
+ * Hence IP is applied first, and the two 28-bit words are then
+ * assembled from byte-sized pieces of its output.
+ */
+ br_des_do_IP(&xl, &xr);
+ kl = ((xr & (uint32_t)0xFF000000) >> 4)
+ | ((xl & (uint32_t)0xFF000000) >> 12)
+ | ((xr & (uint32_t)0x00FF0000) >> 12)
+ | ((xl & (uint32_t)0x00FF0000) >> 20);
+ kr = ((xr & (uint32_t)0x000000FF) << 20)
+ | ((xl & (uint32_t)0x0000FF00) << 4)
+ | ((xr & (uint32_t)0x0000FF00) >> 4)
+ | ((xl & (uint32_t)0x000F0000) >> 16);
+
+ /*
+ * For each round, rotate the two 28-bit words kl and kr.
+ * The extraction of the 48-bit subkey (PC-2) is not done yet.
+ *
+ * 0x8103 has bits 0, 1, 8 and 15 set: in these four rounds the
+ * words are rotated left by one bit, in all other rounds by two
+ * bits. The rotation is done on 28 bits, hence the right shifts
+ * by 27 or 26 and the final mask.
+ */
+ for (i = 0; i < 16; i ++) {
+ if ((1 << i) & 0x8103) {
+ kl = (kl << 1) | (kl >> 27);
+ kr = (kr << 1) | (kr >> 27);
+ } else {
+ kl = (kl << 2) | (kl >> 26);
+ kr = (kr << 2) | (kr >> 26);
+ }
+ kl &= (uint32_t)0x0FFFFFFF;
+ kr &= (uint32_t)0x0FFFFFFF;
+ skey[(i << 1) + 0] = kl;
+ skey[(i << 1) + 1] = kr;
+ }
+}
+
+/*
+ * see inner.h
+ *
+ * Reverse, in place, the order of the 16 two-word entries of the
+ * 32-word array 'skey': entry k is exchanged with entry 15 - k, and
+ * the two words of an entry keep their relative order.
+ */
+void
+br_des_rev_skey(uint32_t *skey)
+{
+ int i;
+
+ for (i = 0; i < 16; i += 2) {
+ uint32_t t;
+
+ t = skey[i + 0];
+ skey[i + 0] = skey[30 - i];
+ skey[30 - i] = t;
+ t = skey[i + 1];
+ skey[i + 1] = skey[31 - i];
+ skey[31 - i] = t;
+ }
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/des_tab.c b/test/monniaux/BearSSL/src/symcipher/des_tab.c
new file mode 100644
index 00000000..3f8e4f9f
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/des_tab.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * PC2left[x] tells where bit x goes when applying PC-2. 'x' is a bit
+ * position in the left rotated key word. Both positions are in normal
+ * order (rightmost bit is 0). An entry of 24 parks the bit above the
+ * 24 subkey bits (0..23) that Fconf() actually reads.
+static const unsigned char PC2left[] = {
+ 16, 3, 7, 24, 20, 11, 24,
+ 13, 2, 10, 24, 22, 5, 15,
+ 23, 1, 9, 21, 12, 24, 6,
+ 4, 14, 18, 8, 17, 0, 19
+};
+
+/*
+ * Similar to PC2left[x], for the right rotated key word (28 entries, 24 = bit not used).
+ */
+static const unsigned char PC2right[] = {
+ 8, 18, 24, 6, 22, 15, 3,
+ 10, 12, 19, 5, 14, 11, 24,
+ 4, 23, 16, 9, 24, 20, 2,
+ 24, 7, 13, 0, 21, 17, 1
+};
+
+/*
+ * S-boxes and P permutation merged (each entry is an S-box output, already permuted).
+ */
+static const uint32_t S1[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x00808202 */
+ 0x00808200, 0x00000000, 0x00008000, 0x00808202,
+ 0x00808002, 0x00008202, 0x00000002, 0x00008000,
+ 0x00000200, 0x00808200, 0x00808202, 0x00000200,
+ 0x00800202, 0x00808002, 0x00800000, 0x00000002,
+ 0x00000202, 0x00800200, 0x00800200, 0x00008200,
+ 0x00008200, 0x00808000, 0x00808000, 0x00800202,
+ 0x00008002, 0x00800002, 0x00800002, 0x00008002,
+ 0x00000000, 0x00000202, 0x00008202, 0x00800000,
+ 0x00008000, 0x00808202, 0x00000002, 0x00808000,
+ 0x00808200, 0x00800000, 0x00800000, 0x00000200,
+ 0x00808002, 0x00008000, 0x00008200, 0x00800002,
+ 0x00000200, 0x00000002, 0x00800202, 0x00008202,
+ 0x00808202, 0x00008002, 0x00808000, 0x00800202,
+ 0x00800002, 0x00000202, 0x00008202, 0x00808200,
+ 0x00000202, 0x00800200, 0x00800200, 0x00000000,
+ 0x00008002, 0x00008200, 0x00000000, 0x00808002
+};
+
+static const uint32_t S2[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x40084010 */
+ 0x40084010, 0x40004000, 0x00004000, 0x00084010,
+ 0x00080000, 0x00000010, 0x40080010, 0x40004010,
+ 0x40000010, 0x40084010, 0x40084000, 0x40000000,
+ 0x40004000, 0x00080000, 0x00000010, 0x40080010,
+ 0x00084000, 0x00080010, 0x40004010, 0x00000000,
+ 0x40000000, 0x00004000, 0x00084010, 0x40080000,
+ 0x00080010, 0x40000010, 0x00000000, 0x00084000,
+ 0x00004010, 0x40084000, 0x40080000, 0x00004010,
+ 0x00000000, 0x00084010, 0x40080010, 0x00080000,
+ 0x40004010, 0x40080000, 0x40084000, 0x00004000,
+ 0x40080000, 0x40004000, 0x00000010, 0x40084010,
+ 0x00084010, 0x00000010, 0x00004000, 0x40000000,
+ 0x00004010, 0x40084000, 0x00080000, 0x40000010,
+ 0x00080010, 0x40004010, 0x40000010, 0x00080010,
+ 0x00084000, 0x00000000, 0x40004000, 0x00004010,
+ 0x40000000, 0x40080010, 0x40084010, 0x00084000
+};
+
+static const uint32_t S3[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x04010104 */
+ 0x00000104, 0x04010100, 0x00000000, 0x04010004,
+ 0x04000100, 0x00000000, 0x00010104, 0x04000100,
+ 0x00010004, 0x04000004, 0x04000004, 0x00010000,
+ 0x04010104, 0x00010004, 0x04010000, 0x00000104,
+ 0x04000000, 0x00000004, 0x04010100, 0x00000100,
+ 0x00010100, 0x04010000, 0x04010004, 0x00010104,
+ 0x04000104, 0x00010100, 0x00010000, 0x04000104,
+ 0x00000004, 0x04010104, 0x00000100, 0x04000000,
+ 0x04010100, 0x04000000, 0x00010004, 0x00000104,
+ 0x00010000, 0x04010100, 0x04000100, 0x00000000,
+ 0x00000100, 0x00010004, 0x04010104, 0x04000100,
+ 0x04000004, 0x00000100, 0x00000000, 0x04010004,
+ 0x04000104, 0x00010000, 0x04000000, 0x04010104,
+ 0x00000004, 0x00010104, 0x00010100, 0x04000004,
+ 0x04010000, 0x04000104, 0x00000104, 0x04010000,
+ 0x00010104, 0x00000004, 0x04010004, 0x00010100
+};
+
+static const uint32_t S4[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x80401040 */
+ 0x80401000, 0x80001040, 0x80001040, 0x00000040,
+ 0x00401040, 0x80400040, 0x80400000, 0x80001000,
+ 0x00000000, 0x00401000, 0x00401000, 0x80401040,
+ 0x80000040, 0x00000000, 0x00400040, 0x80400000,
+ 0x80000000, 0x00001000, 0x00400000, 0x80401000,
+ 0x00000040, 0x00400000, 0x80001000, 0x00001040,
+ 0x80400040, 0x80000000, 0x00001040, 0x00400040,
+ 0x00001000, 0x00401040, 0x80401040, 0x80000040,
+ 0x00400040, 0x80400000, 0x00401000, 0x80401040,
+ 0x80000040, 0x00000000, 0x00000000, 0x00401000,
+ 0x00001040, 0x00400040, 0x80400040, 0x80000000,
+ 0x80401000, 0x80001040, 0x80001040, 0x00000040,
+ 0x80401040, 0x80000040, 0x80000000, 0x00001000,
+ 0x80400000, 0x80001000, 0x00401040, 0x80400040,
+ 0x80001000, 0x00001040, 0x00400000, 0x80401000,
+ 0x00000040, 0x00400000, 0x00001000, 0x00401040
+};
+
+static const uint32_t S5[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x21040080 */
+ 0x00000080, 0x01040080, 0x01040000, 0x21000080,
+ 0x00040000, 0x00000080, 0x20000000, 0x01040000,
+ 0x20040080, 0x00040000, 0x01000080, 0x20040080,
+ 0x21000080, 0x21040000, 0x00040080, 0x20000000,
+ 0x01000000, 0x20040000, 0x20040000, 0x00000000,
+ 0x20000080, 0x21040080, 0x21040080, 0x01000080,
+ 0x21040000, 0x20000080, 0x00000000, 0x21000000,
+ 0x01040080, 0x01000000, 0x21000000, 0x00040080,
+ 0x00040000, 0x21000080, 0x00000080, 0x01000000,
+ 0x20000000, 0x01040000, 0x21000080, 0x20040080,
+ 0x01000080, 0x20000000, 0x21040000, 0x01040080,
+ 0x20040080, 0x00000080, 0x01000000, 0x21040000,
+ 0x21040080, 0x00040080, 0x21000000, 0x21040080,
+ 0x01040000, 0x00000000, 0x20040000, 0x21000000,
+ 0x00040080, 0x01000080, 0x20000080, 0x00040000,
+ 0x00000000, 0x20040000, 0x01040080, 0x20000080
+};
+
+static const uint32_t S6[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x10202008 */
+ 0x10000008, 0x10200000, 0x00002000, 0x10202008,
+ 0x10200000, 0x00000008, 0x10202008, 0x00200000,
+ 0x10002000, 0x00202008, 0x00200000, 0x10000008,
+ 0x00200008, 0x10002000, 0x10000000, 0x00002008,
+ 0x00000000, 0x00200008, 0x10002008, 0x00002000,
+ 0x00202000, 0x10002008, 0x00000008, 0x10200008,
+ 0x10200008, 0x00000000, 0x00202008, 0x10202000,
+ 0x00002008, 0x00202000, 0x10202000, 0x10000000,
+ 0x10002000, 0x00000008, 0x10200008, 0x00202000,
+ 0x10202008, 0x00200000, 0x00002008, 0x10000008,
+ 0x00200000, 0x10002000, 0x10000000, 0x00002008,
+ 0x10000008, 0x10202008, 0x00202000, 0x10200000,
+ 0x00202008, 0x10202000, 0x00000000, 0x10200008,
+ 0x00000008, 0x00002000, 0x10200000, 0x00202008,
+ 0x00002000, 0x00200008, 0x10002008, 0x00000000,
+ 0x10202000, 0x10000000, 0x00200008, 0x10002008
+};
+
+static const uint32_t S7[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x02100401 */
+ 0x00100000, 0x02100001, 0x02000401, 0x00000000,
+ 0x00000400, 0x02000401, 0x00100401, 0x02100400,
+ 0x02100401, 0x00100000, 0x00000000, 0x02000001,
+ 0x00000001, 0x02000000, 0x02100001, 0x00000401,
+ 0x02000400, 0x00100401, 0x00100001, 0x02000400,
+ 0x02000001, 0x02100000, 0x02100400, 0x00100001,
+ 0x02100000, 0x00000400, 0x00000401, 0x02100401,
+ 0x00100400, 0x00000001, 0x02000000, 0x00100400,
+ 0x02000000, 0x00100400, 0x00100000, 0x02000401,
+ 0x02000401, 0x02100001, 0x02100001, 0x00000001,
+ 0x00100001, 0x02000000, 0x02000400, 0x00100000,
+ 0x02100400, 0x00000401, 0x00100401, 0x02100400,
+ 0x00000401, 0x02000001, 0x02100401, 0x02100000,
+ 0x00100400, 0x00000000, 0x00000001, 0x02100401,
+ 0x00000000, 0x00100401, 0x02100000, 0x00000400,
+ 0x02000001, 0x02000400, 0x00000400, 0x00100001
+};
+
+static const uint32_t S8[] = { /* 64 entries, indexed by a 6-bit value in Fconf(); output bits lie in 0x08020820 */
+ 0x08000820, 0x00000800, 0x00020000, 0x08020820,
+ 0x08000000, 0x08000820, 0x00000020, 0x08000000,
+ 0x00020020, 0x08020000, 0x08020820, 0x00020800,
+ 0x08020800, 0x00020820, 0x00000800, 0x00000020,
+ 0x08020000, 0x08000020, 0x08000800, 0x00000820,
+ 0x00020800, 0x00020020, 0x08020020, 0x08020800,
+ 0x00000820, 0x00000000, 0x00000000, 0x08020020,
+ 0x08000020, 0x08000800, 0x00020820, 0x00020000,
+ 0x00020820, 0x00020000, 0x08020800, 0x00000800,
+ 0x00000020, 0x08020020, 0x00000800, 0x00020820,
+ 0x08000800, 0x00000020, 0x08000020, 0x08020000,
+ 0x08020020, 0x08000000, 0x00020000, 0x08000820,
+ 0x00000000, 0x08020820, 0x00020020, 0x08000020,
+ 0x08020000, 0x08000800, 0x08000820, 0x00000000,
+ 0x08020820, 0x00020800, 0x00020800, 0x00000820,
+ 0x00000820, 0x00020020, 0x08000000, 0x08020800
+};
+
+static inline uint32_t
+Fconf(uint32_t r0, uint32_t skl, uint32_t skr)
+{
+ uint32_t r1;
+ /* Round function: eight table lookups, each indexed by six bits of r0 XORed with six bits of the subkey words skl/skr. */
+ r1 = (r0 << 16) | (r0 >> 16); /* r0 rotated by 16 bits; S1 and S8 need 6-bit windows that wrap around the ends of r0 */
+ return /* the eight table outputs occupy disjoint bits, so OR combines them */
+ S1[((r1 >> 11) ^ (skl >> 18)) & 0x3F]
+ | S2[((r0 >> 23) ^ (skl >> 12)) & 0x3F]
+ | S3[((r0 >> 19) ^ (skl >> 6)) & 0x3F]
+ | S4[((r0 >> 15) ^ (skl )) & 0x3F]
+ | S5[((r0 >> 11) ^ (skr >> 18)) & 0x3F]
+ | S6[((r0 >> 7) ^ (skr >> 12)) & 0x3F]
+ | S7[((r0 >> 3) ^ (skr >> 6)) & 0x3F]
+ | S8[((r1 >> 15) ^ (skr )) & 0x3F];
+}
+
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *skey)
+{
+ int i;
+ uint32_t l, r;
+ /* Run one 16-round pass over the halves (*pl, *pr) using skey[0..31]; the result is written back in place. */
+ l = *pl;
+ r = *pr;
+ for (i = 0; i < 16; i ++) { /* round i uses subkey words skey[2i] and skey[2i+1] */
+ uint32_t t;
+
+ t = l ^ Fconf(r, skey[(i << 1) + 0], skey[(i << 1) + 1]);
+ l = r;
+ r = t;
+ }
+ *pl = r; /* the two halves are stored swapped after the last round */
+ *pr = l;
+}
+
+/* see inner.h; transforms the 8-byte block in place with num_rounds successive 16-round passes */
+void
+br_des_tab_process_block(unsigned num_rounds, const uint32_t *skey, void *block)
+{
+ unsigned char *buf;
+ uint32_t l, r;
+
+ buf = block;
+ l = br_dec32be(buf); /* first four bytes of the block */
+ r = br_dec32be(buf + 4); /* last four bytes of the block */
+ br_des_do_IP(&l, &r);
+ while (num_rounds -- > 0) {
+ process_block_unit(&l, &r, skey);
+ skey += 32; /* each pass consumes 32 subkey words */
+ }
+ br_des_do_invIP(&l, &r);
+ br_enc32be(buf, l);
+ br_enc32be(buf + 4, r);
+}
+
+static void
+keysched_unit(uint32_t *skey, const void *key)
+{
+ int i;
+ /* Fill skey[0..31] from the key at 'key': rotated key words first, then PC-2 applied to each pair. */
+ br_des_keysched_unit(skey, key);
+
+ /*
+ * Apply PC-2 to get the 48-bit subkeys.
+ */
+ for (i = 0; i < 16; i ++) {
+ uint32_t xl, xr, ul, ur;
+ int j;
+
+ xl = skey[(i << 1) + 0];
+ xr = skey[(i << 1) + 1];
+ ul = 0;
+ ur = 0;
+ for (j = 0; j < 28; j ++) { /* one iteration per bit of the 28-bit rotated words */
+ ul |= (xl & 1) << PC2left[j]; /* bit j of xl lands at bit PC2left[j] of ul */
+ ur |= (xr & 1) << PC2right[j]; /* bit j of xr lands at bit PC2right[j] of ur */
+ xl >>= 1;
+ xr >>= 1;
+ }
+ skey[(i << 1) + 0] = ul;
+ skey[(i << 1) + 1] = ur;
+ }
+}
+
+/* see inner.h; fills skey[] from key[] and returns the number of 32-word schedules to run (1 or 3) */
+unsigned
+br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+ switch (key_len) {
+ case 8: /* one 8-byte key: a single schedule in skey[0..31] */
+ keysched_unit(skey, key);
+ return 1;
+ case 16: /* two 8-byte keys: three schedules, skey[] must hold 96 words */
+ keysched_unit(skey, key);
+ keysched_unit(skey + 32, (const unsigned char *)key + 8);
+ br_des_rev_skey(skey + 32); /* the middle schedule is used in reversed order */
+ memcpy(skey + 64, skey, 32 * sizeof *skey); /* third schedule is a copy of the first */
+ return 3;
+ default: /* any other key_len is handled as three 8-byte keys (24 bytes are read) */
+ keysched_unit(skey, key);
+ keysched_unit(skey + 32, (const unsigned char *)key + 8);
+ br_des_rev_skey(skey + 32); /* the middle schedule is used in reversed order */
+ keysched_unit(skey + 64, (const unsigned char *)key + 16);
+ return 3;
+ }
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/des_tab_cbcdec.c b/test/monniaux/BearSSL/src/symcipher/des_tab_cbcdec.c
new file mode 100644
index 00000000..e7eabe9d
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/des_tab_cbcdec.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; builds the key schedule, then reverses it so the same block routine decrypts */
+void
+br_des_tab_cbcdec_init(br_des_tab_cbcdec_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_des_tab_cbcdec_vtable;
+ ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);
+ if (len == 8) { /* single schedule: reverse its 16 word pairs */
+ br_des_rev_skey(ctx->skey);
+ } else { /* three schedules: reverse all 48 word pairs of skey[0..95] as a whole */
+ int i;
+
+ for (i = 0; i < 48; i += 2) { /* 24 swaps: pair i/2 is exchanged with pair 47 - i/2 */
+ uint32_t t;
+
+ t = ctx->skey[i];
+ ctx->skey[i] = ctx->skey[94 - i];
+ ctx->skey[94 - i] = t;
+ t = ctx->skey[i + 1];
+ ctx->skey[i + 1] = ctx->skey[95 - i];
+ ctx->skey[95 - i] = t;
+ }
+ }
+}
+
+/* see bearssl_block.h; CBC decryption of data[] in place, iv[] is updated to the last ciphertext block */
+void
+br_des_tab_cbcdec_run(const br_des_tab_cbcdec_keys *ctx,
+ void *iv, void *data, size_t len)
+{
+ unsigned char *buf, *ivbuf;
+
+ ivbuf = iv;
+ buf = data;
+ while (len > 0) { /* NOTE: len must be a multiple of 8, otherwise 'len -= 8' wraps around (size_t) */
+ unsigned char tmp[8];
+ int i;
+
+ memcpy(tmp, buf, 8); /* keep the ciphertext block: it is the IV of the next block */
+ br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf);
+ for (i = 0; i < 8; i ++) {
+ buf[i] ^= ivbuf[i];
+ }
+ memcpy(ivbuf, tmp, 8);
+ buf += 8;
+ len -= 8;
+ }
+}
+
+/* see bearssl_block.h; dispatch table for the table-based DES CBC decryption implementation */
+const br_block_cbcdec_class br_des_tab_cbcdec_vtable = {
+ sizeof(br_des_tab_cbcdec_keys), /* size of the context structure */
+ 8, /* presumably the block size in bytes; confirm against br_block_cbcdec_class */
+ 3, /* presumably log2 of the block size; confirm against br_block_cbcdec_class */
+ (void (*)(const br_block_cbcdec_class **, const void *, size_t))
+ &br_des_tab_cbcdec_init,
+ (void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+ &br_des_tab_cbcdec_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/des_tab_cbcenc.c b/test/monniaux/BearSSL/src/symcipher/des_tab_cbcenc.c
new file mode 100644
index 00000000..3a45ba3e
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/des_tab_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; sets the vtable and builds the key schedule (used as produced, no reversal) */
+void
+br_des_tab_cbcenc_init(br_des_tab_cbcenc_keys *ctx,
+ const void *key, size_t len)
+{
+ ctx->vtable = &br_des_tab_cbcenc_vtable;
+ ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len); /* 1 or 3 schedules of 32 words */
+}
+
+/* see bearssl_block.h; CBC encryption of data[] in place, iv[] is updated to the last ciphertext block */
+void
+br_des_tab_cbcenc_run(const br_des_tab_cbcenc_keys *ctx,
+ void *iv, void *data, size_t len)
+{
+ unsigned char *buf, *ivbuf;
+
+ ivbuf = iv;
+ buf = data;
+ while (len > 0) { /* NOTE: len must be a multiple of 8, otherwise 'len -= 8' wraps around (size_t) */
+ int i;
+
+ for (i = 0; i < 8; i ++) {
+ buf[i] ^= ivbuf[i];
+ }
+ br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf);
+ memcpy(ivbuf, buf, 8); /* the fresh ciphertext block is the IV of the next block */
+ buf += 8;
+ len -= 8;
+ }
+}
+
+/* see bearssl_block.h; dispatch table for the table-based DES CBC encryption implementation */
+const br_block_cbcenc_class br_des_tab_cbcenc_vtable = {
+ sizeof(br_des_tab_cbcenc_keys), /* size of the context structure */
+ 8, /* presumably the block size in bytes; confirm against br_block_cbcenc_class */
+ 3, /* presumably log2 of the block size; confirm against br_block_cbcenc_class */
+ (void (*)(const br_block_cbcenc_class **, const void *, size_t))
+ &br_des_tab_cbcenc_init,
+ (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+ &br_des_tab_cbcenc_run
+};
diff --git a/test/monniaux/BearSSL/src/symcipher/poly1305_ctmul.c b/test/monniaux/BearSSL/src/symcipher/poly1305_ctmul.c
new file mode 100644
index 00000000..150e610a
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/poly1305_ctmul.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Perform the inner processing of blocks for Poly1305. The accumulator
+ * and the r key are provided as arrays of 26-bit words (these words
+ * are allowed to have an extra bit, i.e. use 27 bits).
+ * data[] (len bytes) is consumed in 16-byte blocks; acc[] is updated in place.
+ * On output, all accumulator words fit on 26 bits, except acc[1], which
+ * may be slightly larger (but by a very small amount only).
+ */
+static void
+poly1305_inner(uint32_t *acc, const uint32_t *r, const void *data, size_t len)
+{
+ /*
+ * Implementation notes: we split the 130-bit values into five
+ * 26-bit words. This gives us some space for carries.
+ *
+ * This code is inspired from the public-domain code available
+ * on:
+ * https://github.com/floodyberry/poly1305-donna
+ *
+ * Since we compute modulo 2^130-5, the "upper words" become
+ * low words with a factor of 5; that is, x*2^130 = x*5 mod p.
+ */
+ const unsigned char *buf;
+ uint32_t a0, a1, a2, a3, a4;
+ uint32_t r0, r1, r2, r3, r4;
+ uint32_t u1, u2, u3, u4;
+
+ r0 = r[0];
+ r1 = r[1];
+ r2 = r[2];
+ r3 = r[3];
+ r4 = r[4];
+
+ u1 = r1 * 5; /* u1..u4: key words with the factor 5 pre-applied, for products that wrap past 2^130 */
+ u2 = r2 * 5;
+ u3 = r3 * 5;
+ u4 = r4 * 5;
+
+ a0 = acc[0];
+ a1 = acc[1];
+ a2 = acc[2];
+ a3 = acc[3];
+ a4 = acc[4];
+
+ buf = data;
+ while (len > 0) {
+ uint64_t w0, w1, w2, w3, w4;
+ uint64_t c;
+ unsigned char tmp[16];
+
+ /*
+ * If there is a partial block, right-pad it with zeros.
+ */
+ if (len < 16) {
+ memset(tmp, 0, sizeof tmp);
+ memcpy(tmp, buf, len);
+ buf = tmp;
+ len = 16; /* exactly one (padded) block remains, so the loop ends after it */
+ }
+
+ /*
+ * Decode next block and apply the "high bit"; that value
+ * is added to the accumulator.
+ */
+ a0 += br_dec32le(buf) & 0x03FFFFFF;
+ a1 += (br_dec32le(buf + 3) >> 2) & 0x03FFFFFF;
+ a2 += (br_dec32le(buf + 6) >> 4) & 0x03FFFFFF;
+ a3 += (br_dec32le(buf + 9) >> 6) & 0x03FFFFFF;
+ a4 += (br_dec32le(buf + 12) >> 8) | 0x01000000; /* bit 24 of the top word is bit 128 of the block (4*26 + 24) */
+
+ /*
+ * Compute multiplication.
+ */
+#define M(x, y) ((uint64_t)(x) * (uint64_t)(y))
+
+ w0 = M(a0, r0) + M(a1, u4) + M(a2, u3) + M(a3, u2) + M(a4, u1);
+ w1 = M(a0, r1) + M(a1, r0) + M(a2, u4) + M(a3, u3) + M(a4, u2);
+ w2 = M(a0, r2) + M(a1, r1) + M(a2, r0) + M(a3, u4) + M(a4, u3);
+ w3 = M(a0, r3) + M(a1, r2) + M(a2, r1) + M(a3, r0) + M(a4, u4);
+ w4 = M(a0, r4) + M(a1, r3) + M(a2, r2) + M(a3, r1) + M(a4, r0);
+
+#undef M
+ /*
+ * Perform some (partial) modular reduction. This step is
+ * enough to keep values in ranges such that there won't
+ * be carry overflows. Most of the reduction was done in
+ * the multiplication step (by using the 'u*' values, and
+ * using the fact that 2^130 = 5 mod p); here we perform
+ * some carry propagation.
+ */
+ c = w0 >> 26;
+ a0 = (uint32_t)w0 & 0x3FFFFFF;
+ w1 += c;
+ c = w1 >> 26;
+ a1 = (uint32_t)w1 & 0x3FFFFFF;
+ w2 += c;
+ c = w2 >> 26;
+ a2 = (uint32_t)w2 & 0x3FFFFFF;
+ w3 += c;
+ c = w3 >> 26;
+ a3 = (uint32_t)w3 & 0x3FFFFFF;
+ w4 += c;
+ c = w4 >> 26;
+ a4 = (uint32_t)w4 & 0x3FFFFFF;
+ a0 += (uint32_t)c * 5; /* the carry out of the top word wraps around to a0 with factor 5 */
+ a1 += a0 >> 26; /* a1 is not re-masked: this is why acc[1] may slightly exceed 26 bits */
+ a0 &= 0x3FFFFFF;
+
+ buf += 16;
+ len -= 16;
+ }
+
+ acc[0] = a0;
+ acc[1] = a1;
+ acc[2] = a2;
+ acc[3] = a3;
+ acc[4] = a4;
+}
+
+/* see bearssl_block.h; runs ichacha over data[] and writes the 16-byte Poly1305 tag (AAD, data, lengths) to tag[] */
+void
+br_poly1305_ctmul_run(const void *key, const void *iv,
+ void *data, size_t len, const void *aad, size_t aad_len,
+ void *tag, br_chacha20_run ichacha, int encrypt)
+{
+ unsigned char pkey[32], foot[16];
+ uint32_t r[5], acc[5], cc, ctl, hi;
+ uint64_t w;
+ int i;
+
+ /*
+ * Compute the MAC key. The 'r' value is the first 16 bytes of
+ * pkey[].
+ */
+ memset(pkey, 0, sizeof pkey);
+ ichacha(key, iv, 0, pkey, sizeof pkey);
+
+ /*
+ * If encrypting, ChaCha20 must run first, followed by Poly1305.
+ * When decrypting, the operations are reversed.
+ */
+ if (encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+
+ /*
+ * Run Poly1305. We must process the AAD, then ciphertext, then
+ * the footer (with the lengths). Note that the AAD and ciphertext
+ * are meant to be padded with zeros up to the next multiple of 16,
+ * and the length of the footer is 16 bytes as well.
+ */
+
+ /*
+ * Decode the 'r' value into 26-bit words, with the "clamping"
+ * operation applied.
+ */
+ r[0] = br_dec32le(pkey) & 0x03FFFFFF;
+ r[1] = (br_dec32le(pkey + 3) >> 2) & 0x03FFFF03;
+ r[2] = (br_dec32le(pkey + 6) >> 4) & 0x03FFC0FF;
+ r[3] = (br_dec32le(pkey + 9) >> 6) & 0x03F03FFF;
+ r[4] = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF;
+
+ /*
+ * Accumulator is 0.
+ */
+ memset(acc, 0, sizeof acc);
+
+ /*
+ * Process the additional authenticated data, ciphertext, and
+ * footer in due order.
+ */
+ br_enc64le(foot, (uint64_t)aad_len);
+ br_enc64le(foot + 8, (uint64_t)len);
+ poly1305_inner(acc, r, aad, aad_len);
+ poly1305_inner(acc, r, data, len);
+ poly1305_inner(acc, r, foot, sizeof foot);
+
+ /*
+ * Finalise modular reduction with carry propagation. The carry out
+ * of acc[4] has weight 2^130 = 5 mod p, so it re-enters acc[0] times
+ * 5. The output of poly1305_inner() is already mostly reduced (only
+ * acc[1] may be very slightly above 2^26), so a single loop back to
+ * acc[1] is enough to make the value fit in 130 bits.
+ */
+ cc = 0;
+ for (i = 1; i <= 6; i ++) { /* visits acc[1], acc[2], acc[3], acc[4], acc[0], acc[1] */
+ int j;
+
+ j = (i >= 5) ? i - 5 : i;
+ acc[j] += (i == 5) ? cc * 5 : cc; /* i == 5: carry out of acc[4] wraps to acc[0] with factor 5 (i is public, not secret) */
+ cc = acc[j] >> 26;
+ acc[j] &= 0x03FFFFFF;
+ }
+
+ /*
+ * We may still have a value in the 2^130-5..2^130-1 range, in
+ * which case we must reduce it again. The code below selects,
+ * in constant-time, between 'acc' and 'acc-p'.
+ */
+ ctl = GT(acc[0], 0x03FFFFFA);
+ for (i = 1; i < 5; i ++) {
+ ctl &= EQ(acc[i], 0x03FFFFFF);
+ }
+ cc = 5; /* acc - p is computed as acc + 5 with the carry out of bit 130 dropped */
+ for (i = 0; i < 5; i ++) {
+ uint32_t t;
+
+ t = (acc[i] + cc);
+ cc = t >> 26;
+ t &= 0x03FFFFFF;
+ acc[i] = MUX(ctl, t, acc[i]);
+ }
+
+ /*
+ * Convert back the accumulator to 32-bit words, and add the
+ * 's' value (second half of pkey[]). That addition is done
+ * modulo 2^128.
+ */
+ w = (uint64_t)acc[0] + ((uint64_t)acc[1] << 26) + br_dec32le(pkey + 16);
+ br_enc32le((unsigned char *)tag, (uint32_t)w);
+ w = (w >> 32) + ((uint64_t)acc[2] << 20) + br_dec32le(pkey + 20);
+ br_enc32le((unsigned char *)tag + 4, (uint32_t)w);
+ w = (w >> 32) + ((uint64_t)acc[3] << 14) + br_dec32le(pkey + 24);
+ br_enc32le((unsigned char *)tag + 8, (uint32_t)w);
+ hi = (uint32_t)(w >> 32) + (acc[4] << 8) + br_dec32le(pkey + 28); /* 32-bit arithmetic drops everything above bit 127 */
+ br_enc32le((unsigned char *)tag + 12, hi);
+
+ /*
+ * If decrypting, then ChaCha20 runs _after_ Poly1305.
+ */
+ if (!encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/poly1305_ctmul32.c b/test/monniaux/BearSSL/src/symcipher/poly1305_ctmul32.c
new file mode 100644
index 00000000..15d9635d
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/poly1305_ctmul32.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Perform the inner processing of blocks for Poly1305: a[] holds the ten accumulator words, r[] the 19 extended key words.
+ */
+static void
+poly1305_inner(uint32_t *a, const uint32_t *r, const void *data, size_t len)
+{
+ /*
+ * Implementation notes: we split the 130-bit values into ten
+ * 13-bit words. This gives us some space for carries and allows
+ * using only 32x32->32 multiplications, which are way faster than
+ * 32x32->64 multiplications on the ARM Cortex-M0/M0+, and also
+ * help in making constant-time code on the Cortex-M3.
+ *
+ * Since we compute modulo 2^130-5, the "upper words" become
+ * low words with a factor of 5; that is, x*2^130 = x*5 mod p.
+ * This has already been integrated in the r[] array, which
+ * is extended to the 0..18 range.
+ *
+ * In each loop iteration, a[] and r[] words are 13-bit each,
+ * except a[1] which may use 14 bits.
+ */
+ const unsigned char *buf;
+
+ buf = data;
+ while (len > 0) {
+ unsigned char tmp[16];
+ uint32_t b[10];
+ unsigned u, v;
+ uint32_t z, cc1, cc2;
+
+ /*
+ * If there is a partial block, right-pad it with zeros.
+ */
+ if (len < 16) {
+ memset(tmp, 0, sizeof tmp);
+ memcpy(tmp, buf, len);
+ buf = tmp;
+ len = 16; /* exactly one (padded) block remains, so the loop ends after it */
+ }
+
+ /*
+ * Decode next block and apply the "high bit"; that value
+ * is added to the accumulator.
+ */
+ v = br_dec16le(buf); /* v is a small bit reservoir: 13 bits are taken out, then the next bytes are pushed in */
+ a[0] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[2] << 3;
+ v |= buf[3] << 11;
+ a[1] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[4] << 6;
+ a[2] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[5] << 1;
+ v |= buf[6] << 9;
+ a[3] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[7] << 4;
+ v |= buf[8] << 12;
+ a[4] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[9] << 7;
+ a[5] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[10] << 2;
+ v |= buf[11] << 10;
+ a[6] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[12] << 5;
+ a[7] += v & 0x01FFF;
+ v = br_dec16le(buf + 13); /* bit 104 (= 8 * 13) starts exactly at byte 13, so the reservoir is reloaded */
+ a[8] += v & 0x01FFF;
+ v >>= 13;
+ v |= buf[15] << 3;
+ a[9] += v | 0x00800; /* bit 11 of the top word is bit 128 of the block (9*13 + 11) */
+
+ /*
+ * At that point, all a[] values fit on 14 bits, while
+ * all r[] values fit on 13 bits. Thus products fit on
+ * 27 bits, and we can accumulate up to 31 of them in
+ * a 32-bit word and still have some room for carries.
+ */
+
+ /*
+ * Now a[] contains words with values up to 14 bits each.
+ * We perform the multiplication with r[].
+ *
+ * The extended words of r[] may be larger than 13 bits
+ * (they are 5 times a 13-bit word) so the full summation
+ * may yield values up to 46 times a 27-bit word, which
+ * does not fit on a 32-bit word. To avoid that issue, we
+ * must split the loop below in two, with a carry
+ * propagation operation in the middle.
+ */
+ cc1 = 0;
+ for (u = 0; u < 10; u ++) { /* first half: contributions of a[0..4] to output word u */
+ uint32_t s;
+
+ s = cc1
+ + MUL15(a[0], r[u + 9 - 0])
+ + MUL15(a[1], r[u + 9 - 1])
+ + MUL15(a[2], r[u + 9 - 2])
+ + MUL15(a[3], r[u + 9 - 3])
+ + MUL15(a[4], r[u + 9 - 4]);
+ b[u] = s & 0x1FFF;
+ cc1 = s >> 13;
+ }
+ cc2 = 0;
+ for (u = 0; u < 10; u ++) { /* second half: contributions of a[5..9], added to the partial words in b[] */
+ uint32_t s;
+
+ s = b[u] + cc2
+ + MUL15(a[5], r[u + 9 - 5])
+ + MUL15(a[6], r[u + 9 - 6])
+ + MUL15(a[7], r[u + 9 - 7])
+ + MUL15(a[8], r[u + 9 - 8])
+ + MUL15(a[9], r[u + 9 - 9]);
+ b[u] = s & 0x1FFF;
+ cc2 = s >> 13;
+ }
+ memcpy(a, b, sizeof b);
+
+ /*
+ * The two carries "loop back" with a factor of 5. We
+ * propagate them into a[0] and a[1].
+ */
+ z = cc1 + cc2;
+ z += (z << 2) + a[0]; /* z = 5 * (cc1 + cc2) + a[0] */
+ a[0] = z & 0x1FFF;
+ a[1] += z >> 13; /* a[1] is not re-masked: this is why it may use 14 bits */
+
+ buf += 16;
+ len -= 16;
+ }
+}
+
+/* see bearssl_block.h; runs ichacha over data[] and writes the 16-byte Poly1305 tag (AAD, data, lengths) to tag[] */
+void
+br_poly1305_ctmul32_run(const void *key, const void *iv,
+ void *data, size_t len, const void *aad, size_t aad_len,
+ void *tag, br_chacha20_run ichacha, int encrypt)
+{
+ unsigned char pkey[32], foot[16];
+ uint32_t z, r[19], acc[10], cc, ctl;
+ int i;
+
+ /*
+ * Compute the MAC key. The 'r' value is the first 16 bytes of
+ * pkey[].
+ */
+ memset(pkey, 0, sizeof pkey);
+ ichacha(key, iv, 0, pkey, sizeof pkey);
+
+ /*
+ * If encrypting, ChaCha20 must run first, followed by Poly1305.
+ * When decrypting, the operations are reversed.
+ */
+ if (encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+
+ /*
+ * Run Poly1305. We must process the AAD, then ciphertext, then
+ * the footer (with the lengths). Note that the AAD and ciphertext
+ * are meant to be padded with zeros up to the next multiple of 16,
+ * and the length of the footer is 16 bytes as well.
+ */
+
+ /*
+ * Decode the 'r' value into 13-bit words, with the "clamping"
+ * operation applied.
+ */
+ z = br_dec32le(pkey) & 0x03FFFFFF; /* each 26-bit chunk is clamped, then split into two 13-bit words r[9..18] */
+ r[9] = z & 0x1FFF;
+ r[10] = z >> 13;
+ z = (br_dec32le(pkey + 3) >> 2) & 0x03FFFF03;
+ r[11] = z & 0x1FFF;
+ r[12] = z >> 13;
+ z = (br_dec32le(pkey + 6) >> 4) & 0x03FFC0FF;
+ r[13] = z & 0x1FFF;
+ r[14] = z >> 13;
+ z = (br_dec32le(pkey + 9) >> 6) & 0x03F03FFF;
+ r[15] = z & 0x1FFF;
+ r[16] = z >> 13;
+ z = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF;
+ r[17] = z & 0x1FFF;
+ r[18] = z >> 13;
+
+ /*
+ * Extend r[] with the 5x factor pre-applied.
+ */
+ for (i = 0; i < 9; i ++) {
+ r[i] = MUL15(5, r[i + 10]); /* r[0..8] = 5 * r[10..18] */
+ }
+
+ /*
+ * Accumulator is 0.
+ */
+ memset(acc, 0, sizeof acc);
+
+ /*
+ * Process the additional authenticated data, ciphertext, and
+ * footer in due order.
+ */
+ br_enc64le(foot, (uint64_t)aad_len);
+ br_enc64le(foot + 8, (uint64_t)len);
+ poly1305_inner(acc, r, aad, aad_len);
+ poly1305_inner(acc, r, data, len);
+ poly1305_inner(acc, r, foot, sizeof foot);
+
+ /*
+ * Finalise modular reduction. This is done with carry propagation
+ * and applying the '2^130 = 5 mod p' rule. Note that the output
+ * of poly1305_inner() is already mostly reduced, since only
+ * acc[1] may be (very slightly) above 2^13. A single loop back
+ * to acc[1] will be enough to make the value fit in 130 bits.
+ */
+ cc = 0;
+ for (i = 1; i < 10; i ++) {
+ z = acc[i] + cc;
+ acc[i] = z & 0x1FFF;
+ cc = z >> 13;
+ }
+ z = acc[0] + cc + (cc << 2); /* carry out of acc[9] re-enters acc[0] with factor 5 */
+ acc[0] = z & 0x1FFF;
+ acc[1] += z >> 13;
+
+ /*
+ * We may still have a value in the 2^130-5..2^130-1 range, in
+ * which case we must reduce it again. The code below selects,
+ * in constant-time, between 'acc' and 'acc-p'.
+ */
+ ctl = GT(acc[0], 0x1FFA);
+ for (i = 1; i < 10; i ++) {
+ ctl &= EQ(acc[i], 0x1FFF);
+ }
+ acc[0] = MUX(ctl, acc[0] - 0x1FFB, acc[0]); /* 0x1FFB = 2^13 - 5, the low word of p */
+ for (i = 1; i < 10; i ++) {
+ acc[i] &= ~(-ctl); /* clears the word when ctl is 1, leaves it unchanged when ctl is 0 */
+ }
+
+ /*
+ * Convert back the accumulator to 32-bit words, and add the
+ * 's' value (second half of pkey[]). That addition is done
+ * modulo 2^128.
+ */
+ z = acc[0] + (acc[1] << 13) + br_dec16le(pkey + 16); /* tag is emitted 16 bits at a time; z >> 16 carries into the next step */
+ br_enc16le((unsigned char *)tag, z & 0xFFFF);
+ z = (z >> 16) + (acc[2] << 10) + br_dec16le(pkey + 18);
+ br_enc16le((unsigned char *)tag + 2, z & 0xFFFF);
+ z = (z >> 16) + (acc[3] << 7) + br_dec16le(pkey + 20);
+ br_enc16le((unsigned char *)tag + 4, z & 0xFFFF);
+ z = (z >> 16) + (acc[4] << 4) + br_dec16le(pkey + 22);
+ br_enc16le((unsigned char *)tag + 6, z & 0xFFFF);
+ z = (z >> 16) + (acc[5] << 1) + (acc[6] << 14) + br_dec16le(pkey + 24);
+ br_enc16le((unsigned char *)tag + 8, z & 0xFFFF);
+ z = (z >> 16) + (acc[7] << 11) + br_dec16le(pkey + 26);
+ br_enc16le((unsigned char *)tag + 10, z & 0xFFFF);
+ z = (z >> 16) + (acc[8] << 8) + br_dec16le(pkey + 28);
+ br_enc16le((unsigned char *)tag + 12, z & 0xFFFF);
+ z = (z >> 16) + (acc[9] << 5) + br_dec16le(pkey + 30);
+ br_enc16le((unsigned char *)tag + 14, z & 0xFFFF); /* bits above 127 are dropped by the mask */
+
+ /*
+ * If decrypting, then ChaCha20 runs _after_ Poly1305.
+ */
+ if (!encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+}
diff --git a/test/monniaux/BearSSL/src/symcipher/poly1305_ctmulq.c b/test/monniaux/BearSSL/src/symcipher/poly1305_ctmulq.c
new file mode 100644
index 00000000..b00683a6
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/poly1305_ctmulq.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_INT128
+/*
+ * MUL128(hi, lo, x, y): compute the full 128-bit product x*y, then
+ * store its upper 64 bits into 'hi' and its lower 64 bits into 'lo'
+ * (both must be assignable 64-bit lvalues). This variant performs the
+ * multiplication on the 'unsigned __int128' type.
+ */
+#define MUL128(hi, lo, x, y) do { \
+ unsigned __int128 mul128tmp; \
+ mul128tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \
+ (hi) = (uint64_t)(mul128tmp >> 64); \
+ (lo) = (uint64_t)mul128tmp; \
+ } while (0)
+
+#elif BR_UMUL128
+/*
+ * Same contract as above, implemented with the _umul128() intrinsic
+ * declared in <intrin.h>: the value returned by the call is stored in
+ * 'lo', and the intrinsic writes the other half through the pointer
+ * to 'hi'.
+ */
+#include <intrin.h>
+
+#define MUL128(hi, lo, x, y) do { \
+ (lo) = _umul128((x), (y), &(hi)); \
+ } while (0)
+
+#endif
+/*
+ * Masks that keep the low 42 bits (MASK42) and the low 44 bits (MASK44)
+ * of a 64-bit word.
+ */
+#define MASK42 ((uint64_t)0x000003FFFFFFFFFF)
+#define MASK44 ((uint64_t)0x00000FFFFFFFFFFF)
+
+/*
+ * The "accumulator" word is nominally a 130-bit value. We split it into
+ * words of 44 bits, each held in a 64-bit variable.
+ *
+ * If the current accumulator is a = a0 + a1*W + a2*W^2 (where W = 2^44)
+ * and r = r0 + r1*W + r2*W^2, then:
+ *
+ * a*r = (a0*r0)
+ * + (a0*r1 + a1*r0) * W
+ * + (a0*r2 + a1*r1 + a2*r0) * W^2
+ * + (a1*r2 + a2*r1) * W^3
+ * + (a2*r2) * W^4
+ *
+ * We want to reduce that value modulo p = 2^130-5, so W^3 = 20 mod p,
+ * and W^4 = 20*W mod p. Thus, if we define u1 = 20*r1 and u2 = 20*r2,
+ * then the equations above become:
+ *
+ * b0 = a0*r0 + a1*u2 + a2*u1
+ * b1 = a0*r1 + a1*r0 + a2*u2
+ * b2 = a0*r2 + a1*r1 + a2*r0
+ *
+ * In order to make u1 fit in 44 bits, we can change these equations
+ * into:
+ *
+ * b0 = a0*r0 + a1*u2 + a2*t1
+ * b1 = a0*r1 + a1*r0 + a2*t2
+ * b2 = a0*r2 + a1*r1 + a2*r0
+ *
+ * Where t1 is u1 truncated to 44 bits, and t2 is u2 added to the extra
+ * bits of u1. Note that since r is clamped down to a 124-bit value, the
+ * values u2 and t2 fit on 44 bits too.
+ *
+ * The bx values are larger than 44 bits, so we may split them into a
+ * lower half (cx, 44 bits) and an upper half (dx). The new values for
+ * the accumulator are then:
+ *
+ * e0 = c0 + 20*d2
+ * e1 = c1 + d0
+ * e2 = c2 + d1
+ *
+ * The equations allow for some room, i.e. the ax values may be larger
+ * than 44 bits. Similarly, the ex values will usually be larger than
+ * the ax. Thus, some sort of carry propagation must be done regularly,
+ * though not necessarily at each iteration. In particular, we do not
+ * need to compute the additions (for the bx values) over 128-bit
+ * quantities; we can stick to 64-bit computations.
+ *
+ *
+ * Since the 128-bit result of a 64x64 multiplication is actually
+ * represented over two 64-bit registers, it is cheaper to arrange for
+ * any split that happens between the "high" and "low" halves to be on
+ * that 64-bit boundary. This is done by left shifting the rx, ux and tx
+ * by 20 bits (since they all fit on 44 bits each, this shift is
+ * always possible).
+ */
+
+static void
+poly1305_inner_big(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+ /*
+ * Bulk Poly1305 processing. Absorbs 'len' bytes from 'data' into
+ * the three-limb accumulator acc[0..2] (read on entry, written back
+ * on exit). r[0..5] are read as r0, r1, r2, t1, t2 and u2, i.e. the
+ * multiplier words described in the comment above, pre-shifted left
+ * by 20 bits.
+ *
+ * Each loop iteration consumes exactly 64 bytes (four 16-byte
+ * blocks) and subtracts 64 from 'len'; 'len' must therefore be a
+ * multiple of 64, otherwise the loop would read past the input and
+ * 'len' would wrap around. Carries are propagated only once per
+ * iteration, after the fourth block.
+ *
+ * MX(hi, lo, m0, m1, m2) computes a0*m0 + a1*m1 + a2*m2 with three
+ * 64x64->128 multiplications: 'hi' receives the sum of the upper
+ * 64-bit halves and 'lo' the sum of the lower halves shifted right
+ * by 20 bits. Because the multipliers are pre-shifted by 20 bits,
+ * this splits each product at its bit 44.
+ */
+#define MX(hi, lo, m0, m1, m2) do { \
+ uint64_t mxhi, mxlo; \
+ MUL128(mxhi, mxlo, a0, m0); \
+ (hi) = mxhi; \
+ (lo) = mxlo >> 20; \
+ MUL128(mxhi, mxlo, a1, m1); \
+ (hi) += mxhi; \
+ (lo) += mxlo >> 20; \
+ MUL128(mxhi, mxlo, a2, m2); \
+ (hi) += mxhi; \
+ (lo) += mxlo >> 20; \
+ } while (0)
+
+ const unsigned char *buf;
+ uint64_t a0, a1, a2;
+ uint64_t r0, r1, r2, t1, t2, u2;
+
+ r0 = r[0];
+ r1 = r[1];
+ r2 = r[2];
+ t1 = r[3];
+ t2 = r[4];
+ u2 = r[5];
+ a0 = acc[0];
+ a1 = acc[1];
+ a2 = acc[2];
+ buf = data;
+
+ while (len > 0) {
+ uint64_t v0, v1, v2;
+ uint64_t c0, c1, c2, d0, d1, d2;
+ /*
+ * Block 1 (bytes 0..15). The 128-bit little-endian block is
+ * split into limbs of 44, 44 and 40 bits (v0, v1, v2) which are
+ * added to the accumulator; the extra 1 << 40 in the top limb
+ * is the 2^128 bit appended to every block (88 + 40 = 128).
+ * The sum is then multiplied by r; the parts of the products
+ * above each limb (d0, d1, d2) are folded into the next limb,
+ * the topmost one wrapping around with a factor 20 since
+ * 2^132 = 20 mod p.
+ */
+ v0 = br_dec64le(buf + 0);
+ v1 = br_dec64le(buf + 8);
+ v2 = v1 >> 24;
+ v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+ v0 &= MASK44;
+ a0 += v0;
+ a1 += v1;
+ a2 += v2 + ((uint64_t)1 << 40);
+ MX(d0, c0, r0, u2, t1);
+ MX(d1, c1, r1, r0, t2);
+ MX(d2, c2, r2, r1, r0);
+ a0 = c0 + 20 * d2;
+ a1 = c1 + d0;
+ a2 = c2 + d1;
+ /* Block 2 (bytes 16..31): same steps as block 1. */
+ v0 = br_dec64le(buf + 16);
+ v1 = br_dec64le(buf + 24);
+ v2 = v1 >> 24;
+ v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+ v0 &= MASK44;
+ a0 += v0;
+ a1 += v1;
+ a2 += v2 + ((uint64_t)1 << 40);
+ MX(d0, c0, r0, u2, t1);
+ MX(d1, c1, r1, r0, t2);
+ MX(d2, c2, r2, r1, r0);
+ a0 = c0 + 20 * d2;
+ a1 = c1 + d0;
+ a2 = c2 + d1;
+ /* Block 3 (bytes 32..47): same steps as block 1. */
+ v0 = br_dec64le(buf + 32);
+ v1 = br_dec64le(buf + 40);
+ v2 = v1 >> 24;
+ v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+ v0 &= MASK44;
+ a0 += v0;
+ a1 += v1;
+ a2 += v2 + ((uint64_t)1 << 40);
+ MX(d0, c0, r0, u2, t1);
+ MX(d1, c1, r1, r0, t2);
+ MX(d2, c2, r2, r1, r0);
+ a0 = c0 + 20 * d2;
+ a1 = c1 + d0;
+ a2 = c2 + d1;
+ /* Block 4 (bytes 48..63): same steps as block 1. */
+ v0 = br_dec64le(buf + 48);
+ v1 = br_dec64le(buf + 56);
+ v2 = v1 >> 24;
+ v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+ v0 &= MASK44;
+ a0 += v0;
+ a1 += v1;
+ a2 += v2 + ((uint64_t)1 << 40);
+ MX(d0, c0, r0, u2, t1);
+ MX(d1, c1, r1, r0, t2);
+ MX(d2, c2, r2, r1, r0);
+ a0 = c0 + 20 * d2;
+ a1 = c1 + d0;
+ a2 = c2 + d1;
+ /*
+ * Carry propagation: a1 and a2 are brought back to 44 bits. The
+ * carry out of the top limb has weight 2^132 = 20 mod p and is
+ * added to a0, which may thus end up slightly above 44 bits.
+ */
+ a1 += a0 >> 44;
+ a0 &= MASK44;
+ a2 += a1 >> 44;
+ a1 &= MASK44;
+ a0 += 20 * (a2 >> 44);
+ a2 &= MASK44;
+
+ buf += 64;
+ len -= 64;
+ }
+ acc[0] = a0;
+ acc[1] = a1;
+ acc[2] = a2;
+
+#undef MX
+}
+/*
+ * Poly1305 processing, one 16-byte block per loop iteration. Absorbs
+ * 'len' bytes from 'data' into the three-limb accumulator acc[0..2]
+ * (read on entry, written back on exit). r[0..5] are read as r0, r1,
+ * r2, t1, t2 and u2 (multiplier words pre-shifted left by 20 bits, see
+ * the comment above). If fewer than 16 bytes remain, they are copied
+ * into a local buffer and right-padded with zeros to a full block.
+ * Carries are propagated after every block.
+ */
+static void
+poly1305_inner_small(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+ const unsigned char *buf;
+ uint64_t a0, a1, a2;
+ uint64_t r0, r1, r2, t1, t2, u2;
+
+ r0 = r[0];
+ r1 = r[1];
+ r2 = r[2];
+ t1 = r[3];
+ t2 = r[4];
+ u2 = r[5];
+ a0 = acc[0];
+ a1 = acc[1];
+ a2 = acc[2];
+ buf = data;
+
+ while (len > 0) {
+ uint64_t v0, v1, v2;
+ uint64_t c0, c1, c2, d0, d1, d2;
+ unsigned char tmp[16];
+ /*
+ * Final partial block: zero-pad it in tmp[] and process it as
+ * a full block. 'len' is forced to 16 so that the loop ends
+ * after this iteration.
+ */
+ if (len < 16) {
+ memcpy(tmp, buf, len);
+ memset(tmp + len, 0, (sizeof tmp) - len);
+ buf = tmp;
+ len = 16;
+ }
+ v0 = br_dec64le(buf + 0);
+ v1 = br_dec64le(buf + 8);
+ /* Split the 128-bit block into limbs of 44, 44 and 40 bits. */
+ v2 = v1 >> 24;
+ v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+ v0 &= MASK44;
+ /*
+ * Add the block to the accumulator; 1 << 40 in the top limb is
+ * the 2^128 bit appended to every block (88 + 40 = 128).
+ */
+ a0 += v0;
+ a1 += v1;
+ a2 += v2 + ((uint64_t)1 << 40);
+ /*
+ * MX(hi, lo, m0, m1, m2) computes a0*m0 + a1*m1 + a2*m2: 'hi'
+ * receives the sum of the upper 64-bit halves of the three
+ * 128-bit products, 'lo' the sum of their lower halves shifted
+ * right by 20 bits.
+ */
+#define MX(hi, lo, m0, m1, m2) do { \
+ uint64_t mxhi, mxlo; \
+ MUL128(mxhi, mxlo, a0, m0); \
+ (hi) = mxhi; \
+ (lo) = mxlo >> 20; \
+ MUL128(mxhi, mxlo, a1, m1); \
+ (hi) += mxhi; \
+ (lo) += mxlo >> 20; \
+ MUL128(mxhi, mxlo, a2, m2); \
+ (hi) += mxhi; \
+ (lo) += mxlo >> 20; \
+ } while (0)
+
+ MX(d0, c0, r0, u2, t1);
+ MX(d1, c1, r1, r0, t2);
+ MX(d2, c2, r2, r1, r0);
+
+#undef MX
+ /*
+ * Fold the high parts into the next limb; the topmost one wraps
+ * around with a factor 20 since 2^132 = 20 mod p.
+ */
+ a0 = c0 + 20 * d2;
+ a1 = c1 + d0;
+ a2 = c2 + d1;
+ /*
+ * Carry propagation: a1 and a2 are brought back to 44 bits; a0
+ * may end up slightly above 44 bits.
+ */
+ a1 += a0 >> 44;
+ a0 &= MASK44;
+ a2 += a1 >> 44;
+ a1 &= MASK44;
+ a0 += 20 * (a2 >> 44);
+ a2 &= MASK44;
+
+ buf += 16;
+ len -= 16;
+ }
+ acc[0] = a0;
+ acc[1] = a1;
+ acc[2] = a2;
+}
+/*
+ * Absorb 'len' bytes from 'data' into the accumulator 'acc', using the
+ * multiplier words in 'r'. When at least 64 bytes are available, the
+ * largest prefix whose length is a multiple of 64 is handed to
+ * poly1305_inner_big(); the remaining bytes (fewer than 64), if any,
+ * go to poly1305_inner_small(). Nothing is done when 'len' is 0.
+ */
+static inline void
+poly1305_inner(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+ if (len >= 64) {
+ size_t len2;
+ /* Round 'len' down to a multiple of 64. */
+ len2 = len & ~(size_t)63;
+ poly1305_inner_big(acc, r, data, len2);
+ data = (const unsigned char *)data + len2;
+ len -= len2;
+ }
+ if (len > 0) {
+ poly1305_inner_small(acc, r, data, len);
+ }
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined ChaCha20 + Poly1305 processing, using 64x64->128
+ * multiplications for the Poly1305 part.
+ *
+ *   key, iv     passed unchanged to 'ichacha'
+ *   data, len   buffer processed in place by 'ichacha'
+ *   aad,aad_len additional data, covered by the MAC only
+ *   tag         receives the 16-byte MAC value
+ *   ichacha     ChaCha20 implementation to use
+ *   encrypt     non-zero: run 'ichacha' on the data before computing
+ *               the MAC; zero: compute the MAC first, then run
+ *               'ichacha' on the data
+ *
+ * In both directions the MAC is therefore computed over the data as it
+ * is before decryption / after encryption. This function only computes
+ * and writes the tag; it does not compare it with anything.
+ */
+void
+br_poly1305_ctmulq_run(const void *key, const void *iv,
+ void *data, size_t len, const void *aad, size_t aad_len,
+ void *tag, br_chacha20_run ichacha, int encrypt)
+{
+ unsigned char pkey[32], foot[16];
+ uint64_t r[6], acc[3], r0, r1;
+ uint32_t v0, v1, v2, v3, v4;
+ uint64_t w0, w1, w2, w3;
+ uint32_t ctl;
+
+ /*
+ * Compute the MAC key: 'ichacha' is run over 32 zero bytes with
+ * its third argument set to 0 (the data itself is processed with
+ * that argument set to 1; presumably this is the initial block
+ * counter). The 'r' value is the first 16 bytes of pkey[], the
+ * 's' value is the last 16 bytes.
+ */
+ memset(pkey, 0, sizeof pkey);
+ ichacha(key, iv, 0, pkey, sizeof pkey);
+
+ /*
+ * If encrypting, ChaCha20 must run first, followed by Poly1305.
+ * When decrypting, the operations are reversed.
+ */
+ if (encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+
+ /*
+ * Run Poly1305. We must process the AAD, then ciphertext, then
+ * the footer (with the lengths). Note that the AAD and ciphertext
+ * are meant to be padded with zeros up to the next multiple of 16,
+ * and the length of the footer is 16 bytes as well.
+ */
+
+ /*
+ * Apply the "clamping" on r: clear the top four bits of bytes 3,
+ * 7, 11 and 15, and the low two bits of bytes 4, 8 and 12.
+ */
+ pkey[ 3] &= 0x0F;
+ pkey[ 4] &= 0xFC;
+ pkey[ 7] &= 0x0F;
+ pkey[ 8] &= 0xFC;
+ pkey[11] &= 0x0F;
+ pkey[12] &= 0xFC;
+ pkey[15] &= 0x0F;
+
+ /*
+ * Decode the 'r' value into 44-bit words, left-shifted by 20 bits:
+ * r[0], r[1] and r[2] hold bits 0..43, 44..87 and 88..127 of r.
+ * Also compute the derived multipliers used by the inner
+ * functions, all left-shifted by 20 bits as well:
+ *   r[3] = t1 = 20*r1 truncated to 44 bits (the left shift by 20
+ *               drops the extra top bits)
+ *   r[5] = u2 = 20*r2
+ *   r[4] = t2 = u2 + (extra top bits of 20*r1)
+ * The local 'r1' is reused to hold the unshifted 20*r1 value.
+ */
+ r0 = br_dec64le(pkey + 0);
+ r1 = br_dec64le(pkey + 8);
+ r[0] = r0 << 20;
+ r[1] = ((r0 >> 24) | (r1 << 40)) & ~(uint64_t)0xFFFFF;
+ r[2] = (r1 >> 4) & ~(uint64_t)0xFFFFF;
+ r1 = 20 * (r[1] >> 20);
+ r[3] = r1 << 20;
+ r[5] = 20 * r[2];
+ r[4] = (r[5] + (r1 >> 24)) & ~(uint64_t)0xFFFFF;
+
+ /*
+ * Accumulator is 0.
+ */
+ acc[0] = 0;
+ acc[1] = 0;
+ acc[2] = 0;
+
+ /*
+ * Process the additional authenticated data, ciphertext, and
+ * footer in due order. The footer is the two lengths, each
+ * encoded as a 64-bit little-endian integer. The AAD and the
+ * ciphertext are processed by separate calls, so each one gets
+ * its own zero padding.
+ */
+ br_enc64le(foot, (uint64_t)aad_len);
+ br_enc64le(foot + 8, (uint64_t)len);
+ poly1305_inner(acc, r, aad, aad_len);
+ poly1305_inner(acc, r, data, len);
+ poly1305_inner_small(acc, r, foot, sizeof foot);
+
+ /*
+ * Finalise modular reduction. At that point, the value consists
+ * in three 44-bit values (the lowest one might be slightly above
+ * 2^44). Two loops shall be sufficient. The top limb is cut at
+ * 42 bits here (88 + 42 = 130), and what lies above is folded
+ * back with a factor 5 since 2^130 = 5 mod p.
+ */
+ acc[1] += (acc[0] >> 44);
+ acc[0] &= MASK44;
+ acc[2] += (acc[1] >> 44);
+ acc[1] &= MASK44;
+ acc[0] += 5 * (acc[2] >> 42);
+ acc[2] &= MASK42;
+ acc[1] += (acc[0] >> 44);
+ acc[0] &= MASK44;
+ acc[2] += (acc[1] >> 44);
+ acc[1] &= MASK44;
+ acc[0] += 5 * (acc[2] >> 42);
+ acc[2] &= MASK42;
+
+ /*
+ * The value may still fall in the 2^130-5..2^130-1 range, in
+ * which case we must reduce it again. The code below selects,
+ * in constant-time, between 'acc' and 'acc-p'. We encode the
+ * value over four 32-bit integers to finish the operation.
+ * v0..v3 are bits 0..127 of the value, v4 holds bits 128 and 129.
+ */
+ v0 = (uint32_t)acc[0];
+ v1 = (uint32_t)(acc[0] >> 32) | ((uint32_t)acc[1] << 12);
+ v2 = (uint32_t)(acc[1] >> 20) | ((uint32_t)acc[2] << 24);
+ v3 = (uint32_t)(acc[2] >> 8);
+ v4 = (uint32_t)(acc[2] >> 40);
+
+ /*
+ * ctl is 1 exactly when the value is at least p, i.e. all bits
+ * above the low word are set and the low word exceeds 0xFFFFFFFA.
+ * Subtracting p then amounts to adding 5 to the low word (modulo
+ * 2^32) and clearing the other words. v4 is not updated since
+ * only the low 128 bits are used afterwards.
+ */
+ ctl = GT(v0, 0xFFFFFFFA);
+ ctl &= EQ(v1, 0xFFFFFFFF);
+ ctl &= EQ(v2, 0xFFFFFFFF);
+ ctl &= EQ(v3, 0xFFFFFFFF);
+ ctl &= EQ(v4, 0x00000003);
+ v0 = MUX(ctl, v0 + 5, v0);
+ v1 = MUX(ctl, 0, v1);
+ v2 = MUX(ctl, 0, v2);
+ v3 = MUX(ctl, 0, v3);
+
+ /*
+ * Add the "s" value. This is done modulo 2^128. Don't forget
+ * carry propagation... The additions use 64-bit variables so
+ * that the carry of each 32-bit word shows up in bit 32 of the
+ * sum; the carry out of the last word is dropped.
+ */
+ w0 = (uint64_t)v0 + (uint64_t)br_dec32le(pkey + 16);
+ w1 = (uint64_t)v1 + (uint64_t)br_dec32le(pkey + 20) + (w0 >> 32);
+ w2 = (uint64_t)v2 + (uint64_t)br_dec32le(pkey + 24) + (w1 >> 32);
+ w3 = (uint64_t)v3 + (uint64_t)br_dec32le(pkey + 28) + (w2 >> 32);
+ v0 = (uint32_t)w0;
+ v1 = (uint32_t)w1;
+ v2 = (uint32_t)w2;
+ v3 = (uint32_t)w3;
+
+ /*
+ * Encode the tag (16 bytes, little-endian).
+ */
+ br_enc32le((unsigned char *)tag + 0, v0);
+ br_enc32le((unsigned char *)tag + 4, v1);
+ br_enc32le((unsigned char *)tag + 8, v2);
+ br_enc32le((unsigned char *)tag + 12, v3);
+
+ /*
+ * If decrypting, then ChaCha20 runs _after_ Poly1305.
+ */
+ if (!encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when BR_INT128 or BR_UMUL128 is non-zero: returns a
+ * pointer to br_poly1305_ctmulq_run().
+ */
+br_poly1305_run
+br_poly1305_ctmulq_get(void)
+{
+ return &br_poly1305_ctmulq_run;
+}
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when neither BR_INT128 nor BR_UMUL128 is set, in
+ * which case br_poly1305_ctmulq_run() is not defined: returns 0 (a null
+ * function pointer). NOTE(review): callers are presumably expected to
+ * test the returned pointer before using it -- confirm against
+ * bearssl_block.h.
+ */
+br_poly1305_run
+br_poly1305_ctmulq_get(void)
+{
+ return 0;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/symcipher/poly1305_i15.c b/test/monniaux/BearSSL/src/symcipher/poly1305_i15.c
new file mode 100644
index 00000000..6f892121
--- /dev/null
+++ b/test/monniaux/BearSSL/src/symcipher/poly1305_i15.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This is a "reference" implementation of Poly1305 that uses the
+ * generic "i15" code for big integers. It is slow, but it handles all
+ * big-integer operations with generic code, thereby avoiding most
+ * tricky situations with carry propagation and modular reduction.
+ */
+
+/*
+ * Modulus: 2^130-5.
+ *
+ * After the first word, the nine 15-bit value words are listed least
+ * significant first: 0x7FFB is 2^15-5, and the top word 0x03FF holds
+ * the remaining 10 bits (8*15 + 10 = 130).
+ * NOTE(review): the first word, 0x008A, is presumably the "i15" header
+ * that encodes the bit length (130 bits) -- confirm against inner.h.
+ */
+static const uint16_t P1305[] = {
+ 0x008A,
+ 0x7FFB, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x03FF
+};
+
+/*
+ * -1/p mod 2^15. Since p = -5 mod 2^15, this is the inverse of 5
+ * modulo 2^15: 5 * 0x4CCD = 3 * 2^15 + 1.
+ */
+#define P0I 0x4CCD
+
+/*
+ * R^2 mod p, for conversion to Montgomery representation (R = 2^135,
+ * since we use 9 words of 15 bits each, and 15*9 = 135).
+ * R^2 = 2^270 = (2^130)^2 * 2^10 = 25 * 2^10 = 0x6400 mod p, hence the
+ * single non-zero value word below.
+ */
+static const uint16_t R2[] = {
+ 0x008A,
+ 0x6400, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Perform the inner processing of blocks for Poly1305. The "r" array
+ * is in Montgomery representation, while the "a" array is not.
+ *
+ *   a      accumulator (modulo P1305), updated for each block
+ *   r      multiplier, in Montgomery representation
+ *   data   input bytes
+ *   len    input length in bytes; a final partial block is padded
+ *          with zeros up to 16 bytes
+ *
+ * For each 16-byte block, the accumulator is replaced with
+ * (a + block + 2^128) * r mod p.
+ */
+static void
+poly1305_inner(uint16_t *a, const uint16_t *r, const void *data, size_t len)
+{
+ const unsigned char *buf;
+
+ buf = data;
+ while (len > 0) {
+ unsigned char tmp[16], rev[16];
+ uint16_t b[10];
+ uint32_t ctl;
+ int i;
+
+ /*
+ * If there is a partial block, right-pad it with zeros.
+ * 'len' is then forced to 16, so this is the last iteration.
+ */
+ if (len < 16) {
+ memset(tmp, 0, sizeof tmp);
+ memcpy(tmp, buf, len);
+ buf = tmp;
+ len = 16;
+ }
+
+ /*
+ * Decode next block and apply the "high bit". Since
+ * decoding is little-endian, we must byte-swap the buffer.
+ * The high bit is 2^128: with 15-bit value words starting at
+ * b[1], it is bit 8 of b[9] (8*15 + 8 = 128).
+ */
+ for (i = 0; i < 16; i ++) {
+ rev[i] = buf[15 - i];
+ }
+ br_i15_decode_mod(b, rev, sizeof rev, P1305);
+ b[9] |= 0x0100;
+
+ /*
+ * Add the accumulator to the decoded block (modular
+ * addition).
+ * NOTE(review): the br_i15_sub() call with a zero last
+ * argument appears to be used only for its return value,
+ * i.e. as a comparison of 'b' with p, the actual subtraction
+ * being done by the second call under control of 'ctl' --
+ * confirm against inner.h.
+ */
+ ctl = br_i15_add(b, a, 1);
+ ctl |= NOT(br_i15_sub(b, P1305, 0));
+ br_i15_sub(b, P1305, ctl);
+
+ /*
+ * Multiply by r, result is the new accumulator value.
+ * Since only 'r' is in Montgomery representation, the result
+ * is not in Montgomery representation either.
+ */
+ br_i15_montymul(a, b, r, P1305, P0I);
+
+ buf += 16;
+ len -= 16;
+ }
+}
+
+/*
+ * Byteswap a 16-byte value.
+ *
+ * The 16 bytes at 'buf' are reversed in place: buf[i] is exchanged
+ * with buf[15 - i] for i = 0..7.
+ */
+static void
+byteswap16(unsigned char *buf)
+{
+ int i;
+
+ for (i = 0; i < 8; i ++) {
+ unsigned x;
+
+ x = buf[i];
+ buf[i] = buf[15 - i];
+ buf[15 - i] = x;
+ }
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined ChaCha20 + Poly1305 processing, with the Poly1305 part
+ * implemented over the generic "i15" big-integer functions.
+ *
+ *   key, iv     passed unchanged to 'ichacha'
+ *   data, len   buffer processed in place by 'ichacha'
+ *   aad,aad_len additional data, covered by the MAC only
+ *   tag         receives the 16-byte MAC value
+ *   ichacha     ChaCha20 implementation to use
+ *   encrypt     non-zero: run 'ichacha' on the data before computing
+ *               the MAC; zero: compute the MAC first, then run
+ *               'ichacha' on the data
+ *
+ * This function only computes and writes the tag; it does not compare
+ * it with anything.
+ */
+void
+br_poly1305_i15_run(const void *key, const void *iv,
+ void *data, size_t len, const void *aad, size_t aad_len,
+ void *tag, br_chacha20_run ichacha, int encrypt)
+{
+ unsigned char pkey[32], foot[16];
+ uint16_t t[10], r[10], acc[10];
+
+ /*
+ * Compute the MAC key: 'ichacha' is run over 32 zero bytes with
+ * its third argument set to 0 (the data itself is processed with
+ * that argument set to 1). The 'r' value is the first 16 bytes of
+ * pkey[], the 's' value is the last 16 bytes.
+ */
+ memset(pkey, 0, sizeof pkey);
+ ichacha(key, iv, 0, pkey, sizeof pkey);
+
+ /*
+ * If encrypting, ChaCha20 must run first, followed by Poly1305.
+ * When decrypting, the operations are reversed.
+ */
+ if (encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+
+ /*
+ * Run Poly1305. We must process the AAD, then ciphertext, then
+ * the footer (with the lengths). Note that the AAD and ciphertext
+ * are meant to be padded with zeros up to the next multiple of 16,
+ * and the length of the footer is 16 bytes as well.
+ */
+
+ /*
+ * Apply the "clamping" operation on the encoded 'r' value: clear
+ * the top four bits of bytes 3, 7, 11 and 15, and the low two bits
+ * of bytes 4, 8 and 12.
+ */
+ pkey[ 3] &= 0x0F;
+ pkey[ 7] &= 0x0F;
+ pkey[11] &= 0x0F;
+ pkey[15] &= 0x0F;
+ pkey[ 4] &= 0xFC;
+ pkey[ 8] &= 0xFC;
+ pkey[12] &= 0xFC;
+
+ /*
+ * Decode the clamped 'r' value. Decoding should use little-endian
+ * so we must byteswap the value first. The swap is done in place
+ * on the first 16 bytes of pkey[].
+ */
+ byteswap16(pkey);
+ br_i15_decode_mod(t, pkey, 16, P1305);
+
+ /*
+ * Convert 'r' to Montgomery representation (Montgomery
+ * multiplication by R^2 mod p).
+ */
+ br_i15_montymul(r, t, R2, P1305, P0I);
+
+ /*
+ * Accumulator is 0. The second argument is the same value as the
+ * first word of P1305[].
+ */
+ br_i15_zero(acc, 0x8A);
+
+ /*
+ * Process the additional authenticated data, ciphertext, and
+ * footer in due order. The footer is the two lengths, each
+ * encoded as a 64-bit little-endian integer. The AAD and the
+ * ciphertext are processed by separate calls, so each one gets
+ * its own zero padding.
+ */
+ br_enc64le(foot, (uint64_t)aad_len);
+ br_enc64le(foot + 8, (uint64_t)len);
+ poly1305_inner(acc, r, aad, aad_len);
+ poly1305_inner(acc, r, data, len);
+ poly1305_inner(acc, r, foot, sizeof foot);
+
+ /*
+ * Decode the value 's'. Again, a byteswap is needed.
+ */
+ byteswap16(pkey + 16);
+ br_i15_decode_mod(t, pkey + 16, 16, P1305);
+
+ /*
+ * Add the value 's' to the accumulator. That addition is done
+ * modulo 2^128, so we just ignore the carry.
+ */
+ br_i15_add(acc, t, 1);
+
+ /*
+ * Encode the result (128 low bits) to the tag. Encoding should
+ * be little-endian, hence the final byteswap of the 16 tag bytes.
+ */
+ br_i15_encode(tag, 16, acc);
+ byteswap16(tag);
+
+ /*
+ * If decrypting, then ChaCha20 runs _after_ Poly1305.
+ */
+ if (!encrypt) {
+ ichacha(key, iv, 1, data, len);
+ }
+}