14 files changed, 3072 insertions, 0 deletions
diff --git a/test/monniaux/BearSSL/src/hash/dig_oid.c b/test/monniaux/BearSSL/src/hash/dig_oid.c
new file mode 100644
index 00000000..cd9692c9
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/dig_oid.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This file contains the encoded OID for the standard hash functions.
+ * Such OIDs appear in, for instance, the PKCS#1 v1.5 padding for RSA
+ * signatures.
+ */
+
+static const unsigned char md5_OID[] = { /* dotted form: 1.2.840.113549.2.5 */
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05
+};
+
+static const unsigned char sha1_OID[] = { /* dotted form: 1.3.14.3.2.26 */
+	0x2B, 0x0E, 0x03, 0x02, 0x1A
+};
+
+static const unsigned char sha224_OID[] = { /* dotted form: 2.16.840.1.101.3.4.2.4 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04
+};
+
+static const unsigned char sha256_OID[] = { /* dotted form: 2.16.840.1.101.3.4.2.1 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01
+};
+
+static const unsigned char sha384_OID[] = { /* dotted form: 2.16.840.1.101.3.4.2.2 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02
+};
+
+static const unsigned char sha512_OID[] = { /* dotted form: 2.16.840.1.101.3.4.2.3 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03
+};
+
+/* see inner.h */
+const unsigned char * /* pointer to a static OID table, or NULL if digest_id is not handled */
+br_digest_OID(int digest_id, size_t *len) /* *len always receives the OID length in bytes */
+{
+	switch (digest_id) {
+	case br_md5_ID:
+		*len = sizeof md5_OID; /* tables are arrays, so sizeof is the byte count */
+		return md5_OID;
+	case br_sha1_ID:
+		*len = sizeof sha1_OID;
+		return sha1_OID;
+	case br_sha224_ID:
+		*len = sizeof sha224_OID;
+		return sha224_OID;
+	case br_sha256_ID:
+		*len = sizeof sha256_OID;
+		return sha256_OID;
+	case br_sha384_ID:
+		*len = sizeof sha384_OID;
+		return sha384_OID;
+	case br_sha512_ID:
+		*len = sizeof sha512_OID;
+		return sha512_OID;
+	default: /* any identifier without a table above */
+		*len = 0; /* length is zeroed so callers never see a stale value */
+		return NULL;
+	}
+}
diff --git a/test/monniaux/BearSSL/src/hash/dig_size.c b/test/monniaux/BearSSL/src/hash/dig_size.c
new file mode 100644
index 00000000..4625d2c6
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/dig_size.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+size_t /* output size for the given hash identifier, or 0 if it is not handled */
+br_digest_size_by_ID(int digest_id)
+{
+	switch (digest_id) {
+	case br_md5sha1_ID: /* combined identifier: sum of the MD5 and SHA-1 sizes */
+		return br_md5_SIZE + br_sha1_SIZE;
+	case br_md5_ID:
+		return br_md5_SIZE;
+	case br_sha1_ID:
+		return br_sha1_SIZE;
+	case br_sha224_ID:
+		return br_sha224_SIZE;
+	case br_sha256_ID:
+		return br_sha256_SIZE;
+	case br_sha384_ID:
+		return br_sha384_SIZE;
+	case br_sha512_ID:
+		return br_sha512_SIZE;
+	default:
+		/* Unknown identifier: report 0 to the caller instead of aborting. */
+		return 0;
+	}
+}
diff --git a/test/monniaux/BearSSL/src/hash/ghash_ctmul.c b/test/monniaux/BearSSL/src/hash/ghash_ctmul.c
new file mode 100644
index 00000000..36232025
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/ghash_ctmul.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * We compute "carryless multiplications" through normal integer
+ * multiplications, masking out enough bits to create "holes" in which
+ * carries may expand without altering our bits; we really use 8 data
+ * bits per 32-bit word, spaced every fourth bit. Accumulated carries
+ * may not exceed 8 in total, which fits in 4 bits.
+ *
+ * It would be possible to use a 3-bit spacing, allowing two operands,
+ * one with 7 non-zero data bits, the other one with 10 or 11 non-zero
+ * data bits; this asymmetric splitting makes the overall code more
+ * complex with thresholds and exceptions, and does not appear to be
+ * worth the effort.
+ */
+
+/*
+ * We cannot really autodetect whether multiplications are "slow" or
+ * not. A typical example is the ARM Cortex M0+, which exists in two
+ * versions: one with a 1-cycle multiplication opcode, the other with
+ * a 32-cycle multiplication opcode. They both use exactly the same
+ * architecture and ABI, and cannot be distinguished from each other
+ * at compile-time.
+ *
+ * Since most modern CPUs (even embedded CPUs) still have fast
+ * multiplications, we use the "fast mul" code by default.
+ */
+
+#if BR_SLOW_MUL
+
+/*
+ * This implementation uses Karatsuba-like reduction to make fewer
+ * integer multiplications (9 instead of 16), at the expense of extra
+ * logical operations (XOR, shifts...). On modern x86 CPUs that offer
+ * fast, pipelined multiplications, this code is about twice as slow as
+ * the simpler code with 16 multiplications. This tendency may be
+ * reversed on low-end platforms with expensive multiplications.
+ */
+
+#define MUL32(h, l, x, y)   do { \
+		uint64_t mul32tmp = MUL(x, y); /* product is taken first, so h/l may alias x/y */ \
+		(h) = (uint32_t)(mul32tmp >> 32); /* upper 32 bits of the product */ \
+		(l) = (uint32_t)mul32tmp; /* lower 32 bits of the product */ \
+	} while (0) /* statement-like macro: splits MUL(x, y) into high word h and low word l */
+
+static inline void
+bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y) /* carryless x*y: *hi = upper word, *lo = lower word */
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint32_t a0, a1, a2, a3, a4, a5, a6, a7, a8;
+	uint32_t b0, b1, b2, b3, b4, b5, b6, b7, b8;
+
+	x0 = x & (uint32_t)0x11111111; /* lane k keeps bits k, k+4, k+8, ... */
+	x1 = x & (uint32_t)0x22222222;
+	x2 = x & (uint32_t)0x44444444;
+	x3 = x & (uint32_t)0x88888888;
+	y0 = y & (uint32_t)0x11111111;
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+
+	/*
+	 * (x0+W*x1)*(y0+W*y1) -> a0:b0
+	 * (x2+W*x3)*(y2+W*y3) -> a3:b3
+	 * ((x0+x2)+W*(x1+x3))*((y0+y2)+W*(y1+y3)) -> a6:b6
+	 */
+	a0 = x0; /* a* operands come from x, b* operands from y */
+	b0 = y0;
+	a1 = x1 >> 1; /* shift each lane down to bit positions 0, 4, 8, ... */
+	b1 = y1 >> 1;
+	a2 = a0 ^ a1;
+	b2 = b0 ^ b1;
+	a3 = x2 >> 2;
+	b3 = y2 >> 2;
+	a4 = x3 >> 3;
+	b4 = y3 >> 3;
+	a5 = a3 ^ a4;
+	b5 = b3 ^ b4;
+	a6 = a0 ^ a3;
+	b6 = b0 ^ b3;
+	a7 = a1 ^ a4;
+	b7 = b1 ^ b4;
+	a8 = a6 ^ a7;
+	b8 = b6 ^ b7;
+
+	MUL32(b0, a0, b0, a0); /* operands are overwritten: b0 = high word, a0 = low word */
+	MUL32(b1, a1, b1, a1);
+	MUL32(b2, a2, b2, a2);
+	MUL32(b3, a3, b3, a3);
+	MUL32(b4, a4, b4, a4);
+	MUL32(b5, a5, b5, a5);
+	MUL32(b6, a6, b6, a6);
+	MUL32(b7, a7, b7, a7);
+	MUL32(b8, a8, b8, a8);
+
+	a0 &= (uint32_t)0x11111111; /* keep every fourth bit; the other bits hold carries */
+	a1 &= (uint32_t)0x11111111;
+	a2 &= (uint32_t)0x11111111;
+	a3 &= (uint32_t)0x11111111;
+	a4 &= (uint32_t)0x11111111;
+	a5 &= (uint32_t)0x11111111;
+	a6 &= (uint32_t)0x11111111;
+	a7 &= (uint32_t)0x11111111;
+	a8 &= (uint32_t)0x11111111;
+	b0 &= (uint32_t)0x11111111;
+	b1 &= (uint32_t)0x11111111;
+	b2 &= (uint32_t)0x11111111;
+	b3 &= (uint32_t)0x11111111;
+	b4 &= (uint32_t)0x11111111;
+	b5 &= (uint32_t)0x11111111;
+	b6 &= (uint32_t)0x11111111;
+	b7 &= (uint32_t)0x11111111;
+	b8 &= (uint32_t)0x11111111;
+
+	a2 ^= a0 ^ a1; /* cross term of the first partial product (low words) */
+	b2 ^= b0 ^ b1; /* same for the high words */
+	a0 ^= (a2 << 1) ^ (a1 << 2); /* recombine: W is a 1-bit shift */
+	b0 ^= (b2 << 1) ^ (b1 << 2);
+	a5 ^= a3 ^ a4;
+	b5 ^= b3 ^ b4;
+	a3 ^= (a5 << 1) ^ (a4 << 2);
+	b3 ^= (b5 << 1) ^ (b4 << 2);
+	a8 ^= a6 ^ a7;
+	b8 ^= b6 ^ b7;
+	a6 ^= (a8 << 1) ^ (a7 << 2);
+	b6 ^= (b8 << 1) ^ (b7 << 2);
+	a6 ^= a0 ^ a3; /* outer-level cross term */
+	b6 ^= b0 ^ b3;
+	*lo = a0 ^ (a6 << 2) ^ (a3 << 4);
+	*hi = b0 ^ (b6 << 2) ^ (b3 << 4) ^ (a6 >> 30) ^ (a3 >> 28); /* bits shifted out of the low word re-enter here */
+}
+
+#else
+
+/*
+ * Simple multiplication in GF(2)[X], using 16 integer multiplications.
+ */
+
+static inline void
+bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y) /* carryless x*y: *hi = upper word, *lo = lower word */
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint64_t z0, z1, z2, z3;
+	uint64_t z;
+
+	x0 = x & (uint32_t)0x11111111; /* lane k keeps bits k, k+4, k+8, ... */
+	x1 = x & (uint32_t)0x22222222;
+	x2 = x & (uint32_t)0x44444444;
+	x3 = x & (uint32_t)0x88888888;
+	y0 = y & (uint32_t)0x11111111;
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+	z0 = MUL(x0, y0) ^ MUL(x1, y3) ^ MUL(x2, y2) ^ MUL(x3, y1); /* lane indices sum to 0 mod 4 */
+	z1 = MUL(x0, y1) ^ MUL(x1, y0) ^ MUL(x2, y3) ^ MUL(x3, y2); /* lane indices sum to 1 mod 4 */
+	z2 = MUL(x0, y2) ^ MUL(x1, y1) ^ MUL(x2, y0) ^ MUL(x3, y3); /* lane indices sum to 2 mod 4 */
+	z3 = MUL(x0, y3) ^ MUL(x1, y2) ^ MUL(x2, y1) ^ MUL(x3, y0); /* lane indices sum to 3 mod 4 */
+	z0 &= (uint64_t)0x1111111111111111; /* keep only this lane's bits; the rest hold carries */
+	z1 &= (uint64_t)0x2222222222222222;
+	z2 &= (uint64_t)0x4444444444444444;
+	z3 &= (uint64_t)0x8888888888888888;
+	z = z0 | z1 | z2 | z3; /* lanes are disjoint, so OR merges them */
+	*lo = (uint32_t)z;
+	*hi = (uint32_t)(z >> 32);
+}
+
+#endif
+
+/* see bearssl_hash.h */
+void
+br_ghash_ctmul(void *y, const void *h, const void *data, size_t len) /* y: 16-byte value updated in place; h: 16-byte multiplier; data/len: input */
+{
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint32_t yw[4];
+	uint32_t hw[4];
+
+	/*
+	 * Throughout the loop we handle the y and h values as arrays
+	 * of 32-bit words.
+	 */
+	buf = data;
+	yb = y;
+	hb = h;
+	yw[3] = br_dec32be(yb); /* first four bytes land in the highest-index word */
+	yw[2] = br_dec32be(yb + 4);
+	yw[1] = br_dec32be(yb + 8);
+	yw[0] = br_dec32be(yb + 12);
+	hw[3] = br_dec32be(hb);
+	hw[2] = br_dec32be(hb + 4);
+	hw[1] = br_dec32be(hb + 8);
+	hw[0] = br_dec32be(hb + 12);
+	while (len > 0) { /* one iteration per 16-byte block; skipped entirely when len is 0 */
+		const unsigned char *src;
+		unsigned char tmp[16];
+		int i;
+		uint32_t a[9], b[9], zw[8];
+		uint32_t c0, c1, c2, c3, d0, d1, d2, d3, e0, e1, e2, e3;
+
+		/*
+		 * Get the next 16-byte block (using zero-padding if
+		 * necessary).
+		 */
+		if (len >= 16) {
+			src = buf; /* full block: read straight from the input */
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len); /* partial final block goes through a local copy */
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			src = tmp;
+			len = 0;
+		}
+
+		/*
+		 * Decode the block. The GHASH standard mandates
+		 * big-endian encoding.
+		 */
+		yw[3] ^= br_dec32be(src);
+		yw[2] ^= br_dec32be(src + 4);
+		yw[1] ^= br_dec32be(src + 8);
+		yw[0] ^= br_dec32be(src + 12);
+
+		/*
+		 * We multiply two 128-bit field elements. We use
+		 * Karatsuba to turn that into three 64-bit
+		 * multiplications, which are themselves done with a
+		 * total of nine 32-bit multiplications.
+		 */
+
+		/*
+		 * y[0,1]*h[0,1] -> 0..2
+		 * y[2,3]*h[2,3] -> 3..5
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6..8
+		 */
+		a[0] = yw[0]; /* a[] holds operands taken from y, b[] those taken from h */
+		b[0] = hw[0];
+		a[1] = yw[1];
+		b[1] = hw[1];
+		a[2] = a[0] ^ a[1];
+		b[2] = b[0] ^ b[1];
+
+		a[3] = yw[2];
+		b[3] = hw[2];
+		a[4] = yw[3];
+		b[4] = hw[3];
+		a[5] = a[3] ^ a[4];
+		b[5] = b[3] ^ b[4];
+
+		a[6] = a[0] ^ a[3];
+		b[6] = b[0] ^ b[3];
+		a[7] = a[1] ^ a[4];
+		b[7] = b[1] ^ b[4];
+		a[8] = a[6] ^ a[7];
+		b[8] = b[6] ^ b[7];
+
+		for (i = 0; i < 9; i ++) {
+			bmul(&b[i], &a[i], b[i], a[i]); /* in place: b[i] = high word, a[i] = low word of the product */
+		}
+
+		c0 = a[0]; /* c0..c3: the four words of y[0,1]*h[0,1] */
+		c1 = b[0] ^ a[2] ^ a[0] ^ a[1];
+		c2 = a[1] ^ b[2] ^ b[0] ^ b[1];
+		c3 = b[1];
+		d0 = a[3]; /* d0..d3: the four words of y[2,3]*h[2,3] */
+		d1 = b[3] ^ a[5] ^ a[3] ^ a[4];
+		d2 = a[4] ^ b[5] ^ b[3] ^ b[4];
+		d3 = b[4];
+		e0 = a[6]; /* e0..e3: the four words of the product of the sums */
+		e1 = b[6] ^ a[8] ^ a[6] ^ a[7];
+		e2 = a[7] ^ b[8] ^ b[6] ^ b[7];
+		e3 = b[7];
+
+		e0 ^= c0 ^ d0; /* e becomes the cross term */
+		e1 ^= c1 ^ d1;
+		e2 ^= c2 ^ d2;
+		e3 ^= c3 ^ d3;
+		c2 ^= e0; /* add the cross term two words up: result is c0..c3, d0..d3 */
+		c3 ^= e1;
+		d0 ^= e2;
+		d1 ^= e3;
+
+		/*
+		 * GHASH specification has the bits "reversed" (most
+		 * significant is in fact least significant), which does
+		 * not matter for a carryless multiplication, except that
+		 * the 255-bit result must be shifted by 1 bit.
+		 */
+		zw[0] = c0 << 1;
+		zw[1] = (c1 << 1) | (c0 >> 31); /* top bit of each word carries into the next one */
+		zw[2] = (c2 << 1) | (c1 >> 31);
+		zw[3] = (c3 << 1) | (c2 >> 31);
+		zw[4] = (d0 << 1) | (c3 >> 31);
+		zw[5] = (d1 << 1) | (d0 >> 31);
+		zw[6] = (d2 << 1) | (d1 >> 31);
+		zw[7] = (d3 << 1) | (d2 >> 31);
+
+		/*
+		 * We now do the reduction modulo the field polynomial
+		 * to get back to 128 bits.
+		 */
+		for (i = 0; i < 4; i ++) {
+			uint32_t lw;
+
+			lw = zw[i]; /* low word i is folded into words i+3 and i+4 */
+			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
+			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
+		}
+		memcpy(yw, zw + 4, sizeof yw); /* the upper four words are the new y */
+	}
+
+	/*
+	 * Encode back the result.
+	 */
+	br_enc32be(yb, yw[3]); /* same word order as the decoding at the top */
+	br_enc32be(yb + 4, yw[2]);
+	br_enc32be(yb + 8, yw[1]);
+	br_enc32be(yb + 12, yw[0]);
+}
diff --git a/test/monniaux/BearSSL/src/hash/ghash_ctmul32.c b/test/monniaux/BearSSL/src/hash/ghash_ctmul32.c
new file mode 100644
index 00000000..c66af465
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/ghash_ctmul32.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This implementation uses 32-bit multiplications, and only the low
+ * 32 bits for each multiplication result. This is meant primarily for
+ * the ARM Cortex M0 and M0+, whose multiplication opcode does not yield
+ * the upper 32 bits; but it might also be useful on architectures where
+ * access to the upper 32 bits requires use of specific registers that
+ * create contention (e.g. on i386, "mul" necessarily outputs the result
+ * in edx:eax, while "imul" can use any registers but is limited to the
+ * low 32 bits).
+ *
+ * The implementation trick that is used here is bit-reversing (bit 0
+ * is swapped with bit 31, bit 1 with bit 30, and so on). In GF(2)[X],
+ * for all 32-bit values x and y, we have:
+ *    rev32(x) * rev32(y) = rev64(x * y) >> 1
+ * In other words, if we bit-reverse (over 32 bits) the operands, then we
+ * bit-reverse the 63-bit result (hence the ">> 1" applied after rev32()).
+ */
+
+/*
+ * Multiplication in GF(2)[X], truncated to its low 32 bits.
+ */
+static inline uint32_t
+bmul32(uint32_t x, uint32_t y) /* returns the low 32 bits of the carryless product of x and y */
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint32_t z0, z1, z2, z3;
+
+	x0 = x & (uint32_t)0x11111111; /* lane k keeps bits k, k+4, k+8, ... */
+	x1 = x & (uint32_t)0x22222222;
+	x2 = x & (uint32_t)0x44444444;
+	x3 = x & (uint32_t)0x88888888;
+	y0 = y & (uint32_t)0x11111111;
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+	z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1); /* lane indices sum to 0 mod 4 */
+	z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2); /* lane indices sum to 1 mod 4 */
+	z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3); /* lane indices sum to 2 mod 4 */
+	z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0); /* lane indices sum to 3 mod 4 */
+	z0 &= (uint32_t)0x11111111; /* keep only this lane's bits; the rest hold carries */
+	z1 &= (uint32_t)0x22222222;
+	z2 &= (uint32_t)0x44444444;
+	z3 &= (uint32_t)0x88888888;
+	return z0 | z1 | z2 | z3; /* lanes are disjoint, so OR merges them */
+}
+
+/*
+ * Bit-reverse a 32-bit word.
+ */
+static uint32_t
+rev32(uint32_t x) /* returns x with bit i moved to bit 31-i */
+{
+#define RMS(m, s)   do { \
+		x = ((x & (uint32_t)(m)) << (s)) \
+			| ((x >> (s)) & (uint32_t)(m)); \
+	} while (0) /* swap the groups selected by mask m with their neighbours s bits higher */
+
+	RMS(0x55555555, 1); /* swap adjacent bits */
+	RMS(0x33333333, 2); /* swap adjacent bit pairs */
+	RMS(0x0F0F0F0F, 4); /* swap nibbles within each byte */
+	RMS(0x00FF00FF, 8); /* swap bytes within each 16-bit half */
+	return (x << 16) | (x >> 16); /* swap the two 16-bit halves */
+
+#undef RMS
+}
+
+/* see bearssl_hash.h */
+void
+br_ghash_ctmul32(void *y, const void *h, const void *data, size_t len) /* y: 16-byte value updated in place; h: 16-byte multiplier; data/len: input */
+{
+	/*
+	 * This implementation is similar to br_ghash_ctmul() except
+	 * that we have to do the multiplication twice, with the
+	 * "normal" and "bit reversed" operands. Hence we end up with
+	 * eighteen 32-bit multiplications instead of nine.
+	 */
+
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint32_t yw[4];
+	uint32_t hw[4], hwr[4];
+
+	buf = data;
+	yb = y;
+	hb = h;
+	yw[3] = br_dec32be(yb); /* first four bytes land in the highest-index word */
+	yw[2] = br_dec32be(yb + 4);
+	yw[1] = br_dec32be(yb + 8);
+	yw[0] = br_dec32be(yb + 12);
+	hw[3] = br_dec32be(hb);
+	hw[2] = br_dec32be(hb + 4);
+	hw[1] = br_dec32be(hb + 8);
+	hw[0] = br_dec32be(hb + 12);
+	hwr[3] = rev32(hw[3]); /* bit-reversed h words, computed once outside the loop */
+	hwr[2] = rev32(hw[2]);
+	hwr[1] = rev32(hw[1]);
+	hwr[0] = rev32(hw[0]);
+	while (len > 0) { /* one iteration per 16-byte block; skipped entirely when len is 0 */
+		const unsigned char *src;
+		unsigned char tmp[16];
+		int i;
+		uint32_t a[18], b[18], c[18];
+		uint32_t d0, d1, d2, d3, d4, d5, d6, d7;
+		uint32_t zw[8];
+
+		if (len >= 16) {
+			src = buf; /* full block: read straight from the input */
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len); /* partial final block is zero-padded in a local copy */
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			src = tmp;
+			len = 0;
+		}
+		yw[3] ^= br_dec32be(src); /* XOR the block into y, same word order as above */
+		yw[2] ^= br_dec32be(src + 4);
+		yw[1] ^= br_dec32be(src + 8);
+		yw[0] ^= br_dec32be(src + 12);
+
+		/*
+		 * We are using Karatsuba: the 128x128 multiplication is
+		 * reduced to three 64x64 multiplications, hence nine
+		 * 32x32 multiplications. With the bit-reversal trick,
+		 * we have to perform 18 32x32 multiplications.
+		 */
+
+		/*
+		 * y[0,1]*h[0,1] -> 0,1,4
+		 * y[2,3]*h[2,3] -> 2,3,5
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6,7,8
+		 */
+
+		a[0] = yw[0]; /* a[0..8]: operands built from y in normal bit order */
+		a[1] = yw[1];
+		a[2] = yw[2];
+		a[3] = yw[3];
+		a[4] = a[0] ^ a[1];
+		a[5] = a[2] ^ a[3];
+		a[6] = a[0] ^ a[2];
+		a[7] = a[1] ^ a[3];
+		a[8] = a[6] ^ a[7];
+
+		a[ 9] = rev32(yw[0]); /* a[9..17]: the same operands, bit-reversed */
+		a[10] = rev32(yw[1]);
+		a[11] = rev32(yw[2]);
+		a[12] = rev32(yw[3]);
+		a[13] = a[ 9] ^ a[10];
+		a[14] = a[11] ^ a[12];
+		a[15] = a[ 9] ^ a[11];
+		a[16] = a[10] ^ a[12];
+		a[17] = a[15] ^ a[16];
+
+		b[0] = hw[0]; /* b[0..8]: operands built from h in normal bit order */
+		b[1] = hw[1];
+		b[2] = hw[2];
+		b[3] = hw[3];
+		b[4] = b[0] ^ b[1];
+		b[5] = b[2] ^ b[3];
+		b[6] = b[0] ^ b[2];
+		b[7] = b[1] ^ b[3];
+		b[8] = b[6] ^ b[7];
+
+		b[ 9] = hwr[0]; /* b[9..17]: the same operands, from the bit-reversed h */
+		b[10] = hwr[1];
+		b[11] = hwr[2];
+		b[12] = hwr[3];
+		b[13] = b[ 9] ^ b[10];
+		b[14] = b[11] ^ b[12];
+		b[15] = b[ 9] ^ b[11];
+		b[16] = b[10] ^ b[12];
+		b[17] = b[15] ^ b[16];
+
+		for (i = 0; i < 18; i ++) {
+			c[i] = bmul32(a[i], b[i]); /* low 32 bits only; index i+9 supplies the matching high half */
+		}
+
+		c[4] ^= c[0] ^ c[1]; /* cross terms of the three 64x64 products (low halves) */
+		c[5] ^= c[2] ^ c[3];
+		c[8] ^= c[6] ^ c[7];
+
+		c[13] ^= c[ 9] ^ c[10]; /* same cross terms on the bit-reversed side */
+		c[14] ^= c[11] ^ c[12];
+		c[17] ^= c[15] ^ c[16];
+
+		/*
+		 * y[0,1]*h[0,1] -> 0,9^4,1^13,10
+		 * y[2,3]*h[2,3] -> 2,11^5,3^14,12
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6,15^8,7^17,16
+		 */
+		d0 = c[0]; /* d0..d7: the unreduced product, lowest word first */
+		d1 = c[4] ^ (rev32(c[9]) >> 1); /* rev32(...) >> 1 turns a reversed low half into a high half */
+		d2 = c[1] ^ c[0] ^ c[2] ^ c[6] ^ (rev32(c[13]) >> 1);
+		d3 = c[4] ^ c[5] ^ c[8]
+			^ (rev32(c[10] ^ c[9] ^ c[11] ^ c[15]) >> 1);
+		d4 = c[2] ^ c[1] ^ c[3] ^ c[7]
+			^ (rev32(c[13] ^ c[14] ^ c[17]) >> 1);
+		d5 = c[5] ^ (rev32(c[11] ^ c[10] ^ c[12] ^ c[16]) >> 1);
+		d6 = c[3] ^ (rev32(c[14]) >> 1);
+		d7 = rev32(c[12]) >> 1;
+
+		zw[0] = d0 << 1; /* shift the whole product left by one bit */
+		zw[1] = (d1 << 1) | (d0 >> 31); /* top bit of each word carries into the next one */
+		zw[2] = (d2 << 1) | (d1 >> 31);
+		zw[3] = (d3 << 1) | (d2 >> 31);
+		zw[4] = (d4 << 1) | (d3 >> 31);
+		zw[5] = (d5 << 1) | (d4 >> 31);
+		zw[6] = (d6 << 1) | (d5 >> 31);
+		zw[7] = (d7 << 1) | (d6 >> 31);
+
+		for (i = 0; i < 4; i ++) {
+			uint32_t lw;
+
+			lw = zw[i]; /* low word i is folded into words i+3 and i+4 */
+			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
+			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
+		}
+		memcpy(yw, zw + 4, sizeof yw); /* the upper four words are the new y */
+	}
+	br_enc32be(yb, yw[3]); /* write y back, same word order as the decoding at the top */
+	br_enc32be(yb + 4, yw[2]);
+	br_enc32be(yb + 8, yw[1]);
+	br_enc32be(yb + 12, yw[0]);
+}
diff --git a/test/monniaux/BearSSL/src/hash/ghash_ctmul64.c b/test/monniaux/BearSSL/src/hash/ghash_ctmul64.c
new file mode 100644
index 00000000..a46f16fe
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/ghash_ctmul64.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This is the 64-bit variant of br_ghash_ctmul32(), with 64-bit operands
+ * and bit reversal of 64-bit words.
+ */
+
+static inline uint64_t
+bmul64(uint64_t x, uint64_t y) /* returns the low 64 bits of the carryless product of x and y */
+{
+	uint64_t x0, x1, x2, x3;
+	uint64_t y0, y1, y2, y3;
+	uint64_t z0, z1, z2, z3;
+
+	x0 = x & (uint64_t)0x1111111111111111; /* lane k keeps bits k, k+4, k+8, ... */
+	x1 = x & (uint64_t)0x2222222222222222;
+	x2 = x & (uint64_t)0x4444444444444444;
+	x3 = x & (uint64_t)0x8888888888888888;
+	y0 = y & (uint64_t)0x1111111111111111;
+	y1 = y & (uint64_t)0x2222222222222222;
+	y2 = y & (uint64_t)0x4444444444444444;
+	y3 = y & (uint64_t)0x8888888888888888;
+	z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1); /* lane indices sum to 0 mod 4 */
+	z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2); /* lane indices sum to 1 mod 4 */
+	z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3); /* lane indices sum to 2 mod 4 */
+	z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0); /* lane indices sum to 3 mod 4 */
+	z0 &= (uint64_t)0x1111111111111111; /* keep only this lane's bits */
+	z1 &= (uint64_t)0x2222222222222222;
+	z2 &= (uint64_t)0x4444444444444444;
+	z3 &= (uint64_t)0x8888888888888888;
+	return z0 | z1 | z2 | z3; /* lanes are disjoint, so OR merges them */
+}
+
+static uint64_t
+rev64(uint64_t x) /* returns x with bit i moved to bit 63-i */
+{
+#define RMS(m, s)   do { \
+		x = ((x & (uint64_t)(m)) << (s)) \
+			| ((x >> (s)) & (uint64_t)(m)); \
+	} while (0) /* swap the groups selected by mask m with their neighbours s bits higher */
+
+	RMS(0x5555555555555555,  1); /* swap adjacent bits */
+	RMS(0x3333333333333333,  2); /* swap adjacent bit pairs */
+	RMS(0x0F0F0F0F0F0F0F0F,  4); /* swap nibbles within each byte */
+	RMS(0x00FF00FF00FF00FF,  8); /* swap adjacent bytes */
+	RMS(0x0000FFFF0000FFFF, 16); /* swap adjacent 16-bit units */
+	return (x << 32) | (x >> 32); /* swap the two 32-bit halves */
+
+#undef RMS
+}
+
+/* see bearssl_hash.h */
+void
+br_ghash_ctmul64(void *y, const void *h, const void *data, size_t len) /* y: 16-byte value updated in place; h: 16-byte multiplier; data/len: input */
+{
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint64_t y0, y1;
+	uint64_t h0, h1, h2, h0r, h1r, h2r;
+
+	buf = data;
+	yb = y;
+	hb = h;
+	y1 = br_dec64be(yb); /* y1 = first eight bytes, y0 = last eight bytes */
+	y0 = br_dec64be(yb + 8);
+	h1 = br_dec64be(hb);
+	h0 = br_dec64be(hb + 8);
+	h0r = rev64(h0); /* everything derived from h alone is computed once, outside the loop */
+	h1r = rev64(h1);
+	h2 = h0 ^ h1;
+	h2r = h0r ^ h1r;
+	while (len > 0) { /* one iteration per 16-byte block; skipped entirely when len is 0 */
+		const unsigned char *src;
+		unsigned char tmp[16];
+		uint64_t y0r, y1r, y2, y2r;
+		uint64_t z0, z1, z2, z0h, z1h, z2h;
+		uint64_t v0, v1, v2, v3;
+
+		if (len >= 16) {
+			src = buf; /* full block: read straight from the input */
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len); /* partial final block is zero-padded in a local copy */
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			src = tmp;
+			len = 0;
+		}
+		y1 ^= br_dec64be(src); /* XOR the block into y */
+		y0 ^= br_dec64be(src + 8);
+
+		y0r = rev64(y0);
+		y1r = rev64(y1);
+		y2 = y0 ^ y1;
+		y2r = y0r ^ y1r;
+
+		z0 = bmul64(y0, h0); /* low halves of the three products */
+		z1 = bmul64(y1, h1);
+		z2 = bmul64(y2, h2);
+		z0h = bmul64(y0r, h0r); /* bit-reversed operands give the high halves, still reversed */
+		z1h = bmul64(y1r, h1r);
+		z2h = bmul64(y2r, h2r);
+		z2 ^= z0 ^ z1; /* cross term, low half */
+		z2h ^= z0h ^ z1h; /* cross term, high half (still reversed) */
+		z0h = rev64(z0h) >> 1; /* undo the reversal to obtain the actual high halves */
+		z1h = rev64(z1h) >> 1;
+		z2h = rev64(z2h) >> 1;
+
+		v0 = z0; /* v0..v3: the unreduced product, lowest word first */
+		v1 = z0h ^ z2;
+		v2 = z1 ^ z2h;
+		v3 = z1h;
+
+		v3 = (v3 << 1) | (v2 >> 63); /* shift the whole product left by one bit, top word first */
+		v2 = (v2 << 1) | (v1 >> 63);
+		v1 = (v1 << 1) | (v0 >> 63);
+		v0 = (v0 << 1);
+
+		v2 ^= v0 ^ (v0 >> 1) ^ (v0 >> 2) ^ (v0 >> 7); /* fold v0 into v1 and v2 */
+		v1 ^= (v0 << 63) ^ (v0 << 62) ^ (v0 << 57);
+		v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7); /* then fold the updated v1 into v2 and v3 */
+		v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57);
+
+		y0 = v2; /* the upper two words are the new y */
+		y1 = v3;
+	}
+
+	br_enc64be(yb, y1); /* write y back in the same order it was read */
+	br_enc64be(yb + 8, y0);
+}
diff --git a/test/monniaux/BearSSL/src/hash/ghash_pclmul.c b/test/monniaux/BearSSL/src/hash/ghash_pclmul.c
new file mode 100644
index 00000000..a58e7dc0
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/ghash_pclmul.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+/*
+ * This is the GHASH implementation that leverages the pclmulqdq opcode
+ * (from the AES-NI instructions).
+ */
+
+#if BR_AES_X86NI
+
+/*
+ * Test CPU support for PCLMULQDQ; returns whatever br_cpuid() returns.
+ */
+static inline int
+pclmul_supported(void)
+{
+	/*
+	 * Bit mask for features in ECX (presumably the third br_cpuid() argument):
+	 *    1   PCLMULQDQ support (bit index 1, i.e. mask 0x00000002)
+	 */
+	return br_cpuid(0, 0, 0x00000002, 0);
+}
+
+/* see bearssl_hash.h -- returns &br_ghash_pclmul when pclmul_supported() is nonzero, 0 otherwise */
+br_ghash
+br_ghash_pclmul_get(void)
+{
+	return pclmul_supported() ? &br_ghash_pclmul : 0;
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * GHASH is defined over elements of GF(2^128) with "full little-endian"
+ * representation: leftmost byte is least significant, and, within each
+ * byte, leftmost _bit_ is least significant. The natural ordering in
+ * x86 is "mixed little-endian": bytes are ordered from least to most
+ * significant, but bits within a byte are in most-to-least significant
+ * order. Going to full little-endian representation would require
+ * reversing bits within each byte, which is doable but expensive.
+ *
+ * Instead, we go to full big-endian representation, by swapping bytes
+ * around, which is done with a single _mm_shuffle_epi8() opcode (it
+ * comes with SSSE3; all CPUs that offer pclmulqdq also have SSSE3). We
+ * can use a full big-endian representation because in a carryless
+ * multiplication, we have a nice bit reversal property:
+ *
+ *    rev_128(x) * rev_128(y) = rev_255(x * y)
+ *
+ * So by using full big-endian, we still get the right result, except
+ * that it is right-shifted by 1 bit. The left-shift is relatively
+ * inexpensive, and it can be mutualised.
+ *
+ *
+ * Since SSE2 opcodes do not have facilities for shifting full 128-bit
+ * values with bit precision, we have to break down values into 64-bit
+ * chunks. We number chunks from 0 to 3 in left to right order.
+ */
+
+/*
+ * Byte-swap a complete 128-bit value. This normally uses
+ * _mm_shuffle_epi8(), which gets translated to pshufb (an SSSE3 opcode).
+ * However, this crashes old Clang versions, so, for Clang before 3.8,
+ * we use an alternate (and less efficient) version built from SSE2 shifts/shuffles.
+ */
+#if BR_CLANG && !BR_CLANG_3_8
+#define BYTESWAP_DECL   /* fallback needs no index variable */
+#define BYTESWAP_PREP   (void)0
+#define BYTESWAP(x)   do { \
+		__m128i byteswap1, byteswap2; \
+		byteswap1 = (x); \
+		byteswap2 = _mm_srli_epi16(byteswap1, 8); /* swap the two bytes of each 16-bit lane... */ \
+		byteswap1 = _mm_slli_epi16(byteswap1, 8); \
+		byteswap1 = _mm_or_si128(byteswap1, byteswap2); \
+		byteswap1 = _mm_shufflelo_epi16(byteswap1, 0x1B); /* ...reverse the four 16-bit lanes of each half... */ \
+		byteswap1 = _mm_shufflehi_epi16(byteswap1, 0x1B); \
+		(x) = _mm_shuffle_epi32(byteswap1, 0x4E); /* ...and swap the two 64-bit halves */ \
+	} while (0)
+#else /* any other compiler: single-shuffle version */
+#define BYTESWAP_DECL   __m128i byteswap_index;
+#define BYTESWAP_PREP   do { \
+		byteswap_index = _mm_set_epi8( /* arguments run from byte 15 down to byte 0: reversing index */ \
+			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+	} while (0)
+#define BYTESWAP(x)   do { \
+		(x) = _mm_shuffle_epi8((x), byteswap_index); \
+	} while (0)
+#endif /* BR_CLANG && !BR_CLANG_3_8 */
+
+/*
+ * Call pclmulqdq. Clang appears to have trouble with the intrinsic, so,
+ * for that compiler, we use inline assembly. Inline assembly is
+ * potentially a bit slower because the compiler does not understand
+ * what the opcode does, and thus cannot optimize instruction
+ * scheduling.
+ *
+ * We use a target of "sse2" only, so that Clang may still handle the
+ * '__m128i' type and allocate SSE2 registers.
+ */
+#if BR_CLANG
+BR_TARGET("sse2")
+static inline __m128i
+pclmulqdq00(__m128i x, __m128i y)
+{
+	__asm__ ("pclmulqdq $0x00, %1, %0" : "+x" (x) : "x" (y));	/* immediate 0x00: low 64-bit halves of both operands; x is overwritten */
+	return x;
+}
+BR_TARGET("sse2")
+static inline __m128i
+pclmulqdq11(__m128i x, __m128i y)
+{
+	__asm__ ("pclmulqdq $0x11, %1, %0" : "+x" (x) : "x" (y));	/* immediate 0x11: high 64-bit halves of both operands; x is overwritten */
+	return x;
+}
+#else /* !BR_CLANG: map directly onto the intrinsic with the same immediates */
+#define pclmulqdq00(x, y)   _mm_clmulepi64_si128(x, y, 0x00)
+#define pclmulqdq11(x, y)   _mm_clmulepi64_si128(x, y, 0x11)
+#endif /* BR_CLANG */
+
+/*
+ * From a 128-bit value kw, compute kx as the XOR of the two 64-bit
+ * halves of kw (into the low 64 bits of kx; the upper half is unspecified).
+ */
+#define BK(kw, kx)   do { \
+		kx = _mm_xor_si128(kw, _mm_shuffle_epi32(kw, 0x0E)); /* 0x0E brings the upper half down */ \
+	} while (0)
+
+/*
+ * Combine two 64-bit values (k0:k1, k0 in the upper half) into a 128-bit
+ * value (kw), and compute the XOR of the two values (kx).
+ */
+#define PBK(k0, k1, kw, kx)   do { \
+		kw = _mm_unpacklo_epi64(k1, k0); /* low half from k1, upper half from k0 */ \
+		kx = _mm_xor_si128(k0, k1); \
+	} while (0)
+
+/*
+ * Left-shift by 1 bit a 256-bit value (in four 64-bit words, x0 being the most significant).
+ */
+#define SL_256(x0, x1, x2, x3)   do { \
+		x0 = _mm_or_si128( /* each word takes the top bit of the next one */ \
+			_mm_slli_epi64(x0, 1), \
+			_mm_srli_epi64(x1, 63)); \
+		x1 = _mm_or_si128( \
+			_mm_slli_epi64(x1, 1), \
+			_mm_srli_epi64(x2, 63)); \
+		x2 = _mm_or_si128( \
+			_mm_slli_epi64(x2, 1), \
+			_mm_srli_epi64(x3, 63)); \
+		x3 = _mm_slli_epi64(x3, 1); /* lowest word: a zero bit is shifted in */ \
+	} while (0)
+
+/*
+ * Perform reduction in GF(2^128). The 256-bit value is in x0..x3;
+ * result is written in x0..x1 (x2 is overwritten as an intermediate).
+ */
+#define REDUCE_F128(x0, x1, x2, x3)   do { \
+		x1 = _mm_xor_si128( /* x1 ^= x3 ^ (x3 >> 1) ^ (x3 >> 2) ^ (x3 >> 7) */ \
+			x1, \
+			_mm_xor_si128( \
+				_mm_xor_si128( \
+					x3, \
+					_mm_srli_epi64(x3, 1)), \
+				_mm_xor_si128( \
+					_mm_srli_epi64(x3, 2), \
+					_mm_srli_epi64(x3, 7)))); \
+		x2 = _mm_xor_si128( /* x2 ^= (x3 << 63) ^ (x3 << 62) ^ (x3 << 57) */ \
+			_mm_xor_si128( \
+				x2, \
+				_mm_slli_epi64(x3, 63)), \
+			_mm_xor_si128( \
+				_mm_slli_epi64(x3, 62), \
+				_mm_slli_epi64(x3, 57))); \
+		x0 = _mm_xor_si128( /* same fold again, from the updated x2 into x0... */ \
+			x0, \
+			_mm_xor_si128( \
+				_mm_xor_si128( \
+					x2, \
+					_mm_srli_epi64(x2, 1)), \
+				_mm_xor_si128( \
+					_mm_srli_epi64(x2, 2), \
+					_mm_srli_epi64(x2, 7)))); \
+		x1 = _mm_xor_si128( /* ...and into x1 */ \
+			_mm_xor_si128( \
+				x1, \
+				_mm_slli_epi64(x2, 63)), \
+			_mm_xor_si128( \
+				_mm_slli_epi64(x2, 62), \
+				_mm_slli_epi64(x2, 57))); \
+	} while (0)
+
+/*
+ * Square value kw into (dw,dx): dw is the packed result, dx the XOR of its halves (see PBK).
+ */
+#define SQUARE_F128(kw, dw, dx)   do { \
+		__m128i z0, z1, z2, z3; \
+		z1 = pclmulqdq11(kw, kw); /* square of the upper half */ \
+		z3 = pclmulqdq00(kw, kw); /* square of the low half; no cross product is computed */ \
+		z0 = _mm_shuffle_epi32(z1, 0x0E); /* upper 64 bits of each square become z0 / z2 */ \
+		z2 = _mm_shuffle_epi32(z3, 0x0E); \
+		SL_256(z0, z1, z2, z3); \
+		REDUCE_F128(z0, z1, z2, z3); \
+		PBK(z0, z1, dw, dx); \
+	} while (0)
+
+/* see bearssl_hash.h -- updates the 16 bytes at y in place from len bytes of data, using the 16 bytes at h */
+BR_TARGET("ssse3,pclmul")
+void
+br_ghash_pclmul(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf1, *buf2;
+	unsigned char tmp[64];	/* zero-padded copy of the tail; the tail is at most 63 bytes */
+	size_t num4, num1;
+	__m128i yw, h1w, h1x;
+	BYTESWAP_DECL
+
+	/*
+	 * We split data into two chunks. First chunk starts at buf1
+	 * and contains num4 blocks of 64-byte values. Second chunk
+	 * starts at buf2 and contains num1 blocks of 16-byte values.
+	 * We want the first chunk to be as large as possible.
+	 */
+	buf1 = data;
+	num4 = len >> 6;
+	len &= 63;			/* from here on, len is the tail length only */
+	buf2 = buf1 + (num4 << 6);
+	num1 = (len + 15) >> 4;		/* tail length rounded up to whole 16-byte blocks */
+	if ((len & 15) != 0) {		/* ragged tail: process a zero-padded copy instead */
+		memcpy(tmp, buf2, len);
+		memset(tmp + len, 0, (num1 << 4) - len);
+		buf2 = tmp;
+	}
+
+	/*
+	 * Preparatory step for endian conversions.
+	 */
+	BYTESWAP_PREP;
+
+	/*
+	 * Load y and h, byte-swap both, and precompute the XOR of h's halves.
+	 */
+	yw = _mm_loadu_si128(y);
+	h1w = _mm_loadu_si128(h);
+	BYTESWAP(yw);
+	BYTESWAP(h1w);
+	BK(h1w, h1x);
+
+	if (num4 > 0) {
+		__m128i h2w, h2x, h3w, h3x, h4w, h4x;
+		__m128i t0, t1, t2, t3;
+
+		/*
+		 * Compute h2 = h^2.
+		 */
+		SQUARE_F128(h1w, h2w, h2x);
+
+		/*
+		 * Compute h3 = h^3 = h*(h^2).
+		 */
+		t1 = pclmulqdq11(h1w, h2w);	/* product of upper halves */
+		t3 = pclmulqdq00(h1w, h2w);	/* product of low halves */
+		t2 = _mm_xor_si128(pclmulqdq00(h1x, h2x),	/* middle term from the XORed halves */
+			_mm_xor_si128(t1, t3));
+		t0 = _mm_shuffle_epi32(t1, 0x0E);
+		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+		SL_256(t0, t1, t2, t3);
+		REDUCE_F128(t0, t1, t2, t3);
+		PBK(t0, t1, h3w, h3x);
+
+		/*
+		 * Compute h4 = h^4 = (h^2)^2.
+		 */
+		SQUARE_F128(h2w, h4w, h4x);
+
+		while (num4 -- > 0) {	/* four 16-byte blocks per iteration */
+			__m128i aw0, aw1, aw2, aw3;
+			__m128i ax0, ax1, ax2, ax3;
+
+			aw0 = _mm_loadu_si128((void *)(buf1 +  0));
+			aw1 = _mm_loadu_si128((void *)(buf1 + 16));
+			aw2 = _mm_loadu_si128((void *)(buf1 + 32));
+			aw3 = _mm_loadu_si128((void *)(buf1 + 48));
+			BYTESWAP(aw0);
+			BYTESWAP(aw1);
+			BYTESWAP(aw2);
+			BYTESWAP(aw3);
+			buf1 += 64;
+
+			aw0 = _mm_xor_si128(aw0, yw);	/* running y goes into the first block only */
+			BK(aw1, ax1);
+			BK(aw2, ax2);
+			BK(aw3, ax3);
+			BK(aw0, ax0);			/* done after the XOR with yw so ax0 matches aw0 */
+
+			t1 = _mm_xor_si128(	/* upper-half products of aw0*h4 + aw1*h3 + aw2*h2 + aw3*h1 */
+				_mm_xor_si128(
+					pclmulqdq11(aw0, h4w),
+					pclmulqdq11(aw1, h3w)),
+				_mm_xor_si128(
+					pclmulqdq11(aw2, h2w),
+					pclmulqdq11(aw3, h1w)));
+			t3 = _mm_xor_si128(	/* low-half products, same pairing */
+				_mm_xor_si128(
+					pclmulqdq00(aw0, h4w),
+					pclmulqdq00(aw1, h3w)),
+				_mm_xor_si128(
+					pclmulqdq00(aw2, h2w),
+					pclmulqdq00(aw3, h1w)));
+			t2 = _mm_xor_si128(	/* products of the XORed halves, same pairing */
+				_mm_xor_si128(
+					pclmulqdq00(ax0, h4x),
+					pclmulqdq00(ax1, h3x)),
+				_mm_xor_si128(
+					pclmulqdq00(ax2, h2x),
+					pclmulqdq00(ax3, h1x)));
+			t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));	/* middle term */
+			t0 = _mm_shuffle_epi32(t1, 0x0E);
+			t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+			t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+			SL_256(t0, t1, t2, t3);		/* one shift and one reduction for all four blocks */
+			REDUCE_F128(t0, t1, t2, t3);
+			yw = _mm_unpacklo_epi64(t1, t0);	/* new y: t0 in the upper half, t1 in the low half */
+		}
+	}
+
+	while (num1 -- > 0) {	/* remaining blocks, one at a time, using h only */
+		__m128i aw, ax;
+		__m128i t0, t1, t2, t3;
+
+		aw = _mm_loadu_si128((void *)buf2);
+		BYTESWAP(aw);
+		buf2 += 16;
+
+		aw = _mm_xor_si128(aw, yw);
+		BK(aw, ax);
+
+		t1 = pclmulqdq11(aw, h1w);
+		t3 = pclmulqdq00(aw, h1w);
+		t2 = pclmulqdq00(ax, h1x);
+		t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));
+		t0 = _mm_shuffle_epi32(t1, 0x0E);
+		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+		SL_256(t0, t1, t2, t3);
+		REDUCE_F128(t0, t1, t2, t3);
+		yw = _mm_unpacklo_epi64(t1, t0);
+	}
+
+	BYTESWAP(yw);			/* back to the byte order y was loaded in */
+	_mm_storeu_si128(y, yw);
+}
+
+BR_TARGETS_X86_DOWN
+
+#else
+
+/* see bearssl_hash.h -- BR_AES_X86NI is not set: no pclmul implementation is compiled, always returns 0 */
+br_ghash
+br_ghash_pclmul_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/hash/ghash_pwr8.c b/test/monniaux/BearSSL/src/hash/ghash_pwr8.c
new file mode 100644
index 00000000..2e7b0f4c
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/ghash_pwr8.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+/*
+ * This is the GHASH implementation that leverages the POWER8 opcodes.
+ */
+
+#if BR_POWER8
+
+/*
+ * Some symbolic names for registers.
+ *   HB0 = 16 bytes of value 0
+ *   HB1 = 16 bytes of value 1
+ *   HB2 = 16 bytes of value 2
+ *   HB6 = 16 bytes of value 6
+ *   HB7 = 16 bytes of value 7
+ *   TT0, TT1 and TT2 are temporaries
+ *
+ * BSW holds the pattern for byteswapping 32-bit words; this is set only
+ * on little-endian systems. XBSW is the same register with the +32 offset
+ * for access with the VSX opcodes.
+ */
+#define HB0     0	/* register numbers; the values are loaded by INIT */
+#define HB1     1
+#define HB2     2
+#define HB6     3
+#define HB7     4
+#define TT0     5
+#define TT1     6
+#define TT2     7
+
+#define BSW     8
+#define XBSW   40	/* BSW + 32 */
+
+/*
+ * Macro to initialise the constants (HB0..HB7, plus BSW on little-endian).
+ */
+#define INIT \
+		vxor(HB0, HB0, HB0) /* x ^ x: all-zero register */ \
+		vspltisb(HB1, 1) \
+		vspltisb(HB2, 2) \
+		vspltisb(HB6, 6) \
+		vspltisb(HB7, 7) \
+		INIT_BSW
+
+/*
+ * Fix endianness of a value after reading it or before writing it, if
+ * necessary. INIT_BSW loads the permutation pattern that FIX_ENDIAN uses.
+ */
+#if BR_POWER8_LE
+#define INIT_BSW         lxvw4x(XBSW, 0, %[idx2be])
+#define FIX_ENDIAN(xx)   vperm(xx, xx, xx, BSW)
+#else /* not BR_POWER8_LE: both macros expand to nothing */
+#define INIT_BSW
+#define FIX_ENDIAN(xx)
+#endif /* BR_POWER8_LE */
+
+/*
+ * Shift x0:x1 left by one bit (TT0 is used as scratch). This is a corrective
+ * action needed because GHASH is defined in full little-endian specification,
+ * while the opcodes use full big-endian convention, so the 255-bit product
+ * ends up one bit to the right.
+ */
+#define SL_256(x0, x1) \
+		vsldoi(TT0, HB0, x1, 1) /* presumably isolates the top byte of x1 in TT0 -- confirm against the ISA */ \
+		vsl(x0, x0, HB1) \
+		vsr(TT0, TT0, HB7) /* presumably leaves only the top bit of x1, as the carry */ \
+		vsl(x1, x1, HB1) \
+		vxor(x0, x0, TT0) /* carry goes into x0 */
+
+/*
+ * Reduce x0:x1 in GF(2^128), result in xd (register xd may be the same as
+ * x0 or x1, or a different register). x0, x1, TT0, TT1 and TT2 are modified.
+ */
+#define REDUCE_F128(xd, x0, x1) \
+		vxor(x0, x0, x1) /* first fold: x1 and its shifts by 1, 2 and 7 go into x0 */ \
+		vsr(TT0, x1, HB1) \
+		vsr(TT1, x1, HB2) \
+		vsr(TT2, x1, HB7) \
+		vxor(x0, x0, TT0) \
+		vxor(TT1, TT1, TT2) \
+		vxor(x0, x0, TT1) \
+		vsldoi(x1, x1, HB0, 15) /* rebuild x1 from itself and HB0, then fold a second time */ \
+		vsl(TT1, x1, HB6) \
+		vsl(TT2, x1, HB1) \
+		vxor(x1, TT1, TT2) \
+		vsr(TT0, x1, HB1) \
+		vsr(TT1, x1, HB2) \
+		vsr(TT2, x1, HB7) \
+		vxor(x0, x0, x1) \
+		vxor(x0, x0, TT0) \
+		vxor(TT1, TT1, TT2) \
+		vxor(xd, x0, TT1) /* xd is written only here, so it may alias x0 or x1 */
+
+/* see bearssl_hash.h -- updates the 16 bytes at y in place from len bytes of data, using the 16 bytes at h */
+void
+br_ghash_pwr8(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf1, *buf2;
+	size_t num4, num1;
+	unsigned char tmp[64];	/* zero-padded copy of the tail; the tail is at most 63 bytes */
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* pattern loaded into BSW by INIT_BSW */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	buf1 = data;
+
+	/*
+	 * Assembly code requires data split into two chunks; first chunk
+	 * must contain a number of blocks which is a multiple of 4.
+	 * Since the processing for the first chunk is faster, we want
+	 * to make it as big as possible.
+	 *
+	 * For the remainder, there are two possibilities:
+	 *  -- if the remainder size is a multiple of 16, then use it
+	 *     in place;
+	 *  -- otherwise, copy it to the tmp[] array and pad it with
+	 *     zeros.
+	 */
+	num4 = len >> 6;
+	buf2 = buf1 + (num4 << 6);
+	len &= 63;			/* from here on, len is the remainder length only */
+	num1 = (len + 15) >> 4;		/* remainder rounded up to whole 16-byte blocks */
+	if ((len & 15) != 0) {
+		memcpy(tmp, buf2, len);
+		memset(tmp + len, 0, (num1 << 4) - len);
+		buf2 = tmp;
+	}
+
+	cc0 =  0;	/* byte offsets used as index operands by the four loads in loop4 */
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+		INIT
+
+		/*
+		 * Load current h (denoted hereafter h1) in v9.
+		 */
+		lxvw4x(41, 0, %[h])
+		FIX_ENDIAN(9)
+
+		/*
+		 * Load current y into v28.
+		 */
+		lxvw4x(60, 0, %[y])
+		FIX_ENDIAN(28)
+
+		/*
+		 * Split h1 into three registers:
+		 *   v17 = h1_1:h1_0
+		 *   v18 =    0:h1_0
+		 *   v19 = h1_1:0
+		 */
+		xxpermdi(49, 41, 41, 2)
+		vsldoi(18, HB0, 9, 8)
+		vsldoi(19, 9, HB0, 8)
+
+		/*
+		 * If num4 is 0, skip directly to the second chunk.
+		 */
+		cmpldi(%[num4], 0)
+		beq(chunk1)
+
+		/*
+		 * Compute h2 = h*h in v10.
+		 */
+		vpmsumd(10, 18, 18)
+		vpmsumd(11, 19, 19)
+		SL_256(10, 11)
+		REDUCE_F128(10, 10, 11)
+
+		/*
+		 * Compute h3 = h*h*h in v11.
+		 * We first split h2 into:
+		 *   v10 = h2_0:h2_1
+		 *   v11 =    0:h2_0
+		 *   v12 = h2_1:0
+		 * Then we do the product with h1, and reduce into v11.
+		 */
+		vsldoi(11, HB0, 10, 8)
+		vsldoi(12, 10, HB0, 8)
+		vpmsumd(13, 10, 17)
+		vpmsumd(11, 11, 18)
+		vpmsumd(12, 12, 19)
+		vsldoi(14, HB0, 13, 8)
+		vsldoi(15, 13, HB0, 8)
+		vxor(11, 11, 14)
+		vxor(12, 12, 15)
+		SL_256(11, 12)
+		REDUCE_F128(11, 11, 12)
+
+		/*
+		 * Compute h4 = h*h*h*h in v12. This is done by squaring h2.
+		 */
+		vsldoi(12, HB0, 10, 8)
+		vsldoi(13, 10, HB0, 8)
+		vpmsumd(12, 12, 12)
+		vpmsumd(13, 13, 13)
+		SL_256(12, 13)
+		REDUCE_F128(12, 12, 13)
+
+		/*
+		 * Repack h1, h2, h3 and h4:
+		 *   v13 = h4_0:h3_0
+		 *   v14 = h4_1:h3_1
+		 *   v15 = h2_0:h1_0
+		 *   v16 = h2_1:h1_1
+		 */
+		xxpermdi(45, 44, 43, 0)
+		xxpermdi(46, 44, 43, 3)
+		xxpermdi(47, 42, 41, 0)
+		xxpermdi(48, 42, 41, 3)
+
+		/*
+		 * Loop for each group of four blocks.
+		 */
+		mtctr(%[num4])
+	label(loop4)
+		/*
+		 * Read the four next blocks.
+		 *   v20 = y + a0 = b0
+		 *   v21 = a1     = b1
+		 *   v22 = a2     = b2
+		 *   v23 = a3     = b3
+		 */
+		lxvw4x(52, %[cc0], %[buf1])
+		lxvw4x(53, %[cc1], %[buf1])
+		lxvw4x(54, %[cc2], %[buf1])
+		lxvw4x(55, %[cc3], %[buf1])
+		FIX_ENDIAN(20)
+		FIX_ENDIAN(21)
+		FIX_ENDIAN(22)
+		FIX_ENDIAN(23)
+		addi(%[buf1], %[buf1], 64)
+		vxor(20, 20, 28)
+
+		/*
+		 * Repack the blocks into v9, v10, v11 and v12.
+		 *   v9  = b0_0:b1_0
+		 *   v10 = b0_1:b1_1
+		 *   v11 = b2_0:b3_0
+		 *   v12 = b2_1:b3_1
+		 */
+		xxpermdi(41, 52, 53, 0)
+		xxpermdi(42, 52, 53, 3)
+		xxpermdi(43, 54, 55, 0)
+		xxpermdi(44, 54, 55, 3)
+
+		/*
+		 * Compute the products.
+		 *   v20 = b0_0*h4_0 + b1_0*h3_0
+		 *   v21 = b0_1*h4_0 + b1_1*h3_0
+		 *   v22 = b0_0*h4_1 + b1_0*h3_1
+		 *   v23 = b0_1*h4_1 + b1_1*h3_1
+		 *   v24 = b2_0*h2_0 + b3_0*h1_0
+		 *   v25 = b2_1*h2_0 + b3_1*h1_0
+		 *   v26 = b2_0*h2_1 + b3_0*h1_1
+		 *   v27 = b2_1*h2_1 + b3_1*h1_1
+		 */
+		vpmsumd(20, 13,  9)
+		vpmsumd(21, 13, 10)
+		vpmsumd(22, 14,  9)
+		vpmsumd(23, 14, 10)
+		vpmsumd(24, 15, 11)
+		vpmsumd(25, 15, 12)
+		vpmsumd(26, 16, 11)
+		vpmsumd(27, 16, 12)
+
+		/*
+		 * Sum products into a single 256-bit result in v11:v12.
+		 */
+		vxor(11, 20, 24)
+		vxor(12, 23, 27)
+		vxor( 9, 21, 22)
+		vxor(10, 25, 26)
+		vxor(20,  9, 10)
+		vsldoi( 9, HB0, 20, 8)
+		vsldoi(10, 20, HB0, 8)
+		vxor(11, 11, 9)
+		vxor(12, 12, 10)
+
+		/*
+		 * Fix and reduce in GF(2^128); this is the new y (in v28).
+		 */
+		SL_256(11, 12)
+		REDUCE_F128(28, 11, 12)
+
+		/*
+		 * Loop for next group of four blocks.
+		 */
+		bdnz(loop4)
+
+		/*
+		 * Process second chunk, one block at a time.
+		 */
+	label(chunk1)
+		cmpldi(%[num1], 0)
+		beq(done)
+
+		mtctr(%[num1])
+	label(loop1)
+		/*
+		 * Load next data block and XOR it into y.
+		 */
+		lxvw4x(41, 0, %[buf2])
+#if BR_POWER8_LE
+		FIX_ENDIAN(9)
+#endif /* guard is redundant: FIX_ENDIAN() already expands to nothing otherwise */
+		addi(%[buf2], %[buf2], 16)
+		vxor(9, 28, 9)
+
+		/*
+		 * Split y into doublewords:
+		 *   v9  = y_0:y_1
+		 *   v10 =   0:y_0
+		 *   v11 = y_1:0
+		 */
+		vsldoi(10, HB0, 9, 8)
+		vsldoi(11, 9, HB0, 8)
+
+		/*
+		 * Compute products with h:
+		 *   v12 = y_0 * h_0
+		 *   v13 = y_1 * h_1
+		 *   v14 = y_1 * h_0 + y_0 * h_1
+		 */
+		vpmsumd(14,  9, 17)
+		vpmsumd(12, 10, 18)
+		vpmsumd(13, 11, 19)
+
+		/*
+		 * Propagate v14 into v12:v13 to finalise product.
+		 */
+		vsldoi(10, HB0, 14, 8)
+		vsldoi(11, 14, HB0, 8)
+		vxor(12, 12, 10)
+		vxor(13, 13, 11)
+
+		/*
+		 * Fix result and reduce into v28 (next value for y).
+		 */
+		SL_256(12, 13)
+		REDUCE_F128(28, 12, 13)
+		bdnz(loop1)
+
+	label(done)
+		/*
+		 * Write back the new y.
+		 */
+		FIX_ENDIAN(28)
+		stxvw4x(60, 0, %[y])
+
+: [buf1] "+b" (buf1), [buf2] "+b" (buf2)
+: [y] "b" (y), [h] "b" (h), [num4] "b" (num4), [num1] "b" (num1),
+  [cc0] "b" (cc0), [cc1] "b" (cc1), [cc2] "b" (cc2), [cc3] "b" (cc3)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"	/* ctr is set by mtctr; y is read and written through memory */
+	);
+}
+
+/* see bearssl_hash.h -- BR_POWER8 build: returns &br_ghash_pwr8 with no runtime check */
+br_ghash
+br_ghash_pwr8_get(void)
+{
+	return &br_ghash_pwr8;
+}
+
+#else
+
+/* see bearssl_hash.h -- BR_POWER8 is not set: no POWER8 implementation is compiled, always returns 0 */
+br_ghash
+br_ghash_pwr8_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/test/monniaux/BearSSL/src/hash/md5.c b/test/monniaux/BearSSL/src/hash/md5.c
new file mode 100644
index 00000000..0df7abe0
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/md5.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define F(B, C, D)     ((((C) ^ (D)) & (B)) ^ (D))	/* per bit: C where B is set, else D */
+#define G(B, C, D)     ((((C) ^ (B)) & (D)) ^ (C))	/* per bit: B where D is set, else C */
+#define H(B, C, D)     ((B) ^ (C) ^ (D))		/* three-way XOR */
+#define I(B, C, D)     ((C) ^ ((B) | ~(D)))
+
+#define ROTL(x, n)    (((x) << (n)) | ((x) >> (32 - (n))))	/* 32-bit left rotation; n must be in 1..31 */
+
+/* see inner.h -- initial state words, copied into the context by br_md5_init() */
+const uint32_t br_md5_IV[4] = {
+	0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476
+};
+
+static const uint32_t K[64] = {	/* additive constant for each of the 64 steps of br_md5_round() */
+	0xD76AA478, 0xE8C7B756, 0x242070DB, 0xC1BDCEEE,
+	0xF57C0FAF, 0x4787C62A, 0xA8304613, 0xFD469501,
+	0x698098D8, 0x8B44F7AF, 0xFFFF5BB1, 0x895CD7BE,
+	0x6B901122, 0xFD987193, 0xA679438E, 0x49B40821,
+
+	0xF61E2562, 0xC040B340, 0x265E5A51, 0xE9B6C7AA,
+	0xD62F105D, 0x02441453, 0xD8A1E681, 0xE7D3FBC8,
+	0x21E1CDE6, 0xC33707D6, 0xF4D50D87, 0x455A14ED,
+	0xA9E3E905, 0xFCEFA3F8, 0x676F02D9, 0x8D2A4C8A,
+
+	0xFFFA3942, 0x8771F681, 0x6D9D6122, 0xFDE5380C,
+	0xA4BEEA44, 0x4BDECFA9, 0xF6BB4B60, 0xBEBFBC70,
+	0x289B7EC6, 0xEAA127FA, 0xD4EF3085, 0x04881D05,
+	0xD9D4D039, 0xE6DB99E5, 0x1FA27CF8, 0xC4AC5665,
+
+	0xF4292244, 0x432AFF97, 0xAB9423A7, 0xFC93A039,
+	0x655B59C3, 0x8F0CCC92, 0xFFEFF47D, 0x85845DD1,
+	0x6FA87E4F, 0xFE2CE6E0, 0xA3014314, 0x4E0811A1,
+	0xF7537E82, 0xBD3AF235, 0x2AD7D2BB, 0xEB86D391
+};
+
+static const unsigned char MP[48] = {	/* message word index for steps 16..63, looked up as MP[step - 16] */
+	1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12,
+	5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2,
+	0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9
+};
+
+/* see inner.h -- processes the 64-byte block at buf and adds the result into the four state words val[0..3] */
+void
+br_md5_round(const unsigned char *buf, uint32_t *val)
+{
+	uint32_t m[16];
+	uint32_t a, b, c, d;
+	int i;
+
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	/*
+	 * Decode the block into the sixteen 32-bit words m[0..15] with one
+	 * range call. An earlier version looped over the words and called
+	 * br_dec32le(buf + (i << 2)) for each; that loop is obsolete.
+	 */
+	br_range_dec32le(m, 16, buf);
+
+	for (i = 0; i < 16; i += 4) {	/* steps 0..15: F, message words in natural order */
+		a = b + ROTL(a + F(b, c, d) + m[i + 0] + K[i + 0],  7);
+		d = a + ROTL(d + F(a, b, c) + m[i + 1] + K[i + 1], 12);
+		c = d + ROTL(c + F(d, a, b) + m[i + 2] + K[i + 2], 17);
+		b = c + ROTL(b + F(c, d, a) + m[i + 3] + K[i + 3], 22);
+	}
+	for (i = 16; i < 32; i += 4) {	/* steps 16..31: G, message words picked through MP[] */
+		a = b + ROTL(a + G(b, c, d) + m[MP[i - 16]] + K[i + 0],  5);
+		d = a + ROTL(d + G(a, b, c) + m[MP[i - 15]] + K[i + 1],  9);
+		c = d + ROTL(c + G(d, a, b) + m[MP[i - 14]] + K[i + 2], 14);
+		b = c + ROTL(b + G(c, d, a) + m[MP[i - 13]] + K[i + 3], 20);
+	}
+	for (i = 32; i < 48; i += 4) {	/* steps 32..47: H */
+		a = b + ROTL(a + H(b, c, d) + m[MP[i - 16]] + K[i + 0],  4);
+		d = a + ROTL(d + H(a, b, c) + m[MP[i - 15]] + K[i + 1], 11);
+		c = d + ROTL(c + H(d, a, b) + m[MP[i - 14]] + K[i + 2], 16);
+		b = c + ROTL(b + H(c, d, a) + m[MP[i - 13]] + K[i + 3], 23);
+	}
+	for (i = 48; i < 64; i += 4) {	/* steps 48..63: I */
+		a = b + ROTL(a + I(b, c, d) + m[MP[i - 16]] + K[i + 0],  6);
+		d = a + ROTL(d + I(a, b, c) + m[MP[i - 15]] + K[i + 1], 10);
+		c = d + ROTL(c + I(d, a, b) + m[MP[i - 14]] + K[i + 2], 15);
+		b = c + ROTL(b + I(c, d, a) + m[MP[i - 13]] + K[i + 3], 21);
+	}
+
+	val[0] += a;	/* add the working variables back into the state */
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+}
+
+/* see bearssl.h -- resets cc: installs br_md5_vtable, loads br_md5_IV into the state, zeroes the byte count */
+void
+br_md5_init(br_md5_context *cc)
+{
+	cc->vtable = &br_md5_vtable;
+	memcpy(cc->val, br_md5_IV, sizeof cc->val);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- appends len bytes of data to cc, running br_md5_round() each time the 64-byte buffer fills */
+void
+br_md5_update(br_md5_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+
+	buf = data;
+	ptr = (size_t)cc->count & 63;	/* bytes already pending in cc->buf */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr;	/* room left in the buffer, capped to the remaining input */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		cc->count += (uint64_t)clen;
+		if (ptr == 64) {	/* buffer full: compress it and start over */
+			br_md5_round(cc->buf, cc->val);
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl.h -- writes the digest of the data fed so far to dst; works on copies, so cc is left unchanged */
+void
+br_md5_out(const br_md5_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val[4];
+	size_t ptr;
+
+	ptr = (size_t)cc->count & 63;	/* pending bytes in the context buffer */
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val, cc->val, sizeof val);
+	buf[ptr ++] = 0x80;		/* padding starts with a single 0x80 byte */
+	if (ptr > 56) {			/* no room for the 8-byte length: pad out and process an extra block */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_md5_round(buf, val);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	br_enc64le(buf + 56, cc->count << 3);	/* length in bits goes in the last 8 bytes */
+	br_md5_round(buf, val);
+	br_range_enc32le(dst, val, 4);	/* output is the four state words */
+}
+
+/* see bearssl.h -- encodes the four state words into dst and returns the byte count; pending buffer bytes are not exported */
+uint64_t
+br_md5_state(const br_md5_context *cc, void *dst)
+{
+	br_range_enc32le(dst, cc->val, 4);
+	return cc->count;
+}
+
+/* see bearssl.h -- inverse of br_md5_state(): loads four state words from stb and sets the byte count; cc->buf is not touched */
+void
+br_md5_set_state(br_md5_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32le(cc->val, 4, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h -- br_hash_class descriptor for MD5; the casts adapt the br_md5_context* functions to the generic signatures */
+const br_hash_class br_md5_vtable = {
+	sizeof(br_md5_context),
+	BR_HASHDESC_ID(br_md5_ID)
+		| BR_HASHDESC_OUT(16)	/* presumably output size in bytes -- confirm in header */
+		| BR_HASHDESC_STATE(16)	/* presumably exported state size in bytes */
+		| BR_HASHDESC_LBLEN(6)	/* presumably log2 of the 64-byte block length */
+		| BR_HASHDESC_MD_PADDING,
+	(void (*)(const br_hash_class **))&br_md5_init,
+	(void (*)(const br_hash_class **, const void *, size_t))&br_md5_update,
+	(void (*)(const br_hash_class *const *, void *))&br_md5_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_md5_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_md5_set_state
+};
diff --git a/test/monniaux/BearSSL/src/hash/md5sha1.c b/test/monniaux/BearSSL/src/hash/md5sha1.c
new file mode 100644
index 00000000..f701aeed
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/md5sha1.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h -- resets cc: installs br_md5sha1_vtable, loads both IVs, zeroes the shared byte count */
+void
+br_md5sha1_init(br_md5sha1_context *cc)
+{
+	cc->vtable = &br_md5sha1_vtable;
+	memcpy(cc->val_md5, br_md5_IV, sizeof cc->val_md5);
+	memcpy(cc->val_sha1, br_sha1_IV, sizeof cc->val_sha1);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- appends len bytes of data; each full 64-byte buffer is fed to both the MD5 and SHA-1 round functions */
+void
+br_md5sha1_update(br_md5sha1_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+
+	buf = data;
+	ptr = (size_t)cc->count & 63;	/* bytes already pending in cc->buf */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr;	/* room left in the buffer, capped to the remaining input */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		cc->count += (uint64_t)clen;
+		if (ptr == 64) {	/* one shared buffer, two state updates */
+			br_md5_round(cc->buf, cc->val_md5);
+			br_sha1_round(cc->buf, cc->val_sha1);
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl.h -- writes 36 bytes to dst: 4 MD5 words, then 5 SHA-1 words at offset 16; works on copies, cc is unchanged */
+void
+br_md5sha1_out(const br_md5sha1_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val_md5[4];
+	uint32_t val_sha1[5];
+	size_t ptr;
+	unsigned char *out;
+	uint64_t count;
+
+	count = cc->count;
+	ptr = (size_t)count & 63;	/* pending bytes in the context buffer */
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val_md5, cc->val_md5, sizeof val_md5);
+	memcpy(val_sha1, cc->val_sha1, sizeof val_sha1);
+	buf[ptr ++] = 0x80;		/* padding starts with a single 0x80 byte */
+	if (ptr > 56) {			/* no room for the 8-byte length: pad out and process an extra block */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_md5_round(buf, val_md5);
+		br_sha1_round(buf, val_sha1);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	count <<= 3;			/* length in bits */
+	br_enc64le(buf + 56, count);	/* length field written with br_enc64le for the MD5 pass... */
+	br_md5_round(buf, val_md5);
+	br_enc64be(buf + 56, count);	/* ...then overwritten with br_enc64be for the SHA-1 pass */
+	br_sha1_round(buf, val_sha1);
+	out = dst;
+	br_range_enc32le(out, val_md5, 4);
+	br_range_enc32be(out + 16, val_sha1, 5);
+}
+
+/* see bearssl.h -- encodes the MD5 words then the SHA-1 words (at offset 16) into dst; returns the byte count */
+uint64_t
+br_md5sha1_state(const br_md5sha1_context *cc, void *dst)
+{
+	unsigned char *out;
+
+	out = dst;
+	br_range_enc32le(out, cc->val_md5, 4);
+	br_range_enc32be(out + 16, cc->val_sha1, 5);
+	return cc->count;
+}
+
+/* see bearssl.h -- inverse of br_md5sha1_state(): loads both word arrays from stb and sets the byte count; cc->buf is not touched */
+void
+br_md5sha1_set_state(br_md5sha1_context *cc, const void *stb, uint64_t count)
+{
+	const unsigned char *buf;
+
+	buf = stb;
+	br_range_dec32le(cc->val_md5, 4, buf);
+	br_range_dec32be(cc->val_sha1, 5, buf + 16);
+	cc->count = count;
+}
+
+/* see bearssl.h -- br_hash_class descriptor for MD5+SHA-1; the casts adapt the br_md5sha1_context* functions to the generic signatures */
+const br_hash_class br_md5sha1_vtable = {
+	sizeof(br_md5sha1_context),
+	BR_HASHDESC_ID(br_md5sha1_ID)
+		| BR_HASHDESC_OUT(36)	/* 36 = 16 + 20, matching what br_md5sha1_out() writes */
+		| BR_HASHDESC_STATE(36)
+		| BR_HASHDESC_LBLEN(6),	/* presumably log2 of the 64-byte block length; no padding flag is set here */
+	(void (*)(const br_hash_class **))&br_md5sha1_init,
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_md5sha1_update,
+	(void (*)(const br_hash_class *const *, void *))
+		&br_md5sha1_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))
+		&br_md5sha1_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_md5sha1_set_state
+};
diff --git a/test/monniaux/BearSSL/src/hash/mgf1.c b/test/monniaux/BearSSL/src/hash/mgf1.c
new file mode 100644
index 00000000..7a235887
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/mgf1.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- XOR into data[0..len) the stream dig(seed || counter), counter = 0, 1, 2... */
+void
+br_mgf1_xor(void *data, size_t len,
+	const br_hash_class *dig, const void *seed, size_t seed_len)
+{
+	unsigned char *dst = data;
+	size_t hash_len = br_digest_size(dig);
+	size_t off = 0;
+	uint32_t counter = 0;
+
+	while (off < len) {
+		br_hash_compat_context hctx;
+		unsigned char block[64];
+		size_t chunk, k;
+
+		/* block = dig(seed || 4-byte counter written by br_enc32be) */
+		hctx.vtable = dig;
+		dig->init(&hctx.vtable);
+		dig->update(&hctx.vtable, seed, seed_len);
+		br_enc32be(block, counter);
+		dig->update(&hctx.vtable, block, 4);
+		dig->out(&hctx.vtable, block);
+		chunk = (len - off < hash_len) ? len - off : hash_len;
+		for (k = 0; k < chunk; k ++) {
+			dst[off + k] ^= block[k];
+		}
+		off += hash_len;
+		counter ++;
+	}
+}
diff --git a/test/monniaux/BearSSL/src/hash/multihash.c b/test/monniaux/BearSSL/src/hash/multihash.c
new file mode 100644
index 00000000..b6df2e0e
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/multihash.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Scratch context large enough for any of the six hash functions listed
+ * below; code in this file passes &g.vtable as the generic context pointer.
+ */
+typedef union {
+	const br_hash_class *vtable;
+	br_md5_context md5;
+	br_sha1_context sha1;
+	br_sha224_context sha224;
+	br_sha256_context sha256;
+	br_sha384_context sha384;
+	br_sha512_context sha512;
+} gen_hash_context;
+
+/*
+ * Byte offset, inside br_multihash_context, of the saved state for the
+ * hash function with identifier 'id'. This shall be called only for the
+ * supported hash functions (identifiers 1 to 6).
+ */
+static size_t
+get_state_offset(int id)
+{
+	unsigned slot, word_index;
+
+	/*
+	 * Identifiers 5 (SHA-384) and 6 (SHA-512) each keep eight
+	 * 64-bit words, stored back to back in val_64.
+	 */
+	if (id >= 5) {
+		size_t big_state_len = 8 * sizeof(uint64_t);
+		return offsetof(br_multihash_context, val_64)
+			+ (size_t)(id - 5) * big_state_len;
+	}
+
+	/*
+	 * Identifiers 1 to 4 (MD5, SHA-1, SHA-224, SHA-256) use 32-bit
+	 * words. The formula maps slots 0, 1, 2, 3 to word indices
+	 * 0, 4, 9, 17, i.e. states of 4, 5, 8 and 8 words in sequence.
+	 */
+	slot = id - 1;
+	word_index = ((slot + (slot & (slot >> 1))) << 2) + (slot >> 1);
+	return offsetof(br_multihash_context, val_32)
+		+ word_index * sizeof(uint32_t);
+}
+
+/* see bearssl_hash.h -- clear every byte of the multihash context */
+void
+br_multihash_zero(br_multihash_context *ctx)
+{
+	/*
+	 * Strictly speaking the C standard does not promise that all-zero
+	 * bytes read back as NULL pointers, but it holds "everywhere".
+	 */
+	memset(ctx, 0, sizeof(br_multihash_context));
+}
+
+/* see bearssl_hash.h -- zero the count and store each configured hash's initial state */
+void
+br_multihash_init(br_multihash_context *ctx)
+{
+	unsigned char *base = (unsigned char *)ctx;
+	int id;
+
+	ctx->count = 0;
+	for (id = 1; id <= 6; id ++) {
+		const br_hash_class *impl = ctx->impl[id - 1];
+		gen_hash_context tmp;
+
+		if (impl == NULL) {
+			continue;   /* no implementation set for this identifier */
+		}
+		/* Run init on a scratch context, then save its state words. */
+		impl->init(&tmp.vtable);
+		impl->state(&tmp.vtable, base + get_state_offset(id));
+	}
+}
+
+/* see bearssl_hash.h -- buffer input; run each full 128-byte block through every configured hash */
+void
+br_multihash_update(br_multihash_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	unsigned char *base = (unsigned char *)ctx;
+	size_t fill = (size_t)ctx->count & 127;
+
+	while (len > 0) {
+		size_t chunk = 128 - fill;
+		int id;
+
+		if (chunk > len) {
+			chunk = len;
+		}
+		memcpy(ctx->buf + fill, src, chunk);
+		fill += chunk;
+		src += chunk;
+		len -= chunk;
+		ctx->count += (uint64_t)chunk;
+		if (fill != 128) {
+			continue;   /* buffer not full, which means len is now 0 */
+		}
+
+		/*
+		 * Full block: for each configured hash, load its saved
+		 * state into a scratch context (with the count as it was
+		 * before this block), process the block, save it back.
+		 */
+		for (id = 1; id <= 6; id ++) {
+			const br_hash_class *impl = ctx->impl[id - 1];
+			unsigned char *state = base + get_state_offset(id);
+			gen_hash_context tmp;
+
+			if (impl == NULL) {
+				continue;
+			}
+			impl->set_state(&tmp.vtable, state, ctx->count - 128);
+			impl->update(&tmp.vtable, ctx->buf, 128);
+			impl->state(&tmp.vtable, state);
+		}
+		fill = 0;
+	}
+}
+
+/* see bearssl_hash.h -- finish hash 'id' on a scratch context; returns the descriptor's output size, 0 if unset */
+size_t
+br_multihash_out(const br_multihash_context *ctx, int id, void *dst)
+{
+	const br_hash_class *impl = ctx->impl[id - 1];
+	uint64_t buffered = ctx->count & (uint64_t)127;
+	uint64_t hashed = ctx->count & ~(uint64_t)127;
+	gen_hash_context tmp;
+	if (impl == NULL) {
+		return 0;
+	}
+	/* Rebuild: saved state, plus the bytes still waiting in ctx->buf. */
+	impl->set_state(&tmp.vtable,
+		(const unsigned char *)ctx + get_state_offset(id), hashed);
+	impl->update(&tmp.vtable, ctx->buf, buffered);
+	impl->out(&tmp.vtable, dst);
+	return (impl->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+}
diff --git a/test/monniaux/BearSSL/src/hash/sha1.c b/test/monniaux/BearSSL/src/hash/sha1.c
new file mode 100644
index 00000000..4f65d846
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/sha1.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/* Bitwise step functions: F selects C where B is 1 else D, G and I are a three-way XOR, H is a majority. */
+#define F(B, C, D)     ((((C) ^ (D)) & (B)) ^ (D))
+#define G(B, C, D)     ((B) ^ (C) ^ (D))
+#define H(B, C, D)     (((D) & (C)) | (((D) | (C)) & (B)))
+#define I(B, C, D)     G(B, C, D)
+/* ROTL: rotate x left by n bits; relies on x being a 32-bit unsigned value and 0 < n < 32. */
+#define ROTL(x, n)    (((x) << (n)) | ((x) >> (32 - (n))))
+/* Additive constants: K1 for steps 0-19, K2 for 20-39, K3 for 40-59, K4 for 60-79 of br_sha1_round(). */
+#define K1     ((uint32_t)0x5A827999)
+#define K2     ((uint32_t)0x6ED9EBA1)
+#define K3     ((uint32_t)0x8F1BBCDC)
+#define K4     ((uint32_t)0xCA62C1D6)
+
+/* see inner.h -- initial values that br_sha1_init() copies into the state words */
+const uint32_t br_sha1_IV[5] = {
+	0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
+};
+
+/* see inner.h -- process one 64-byte block at buf, updating the five state words val[0..4] in place */
+void
+br_sha1_round(const unsigned char *buf, uint32_t *val)
+{
+	uint32_t m[80];
+	uint32_t a, b, c, d, e;
+	int i;
+	/* Work on copies of the state; m[0..15] come from buf, m[16..79] are derived from earlier words. */
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	br_range_dec32be(m, 16, buf);
+	for (i = 16; i < 80; i ++) {
+		uint32_t x = m[i - 3] ^ m[i - 8] ^ m[i - 14] ^ m[i - 16];
+		m[i] = ROTL(x, 1);
+	}
+	/* 80 steps in four groups of 20; each loop body is 5 steps with the variable roles rotated. */
+	for (i = 0; i < 20; i += 5) {
+		e += ROTL(a, 5) + F(b, c, d) + K1 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + F(a, b, c) + K1 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + F(e, a, b) + K1 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + F(d, e, a) + K1 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + F(c, d, e) + K1 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 20; i < 40; i += 5) {
+		e += ROTL(a, 5) + G(b, c, d) + K2 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + G(a, b, c) + K2 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + G(e, a, b) + K2 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + G(d, e, a) + K2 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + G(c, d, e) + K2 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 40; i < 60; i += 5) {
+		e += ROTL(a, 5) + H(b, c, d) + K3 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + H(a, b, c) + K3 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + H(e, a, b) + K3 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + H(d, e, a) + K3 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + H(c, d, e) + K3 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 60; i < 80; i += 5) {
+		e += ROTL(a, 5) + I(b, c, d) + K4 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + I(a, b, c) + K4 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + I(e, a, b) + K4 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + I(d, e, a) + K4 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + I(c, d, e) + K4 + m[i + 4]; c = ROTL(c, 30);
+	}
+	/* Add the working variables back into the caller's state. */
+	val[0] += a;
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+}
+
+/* see bearssl.h -- begin a new SHA-1 computation: zero count, IV state, SHA-1 vtable */
+void
+br_sha1_init(br_sha1_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, br_sha1_IV, sizeof cc->val);
+	cc->vtable = &br_sha1_vtable;
+}
+
+/*
+ * see bearssl.h -- absorb len bytes; each time the 64-byte buffer fills
+ * up it is compressed into cc->val with br_sha1_round().
+ */
+void
+br_sha1_update(br_sha1_context *cc, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	size_t fill = (size_t)cc->count & 63;
+
+	while (len > 0) {
+		/* Take what fits in the block buffer, but no more than len. */
+		size_t room = 64 - fill;
+		size_t chunk = (len < room) ? len : room;
+
+		memcpy(cc->buf + fill, src, chunk);
+		src += chunk;
+		len -= chunk;
+		fill += chunk;
+		cc->count += (uint64_t)chunk;
+
+		if (fill == 64) {
+			br_sha1_round(cc->buf, cc->val);
+			fill = 0;
+		}
+	}
+}
+
+/* see bearssl.h -- pad and finish on local copies; the context itself is not modified */
+void
+br_sha1_out(const br_sha1_context *cc, void *dst)
+{
+	unsigned char block[64];
+	uint32_t state[5];
+	size_t used = (size_t)cc->count & 63;
+
+	memcpy(state, cc->val, sizeof state);
+	memcpy(block, cc->buf, used);
+	block[used ++] = 0x80;
+	if (used <= 56) {
+		memset(block + used, 0, 56 - used);
+	} else {
+		/* No room for the 8-byte length: finish this block first. */
+		memset(block + used, 0, 64 - used);
+		br_sha1_round(block, state);
+		memset(block, 0, 56);
+	}
+	br_enc64be(block + 56, cc->count << 3);   /* message length in bits */
+	br_sha1_round(block, state);
+	br_range_enc32be(dst, state, 5);
+}
+
+/* see bearssl.h -- write the five state words to dst with br_range_enc32be(); returns cc->count */
+uint64_t
+br_sha1_state(const br_sha1_context *cc, void *dst)
+{
+	br_range_enc32be(dst, cc->val, 5);
+	return cc->count;
+}
+
+/* see bearssl.h -- read five state words from stb with br_range_dec32be() and store the given count */
+void
+br_sha1_set_state(br_sha1_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32be(cc->val, 5, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h -- SHA-1 class descriptor; the casts adapt the br_sha1_context-typed functions to the generic signatures */
+const br_hash_class br_sha1_vtable = {
+	sizeof(br_sha1_context),
+	BR_HASHDESC_ID(br_sha1_ID)
+		| BR_HASHDESC_OUT(20)	/* 5 state words of 4 bytes */
+		| BR_HASHDESC_STATE(20)
+		| BR_HASHDESC_LBLEN(6)	/* presumably log2 of the 64-byte block -- confirm in bearssl_hash.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha1_init,
+	(void (*)(const br_hash_class **, const void *, size_t))&br_sha1_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha1_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha1_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha1_set_state
+};
diff --git a/test/monniaux/BearSSL/src/hash/sha2big.c b/test/monniaux/BearSSL/src/hash/sha2big.c
new file mode 100644
index 00000000..5be92ed5
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/sha2big.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/* CH: bitwise select (Y where X is 1, else Z). MAJ: bitwise majority of X, Y, Z. */
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))
+#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X)))
+/* ROTR: rotate a 64-bit value right by n bits (0 < n < 64 at every use below). */
+#define ROTR(x, n)    (((uint64_t)(x) << (64 - (n))) | ((uint64_t)(x) >> (n)))
+/* BSG5_*: XOR of three rotations. SSG5_*: XOR of two rotations and one plain right shift. */
+#define BSG5_0(x)      (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))
+#define BSG5_1(x)      (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))
+#define SSG5_0(x)      (ROTR(x, 1) ^ ROTR(x, 8) ^ (uint64_t)((x) >> 7))
+#define SSG5_1(x)      (ROTR(x, 19) ^ ROTR(x, 61) ^ (uint64_t)((x) >> 6))
+/* Initial state words copied into cc->val by br_sha384_init(). */
+static const uint64_t IV384[8] = {
+	0xCBBB9D5DC1059ED8, 0x629A292A367CD507,
+	0x9159015A3070DD17, 0x152FECD8F70E5939,
+	0x67332667FFC00B31, 0x8EB44A8768581511,
+	0xDB0C2E0D64F98FA7, 0x47B5481DBEFA4FA4
+};
+/* Initial state words copied into cc->val by br_sha512_init(). */
+static const uint64_t IV512[8] = {
+	0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
+	0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
+	0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
+	0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
+};
+/* Per-step additive constants: sha2big_round() adds K[j] in step j, for j = 0..79. */
+static const uint64_t K[80] = {
+	0x428A2F98D728AE22, 0x7137449123EF65CD,
+	0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
+	0x3956C25BF348B538, 0x59F111F1B605D019,
+	0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
+	0xD807AA98A3030242, 0x12835B0145706FBE,
+	0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
+	0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1,
+	0x9BDC06A725C71235, 0xC19BF174CF692694,
+	0xE49B69C19EF14AD2, 0xEFBE4786384F25E3,
+	0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
+	0x2DE92C6F592B0275, 0x4A7484AA6EA6E483,
+	0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
+	0x983E5152EE66DFAB, 0xA831C66D2DB43210,
+	0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
+	0xC6E00BF33DA88FC2, 0xD5A79147930AA725,
+	0x06CA6351E003826F, 0x142929670A0E6E70,
+	0x27B70A8546D22FFC, 0x2E1B21385C26C926,
+	0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
+	0x650A73548BAF63DE, 0x766A0ABB3C77B2A8,
+	0x81C2C92E47EDAEE6, 0x92722C851482353B,
+	0xA2BFE8A14CF10364, 0xA81A664BBC423001,
+	0xC24B8B70D0F89791, 0xC76C51A30654BE30,
+	0xD192E819D6EF5218, 0xD69906245565A910,
+	0xF40E35855771202A, 0x106AA07032BBD1B8,
+	0x19A4C116B8D2D0C8, 0x1E376C085141AB53,
+	0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
+	0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB,
+	0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
+	0x748F82EE5DEFB2FC, 0x78A5636F43172F60,
+	0x84C87814A1F0AB72, 0x8CC702081A6439EC,
+	0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9,
+	0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
+	0xCA273ECEEA26619C, 0xD186B8C721C0C207,
+	0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
+	0x06F067AA72176FBA, 0x0A637DC5A2C898A6,
+	0x113F9804BEF90DAE, 0x1B710B35131C471B,
+	0x28DB77F523047D84, 0x32CAAB7B40C72493,
+	0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
+	0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A,
+	0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
+};
+/* Process one 128-byte block at buf, updating the eight 64-bit state words val[0..7] in place. */
+static void
+sha2big_round(const unsigned char *buf, uint64_t *val)
+{
+	/* One step: only D and H are written; callers rotate the argument order instead of moving values. */
+#define SHA2BIG_STEP(A, B, C, D, E, F, G, H, j)   do { \
+		uint64_t T1, T2; \
+		T1 = H + BSG5_1(E) + CH(E, F, G) + K[j] + w[j]; \
+		T2 = BSG5_0(A) + MAJ(A, B, C); \
+		D += T1; \
+		H = T1 + T2; \
+	} while (0)
+	/* w[0..15] are decoded from buf; w[16..79] are derived from earlier words. */
+	int i;
+	uint64_t a, b, c, d, e, f, g, h;
+	uint64_t w[80];
+
+	br_range_dec64be(w, 16, buf);
+	for (i = 16; i < 80; i ++) {
+		w[i] = SSG5_1(w[i - 2]) + w[i - 7]
+			+ SSG5_0(w[i - 15]) + w[i - 16];
+	}
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	f = val[5];
+	g = val[6];
+	h = val[7];
+	for (i = 0; i < 80; i += 8) {
+		SHA2BIG_STEP(a, b, c, d, e, f, g, h, i + 0);
+		SHA2BIG_STEP(h, a, b, c, d, e, f, g, i + 1);
+		SHA2BIG_STEP(g, h, a, b, c, d, e, f, i + 2);
+		SHA2BIG_STEP(f, g, h, a, b, c, d, e, i + 3);
+		SHA2BIG_STEP(e, f, g, h, a, b, c, d, i + 4);
+		SHA2BIG_STEP(d, e, f, g, h, a, b, c, i + 5);
+		SHA2BIG_STEP(c, d, e, f, g, h, a, b, i + 6);
+		SHA2BIG_STEP(b, c, d, e, f, g, h, a, i + 7);
+	}
+	val[0] += a;
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+	val[5] += f;
+	val[6] += g;
+	val[7] += h;
+}
+
+/*
+ * Input routine behind br_sha384_update(): buffers data in 128-byte blocks.
+ */
+static void
+sha2big_update(br_sha384_context *cc, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	size_t fill = (size_t)cc->count & 127;
+
+	/* The byte count is bumped once up front; the loop only moves data. */
+	cc->count += (uint64_t)len;
+	while (len > 0) {
+		size_t room = 128 - fill;
+		size_t chunk = (len < room) ? len : room;
+
+		memcpy(cc->buf + fill, src, chunk);
+		src += chunk;
+		len -= chunk;
+		fill += chunk;
+		if (fill == 128) {
+			/* Buffer full: compress it into cc->val. */
+			sha2big_round(cc->buf, cc->val);
+			fill = 0;
+		}
+	}
+}
+
+/* Pad and finish on local copies, then write the first num state words to dst. */
+static void
+sha2big_out(const br_sha384_context *cc, void *dst, int num)
+{
+	unsigned char block[128];
+	uint64_t state[8];
+	size_t used = (size_t)cc->count & 127;
+
+	memcpy(state, cc->val, sizeof state);
+	memcpy(block, cc->buf, used);
+	block[used ++] = 0x80;
+	if (used <= 112) {
+		memset(block + used, 0, 112 - used);
+	} else {
+		memset(block + used, 0, 128 - used);
+		sha2big_round(block, state);
+		memset(block, 0, 112);
+	}
+	br_enc64be(block + 112, cc->count >> 61);   /* bit length, upper 64 bits */
+	br_enc64be(block + 120, cc->count << 3);    /* bit length, lower 64 bits */
+	sha2big_round(block, state);
+	br_range_enc64be(dst, state, num);
+}
+
+/* see bearssl.h -- begin a new SHA-384 computation: zero count, IV384 state, SHA-384 vtable */
+void
+br_sha384_init(br_sha384_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, IV384, sizeof IV384);
+	cc->vtable = &br_sha384_vtable;
+}
+
+/* see bearssl.h -- public entry point; all the work is done by sha2big_update() */
+void
+br_sha384_update(br_sha384_context *cc, const void *data, size_t len)
+{
+	sha2big_update(cc, data, len);
+}
+
+/* see bearssl.h -- finish via sha2big_out(), emitting 6 of the 8 state words (48 bytes) */
+void
+br_sha384_out(const br_sha384_context *cc, void *dst)
+{
+	sha2big_out(cc, dst, 6);
+}
+
+/* see bearssl.h -- write all eight state words to dst with br_range_enc64be(); returns cc->count */
+uint64_t
+br_sha384_state(const br_sha384_context *cc, void *dst)
+{
+	br_range_enc64be(dst, cc->val, 8);
+	return cc->count;
+}
+
+/* see bearssl.h -- read eight state words from stb with br_range_dec64be() and store the given count */
+void
+br_sha384_set_state(br_sha384_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec64be(cc->val, 8, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h -- begin a new SHA-512 computation: zero count, IV512 state, SHA-512 vtable */
+void
+br_sha512_init(br_sha512_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, IV512, sizeof IV512);
+	cc->vtable = &br_sha512_vtable;
+}
+
+/* see bearssl.h -- finish via sha2big_out(), emitting all 8 state words (64 bytes) */
+void
+br_sha512_out(const br_sha512_context *cc, void *dst)
+{
+	sha2big_out(cc, dst, 8);
+}
+
+/* see bearssl.h -- SHA-384 class descriptor: context size, descriptor word, then init/update/out/state/set_state */
+const br_hash_class br_sha384_vtable = {
+	sizeof(br_sha384_context),
+	BR_HASHDESC_ID(br_sha384_ID)
+		| BR_HASHDESC_OUT(48)	/* 6 words of 8 bytes, as written by br_sha384_out() */
+		| BR_HASHDESC_STATE(64)	/* 8 words of 8 bytes, as written by br_sha384_state() */
+		| BR_HASHDESC_LBLEN(7)	/* presumably log2 of the 128-byte block -- confirm in bearssl_hash.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE
+		| BR_HASHDESC_MD_PADDING_128,
+	(void (*)(const br_hash_class **))&br_sha384_init,
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_sha384_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha384_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha384_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha384_set_state
+};
+
+/* see bearssl.h -- SHA-512 class descriptor; update/state/set_state names are not defined in this file (presumably aliases declared in the header) */
+const br_hash_class br_sha512_vtable = {
+	sizeof(br_sha512_context),
+	BR_HASHDESC_ID(br_sha512_ID)
+		| BR_HASHDESC_OUT(64)	/* 8 words of 8 bytes, as written by br_sha512_out() */
+		| BR_HASHDESC_STATE(64)
+		| BR_HASHDESC_LBLEN(7)	/* presumably log2 of the 128-byte block -- confirm in bearssl_hash.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE
+		| BR_HASHDESC_MD_PADDING_128,
+	(void (*)(const br_hash_class **))&br_sha512_init,
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_sha512_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha512_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha512_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha512_set_state
+};
diff --git a/test/monniaux/BearSSL/src/hash/sha2small.c b/test/monniaux/BearSSL/src/hash/sha2small.c
new file mode 100644
index 00000000..ca196559
--- /dev/null
+++ b/test/monniaux/BearSSL/src/hash/sha2small.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/* CH: bitwise select (Y where X is 1, else Z). MAJ: bitwise majority of X, Y, Z. */
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))
+#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X)))
+/* ROTR: rotate a 32-bit value right by n bits (0 < n < 32 at every use below). */
+#define ROTR(x, n)    (((uint32_t)(x) << (32 - (n))) | ((uint32_t)(x) >> (n)))
+/* BSG2_*: XOR of three rotations. SSG2_*: XOR of two rotations and one plain right shift. */
+#define BSG2_0(x)      (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define BSG2_1(x)      (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define SSG2_0(x)      (ROTR(x, 7) ^ ROTR(x, 18) ^ (uint32_t)((x) >> 3))
+#define SSG2_1(x)      (ROTR(x, 17) ^ ROTR(x, 19) ^ (uint32_t)((x) >> 10))
+
+/* see inner.h -- initial state words copied into cc->val by br_sha224_init() */
+const uint32_t br_sha224_IV[8] = {
+	0xC1059ED8, 0x367CD507, 0x3070DD17, 0xF70E5939,
+	0xFFC00B31, 0x68581511, 0x64F98FA7, 0xBEFA4FA4
+};
+
+/* see inner.h -- initial state words copied into cc->val by br_sha256_init() */
+const uint32_t br_sha256_IV[8] = {
+	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+};
+/* Per-step additive constants: br_sha2small_round() adds K[j] in step j, for j = 0..63. */
+static const uint32_t K[64] = {
+	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
+	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
+	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
+	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
+	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
+	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
+	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
+	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
+	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
+	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
+	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
+	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
+	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
+	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
+	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
+	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
+};
+
+/* see inner.h -- process one 64-byte block at buf, updating the eight 32-bit state words val[0..7] in place */
+void
+br_sha2small_round(const unsigned char *buf, uint32_t *val)
+{
+	/* One step: only D and H are written; callers rotate the argument order instead of moving values. */
+#define SHA2_STEP(A, B, C, D, E, F, G, H, j)   do { \
+		uint32_t T1, T2; \
+		T1 = H + BSG2_1(E) + CH(E, F, G) + K[j] + w[j]; \
+		T2 = BSG2_0(A) + MAJ(A, B, C); \
+		D += T1; \
+		H = T1 + T2; \
+	} while (0)
+	/* w[0..15] are decoded from buf; w[16..63] are derived from earlier words. */
+	int i;
+	uint32_t a, b, c, d, e, f, g, h;
+	uint32_t w[64];
+
+	br_range_dec32be(w, 16, buf);
+	for (i = 16; i < 64; i ++) {
+		w[i] = SSG2_1(w[i - 2]) + w[i - 7]
+			+ SSG2_0(w[i - 15]) + w[i - 16];
+	}
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	f = val[5];
+	g = val[6];
+	h = val[7];
+	for (i = 0; i < 64; i += 8) {
+		SHA2_STEP(a, b, c, d, e, f, g, h, i + 0);
+		SHA2_STEP(h, a, b, c, d, e, f, g, i + 1);
+		SHA2_STEP(g, h, a, b, c, d, e, f, i + 2);
+		SHA2_STEP(f, g, h, a, b, c, d, e, i + 3);
+		SHA2_STEP(e, f, g, h, a, b, c, d, i + 4);
+		SHA2_STEP(d, e, f, g, h, a, b, c, i + 5);
+		SHA2_STEP(c, d, e, f, g, h, a, b, i + 6);
+		SHA2_STEP(b, c, d, e, f, g, h, a, i + 7);
+	}
+	val[0] += a;
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+	val[5] += f;
+	val[6] += g;
+	val[7] += h;
+	/* Everything from here to #endif is compiled out by the #if 0 guard. */
+#if 0
+/* obsolete: variant that keeps a 16-word rolling schedule W[] indexed modulo 16 */
+#define SHA2_MEXP1(pc)   do { \
+		W[pc] = br_dec32be(buf + ((pc) << 2)); \
+	} while (0)
+
+#define SHA2_MEXP2(pc)   do { \
+		W[(pc) & 0x0F] = SSG2_1(W[((pc) - 2) & 0x0F]) \
+			+ W[((pc) - 7) & 0x0F] \
+			+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]; \
+	} while (0)
+
+#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, pc)   do { \
+		uint32_t t1, t2; \
+		SHA2_MEXP ## n(pc); \
+		t1 = h + BSG2_1(e) + CH(e, f, g) \
+			+ K[pcount + (pc)] + W[(pc) & 0x0F]; \
+		t2 = BSG2_0(a) + MAJ(a, b, c); \
+		d += t1; \
+		h = t1 + t2; \
+	} while (0)
+
+#define SHA2_STEP1(a, b, c, d, e, f, g, h, pc) \
+	SHA2_STEPn(1, a, b, c, d, e, f, g, h, pc)
+#define SHA2_STEP2(a, b, c, d, e, f, g, h, pc) \
+	SHA2_STEPn(2, a, b, c, d, e, f, g, h, pc)
+
+	uint32_t A, B, C, D, E, F, G, H;
+	uint32_t W[16];
+	unsigned pcount;
+
+	A = val[0];
+	B = val[1];
+	C = val[2];
+	D = val[3];
+	E = val[4];
+	F = val[5];
+	G = val[6];
+	H = val[7];
+	pcount = 0;
+	SHA2_STEP1(A, B, C, D, E, F, G, H,  0);
+	SHA2_STEP1(H, A, B, C, D, E, F, G,  1);
+	SHA2_STEP1(G, H, A, B, C, D, E, F,  2);
+	SHA2_STEP1(F, G, H, A, B, C, D, E,  3);
+	SHA2_STEP1(E, F, G, H, A, B, C, D,  4);
+	SHA2_STEP1(D, E, F, G, H, A, B, C,  5);
+	SHA2_STEP1(C, D, E, F, G, H, A, B,  6);
+	SHA2_STEP1(B, C, D, E, F, G, H, A,  7);
+	SHA2_STEP1(A, B, C, D, E, F, G, H,  8);
+	SHA2_STEP1(H, A, B, C, D, E, F, G,  9);
+	SHA2_STEP1(G, H, A, B, C, D, E, F, 10);
+	SHA2_STEP1(F, G, H, A, B, C, D, E, 11);
+	SHA2_STEP1(E, F, G, H, A, B, C, D, 12);
+	SHA2_STEP1(D, E, F, G, H, A, B, C, 13);
+	SHA2_STEP1(C, D, E, F, G, H, A, B, 14);
+	SHA2_STEP1(B, C, D, E, F, G, H, A, 15);
+	for (pcount = 16; pcount < 64; pcount += 16) {
+		SHA2_STEP2(A, B, C, D, E, F, G, H,  0);
+		SHA2_STEP2(H, A, B, C, D, E, F, G,  1);
+		SHA2_STEP2(G, H, A, B, C, D, E, F,  2);
+		SHA2_STEP2(F, G, H, A, B, C, D, E,  3);
+		SHA2_STEP2(E, F, G, H, A, B, C, D,  4);
+		SHA2_STEP2(D, E, F, G, H, A, B, C,  5);
+		SHA2_STEP2(C, D, E, F, G, H, A, B,  6);
+		SHA2_STEP2(B, C, D, E, F, G, H, A,  7);
+		SHA2_STEP2(A, B, C, D, E, F, G, H,  8);
+		SHA2_STEP2(H, A, B, C, D, E, F, G,  9);
+		SHA2_STEP2(G, H, A, B, C, D, E, F, 10);
+		SHA2_STEP2(F, G, H, A, B, C, D, E, 11);
+		SHA2_STEP2(E, F, G, H, A, B, C, D, 12);
+		SHA2_STEP2(D, E, F, G, H, A, B, C, 13);
+		SHA2_STEP2(C, D, E, F, G, H, A, B, 14);
+		SHA2_STEP2(B, C, D, E, F, G, H, A, 15);
+	}
+	val[0] += A;
+	val[1] += B;
+	val[2] += C;
+	val[3] += D;
+	val[4] += E;
+	val[5] += F;
+	val[6] += G;
+	val[7] += H;
+#endif
+}
+
+/*
+ * Input routine behind br_sha224_update(): buffers data in 64-byte blocks.
+ */
+static void
+sha2small_update(br_sha224_context *cc, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	size_t fill = (size_t)cc->count & 63;
+
+	/* The byte count is bumped once up front; the loop only moves data. */
+	cc->count += (uint64_t)len;
+	while (len > 0) {
+		size_t room = 64 - fill;
+		size_t chunk = (len < room) ? len : room;
+
+		memcpy(cc->buf + fill, src, chunk);
+		src += chunk;
+		len -= chunk;
+		fill += chunk;
+		if (fill == 64) {
+			/* Buffer full: compress it into cc->val. */
+			br_sha2small_round(cc->buf, cc->val);
+			fill = 0;
+		}
+	}
+}
+
+/* Pad and finish on local copies, then write the first num state words to dst. */
+static void
+sha2small_out(const br_sha224_context *cc, void *dst, int num)
+{
+	unsigned char block[64];
+	uint32_t state[8];
+	size_t used = (size_t)cc->count & 63;
+
+	memcpy(state, cc->val, sizeof state);
+	memcpy(block, cc->buf, used);
+	block[used ++] = 0x80;
+	if (used <= 56) {
+		memset(block + used, 0, 56 - used);
+	} else {
+		memset(block + used, 0, 64 - used);
+		br_sha2small_round(block, state);
+		memset(block, 0, 56);
+	}
+	br_enc64be(block + 56, cc->count << 3);   /* message length in bits */
+	br_sha2small_round(block, state);
+	br_range_enc32be(dst, state, num);
+}
+
+/* see bearssl.h -- begin a new SHA-224 computation: zero count, IV state, SHA-224 vtable */
+void
+br_sha224_init(br_sha224_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, br_sha224_IV, sizeof cc->val);
+	cc->vtable = &br_sha224_vtable;
+}
+
+/* see bearssl.h -- public entry point; all the work is done by sha2small_update() */
+void
+br_sha224_update(br_sha224_context *cc, const void *data, size_t len)
+{
+	sha2small_update(cc, data, len);
+}
+
+/* see bearssl.h -- finish via sha2small_out(), emitting 7 of the 8 state words (28 bytes) */
+void
+br_sha224_out(const br_sha224_context *cc, void *dst)
+{
+	sha2small_out(cc, dst, 7);
+}
+
+/* see bearssl.h -- write all eight state words to dst with br_range_enc32be(); returns cc->count */
+uint64_t
+br_sha224_state(const br_sha224_context *cc, void *dst)
+{
+	br_range_enc32be(dst, cc->val, 8);
+	return cc->count;
+}
+
+/* see bearssl.h -- read eight state words from stb with br_range_dec32be() and store the given count */
+void
+br_sha224_set_state(br_sha224_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32be(cc->val, 8, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h -- begin a new SHA-256 computation: zero count, IV state, SHA-256 vtable */
+void
+br_sha256_init(br_sha256_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, br_sha256_IV, sizeof cc->val);
+	cc->vtable = &br_sha256_vtable;
+}
+
+/* see bearssl.h -- finish via sha2small_out(), emitting all 8 state words (32 bytes) */
+void
+br_sha256_out(const br_sha256_context *cc, void *dst)
+{
+	sha2small_out(cc, dst, 8);
+}
+
+/* see bearssl.h -- SHA-224 class descriptor: context size, descriptor word, then init/update/out/state/set_state */
+const br_hash_class br_sha224_vtable = {
+	sizeof(br_sha224_context),
+	BR_HASHDESC_ID(br_sha224_ID)
+		| BR_HASHDESC_OUT(28)	/* 7 words of 4 bytes, as written by br_sha224_out() */
+		| BR_HASHDESC_STATE(32)	/* 8 words of 4 bytes, as written by br_sha224_state() */
+		| BR_HASHDESC_LBLEN(6)	/* presumably log2 of the 64-byte block -- confirm in bearssl_hash.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha224_init,
+	(void (*)(const br_hash_class **,
+		const void *, size_t))&br_sha224_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha224_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha224_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha224_set_state
+};
+
+/* see bearssl.h -- SHA-256 class descriptor; update/state/set_state names are not defined in this file (presumably aliases declared in the header) */
+const br_hash_class br_sha256_vtable = {
+	sizeof(br_sha256_context),
+	BR_HASHDESC_ID(br_sha256_ID)
+		| BR_HASHDESC_OUT(32)	/* 8 words of 4 bytes, as written by br_sha256_out() */
+		| BR_HASHDESC_STATE(32)
+		| BR_HASHDESC_LBLEN(6)	/* presumably log2 of the 64-byte block -- confirm in bearssl_hash.h */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha256_init,
+	(void (*)(const br_hash_class **,
+		const void *, size_t))&br_sha256_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha256_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha256_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha256_set_state
+};