From b8a398d37d0c7dbc8068b0759f1fb3f9daa11a63 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 30 Jan 2019 12:54:34 +0100 Subject: bitsliced TEA from archived http://plaintext.crypto.lo.gy/article/378/untwisted-bit-sliced-tea-time --- test/monniaux/bitsliced-tea/Makefile | 5 + test/monniaux/bitsliced-tea/README.md | 3 + test/monniaux/bitsliced-tea/bstea.c | 368 +++++++++++++++++++++++++++ test/monniaux/bitsliced-tea/bstea.h | 147 +++++++++++ test/monniaux/bitsliced-tea/bstea_test.c | 124 +++++++++ test/monniaux/bitsliced-tea/bstea_wordsize.h | 16 ++ 6 files changed, 663 insertions(+) create mode 100644 test/monniaux/bitsliced-tea/Makefile create mode 100644 test/monniaux/bitsliced-tea/README.md create mode 100644 test/monniaux/bitsliced-tea/bstea.c create mode 100644 test/monniaux/bitsliced-tea/bstea.h create mode 100644 test/monniaux/bitsliced-tea/bstea_test.c create mode 100644 test/monniaux/bitsliced-tea/bstea_wordsize.h (limited to 'test/monniaux/bitsliced-tea') diff --git a/test/monniaux/bitsliced-tea/Makefile b/test/monniaux/bitsliced-tea/Makefile new file mode 100644 index 00000000..b904666c --- /dev/null +++ b/test/monniaux/bitsliced-tea/Makefile @@ -0,0 +1,5 @@ +all: + $(CC) -Wall -O2 -D__BSTEA_MAIN_ -o bstea_test bstea_test.c bstea.c + +clean: + rm -f bstea_test diff --git a/test/monniaux/bitsliced-tea/README.md b/test/monniaux/bitsliced-tea/README.md new file mode 100644 index 00000000..476880c2 --- /dev/null +++ b/test/monniaux/bitsliced-tea/README.md @@ -0,0 +1,3 @@ +Bit-sliced TEA encryption +from Alfonso De Gregorio, 06 December 2010 +https://web.archive.org/web/20131021214351/http://plaintext.crypto.lo.gy/article/378/untwisted-bit-sliced-tea-time diff --git a/test/monniaux/bitsliced-tea/bstea.c b/test/monniaux/bitsliced-tea/bstea.c new file mode 100644 index 00000000..c20c169c --- /dev/null +++ b/test/monniaux/bitsliced-tea/bstea.c @@ -0,0 +1,368 @@ +#include +#include +#include + +#include "bstea.h" + +/* a key schedule constant - 32/golden-ratio */ +static const uint32_t delta = 0x9e3779b9; + +/* v points to the wordsize-way vectorized plaintext, + * k to the vectorized key */ +/* input quantities are disposed in the following way: + v0 <- v[0..31] k0 <- k[0..31] k2 <- k[64..95] + v1 <- v[32..63] k1 <- k[32..63] k3 <- k[96..127] + */ +void encrypt(parallel_blocks_t v, const parallel_keys_t k, unsigned int r) +{ + /* Stride 32 between consecutive words in input quantities */ +# define offset_v0 0 +# define offset_v1 32 +# define offset_k0 0 +# define offset_k1 32 +# define offset_k2 64 +# define offset_k3 96 + + vector_width_t carry; + vector_width_t axorb; + vector_width_t aandb; + vector_width_t ai; + vector_width_t bi; + vector_width_t borrow; + vector_width_t notaandb; + + vector_width_t v1_lshift_4[32]; + vector_width_t v1_plus_sum[32]; /* term two */ + vector_width_t v1_rshift_5[32]; + vector_width_t v1_lshift_4_plus_k0[32]; /* term one */ + vector_width_t v1_rshift_5_plus_k1[32]; /* term three */ + + vector_width_t v0_lshift_4[32]; + vector_width_t v0_plus_sum[32]; /* term two */ + vector_width_t v0_rshift_5[32]; + vector_width_t v0_lshift_4_plus_k2[32]; /* term one */ + vector_width_t v0_rshift_5_plus_k3[32]; /* term three */ + + vector_width_t shift; + + int i; + + + /* setup */ + uint32_t sum = 0; + for (i = 0; i < 32; ++i) + v1_lshift_4[i] = v1_plus_sum[i] = v1_rshift_5[i] = \ + v1_lshift_4_plus_k0[i] = v1_rshift_5_plus_k1[i] = \ + v0_lshift_4[i] = v0_plus_sum[i] = v0_rshift_5[i] = \ + v0_lshift_4_plus_k2[i] = v0_rshift_5_plus_k3[i] = 0; + + + while (r > 0) { + sum += delta; + + /* lshift v1 by 4 */ + shift = 4; + for (i = 31; i >= 0; i--) + v1_lshift_4[i] = (i >= shift) ? v[offset_v1 + i - shift] : 0; + + /* add k0 to v1_lshift_4 */ + carry = 0; + for (i = 0;i < 32;++i) { + ai = v1_lshift_4[i]; + bi = k[offset_k0 + i]; + aandb = ai & bi; + axorb = ai ^ bi; + v1_lshift_4_plus_k0[i] = axorb ^ carry; + carry &= axorb; + carry |= aandb; + } + + /* add delta sum to v1 */ + carry = 0; + for (i = 0;i < 32;++i) { + /* VECTOR_AT_ONE where the ith bit of the sum is set */ + /* + * Each iteration follows the first 32 elements + * in the expansion of multiples of 32/golden-ratio, + * or 32/(1+sqrt(5)/2 + */ + ai = (sum & (1<= 0; i--) + v0_lshift_4[i] = (i >= shift) ? v[offset_v0 + i - shift] : 0; + + /* add k2 and v0_lshift_4 */ + carry = 0; + for (i = 0;i < 32;++i) { + ai = v0_lshift_4[i]; + bi = k[offset_k2 + i]; + aandb = ai & bi; + axorb = ai ^ bi; + v0_lshift_4_plus_k2[i] = axorb ^ carry; + carry &= axorb; + carry |= aandb; + } + + /* add delta sum to v0 */ + carry = 0; + for (i = 0;i < 32;++i) { + /* VECTOR_AT_ONE where the ith bit of the sum is set */ + ai = (sum & (1< 0) { + /* lshift v0 by 4 */ + shift = 4; + for (i = 31; i >= 0; i--) + v0_lshift_4[i] = (i >= shift) ? v[offset_v0 + i - shift] : 0; + + /* add k2 and v0_lshift_4 */ + carry = 0; + for (i = 0;i < 32;++i) { + ai = v0_lshift_4[i]; + bi = k[offset_k2 + i]; + aandb = ai & bi; + axorb = ai ^ bi; + v0_lshift_4_plus_k2[i] = axorb ^ carry; + carry &= axorb; + carry |= aandb; + } + + /* add delta sum to v0 */ + carry = 0; + for (i = 0;i < 32;++i) { + /* VECTOR_AT_ONE where the ith bit of the sum is set */ + ai = (sum & (1<= 0; i--) + v1_lshift_4[i] = (i >= shift) ? v[offset_v1 + i - shift] : 0; + + /* add k0 to v1_lshift_4 */ + carry = 0; + for (i = 0;i < 32;++i) { + ai = v1_lshift_4[i]; + bi = k[offset_k0 + i]; + aandb = ai & bi; + axorb = ai ^ bi; + v1_lshift_4_plus_k0[i] = axorb ^ carry; + carry &= axorb; + carry |= aandb; + } + + /* add delta sum to v1 */ + carry = 0; + for (i = 0;i < 32;++i) { + /* VECTOR_AT_ONE where the ith bit of the sum is set */ + ai = (sum & (1< +#include + +#include "bstea_wordsize.h" + +#define TEA_ROUNDS 32 + +#define TEA_BLOCK_SIZE 64 +#define TEA_KEY_SIZE 128 + +#if __BSTEA_WORDSIZE == 64 +typedef uint64_t vector_width_t; /* 64-way bit-level vectorization */ +#define VECTOR_AT_ONE 0xffffffffffffffff +#define VECTOR_AT_ZERO 0x0000000000000000 +#elif __BSTEA_WORDSIZE == 32 +typedef uint32_t vector_width_t; /* 32-way bit-level vectorization */ +#define VECTOR_AT_ONE 0xffffffff +#define VECTOR_AT_ZERO 0x00000000 +#elif __BSTEA_WORDSIZE == 16 +typedef uint32_t vector_width_t; /* 16-way bit-level vectorization */ +#define VECTOR_AT_ONE 0xffff +#define VECTOR_AT_ZERO 0x0000 +#elif __BSTEA_WORDSIZE == 8 +typedef uint32_t vector_width_t; /* 8-way bit-level vectorization */ +#define VECTOR_AT_ONE 0xff +#define VECTOR_AT_ZERO 0x00 +#else +typedef unsigned long int vector_width_t; /* word-way bit-level vectorization */ +#define VECTOR_AT_ONE ULONG_MAX +#define VECTOR_AT_ZERO 0 +#endif + + +typedef vector_width_t parallel_blocks_t[TEA_BLOCK_SIZE]; +typedef vector_width_t parallel_keys_t[TEA_KEY_SIZE]; + + +/* __P is a macro used to wrap function prototypes, so that compilers + that don't understand ANSI C prototypes still work, and ANSI C + compilers can issue warnings about type mismatches. */ +#undef __P +#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(WIN32) || defined(__cplusplus) + # define __P(protos) protos +#else + # define __P(protos) () +#endif + + +/* __BEGIN_DECLS should be used at the beginning of your declarations, + so that C++ compilers don't mangle their names. Use __END_DECLS at + the end of C declarations. */ +#undef __BEGIN_DECLS +#undef __END_DECLS +#ifdef __cplusplus +# define __BEGIN_DECLS extern "C" { +# define __END_DECLS } +#else +# define __BEGIN_DECLS /* empty */ +# define __END_DECLS /* empty */ +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + + +#if defined(WINDOWS) || defined(WIN32) +# ifdef BSTEA_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef BSTEA_INTERNAL +# define BSTEA_EXTERN extern __declspec(dllexport) +# else +# define BSTEA_EXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* BSTEA_DLL */ + + /* If building or using bstea with the WINAPI/WINAPIV calling convention, + * define BSTEA_WINAPI. + * Caution: the standard BSTEA.DLL is NOT compiled using BSTEA_WINAPI. + */ +# ifdef BSTEA_WINAPI +# ifdef FAR +# undef FAR +# endif +# include + /* No need for _export, use BSTEA_LIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define BSTEA_EXPORT WINAPI +# ifdef WIN32 +# define BSTEA_EXPORTVA WINAPIV +# else +# define BSTEA_EXPORTVA FAR CDECL +# endif +# endif + +#else +# include +#endif + +#ifndef BSTEA_EXTERN +# define BSTEA_EXTERN extern +#endif +#ifndef BSTEA_EXPORT +# define BSTEA_EXPORT +#endif +#ifndef BSTEA_EXPORTVA +# define BSTEA_EXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +__BEGIN_DECLS + +BSTEA_EXTERN void encrypt __P((parallel_blocks_t, const parallel_keys_t, unsigned int)); +BSTEA_EXTERN void decrypt __P((parallel_blocks_t, const parallel_keys_t, unsigned int)); + +__END_DECLS + +#endif /* __BSTEA_H */ diff --git a/test/monniaux/bitsliced-tea/bstea_test.c b/test/monniaux/bitsliced-tea/bstea_test.c new file mode 100644 index 00000000..85c750bd --- /dev/null +++ b/test/monniaux/bitsliced-tea/bstea_test.c @@ -0,0 +1,124 @@ +#include +#include +#include + +#include "bstea.h" + + + +/* pack and unpack a single value all over the data path */ +static void pack(uint32_t *v, size_t len, vector_width_t *bv) { + size_t i, p, offset = 0; + + for (i=0; i>5] |= 1<<(i%32); +} + +/* pack and unpack one element at a time */ +static void pack_elem(uint32_t *v, size_t len, vector_width_t *bv, int elem) { + size_t i, p, offset = 0; + + for (i=0; i>5] |= 1<<(i%32); +} + +typedef struct tvector_s { + uint32_t ptext[TEA_BLOCK_SIZE >> 5]; + uint32_t ctext[TEA_BLOCK_SIZE >> 5]; + uint32_t key[TEA_KEY_SIZE >> 5]; +} tvector_t; + +static void test_vectors() { + int i, j; + parallel_blocks_t v; + parallel_keys_t k; + uint32_t ctext[TEA_BLOCK_SIZE >> 5]; + uint32_t ptext[TEA_BLOCK_SIZE >> 5]; + uint32_t key[TEA_KEY_SIZE >> 5]; + + tvector_t testv [] = { { {0x00000000, 0x00000000}, \ + {0x41ea3a0a, 0x94baa940}, \ + {0x00000000, 0x00000000, \ + 0x00000000, 0x00000000} }, \ + { {0x74736574, 0x2e656d20}, \ + {0x6a2a5d77, 0x0992cef6}, \ + {0x6805022b, 0x76491406, \ + 0x260e5d77, 0x4378286c} }, \ + { {0x94baa940, 0x00000000}, \ + {0x4e8e7829, 0x7d8236d8}, \ + {0x00000000, 0x00000000, \ + 0x00000000, 0x41ea3a0a} }, \ + { {0x7d8236d8, 0x00000000}, \ + {0xc88ba95e, 0xe7edac02}, \ + {0x00000000, 0x00000000, \ + 0x41ea3a0a, 0x4e8e7829} } }; + + for (i = 0; i < sizeof(testv)/sizeof(tvector_t); ++i) { + for (j = 0;j < TEA_BLOCK_SIZE;++j) v[j] = 0; + for (j = 0;j < TEA_KEY_SIZE;++j) k[j] = 0; + + (void) memset(&ctext, 0, 8); + (void) memset(&ptext, 0, 8); + (void) memset(&key, 0, 16); + + pack(testv[i].ptext, 2, v); + pack(testv[i].key, 4, k); + + encrypt(v,k,TEA_ROUNDS); + + unpack(v, 64, ctext); + + decrypt(v,k,TEA_ROUNDS); + + unpack(v, 64, ptext); + unpack(k, 128, key); + +#if 0 + printf("key[0]: 0x%8x, key[1]: 0x%8x, key[2]: 0x%8x, key[3]: 0x%8x\n", key[0], key[1], key[2], key[3]); + printf("ctext[0]: 0x%8x, ctext[1]: 0x%8x\n", ctext[0], ctext[1]); + printf("ptext[0]: 0x%8x, ptext[1]: 0x%8x\n", ptext[0], ptext[1]); + + printf("t1_ctext[0]: 0x%8x, t1_ctext[1]: 0x%8x\n", testv[i].ctext[0], testv[i].ctext[1]); + printf("t1_ptext[0]: 0x%8x, t1_ptext[1]: 0x%8x\n\n\n", testv[i].ptext[0], testv[i].ptext[1]); +#endif + + assert(testv[i].ctext[0] == ctext[0] \ + && testv[i].ctext[1] == ctext[1]); + assert(testv[i].ptext[0] == ptext[0] \ + && testv[i].ptext[1] == ptext[1]); + assert(testv[i].key[0] == key[0] \ + && testv[i].key[1] == key[1] \ + && testv[i].key[2] == key[2] \ + && testv[i].key[3] == key[3] ); + + printf("test vector, %i,\t[PASSED]\n", i); + } +} + + +#ifdef __BSTEA_MAIN_ + +int main(int argc, char *argv[]) { + test_vectors(); + + return 0; +} + +#endif /* __BSTEA_MAIN_ */ diff --git a/test/monniaux/bitsliced-tea/bstea_wordsize.h b/test/monniaux/bitsliced-tea/bstea_wordsize.h new file mode 100644 index 00000000..5381e17c --- /dev/null +++ b/test/monniaux/bitsliced-tea/bstea_wordsize.h @@ -0,0 +1,16 @@ +#ifndef __BSTEA_WORDSIZE_H +#define __BSTEA_WORDSIZE_H + +/* Determine the wordsize from the preprocessor defines. */ + +#if defined __x86_64__ || defined __amd64__ || defined __x86_64 || \ + defined __amd64 || defined _M_X64 || defined __ia64__ || \ + defined __ia64__ || defined __IA64__ || defined __ia64 || \ + defined _M_IA64 +# define __BSTEA_WORDSIZE 64 +#else +# define __BSTEA_WORDSIZE 32 +#endif + + +#endif /* __BSTEA_WORDSIZE_H */ -- cgit