aboutsummaryrefslogtreecommitdiffstats
path: root/test/monniaux/xor_and_mat
diff options
context:
space:
mode:
author David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-03-03 08:17:40 +0100
committer David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-03-03 08:17:40 +0100
commit 1ab7b51c30e1b10ac45b0bd64cefdc01da0f7f68 (patch)
tree 210ffc156c83f04fb0c61a40b4f9037d7ba8a7e1 /test/monniaux/xor_and_mat
parent 222c9047d61961db9c6b19fed5ca49829223fd33 (diff)
parent 12be46d59a2483a10d77fa8ee67f7e0ca1bd702f (diff)
download compcert-kvx-1ab7b51c30e1b10ac45b0bd64cefdc01da0f7f68.tar.gz
compcert-kvx-1ab7b51c30e1b10ac45b0bd64cefdc01da0f7f68.zip
Merge branch 'mppa-cse2' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into mppa-work
Diffstat (limited to 'test/monniaux/xor_and_mat')
-rw-r--r-- test/monniaux/xor_and_mat/Makefile 4
-rw-r--r-- test/monniaux/xor_and_mat/int_mat.c 210
-rw-r--r-- test/monniaux/xor_and_mat/int_mat_run.c 90
-rw-r--r-- test/monniaux/xor_and_mat/xor_and.h 50
4 files changed, 354 insertions, 0 deletions
diff --git a/test/monniaux/xor_and_mat/Makefile b/test/monniaux/xor_and_mat/Makefile
new file mode 100644
index 00000000..e6c4db77
--- /dev/null
+++ b/test/monniaux/xor_and_mat/Makefile
@@ -0,0 +1,4 @@
+# Benchmark name and measurement labels. The labels c1..c7 match the
+# "cN cycles:" lines printed by int_mat_run.c. Both variables are presumably
+# consumed by ../rules.mk -- confirm there.
+TARGET=xor_and_mat
+MEASURES="c1 c2 c3 c4 c5 c6 c7"
+
+include ../rules.mk
diff --git a/test/monniaux/xor_and_mat/int_mat.c b/test/monniaux/xor_and_mat/int_mat.c
new file mode 100644
index 00000000..0bfe16b9
--- /dev/null
+++ b/test/monniaux/xor_and_mat/int_mat.c
@@ -0,0 +1,210 @@
+#include "xor_and.h"
+
+/* Operators of the matrix "product" implemented in this file: the additive
+   step is a bitwise XOR (spelled as a compound assignment, so ADD must be
+   used as `lhs ADD rhs;`) and the multiplicative step is a bitwise AND. */
+#define ADD ^=
+#define MUL &
+
+/*
+ * Variant 1 of the XOR/AND matrix product c = a * b.
+ *
+ * a is m x n, b is n x p and c is m x p. Each stride_* is the distance, in
+ * elements, between the starts of two consecutive rows of that matrix.
+ * This variant clears c first and then accumulates every partial product
+ * directly into the destination element.
+ */
+void xor_and_mat_mul1(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b) {
+  unsigned row, col, inner;
+
+  /* Pass 1: zero the m x p destination. */
+  for (row = 0; row < m; row++) {
+    for (col = 0; col < p; col++) {
+      c[row*stride_c + col] = 0;
+    }
+  }
+
+  /* Pass 2: fold a[row][inner] AND b[inner][col] into c[row][col] by XOR. */
+  for (row = 0; row < m; row++) {
+    for (col = 0; col < p; col++) {
+      for (inner = 0; inner < n; inner++) {
+        c[row*stride_c + col] ADD (a[row*stride_a + inner] MUL b[inner*stride_b + col]);
+      }
+    }
+  }
+}
+
+/*
+ * Variant 2 of the XOR/AND matrix product c = a * b (a: m x n, b: n x p,
+ * c: m x p, strides counted in elements per row).
+ *
+ * Each output element is accumulated in a local variable and stored to c
+ * exactly once, after the inner loop.
+ */
+void xor_and_mat_mul2(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b) {
+  unsigned row, col, inner;
+
+  for (row = 0; row < m; row++) {
+    for (col = 0; col < p; col++) {
+      xor_and acc = 0;
+
+      for (inner = 0; inner < n; inner++) {
+        acc ADD (a[row*stride_a + inner] MUL b[inner*stride_b + col]);
+      }
+      c[row*stride_c + col] = acc;
+    }
+  }
+}
+
+/*
+ * Variant 3 of the XOR/AND matrix product c = a * b (a: m x n, b: n x p,
+ * c: m x p, strides counted in elements per row).
+ *
+ * The inner loop is unrolled by two with one accumulator per parity of the
+ * inner index; a trailing term handles odd n, and the two accumulators are
+ * combined before the single store to c.
+ */
+void xor_and_mat_mul3(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b) {
+  for (unsigned row = 0; row < m; row++) {
+    for (unsigned col = 0; col < p; col++) {
+      xor_and acc_even = 0;
+      xor_and acc_odd = 0;
+      unsigned inner = 0;
+
+      /* Two terms per iteration while a full pair remains. */
+      while (inner+1 < n) {
+        acc_even ADD (a[row*stride_a + inner] MUL b[inner*stride_b + col]);
+        acc_odd ADD (a[row*stride_a + (inner+1)] MUL b[(inner+1)*stride_b + col]);
+        inner += 2;
+      }
+
+      /* Leftover term when n is odd. */
+      if (inner < n) {
+        acc_even ADD (a[row*stride_a + inner] MUL b[inner*stride_b + col]);
+      }
+
+      acc_even ADD acc_odd;
+      c[row*stride_c + col] = acc_even;
+    }
+  }
+}
+
+/*
+ * Variant 4 of the XOR/AND matrix product c = a * b (a: m x n, b: n x p,
+ * c: m x p, strides counted in elements per row).
+ *
+ * Same accumulation as the local-accumulator variant, but the operands are
+ * reached through pointers that are advanced step by step instead of being
+ * re-indexed on every access.
+ *
+ * NOTE(review): the prototype in xor_and.h qualifies c with restrict; this
+ * definition does not.
+ */
+void xor_and_mat_mul4(unsigned m, unsigned n, unsigned p,
+                      xor_and * c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b) {
+  const xor_and *a_row = a;   /* start of the current row of a */
+  xor_and *c_row = c;         /* start of the current row of c */
+  unsigned row, col, inner;
+
+  for (row = 0; row < m; row++, a_row += stride_a, c_row += stride_c) {
+    for (col = 0; col < p; col++) {
+      const xor_and *a_cur = a_row;   /* walks along the row of a */
+      const xor_and *b_cur = b + col; /* walks down column col of b */
+      xor_and acc = 0;
+
+      for (inner = 0; inner < n; inner++) {
+        acc ADD (*a_cur MUL *b_cur);
+        a_cur++;
+        b_cur += stride_b;
+      }
+      c_row[col] = acc;
+    }
+  }
+}
+
+/*
+ * Variant 5 of the XOR/AND matrix product c = a * b (a: m x n, b: n x p,
+ * c: m x p, strides counted in elements per row).
+ *
+ * Pointer-walking inner loop unrolled by two: both products of a pair are
+ * computed first and then folded into a single accumulator. A final term
+ * covers odd n.
+ *
+ * NOTE(review): the prototype in xor_and.h qualifies c with restrict; this
+ * definition does not.
+ */
+void xor_and_mat_mul5(unsigned m, unsigned n, unsigned p,
+                      xor_and * c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b) {
+  const xor_and *a_row = a;   /* start of the current row of a */
+  xor_and *c_row = c;         /* start of the current row of c */
+
+  for (unsigned row = 0; row < m; row++, a_row += stride_a, c_row += stride_c) {
+    for (unsigned col = 0; col < p; col++) {
+      const xor_and *a_cur = a_row;   /* walks along the row of a */
+      const xor_and *b_cur = b + col; /* walks down column col of b */
+      xor_and acc = 0;
+      const unsigned pairs = n/2;
+
+      for (unsigned pair = 0; pair < pairs; pair++) {
+        xor_and first = *a_cur MUL *b_cur;
+        a_cur++;
+        b_cur += stride_b;
+        xor_and second = *a_cur MUL *b_cur;
+        a_cur++;
+        b_cur += stride_b;
+        acc ADD first;
+        acc ADD second;
+      }
+
+      /* Leftover term when n is odd. */
+      if (n%2 != 0) {
+        acc ADD (*a_cur MUL *b_cur);
+      }
+      c_row[col] = acc;
+    }
+  }
+}
+
+/* One inner-product step: fold the current a/b pair into the accumulator and
+   advance both cursors (a by one element, b by one row). The macro expands
+   to three complete statements and is written at its use sites without a
+   trailing semicolon. It refers to `total`, `pa_i_j`, `pb_j_k` and `stride_b`
+   by name, so those identifiers must exist in the enclosing scope. */
+#define CHUNK \
+ total ADD (*pa_i_j MUL *pb_j_k); \
+ pa_i_j ++; \
+ pb_j_k += stride_b;
+
+/*
+ * Variant 6 of the XOR/AND matrix product c = a * b (a: m x n, b: n x p,
+ * c: m x p, strides counted in elements per row).
+ *
+ * Pointer-walking inner loop unrolled by two with CHUNK, written as a
+ * guarded do/while (test at the bottom). A final term covers odd n.
+ * The names total, pa_i_j and pb_j_k are fixed by the CHUNK macro.
+ *
+ * NOTE(review): the prototype in xor_and.h qualifies c with restrict; this
+ * definition does not.
+ */
+void xor_and_mat_mul6(unsigned m, unsigned n, unsigned p,
+                      xor_and * c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b) {
+  const xor_and *a_row = a;   /* start of the current row of a */
+  xor_and *c_row = c;         /* start of the current row of c */
+
+  for (unsigned row = 0; row < m; row++, a_row += stride_a, c_row += stride_c) {
+    for (unsigned col = 0; col < p; col++) {
+      const xor_and *pa_i_j = a_row;
+      const xor_and *pb_j_k = b + col;
+      xor_and total = 0;
+      const unsigned pairs = n/2;
+      unsigned pair = 0;
+
+      if (pairs > 0) {
+        do {
+          CHUNK
+          CHUNK
+          pair++;
+        } while (pair < pairs);
+      }
+
+      /* Leftover term when n is odd. */
+      if (n%2 != 0) {
+        total ADD (*pa_i_j MUL *pb_j_k);
+      }
+      c_row[col] = total;
+    }
+  }
+}
+
+/* Unroll factor of the main loop below; it must equal the number of CHUNK
+   lines written in that loop body. */
+#define UNROLL 4
+
+/*
+ * Variant 7 of the XOR/AND matrix product c = a * b (a: m x n, b: n x p,
+ * c: m x p, strides counted in elements per row).
+ *
+ * Pointer-walking inner loop unrolled UNROLL times with CHUNK, followed by
+ * a one-step-at-a-time loop for the n % UNROLL remaining terms. Both loops
+ * are guarded do/while loops. The names total, pa_i_j and pb_j_k are fixed
+ * by the CHUNK macro.
+ *
+ * NOTE(review): the prototype in xor_and.h qualifies c with restrict; this
+ * definition does not.
+ */
+void xor_and_mat_mul7(unsigned m, unsigned n, unsigned p,
+                      xor_and * c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b) {
+  const xor_and *a_row = a;   /* start of the current row of a */
+  xor_and *c_row = c;         /* start of the current row of c */
+
+  for (unsigned row = 0; row < m; row++, a_row += stride_a, c_row += stride_c) {
+    for (unsigned col = 0; col < p; col++) {
+      const xor_and *pa_i_j = a_row;
+      const xor_and *pb_j_k = b + col;
+      xor_and total = 0;
+
+      /* Full groups of UNROLL terms. */
+      {
+        const unsigned groups = n/UNROLL;
+        unsigned group = 0;
+        if (groups > 0) {
+          do {
+            CHUNK
+            CHUNK
+            CHUNK
+            CHUNK
+            group++;
+          } while (group < groups);
+        }
+      }
+
+      /* Remaining n % UNROLL terms, one per iteration. */
+      {
+        const unsigned leftover = n%UNROLL;
+        unsigned step = 0;
+        if (leftover > 0) {
+          do {
+            CHUNK
+            step++;
+          } while (step < leftover);
+        }
+      }
+
+      c_row[col] = total;
+    }
+  }
+}
+
+/*
+ * Returns the next value of a deterministic pseudo-random sequence.
+ *
+ * The generator state lives in a function-local static and is updated as
+ * state = state * 1103515249 + 12345 in 64-bit unsigned arithmetic; the new
+ * state is returned unchanged. The starting state is fixed, so every run
+ * of the program produces the same sequence.
+ */
+xor_and xor_and_random(void) {
+  static uint64_t state = 1325997111;
+
+  state = 12345 + state * 1103515249;
+  return state;
+}
+
+/*
+ * Fills the m x n matrix a with successive values from xor_and_random(),
+ * row by row. stride_a is the distance, in elements, between the starts of
+ * two consecutive rows.
+ */
+void xor_and_mat_random(unsigned m,
+                        unsigned n,
+                        xor_and *a, unsigned stride_a) {
+  unsigned row, col;
+
+  for (row = 0; row < m; row++) {
+    for (col = 0; col < n; col++) {
+      a[row*stride_a + col] = xor_and_random();
+    }
+  }
+}
+
+/*
+ * Compares two m x n matrices element by element.
+ *
+ * stride_a and stride_b are the row-to-row distances, in elements, of a and
+ * b respectively. Returns false at the first differing element and true
+ * if none differs (including when m or n is zero).
+ */
+bool xor_and_mat_equal(unsigned m,
+                       unsigned n,
+                       const xor_and *a, unsigned stride_a,
+                       const xor_and *b, unsigned stride_b) {
+  for (unsigned row = 0; row < m; row++) {
+    for (unsigned col = 0; col < n; col++) {
+      const xor_and lhs = a[row*stride_a + col];
+      const xor_and rhs = b[row*stride_b + col];
+      if (lhs != rhs) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
diff --git a/test/monniaux/xor_and_mat/int_mat_run.c b/test/monniaux/xor_and_mat/int_mat_run.c
new file mode 100644
index 00000000..a6a821d9
--- /dev/null
+++ b/test/monniaux/xor_and_mat/int_mat_run.c
@@ -0,0 +1,90 @@
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include "xor_and.h"
+#include "../clock.h"
+
+/* Allocates an uninitialised rows x cols matrix of xor_and elements.
+   Prints a diagnostic and terminates the program if the allocation fails,
+   so the caller never receives NULL. The caller owns the result and must
+   release it with free(). */
+static xor_and *alloc_matrix(unsigned rows, unsigned cols) {
+  xor_and *mat = malloc(sizeof *mat * rows * cols);
+  if (mat == NULL) {
+    fprintf(stderr, "xor_and_mat: out of memory\n");
+    exit(EXIT_FAILURE);
+  }
+  return mat;
+}
+
+/*
+ * Benchmark driver.
+ *
+ * Builds two pseudo-random operands (a: m x n, b: n x p), runs the seven
+ * product variants on them while measuring each call with
+ * get_current_cycle(), then prints whether every result equals the one of
+ * variant 1 together with the seven cycle counts.
+ *
+ * Returns 0 on completion; exits with EXIT_FAILURE if memory runs out.
+ */
+int main(void) {
+  const unsigned m = 60, n = 31, p = 50;
+  clock_prepare();
+  xor_and *a = alloc_matrix(m, n);
+  xor_and_mat_random(m, n, a, n);
+  xor_and *b = alloc_matrix(n, p);
+  xor_and_mat_random(n, p, b, p);
+
+  /* Each result buffer is allocated outside the timed region. */
+  xor_and *c1 = alloc_matrix(m, p);
+  cycle_t c1_time = get_current_cycle();
+  xor_and_mat_mul1(m, n, p, c1, p, a, n, b, p);
+  c1_time = get_current_cycle()-c1_time;
+
+  xor_and *c2 = alloc_matrix(m, p);
+  cycle_t c2_time = get_current_cycle();
+  xor_and_mat_mul2(m, n, p, c2, p, a, n, b, p);
+  c2_time = get_current_cycle()-c2_time;
+
+  xor_and *c3 = alloc_matrix(m, p);
+  cycle_t c3_time = get_current_cycle();
+  xor_and_mat_mul3(m, n, p, c3, p, a, n, b, p);
+  c3_time = get_current_cycle()-c3_time;
+
+  xor_and *c4 = alloc_matrix(m, p);
+  cycle_t c4_time = get_current_cycle();
+  xor_and_mat_mul4(m, n, p, c4, p, a, n, b, p);
+  c4_time = get_current_cycle()-c4_time;
+
+  xor_and *c5 = alloc_matrix(m, p);
+  cycle_t c5_time = get_current_cycle();
+  xor_and_mat_mul5(m, n, p, c5, p, a, n, b, p);
+  c5_time = get_current_cycle()-c5_time;
+
+  xor_and *c6 = alloc_matrix(m, p);
+  cycle_t c6_time = get_current_cycle();
+  xor_and_mat_mul6(m, n, p, c6, p, a, n, b, p);
+  c6_time = get_current_cycle()-c6_time;
+
+  xor_and *c7 = alloc_matrix(m, p);
+  cycle_t c7_time = get_current_cycle();
+  xor_and_mat_mul7(m, n, p, c7, p, a, n, b, p);
+  c7_time = get_current_cycle()-c7_time;
+
+  /* The results are m x p, so the comparison must span p columns; passing
+     n here would leave columns n..p-1 unchecked.
+     NOTE(review): the PRIu64 conversions assume cycle_t (from ../clock.h)
+     is a 64-bit unsigned type -- confirm. */
+  printf("c1==c2: %s\n"
+         "c1==c3: %s\n"
+         "c1==c4: %s\n"
+         "c1==c5: %s\n"
+         "c1==c6: %s\n"
+         "c1==c7: %s\n"
+         "c1 cycles: %" PRIu64 "\n"
+         "c2 cycles: %" PRIu64 "\n"
+         "c3 cycles: %" PRIu64 "\n"
+         "c4 cycles: %" PRIu64 "\n"
+         "c5 cycles: %" PRIu64 "\n"
+         "c6 cycles: %" PRIu64 "\n"
+         "c7 cycles: %" PRIu64 "\n",
+
+         xor_and_mat_equal(m, p, c1, p, c2, p)?"true":"false",
+         xor_and_mat_equal(m, p, c1, p, c3, p)?"true":"false",
+         xor_and_mat_equal(m, p, c1, p, c4, p)?"true":"false",
+         xor_and_mat_equal(m, p, c1, p, c5, p)?"true":"false",
+         xor_and_mat_equal(m, p, c1, p, c6, p)?"true":"false",
+         xor_and_mat_equal(m, p, c1, p, c7, p)?"true":"false",
+
+         c1_time,
+         c2_time,
+         c3_time,
+         c4_time,
+         c5_time,
+         c6_time,
+         c7_time);
+
+  free(a);
+  free(b);
+  free(c1);
+  free(c2);
+  free(c3);
+  free(c4);
+  free(c5);
+  free(c6);
+  free(c7);
+  return 0;
+}
diff --git a/test/monniaux/xor_and_mat/xor_and.h b/test/monniaux/xor_and_mat/xor_and.h
new file mode 100644
index 00000000..956ad800
--- /dev/null
+++ b/test/monniaux/xor_and_mat/xor_and.h
@@ -0,0 +1,50 @@
+#ifndef XOR_AND_H
+#define XOR_AND_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/* Matrix element. The product combines elements with bitwise AND and
+   accumulates with bitwise XOR, so each of the 64 bit positions behaves as
+   an independent one-bit value. */
+typedef uint64_t xor_and;
+
+/*
+ * xor_and_mat_mul1 .. xor_and_mat_mul7: seven implementations of the same
+ * operation, c = a * b with XOR as addition and AND as multiplication.
+ * They differ only in how the loops are written (in-memory vs. local
+ * accumulation, indexing vs. pointer walking, amount of manual unrolling).
+ *
+ *   m, n, p   dimensions: a is m x n, b is n x p, c is m x p
+ *   c         destination matrix, fully overwritten
+ *   a, b      source matrices, read only
+ *   stride_x  distance, in elements, between the starts of two consecutive
+ *             rows of matrix x
+ *
+ * c is declared restrict: it must not overlap a or b.
+ */
+void xor_and_mat_mul1(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b);
+
+void xor_and_mat_mul2(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b);
+
+void xor_and_mat_mul3(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b);
+
+void xor_and_mat_mul4(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b);
+
+void xor_and_mat_mul5(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b);
+
+void xor_and_mat_mul6(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b);
+
+void xor_and_mat_mul7(unsigned m, unsigned n, unsigned p,
+                      xor_and * restrict c, unsigned stride_c,
+                      const xor_and *a, unsigned stride_a,
+                      const xor_and *b, unsigned stride_b);
+
+/* Returns the next value of a deterministic pseudo-random sequence. */
+xor_and xor_and_random(void);
+
+/* Fills the m x n matrix a (row stride stride_a, in elements) with values
+   drawn from xor_and_random(). */
+void xor_and_mat_random(unsigned m,
+                        unsigned n,
+                        xor_and *a, unsigned stride_a);
+
+/* Returns true when the m x n matrices a and b (row strides stride_a and
+   stride_b, in elements) hold identical elements, false otherwise. */
+bool xor_and_mat_equal(unsigned m,
+                       unsigned n,
+                       const xor_and *a, unsigned stride_a,
+                       const xor_and *b, unsigned stride_b);
+
+#endif /* XOR_AND_H */