diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-02-15 21:50:45 +0100 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-02-15 21:50:45 +0100 |
commit | b0e23628f95845591f8ce697784beda13e3cf640 (patch) | |
tree | d5188eb50d94ef2799e9abe69c1e6f144a5640f8 /test | |
parent | 2524079a87d69c8c3574ef1e7bcc4df98b36011c (diff) | |
download | compcert-kvx-b0e23628f95845591f8ce697784beda13e3cf640.tar.gz compcert-kvx-b0e23628f95845591f8ce697784beda13e3cf640.zip |
double matrix multiplication
Diffstat (limited to 'test')
-rw-r--r-- | test/monniaux/float_mat/Makefile | 21 | ||||
-rw-r--r-- | test/monniaux/float_mat/float_mat.c | 182 | ||||
-rw-r--r-- | test/monniaux/float_mat/float_mat.h | 50 | ||||
-rw-r--r-- | test/monniaux/float_mat/float_mat_run.c | 118 |
4 files changed, 371 insertions, 0 deletions
diff --git a/test/monniaux/float_mat/Makefile b/test/monniaux/float_mat/Makefile new file mode 100644 index 00000000..0b66010a --- /dev/null +++ b/test/monniaux/float_mat/Makefile @@ -0,0 +1,21 @@ +include ../rules.mk + +PRODUCTS=float_mat.host float_mat.gcc.k1c.out float_mat.ccomp.k1c.out float_mat.ccomp.k1c.s float_mat.gcc.k1c.s float_mat.gcc.k1c float_mat.ccomp.k1c + +all: $(PRODUCTS) + +float_mat.host: float_mat.c float_mat_run.c float_mat.h + $(CC) $(CFLAGS) float_mat.c float_mat_run.c -o $@ + +float_mat.gcc.k1c.s float_mat.ccomp.k1c.s float_mat_run.gcc.k1c.s: float_mat.h + +float_mat.gcc.k1c: float_mat.gcc.k1c.o float_mat_run.gcc.k1c.o + $(K1C_CC) $(K1C_CFLAGS) $+ -o $@ + +float_mat.ccomp.k1c: float_mat.ccomp.k1c.o float_mat_run.gcc.k1c.o + $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@ + +clean: + $(RM) -f $(PRODUCTS) float_mat.gcc.k1c.o float_mat.ccomp.k1c.o float_mat_run.gcc.k1c.o + +.PHONY: clean diff --git a/test/monniaux/float_mat/float_mat.c b/test/monniaux/float_mat/float_mat.c new file mode 100644 index 00000000..818dbded --- /dev/null +++ b/test/monniaux/float_mat/float_mat.c @@ -0,0 +1,182 @@ +#include "float_mat.h" +/* ADD and MUL name the accumulate and multiply operators used by the kernels below. */ +#define ADD += +#define MUL * +/* c (m-by-p) = a (m-by-n) times b (n-by-p); each stride is the element distance between rows. This version zeroes c, then accumulates every product directly into c[i*stride_c+k]. */ +void REAL_mat_mul1(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + c[i*stride_c+k] = 0; + } + } + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + for(unsigned j=0; j<n; j++) { + c[i*stride_c+k] ADD (a[i*stride_a+j] MUL b[j*stride_b+k]); + } + } + } +} +/* Same product as REAL_mat_mul1, but each dot product is summed in the local total and stored to c once. */ +void REAL_mat_mul2(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + REAL total = 0; + for(unsigned j=0; j<n; j++) { + total ADD (a[i*stride_a + j] MUL b[j*stride_b + k]); + } + c[i*stride_c+k] = total; + } + } +} 
+/* Inner loop unrolled by two with separate accumulators total0 and total1 that are added at the end; when n is odd the last product goes into total0. */ +void REAL_mat_mul3(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + REAL total0 = 0, total1 = 0; + unsigned j; + for(j=0; j+1<n; j+=2) { + total0 ADD (a[i*stride_a + j] MUL b[j*stride_b + k]); + total1 ADD (a[i*stride_a + (j+1)] MUL b[(j+1)*stride_b + k]); + } + if (j < n) { + total0 ADD a[i*stride_a + j] MUL b[j*stride_b + k]; + } + total0 ADD total1; + c[i*stride_c+k] = total0; + } + } +} +/* Pointer-walking version: pa_i and pc_i advance by one row stride per i, pb_j_k steps through b by stride_b per j. NOTE(review): c is not restrict-qualified here, unlike the float_mat.h prototype. */ +void REAL_mat_mul4(unsigned m, unsigned n, unsigned p, + REAL * c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + const REAL *pa_i = a; + REAL * pc_i = c; + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + const REAL *pb_j_k = b+k, *pa_i_j = pa_i; + REAL total = 0; + for(unsigned j=0; j<n; j++) { + total ADD (*pa_i_j MUL *pb_j_k); + pa_i_j ++; + pb_j_k += stride_b; + } + pc_i[k] = total; + } + pa_i += stride_a; + pc_i += stride_c; + } +} +/* Pointer-walking version processing two elements per iteration (n/2 iterations); the products p0 and p1 are added to total in order, and one more product is added when n is odd. */ +void REAL_mat_mul5(unsigned m, unsigned n, unsigned p, + REAL * c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + const REAL *pa_i = a; + REAL * pc_i = c; + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + const REAL *pb_j_k = b+k, *pa_i_j = pa_i; + REAL total = 0; + for(unsigned j2=0, n2=n/2; j2<n2; j2++) { + REAL p0 = *pa_i_j MUL *pb_j_k; + pa_i_j ++; + pb_j_k += stride_b; + REAL p1 = *pa_i_j MUL *pb_j_k; + pa_i_j ++; + pb_j_k += stride_b; + total ADD p0; + total ADD p1; + } + if (n%2) { + total ADD *pa_i_j MUL *pb_j_k; + } + pc_i[k] = total; + } + pa_i += stride_a; + pc_i += stride_c; + } +} +/* CHUNK is one multiply-accumulate step that also advances both pointers; it uses total, pa_i_j, pb_j_k and stride_b from the expansion site. */ +#define CHUNK \ + total ADD (*pa_i_j MUL *pb_j_k); \ + pa_i_j ++; \ + pb_j_k += stride_b; +/* Two CHUNKs per iteration of a do-while loop that is guarded by n2 > 0; one more product is added when n is odd. */ +void REAL_mat_mul6(unsigned m, unsigned n, unsigned p, + REAL * c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + 
const REAL *pa_i = a; + REAL * pc_i = c; + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + const REAL *pb_j_k = b+k, *pa_i_j = pa_i; + REAL total = 0; + unsigned j2=0, n2=n/2; + if (n2 > 0) { + do { + CHUNK + CHUNK + j2++; + } while (j2 < n2); + } + if (n%2) { + total ADD (*pa_i_j MUL *pb_j_k); + } + pc_i[k] = total; + } + pa_i += stride_a; + pc_i += stride_c; + } +} +/* REAL_mat_mul7 runs n/UNROLL iterations of four CHUNKs, then n%UNROLL single CHUNKs; the four CHUNKs are written out by hand, so they must be kept in step with UNROLL. */ +#define UNROLL 4 +void REAL_mat_mul7(unsigned m, unsigned n, unsigned p, + REAL * c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + const REAL *pa_i = a; + REAL * pc_i = c; + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + const REAL *pb_j_k = b+k, *pa_i_j = pa_i; + REAL total = 0; + { + unsigned j4=0, n4=n/UNROLL; + if (n4 > 0) { + do { + CHUNK + CHUNK + CHUNK + CHUNK + j4++; + } while (j4 < n4); + } + } + { + unsigned j4=0, n4=n%UNROLL; + if (n4 > 0) { + do { + CHUNK + j4++; + } while (j4 < n4); + } + } + pc_i[k] = total; + } + pa_i += stride_a; + pc_i += stride_c; + } +} diff --git a/test/monniaux/float_mat/float_mat.h b/test/monniaux/float_mat/float_mat.h new file mode 100644 index 00000000..d0f48951 --- /dev/null +++ b/test/monniaux/float_mat/float_mat.h @@ -0,0 +1,50 @@ +#include <stdint.h> +#include <stdbool.h> +/* Scalar type of all matrix elements. NOTE(review): this header has no include guard. */ +typedef double REAL; +/* Seven implementations of the same product: c (m-by-p) = a (m-by-n) times b (n-by-p), with per-matrix row strides counted in elements. */ +void REAL_mat_mul1(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + +void REAL_mat_mul2(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + +void REAL_mat_mul3(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + +void REAL_mat_mul4(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + +void 
REAL_mat_mul5(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + +void REAL_mat_mul6(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + +void REAL_mat_mul7(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + +REAL REAL_random(void); + +void REAL_mat_random(unsigned m, + unsigned n, + REAL *a, unsigned stride_a); + +bool REAL_mat_equal(unsigned m, + unsigned n, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); diff --git a/test/monniaux/float_mat/float_mat_run.c b/test/monniaux/float_mat/float_mat_run.c new file mode 100644 index 00000000..cb5e2110 --- /dev/null +++ b/test/monniaux/float_mat/float_mat_run.c @@ -0,0 +1,118 @@ +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <inttypes.h> +#include "float_mat.h" +#include "../cycles.h" + +/* FIXME DMonniaux should be in the other but branches and float_of_int not implemented */ +bool REAL_mat_equal(unsigned m, + unsigned n, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + for(unsigned i=0; i<m; i++) { + for(unsigned j=0; j<n; j++) { + if (a[i*stride_a + j] != b[i*stride_b + j]) return false; + } + } + return true; +} + +REAL REAL_random(void) { + static uint64_t next = 1325997111; + next = next * 1103515249 + 12345; + return next % 1000; +} + +void REAL_mat_random(unsigned m, + unsigned n, + REAL *a, unsigned stride_a) { + for(unsigned i=0; i<m; i++) { + for(unsigned j=0; j<n; j++) { + a[i*stride_a + j] = REAL_random(); + } + } +} + +int main() { + const unsigned m = 60, n = 31, p = 50; + cycle_count_config(); + REAL *a = malloc(sizeof(REAL) * m * n); + REAL_mat_random(m, n, a, n); + REAL *b = malloc(sizeof(REAL) * n * p); + REAL_mat_random(n, p, b, p); 
+ + REAL *c1 = malloc(sizeof(REAL) * m * p); + cycle_t c1_time = get_cycle(); + REAL_mat_mul1(m, n, p, c1, p, a, n, b, p); + c1_time = get_cycle()-c1_time; + + REAL *c2 = malloc(sizeof(REAL) * m * p); + cycle_t c2_time = get_cycle(); + REAL_mat_mul2(m, n, p, c2, p, a, n, b, p); + c2_time = get_cycle()-c2_time; + + REAL *c3 = malloc(sizeof(REAL) * m * p); + cycle_t c3_time = get_cycle(); + REAL_mat_mul3(m, n, p, c3, p, a, n, b, p); + c3_time = get_cycle()-c3_time; + + REAL *c4 = malloc(sizeof(REAL) * m * p); + cycle_t c4_time = get_cycle(); + REAL_mat_mul4(m, n, p, c4, p, a, n, b, p); + c4_time = get_cycle()-c4_time; + + REAL *c5 = malloc(sizeof(REAL) * m * p); + cycle_t c5_time = get_cycle(); + REAL_mat_mul5(m, n, p, c5, p, a, n, b, p); + c5_time = get_cycle()-c5_time; + + REAL *c6 = malloc(sizeof(REAL) * m * p); + cycle_t c6_time = get_cycle(); + REAL_mat_mul6(m, n, p, c6, p, a, n, b, p); + c6_time = get_cycle()-c6_time; + + REAL *c7 = malloc(sizeof(REAL) * m * p); + cycle_t c7_time = get_cycle(); + REAL_mat_mul7(m, n, p, c7, p, a, n, b, p); + c7_time = get_cycle()-c7_time; + + printf("c1==c2: %s\n" + "c1==c3: %s\n" + "c1==c4: %s\n" + "c1==c5: %s\n" + "c1==c6: %s\n" + "c1==c7: %s\n" + "c1_time = %" PRIu64 "\n" + "c2_time = %" PRIu64 "\n" + "c3_time = %" PRIu64 "\n" + "c4_time = %" PRIu64 "\n" + "c5_time = %" PRIu64 "\n" + "c6_time = %" PRIu64 "\n" + "c7_time = %" PRIu64 "\n", + + REAL_mat_equal(m, n, c1, p, c2, p)?"true":"false", + REAL_mat_equal(m, n, c1, p, c3, p)?"true":"false", + REAL_mat_equal(m, n, c1, p, c4, p)?"true":"false", + REAL_mat_equal(m, n, c1, p, c5, p)?"true":"false", + REAL_mat_equal(m, n, c1, p, c6, p)?"true":"false", + REAL_mat_equal(m, n, c1, p, c7, p)?"true":"false", + + c1_time, + c2_time, + c3_time, + c4_time, + c5_time, + c6_time, + c7_time); + + free(a); + free(b); + free(c1); + free(c2); + free(c3); + free(c4); + free(c5); + free(c6); + return 0; +} |