diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-02-19 12:48:00 +0100 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-02-19 12:48:00 +0100 |
commit | 3f39d34b6f711d8dde42b0d48889c6d4b67ce541 (patch) | |
tree | 35757cb047a5fbb9ef6884150108840f54762d48 /test/monniaux | |
parent | c4296102ae17e434279ed82df0471b7c50ab2f51 (diff) | |
download | compcert-kvx-3f39d34b6f711d8dde42b0d48889c6d4b67ce541.tar.gz compcert-kvx-3f39d34b6f711d8dde42b0d48889c6d4b67ce541.zip |
mul8: loop-invariant code motion
Diffstat (limited to 'test/monniaux')
-rw-r--r-- | test/monniaux/float_mat/float_mat.c | 46 | ||||
-rw-r--r-- | test/monniaux/float_mat/float_mat.h | 7 | ||||
-rw-r--r-- | test/monniaux/float_mat/float_mat_run.c | 13 |
3 files changed, 63 insertions, 3 deletions
diff --git a/test/monniaux/float_mat/float_mat.c b/test/monniaux/float_mat/float_mat.c index 818dbded..45612635 100644 --- a/test/monniaux/float_mat/float_mat.c +++ b/test/monniaux/float_mat/float_mat.c @@ -1,4 +1,5 @@ #include "float_mat.h" +#include <stddef.h> #define ADD += #define MUL * @@ -180,3 +181,48 @@ void REAL_mat_mul7(unsigned m, unsigned n, unsigned p, pc_i += stride_c; } } + +#undef CHUNK +#define CHUNK \ + total ADD (*pa_i_j MUL *pb_j_k); \ + pa_i_j ++; \ + pb_j_k = (REAL*) ((char*) pb_j_k + stride_b_scaled); + +void REAL_mat_mul8(unsigned m, unsigned n, unsigned p, + REAL * c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b) { + const REAL *pa_i = a; + REAL * pc_i = c; + size_t stride_b_scaled = sizeof(REAL) * stride_b; + for(unsigned i=0; i<m; i++) { + for(unsigned k=0; k<p; k++) { + const REAL *pb_j_k = b+k, *pa_i_j = pa_i; + REAL total = 0; + { + unsigned j4=0, n4=n/UNROLL; + if (n4 > 0) { + do { + CHUNK + CHUNK + CHUNK + CHUNK + j4++; + } while (j4 < n4); + } + } + { + unsigned j4=0, n4=n%UNROLL; + if (n4 > 0) { + do { + CHUNK + j4++; + } while (j4 < n4); + } + } + pc_i[k] = total; + } + pa_i += stride_a; + pc_i += stride_c; + } +} diff --git a/test/monniaux/float_mat/float_mat.h b/test/monniaux/float_mat/float_mat.h index 03a25036..3b787b1f 100644 --- a/test/monniaux/float_mat/float_mat.h +++ b/test/monniaux/float_mat/float_mat.h @@ -1,7 +1,7 @@ #include <stdint.h> #include <stdbool.h> -typedef float REAL; +typedef double REAL; void REAL_mat_mul1(unsigned m, unsigned n, unsigned p, REAL * restrict c, unsigned stride_c, @@ -38,6 +38,11 @@ void REAL_mat_mul7(unsigned m, unsigned n, unsigned p, const REAL *a, unsigned stride_a, const REAL *b, unsigned stride_b); +void REAL_mat_mul8(unsigned m, unsigned n, unsigned p, + REAL * restrict c, unsigned stride_c, + const REAL *a, unsigned stride_a, + const REAL *b, unsigned stride_b); + REAL REAL_random(void); void REAL_mat_random(unsigned m, diff --git a/test/monniaux/float_mat/float_mat_run.c b/test/monniaux/float_mat/float_mat_run.c index 448bb8a5..f5dfa0af 100644 --- a/test/monniaux/float_mat/float_mat_run.c +++ b/test/monniaux/float_mat/float_mat_run.c @@ -81,19 +81,26 @@ int main() { REAL_mat_mul7(m, n, p, c7, p, a, n, b, p); c7_time = get_cycle()-c7_time; + REAL *c8 = malloc(sizeof(REAL) * m * p); + cycle_t c8_time = get_cycle(); + REAL_mat_mul8(m, n, p, c8, p, a, n, b, p); + c8_time = get_cycle()-c8_time; + printf("c1==c2: %s\n" "c1==c3: %s\n" "c1==c4: %s\n" "c1==c5: %s\n" "c1==c6: %s\n" "c1==c7: %s\n" + "c1==c8: %s\n" "c1_time = %" PRIu64 "\n" "c2_time = %" PRIu64 "\n" "c3_time = %" PRIu64 "\n" "c4_time = %" PRIu64 "\n" "c5_time = %" PRIu64 "\n" "c6_time = %" PRIu64 "\n" - "c7_time = %" PRIu64 "\n", + "c7_time = %" PRIu64 "\n" + "c8_time = %" PRIu64 "\n", REAL_mat_equal(m, n, c1, p, c2, p)?"true":"false", REAL_mat_equal(m, n, c1, p, c3, p)?"true":"false", @@ -101,6 +108,7 @@ int main() { REAL_mat_equal(m, n, c1, p, c5, p)?"true":"false", REAL_mat_equal(m, n, c1, p, c6, p)?"true":"false", REAL_mat_equal(m, n, c1, p, c7, p)?"true":"false", + REAL_mat_equal(m, n, c1, p, c8, p)?"true":"false", c1_time, c2_time, @@ -108,7 +116,8 @@ int main() { c4_time, c5_time, c6_time, - c7_time); + c7_time, + c8_time); free(a); free(b); |