about | summary | refs | log | tree | commit | diff | stats
path: root/test/monniaux/float_mat
diff options
context:
space:
mode:
author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2019-02-19 12:48:00 +0100
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2019-02-19 12:48:00 +0100
commit: 3f39d34b6f711d8dde42b0d48889c6d4b67ce541 (patch)
tree: 35757cb047a5fbb9ef6884150108840f54762d48 /test/monniaux/float_mat
parent: c4296102ae17e434279ed82df0471b7c50ab2f51 (diff)
download: compcert-kvx-3f39d34b6f711d8dde42b0d48889c6d4b67ce541.tar.gz
compcert-kvx-3f39d34b6f711d8dde42b0d48889c6d4b67ce541.zip
mul8: loop-invariant code motion
Diffstat (limited to 'test/monniaux/float_mat')
-rw-r--r-- test/monniaux/float_mat/float_mat.c 46
-rw-r--r-- test/monniaux/float_mat/float_mat.h 7
-rw-r--r-- test/monniaux/float_mat/float_mat_run.c 13
3 files changed, 63 insertions, 3 deletions
diff --git a/test/monniaux/float_mat/float_mat.c b/test/monniaux/float_mat/float_mat.c
index 818dbded..45612635 100644
--- a/test/monniaux/float_mat/float_mat.c
+++ b/test/monniaux/float_mat/float_mat.c
@@ -1,4 +1,5 @@
#include "float_mat.h"
+#include <stddef.h>
#define ADD +=
#define MUL *
@@ -180,3 +181,48 @@ void REAL_mat_mul7(unsigned m, unsigned n, unsigned p,
pc_i += stride_c;
}
}
+
+#undef CHUNK
+#define CHUNK /* one multiply-accumulate step; uses total, pa_i_j, pb_j_k and stride_b_scaled from the expansion site */ \
+ total ADD (*pa_i_j MUL *pb_j_k); /* fold the current product into total */ \
+ pa_i_j ++; /* step pa_i_j to the next REAL */ \
+ pb_j_k = (REAL*) ((char*) pb_j_k + stride_b_scaled); /* advance pb_j_k by stride_b_scaled bytes using char* arithmetic */
+
+void REAL_mat_mul8(unsigned m, unsigned n, unsigned p, /* c (m x p) = a (m x n) times b (n x p); strides are counted in REAL elements */
+ REAL * c, unsigned stride_c,
+ const REAL *a, unsigned stride_a,
+ const REAL *b, unsigned stride_b) {
+ const REAL *pa_i = a; /* start of row i of a */
+ REAL * pc_i = c; /* start of row i of c */
+ size_t stride_b_scaled = sizeof(REAL) * stride_b; /* row step of b in bytes, computed once before the loops */
+ for(unsigned i=0; i<m; i++) {
+ for(unsigned k=0; k<p; k++) {
+ const REAL *pb_j_k = b+k, *pa_i_j = pa_i; /* pb_j_k: first element of column k of b; pa_i_j: first element of row i of a */
+ REAL total = 0; /* running sum for c[i][k] */
+ { /* main loop: n/UNROLL passes, four CHUNK steps per pass */
+ unsigned j4=0, n4=n/UNROLL; /* NOTE(review): four CHUNKs per pass presumes UNROLL == 4; UNROLL is defined outside this hunk -- confirm */
+ if (n4 > 0) { /* guard: the do/while below always runs its body at least once */
+ do {
+ CHUNK
+ CHUNK
+ CHUNK
+ CHUNK
+ j4++;
+ } while (j4 < n4);
+ }
+ }
+ { /* remainder loop: the n%UNROLL leftover terms, one CHUNK each */
+ unsigned j4=0, n4=n%UNROLL;
+ if (n4 > 0) { /* same guard as above for the do/while */
+ do {
+ CHUNK
+ j4++;
+ } while (j4 < n4);
+ }
+ }
+ pc_i[k] = total; /* store the finished sum in row i, column k of c */
+ }
+ pa_i += stride_a; /* move to the next row of a */
+ pc_i += stride_c; /* move to the next row of c */
+ }
+}
diff --git a/test/monniaux/float_mat/float_mat.h b/test/monniaux/float_mat/float_mat.h
index 03a25036..3b787b1f 100644
--- a/test/monniaux/float_mat/float_mat.h
+++ b/test/monniaux/float_mat/float_mat.h
@@ -1,7 +1,7 @@
#include <stdint.h>
#include <stdbool.h>
-typedef float REAL;
+typedef double REAL;
void REAL_mat_mul1(unsigned m, unsigned n, unsigned p,
REAL * restrict c, unsigned stride_c,
@@ -38,6 +38,11 @@ void REAL_mat_mul7(unsigned m, unsigned n, unsigned p,
const REAL *a, unsigned stride_a,
const REAL *b, unsigned stride_b);
+void REAL_mat_mul8(unsigned m, unsigned n, unsigned p, /* matrix product variant 8, defined in float_mat.c */
+ REAL * restrict c, unsigned stride_c, /* NOTE(review): c is restrict-qualified here but not in the float_mat.c definition -- confirm which is intended */
+ const REAL *a, unsigned stride_a,
+ const REAL *b, unsigned stride_b);
+
REAL REAL_random(void);
void REAL_mat_random(unsigned m,
diff --git a/test/monniaux/float_mat/float_mat_run.c b/test/monniaux/float_mat/float_mat_run.c
index 448bb8a5..f5dfa0af 100644
--- a/test/monniaux/float_mat/float_mat_run.c
+++ b/test/monniaux/float_mat/float_mat_run.c
@@ -81,19 +81,26 @@ int main() {
REAL_mat_mul7(m, n, p, c7, p, a, n, b, p);
c7_time = get_cycle()-c7_time;
+ REAL *c8 = malloc(sizeof(REAL) * m * p);
+ cycle_t c8_time = get_cycle();
+ REAL_mat_mul8(m, n, p, c8, p, a, n, b, p);
+ c8_time = get_cycle()-c8_time;
+
printf("c1==c2: %s\n"
"c1==c3: %s\n"
"c1==c4: %s\n"
"c1==c5: %s\n"
"c1==c6: %s\n"
"c1==c7: %s\n"
+ "c1==c8: %s\n"
"c1_time = %" PRIu64 "\n"
"c2_time = %" PRIu64 "\n"
"c3_time = %" PRIu64 "\n"
"c4_time = %" PRIu64 "\n"
"c5_time = %" PRIu64 "\n"
"c6_time = %" PRIu64 "\n"
- "c7_time = %" PRIu64 "\n",
+ "c7_time = %" PRIu64 "\n"
+ "c8_time = %" PRIu64 "\n",
REAL_mat_equal(m, n, c1, p, c2, p)?"true":"false",
REAL_mat_equal(m, n, c1, p, c3, p)?"true":"false",
@@ -101,6 +108,7 @@ int main() {
REAL_mat_equal(m, n, c1, p, c5, p)?"true":"false",
REAL_mat_equal(m, n, c1, p, c6, p)?"true":"false",
REAL_mat_equal(m, n, c1, p, c7, p)?"true":"false",
+ REAL_mat_equal(m, n, c1, p, c8, p)?"true":"false",
c1_time,
c2_time,
@@ -108,7 +116,8 @@ int main() {
c4_time,
c5_time,
c6_time,
- c7_time);
+ c7_time,
+ c8_time);
free(a);
free(b);