From 549e1f93c95e06d796bdd15ca0cf24c0a0128e70 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Tue, 12 Mar 2019 06:24:50 +0100 Subject: some more work on complex matrices --- test/monniaux/complex/complex.c | 203 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 8 deletions(-) (limited to 'test/monniaux/complex') diff --git a/test/monniaux/complex/complex.c b/test/monniaux/complex/complex.c index 536c321b..0b806b24 100644 --- a/test/monniaux/complex/complex.c +++ b/test/monniaux/complex/complex.c @@ -1,7 +1,13 @@ #include +#include +#include +#include +#include "../clock.h" + +typedef double REAL; typedef struct { - double re, im; + REAL re, im; } COMPLEX; static inline void COMPLEX_zero(COMPLEX *r) { @@ -26,7 +32,7 @@ static inline void COMPLEX_mul(COMPLEX *r, r->im = im; } -static inline void COMPLEX_mul_add(COMPLEX *r, +static inline void COMPLEX_mul_addto(COMPLEX *r, const COMPLEX *x, const COMPLEX *y) { double re = r->re + x->re * y->re - x->im * y->im; @@ -35,6 +41,17 @@ static inline void COMPLEX_mul_add(COMPLEX *r, r->im = im; } +#define MACRO_COMPLEX_mul_addto(r, x, y) \ + { \ + double rre=(r).re, rim=(r).im, \ + xre = (x).re, xim=(x).im, \ + yre = (y).re, yim = (y).im; \ + double re = rre + xre * yre - xim * yim; \ + double im = rim + xim * yre + xre * yim; \ + r.re = re; \ + r.im = im; \ + } + void COMPLEX_mat_mul1(unsigned m, unsigned n, unsigned p, COMPLEX * restrict c, unsigned stride_c, @@ -48,16 +65,186 @@ void COMPLEX_mat_mul1(unsigned m, unsigned n, unsigned p, for(unsigned i=0; i 0) { + do { + CHUNK + CHUNK + CHUNK + CHUNK + j4++; + } while (j4 < n4); + } + } + { + unsigned j4=0, n4=n%UNROLL; + if (n4 > 0) { + do { + CHUNK + j4++; + } while (j4 < n4); + } + } + pc_i[k] = total; + } + pa_i += stride_a; + pc_i += stride_c; + } +} + +#undef CHUNK +#define CHUNK \ + MACRO_COMPLEX_mul_addto(total, *pa_i_j, *pb_j_k) \ + pa_i_j ++; \ + pb_j_k = (COMPLEX*) ((char*) pb_j_k + stride_b_scaled); + +void COMPLEX_mat_mul9(unsigned m, unsigned n, unsigned p, + COMPLEX * c, unsigned stride_c, + const COMPLEX *a, unsigned stride_a, + const COMPLEX *b, unsigned stride_b) { + const COMPLEX *pa_i = a; + COMPLEX * pc_i = c; + size_t stride_b_scaled = sizeof(COMPLEX) * stride_b; + for(unsigned i=0; i 0) { + do { + CHUNK + CHUNK + CHUNK + CHUNK + j4++; + } while (j4 < n4); + } + } + { + unsigned j4=0, n4=n%UNROLL; + if (n4 > 0) { + do { + CHUNK + j4++; + } while (j4 < n4); + } } + pc_i[k] = total; } + pa_i += stride_a; + pc_i += stride_c; } } +bool COMPLEX_equal(const COMPLEX *a, + const COMPLEX *b) { + return a->re==b->re && a->im==b->im; +} + +bool COMPLEX_mat_equal(unsigned m, + unsigned n, + const COMPLEX *a, unsigned stride_a, + const COMPLEX *b, unsigned stride_b) { + for(unsigned i=0; i