#include #include #include #include #include "../clock.h" typedef double REAL; typedef struct { REAL re, im; } COMPLEX; static inline void COMPLEX_zero(COMPLEX *r) { r->re = r->im = 0.0; } static inline void COMPLEX_add(COMPLEX *r, const COMPLEX *x, const COMPLEX *y) { double re = x->re + y->re; double im = x->im + y->im; r->re = re; r->im = im; } static inline void COMPLEX_mul(COMPLEX *r, const COMPLEX *x, const COMPLEX *y) { double re = x->re * y->re - x->im * y->im; double im = x->im * y->re + x->re * y->im; r->re = re; r->im = im; } static inline void COMPLEX_mul_addto(COMPLEX *r, const COMPLEX *x, const COMPLEX *y) { double re = r->re + x->re * y->re - x->im * y->im; double im = r->im + x->im * y->re + x->re * y->im; r->re = re; r->im = im; } #define MACRO_COMPLEX_mul_addto(r, x, y) \ { \ double rre=(r).re, rim=(r).im, \ xre = (x).re, xim=(x).im, \ yre = (y).re, yim = (y).im; \ double re = rre + xre * yre - xim * yim; \ double im = rim + xim * yre + xre * yim; \ r.re = re; \ r.im = im; \ } void COMPLEX_mat_mul1(unsigned m, unsigned n, unsigned p, COMPLEX * restrict c, unsigned stride_c, const COMPLEX *a, unsigned stride_a, const COMPLEX *b, unsigned stride_b) { for(unsigned i=0; i 0) { do { CHUNK CHUNK CHUNK CHUNK j4++; } while (j4 < n4); } } { unsigned j4=0, n4=n%UNROLL; if (n4 > 0) { do { CHUNK j4++; } while (j4 < n4); } } pc_i[k] = total; } pa_i += stride_a; pc_i += stride_c; } } #undef CHUNK #define CHUNK \ MACRO_COMPLEX_mul_addto(total, *pa_i_j, *pb_j_k) \ pa_i_j ++; \ pb_j_k = (COMPLEX*) ((char*) pb_j_k + stride_b_scaled); void COMPLEX_mat_mul9(unsigned m, unsigned n, unsigned p, COMPLEX * c, unsigned stride_c, const COMPLEX *a, unsigned stride_a, const COMPLEX *b, unsigned stride_b) { const COMPLEX *pa_i = a; COMPLEX * pc_i = c; size_t stride_b_scaled = sizeof(COMPLEX) * stride_b; for(unsigned i=0; i 0) { do { CHUNK CHUNK CHUNK CHUNK j4++; } while (j4 < n4); } } { unsigned j4=0, n4=n%UNROLL; if (n4 > 0) { do { CHUNK j4++; } while (j4 < n4); } } pc_i[k] = total; } pa_i += stride_a; pc_i += stride_c; } } bool COMPLEX_equal(const COMPLEX *a, const COMPLEX *b) { return a->re==b->re && a->im==b->im; } bool COMPLEX_mat_equal(unsigned m, unsigned n, const COMPLEX *a, unsigned stride_a, const COMPLEX *b, unsigned stride_b) { for(unsigned i=0; i