#include #include #include #include #include "../clock.h" typedef double REAL; typedef struct { REAL re, im; } COMPLEX; static inline void COMPLEX_zero(COMPLEX *r) { r->re = r->im = 0.0; } static inline void COMPLEX_add(COMPLEX *r, const COMPLEX *x, const COMPLEX *y) { double re = x->re + y->re; double im = x->im + y->im; r->re = re; r->im = im; } static inline void COMPLEX_mul(COMPLEX *r, const COMPLEX *x, const COMPLEX *y) { double re = x->re * y->re - x->im * y->im; double im = x->im * y->re + x->re * y->im; r->re = re; r->im = im; } static inline void COMPLEX_mul_addto(COMPLEX *r, const COMPLEX *x, const COMPLEX *y) { double re = r->re + x->re * y->re - x->im * y->im; double im = r->im + x->im * y->re + x->re * y->im; r->re = re; r->im = im; } #define MACRO_COMPLEX_mul_addto(rre, rim, x, y) \ { \ double xre = (x).re, xim=(x).im, \ yre = (y).re, yim=(y).im; \ (rre) += xre * yre - xim * yim; \ (rim) += xim * yre + xre * yim; \ } void COMPLEX_mat_mul1(unsigned m, unsigned n, unsigned p, COMPLEX * restrict c, unsigned stride_c, const COMPLEX *a, unsigned stride_a, const COMPLEX *b, unsigned stride_b) { for(unsigned i=0; i 0) { do { CHUNK CHUNK CHUNK CHUNK j4++; } while (j4 < n4); } } { unsigned j4=0, n4=n%UNROLL; if (n4 > 0) { do { CHUNK j4++; } while (j4 < n4); } } pc_i[k] = total; } pa_i += stride_a; pc_i += stride_c; } } #undef CHUNK #define CHUNK \ MACRO_COMPLEX_mul_addto(totalre, totalim, *pa_i_j, *pb_j_k) \ pa_i_j ++; \ pb_j_k = (COMPLEX*) ((char*) pb_j_k + stride_b_scaled); void COMPLEX_mat_mul9(unsigned m, unsigned n, unsigned p, COMPLEX * c, unsigned stride_c, const COMPLEX *a, unsigned stride_a, const COMPLEX *b, unsigned stride_b) { const COMPLEX *pa_i = a; COMPLEX * pc_i = c; size_t stride_b_scaled = sizeof(COMPLEX) * stride_b; for(unsigned i=0; i 0) { do { CHUNK CHUNK CHUNK CHUNK j4++; } while (j4 < n4); } } { unsigned j4=0, n4=n%UNROLL; if (n4 > 0) { do { CHUNK j4++; } while (j4 < n4); } } pc_i[k].re = totalre; pc_i[k].im = totalim; } pa_i += stride_a; pc_i += stride_c; } } bool COMPLEX_equal(const COMPLEX *a, const COMPLEX *b) { return a->re==b->re && a->im==b->im; } bool COMPLEX_mat_equal(unsigned m, unsigned n, const COMPLEX *a, unsigned stride_a, const COMPLEX *b, unsigned stride_b) { for(unsigned i=0; i