/*
 * Matrix-multiply kernels and random-value helpers operating on `modint`
 * elements, with results reduced by `% MODULUS`.
 *
 * NOTE(review): this text appears to be damaged. In two places a loop header
 * reads `for(unsigned i=0; i 0)`, which looks like everything between a
 * less-than sign and a later greater-than sign was stripped out, and the file
 * ends in the middle of a `for` statement. The comments below describe only
 * what is still visible; restore the file from a pristine copy before relying
 * on it.
 */
#include "modint.h"

/*
 * modint_mat_mul1: only the signature is reliably visible here.
 *
 * Parameters, as declared: dimensions m, n, p; output pointer c (declared
 * `restrict`) with stride_c; input pointers a and b (both const) with
 * stride_a and stride_b.
 *
 * NOTE(review): presumably this multiplies an m-by-n matrix a by an n-by-p
 * matrix b into c -- confirm against the original file. The body that follows
 * the damaged loop header unrolls the inner product by two and may actually
 * be the tail of a different, later function whose beginning (and any
 * functions in between) was lost.
 */
void modint_mat_mul1(unsigned m, unsigned n, unsigned p,
                     modint * restrict c, unsigned stride_c,
                     const modint *a, unsigned stride_a,
                     const modint *b, unsigned stride_b) {
  /* NOTE(review): damaged span. The loop condition, the declarations of
   * pa_i, pc_i, k, total, pa_i_j, pb_j_k, j2 and n2, and the start of the
   * `if (... > 0)` test are not visible. */
  for(unsigned i=0; i 0) {
        /* Two multiply-accumulate steps per iteration: pa_i_j advances by one
         * element, pb_j_k advances by stride_b elements. */
        do {
          total += *pa_i_j * *pb_j_k;
          pa_i_j ++;
          pb_j_k += stride_b;
          total += *pa_i_j * *pb_j_k;
          pa_i_j ++;
          pb_j_k += stride_b;
          j2++;
        } while (j2 < n2);
      }
      /* One extra multiply-accumulate when n is odd. */
      if (n%2) {
        total += *pa_i_j * *pb_j_k;
      }
      /* Reduce the accumulated sum and store it at index k of the current
       * output row. */
      pc_i[k] = total % MODULUS;
    }
    /* Step the a-side and c-side row pointers by their respective strides. */
    pa_i += stride_a;
    pc_i += stride_c;
  }
}

/*
 * modint_mat_mul7: same parameter list as above except that c is not declared
 * `restrict`. The visible body accumulates products four at a time, then
 * handles the n%4 leftover terms one at a time, and stores `total % MODULUS`
 * into pc_i[k].
 *
 * NOTE(review): the outer loop header and the inner-loop setup are damaged
 * (see below), so the exact iteration bounds cannot be confirmed from this
 * text.
 */
void modint_mat_mul7(unsigned m, unsigned n, unsigned p,
                     modint * c, unsigned stride_c,
                     const modint *a, unsigned stride_a,
                     const modint *b, unsigned stride_b) {
  /* Row cursors start at the beginning of a and c. */
  const modint *pa_i = a;
  modint * pc_i = c;
  /* NOTE(review): damaged span. The loop condition, the k loop, the
   * declarations of total, pa_i_j, pb_j_k and the first j4/n4 pair, and the
   * opening brace of their scope are not visible. */
  for(unsigned i=0; i 0) {
          /* Four multiply-accumulate steps per iteration; pa_i_j moves by one
           * element and pb_j_k by stride_b elements after each step. */
          do {
            total += *pa_i_j * *pb_j_k;
            pa_i_j ++;
            pb_j_k += stride_b;
            total += *pa_i_j * *pb_j_k;
            pa_i_j ++;
            pb_j_k += stride_b;
            total += *pa_i_j * *pb_j_k;
            pa_i_j ++;
            pb_j_k += stride_b;
            total += *pa_i_j * *pb_j_k;
            pa_i_j ++;
            pb_j_k += stride_b;
            j4++;
          } while (j4 < n4);
        }
      }
      /* Leftover terms: n%4 single multiply-accumulate steps. The j4/n4 names
       * are reused in a fresh scope with a different meaning for n4. */
      {
        unsigned j4=0, n4=n%4;
        if (n4 > 0) {
          do {
            total += *pa_i_j * *pb_j_k;
            pa_i_j ++;
            pb_j_k += stride_b;
            j4++;
          } while (j4 < n4);
        }
      }
      /* Reduce the accumulated sum and store it at index k of the current
       * output row. */
      pc_i[k] = total % MODULUS;
    }
    /* Step the a-side and c-side row pointers by their respective strides. */
    pa_i += stride_a;
    pc_i += stride_c;
  }
}

/*
 * modint_random: returns the next value of a deterministic pseudo-random
 * sequence, reduced by `% MODULUS`.
 *
 * The generator state lives in a function-local static, so successive calls
 * continue the same sequence and the sequence is identical on every program
 * run. The state is a uint32_t, so the update is stored back truncated to
 * 32 bits.
 *
 * NOTE(review): the multiplier 1103515249 differs from the widely used
 * 1103515245 constant; confirm whether that is intentional.
 */
modint modint_random(void) {
  static uint32_t next = 1325997111;
  next = next * 1103515249 + 12345;
  return next % MODULUS;
}

/*
 * modint_mat_random: takes dimensions m and n, a writable pointer a and
 * stride_a.
 *
 * NOTE(review): presumably fills an m-by-n matrix with modint_random()
 * values, but the body is cut off in the middle of the first loop header, so
 * nothing beyond the signature can be confirmed from this text.
 */
void modint_mat_random(unsigned m, unsigned n, modint *a, unsigned stride_a) {
  for(unsigned i=0; i