diff options
-rw-r--r-- | README.md | 20 | ||||
-rw-r--r-- | doc/index-kvx.html | 4 | ||||
-rw-r--r-- | driver/Clflags.ml | 2 | ||||
-rw-r--r-- | driver/Compopts.v | 3 | ||||
-rw-r--r-- | driver/Driver.ml | 2 | ||||
-rw-r--r-- | test/monniaux/loop_nest/polybench.h | 202 | ||||
-rw-r--r-- | test/monniaux/loop_nest/syrk.h | 54 |
7 files changed, 277 insertions, 10 deletions
@@ -16,19 +16,23 @@ features, installation instructions, using the compiler, etc), please refer to the [Web site](http://compcert.inria.fr/) and especially the [user's manual](http://compcert.inria.fr/man/). -## VERIMAG version +## Verimag-Kalray version This is a special version with additions from Verimag and Kalray : - * Some general-purpose optimization phases (e.g. profiling). - * A backend for the KVX processor. +* A backend for the KVX processor: see [`README_Kalray.md`](README_Kalray.md) for details. +* Some general-purpose optimization phases (e.g. profiling). + - see [`PROFILING.md`](PROFILING.md) for details on the profiling system The people responsible for this version are - * Sylvain Boulmé (Grenoble-INP, Verimag) - * David Monniaux (CNRS, Verimag) - * Cyril Six (Kalray) - -See also `README_Kalray.md` and `PROFILING.md` and [the online documentation](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx). +* Sylvain Boulmé (Grenoble-INP, Verimag) +* David Monniaux (CNRS, Verimag) +* Cyril Six (Kalray) + +## Papers on this CompCert version + +* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend. +* [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux. ## License CompCert is not free software. This non-commercial release can only diff --git a/doc/index-kvx.html b/doc/index-kvx.html index 6906c212..b8850727 100644 --- a/doc/index-kvx.html +++ b/doc/index-kvx.html @@ -34,10 +34,10 @@ a:active {color : Red; text-decoration : underline; } The unmodified parts of this table appear in <font color=gray>gray</font>. <br> <br> - A high-level view of this CompCert backend is provided by this OOSPLA'20 paper (of Six, Boulmé and Monniaux): + A high-level view of this CompCert backend is provided by this OOPSLA'20 paper (of Six, Boulmé and Monniaux): <div><a href=https://hal.archives-ouvertes.fr/hal-02185883>Certified and Efficient Instruction Scheduling. Application to Interlocked VLIW Processors.</a></div> <br> - Our source code is available on our <a href=https://gricad-gitlab.univ-grenoble-alpes.fr/certicompil/compcert-kvx>GitLab public repository</a> (see conditions in the LICENSE file). + See also the <tt>README.md</tt> of our <a href=https://gricad-gitlab.univ-grenoble-alpes.fr/certicompil/compcert-kvx>GitLab public repository</a>. </p> <font color=gray><H2>Table of contents</H2> diff --git a/driver/Clflags.ml b/driver/Clflags.ml index e9b0dade..206bbb00 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -27,11 +27,13 @@ let option_ftailcalls = ref true let option_fconstprop = ref true let option_fcse = ref true let option_fcse2 = ref false + let option_fcse3 = ref true let option_fcse3_alias_analysis = ref true let option_fcse3_across_calls = ref false let option_fcse3_across_merges = ref true let option_fcse3_glb = ref true +let option_fcse3_trivial_ops = ref false let option_fredundancy = ref true (** Options relative to superblock scheduling *) diff --git a/driver/Compopts.v b/driver/Compopts.v index 540e8922..0c90ee52 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -54,6 +54,9 @@ Parameter optim_CSE3_across_merges: unit -> bool. (** Flag -fcse3-glb *) Parameter optim_CSE3_glb: unit -> bool. +(** Flag -fcse3-trivial-ops. For DMonniaux's common subexpression elimination, simplify trivial operations as well. *) +Parameter optim_CSE3_trivial_ops: unit -> bool. + (** Flag -fmove-loop-invariants. *) Parameter optim_move_loop_invariants: unit -> bool. diff --git a/driver/Driver.ml b/driver/Driver.ml index 62e586f0..b93bf688 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -202,6 +202,7 @@ Processing options: -fcse3-across-calls Propagate CSE3 information across function calls [off] -fcse3-across-merges Propagate CSE3 information across control-flow merges [on] -fcse3-glb Refine CSE3 information using greatest lower bounds [on] + -fcse3-trivial-ops Replace trivial operations as well using CSE3 [off] -fmove-loop-invariants Perform loop-invariant code motion [off] -fredundancy Perform redundancy elimination [on] -mtune= Type of CPU (for scheduling on some architectures) @@ -419,6 +420,7 @@ let cmdline_actions = @ f_opt "cse3-across-calls" option_fcse3_across_calls @ f_opt "cse3-across-merges" option_fcse3_across_merges @ f_opt "cse3-glb" option_fcse3_glb + @ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops @ f_opt "move-loop-invariants" option_fmove_loop_invariants @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] diff --git a/test/monniaux/loop_nest/polybench.h b/test/monniaux/loop_nest/polybench.h new file mode 100644 index 00000000..7d092e45 --- /dev/null +++ b/test/monniaux/loop_nest/polybench.h @@ -0,0 +1,202 @@ +/** + * polybench.h: This file is part of the PolyBench/C 3.2 test suite. + * + * + * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu> + * Web address: http://polybench.sourceforge.net + */ +/* + * Polybench header for instrumentation. + * + * Programs must be compiled with `-I utilities utilities/polybench.c' + * + * Optionally, one can define: + * + * -DPOLYBENCH_TIME, to report the execution time, + * OR (exclusive): + * -DPOLYBENCH_PAPI, to use PAPI H/W counters (defined in polybench.c) + * + * + * See README or utilities/polybench.c for additional options. + * + */ +#ifndef POLYBENCH_H +# define POLYBENCH_H + +# include <stdlib.h> + +/* Array padding. By default, none is used. */ +# ifndef POLYBENCH_PADDING_FACTOR +/* default: */ +# define POLYBENCH_PADDING_FACTOR 0 +# endif + + +/* C99 arrays in function prototype. By default, do not use. */ +# ifdef POLYBENCH_USE_C99_PROTO +# define POLYBENCH_C99_SELECT(x,y) y +# else +/* default: */ +# define POLYBENCH_C99_SELECT(x,y) x +# endif + + +/* Scalar loop bounds in SCoPs. By default, use parametric loop bounds. */ +# ifdef POLYBENCH_USE_SCALAR_LB +# define POLYBENCH_LOOP_BOUND(x,y) x +# else +/* default: */ +# define POLYBENCH_LOOP_BOUND(x,y) y +# endif + + +/* Macros to reference an array. Generic for heap and stack arrays + (C99). Each array dimensionality has his own macro, to be used at + declaration or as a function argument. + Example: + int b[x] => POLYBENCH_1D_ARRAY(b, x) + int A[N][N] => POLYBENCH_2D_ARRAY(A, N, N) +*/ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_ARRAY(x) *x +# define POLYBENCH_FREE_ARRAY(x) free((void*)x); +# define POLYBENCH_DECL_VAR(x) (*x) +# else +# define POLYBENCH_ARRAY(x) x +# define POLYBENCH_FREE_ARRAY(x) +# define POLYBENCH_DECL_VAR(x) x +# endif +/* Macros for using arrays in the function prototypes. */ +# define POLYBENCH_1D(var, dim1,ddim1) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_2D(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_3D(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_4D(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_5D(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR] + + +/* Macros to allocate heap arrays. + Example: + polybench_alloc_2d_array(N, M, double) => allocates N x M x sizeof(double) + and returns a pointer to the 2d array + */ +# define POLYBENCH_ALLOC_1D_ARRAY(n1, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data (n1 + POLYBENCH_PADDING_FACTOR, sizeof(type)) +# define POLYBENCH_ALLOC_2D_ARRAY(n1, n2, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_3D_ARRAY(n1, n2, n3, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_4D_ARRAY(n1, n2, n3, n4, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_5D_ARRAY(n1, n2, n3, n4, n5, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR][n5 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR) * (n5 + POLYBENCH_PADDING_FACTOR), sizeof(type)) + +/* Macros for array declaration. */ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); \ + var = POLYBENCH_ALLOC_1D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), type); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); \ + var = POLYBENCH_ALLOC_2D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), type); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); \ + var = POLYBENCH_ALLOC_3D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), type); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, ,dim3, dim4, ddim1, ddim2, ddim3, ddim4); \ + var = POLYBENCH_ALLOC_4D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), type); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); \ + var = POLYBENCH_ALLOC_5D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), POLYBENCH_C99_SELECT(dim5, ddim5), type); +# else +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); +# endif + + +/* Dead-code elimination macros. Use argc/argv for the run-time check. */ +# ifndef POLYBENCH_DUMP_ARRAYS +# define POLYBENCH_DCE_ONLY_CODE if (argc > 42 && ! strcmp(argv[0], "")) +# else +# define POLYBENCH_DCE_ONLY_CODE +# endif + +# define polybench_prevent_dce(func) \ + POLYBENCH_DCE_ONLY_CODE \ + func + + +/* Performance-related instrumentation. See polybench.c */ +# define polybench_start_instruments +# define polybench_stop_instruments +# define polybench_print_instruments + + +/* PAPI support. */ +# ifdef POLYBENCH_PAPI +extern const unsigned int polybench_papi_eventlist[]; +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_set_papi_thread_report(x) \ + polybench_papi_counters_threadid = x; +# define polybench_start_instruments \ + polybench_prepare_instruments(); \ + polybench_papi_init(); \ + int evid; \ + for (evid = 0; polybench_papi_eventlist[evid] != 0; evid++) \ + { \ + if (polybench_papi_start_counter(evid)) \ + continue; \ + +# define polybench_stop_instruments \ + polybench_papi_stop_counter(evid); \ + } \ + polybench_papi_close(); \ + +# define polybench_print_instruments polybench_papi_print(); +# endif + + +/* Timing support. */ +# if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS) +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_start_instruments polybench_timer_start(); +# define polybench_stop_instruments polybench_timer_stop(); +# define polybench_print_instruments polybench_timer_print(); +extern double polybench_program_total_flops; +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); +# endif + +/* Function declaration. */ +# ifdef POLYBENCH_TIME +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); +# endif + +# ifdef POLYBENCH_PAPI +extern void polybench_prepare_instruments(); +extern int polybench_papi_start_counter(int evid); +extern void polybench_papi_stop_counter(int evid); +extern void polybench_papi_init(); +extern void polybench_papi_close(); +extern void polybench_papi_print(); +# endif + +/* Function prototypes. */ +extern void* polybench_alloc_data(unsigned long long int n, int elt_size); + + +#endif /* !POLYBENCH_H */ diff --git a/test/monniaux/loop_nest/syrk.h b/test/monniaux/loop_nest/syrk.h new file mode 100644 index 00000000..c753ff3b --- /dev/null +++ b/test/monniaux/loop_nest/syrk.h @@ -0,0 +1,54 @@ +/** + * syrk.h: This file is part of the PolyBench/C 3.2 test suite. + * + * + * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu> + * Web address: http://polybench.sourceforge.net + */ +#ifndef SYRK_H +# define SYRK_H + +/* Default to STANDARD_DATASET. */ +# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET) +# define STANDARD_DATASET +# endif + +/* Do not define anything if the user manually defines the size. */ +# if !defined(NI) && !defined(NJ) +/* Define the possible dataset sizes. */ +# ifdef MINI_DATASET +# define NI 32 +# define NJ 32 +# endif + +# ifdef SMALL_DATASET +# define NI 128 +# define NJ 128 +# endif + +# ifdef STANDARD_DATASET /* Default if unspecified. */ +# define NI 1024 +# define NJ 1024 +# endif + +# ifdef LARGE_DATASET +# define NI 2000 +# define NJ 2000 +# endif + +# ifdef EXTRALARGE_DATASET +# define NI 4000 +# define NJ 4000 +# endif +# endif /* !N */ + +# define _PB_NI POLYBENCH_LOOP_BOUND(NI,ni) +# define _PB_NJ POLYBENCH_LOOP_BOUND(NJ,nj) + +# ifndef DATA_TYPE +# define DATA_TYPE double +# define DATA_PRINTF_MODIFIER "%0.2lf " +# endif + + +#endif /* !SYRK */ |