aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-10-27 15:53:38 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-10-27 15:53:38 +0100
commit21af08182a41cd6a2fe1d2da08bedb572ab88981 (patch)
tree08220395866aa7308c159de854e8098d0a4bfa90
parent19c81a709d42467d57031ca275159a83568ca042 (diff)
parentbad9e770dd77304f6f1dddbfe9930d5b6897ae27 (diff)
downloadcompcert-kvx-21af08182a41cd6a2fe1d2da08bedb572ab88981.tar.gz
compcert-kvx-21af08182a41cd6a2fe1d2da08bedb572ab88981.zip
Merge remote-tracking branch 'origin/kvx-work' into kvx-test-prepass
-rw-r--r--README.md20
-rw-r--r--doc/index-kvx.html4
-rw-r--r--driver/Clflags.ml2
-rw-r--r--driver/Compopts.v3
-rw-r--r--driver/Driver.ml2
-rw-r--r--test/monniaux/loop_nest/polybench.h202
-rw-r--r--test/monniaux/loop_nest/syrk.h54
7 files changed, 277 insertions, 10 deletions
diff --git a/README.md b/README.md
index 59ff7447..377776ca 100644
--- a/README.md
+++ b/README.md
@@ -16,19 +16,23 @@ features, installation instructions, using the compiler, etc), please
refer to the [Web site](http://compcert.inria.fr/) and especially
the [user's manual](http://compcert.inria.fr/man/).
-## VERIMAG version
+## Verimag-Kalray version
This is a special version with additions from Verimag and Kalray :
- * Some general-purpose optimization phases (e.g. profiling).
- * A backend for the KVX processor.
+* A backend for the KVX processor: see [`README_Kalray.md`](README_Kalray.md) for details.
+* Some general-purpose optimization phases (e.g. profiling).
+ - see [`PROFILING.md`](PROFILING.md) for details on the profiling system
The people responsible for this version are
- * Sylvain Boulmé (Grenoble-INP, Verimag)
- * David Monniaux (CNRS, Verimag)
- * Cyril Six (Kalray)
-
-See also `README_Kalray.md` and `PROFILING.md` and [the online documentation](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx).
+* Sylvain Boulmé (Grenoble-INP, Verimag)
+* David Monniaux (CNRS, Verimag)
+* Cyril Six (Kalray)
+
+## Papers on this CompCert version
+
+* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend.
+* [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux.
## License
CompCert is not free software. This non-commercial release can only
diff --git a/doc/index-kvx.html b/doc/index-kvx.html
index 6906c212..b8850727 100644
--- a/doc/index-kvx.html
+++ b/doc/index-kvx.html
@@ -34,10 +34,10 @@ a:active {color : Red; text-decoration : underline; }
The unmodified parts of this table appear in <font color=gray>gray</font>.
<br>
<br>
- A high-level view of this CompCert backend is provided by this OOSPLA'20 paper (of Six, Boulm&eacute; and Monniaux):
+ A high-level view of this CompCert backend is provided by this OOPSLA'20 paper (of Six, Boulm&eacute; and Monniaux):
<div><a href=https://hal.archives-ouvertes.fr/hal-02185883>Certified and Efficient Instruction Scheduling. Application to Interlocked VLIW Processors.</a></div>
<br>
- Our source code is available on our <a href=https://gricad-gitlab.univ-grenoble-alpes.fr/certicompil/compcert-kvx>GitLab public repository</a> (see conditions in the LICENSE file).
+ See also the <tt>README.md</tt> of our <a href=https://gricad-gitlab.univ-grenoble-alpes.fr/certicompil/compcert-kvx>GitLab public repository</a>.
</p>
<font color=gray><H2>Table of contents</H2>
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index e9b0dade..206bbb00 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -27,11 +27,13 @@ let option_ftailcalls = ref true
let option_fconstprop = ref true
let option_fcse = ref true
let option_fcse2 = ref false
+
let option_fcse3 = ref true
let option_fcse3_alias_analysis = ref true
let option_fcse3_across_calls = ref false
let option_fcse3_across_merges = ref true
let option_fcse3_glb = ref true
+let option_fcse3_trivial_ops = ref false
let option_fredundancy = ref true
(** Options relative to superblock scheduling *)
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 540e8922..0c90ee52 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -54,6 +54,9 @@ Parameter optim_CSE3_across_merges: unit -> bool.
(** Flag -fcse3-glb *)
Parameter optim_CSE3_glb: unit -> bool.
+(** Flag -fcse3-trivial-ops. For DMonniaux's common subexpression elimination, simplify trivial operations as well. *)
+Parameter optim_CSE3_trivial_ops: unit -> bool.
+
(** Flag -fmove-loop-invariants. *)
Parameter optim_move_loop_invariants: unit -> bool.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 62e586f0..b93bf688 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -202,6 +202,7 @@ Processing options:
-fcse3-across-calls Propagate CSE3 information across function calls [off]
-fcse3-across-merges Propagate CSE3 information across control-flow merges [on]
-fcse3-glb Refine CSE3 information using greatest lower bounds [on]
+ -fcse3-trivial-ops Replace trivial operations as well using CSE3 [off]
-fmove-loop-invariants Perform loop-invariant code motion [off]
-fredundancy Perform redundancy elimination [on]
-mtune= Type of CPU (for scheduling on some architectures)
@@ -419,6 +420,7 @@ let cmdline_actions =
@ f_opt "cse3-across-calls" option_fcse3_across_calls
@ f_opt "cse3-across-merges" option_fcse3_across_merges
@ f_opt "cse3-glb" option_fcse3_glb
+ @ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops
@ f_opt "move-loop-invariants" option_fmove_loop_invariants
@ f_opt "redundancy" option_fredundancy
@ [ Exact "-mtune", String (fun s -> option_mtune := s) ]
diff --git a/test/monniaux/loop_nest/polybench.h b/test/monniaux/loop_nest/polybench.h
new file mode 100644
index 00000000..7d092e45
--- /dev/null
+++ b/test/monniaux/loop_nest/polybench.h
@@ -0,0 +1,202 @@
+/**
+ * polybench.h: This file is part of the PolyBench/C 3.2 test suite.
+ *
+ *
+ * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
+ * Web address: http://polybench.sourceforge.net
+ */
+/*
+ * Polybench header for instrumentation.
+ *
+ * Programs must be compiled with `-I utilities utilities/polybench.c'
+ *
+ * Optionally, one can define:
+ *
+ * -DPOLYBENCH_TIME, to report the execution time,
+ * OR (exclusive):
+ * -DPOLYBENCH_PAPI, to use PAPI H/W counters (defined in polybench.c)
+ *
+ *
+ * See README or utilities/polybench.c for additional options.
+ *
+ */
+#ifndef POLYBENCH_H
+# define POLYBENCH_H
+
+# include <stdlib.h>
+
+/* Array padding. By default, none is used. */
+# ifndef POLYBENCH_PADDING_FACTOR
+/* default: */
+# define POLYBENCH_PADDING_FACTOR 0
+# endif
+
+
+/* C99 arrays in function prototype. By default, do not use. */
+# ifdef POLYBENCH_USE_C99_PROTO
+# define POLYBENCH_C99_SELECT(x,y) y
+# else
+/* default: */
+# define POLYBENCH_C99_SELECT(x,y) x
+# endif
+
+
+/* Scalar loop bounds in SCoPs. By default, use parametric loop bounds. */
+# ifdef POLYBENCH_USE_SCALAR_LB
+# define POLYBENCH_LOOP_BOUND(x,y) x
+# else
+/* default: */
+# define POLYBENCH_LOOP_BOUND(x,y) y
+# endif
+
+
+/* Macros to reference an array. Generic for heap and stack arrays
+ (C99). Each array dimensionality has his own macro, to be used at
+ declaration or as a function argument.
+ Example:
+ int b[x] => POLYBENCH_1D_ARRAY(b, x)
+ int A[N][N] => POLYBENCH_2D_ARRAY(A, N, N)
+*/
+# ifndef POLYBENCH_STACK_ARRAYS
+# define POLYBENCH_ARRAY(x) *x
+# define POLYBENCH_FREE_ARRAY(x) free((void*)x);
+# define POLYBENCH_DECL_VAR(x) (*x)
+# else
+# define POLYBENCH_ARRAY(x) x
+# define POLYBENCH_FREE_ARRAY(x)
+# define POLYBENCH_DECL_VAR(x) x
+# endif
+/* Macros for using arrays in the function prototypes. */
+# define POLYBENCH_1D(var, dim1,ddim1) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_2D(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_3D(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_4D(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_5D(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR]
+
+
+/* Macros to allocate heap arrays.
+ Example:
+ polybench_alloc_2d_array(N, M, double) => allocates N x M x sizeof(double)
+ and returns a pointer to the 2d array
+ */
+# define POLYBENCH_ALLOC_1D_ARRAY(n1, type) \
+ (type(*)[n1 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data (n1 + POLYBENCH_PADDING_FACTOR, sizeof(type))
+# define POLYBENCH_ALLOC_2D_ARRAY(n1, n2, type) \
+ (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+# define POLYBENCH_ALLOC_3D_ARRAY(n1, n2, n3, type) \
+ (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+# define POLYBENCH_ALLOC_4D_ARRAY(n1, n2, n3, n4, type) \
+ (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+# define POLYBENCH_ALLOC_5D_ARRAY(n1, n2, n3, n4, n5, type) \
+ (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR][n5 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR) * (n5 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+
+/* Macros for array declaration. */
+# ifndef POLYBENCH_STACK_ARRAYS
+# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \
+ type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); \
+ var = POLYBENCH_ALLOC_1D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), type);
+# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \
+ type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); \
+ var = POLYBENCH_ALLOC_2D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), type);
+# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \
+ type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); \
+ var = POLYBENCH_ALLOC_3D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), type);
+# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \
+ type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, ,dim3, dim4, ddim1, ddim2, ddim3, ddim4); \
+ var = POLYBENCH_ALLOC_4D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), type);
+# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \
+ type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); \
+ var = POLYBENCH_ALLOC_5D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), POLYBENCH_C99_SELECT(dim5, ddim5), type);
+# else
+# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \
+ type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1);
+# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \
+ type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2);
+# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \
+ type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3);
+# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \
+ type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4);
+# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \
+ type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5);
+# endif
+
+
+/* Dead-code elimination macros. Use argc/argv for the run-time check. */
+# ifndef POLYBENCH_DUMP_ARRAYS
+# define POLYBENCH_DCE_ONLY_CODE if (argc > 42 && ! strcmp(argv[0], ""))
+# else
+# define POLYBENCH_DCE_ONLY_CODE
+# endif
+
+# define polybench_prevent_dce(func) \
+ POLYBENCH_DCE_ONLY_CODE \
+ func
+
+
+/* Performance-related instrumentation. See polybench.c */
+# define polybench_start_instruments
+# define polybench_stop_instruments
+# define polybench_print_instruments
+
+
+/* PAPI support. */
+# ifdef POLYBENCH_PAPI
+extern const unsigned int polybench_papi_eventlist[];
+# undef polybench_start_instruments
+# undef polybench_stop_instruments
+# undef polybench_print_instruments
+# define polybench_set_papi_thread_report(x) \
+ polybench_papi_counters_threadid = x;
+# define polybench_start_instruments \
+ polybench_prepare_instruments(); \
+ polybench_papi_init(); \
+ int evid; \
+ for (evid = 0; polybench_papi_eventlist[evid] != 0; evid++) \
+ { \
+ if (polybench_papi_start_counter(evid)) \
+ continue; \
+
+# define polybench_stop_instruments \
+ polybench_papi_stop_counter(evid); \
+ } \
+ polybench_papi_close(); \
+
+# define polybench_print_instruments polybench_papi_print();
+# endif
+
+
+/* Timing support. */
+# if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS)
+# undef polybench_start_instruments
+# undef polybench_stop_instruments
+# undef polybench_print_instruments
+# define polybench_start_instruments polybench_timer_start();
+# define polybench_stop_instruments polybench_timer_stop();
+# define polybench_print_instruments polybench_timer_print();
+extern double polybench_program_total_flops;
+extern void polybench_timer_start();
+extern void polybench_timer_stop();
+extern void polybench_timer_print();
+# endif
+
+/* Function declaration. */
+# ifdef POLYBENCH_TIME
+extern void polybench_timer_start();
+extern void polybench_timer_stop();
+extern void polybench_timer_print();
+# endif
+
+# ifdef POLYBENCH_PAPI
+extern void polybench_prepare_instruments();
+extern int polybench_papi_start_counter(int evid);
+extern void polybench_papi_stop_counter(int evid);
+extern void polybench_papi_init();
+extern void polybench_papi_close();
+extern void polybench_papi_print();
+# endif
+
+/* Function prototypes. */
+extern void* polybench_alloc_data(unsigned long long int n, int elt_size);
+
+
+#endif /* !POLYBENCH_H */
diff --git a/test/monniaux/loop_nest/syrk.h b/test/monniaux/loop_nest/syrk.h
new file mode 100644
index 00000000..c753ff3b
--- /dev/null
+++ b/test/monniaux/loop_nest/syrk.h
@@ -0,0 +1,54 @@
+/**
+ * syrk.h: This file is part of the PolyBench/C 3.2 test suite.
+ *
+ *
+ * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
+ * Web address: http://polybench.sourceforge.net
+ */
+#ifndef SYRK_H
+# define SYRK_H
+
+/* Default to STANDARD_DATASET. */
+# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET)
+# define STANDARD_DATASET
+# endif
+
+/* Do not define anything if the user manually defines the size. */
+# if !defined(NI) && !defined(NJ)
+/* Define the possible dataset sizes. */
+# ifdef MINI_DATASET
+# define NI 32
+# define NJ 32
+# endif
+
+# ifdef SMALL_DATASET
+# define NI 128
+# define NJ 128
+# endif
+
+# ifdef STANDARD_DATASET /* Default if unspecified. */
+# define NI 1024
+# define NJ 1024
+# endif
+
+# ifdef LARGE_DATASET
+# define NI 2000
+# define NJ 2000
+# endif
+
+# ifdef EXTRALARGE_DATASET
+# define NI 4000
+# define NJ 4000
+# endif
+# endif /* !N */
+
+# define _PB_NI POLYBENCH_LOOP_BOUND(NI,ni)
+# define _PB_NJ POLYBENCH_LOOP_BOUND(NJ,nj)
+
+# ifndef DATA_TYPE
+# define DATA_TYPE double
+# define DATA_PRINTF_MODIFIER "%0.2lf "
+# endif
+
+
+#endif /* !SYRK */