From 23bad2f693dfa4b2eaec8f677fd82a1c863a669d Mon Sep 17 00:00:00 2001 From: Sylvain Boulmé Date: Tue, 4 Jan 2022 08:17:57 +0100 Subject: add LCTES paper in README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6406cd27..c99d4a67 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ Please follow the instructions in [`INSTALL.md`](INSTALL.md) * [A 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend (also on [YouTube if you need subtitles](https://www.youtube.com/watch?v=RAzMDS9OVSw)). * [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux. +* [Simple, Light, Yet Formally Verified, Global Common Subexpression Elimination and Loop-Invariant Code Motion](https://hal.archives-ouvertes.fr/hal-03212087), a LCTES'21 paper, by Monniaux and Six. * [Formally Verified Superblock Scheduling](https://hal.archives-ouvertes.fr/hal-03200774), a CPP'22 paper, by Six, Gourdin, Boulmé, Monniaux, Fasse and Nardino. * [Optimized and formally-verified compilation for a VLIW processor](https://hal.archives-ouvertes.fr/tel-03326923), Phd Thesis of Cyril Six in 2021. * [Formally Verified Defensive Programming (efficient Coq-verified computations from untrusted ML oracles) -- Chapters 1 to 3](https://hal.archives-ouvertes.fr/tel-03356701), Habilitation Thesis of Sylvain Boulmé in 2021. -- cgit From 1c264a258be01623b8936657a32f2251ca2059c1 Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Wed, 5 Jan 2022 15:22:22 +0100 Subject: ccomp profiling --- Makefile.extr | 43 +++++++++++++++++++++++++++++++++++++++++-- configure | 1 + test/Makefile | 3 +++ test/abi/Makefile | 29 ++++++++++++++--------------- test/compression/Makefile | 8 +++++++- test/raytracer/Makefile | 6 ++++++ test/spass/Makefile | 6 ++++++ 7 files changed, 78 insertions(+), 18 deletions(-) diff --git a/Makefile.extr b/Makefile.extr index 6106aff2..0d2ec61f 100644 --- a/Makefile.extr +++ b/Makefile.extr @@ -77,6 +77,9 @@ endif OCAMLC=ocamlc$(DOTOPT) $(COMPFLAGS) OCAMLOPT=ocamlopt$(DOTOPT) $(COMPFLAGS) OCAMLDEP=ocamldep$(DOTOPT) -slash $(INCLUDES) +OCAMLFIND=ocamlfind +LMFLAGS_LINK=-package landmarks -linkpkg +LMFLAGS_COMP=-package landmarks -package landmarks-ppx -ppxopt landmarks-ppx,--auto OCAMLLEX=ocamllex -q MODORDER=tools/modorder .depend.extr @@ -99,9 +102,15 @@ ifeq ($(wildcard .depend.extr),.depend.extr) CCOMP_OBJS:=$(shell $(MODORDER) driver/Driver.cmx) ifeq ($(OCAML_NATIVE_COMP),true) +ifeq ($(OCAML_LM_PROF), true) +ccomp: $(CCOMP_OBJS) + @echo "Linking $@ with landmarks profiling" + @$(OCAMLFIND) opt $(COMPFLAGS) -o $@ $(LMFLAGS_LINK) $(LIBS) $(LINK_OPT) $+ +else ccomp: $(CCOMP_OBJS) @echo "Linking $@" @$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $+ +endif else ccomp: ccomp.byte @echo "Copying to $@" @@ -112,16 +121,28 @@ endif ccomp.force: $(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $(CCOMP_OBJS) +ifeq ($(OCAML_LM_PROF), true) +ccomp.byte: $(CCOMP_OBJS:.cmx=.cmo) + @echo "Linking $@ with landmarks profiling" + @$(OCAMLFIND) ocamlc $(COMPFLAGS) -o $@ $(LMFLAGS_LINK) $(LIBS_BYTE) $+ +else ccomp.byte: $(CCOMP_OBJS:.cmx=.cmo) @echo "Linking $@" @$(OCAMLC) -o $@ $(LIBS_BYTE) $+ +endif CLIGHTGEN_OBJS:=$(shell $(MODORDER) export/ExportDriver.cmx) ifeq ($(OCAML_NATIVE_COMP),true) +ifeq ($(OCAML_LM_PROF), true) +clightgen: $(CLIGHTGEN_OBJS) + @echo "Linking $@ with landmarks profiling" + @$(OCAMLFIND) opt $(COMPFLAGS) -o $@ $(LMFLAGS_LINK) $(LIBS) $(LINK_OPT) $+ +else clightgen: $(CLIGHTGEN_OBJS) @echo "Linking $@" @$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $+ +endif else clightgen: clightgen.byte @echo "Copying to $@" @@ -138,15 +159,33 @@ endif # End of part that assumes .depend.extr already exists +ifeq ($(OCAML_LM_PROF), true) %.cmi: %.mli - @echo "OCAMLC $<" + @echo "OCAMLC $< with landmarks profiling" + @$(OCAMLFIND) ocamlc $(COMPFLAGS) -c $(LMFLAGS_COMP) $< +else +%.cmi: %.mli + @echo "OCAMLC $<" @$(OCAMLC) -c $< +endif +ifeq ($(OCAML_LM_PROF), true) +%.cmo: %.ml + @echo "OCAMLC $< with landmarks profiling" + @$(OCAMLFIND) ocamlc $(COMPFLAGS) -c $(LMFLAGS_COMP) $< +else %.cmo: %.ml - @echo "OCAMLC $<" + @echo "OCAMLC $<" @$(OCAMLC) -c $< +endif +ifeq ($(OCAML_LM_PROF), true) +%.cmx: %.ml + @echo "OCAMLOPT $< with landmarks profiling" + @$(OCAMLFIND) opt $(COMPFLAGS) -c $(LMFLAGS_COMP) $< +else %.cmx: %.ml @echo "OCAMLOPT $<" @$(OCAMLOPT) -c $< +endif %.ml: %.mll $(OCAMLLEX) $< diff --git a/configure b/configure index 3da00fb3..e21c2424 100755 --- a/configure +++ b/configure @@ -661,6 +661,7 @@ SHAREDIR=$sharedir COQDEVDIR=$coqdevdir OCAML_NATIVE_COMP=$ocaml_native_comp OCAML_OPT_COMP=$ocaml_opt_comp +OCAML_LM_PROF=false MENHIR_DIR=$menhir_dir COMPFLAGS=-bin-annot EOF diff --git a/test/Makefile b/test/Makefile index b0010ea0..e998b52a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -17,6 +17,9 @@ endif all: set -e; for i in $(DIRS); do $(MAKE) CCOMPOPTS='$(CCOMPOPTS)' -C $$i all; done +all_s: + set -e; for i in $(DIRS); do $(MAKE) CCOMPOPTS='$(CCOMPOPTS)' -C $$i all_s; done + test: set -e; for i in $(DIRS); do $(MAKE) SIMU='$(SIMU)' -C $$i test; done diff --git a/test/abi/Makefile b/test/abi/Makefile index ef354e06..ee00d4de 100644 --- a/test/abi/Makefile +++ b/test/abi/Makefile @@ -1,7 +1,7 @@ include ../../Makefile.config CCOMP=../../ccomp -stdlib ../../runtime -CCOMPFLAGS= +CCOMPFLAGS=-fstruct-passing CFLAGS=-O -Wno-overflow -Wno-constant-conversion TESTS=fixed.compcert fixed.cc2compcert fixed.compcert2cc \ @@ -17,18 +17,6 @@ all_s: fixed_def_compcert.s fixed_use_compcert.s \ vararg_def_compcert.s vararg_use_compcert.s \ struct_def_compcert.s struct_use_compcert.s -test: - @set -e; for t in $(TESTS); do \ - SIMU='$(SIMU)' ARCH=$(ARCH) MODEL=$(MODEL) ABI=$(ABI) SYSTEM=$(SYSTEM) ./Runtest $$t; \ - done - @set -e; for t in layout staticlayout; do \ - $(SIMU) ./$$t.compcert > _compcert.log; \ - $(SIMU) ./$$t.cc > _cc.log; \ - if diff -a -u _cc.log _compcert.log; \ - then echo "$$t: CompCert and $CC agree"; rm _*.log; \ - else echo "$$t: CompCert and $CC DISAGREE"; exit 2; fi; \ - done - generator.exe: generator.ml ocamlopt -g -o $@ generator.ml @@ -72,7 +60,7 @@ endif layout.h: genlayout.exe ./genlayout.exe $(GENLAYOUT_OPTIONS) > layout.h -struct%.o: CCOMPFLAGS += -fstruct-passing -dclight +struct%.o: CCOMPFLAGS +=-dclight %_compcert.o: %.c $(CCOMP) $(CCOMPFLAGS) -c -o $@ $*.c @@ -80,7 +68,7 @@ struct%.o: CCOMPFLAGS += -fstruct-passing -dclight $(CC) $(CFLAGS) -c -o $@ $*.c %_compcert.s: %.c - $(CCOMP) -S -o $@ $*.c + $(CCOMP) $(CCOMPFLAGS) -S -o $@ $*.c %_cc.s: %.c $(CC) $(CFLAGS) -S -o $@ $*.c @@ -109,3 +97,14 @@ staticlayout.cc: staticlayout.c layout.h clean:: rm -f *.[os] *.compcert *.cc2compcert *.compcert2cc *.light.c +test: + @set -e; for t in $(TESTS); do \ + SIMU='$(SIMU)' ARCH=$(ARCH) MODEL=$(MODEL) ABI=$(ABI) SYSTEM=$(SYSTEM) ./Runtest $$t; \ + done + @set -e; for t in layout staticlayout; do \ + $(SIMU) ./$$t.compcert > _compcert.log; \ + $(SIMU) ./$$t.cc > _cc.log; \ + if diff -a -u _cc.log _compcert.log; \ + then echo "$$t: CompCert and $CC agree"; rm _*.log; \ + else echo "$$t: CompCert and $CC DISAGREE"; exit 2; fi; \ + done diff --git a/test/compression/Makefile b/test/compression/Makefile index ff7032d5..330c33af 100644 --- a/test/compression/Makefile +++ b/test/compression/Makefile @@ -3,7 +3,11 @@ include ../../Makefile.config SIMU=timeout --signal=SIGKILL 20s $(EXECUTE) CC=../../ccomp -CFLAGS=$(CCOMPOPTS) -U__GNUC__ -stdlib ../../runtime -dclight -dasm +ifeq ($(OCAML_LM_PROF),true) + override CCOMPOPTS+=-S +endif + +CFLAGS:=$(CCOMPOPTS) -U__GNUC__ -stdlib ../../runtime -dclight -dasm LIBS= TIME=ocaml unix.cma ../../tools/xtime.ml -mintime 2.0 -minruns 2 @@ -13,6 +17,8 @@ COMMON_OBJS=optlist.o bitfile.o all: $(EXE) +all_s: all + ARCODE_OBJS=$(COMMON_OBJS) arcode.o armain.o arcode: $(ARCODE_OBJS) diff --git a/test/raytracer/Makefile b/test/raytracer/Makefile index 24461bd1..c59bb21e 100644 --- a/test/raytracer/Makefile +++ b/test/raytracer/Makefile @@ -1,6 +1,10 @@ include ../../Makefile.config CC=../../ccomp +ifeq ($(OCAML_LM_PROF), true) + override CCOMPOPTS+=-S +endif + CFLAGS=$(CCOMPOPTS) -stdlib ../../runtime -dparse -dclight -dasm -fstruct-return LIBS=$(LIBMATH) TIME=ocaml unix.cma ../../tools/xtime.ml -mintime 2.0 -minruns 4 @@ -11,6 +15,8 @@ OBJS=memory.o gmllexer.o gmlparser.o eval.o \ all: render +all_s: all + render: $(OBJS) $(CC) $(CFLAGS) -o render $(OBJS) $(LIBS) diff --git a/test/spass/Makefile b/test/spass/Makefile index d512ea95..7b9b4f65 100644 --- a/test/spass/Makefile +++ b/test/spass/Makefile @@ -1,6 +1,10 @@ include ../../Makefile.config CC=../../ccomp +ifeq ($(OCAML_LM_PROF), true) + override CCOMPOPTS+=-S +endif + CFLAGS=$(CCOMPOPTS) -stdlib ../../runtime -dparse -dclight -dasm -fstruct-return SRCS=analyze.c clause.c clock.c closure.c cnf.c component.c \ @@ -14,6 +18,8 @@ SRCS=analyze.c clause.c clock.c closure.c cnf.c component.c \ all: spass +all_s: all + spass: $(SRCS:.c=.o) $(CC) $(CFLAGS) -o spass $(SRCS:.c=.o) $(LIBMATH) -- cgit From 1749be90299633474d2acad39ec8c70df6ccab32 Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Wed, 5 Jan 2022 15:32:46 +0100 Subject: update from BTL dev branch --- OPTIM_PROFILING.md | 34 +++++ PROFILING.md | 34 ----- README.md | 4 +- README_Kalray.md | 37 ----- ccomp_profiling/CCOMP_PROFILING.md | 162 +++++++++++++++++++++ .../old_script_with_perf/perf_append.sh | 72 +++++++++ ccomp_profiling/prof_ci_tests.sh | 31 ++++ ccomp_profiling/prof_function.sh | 47 ++++++ ccomp_profiling/prof_multiple_ccomp.sh | 19 +++ ccomp_profiling/stats_prof_info.py | 21 +++ 10 files changed, 389 insertions(+), 72 deletions(-) create mode 100644 OPTIM_PROFILING.md delete mode 100644 PROFILING.md delete mode 100644 README_Kalray.md create mode 100644 ccomp_profiling/CCOMP_PROFILING.md create mode 100755 ccomp_profiling/old_script_with_perf/perf_append.sh create mode 100755 ccomp_profiling/prof_ci_tests.sh create mode 100755 ccomp_profiling/prof_function.sh create mode 100755 ccomp_profiling/prof_multiple_ccomp.sh create mode 100755 ccomp_profiling/stats_prof_info.py diff --git a/OPTIM_PROFILING.md b/OPTIM_PROFILING.md new file mode 100644 index 00000000..8eb8c585 --- /dev/null +++ b/OPTIM_PROFILING.md @@ -0,0 +1,34 @@ +This version of CompCert includes a profiling system. It tells CompCert's optimization phases for each conditional branch instruction which of the two branches was more frequently taken. This system is not available for all combinations of target architecture and operating system; see below. + +For using this profiling system one has to +1. Compile a special version of the program that will count, for each branch, the number of times it was taken, and recording this information to a file. +2. Execute this special version on representative examples. It will record the frequencies of execution of branches to a log file. +3. Recompile the program, telling CompCert to use the information in the log file. + +This system does not use the same formats as gcc's gcov profiles, since it depends heavily on compiler internals. It seems however possible to profile and optimize programs consisting of modules compiled with gcc and CompCert by using both system simultaneously: compiler uses separate log files. + +To compile the special version that logs frequencies to files, use the option `-fprofile-arcs`. This option has to be specified at compile time but is not needed at link time (however, a reminder: if you link using another compiled than CompCert, you need to link against `libcompcert.a`). You may mix object files compiled with and without this option. + +This version may experience significant slowdown compared to normally compiled code, so do not use `-fprofile-arcs` for production code. + +At the end of execution of the program, frequency information will be logged to a file whose default name is `compcert_profiling.dat` (in the current directory). Another name may be used by specifying it using the `COMPCERT_PROFILING_DATA` environment variable. If this variable contains an empty string, no logging is done (but the slowdown still applies). + +Data are appended to the log file, never deleted, so it is safe to run the program several times on several test cases to accumulate data. + +Depending on the platform, this logging system is or is not thread-safe and is or is not compatible with position-independent code (PIC). In non thread-safe configurations, if two different execution threads execute code to be profiled, the profiling counters may end up with incorrect values. + +| Target platform | Available? | Thread-safe | PIC | +|-----------------|------------|-------------|-----| +| AArch64 | Yes | Yes | No | +| ARM | Yes | No | No | +| IA32 | Yes | No | No | +| KVX | Yes | Yes | No | +| PowerPC | No | | | +| PowerPC 64 | No | | | +| Risc-V 32 | No | | | +| Risc-V 64 | No | | | +| x86-64 | Yes | Yes | Yes | + +For recompiling the program using profiling information, use `-fprofile-use= compcert_profiling.dat -ftracelinearize` (substitute the appropriate filename for `compcert_profiling.dat` if needed). Experiments show performance improvement on KVX, not on other platforms. + +The same options (except for `-fprofile-use=` and `-fprofile-arcs`) should be used to compile the logging and optimized versions of the program: only functions that are exactly the same in the intermediate representation will be optimized according to profiling information. diff --git a/PROFILING.md b/PROFILING.md deleted file mode 100644 index 8eb8c585..00000000 --- a/PROFILING.md +++ /dev/null @@ -1,34 +0,0 @@ -This version of CompCert includes a profiling system. It tells CompCert's optimization phases for each conditional branch instruction which of the two branches was more frequently taken. This system is not available for all combinations of target architecture and operating system; see below. - -For using this profiling system one has to -1. Compile a special version of the program that will count, for each branch, the number of times it was taken, and recording this information to a file. -2. Execute this special version on representative examples. It will record the frequencies of execution of branches to a log file. -3. Recompile the program, telling CompCert to use the information in the log file. - -This system does not use the same formats as gcc's gcov profiles, since it depends heavily on compiler internals. It seems however possible to profile and optimize programs consisting of modules compiled with gcc and CompCert by using both system simultaneously: compiler uses separate log files. - -To compile the special version that logs frequencies to files, use the option `-fprofile-arcs`. This option has to be specified at compile time but is not needed at link time (however, a reminder: if you link using another compiled than CompCert, you need to link against `libcompcert.a`). You may mix object files compiled with and without this option. - -This version may experience significant slowdown compared to normally compiled code, so do not use `-fprofile-arcs` for production code. - -At the end of execution of the program, frequency information will be logged to a file whose default name is `compcert_profiling.dat` (in the current directory). Another name may be used by specifying it using the `COMPCERT_PROFILING_DATA` environment variable. If this variable contains an empty string, no logging is done (but the slowdown still applies). - -Data are appended to the log file, never deleted, so it is safe to run the program several times on several test cases to accumulate data. - -Depending on the platform, this logging system is or is not thread-safe and is or is not compatible with position-independent code (PIC). In non thread-safe configurations, if two different execution threads execute code to be profiled, the profiling counters may end up with incorrect values. - -| Target platform | Available? | Thread-safe | PIC | -|-----------------|------------|-------------|-----| -| AArch64 | Yes | Yes | No | -| ARM | Yes | No | No | -| IA32 | Yes | No | No | -| KVX | Yes | Yes | No | -| PowerPC | No | | | -| PowerPC 64 | No | | | -| Risc-V 32 | No | | | -| Risc-V 64 | No | | | -| x86-64 | Yes | Yes | Yes | - -For recompiling the program using profiling information, use `-fprofile-use= compcert_profiling.dat -ftracelinearize` (substitute the appropriate filename for `compcert_profiling.dat` if needed). Experiments show performance improvement on KVX, not on other platforms. - -The same options (except for `-fprofile-use=` and `-fprofile-arcs`) should be used to compile the logging and optimized versions of the program: only functions that are exactly the same in the intermediate representation will be optimized according to profiling information. diff --git a/README.md b/README.md index c99d4a67..d32ce0e1 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,10 @@ This is a special version with additions from Verimag and Kalray : * A generic prepass scheduling optimizer with a multi-purpose preprocessing front-end: rewritings, register renaming, if-lifting and some generic code transformations such as loop-rotation, loop-unrolling, or tail-duplication. -* A profiling system: see [`PROFILING.md`](PROFILING.md) for details. +* A profiling system: see [`OPTIM_PROFILING.md`](OPTIM_PROFILING.md) for details. * Static branch prediction. + _The branch prediction is basic, it annotates each `Icond` node by an `option bool`. A `Some true` annotation indicates we predict the branch will be taken. `Some false` indicates the fallthrough case is predicted. `None` indicates we could not predict anything, and are not sure about which control will be preferred._ +* `-ftracelinearize` uses the branch prediction information to linearize LTL basic blocks in a slightly better way (in the `Linearize` phase). * And some experimental features that are work in progress. _Please refer to the resources listed below for more information._ diff --git a/README_Kalray.md b/README_Kalray.md deleted file mode 100644 index 86c49ad1..00000000 --- a/README_Kalray.md +++ /dev/null @@ -1,37 +0,0 @@ -# CompCert Kalray port -The verified C compiler ported to Kalray. - -## Features - -This delivery contains (in addition to features from CompCert master branch): -- A fully functional port of CompCert to Coolidge kvx VLIW core -- Postpass scheduling optimization, only for kvx. Activated by default, it can be deactivated with the compiler flag `-fno-postpass` -- Some experimental features that are work in progress: - - Slightly better subexpression eliminations, called CSE2 and CSE3. Both go through loops and feature a small alias analysis. - - `-fduplicate 0` to activate static branch prediction information. The branch prediction is basic, it annotates each `Icond` node by an `option bool`. A `Some true` annotation indicates we predict the branch will be taken. `Some false` indicates the fallthrough case is predicted. `None` indicates we could not predict anything, and are not sure about which control will be preferred. - - It is also possible to provide a number to perform tail duplication: `-fduplicate 5` will tail duplicate, stopping when more than 5 RTL instructions have been duplicated. This feature offers very variable performance (from -20% up to +20%) because of variations in the later register allocation phase that impacts the postpass scheduling. We intend to work on fine tuning the tail duplication phase once we have the prepass superblock scheduling. - - `-ftracelinearize` uses the branch prediction information to linearize LTL basic blocks in a slightly better way (in the `Linearize` phase). - -## Installing - -Please follow the instructions in `INSTALL.md` - -## Documentation of the Coq sources - -The documentation is available [online](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx). -You may also generate it locally from `make documentation` (after installation via `INSTALL.md`): the entry-point is in `doc/index-kvx.html`. - -## Testing - -We modified most of the CompCert tests of the `c` folder in order for them to be executable in reasonable time by the simulator. - -To pass the testsuite, first, build and install CompCert using the instructions in `INSTALL.md`, then: -``` -cd test/c -make -make test -``` - -The reference files were generated using `kvx-cos-gcc -O1`. - -We also have our own tests in `test/kvx/` - to run them, execute the script `simucheck.sh` located in that folder. These consist in comparing `compcert` output to `kvx-cos-gcc` output. diff --git a/ccomp_profiling/CCOMP_PROFILING.md b/ccomp_profiling/CCOMP_PROFILING.md new file mode 100644 index 00000000..26a46584 --- /dev/null +++ b/ccomp_profiling/CCOMP_PROFILING.md @@ -0,0 +1,162 @@ +# Profiling the CompCert compiler + +This documentation is about the profiling of the compiler itself, not about using programs' +profiling information to improve optimizations. For the latter, please refer to +[OPTIM_PROFILING](../OPTIM_PROFILING.md). It is possible to compile CompCert using the `Landmarks` +profiling library, in order to measure the compiler performance, and to find in which functions the +program spends most of its time. + +## Landmarks + +First, install [Landmarks](https://github.com/LexiFi/landmarks) on your computer with: +``` +opam install landmarks-ppx +``` + +The Landmark library can either automatically instrument the code using a preprocessor extention +(-ppx) or work with a manually instrumented code (see the online documentation). + +In our case, we prefer to use the automatic instrumentation, which will automatically instrument +top-level functions in each module. + +## Cleaning and building CompCert with the profiling lib + +First, we need to clean up our installation, in order to rebuild CompCert with a special Make +variable indicating that we want to use the profiling library, and to automatically instrument the +ocaml code. + +``` +make distclean +./config_.sh +``` + +Then, edit the file `Makefile.config` and change the `OCAML_LM_PROF` variable to `true`. After that, +we can follow the classical comilation process: + +``` +make -jN +``` + +## Profile CompCert on a specific input file + +To manually extract profiling information about CompCert, on a choosen source file, one only have to +set an environment variable as in this example: + +``` +OCAML_LANDMARKS=on,format=json,output=temporary:,time ./ccomp -S .c +``` + +The `OCAML_LANDMARKS` variable will be read at runtime. Below are some explanations about the +arguments: + +The arguments are of the form `arg[=option]` where, for flags without sub-arguments, the text in +brackets is ommitted. +The `on` option is just to turn on profiling, but can be replaced by `off` to disable it. +The `format=json` tells the profiling lib to output data as a json, but it may be replaced by +`textual` to output data in a tty style format. +The `output=temporary:` tells the profiling to store the results in ``, but +it's also possible to use `stderr` or `stdout`. +Using a temporary dir is handy because files will get a generated (unique) name, and as the json +contains a label with the name of the source program, there is no loss of information. +The `temporary` option is useful when writing automated scripts. +Finally, the `time` option is used to record system time (in seconds) of execution in addition to +the number of clock ticks. + +## Extract the profiling ratios for some function(s) + +Using the above command, you can get a profiling database for any program compiled with CompCert. +Now, to examine the time spent in one or more function, compared to the total time, one can use the +`prof_function.sh` script. + +``` +./prof_function.sh jq +``` + +Call the script using the above template, where `jq` is a json parser available on every linux +distribution (it needs to be installed). The second argument is the path of the json file, and the +third is a list of function to monitor. For instance, in the `RTLpath` version of CompCert (on the +CPP_2022 branch), I use `function_equiv_checker;WF.*function_checker;Live.*function_checker`. + +The above list contains three elements (regexp, separated by ";"): +- the first is the name of the SE checker +- the second is the wellformed liveness checker (for pre-output-regs) +- the third is the "normal" liveness checker + +When calling the script using this list of functions, and a json file obtained with the above +method, the script will output the following result: + +``` +./prof_function.sh jq ../Compcert_one/csv_tmp/profile_at_exitf1e257.tmp "function_equiv_checker;WF.*function_checker;Live.*function_checker" +total_cycles,total_time +55433885580.00000000000000000000,15.36316000000000000017 +sum_fct_cycles,sum_fct_time +802227564.00000000000000000000,.22260400000000000001 +test_name,ratio_cycles,ratio_time +"../../ccomp -S -stdlib ../../runtime -dparse -dclight -dasm -fstruct-return -c -o symbol.o symbol.c",1.44717902345534985300,1.44894670106931126100 +``` + +Description of the above output (line by line): +The first line is the shell command used to call the script, the lines 2 and 3 are giving the total +number of cycles and the total system time used by CompCert when compiling the file. +The lines 4 and 5 represent those numbers (cycles, system time) but for the sum of the functions +taken as arguments (so in our example, the 3 checkers used in `RTLpath`). +Finally, lines 6 and 7 are providing the launched command (here to compile `symbol.c`, a CI test), +and the ratios for the number of cycles and system time by comparing the previous sum with the +totals. + +## Profile Using the Continuous Integration tests + +As testing with only one source file is not representative, there is a script that can launch the +above process for every test in the CI: `prof_ci_tests.sh`. The latter takes two arguments: the +CompCert repo clone root directory (absolute path) and the list of functions, as above. + +It will create a directory `json_tmp` containing the jsons files for each CI test, and a file +prof.csv, containing an array of the output (the last line - ratios - only) of `prof_function.sh` +for each test. + +## Profile multiple versions of CompCert with different functions + +To compute results using different CompCert version, use the script `prof_multiple_ccomp.sh`, after +editing the variables it contains. Using this script will call the `prof_ci_tests.sh` script on each +CompCert version specified, with the list of functions given for these version. + +## Merging results obtained with `prof_multiple_ccomp.sh` + +Gather every `prof.csv` (one for each profiled version) in the same place (by default, they are +created in the CompCert version root directory), and launch the `stats_prof_info.py` script (you +need to have the `pandas` module, from pip). The script takes as arguments the path of each json +file. Then, it will merge the csv (keeping the same order as args) into one final csv. The merge is +an inner join, so the tests not available in every version are deleted. The script also clean lines +full of zeros, that can occurs when the CI compile object files, for instance. + +Once you get the resulting file, you can simply open it with libre office to average each column and +get a precise idea concerning the "weight" of the given function for each CompCert version. + +## Example + +I created those scripts to observe the performance difference between the verifier(s) of `RTLpath`, +`BTL` with a separated liveness checker (first version) and `BTL` with liveness check embedded in +symbolic execution (last version). +The first `BTL` version is named "BTL OLD" and the last "BTL NEW". +The collected data is accessible [here](https://gricad-gitlab.univ-grenoble-alpes.fr/gourdinl/verimag_pres/-/tree/master/benches/results/ccomp_profiling/btl_old_new_vs_rtlpath) as a tar gzip archive. +In the folder, filenames are prefixed with a number: +- "one" corresponds to RTLpath +- "two" corresponds to BTL NEW +- "three" corresponds to BTL OLD + +The file `merged.csv` in the folder is the resulting file after merging. I have manually modified +it, so the first line gives information on the CompCert version (column headers), and the last line +gives averages of each column. + +The fact that, for a CompCert version, ratios of the cycles and time columns are close to each +other, is a good sign: if ratios are similar, the measure is precise. + +We can conclude the following: +- For `RTLpath`: the three verifiers combined are (on average) responsible for about 2.11% of the + total compilation time. +- For `BTL NEW`: the unique verifier is taking about 1.14%, which is the lower (thus best) execution + time. +- For `BTL OLD`: the two verifiers combined are responsible for about 2.17%. + +Hence, the old version of `BTL` was sligthly slower than `RTLpath`, but the new one is two times +faster! diff --git a/ccomp_profiling/old_script_with_perf/perf_append.sh b/ccomp_profiling/old_script_with_perf/perf_append.sh new file mode 100755 index 00000000..4b86a99e --- /dev/null +++ b/ccomp_profiling/old_script_with_perf/perf_append.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +CSV=perfs.csv +TDIR=test/monniaux/yarpgen +CCOMP_ROOT_BTL_NEW=/home/yuki/Work/VERIMAG/Compcert_two +CCOMP_ROOT_BTL_OLD=/home/yuki/Work/VERIMAG/Compcert_three +CCOMP_ROOT_RTL=/home/yuki/Work/VERIMAG/Compcert_one +FILTER_BTL_NEW=simu_check_single_1421 +FILTER_BTL_OLD_LIVE=liveness_checker_720 +FILTER_BTL_OLD_SB=simu_check_1445 +FILTER_RTL_LIVE=list_path_checker_706 +FILTER_RTL_SB=function_equiv_checker_235 + +clean_n_build () { + cd $CCOMP_ROOT_BTL_NEW/$TDIR + rm perf.*.data + make clean && make tests_c tests_s + cd $CCOMP_ROOT_BTL_OLD/$TDIR + rm perf.*.data + make clean && make tests_c tests_s + cd $CCOMP_ROOT_RTL/$TDIR + rm perf.*.data + make clean && make tests_c tests_s + echo "file,btl_new ($FILTER_BTL_NEW),btl_old ($FILTER_BTL_OLD_LIVE + $FILTER_BTL_OLD_SB),rtl ($FILTER_RTL_LIVE + $FILTER_RTL_SB)" > $CSV +} + +check_result () { + if [[ $1 ]]; then + echo "$1" + else + echo "0" + fi +} + +awk_filter () { + res=$(echo "$1" | awk -F "," '/%*ccomp/ { print substr($1,2,length($1)-4) }') + check_result $res +} + +perf_btl_new () { + cd $CCOMP_ROOT_BTL_NEW/$TDIR + perc_btl_new=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_BTL_NEW -i $1)") +} + +perf_btl_old () { + cd $CCOMP_ROOT_BTL_OLD/$TDIR + btl_live=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_BTL_OLD_LIVE -i $1)") + btl_sb=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_BTL_OLD_SB -i $1)") + echo "btl_old: $btl_live + $btl_sb" + perc_btl_old=$(bc <<< "$btl_live + $btl_sb") +} + +perf_rtl () { + cd $CCOMP_ROOT_RTL/$TDIR + rtl_live=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_RTL_LIVE -i $1)") + rtl_sb=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_RTL_SB -i $1)") + echo "rtl_old: $rtl_live + $rtl_sb" + perc_rtl=$(bc <<< "$rtl_live + $rtl_sb") +} + +rdir=$(pwd) +clean_n_build +for file in $CCOMP_ROOT_RTL/$TDIR/perf.*; do + bfile=$(basename $file) + perf_btl_new "$bfile" + perf_btl_old "$bfile" + perf_rtl "$bfile" + cd $rdir + echo "$file, $perc_btl_new, $perc_btl_old, $perc_rtl" + echo "$file, $perc_btl_new, $perc_btl_old, $perc_rtl" >> $CSV +done + diff --git a/ccomp_profiling/prof_ci_tests.sh b/ccomp_profiling/prof_ci_tests.sh new file mode 100755 index 00000000..9af2428a --- /dev/null +++ b/ccomp_profiling/prof_ci_tests.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# @author gourdinl +# Script to compute profiling results as csv from CI tests + +if [ $# -ne 2 ]; then + echo "Usage: ./prof_ci_tests.sh " + echo $# + exit 1 +fi + +RDIR=$1 +RESULT=prof.csv +JQ=jq +CCOMP=$RDIR/ccomp +TMP=$RDIR/json_tmp +PROF_FCT=/home/yuki/Work/VERIMAG/Compcert_two/ccomp_profiling/prof_function.sh +LANDMARKSFLAGS="on,format=json,output=temporary:$TMP,time" +FCTS=$2 + +cd $RDIR +rm -rf $TMP +mkdir $TMP +make -C test clean +export OCAML_LANDMARKS=$LANDMARKSFLAGS +make -j4 -C test all_s +echo "test_name,ratio_cycles,ratio_time" > $RESULT +for file in $TMP/*; do + echo "Computing profiling results for $file" + RATIOS=$($PROF_FCT $JQ $file $FCTS | tail -n 1) + echo $RATIOS >> $RESULT +done diff --git a/ccomp_profiling/prof_function.sh b/ccomp_profiling/prof_function.sh new file mode 100755 index 00000000..0582a6ec --- /dev/null +++ b/ccomp_profiling/prof_function.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# @author gourdinl +# Script to profile several CompCert functions +# Args: $1=jq program; $2=json file; $3=list of functions + +if [ $# -ne 3 ]; then + echo "Usage: ./prof_function.sh " + echo $# + exit 1 +fi + +JQ=$1 +JSON=$2 +FCTS=$3 + +convert_to_bc () { + printf "%.20f" $1 +} + +test_name=$($JQ '.label' $JSON) +total_cycles=$(convert_to_bc $($JQ '.nodes | .[] | select(.kind=="root") | .time' $JSON)) +total_time=$(convert_to_bc $($JQ '.nodes | .[] | select(.kind=="root") | .sys_time' $JSON)) + +IFS=';' +read -ra arr_FCTS <<< "$FCTS" +sum_fct_time=0 +sum_fct_cycles=0 +for fct in ${arr_FCTS[@]}; do + echo $fct + fct_cycles=$(convert_to_bc $($JQ '.nodes | .[] | select(.name | test("'$fct'")) | .time' $JSON)) + fct_time=$(convert_to_bc $($JQ '.nodes | .[] | select(.name | test("'$fct'")) | .sys_time' $JSON)) + sum_fct_time=$(bc -l <<< "$sum_fct_time + $fct_time") + sum_fct_cycles=$(bc -l <<< "$sum_fct_cycles + $fct_cycles") +done + +echo "total_cycles,total_time" +echo "$total_cycles,$total_time" +echo "sum_fct_cycles,sum_fct_time" +echo "$sum_fct_cycles,$sum_fct_time" +if (( $(bc -l <<< "$sum_fct_cycles > 0") )) && (( $(bc -l <<< "$sum_fct_time > 0") )); then + ratio_cycles=$(bc -l <<< "($sum_fct_cycles / $total_cycles) * 100") + ratio_time=$(bc -l <<< "($sum_fct_time / $total_time) * 100") + echo "test_name,ratio_cycles,ratio_time" + echo "$test_name,$ratio_cycles,$ratio_time" +else + echo "$test_name,0,0" +fi diff --git a/ccomp_profiling/prof_multiple_ccomp.sh b/ccomp_profiling/prof_multiple_ccomp.sh new file mode 100755 index 00000000..77a8bef7 --- /dev/null +++ b/ccomp_profiling/prof_multiple_ccomp.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# @author gourdinl +# Script to compute profiling results for multiple version of CompCert + +CCOMPS=( + "/home/yuki/Work/VERIMAG/Compcert_one" + "/home/yuki/Work/VERIMAG/Compcert_two" + "/home/yuki/Work/VERIMAG/Compcert_three" +) + +FCTS=( + '.*function_equiv_checker$;.*WF.*function_checker$;.*Live.*function_checker$' + '.*check_symbolic_simu$' + '.*check_symbolic_simu$;.*liveness_checker$' +) + +for ((i = 0; i < ${#CCOMPS[@]}; i++)); do + ./prof_ci_tests.sh ${CCOMPS[$i]} ${FCTS[$i]} +done diff --git a/ccomp_profiling/stats_prof_info.py b/ccomp_profiling/stats_prof_info.py new file mode 100755 index 00000000..dbc50996 --- /dev/null +++ b/ccomp_profiling/stats_prof_info.py @@ -0,0 +1,21 @@ +#!/bin/python + +import sys +import pandas as pd + +# Reading and merging csv files +df = pd.read_csv(sys.argv[1]) +df.rename(columns = {'ratio_cycles':'ratio_cycles1', 'ratio_time':'ratio_time1'}, inplace = True) +for i in range(2, len(sys.argv)): + sdf = pd.read_csv(sys.argv[i]) + sdf.rename(columns = {'ratio_cycles':('ratio_cycles'+str(i)), 'ratio_time':('ratio_time'+str(i))}, inplace = True) + df = df.merge(sdf, on="test_name", how="inner") + +indices=[] +for idx, row in df.iterrows(): + brow = row[1:].map(lambda x: x==0) + if brow.all(): + indices.append(idx) +df.drop(indices, inplace=True) + +df.to_csv("merged.csv") -- cgit