From 1749be90299633474d2acad39ec8c70df6ccab32 Mon Sep 17 00:00:00 2001
From: Léo Gourdin
Date: Wed, 5 Jan 2022 15:32:46 +0100
Subject: update from BTL dev branch

---
 ccomp_profiling/CCOMP_PROFILING.md | 162 +++++++++++++++++++++
 .../old_script_with_perf/perf_append.sh | 72 +++++++++
 ccomp_profiling/prof_ci_tests.sh | 31 ++++
 ccomp_profiling/prof_function.sh | 47 ++++++
 ccomp_profiling/prof_multiple_ccomp.sh | 19 +++
 ccomp_profiling/stats_prof_info.py | 21 +++
 6 files changed, 352 insertions(+)
 create mode 100644 ccomp_profiling/CCOMP_PROFILING.md
 create mode 100755 ccomp_profiling/old_script_with_perf/perf_append.sh
 create mode 100755 ccomp_profiling/prof_ci_tests.sh
 create mode 100755 ccomp_profiling/prof_function.sh
 create mode 100755 ccomp_profiling/prof_multiple_ccomp.sh
 create mode 100755 ccomp_profiling/stats_prof_info.py
(limited to 'ccomp_profiling')

diff --git a/ccomp_profiling/CCOMP_PROFILING.md b/ccomp_profiling/CCOMP_PROFILING.md
new file mode 100644
index 00000000..26a46584
--- /dev/null
+++ b/ccomp_profiling/CCOMP_PROFILING.md
@@ -0,0 +1,162 @@
+# Profiling the CompCert compiler
+
+This documentation is about the profiling of the compiler itself, not about using programs'
+profiling information to improve optimizations. For the latter, please refer to
+[OPTIM_PROFILING](../OPTIM_PROFILING.md). It is possible to compile CompCert using the `Landmarks`
+profiling library, in order to measure the compiler performance, and to find in which functions the
+program spends most of its time.
+
+## Landmarks
+
+First, install [Landmarks](https://github.com/LexiFi/landmarks) on your computer with:
+```
+opam install landmarks-ppx
+```
+
+The Landmark library can either automatically instrument the code using a preprocessor extension
+(-ppx) or work with manually instrumented code (see the online documentation).
+
+In our case, we prefer to use the automatic instrumentation, which will automatically instrument
+top-level functions in each module.
+
+## Cleaning and building CompCert with the profiling lib
+
+First, we need to clean up our installation, in order to rebuild CompCert with a special Make
+variable indicating that we want to use the profiling library, and to automatically instrument the
+OCaml code.
+
+```
+make distclean
+./config_<target>.sh
+```
+
+Then, edit the file `Makefile.config` and change the `OCAML_LM_PROF` variable to `true`. After that,
+we can follow the classical compilation process:
+
+```
+make -jN
+```
+
+## Profile CompCert on a specific input file
+
+To manually extract profiling information about CompCert, on a chosen source file, one only has to
+set an environment variable as in this example:
+
+```
+OCAML_LANDMARKS=on,format=json,output=temporary:<tmp_dir>,time ./ccomp -S <file>.c
+```
+
+The `OCAML_LANDMARKS` variable will be read at runtime. Below are some explanations about the
+arguments:
+
+The arguments are of the form `arg[=option]` where, for flags without sub-arguments, the text in
+brackets is omitted.
+The `on` option is just to turn on profiling, but can be replaced by `off` to disable it.
+The `format=json` tells the profiling lib to output data as a json, but it may be replaced by
+`textual` to output data in a tty style format.
+The `output=temporary:<tmp_dir>` tells the profiling to store the results in `<tmp_dir>`, but
+it's also possible to use `stderr` or `stdout`.
+Using a temporary dir is handy because files will get a generated (unique) name, and as the json
+contains a label with the name of the source program, there is no loss of information.
+The `temporary` option is useful when writing automated scripts.
+Finally, the `time` option is used to record system time (in seconds) of execution in addition to
+the number of clock ticks.
+
+## Extract the profiling ratios for some function(s)
+
+Using the above command, you can get a profiling database for any program compiled with CompCert.
+Now, to examine the time spent in one or more functions, compared to the total time, one can use
+the `prof_function.sh` script.
+
+```
+./prof_function.sh jq <json_file> <fcts_list>
+```
+
+Call the script using the above template, where `jq` is a json parser available on every Linux
+distribution (it needs to be installed). The second argument is the path of the json file, and the
+third is a list of functions to monitor. For instance, in the `RTLpath` version of CompCert (on the
+CPP_2022 branch), I use `function_equiv_checker;WF.*function_checker;Live.*function_checker`.
+
+The above list contains three elements (regexps, separated by ";"):
+- the first is the name of the SE checker
+- the second is the wellformed liveness checker (for pre-output-regs)
+- the third is the "normal" liveness checker
+
+When calling the script using this list of functions, and a json file obtained with the above
+method, the script will output the following result:
+
+```
+./prof_function.sh jq ../Compcert_one/csv_tmp/profile_at_exitf1e257.tmp "function_equiv_checker;WF.*function_checker;Live.*function_checker"
+total_cycles,total_time
+55433885580.00000000000000000000,15.36316000000000000017
+sum_fct_cycles,sum_fct_time
+802227564.00000000000000000000,.22260400000000000001
+test_name,ratio_cycles,ratio_time
+"../../ccomp -S -stdlib ../../runtime -dparse -dclight -dasm -fstruct-return -c -o symbol.o symbol.c",1.44717902345534985300,1.44894670106931126100
+```
+
+Description of the above output (line by line):
+The first line is the shell command used to call the script; lines 2 and 3 give the total
+number of cycles and the total system time used by CompCert when compiling the file.
+Lines 4 and 5 represent those numbers (cycles, system time) but for the sum of the functions
+taken as arguments (so in our example, the 3 checkers used in `RTLpath`).
+Finally, lines 6 and 7 provide the launched command (here to compile `symbol.c`, a CI test),
+and the ratios for the number of cycles and system time by comparing the previous sum with the
+totals.
+
+## Profile Using the Continuous Integration tests
+
+As testing with only one source file is not representative, there is a script that can launch the
+above process for every test in the CI: `prof_ci_tests.sh`. The latter takes two arguments: the
+CompCert repo clone root directory (absolute path) and the list of functions, as above.
+
+It will create a directory `json_tmp` containing the json files for each CI test, and a file
+prof.csv, containing an array of the output (the last line - ratios - only) of `prof_function.sh`
+for each test.
+
+## Profile multiple versions of CompCert with different functions
+
+To compute results using different CompCert versions, use the script `prof_multiple_ccomp.sh`,
+after editing the variables it contains. Using this script will call the `prof_ci_tests.sh` script
+on each CompCert version specified, with the list of functions given for these versions.
+
+## Merging results obtained with `prof_multiple_ccomp.sh`
+
+Gather every `prof.csv` (one for each profiled version) in the same place (by default, they are
+created in the CompCert version root directory), and launch the `stats_prof_info.py` script (you
+need to have the `pandas` module, from pip). The script takes as arguments the path of each csv
+file. Then, it will merge the csv files (keeping the same order as args) into one final csv. The
+merge is an inner join, so the tests not available in every version are deleted. The script also
+cleans lines full of zeros, which can occur when the CI compiles object files, for instance.
+
+Once you get the resulting file, you can simply open it with LibreOffice to average each column and
+get a precise idea concerning the "weight" of the given function for each CompCert version.
+
+## Example
+
+I created those scripts to observe the performance difference between the verifier(s) of `RTLpath`,
+`BTL` with a separated liveness checker (first version) and `BTL` with liveness check embedded in
+symbolic execution (last version).
+The first `BTL` version is named "BTL OLD" and the last "BTL NEW".
+The collected data is accessible [here](https://gricad-gitlab.univ-grenoble-alpes.fr/gourdinl/verimag_pres/-/tree/master/benches/results/ccomp_profiling/btl_old_new_vs_rtlpath) as a tar gzip archive.
+In the folder, filenames are prefixed with a number:
+- "one" corresponds to RTLpath
+- "two" corresponds to BTL NEW
+- "three" corresponds to BTL OLD
+
+The file `merged.csv` in the folder is the resulting file after merging. I have manually modified
+it, so the first line gives information on the CompCert version (column headers), and the last line
+gives averages of each column.
+
+The fact that, for a CompCert version, ratios of the cycles and time columns are close to each
+other, is a good sign: if ratios are similar, the measure is precise.
+
+We can conclude the following:
+- For `RTLpath`: the three verifiers combined are (on average) responsible for about 2.11% of the
+  total compilation time.
+- For `BTL NEW`: the unique verifier is taking about 1.14%, which is the lowest (thus best)
+  execution time.
+- For `BTL OLD`: the two verifiers combined are responsible for about 2.17%.
+
+Hence, the old version of `BTL` was slightly slower than `RTLpath`, but the new one is two times
+faster!
diff --git a/ccomp_profiling/old_script_with_perf/perf_append.sh b/ccomp_profiling/old_script_with_perf/perf_append.sh
new file mode 100755
index 00000000..4b86a99e
--- /dev/null
+++ b/ccomp_profiling/old_script_with_perf/perf_append.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# Collect, via `perf report`, the share of compile time spent in the simulation/liveness
+# checkers of three CompCert checkouts, and append one CSV row per perf data file.
+CSV=perfs.csv
+TDIR=test/monniaux/yarpgen
+CCOMP_ROOT_BTL_NEW=/home/yuki/Work/VERIMAG/Compcert_two
+CCOMP_ROOT_BTL_OLD=/home/yuki/Work/VERIMAG/Compcert_three
+CCOMP_ROOT_RTL=/home/yuki/Work/VERIMAG/Compcert_one
+FILTER_BTL_NEW=simu_check_single_1421
+FILTER_BTL_OLD_LIVE=liveness_checker_720
+FILTER_BTL_OLD_SB=simu_check_1445
+FILTER_RTL_LIVE=list_path_checker_706
+FILTER_RTL_SB=function_equiv_checker_235
+
+clean_n_build () { # rebuild the yarpgen tests in all three checkouts, reset the CSV header
+    cd $CCOMP_ROOT_BTL_NEW/$TDIR
+    rm -f perf.*.data
+    make clean && make tests_c tests_s
+    cd $CCOMP_ROOT_BTL_OLD/$TDIR
+    rm -f perf.*.data
+    make clean && make tests_c tests_s
+    cd $CCOMP_ROOT_RTL/$TDIR
+    rm -f perf.*.data
+    make clean && make tests_c tests_s
+    echo "file,btl_new ($FILTER_BTL_NEW),btl_old ($FILTER_BTL_OLD_LIVE + $FILTER_BTL_OLD_SB),rtl ($FILTER_RTL_LIVE + $FILTER_RTL_SB)" > $CSV
+}
+
+check_result () { # default an empty percentage to "0" so the later bc sums stay valid
+    if [[ $1 ]]; then
+        echo "$1"
+    else
+        echo "0"
+    fi
+}
+
+awk_filter () { # extract the overhead percentage of the ccomp symbol from a perf report line
+    res=$(echo "$1" | awk -F "," '/%*ccomp/ { print substr($1,2,length($1)-4) }')
+    check_result $res
+}
+
+perf_btl_new () { # $1 = perf data file name; sets perc_btl_new
+    cd $CCOMP_ROOT_BTL_NEW/$TDIR
+    perc_btl_new=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_BTL_NEW -i $1)")
+}
+
+perf_btl_old () { # $1 = perf data file name; sets perc_btl_old (liveness + simu checkers)
+    cd $CCOMP_ROOT_BTL_OLD/$TDIR
+    btl_live=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_BTL_OLD_LIVE -i $1)")
+    btl_sb=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_BTL_OLD_SB -i $1)")
+    echo "btl_old: $btl_live + $btl_sb"
+    perc_btl_old=$(bc <<< "$btl_live + $btl_sb")
+}
+
+perf_rtl () { # $1 = perf data file name; sets perc_rtl (liveness + SE checkers)
+    cd $CCOMP_ROOT_RTL/$TDIR
+    rtl_live=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_RTL_LIVE -i $1)")
+    rtl_sb=$(awk_filter "$(perf report -t , --stdio --symbol-filter=$FILTER_RTL_SB -i $1)")
+    echo "rtl_old: $rtl_live + $rtl_sb"
+    perc_rtl=$(bc <<< "$rtl_live + $rtl_sb")
+}
+
+rdir=$(pwd)
+clean_n_build
+for file in $CCOMP_ROOT_RTL/$TDIR/perf.*; do # same basename is expected in all three checkouts
+    bfile=$(basename $file)
+    perf_btl_new "$bfile"
+    perf_btl_old "$bfile"
+    perf_rtl "$bfile"
+    cd $rdir
+    echo "$file, $perc_btl_new, $perc_btl_old, $perc_rtl"
+    echo "$file, $perc_btl_new, $perc_btl_old, $perc_rtl" >> $CSV
+done
+
diff --git a/ccomp_profiling/prof_ci_tests.sh b/ccomp_profiling/prof_ci_tests.sh
new file mode 100755
index 00000000..9af2428a
--- /dev/null
+++ b/ccomp_profiling/prof_ci_tests.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# @author gourdinl
+# Script to compute profiling results as csv from CI tests
+
+if [ $# -ne 2 ]; then
+    echo "Usage: ./prof_ci_tests.sh <ccomp_root_dir> <fcts_list>"
+    echo $#
+    exit 1
+fi
+
+RDIR=$1
+RESULT=prof.csv
+JQ=jq
+CCOMP=$RDIR/ccomp # NOTE(review): unused below — presumably kept for documentation; confirm
+TMP=$RDIR/json_tmp
+PROF_FCT=/home/yuki/Work/VERIMAG/Compcert_two/ccomp_profiling/prof_function.sh
+LANDMARKSFLAGS="on,format=json,output=temporary:$TMP,time"
+FCTS=$2
+
+cd $RDIR
+rm -rf $TMP
+mkdir $TMP
+make -C test clean
+export OCAML_LANDMARKS=$LANDMARKSFLAGS # read at runtime by the instrumented ccomp
+make -j4 -C test all_s
+echo "test_name,ratio_cycles,ratio_time" > $RESULT
+for file in $TMP/*; do # one Landmarks json per compiled CI test
+    echo "Computing profiling results for $file"
+    RATIOS=$($PROF_FCT $JQ $file $FCTS | tail -n 1)
+    echo $RATIOS >> $RESULT
+done
diff --git a/ccomp_profiling/prof_function.sh b/ccomp_profiling/prof_function.sh
new file mode 100755
index 00000000..0582a6ec
--- /dev/null
+++ b/ccomp_profiling/prof_function.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# @author gourdinl
+# Script to profile several CompCert functions
+# Args: $1=jq program; $2=json file; $3=list of functions
+
+if [ $# -ne 3 ]; then
+    echo "Usage: ./prof_function.sh <jq_path> <json_file> <fcts_list>"
+    echo $#
+    exit 1
+fi
+
+JQ=$1
+JSON=$2
+FCTS=$3
+
+convert_to_bc () { # normalize a number printed by jq into a fixed-point form bc accepts
+    printf "%.20f" $1
+}
+
+test_name=$($JQ '.label' $JSON)
+total_cycles=$(convert_to_bc $($JQ '.nodes | .[] | select(.kind=="root") | .time' $JSON))
+total_time=$(convert_to_bc $($JQ '.nodes | .[] | select(.kind=="root") | .sys_time' $JSON))
+
+# Split the ';'-separated regexp list; IFS is scoped to the read so it does not leak.
+IFS=';' read -ra arr_FCTS <<< "$FCTS"
+sum_fct_time=0
+sum_fct_cycles=0
+for fct in ${arr_FCTS[@]}; do
+    echo $fct
+    fct_cycles=$(convert_to_bc $($JQ '.nodes | .[] | select(.name | test("'$fct'")) | .time' $JSON))
+    fct_time=$(convert_to_bc $($JQ '.nodes | .[] | select(.name | test("'$fct'")) | .sys_time' $JSON))
+    sum_fct_time=$(bc -l <<< "$sum_fct_time + $fct_time")
+    sum_fct_cycles=$(bc -l <<< "$sum_fct_cycles + $fct_cycles")
+done
+
+echo "total_cycles,total_time"
+echo "$total_cycles,$total_time"
+echo "sum_fct_cycles,sum_fct_time"
+echo "$sum_fct_cycles,$sum_fct_time"
+if (( $(bc -l <<< "$sum_fct_cycles > 0") )) && (( $(bc -l <<< "$sum_fct_time > 0") )); then
+    ratio_cycles=$(bc -l <<< "($sum_fct_cycles / $total_cycles) * 100")
+    ratio_time=$(bc -l <<< "($sum_fct_time / $total_time) * 100")
+    echo "test_name,ratio_cycles,ratio_time"
+    echo "$test_name,$ratio_cycles,$ratio_time"
+else
+    echo "$test_name,0,0"
+fi
diff --git a/ccomp_profiling/prof_multiple_ccomp.sh b/ccomp_profiling/prof_multiple_ccomp.sh
new file mode 100755
index 00000000..77a8bef7
--- /dev/null
+++ b/ccomp_profiling/prof_multiple_ccomp.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# @author gourdinl
+# Script to compute profiling results for multiple versions of CompCert
+
+CCOMPS=(
+    "/home/yuki/Work/VERIMAG/Compcert_one"
+    "/home/yuki/Work/VERIMAG/Compcert_two"
+    "/home/yuki/Work/VERIMAG/Compcert_three"
+)
+
+FCTS=( # FCTS[i] is the checker regexp list for CCOMPS[i]
+    '.*function_equiv_checker$;.*WF.*function_checker$;.*Live.*function_checker$'
+    '.*check_symbolic_simu$'
+    '.*check_symbolic_simu$;.*liveness_checker$'
+)
+
+for ((i = 0; i < ${#CCOMPS[@]}; i++)); do
+    ./prof_ci_tests.sh ${CCOMPS[$i]} ${FCTS[$i]}
+done
diff --git a/ccomp_profiling/stats_prof_info.py b/ccomp_profiling/stats_prof_info.py
new file mode 100755
index 00000000..dbc50996
--- /dev/null
+++ b/ccomp_profiling/stats_prof_info.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+
+import sys
+import pandas as pd
+
+# Read the first prof.csv, then inner-join the others on test_name (suffixing ratio columns).
+df = pd.read_csv(sys.argv[1])
+df.rename(columns = {'ratio_cycles':'ratio_cycles1', 'ratio_time':'ratio_time1'}, inplace = True)
+for i in range(2, len(sys.argv)):
+    sdf = pd.read_csv(sys.argv[i])
+    sdf.rename(columns = {'ratio_cycles':('ratio_cycles'+str(i)), 'ratio_time':('ratio_time'+str(i))}, inplace = True)
+    df = df.merge(sdf, on="test_name", how="inner")
+
+indices=[] # rows where every ratio is zero (e.g. CI tests that only build object files)
+for idx, row in df.iterrows():
+    brow = row[1:].map(lambda x: x==0)
+    if brow.all():
+        indices.append(idx)
+df.drop(indices, inplace=True)
+
+df.to_csv("merged.csv")
--
cgit