aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-04-12 19:23:28 +0200
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-04-12 19:23:28 +0200
commit9e00dd1645b6adcdb46739562cba0fc314ec3bed (patch)
tree9519a4d28224197b93d25dc55687322730757487
parent47ecb1ec8d00636d6e7bb4cebd21df5d0b9b5ac7 (diff)
parent9e7f5e5611c5b5281b74b075b4524aef7bc05437 (diff)
downloadcompcert-kvx-9e00dd1645b6adcdb46739562cba0fc314ec3bed.tar.gz
compcert-kvx-9e00dd1645b6adcdb46739562cba0fc314ec3bed.zip
Merge remote-tracking branch 'origin/mppa-profiling' into mppa-features
-rw-r--r--Makefile2
-rw-r--r--PROFILING.md34
-rw-r--r--aarch64/Asmexpand.ml2
-rw-r--r--aarch64/Machregs.v1
-rw-r--r--aarch64/TargetPrinter.ml41
-rw-r--r--arm/AsmToJSON.ml1
-rw-r--r--arm/Asmexpand.ml2
-rw-r--r--arm/Constantexpand.ml1
-rw-r--r--arm/Machregs.v1
-rw-r--r--arm/TargetPrinter.ml53
-rw-r--r--backend/CSE.v2
-rw-r--r--backend/CSEproof.v1
-rw-r--r--backend/PrintAsmaux.ml86
-rw-r--r--backend/Profiling.v65
-rw-r--r--backend/ProfilingExploit.v30
-rw-r--r--backend/ProfilingExploitproof.v224
-rw-r--r--backend/Profilingaux.ml71
-rw-r--r--backend/Profilingproof.v687
-rw-r--r--cfrontend/Cexec.v9
-rw-r--r--common/AST.v14
-rw-r--r--common/Events.v65
-rw-r--r--common/PrintAST.ml11
-rw-r--r--driver/Clflags.ml3
-rw-r--r--driver/Compiler.v44
-rw-r--r--driver/Compopts.v6
-rw-r--r--driver/Driver.ml10
-rw-r--r--extraction/extraction.v14
-rw-r--r--mppa_k1c/Asmexpand.ml1
-rw-r--r--mppa_k1c/Machregs.v1
-rw-r--r--mppa_k1c/TargetPrinter.ml30
-rw-r--r--powerpc/AsmToJSON.ml1
-rw-r--r--runtime/Makefile4
-rw-r--r--runtime/c/write_profiling_table.c58
-rw-r--r--test/monniaux/cycles.h16
-rw-r--r--test/monniaux/minisat/Makefile.on_marte16
-rw-r--r--test/monniaux/minisat/Makefile.profiled64
l---------test/monniaux/minisat/clock.c1
l---------test/monniaux/minisat/cycles.h1
-rw-r--r--test/monniaux/minisat/k1c.inline_50.log14
-rw-r--r--test/monniaux/minisat/solver.h5
-rw-r--r--test/monniaux/profiling/profiling_call.c27
-rw-r--r--x86/Asmexpand.ml2
-rw-r--r--x86/TargetPrinter.ml65
43 files changed, 1741 insertions, 45 deletions
diff --git a/Makefile b/Makefile
index 8aa5e98a..ee8a11bc 100644
--- a/Makefile
+++ b/Makefile
@@ -80,6 +80,8 @@ BACKEND=\
RTLgen.v RTLgenspec.v RTLgenproof.v \
Tailcall.v Tailcallproof.v \
Inlining.v Inliningspec.v Inliningproof.v \
+ Profiling.v Profilingproof.v \
+ ProfilingExploit.v ProfilingExploitproof.v \
Renumber.v Renumberproof.v \
Duplicate.v Duplicateproof.v \
RTLtyping.v \
diff --git a/PROFILING.md b/PROFILING.md
new file mode 100644
index 00000000..3f4cbc46
--- /dev/null
+++ b/PROFILING.md
@@ -0,0 +1,34 @@
+This version of CompCert includes a profiling system. It tells CompCert's optimization phases for each conditional branch instruction which of the two branches was more frequently taken. This system is not available for all combinations of target architecture and operating system; see below.
+
+For using this profiling system one has to
+1. Compile a special version of the program that will count, for each branch, the number of times it was taken, and recording this information to a file.
+2. Execute this special version on representative examples. It will record the frequencies of execution of branches to a log file.
+3. Recompile the program, telling CompCert to use the information in the log file.
+
+This system does not use the same formats as gcc's gcov profiles, since it depends heavily on compiler internals. It seems however possible to profile and optimize programs consisting of modules compiled with gcc and CompCert by using both system simultaneously: compiler uses separate log files.
+
+To compile the special version that logs frequencies to files, use the option `-fprofile-arcs`. This option has to be specified at compile time but is not needed at link time (however, a reminder: if you link using another compiled than CompCert, you need to link against `libcompcert.a`). You may mix object files compiled with and without this option.
+
+This version may experience significant slowdown compared to normally compiled code, so do not use `-fprofile-arcs` for production code.
+
+At the end of execution of the program, frequency information will be logged to a file whose default name is `compcert_profiling.dat` (in the current directory). Another name may be used by specifying it using the `COMPCERT_PROFILING_DATA` environment variable. If this variable contains an empty string, no logging is done (but the slowdown still applies).
+
+Data are appended to the log file, never deleted, so it is safe to run the program several times on several test cases to accumulate data.
+
+Depending on the platform, this logging system is or is not thread-safe and is or is not compatible with position-independent code (PIC). In non thread-safe configurations, if two different execution threads execute code to be profiled, the profiling counters may end up with incorrect values.
+
+| Target platform | Available? | Thread-safe | PIC |
+|-----------------|------------|-------------|-----|
+| AArch64 | Yes | Yes | No |
+| ARM | Yes | No | No |
+| IA32 | Yes | No | No |
+| K1c | Yes | Yes | No |
+| PowerPC | No | | |
+| PowerPC 64 | No | | |
+| Risc-V 32 | No | | |
+| Risc-V 64 | No | | |
+| x86-64 | Yes | Yes | Yes |
+
+For recompiling the program using profiling information, use `-fprofile-use= compcert_profiling.dat -ftracelinearize` (substitute the appropriate filename for `compcert_profiling.dat` if needed). Experiments show performance improvement on K1c, not on other platforms.
+
+The same options (except for `-fprofile-use=` and `-fprofile-arcs`) should be used to compile the logging and optimized versions of the program: only functions that are exactly the same in the intermediate representation will be optimized according to profiling information.
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 471ad501..b0787d0a 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -400,7 +400,7 @@ let expand_instruction instr =
expand_annot_val kind txt targ args res
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
- | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+ | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
emit instr
| _ ->
assert false
diff --git a/aarch64/Machregs.v b/aarch64/Machregs.v
index b2a2308e..3d27f48f 100644
--- a/aarch64/Machregs.v
+++ b/aarch64/Machregs.v
@@ -158,6 +158,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
match ef with
| EF_memcpy sz al => R15 :: R17 :: R29 :: nil
| EF_inline_asm txt sg clob => destroyed_by_clobber clob
+ | EF_profiling _ _ => R15 :: R17 :: nil
| _ => nil
end.
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index e7edcf0d..8d74daf4 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -229,6 +229,28 @@ module Target : TARGET =
| EOuxtw n -> fprintf oc ", uxtw #%a" coqint n
| EOuxtx n -> fprintf oc ", uxtx #%a" coqint n
+ let next_profiling_label =
+ let atomic_incr_counter = ref 0 in
+ fun () ->
+ let r = sprintf ".Lcompcert_atomic_incr%d" !atomic_incr_counter in
+ incr atomic_incr_counter; r;;
+
+ let print_profiling_logger oc id kind =
+ assert (kind >= 0);
+ assert (kind <= 1);
+ fprintf oc "%s begin profiling %a %d: atomic increment\n" comment
+ Profilingaux.pp_id id kind;
+ let ofs = profiling_offset id kind and lbl = next_profiling_label () in
+ fprintf oc " adrp x15, %s+%d\n" profiling_counter_table_name ofs;
+ fprintf oc " add x15, x15, :lo12:(%s+%d)\n" profiling_counter_table_name ofs;
+ fprintf oc "%s:\n" lbl;
+ fprintf oc " ldaxr x17, [x15]\n";
+ fprintf oc " add x17, x17, 1\n";
+ fprintf oc " stlxr w17, x17, [x15]\n";
+ fprintf oc " cbnz w17, %s\n" lbl;
+ fprintf oc "%s end profiling %a %d\n" comment
+ Profilingaux.pp_id id kind;;
+
(* Printing of instructions *)
let print_instruction oc = function
(* Branches *)
@@ -521,6 +543,8 @@ module Target : TARGET =
fprintf oc "%s begin inline assembly\n\t" comment;
print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
fprintf oc "%s end inline assembly\n" comment
+ | EF_profiling (id, coq_kind) ->
+ print_profiling_logger oc id (Z.to_int coq_kind)
| _ ->
assert false
end
@@ -577,7 +601,24 @@ module Target : TARGET =
section oc Section_text;
end
+ let aarch64_profiling_stub oc nr_items
+ profiling_id_table_name
+ profiling_counter_table_name =
+ fprintf oc " adrp x2, %s\n" profiling_counter_table_name;
+ fprintf oc " adrp x1, %s\n" profiling_id_table_name;
+ fprintf oc " add x2, x2, :lo12:%s\n" profiling_counter_table_name;
+ fprintf oc " add x1, x1, :lo12:%s\n" profiling_id_table_name;
+ fprintf oc " mov w0, %d\n" nr_items;
+ fprintf oc " b %s\n" profiling_write_table_helper ;;
+
+ let print_atexit oc to_be_called =
+ fprintf oc " adrp x0, %s\n" to_be_called;
+ fprintf oc " add x0, x0, :lo12:%s\n" to_be_called;
+ fprintf oc " b atexit\n";;
+
+
let print_epilogue oc =
+ print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) aarch64_profiling_stub oc;
if !Clflags.option_g then begin
Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
section oc Section_text;
diff --git a/arm/AsmToJSON.ml b/arm/AsmToJSON.ml
index e850fed6..669d8c0c 100644
--- a/arm/AsmToJSON.ml
+++ b/arm/AsmToJSON.ml
@@ -177,6 +177,7 @@ let pp_instructions pp ic =
| EF_annot_val _
| EF_builtin _
| EF_debug _
+ | EF_profiling _
| EF_external _
| EF_free
| EF_malloc
diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml
index 89aab5c7..6996c9bb 100644
--- a/arm/Asmexpand.ml
+++ b/arm/Asmexpand.ml
@@ -619,7 +619,7 @@ let expand_instruction instr =
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz))
(Int32.to_int (camlint_of_coqint al)) args
- | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+ | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
emit instr
| _ ->
assert false
diff --git a/arm/Constantexpand.ml b/arm/Constantexpand.ml
index 408b291e..8cc32c1f 100644
--- a/arm/Constantexpand.ml
+++ b/arm/Constantexpand.ml
@@ -106,6 +106,7 @@ let estimate_size = function
| Pbuiltin (ef,_,_) ->
begin match ef with
| EF_inline_asm _ -> 256
+ | EF_profiling _ -> 40
| _ -> 0 end
| Pcfi_adjust _
| Pcfi_rel_offset _
diff --git a/arm/Machregs.v b/arm/Machregs.v
index ae0ff6bf..1ec8f0a1 100644
--- a/arm/Machregs.v
+++ b/arm/Machregs.v
@@ -153,6 +153,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
match ef with
| EF_memcpy sz al => R2 :: R3 :: R12 :: F7 :: nil
| EF_inline_asm txt sg clob => destroyed_by_clobber clob
+ | EF_profiling _ _ => R2 :: R3 :: R12 :: nil
| _ => nil
end.
diff --git a/arm/TargetPrinter.ml b/arm/TargetPrinter.ml
index 54af7cd5..839530c6 100644
--- a/arm/TargetPrinter.ml
+++ b/arm/TargetPrinter.ml
@@ -204,6 +204,38 @@ struct
| SOasr(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n
| SOror(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n
+
+ let next_profiling_label =
+ let profiling_label_counter = ref 0 in
+ fun () ->
+ let r = sprintf ".Lprofiling_label%d" !profiling_label_counter in
+ incr profiling_label_counter; r;;
+
+ let print_profiling_logger oc id kind =
+ assert (kind >= 0);
+ assert (kind <= 1);
+ let ofs = profiling_offset id kind and olbl = next_profiling_label () in
+ fprintf oc "%s begin profiling %a %d: non-atomic increment\n" comment
+ Profilingaux.pp_id id kind;
+ fprintf oc " ldr r2, %s\n" olbl;
+ fprintf oc " ldr r3, [r2, #%d]\n"
+ (if Configuration.is_big_endian then 8 else 0);
+ fprintf oc " ldr r12, [r2, #%d]\n"
+ (if Configuration.is_big_endian then 0 else 8);
+ fprintf oc " adds r3, r3, #1\n";
+ fprintf oc " adc r12, r12, #0\n";
+ fprintf oc " str r3, [r2, #%d]\n"
+ (if Configuration.is_big_endian then 8 else 0);
+ fprintf oc " str r12, [r2, #%d]\n"
+ (if Configuration.is_big_endian then 0 else 8);
+ let jlbl = next_profiling_label () in
+ fprintf oc " b %s\n" jlbl;
+ fprintf oc "%s:\n" olbl;
+ fprintf oc " .word %s + %d\n" profiling_counter_table_name ofs;
+ fprintf oc "%s:\n" jlbl;
+ fprintf oc "%s end profiling %a %d\n" comment
+ Profilingaux.pp_id id kind;;
+
let print_instruction oc = function
(* Core instructions *)
| Padc (r1,r2,so) ->
@@ -484,6 +516,7 @@ struct
fprintf oc "%s begin inline assembly\n\t" comment;
print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
fprintf oc "%s end inline assembly\n" comment
+ | EF_profiling(id, coq_kind) -> print_profiling_logger oc id (Z.to_int coq_kind)
| _ ->
assert false
end
@@ -551,6 +584,11 @@ struct
if !Clflags.option_mthumb then
fprintf oc " .thumb_func\n"
+
+ let text_print_fun_info oc name =
+ fprintf oc " .type %s, %%function\n" name;
+ fprintf oc " .size %s, . - %s\n" name name
+
let print_fun_info oc name =
fprintf oc " .type %a, %%function\n" symbol name;
fprintf oc " .size %a, . - %a\n" symbol name symbol name
@@ -598,9 +636,22 @@ struct
if !Clflags.option_g then begin
section oc Section_text;
cfi_section oc
- end
+ end
+
+ let arm_profiling_stub oc nr_items
+ profiling_id_table_name
+ profiling_counter_table_name =
+ fprintf oc " ldr r2, = %s\n" profiling_counter_table_name;
+ fprintf oc " ldr r1, = %s\n" profiling_id_table_name;
+ fprintf oc " mov r0, #%d\n" nr_items;
+ fprintf oc " b %s\n" profiling_write_table_helper;;
+
+ let print_atexit oc to_be_called =
+ fprintf oc " ldr r0, = %s\n" to_be_called;
+ fprintf oc " b atexit\n";;
let print_epilogue oc =
+ print_profiling_epilogue text_print_fun_info (Init_atexit print_atexit) arm_profiling_stub oc;
if !Clflags.option_g then begin
Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
section oc Section_text;
diff --git a/backend/CSE.v b/backend/CSE.v
index 1936d4e4..838d96a6 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -493,7 +493,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
| _ =>
empty_numbering
end
- | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ =>
+ | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ | EF_profiling _ _ =>
set_res_unknown before res
end
| Icond cond args ifso ifnot _ =>
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 5bbb7508..a7465cee 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -1318,6 +1318,7 @@ Proof.
+ apply CASE2; inv H1; auto.
+ apply CASE1.
+ apply CASE2; inv H1; auto.
+ + apply CASE2; inv H1; auto.
* apply set_res_lessdef; auto.
- (* Icond *)
diff --git a/backend/PrintAsmaux.ml b/backend/PrintAsmaux.ml
index e14da87e..7fa10aee 100644
--- a/backend/PrintAsmaux.ml
+++ b/backend/PrintAsmaux.ml
@@ -111,6 +111,10 @@ let elf_symbol_offset oc (symb, ofs) =
if ofs <> 0L then fprintf oc " + %Ld" ofs
(* Functions for fun and var info *)
+let elf_text_print_fun_info oc name =
+ fprintf oc " .type %s, @function\n" name;
+ fprintf oc " .size %s, . - %s\n" name name
+
let elf_print_fun_info oc name =
fprintf oc " .type %a, @function\n" elf_symbol name;
fprintf oc " .size %a, . - %a\n" elf_symbol name elf_symbol name
@@ -311,4 +315,84 @@ let common_section ?(sec = ".bss") () =
if !Clflags.option_fcommon then
"COMM"
else
- sec
+ sec;;
+
+(* Profiling *)
+let profiling_table : (Digest.t, int) Hashtbl.t = Hashtbl.create 1000;;
+let next_profiling_position = ref 0;;
+let profiling_position (x : Digest.t) : int =
+ match Hashtbl.find_opt profiling_table x with
+ | None -> let y = !next_profiling_position in
+ next_profiling_position := succ y;
+ Hashtbl.replace profiling_table x y;
+ y
+ | Some y -> y;;
+
+let profiling_ids () =
+ let nr_items = !next_profiling_position in
+ let ar = Array.make nr_items "" in
+ Hashtbl.iter
+ (fun x y -> ar.(y) <- x)
+ profiling_table;
+ ar;;
+
+let print_profiling_id oc id =
+ assert (String.length id = 16);
+ output_string oc " .byte";
+ for i=0 to 15 do
+ fprintf oc " 0x%02x" (Char.code (String.get id i));
+ if i < 15 then output_char oc ','
+ done;
+ output_char oc '\n';;
+
+let profiling_counter_table_name = ".compcert_profiling_counters"
+and profiling_id_table_name = ".compcert_profiling_ids"
+and profiling_write_table = ".compcert_profiling_save_for_this_object"
+and profiling_init = ".compcert_profiling_init"
+and profiling_write_table_helper = "_compcert_write_profiling_table"
+and dtor_section = ".dtors.65435,\"aw\",@progbits"
+(* and fini_section = ".fini_array_00100,\"aw\"" *)
+and init_section = ".init_array,\"aw\"";;
+
+type finalizer_call_method =
+ | Dtors
+ | Init_atexit of (out_channel -> string -> unit);;
+
+let write_symbol_pointer oc sym =
+ if Archi.ptr64
+ then fprintf oc " .8byte %s\n" sym
+ else fprintf oc " .4byte %s\n" sym;;
+
+let print_profiling_epilogue declare_function finalizer_call_method print_profiling_stub oc =
+ if !Clflags.option_profile_arcs
+ then
+ let nr_items = !next_profiling_position in
+ if nr_items > 0
+ then
+ begin
+ fprintf oc " .lcomm %s, %d\n"
+ profiling_counter_table_name (nr_items * 16);
+ fprintf oc " .section .rodata\n";
+ fprintf oc "%s:\n" profiling_id_table_name;
+ Array.iter (print_profiling_id oc) (profiling_ids ());
+ fprintf oc " .text\n";
+ fprintf oc "%s:\n" profiling_write_table;
+ print_profiling_stub oc nr_items
+ profiling_id_table_name
+ profiling_counter_table_name;
+ declare_function oc profiling_write_table;
+ match finalizer_call_method with
+ | Dtors ->
+ fprintf oc " .section %s\n" dtor_section;
+ write_symbol_pointer oc profiling_write_table
+ | Init_atexit(atexit_call) ->
+ fprintf oc " .section %s\n" init_section;
+ write_symbol_pointer oc profiling_init;
+ fprintf oc " .text\n";
+ fprintf oc "%s:\n" profiling_init;
+ atexit_call oc profiling_write_table;
+ declare_function oc profiling_init
+ end;;
+
+let profiling_offset id kind =
+ ((profiling_position id)*2 + kind)*8;;
diff --git a/backend/Profiling.v b/backend/Profiling.v
new file mode 100644
index 00000000..4cba49ee
--- /dev/null
+++ b/backend/Profiling.v
@@ -0,0 +1,65 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+
+Parameter function_id : function -> AST.profiling_id.
+Parameter branch_id : AST.profiling_id -> node -> AST.profiling_id.
+
+Section PER_FUNCTION_ID.
+ Variable f_id : AST.profiling_id.
+
+ Definition inject_profiling_call (prog : code)
+ (pc extra_pc ifso ifnot : node) : node * code :=
+ let id := branch_id f_id pc in
+ let extra_pc' := Pos.succ extra_pc in
+ let prog' := PTree.set extra_pc
+ (Ibuiltin (EF_profiling id 0%Z) nil BR_none ifnot) prog in
+ let prog'':= PTree.set extra_pc'
+ (Ibuiltin (EF_profiling id 1%Z) nil BR_none ifso) prog' in
+ (Pos.succ extra_pc', prog'').
+
+ Definition inject_at (prog : code) (pc extra_pc : node) : node * code :=
+ match PTree.get pc prog with
+ | Some (Icond cond args ifso ifnot expected) =>
+ inject_profiling_call
+ (PTree.set pc
+ (Icond cond args (Pos.succ extra_pc) extra_pc expected) prog)
+ pc extra_pc ifso ifnot
+ | _ => inject_profiling_call prog pc extra_pc 1 1 (* does not happen *)
+ end.
+
+ Definition inject_at' (already : node * code) pc :=
+ let (extra_pc, prog) := already in
+ inject_at prog pc extra_pc.
+
+ Definition inject_l (prog : code) extra_pc injections :=
+ List.fold_left (fun already (inject_pc : node) =>
+ inject_at' already inject_pc)
+ injections
+ (extra_pc, prog).
+
+ Definition gen_conditions (prog : code) :=
+ List.map fst (PTree.elements (PTree.filter1
+ (fun instr =>
+ match instr with
+ | Icond cond args ifso ifnot expected => true
+ | _ => false
+ end) prog)).
+End PER_FUNCTION_ID.
+
+Definition transf_function (f : function) : function :=
+ let max_pc := max_pc_function f in
+ let conditions := gen_conditions (fn_code f) in
+ {| fn_sig := f.(fn_sig);
+ fn_params := f.(fn_params);
+ fn_stacksize := f.(fn_stacksize);
+ fn_code := snd (inject_l (function_id f) (fn_code f) (Pos.succ max_pc) conditions);
+ fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+ AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+ transform_program transf_fundef p.
diff --git a/backend/ProfilingExploit.v b/backend/ProfilingExploit.v
new file mode 100644
index 00000000..cfca1a12
--- /dev/null
+++ b/backend/ProfilingExploit.v
@@ -0,0 +1,30 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+
+Parameter function_id : function -> AST.profiling_id.
+Parameter branch_id : AST.profiling_id -> node -> AST.profiling_id.
+Parameter condition_oracle : AST.profiling_id -> option bool.
+
+Definition transf_instr (f_id : AST.profiling_id)
+ (pc : node) (i : instruction) : instruction :=
+ match i with
+ | Icond cond args ifso ifnot None =>
+ Icond cond args ifso ifnot (condition_oracle (branch_id f_id pc))
+ | _ => i
+ end.
+
+Definition transf_function (f : function) : function :=
+ {| fn_sig := f.(fn_sig);
+ fn_params := f.(fn_params);
+ fn_stacksize := f.(fn_stacksize);
+ fn_code := PTree.map (transf_instr (function_id f)) f.(fn_code);
+ fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+ AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+ transform_program transf_fundef p.
diff --git a/backend/ProfilingExploitproof.v b/backend/ProfilingExploitproof.v
new file mode 100644
index 00000000..bc68c38e
--- /dev/null
+++ b/backend/ProfilingExploitproof.v
@@ -0,0 +1,224 @@
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import ProfilingExploit.
+
+
+Definition match_prog (p tp: RTL.program) :=
+ match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+ forall p, match_prog p (transf_program p).
+Proof.
+ intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+ forall v f,
+ Genv.find_funct ge v = Some f ->
+ Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+ forall v f,
+ Genv.find_funct_ptr ge v = Some f ->
+ Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+ forall id,
+ Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+ forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+ destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+ forall ros rs fd,
+ find_function ge ros rs = Some fd ->
+ find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+ unfold find_function; intros. destruct ros as [r|id].
+ eapply functions_translated; eauto.
+ rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+ eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+ forall f pc i,
+ f.(fn_code)!pc = Some i ->
+ (transf_function f).(fn_code)!pc = Some(transf_instr (function_id f) pc i).
+Proof.
+ intros until i. intro Hcode.
+ unfold transf_function; simpl.
+ rewrite PTree.gmap.
+ unfold option_map.
+ rewrite Hcode.
+ reflexivity.
+Qed.
+
+Ltac TR_AT :=
+ match goal with
+ | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+ generalize (transf_function_at _ _ _ A); intros
+ end.
+
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+ | match_frames_intro: forall res f sp pc rs,
+ match_frames (Stackframe res f sp pc rs)
+ (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+ | match_regular_states: forall stk f sp pc rs m stk'
+ (STACKS: list_forall2 match_frames stk stk'),
+ match_states (State stk f sp pc rs m)
+ (State stk' (transf_function f) sp pc rs m)
+ | match_callstates: forall stk f args m stk'
+ (STACKS: list_forall2 match_frames stk stk'),
+ match_states (Callstate stk f args m)
+ (Callstate stk' (transf_fundef f) args m)
+ | match_returnstates: forall stk v m stk'
+ (STACKS: list_forall2 match_frames stk stk'),
+ match_states (Returnstate stk v m)
+ (Returnstate stk' v m).
+
+Lemma step_simulation:
+ forall S1 t S2, RTL.step ge S1 t S2 ->
+ forall S1', match_states S1 S1' ->
+ exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+ induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+ econstructor; split. eapply exec_Inop; eauto.
+ constructor; auto.
+- (* op *)
+ econstructor; split.
+ eapply exec_Iop with (v := v); eauto.
+ rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+ constructor; auto.
+(* load *)
+- econstructor; split.
+ assert (eval_addressing tge sp addr rs ## args = Some a).
+ rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+ eapply exec_Iload; eauto.
+ constructor; auto.
+- (* load notrap1 *)
+ econstructor; split.
+ assert (eval_addressing tge sp addr rs ## args = None).
+ rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+ eapply exec_Iload_notrap1; eauto.
+ constructor; auto.
+- (* load notrap2 *)
+ econstructor; split.
+ assert (eval_addressing tge sp addr rs ## args = Some a).
+ rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+ eapply exec_Iload_notrap2; eauto.
+ constructor; auto.
+- (* store *)
+ econstructor; split.
+ assert (eval_addressing tge sp addr rs ## args = Some a).
+ rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+ eapply exec_Istore; eauto.
+ constructor; auto.
+(* call *)
+- econstructor; split.
+ eapply exec_Icall with (fd := transf_fundef fd); eauto.
+ eapply find_function_translated; eauto.
+ apply sig_preserved.
+ constructor. constructor; auto. constructor.
+(* tailcall *)
+- econstructor; split.
+ eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+ eapply find_function_translated; eauto.
+ apply sig_preserved.
+ constructor. auto.
+(* builtin *)
+- econstructor; split.
+ eapply exec_Ibuiltin; eauto.
+ eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+ constructor; auto.
+(* cond *)
+- destruct predb.
+ + econstructor; split.
+ eapply exec_Icond; eauto.
+ constructor; auto.
+ + simpl transf_instr in H1.
+ destruct condition_oracle in H1.
+ * econstructor; split.
+ eapply exec_Icond; eauto.
+ constructor; auto.
+ * econstructor; split.
+ eapply exec_Icond; eauto.
+ constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+ eapply exec_Ijumptable; eauto.
+ constructor; auto.
+(* return *)
+- econstructor; split.
+ eapply exec_Ireturn; eauto.
+ constructor; auto.
+(* internal function *)
+- simpl. econstructor; split.
+ eapply exec_function_internal; eauto.
+ constructor; auto.
+(* external function *)
+- econstructor; split.
+ eapply exec_function_external; eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+ constructor; auto.
+(* return *)
+- inv STACKS. inv H1.
+ econstructor; split.
+ eapply exec_return; eauto.
+ constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+ forall S1, RTL.initial_state prog S1 ->
+ exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+ intros. inv H. econstructor; split.
+ econstructor.
+ eapply (Genv.init_mem_transf TRANSL); eauto.
+ rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+ eapply function_ptr_translated; eauto.
+ rewrite <- H3; apply sig_preserved.
+ constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+ forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+ intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+ forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+ eapply forward_simulation_step.
+ apply senv_preserved.
+ eexact transf_initial_states.
+ eexact transf_final_states.
+ exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Profilingaux.ml b/backend/Profilingaux.ml
new file mode 100644
index 00000000..f8fc5d6b
--- /dev/null
+++ b/backend/Profilingaux.ml
@@ -0,0 +1,71 @@
+open Camlcoq
+open RTL
+open Maps
+
+type identifier = Digest.t
+
+let pp_id channel (x : identifier) =
+ assert(String.length x = 16);
+ for i=0 to 15 do
+ Printf.fprintf channel "%02x" (Char.code (String.get x i))
+ done
+
+let print_anonymous_function pp f =
+ let instrs =
+ List.sort
+ (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
+ (List.rev_map
+ (fun (pc, i) -> (P.to_int pc, i))
+ (PTree.elements f.fn_code)) in
+ PrintRTL.print_succ pp f.fn_entrypoint
+ (match instrs with (pc1, _) :: _ -> pc1 | [] -> -1);
+ List.iter (PrintRTL.print_instruction pp) instrs;
+ Printf.fprintf pp "}\n\n"
+
+let function_id (f : coq_function) : identifier =
+ let digest = Digest.string (Marshal.to_string f []) in
+ Printf.fprintf stderr "FUNCTION hash = %a\n" pp_id digest;
+ print_anonymous_function stderr f;
+ digest
+
+let branch_id (f_id : identifier) (node : P.t) : identifier =
+ Digest.string (f_id ^ (Int64.to_string (P.to_int64 node)));;
+
+let profiling_counts : (identifier, (Int64.t*Int64.t)) Hashtbl.t = Hashtbl.create 1000;;
+
+let get_counts id =
+ match Hashtbl.find_opt profiling_counts id with
+ | Some x -> x
+ | None -> (0L, 0L);;
+
+let add_profiling_counts id counter0 counter1 =
+ let (old0, old1) = get_counts id in
+ Hashtbl.replace profiling_counts id (Int64.add old0 counter0,
+ Int64.add old1 counter1);;
+
+let input_counter (ic : in_channel) : Int64.t =
+ let r = ref Int64.zero in
+ for i=0 to 7
+ do
+ r := Int64.add !r (Int64.shift_left (Int64.of_int (input_byte ic)) (8*i))
+ done;
+ !r;;
+
+let load_profiling_info (filename : string) : unit =
+ let ic = open_in filename in
+ try
+ while true do
+ let id : identifier = really_input_string ic 16 in
+ let counter0 = input_counter ic in
+ let counter1 = input_counter ic in
+ (* Printf.fprintf stderr "%a : %Ld %Ld\n" pp_id id counter0 counter1 *)
+ add_profiling_counts id counter0 counter1
+ done
+ with End_of_file -> close_in ic;;
+
+let condition_oracle (id : identifier) : bool option =
+ let (count0, count1) = get_counts id in
+ ( (* if count0 <> 0L || count1 <> 0L then *)
+ Printf.fprintf stderr "%a : %Ld %Ld\n" pp_id id count0 count1);
+ if count0 = count1 then None
+ else Some(count1 > count0);;
diff --git a/backend/Profilingproof.v b/backend/Profilingproof.v
new file mode 100644
index 00000000..fc04c77e
--- /dev/null
+++ b/backend/Profilingproof.v
@@ -0,0 +1,687 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import Profiling.
+Require Import Lia.
+
+Local Open Scope positive.
+
+Definition match_prog (p tp: RTL.program) :=
+ match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+ forall p, match_prog p (transf_program p).
+Proof.
+ intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+ forall v f,
+ Genv.find_funct ge v = Some f ->
+ Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+ forall v f,
+ Genv.find_funct_ptr ge v = Some f ->
+ Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+ forall id,
+ Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+ forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+ destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+ forall ros rs fd,
+ find_function ge ros rs = Some fd ->
+ find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+ unfold find_function; intros. destruct ros as [r|id].
+ eapply functions_translated; eauto.
+ rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+ eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma pair_expand:
+ forall { A B : Type } (p : A*B),
+ p = ((fst p), (snd p)).
+Proof.
+ destruct p; simpl; trivial.
+Qed.
+
+Lemma inject_profiling_call_preserves:
+ forall id body pc extra_pc ifso ifnot pc0,
+ pc0 < extra_pc ->
+ PTree.get pc0 (snd (inject_profiling_call id body pc extra_pc ifso ifnot)) = PTree.get pc0 body.
+Proof.
+ intros. simpl.
+ rewrite PTree.gso by lia.
+ apply PTree.gso.
+ lia.
+Qed.
+
+Lemma inject_at_preserves :
+ forall id body pc extra_pc pc0,
+ pc0 < extra_pc ->
+ pc0 <> pc ->
+ PTree.get pc0 (snd (inject_at id body pc extra_pc)) = PTree.get pc0 body.
+Proof.
+ intros. unfold inject_at.
+ destruct (PTree.get pc body) eqn:GET.
+ - destruct i.
+ all: try (rewrite inject_profiling_call_preserves; trivial; fail).
+ rewrite inject_profiling_call_preserves by trivial.
+ apply PTree.gso; lia.
+ - apply inject_profiling_call_preserves; trivial.
+Qed.
+
+Lemma inject_profiling_call_increases:
+ forall id body pc extra_pc ifso ifnot,
+ fst (inject_profiling_call id body pc extra_pc ifso ifnot) = extra_pc + 2.
+Proof.
+ intros.
+ simpl.
+ lia.
+Qed.
+
+Lemma inject_at_increases:
+ forall id body pc extra_pc,
+ (fst (inject_at id body pc extra_pc)) = extra_pc + 2.
+Proof.
+ intros. unfold inject_at.
+ destruct (PTree.get pc body).
+ - destruct i; apply inject_profiling_call_increases.
+ - apply inject_profiling_call_increases.
+Qed.
+
+Lemma inject_l_preserves :
+ forall id injections body extra_pc pc0,
+ pc0 < extra_pc ->
+ List.forallb (fun injection => if peq injection pc0 then false else true) injections = true ->
+ PTree.get pc0 (snd (inject_l id body extra_pc injections)) = PTree.get pc0 body.
+Proof.
+ induction injections;
+ intros until pc0; intros BEFORE ALL; simpl; trivial.
+ unfold inject_l.
+ simpl in ALL.
+ rewrite andb_true_iff in ALL.
+ destruct ALL as [NEQ ALL].
+ simpl.
+ rewrite pair_expand with (p := inject_at id body a extra_pc).
+ progress fold (inject_l id (snd (inject_at id body a extra_pc))
+ (fst (inject_at id body a extra_pc))
+ injections).
+ rewrite IHinjections; trivial.
+ - apply inject_at_preserves; trivial.
+ destruct (peq a pc0); congruence.
+ - rewrite inject_at_increases.
+ lia.
+Qed.
+
+Fixpoint inject_l_position extra_pc
+ (injections : list node)
+ (k : nat) {struct injections} : node :=
+ match injections with
+ | nil => extra_pc
+ | pc::l' =>
+ match k with
+ | O => extra_pc
+ | S k' => inject_l_position (extra_pc + 2) l' k'
+ end
+ end.
+
+Lemma inject_l_position_increases : forall injections pc k,
+ pc <= inject_l_position pc injections k.
+Proof.
+ induction injections; simpl; intros.
+ lia.
+ destruct k.
+ lia.
+ specialize IHinjections with (pc := pc + 2) (k := k).
+ lia.
+Qed.
+
+Lemma inject_l_injected_pc:
+ forall f_id injections cond args ifso ifnot expected body injnum pc extra_pc
+ (INSTR : body ! pc = Some (Icond cond args ifso ifnot expected))
+ (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+ (NOREPET : list_norepet injections)
+ (NUMBER : nth_error injections injnum = Some pc),
+ PTree.get pc (snd (inject_l f_id body extra_pc injections)) =
+ Some (Icond cond args
+ (Pos.succ (inject_l_position extra_pc injections injnum))
+ (inject_l_position extra_pc injections injnum) expected).
+Proof.
+ induction injections; simpl; intros.
+ { rewrite nth_error_nil in NUMBER.
+ discriminate NUMBER. }
+ simpl in BELOW.
+ rewrite andb_true_iff in BELOW.
+ destruct BELOW as [BELOW1 BELOW2].
+ rewrite Pos.ltb_lt in BELOW1.
+ unfold inject_l.
+ simpl fold_left.
+ rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+ progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+ (fst (inject_at f_id body a extra_pc))
+ injections).
+ destruct injnum as [ | injnum']; simpl in NUMBER.
+ { inv NUMBER.
+ rewrite inject_l_preserves; simpl.
+ - unfold inject_at.
+ rewrite INSTR.
+ unfold inject_profiling_call. simpl.
+ rewrite PTree.gso by lia.
+ rewrite PTree.gso by lia.
+ apply PTree.gss.
+ - rewrite inject_at_increases.
+ lia.
+ - inv NOREPET.
+ rewrite forallb_forall.
+ intros x IN.
+ destruct peq as [EQ | ]; trivial.
+ subst x.
+ contradiction.
+ }
+ simpl.
+ rewrite inject_at_increases.
+ apply IHinjections with (ifso := ifso) (ifnot := ifnot).
+ - rewrite inject_at_preserves; trivial.
+ + rewrite forallb_forall in BELOW2.
+ rewrite <- Pos.ltb_lt.
+ apply nth_error_In in NUMBER.
+ auto.
+ + inv NOREPET.
+ intro ZZZ.
+ subst a.
+ apply nth_error_In in NUMBER.
+ auto.
+
+ - rewrite forallb_forall in BELOW2.
+ rewrite forallb_forall.
+ intros.
+ specialize BELOW2 with x.
+ rewrite Pos.ltb_lt in *.
+ intuition lia.
+ - inv NOREPET. trivial.
+ - trivial.
+Qed.
+
+Lemma inject_l_injected0:
+ forall f_id cond args ifso ifnot expected injections body injnum pc extra_pc
+ (INSTR : body ! pc = Some (Icond cond args ifso ifnot expected))
+ (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+ (NOREPET : list_norepet injections)
+ (NUMBER : nth_error injections injnum = Some pc),
+ PTree.get (inject_l_position extra_pc injections injnum)
+ (snd (inject_l f_id body extra_pc injections)) =
+ Some (Ibuiltin (EF_profiling (branch_id f_id pc) 0%Z) nil BR_none ifnot).
+Proof.
+ induction injections; intros.
+ { rewrite nth_error_nil in NUMBER.
+ discriminate NUMBER. }
+ simpl in BELOW.
+ rewrite andb_true_iff in BELOW.
+ destruct BELOW as [BELOW1 BELOW2].
+ unfold inject_l.
+ simpl fold_left.
+ rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+ progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+ (fst (inject_at f_id body a extra_pc))
+ injections).
+ destruct injnum as [ | injnum']; simpl in NUMBER.
+ { inv NUMBER.
+ rewrite inject_l_preserves; simpl.
+ - unfold inject_at.
+ rewrite INSTR.
+ unfold inject_profiling_call. simpl.
+ rewrite PTree.gso by lia.
+ apply PTree.gss.
+ - rewrite inject_at_increases.
+ lia.
+ - rewrite forallb_forall.
+ rewrite forallb_forall in BELOW2.
+ intros loc IN.
+ specialize BELOW2 with loc.
+ apply BELOW2 in IN.
+ destruct peq as [EQ | ]; trivial.
+ rewrite EQ in IN.
+ rewrite Pos.ltb_lt in IN.
+ lia.
+ }
+ simpl.
+ rewrite inject_at_increases.
+
+ apply IHinjections.
+ - rewrite inject_at_preserves; trivial.
+ + rewrite forallb_forall in BELOW2.
+ rewrite <- Pos.ltb_lt.
+ apply nth_error_In in NUMBER.
+ auto.
+ + inv NOREPET.
+ intro ZZZ.
+ subst a.
+ apply nth_error_In in NUMBER.
+ auto.
+
+ - rewrite forallb_forall in BELOW2.
+ rewrite forallb_forall.
+ intros.
+ specialize BELOW2 with x.
+ rewrite Pos.ltb_lt in *.
+ intuition lia.
+ - inv NOREPET. trivial.
+ - trivial.
+Qed.
+
+Lemma inject_l_injected1:
+ forall f_id cond args ifso ifnot expected injections body injnum pc extra_pc
+ (INSTR : body ! pc = Some (Icond cond args ifso ifnot expected))
+ (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+ (NOREPET : list_norepet injections)
+ (NUMBER : nth_error injections injnum = Some pc),
+ PTree.get (Pos.succ (inject_l_position extra_pc injections injnum))
+ (snd (inject_l f_id body extra_pc injections)) =
+ Some (Ibuiltin (EF_profiling (branch_id f_id pc) 1%Z) nil BR_none ifso).
+Proof.
+ induction injections; intros.
+ { rewrite nth_error_nil in NUMBER.
+ discriminate NUMBER. }
+ simpl in BELOW.
+ rewrite andb_true_iff in BELOW.
+ destruct BELOW as [BELOW1 BELOW2].
+ unfold inject_l.
+ simpl fold_left.
+ rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+ progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+ (fst (inject_at f_id body a extra_pc))
+ injections).
+ destruct injnum as [ | injnum']; simpl in NUMBER.
+ { inv NUMBER.
+ rewrite inject_l_preserves; simpl.
+ - unfold inject_at.
+ rewrite INSTR.
+ unfold inject_profiling_call. simpl.
+ apply PTree.gss.
+ - rewrite inject_at_increases.
+ lia.
+ - rewrite forallb_forall.
+ rewrite forallb_forall in BELOW2.
+ intros loc IN.
+ specialize BELOW2 with loc.
+ apply BELOW2 in IN.
+ destruct peq as [EQ | ]; trivial.
+ rewrite EQ in IN.
+ rewrite Pos.ltb_lt in IN.
+ lia.
+ }
+ simpl.
+ rewrite inject_at_increases.
+
+ apply IHinjections.
+ - rewrite inject_at_preserves; trivial.
+ + rewrite forallb_forall in BELOW2.
+ rewrite <- Pos.ltb_lt.
+ apply nth_error_In in NUMBER.
+ auto.
+ + inv NOREPET.
+ intro ZZZ.
+ subst a.
+ apply nth_error_In in NUMBER.
+ auto.
+
+ - rewrite forallb_forall in BELOW2.
+ rewrite forallb_forall.
+ intros.
+ specialize BELOW2 with x.
+ rewrite Pos.ltb_lt in *.
+ intuition lia.
+ - inv NOREPET. trivial.
+ - trivial.
+Qed.
+
+Lemma transf_function_at:
+ forall f pc i
+ (CODE : f.(fn_code)!pc = Some i)
+ (INSTR : match i with
+ | Icond _ _ _ _ _ => False
+ | _ => True
+ end),
+ (transf_function f).(fn_code)!pc = Some i.
+Proof.
+ intros.
+ unfold transf_function; simpl.
+ rewrite inject_l_preserves.
+ assumption.
+ - pose proof (max_pc_function_sound f pc i CODE) as LE.
+ unfold Ple in LE.
+ lia.
+ - rewrite forallb_forall.
+ intros x IN.
+ destruct peq; trivial.
+ subst x.
+ unfold gen_conditions in IN.
+ rewrite in_map_iff in IN.
+ destruct IN as [[pc' i'] [EQ IN]].
+ simpl in EQ.
+ subst pc'.
+ apply PTree.elements_complete in IN.
+ rewrite PTree.gfilter1 in IN.
+ rewrite CODE in IN.
+ destruct i; try discriminate; contradiction.
+Qed.
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+ match_frames (Stackframe res f sp pc rs)
+ (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+ | match_regular_states: forall stk f sp pc rs m stk'
+ (STACKS: list_forall2 match_frames stk stk'),
+ match_states (State stk f sp pc rs m)
+ (State stk' (transf_function f) sp pc rs m)
+ | match_callstates: forall stk f args m stk'
+ (STACKS: list_forall2 match_frames stk stk'),
+ match_states (Callstate stk f args m)
+ (Callstate stk' (transf_fundef f) args m)
+ | match_returnstates: forall stk v m stk'
+ (STACKS: list_forall2 match_frames stk stk'),
+ match_states (Returnstate stk v m)
+ (Returnstate stk' v m).
+
+Lemma funsig_preserved:
+ forall fd,
+ funsig (transf_fundef fd) = funsig fd.
+Proof.
+ destruct fd; simpl; trivial.
+Qed.
+
+Lemma stacksize_preserved:
+ forall f,
+ fn_stacksize (transf_function f) = fn_stacksize f.
+Proof.
+ destruct f; simpl; trivial.
+Qed.
+
+Hint Resolve symbols_preserved funsig_preserved external_call_symbols_preserved senv_preserved stacksize_preserved : profiling.
+
+Lemma step_simulation:
+ forall s1 t s2 (STEP : step ge s1 t s2)
+ s1' (MS: match_states s1 s1'),
+ exists s2', plus step tge s1' t s2' /\ match_states s2 s2'.
+Proof.
+ induction 1; intros; inv MS.
+ - econstructor; split.
+ + apply plus_one. apply exec_Inop.
+ erewrite transf_function_at; eauto. apply I.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_Iop with (op:=op) (args:=args).
+ * erewrite transf_function_at; eauto. apply I.
+ * rewrite eval_operation_preserved with (ge1:=ge);
+ eauto with profiling.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_Iload with (trap:=trap) (chunk:=chunk)
+ (addr:=addr) (args:=args) (a:=a).
+ erewrite transf_function_at; eauto. apply I.
+ rewrite eval_addressing_preserved with (ge1:=ge).
+ all: eauto with profiling.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_Iload_notrap1 with (chunk:=chunk)
+ (addr:=addr) (args:=args).
+ erewrite transf_function_at; eauto. apply I.
+ rewrite eval_addressing_preserved with (ge1:=ge).
+ all: eauto with profiling.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_Iload_notrap2 with (chunk:=chunk)
+ (addr:=addr) (args:=args) (a:=a).
+ erewrite transf_function_at; eauto. apply I.
+ rewrite eval_addressing_preserved with (ge1:=ge).
+ all: eauto with profiling.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_Istore with (chunk:=chunk) (src := src)
+ (addr:=addr) (args:=args) (a:=a).
+ erewrite transf_function_at; eauto. apply I.
+ rewrite eval_addressing_preserved with (ge1:=ge).
+ all: eauto with profiling.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_Icall with (sig:=(funsig fd)) (ros:=ros).
+ erewrite transf_function_at; eauto. apply I.
+ apply find_function_translated with (fd := fd).
+ all: eauto with profiling.
+ + constructor; auto.
+ constructor; auto.
+ constructor.
+ - econstructor; split.
+ + apply plus_one. apply exec_Itailcall with (sig:=(funsig fd)) (ros:=ros).
+ erewrite transf_function_at; eauto. apply I.
+ apply find_function_translated with (fd := fd).
+ all: eauto with profiling.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one.
+ apply exec_Ibuiltin with (ef:=ef) (args:=args) (vargs:=vargs).
+ erewrite transf_function_at; eauto. apply I.
+ apply eval_builtin_args_preserved with (ge1:=ge).
+ all: eauto with profiling.
+ + constructor; auto.
+ - destruct b.
+ + assert (In pc (gen_conditions (fn_code f))) as IN.
+ { unfold gen_conditions.
+ rewrite in_map_iff.
+ exists (pc, (Icond cond args ifso ifnot predb)).
+ split; simpl; trivial.
+ apply PTree.elements_correct.
+ rewrite PTree.gfilter1.
+ rewrite H.
+ reflexivity.
+ }
+ apply In_nth_error in IN.
+ destruct IN as [n IN].
+ econstructor; split.
+ * eapply plus_two.
+ ++ eapply exec_Icond with (cond := cond) (args := args) (predb := predb) (b := true).
+ unfold transf_function. simpl.
+ erewrite inject_l_injected_pc with (cond := cond) (args := args).
+ ** reflexivity.
+ ** eassumption.
+ ** unfold gen_conditions.
+ rewrite forallb_forall.
+ intros x INx.
+ rewrite in_map_iff in INx.
+ destruct INx as [[x' i'] [EQ INx]].
+ simpl in EQ.
+ subst x'.
+ apply PTree.elements_complete in INx.
+ rewrite PTree.gfilter1 in INx.
+ assert (x <= max_pc_function f) as MAX.
+ { destruct ((fn_code f) ! x) eqn:CODEx.
+ 2: discriminate.
+ apply max_pc_function_sound with (i:=i).
+ assumption.
+ }
+ rewrite Pos.ltb_lt.
+ lia.
+ ** unfold gen_conditions.
+ apply PTree.elements_keys_norepet.
+ ** exact IN.
+ ** assumption.
+ ** reflexivity.
+ ++ apply exec_Ibuiltin with (ef := (EF_profiling (branch_id (function_id f) pc) 1%Z)) (args := nil) (vargs := nil).
+ apply inject_l_injected1 with (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (expected := predb).
+ ** exact H.
+ ** unfold gen_conditions.
+ rewrite forallb_forall.
+ intros x INx.
+ rewrite in_map_iff in INx.
+ destruct INx as [[x' i'] [EQ INx]].
+ simpl in EQ.
+ subst x'.
+ apply PTree.elements_complete in INx.
+ rewrite PTree.gfilter1 in INx.
+ assert (x <= max_pc_function f) as MAX.
+ { destruct ((fn_code f) ! x) eqn:CODEx.
+ 2: discriminate.
+ apply max_pc_function_sound with (i:=i).
+ assumption.
+ }
+ rewrite Pos.ltb_lt.
+ lia.
+ ** unfold gen_conditions.
+ apply PTree.elements_keys_norepet.
+ ** exact IN.
+ ** constructor.
+ ** constructor.
+ ++ reflexivity.
+ * simpl. constructor; auto.
+
+ + assert (In pc (gen_conditions (fn_code f))) as IN.
+ { unfold gen_conditions.
+ rewrite in_map_iff.
+ exists (pc, (Icond cond args ifso ifnot predb)).
+ split; simpl; trivial.
+ apply PTree.elements_correct.
+ rewrite PTree.gfilter1.
+ rewrite H.
+ reflexivity.
+ }
+ apply In_nth_error in IN.
+ destruct IN as [n IN].
+ econstructor; split.
+ * eapply plus_two.
+ ++ eapply exec_Icond with (cond := cond) (args := args) (predb := predb) (b := false).
+ unfold transf_function. simpl.
+ erewrite inject_l_injected_pc with (cond := cond) (args := args).
+ ** reflexivity.
+ ** eassumption.
+ ** unfold gen_conditions.
+ rewrite forallb_forall.
+ intros x INx.
+ rewrite in_map_iff in INx.
+ destruct INx as [[x' i'] [EQ INx]].
+ simpl in EQ.
+ subst x'.
+ apply PTree.elements_complete in INx.
+ rewrite PTree.gfilter1 in INx.
+ assert (x <= max_pc_function f) as MAX.
+ { destruct ((fn_code f) ! x) eqn:CODEx.
+ 2: discriminate.
+ apply max_pc_function_sound with (i:=i).
+ assumption.
+ }
+ rewrite Pos.ltb_lt.
+ lia.
+ ** unfold gen_conditions.
+ apply PTree.elements_keys_norepet.
+ ** exact IN.
+ ** assumption.
+ ** reflexivity.
+ ++ apply exec_Ibuiltin with (ef := (EF_profiling (branch_id (function_id f) pc) 0%Z)) (args := nil) (vargs := nil).
+ apply inject_l_injected0 with (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (expected := predb).
+ ** exact H.
+ ** unfold gen_conditions.
+ rewrite forallb_forall.
+ intros x INx.
+ rewrite in_map_iff in INx.
+ destruct INx as [[x' i'] [EQ INx]].
+ simpl in EQ.
+ subst x'.
+ apply PTree.elements_complete in INx.
+ rewrite PTree.gfilter1 in INx.
+ assert (x <= max_pc_function f) as MAX.
+ { destruct ((fn_code f) ! x) eqn:CODEx.
+ 2: discriminate.
+ apply max_pc_function_sound with (i:=i).
+ assumption.
+ }
+ rewrite Pos.ltb_lt.
+ lia.
+ ** unfold gen_conditions.
+ apply PTree.elements_keys_norepet.
+ ** exact IN.
+ ** constructor.
+ ** constructor.
+ ++ reflexivity.
+ * simpl. constructor; auto.
+
+ - econstructor; split.
+ + apply plus_one.
+ apply exec_Ijumptable with (arg:=arg) (tbl:=tbl) (n:=n).
+ erewrite transf_function_at; eauto. apply I.
+ all: eauto with profiling.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one.
+ apply exec_Ireturn.
+ erewrite transf_function_at; eauto. apply I.
+ rewrite stacksize_preserved. eassumption.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_function_internal.
+ rewrite stacksize_preserved. eassumption.
+ + constructor; auto.
+ - econstructor; split.
+ + apply plus_one. apply exec_function_external.
+ eauto with profiling.
+ + constructor; auto.
+ - inv STACKS. inv H1.
+ econstructor; split.
+ + apply plus_one. apply exec_return.
+ + constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+ forall S1, RTL.initial_state prog S1 ->
+ exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+ intros. inv H. econstructor; split.
+ econstructor.
+ eapply (Genv.init_mem_transf TRANSL); eauto.
+ rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+ eapply function_ptr_translated; eauto.
+ rewrite <- H3; apply sig_preserved.
+ constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+ forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+ intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+ forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+ eapply forward_simulation_plus.
+ apply senv_preserved.
+ eexact transf_initial_states.
+ eexact transf_final_states.
+ exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index b08c3ad7..fbf9bbeb 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -509,6 +509,10 @@ Definition do_ef_debug (kind: positive) (text: ident) (targs: list typ)
(w: world) (vargs: list val) (m: mem) : option (world * trace * val * mem) :=
Some(w, E0, Vundef, m).
+Definition do_ef_profiling (id : profiling_id)
+ (w: world) (vargs: list val) (m: mem) : option (world * trace * val * mem) :=
+ Some(w, E0, Vundef, m).
+
Definition do_builtin_or_external (name: string) (sg: signature)
(w: world) (vargs: list val) (m: mem) : option (world * trace * val * mem) :=
match lookup_builtin_function name sg with
@@ -531,6 +535,7 @@ Definition do_external (ef: external_function):
| EF_annot_val kind text targ => do_ef_annot_val text targ
| EF_inline_asm text sg clob => do_inline_assembly text sg ge
| EF_debug kind text targs => do_ef_debug kind text targs
+ | EF_profiling id kind => do_ef_profiling id
end.
Lemma do_ef_external_sound:
@@ -598,6 +603,8 @@ Proof with try congruence.
eapply do_inline_assembly_sound; eauto.
- (* EF_debug *)
unfold do_ef_debug. mydestr. split; constructor.
+- (* EF_profiling *)
+ unfold do_ef_profiling. mydestr. split; constructor.
Qed.
Lemma do_ef_external_complete:
@@ -652,6 +659,8 @@ Proof.
eapply do_inline_assembly_complete; eauto.
- (* EF_debug *)
inv H. inv H0. reflexivity.
+- (* EF_profiling *)
+ inv H. inv H0. reflexivity.
Qed.
(** * Reduction of expressions *)
diff --git a/common/AST.v b/common/AST.v
index eb34d675..268e13d5 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -464,6 +464,11 @@ Qed.
(** * External functions *)
+(* Identifiers for profiling information *)
+Parameter profiling_id : Type.
+Axiom profiling_id_eq : forall (x y : profiling_id), {x=y} + {x<>y}.
+Definition profiling_kind := Z.t.
+
(** For most languages, the functions composing the program are either
internal functions, defined within the language, or external functions,
defined outside. External functions include system calls but also
@@ -514,10 +519,13 @@ Inductive external_function : Type :=
used with caution, as it can invalidate the semantic
preservation theorem. Generated only if [-finline-asm] is
given. *)
- | EF_debug (kind: positive) (text: ident) (targs: list typ).
+ | EF_debug (kind: positive) (text: ident) (targs: list typ)
(** Transport debugging information from the front-end to the generated
assembly. Takes zero, one or several arguments like [EF_annot].
Unlike [EF_annot], produces no observable event. *)
+ | EF_profiling (id: profiling_id) (kind : profiling_kind).
+ (** Count one profiling event for this identifier and kind.
+ Takes no argument. Produces no observable event. *)
(** The type signature of an external function. *)
@@ -535,6 +543,7 @@ Definition ef_sig (ef: external_function): signature :=
| EF_annot_val kind text targ => mksignature (targ :: nil) targ cc_default
| EF_inline_asm text sg clob => sg
| EF_debug kind text targs => mksignature targs Tvoid cc_default
+ | EF_profiling id kind => mksignature nil Tvoid cc_default
end.
(** Whether an external function should be inlined by the compiler. *)
@@ -553,6 +562,7 @@ Definition ef_inline (ef: external_function) : bool :=
| EF_annot_val kind Text rg => true
| EF_inline_asm text sg clob => true
| EF_debug kind text targs => true
+ | EF_profiling id kind => true
end.
(** Whether an external function must reload its arguments. *)
@@ -568,7 +578,7 @@ Definition ef_reloads (ef: external_function) : bool :=
Definition external_function_eq: forall (ef1 ef2: external_function), {ef1=ef2} + {ef1<>ef2}.
Proof.
- generalize ident_eq string_dec signature_eq chunk_eq typ_eq list_eq_dec zeq Int.eq_dec; intros.
+ generalize profiling_id_eq ident_eq string_dec signature_eq chunk_eq typ_eq list_eq_dec zeq Int.eq_dec; intros.
decide equality.
Defined.
Global Opaque external_function_eq.
diff --git a/common/Events.v b/common/Events.v
index 28bb992a..033e2e03 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -1378,6 +1378,11 @@ Inductive extcall_debug_sem (ge: Senv.t):
| extcall_debug_sem_intro: forall vargs m,
extcall_debug_sem ge vargs m E0 Vundef m.
+Inductive extcall_profiling_sem (ge: Senv.t):
+ list val -> mem -> trace -> val -> mem -> Prop :=
+ | extcall_profiling_sem_intro: forall vargs m,
+ extcall_profiling_sem ge vargs m E0 Vundef m.
+
Lemma extcall_debug_ok:
forall targs,
extcall_properties extcall_debug_sem
@@ -1412,6 +1417,40 @@ Proof.
split. constructor. auto.
Qed.
+Lemma extcall_profiling_ok:
+ forall targs,
+ extcall_properties extcall_profiling_sem
+ (mksignature targs Tvoid cc_default).
+Proof.
+ intros; constructor; intros.
+(* well typed *)
+- inv H. simpl. auto.
+(* symbols *)
+- inv H0. econstructor; eauto.
+(* valid blocks *)
+- inv H; auto.
+(* perms *)
+- inv H; auto.
+(* readonly *)
+- inv H; auto.
+(* mem extends *)
+- inv H.
+ exists Vundef; exists m1'; intuition.
+ econstructor; eauto.
+(* mem injects *)
+- inv H0.
+ exists f; exists Vundef; exists m1'; intuition.
+ econstructor; eauto.
+ red; intros; congruence.
+(* trace length *)
+- inv H; simpl; omega.
+(* receptive *)
+- inv H; inv H0. exists Vundef, m1; constructor.
+(* determ *)
+- inv H; inv H0.
+ split. constructor. auto.
+Qed.
+
(** ** Semantics of known built-in functions. *)
(** Some built-in functions and runtime support functions have known semantics
@@ -1530,6 +1569,7 @@ Definition external_call (ef: external_function): extcall_sem :=
| EF_annot_val kind txt targ => extcall_annot_val_sem txt targ
| EF_inline_asm txt sg clb => inline_assembly_sem txt sg
| EF_debug kind txt targs => extcall_debug_sem
+ | EF_profiling id kind => extcall_profiling_sem
end.
Theorem external_call_spec:
@@ -1537,18 +1577,19 @@ Theorem external_call_spec:
extcall_properties (external_call ef) (ef_sig ef).
Proof.
intros. unfold external_call, ef_sig; destruct ef.
- apply external_functions_properties.
- apply builtin_or_external_sem_ok.
- apply builtin_or_external_sem_ok.
- apply volatile_load_ok.
- apply volatile_store_ok.
- apply extcall_malloc_ok.
- apply extcall_free_ok.
- apply extcall_memcpy_ok.
- apply extcall_annot_ok.
- apply extcall_annot_val_ok.
- apply inline_assembly_properties.
- apply extcall_debug_ok.
+- apply external_functions_properties.
+- apply builtin_or_external_sem_ok.
+- apply builtin_or_external_sem_ok.
+- apply volatile_load_ok.
+- apply volatile_store_ok.
+- apply extcall_malloc_ok.
+- apply extcall_free_ok.
+- apply extcall_memcpy_ok.
+- apply extcall_annot_ok.
+- apply extcall_annot_val_ok.
+- apply inline_assembly_properties.
+- apply extcall_debug_ok.
+- apply extcall_profiling_ok.
Qed.
Definition external_call_well_typed_gen ef := ec_well_typed (external_call_spec ef).
diff --git a/common/PrintAST.ml b/common/PrintAST.ml
index 3f718428..38bbfa47 100644
--- a/common/PrintAST.ml
+++ b/common/PrintAST.ml
@@ -47,6 +47,13 @@ let name_of_chunk = function
| Many32 -> "any32"
| Many64 -> "any64"
+let spp_profiling_id () (x : Digest.t) : string =
+ let s = Buffer.create 32 in
+ for i=0 to 15 do
+ Printf.bprintf s "%02x" (Char.code (String.get x i))
+ done;
+ Buffer.contents s;;
+
let name_of_external = function
| EF_external(name, sg) -> sprintf "extern %S" (camlstring_of_coqstring name)
| EF_builtin(name, sg) -> sprintf "builtin %S" (camlstring_of_coqstring name)
@@ -61,7 +68,9 @@ let name_of_external = function
| EF_annot_val(kind,text, targ) -> sprintf "annot_val %S" (camlstring_of_coqstring text)
| EF_inline_asm(text, sg, clob) -> sprintf "inline_asm %S" (camlstring_of_coqstring text)
| EF_debug(kind, text, targs) ->
- sprintf "debug%d %S" (P.to_int kind) (extern_atom text)
+ sprintf "debug%d %S" (P.to_int kind) (extern_atom text)
+ | EF_profiling(id, kind) ->
+ sprintf "profiling %a %d" spp_profiling_id id (Z.to_int kind)
let rec print_builtin_arg px oc = function
| BA x -> px oc x
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index ff2647a7..558e1d09 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -84,3 +84,6 @@ let option_fcoalesce_mem = ref true
let option_fforward_moves = ref true
let option_all_loads_nontrap = ref false
let option_inline_auto_threshold = ref 0
+
+let option_profile_arcs = ref false
+let option_fbranch_probabilities = ref true
diff --git a/driver/Compiler.v b/driver/Compiler.v
index 3dbd35ce..ee091c37 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -37,6 +37,8 @@ Require Selection.
Require RTLgen.
Require Tailcall.
Require Inlining.
+Require Profiling.
+Require ProfilingExploit.
Require Renumber.
Require Duplicate.
Require Constprop.
@@ -63,6 +65,8 @@ Require Selectionproof.
Require RTLgenproof.
Require Tailcallproof.
Require Inliningproof.
+Require Profilingproof.
+Require ProfilingExploitproof.
Require Renumberproof.
Require Duplicateproof.
Require Constpropproof.
@@ -134,28 +138,32 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
@@ print (print_RTL 1)
@@@ time "Inlining" Inlining.transf_program
@@ print (print_RTL 2)
- @@ time "Renumbering" Renumber.transf_program
+ @@ total_if Compopts.profile_arcs (time "Profiling insertion" Profiling.transf_program)
@@ print (print_RTL 3)
- @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program)
+ @@ total_if Compopts.branch_probabilities (time "Profiling use" ProfilingExploit.transf_program)
@@ print (print_RTL 4)
- @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
+ @@ time "Renumbering" Renumber.transf_program
@@ print (print_RTL 5)
- @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program)
+ @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program)
@@ print (print_RTL 6)
- @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program)
+ @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
@@ print (print_RTL 7)
- @@ total_if Compopts.optim_CSE2 (time "CSE2" CSE2.transf_program)
+ @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program)
@@ print (print_RTL 8)
- @@@ partial_if Compopts.optim_CSE3 (time "CSE3" CSE3.transf_program)
+ @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program)
@@ print (print_RTL 9)
- @@ total_if Compopts.optim_forward_moves ForwardMoves.transf_program
+ @@ total_if Compopts.optim_CSE2 (time "CSE2" CSE2.transf_program)
@@ print (print_RTL 10)
- @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program)
+ @@@ partial_if Compopts.optim_CSE3 (time "CSE3" CSE3.transf_program)
@@ print (print_RTL 11)
- @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program
+ @@ total_if Compopts.optim_forward_moves ForwardMoves.transf_program
@@ print (print_RTL 12)
- @@@ time "Unused globals" Unusedglob.transform_program
+ @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program)
@@ print (print_RTL 13)
+ @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program
+ @@ print (print_RTL 14)
+ @@@ time "Unused globals" Unusedglob.transform_program
+ @@ print (print_RTL 15)
@@@ time "Register allocation" Allocation.transf_program
@@ print print_LTL
@@ time "Branch tunneling" Tunneling.tunnel_program
@@ -257,6 +265,8 @@ Definition CompCert's_passes :=
::: mkpass RTLgenproof.match_prog
::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog)
::: mkpass Inliningproof.match_prog
+ ::: mkpass (match_if Compopts.profile_arcs Profilingproof.match_prog)
+ ::: mkpass (match_if Compopts.branch_probabilities ProfilingExploitproof.match_prog)
::: mkpass Renumberproof.match_prog
::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog)
::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog)
@@ -305,7 +315,9 @@ Proof.
unfold transf_rtl_program, time in T. rewrite ! compose_print_identity in T. simpl in T.
set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *.
destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; simpl in T; try discriminate.
- set (p9 := Renumber.transf_program p8) in *.
+ set (p8bis := total_if profile_arcs Profiling.transf_program p8) in *.
+ set (p8ter := total_if branch_probabilities ProfilingExploit.transf_program p8bis) in *.
+ set (p9 := Renumber.transf_program p8ter) in *.
destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
set (p11 := total_if optim_constprop Constprop.transf_program p10) in *.
set (p12 := total_if optim_constprop Renumber.transf_program p11) in *.
@@ -331,6 +343,8 @@ Proof.
exists p6; split. apply RTLgenproof.transf_program_match; auto.
exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match.
exists p8; split. apply Inliningproof.transf_program_match; auto.
+ exists p8bis; split. apply total_if_match. apply Profilingproof.transf_program_match; auto.
+ exists p8ter; split. apply total_if_match. apply ProfilingExploitproof.transf_program_match; auto.
exists p9; split. apply Renumberproof.transf_program_match; auto.
exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto.
exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match.
@@ -399,7 +413,7 @@ Ltac DestructM :=
destruct H as (p & M & MM); clear H
end.
repeat DestructM. subst tp.
- assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p26)).
+ assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p28)).
{
eapply compose_forward_simulations.
eapply SimplExprproof.transl_program_correct; eassumption.
@@ -417,6 +431,10 @@ Ltac DestructM :=
eapply match_if_simulation. eassumption. exact Tailcallproof.transf_program_correct.
eapply compose_forward_simulations.
eapply Inliningproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. exact Profilingproof.transf_program_correct.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. exact ProfilingExploitproof.transf_program_correct.
eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption.
eapply compose_forward_simulations.
eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index a3181da8..1e10009d 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -81,6 +81,12 @@ Parameter all_loads_nontrap: unit -> bool.
(** Flag -fforward-moves. Forward moves after CSE. *)
Parameter optim_forward_moves: unit -> bool.
+(** Flag -fprofile-arcs. Add profiling logger. *)
+Parameter profile_arcs : unit -> bool.
+
+(** Flag -fbranch_probabilities. Use profiling information if available *)
+Parameter branch_probabilities : unit -> bool.
+
(* TODO is there a more appropriate place? *)
Require Import Coqlib.
Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index b167dbd1..72715e65 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -224,7 +224,10 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>)
-falign-functions <n> Set alignment (in bytes) of function entry points
-falign-branch-targets <n> Set alignment (in bytes) of branch targets
-falign-cond-branches <n> Set alignment (in bytes) of conditional branches
- -fcommon Put uninitialized globals in the common section [on].
+ -fcommon Put uninitialized globals in the common section [on]
+ -fprofile-arcs Profile branches [off].
+ -fprofile-use= filename Use profiling information in filename
+ -fbranch-probabilities Use profiling information (if available) for branches [on]
|} ^
target_help ^
toolchain_help ^
@@ -329,6 +332,7 @@ let cmdline_actions =
_Regexp "-O[123]$", Unit (set_all optimization_options);
Exact "-Os", Set option_Osize;
Exact "-Obranchless", Set option_Obranchless;
+ Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s);
Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n);
Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
@@ -416,7 +420,9 @@ let cmdline_actions =
@ f_opt "coalesce-mem" option_fcoalesce_mem
@ f_opt "all-loads-nontrap" option_all_loads_nontrap
@ f_opt "forward-moves" option_fforward_moves
-(* Code generation options *)
+ (* Code generation options *)
+ @ f_opt "profile-arcs" option_profile_arcs
+ @ f_opt "branch-probabilities" option_fbranch_probabilities
@ f_opt "fpu" option_ffpu
@ f_opt "sse" option_ffpu (* backward compatibility *)
@ [
diff --git a/extraction/extraction.v b/extraction/extraction.v
index cb461361..98eecc76 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -144,6 +144,10 @@ Extract Constant Compopts.va_strict =>
"fun _ -> false".
Extract Constant Compopts.all_loads_nontrap =>
"fun _ -> !Clflags.option_all_loads_nontrap".
+Extract Constant Compopts.profile_arcs =>
+"fun _ -> !Clflags.option_profile_arcs".
+Extract Constant Compopts.branch_probabilities =>
+ "fun _ -> !Clflags.option_fbranch_probabilities".
(* Compiler *)
Extract Constant Compiler.print_Clight => "PrintClight.print_if".
@@ -154,9 +158,17 @@ Extract Constant Compiler.print_Mach => "PrintMach.print_if".
Extract Constant Compiler.print => "fun (f: 'a -> unit) (x: 'a) -> f x; x".
Extract Constant Compiler.time => "Timing.time_coq".
Extract Constant Compopts.time => "Timing.time_coq".
-
(*Extraction Inline Compiler.apply_total Compiler.apply_partial.*)
+(* Profiling *)
+Extract Constant AST.profiling_id => "Digest.t".
+Extract Constant AST.profiling_id_eq => "Digest.equal".
+Extract Constant Profiling.function_id => "Profilingaux.function_id".
+Extract Constant Profiling.branch_id => "Profilingaux.branch_id".
+Extract Constant ProfilingExploit.function_id => "Profilingaux.function_id".
+Extract Constant ProfilingExploit.branch_id => "Profilingaux.branch_id".
+Extract Constant ProfilingExploit.condition_oracle => "Profilingaux.condition_oracle".
+
(* Cabs *)
Extract Constant Cabs.loc =>
"{ lineno : int;
diff --git a/mppa_k1c/Asmexpand.ml b/mppa_k1c/Asmexpand.ml
index 8ab10bc5..e388d2aa 100644
--- a/mppa_k1c/Asmexpand.ml
+++ b/mppa_k1c/Asmexpand.ml
@@ -591,6 +591,7 @@ let expand_instruction instr =
| EF_external _ -> failwith "asmexpand: external"
| EF_inline_asm _ -> emit instr
| EF_runtime _ -> failwith "asmexpand: runtime"
+ | EF_profiling _ -> emit instr
end
| _ ->
emit instr
diff --git a/mppa_k1c/Machregs.v b/mppa_k1c/Machregs.v
index 8098b5d1..cff1164c 100644
--- a/mppa_k1c/Machregs.v
+++ b/mppa_k1c/Machregs.v
@@ -171,6 +171,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
if Z.leb sz 15
then R62 :: R63 :: R61 :: nil
else R62 :: R63 :: R61 :: R60 :: nil
+ | EF_profiling _ _ => R62 :: R63 ::nil
| _ => nil
end.
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 2489b959..43177019 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -249,7 +249,20 @@ module Target (*: TARGET*) =
(*let w oc =
if Archi.ptr64 then output_string oc "w"
*)
-(* Offset part of a load or store *)
+
+ (* Profiling *)
+
+
+ let k1c_profiling_stub oc nr_items
+ profiling_id_table_name
+ profiling_counter_table_name =
+ fprintf oc " make $r0 = %d\n" nr_items;
+ fprintf oc " make $r1 = %s\n" profiling_id_table_name;
+ fprintf oc " make $r2 = %s\n" profiling_counter_table_name;
+ fprintf oc " goto %s\n" profiling_write_table_helper;
+ fprintf oc " ;;\n";;
+
+ (* Offset part of a load or store *)
let offset oc n = ptrofs oc n
@@ -338,6 +351,18 @@ module Target (*: TARGET*) =
fprintf oc "%s begin inline assembly\n\t" comment;
print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
fprintf oc "%s end inline assembly\n" comment
+ | EF_profiling(id, coq_kind) ->
+ let kind = Z.to_int coq_kind in
+ assert (kind >= 0);
+ assert (kind <= 1);
+ fprintf oc "%s profiling %a %d\n" comment
+ Profilingaux.pp_id id kind;
+ fprintf oc " make $r63 = %s\n" profiling_counter_table_name;
+ fprintf oc " make $r62 = 1\n";
+ fprintf oc " ;;\n";
+ fprintf oc " afaddd %d[$r63] = $r62\n"
+ (profiling_offset id kind);
+ fprintf oc " ;;\n"
| _ ->
assert false
end
@@ -799,8 +824,9 @@ module Target (*: TARGET*) =
if !Clflags.option_g then begin
section oc Section_text;
end
-
+
let print_epilogue oc =
+ print_profiling_epilogue elf_text_print_fun_info Dtors k1c_profiling_stub oc;
if !Clflags.option_g then begin
Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
section oc Section_text;
diff --git a/powerpc/AsmToJSON.ml b/powerpc/AsmToJSON.ml
index f4d4285a..38f4bc75 100644
--- a/powerpc/AsmToJSON.ml
+++ b/powerpc/AsmToJSON.ml
@@ -365,6 +365,7 @@ let pp_instructions pp ic =
| EF_annot_val _
| EF_builtin _
| EF_debug _
+ | EF_profiling _
| EF_external _
| EF_free
| EF_malloc
diff --git a/runtime/Makefile b/runtime/Makefile
index 3b1cabc4..bf979d5f 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -38,6 +38,8 @@ OBJS=i64_dtos.o i64_dtou.o i64_sar.o i64_sdiv.o i64_shl.o \
vararg.o
endif
+OBJS+=write_profiling_table.o
+
LIB=libcompcert.a
INCLUDES=include/float.h include/stdarg.h include/stdbool.h \
@@ -71,7 +73,7 @@ $(LIB): $(OBJS)
# generated assembly
%.o: c/%.c c/i64.h ../ccomp
- ../ccomp -O2 -S -o $*.s -I./c c/$*.c
+ ../ccomp -g -O2 -S -o $*.s -I./c c/$*.c
sed -i -e 's/i64_/__compcert_i64_/g' $*.s
$(CASMRUNTIME) -o $*.o $*.s
@rm $*.s
diff --git a/runtime/c/write_profiling_table.c b/runtime/c/write_profiling_table.c
new file mode 100644
index 00000000..0ce7a948
--- /dev/null
+++ b/runtime/c/write_profiling_table.c
@@ -0,0 +1,58 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+typedef uint8_t md5_hash[16];
+typedef uint64_t condition_counters[2];
+
+static void write_id(FILE *fp, md5_hash *hash) {
+ fwrite(hash, 16, 1, fp);
+}
+
+#define BYTE(counter, i) ((counter >> (8*i)) & 0xFF)
+static void write_counter(FILE *fp, uint64_t counter) {
+ putc(BYTE(counter, 0), fp);
+ putc(BYTE(counter, 1), fp);
+ putc(BYTE(counter, 2), fp);
+ putc(BYTE(counter, 3), fp);
+ putc(BYTE(counter, 4), fp);
+ putc(BYTE(counter, 5), fp);
+ putc(BYTE(counter, 6), fp);
+ putc(BYTE(counter, 7), fp);
+}
+
+void _compcert_write_profiling_table(unsigned int nr_items,
+ md5_hash id_table[],
+ condition_counters counter_table[]) {
+ errno = 0;
+
+ const char *filename = getenv("COMPCERT_PROFILING_DATA");
+ if (filename) {
+ if (!*filename) return;
+ } else {
+ filename = "compcert_profiling.dat";
+ }
+
+ FILE *fp = fopen(filename, "a");
+ //fprintf(stderr, "successfully opened profiling file\n");
+ if (fp == NULL) {
+ perror("open CompCert profiling data for writing");
+ return;
+ }
+
+ for(unsigned int i=0; i<nr_items; i++) {
+ write_id(fp, &id_table[i]);
+ write_counter(fp, counter_table[i][0]);
+ write_counter(fp, counter_table[i][1]);
+ }
+ //fprintf(stderr, "successfully written profiling file\n");
+
+ fclose(fp);
+ //fprintf(stderr, "successfully closed profiling file\n");
+ if (errno != 0) {
+ perror("write CompCert profiling data");
+ return;
+ }
+ // fprintf(stderr, "write CompCert profiling data: no error\n");
+}
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index c7dc582b..5011b18c 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -45,11 +45,16 @@ static inline cycle_t get_cycle(void) {
return cycles;
}
-#elif defined (__ARM_ARCH) && (__ARM_ARCH >= 6)
+#elif defined (__ARM_ARCH) // && (__ARM_ARCH >= 6)
#if (__ARM_ARCH < 8)
typedef uint32_t cycle_t;
#define PRcycle PRId32
+#ifdef ARM_NO_PRIVILEGE
+static inline cycle_t get_cycle(void) {
+ return 0;
+}
+#else
/* need this kernel module
https://github.com/zertyz/MTL/tree/master/cpp/time/kernel/arm */
static inline cycle_t get_cycle(void) {
@@ -57,14 +62,20 @@ static inline cycle_t get_cycle(void) {
__asm__ volatile ("mrc p15, 0, %0, c9, c13, 0":"=r" (cycles));
return cycles;
}
+#endif
#else
#define PRcycle PRId64
typedef uint64_t cycle_t;
+
+#ifdef ARM_NO_PRIVILEGE
+static inline cycle_t get_cycle(void) {
+ return 0;
+}
+#else
/* need this kernel module:
https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0
on 5+ kernels, remove first argument of access_ok macro */
-
static inline cycle_t get_cycle(void)
{
uint64_t val;
@@ -72,6 +83,7 @@ static inline cycle_t get_cycle(void)
return val;
}
#endif
+#endif
#else
#define PRcycle PRId32
diff --git a/test/monniaux/minisat/Makefile.on_marte b/test/monniaux/minisat/Makefile.on_marte
new file mode 100644
index 00000000..af7b9145
--- /dev/null
+++ b/test/monniaux/minisat/Makefile.on_marte
@@ -0,0 +1,16 @@
+EXE=minisat.ccomp.exe minisat.ccomp.trace-linearize.exe \
+ minisat.gcc-O3.exe \
+ minisat.ccomp.profiled.exe minisat.gcc-O3.profiled.exe
+
+LOG=$(EXE:.exe=.dat)
+
+all: $(LOG)
+
+%.log : %.exe
+ rm -f $@
+ for i in `seq 1 1000` ; do ./$< sudoku.sat >> $@; done
+
+%.dat : %.log
+ grep 'time cycles: ' $< | sed -e 's/time cycles: //' | awk '{ total += $$1; count++ } END { print total/count }' > $@
+
+.SECONDARY:
diff --git a/test/monniaux/minisat/Makefile.profiled b/test/monniaux/minisat/Makefile.profiled
new file mode 100644
index 00000000..77ba8b43
--- /dev/null
+++ b/test/monniaux/minisat/Makefile.profiled
@@ -0,0 +1,64 @@
+# -*- mode: makefile; -*-
+
+CFILES=main.c solver.c clock.c
+CCOMP=../../../ccomp
+
+#GCC=aarch64-linux-gnu-gcc
+GCC=k1-cos-gcc
+#EXECUTE=qemu-aarch64
+#EXECUTE=qemu-arm
+#EXECUTE=k1-cluster --
+#EXECUTE_CYCLES=k1-cluster --cycle-based --
+
+LIBS=-lm
+PROFILING_DAT=compcert_profiling.dat
+EXAMPLE=sudoku.sat
+CCOMPFLAGS=-static -finline-asm -finline-auto-threshold 50
+GCCFLAGS=-static
+ALL=minisat.ccomp.log minisat.ccomp.trace-linearize.log minisat.ccomp.profiled.log minisat.gcc-O3.log minisat.gcc-O3.profiled.log
+
+all: $(ALL)
+exe: $(ALL:.log=.exe)
+
+minisat.ccomp.exe: $(CFILES)
+ $(CCOMP) $(CCOMPFLAGS) $(CFILES) -o $@ $(LIBS)
+
+minisat.ccomp.profile-arcs.exe: $(CFILES)
+ $(CCOMP) -DARM_NO_PRIVILEGE $(CCOMPFLAGS) -fprofile-arcs $(CFILES) -o $@ $(LIBS)
+
+minisat.gcc-O3.exe: $(CFILES)
+ $(GCC) $(GCCFLAGS) -O3 $(CFILES) -o $@ $(LIBS)
+
+clock.gcc-O3.noprofile.o : clock.c
+ $(GCC) -DARM_NO_PRIVILEGE $(GCCFLAGS) -O3 -c $< -o @
+
+minisat.gcc-O3.profile-arcs.exe: main.c solver.c clock.gcc-O3.noprofile.o
+ $(GCC) -DARM_NO_PRIVILEGE $(GCCFLAGS) -fprofile-arcs -O3 $+ -o $@ $(LIBS)
+
+gcda: minisat.gcc-O3.profile-arcs.exe
+ $(EXECUTE) ./$< $(EXAMPLE)
+
+main.gcda solver.gcda: gcda
+
+minisat.gcc-O3.profiled.exe: $(CFILES) $(GCDAFILES)
+ $(GCC) $(GCCFLAGS) -O3 -fprofile-use $(CFILES) -o $@ $(LIBS)
+
+minisat.ccomp.trace-linearize.exe: $(CFILES)
+ $(CCOMP) $(CCOMPFLAGS) -fduplicate 0 -ftracelinearize $(CFILES) -o $@ $(LIBS)
+
+$(PROFILING_DAT): minisat.ccomp.profile-arcs.exe
+ -rm -f $(PROFILING_DAT)
+ $(EXECUTE) ./$< $(EXAMPLE)
+
+minisat.ccomp.profiled.exe: $(CFILES) $(PROFILING_DAT)
+ $(CCOMP) $(CCOMPFLAGS) -fprofile-use= $(PROFILING_DAT) -ftracelinearize $(CFILES) -o $@ $(LIBS)
+
+%.log : %.exe
+ $(EXECUTE_CYCLES) $< $(EXAMPLE) 2>&1 | tee $@
+
+clean:
+ -rm -f *.log *.exe $(PROFILING_DAT) $(GCDAFILES)
+
+.PHONY: clean gcda exe all
+
+.SECONDARY:
diff --git a/test/monniaux/minisat/clock.c b/test/monniaux/minisat/clock.c
new file mode 120000
index 00000000..d6bade99
--- /dev/null
+++ b/test/monniaux/minisat/clock.c
@@ -0,0 +1 @@
+../clock.c \ No newline at end of file
diff --git a/test/monniaux/minisat/cycles.h b/test/monniaux/minisat/cycles.h
new file mode 120000
index 00000000..84e54d21
--- /dev/null
+++ b/test/monniaux/minisat/cycles.h
@@ -0,0 +1 @@
+../cycles.h \ No newline at end of file
diff --git a/test/monniaux/minisat/k1c.inline_50.log b/test/monniaux/minisat/k1c.inline_50.log
new file mode 100644
index 00000000..438a06b4
--- /dev/null
+++ b/test/monniaux/minisat/k1c.inline_50.log
@@ -0,0 +1,14 @@
+==> minisat.ccomp.log <==
+time cycles: 3252345
+
+==> minisat.ccomp.profiled.log <==
+time cycles: 3150170
+
+==> minisat.ccomp.trace-linearize.log <==
+time cycles: 3192299
+
+==> minisat.gcc-O3.log <==
+time cycles: 2780324
+
+==> minisat.gcc-O3.profiled.log <==
+time cycles: 2487533
diff --git a/test/monniaux/minisat/solver.h b/test/monniaux/minisat/solver.h
index c9ce0219..4b96b017 100644
--- a/test/monniaux/minisat/solver.h
+++ b/test/monniaux/minisat/solver.h
@@ -19,6 +19,8 @@ OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWA
**************************************************************************************************/
// Modified to compile with MS Visual Studio 6.0 by Alan Mishchenko
+#include <stdint.h>
+
#ifndef solver_h
#define solver_h
@@ -39,11 +41,14 @@ static const bool false = 0;
typedef int lit;
typedef char lbool;
+#if 0
#ifdef _WIN32
typedef signed __int64 uint64; // compatible with MS VS 6.0
#else
typedef unsigned long long uint64;
#endif
+#endif
+typedef uint64_t uint64;
static const int var_Undef = -1;
static const lit lit_Undef = -2;
diff --git a/test/monniaux/profiling/profiling_call.c b/test/monniaux/profiling/profiling_call.c
new file mode 100644
index 00000000..ce20241d
--- /dev/null
+++ b/test/monniaux/profiling/profiling_call.c
@@ -0,0 +1,27 @@
+/*
+For knowing how to write assembly profiling stubs.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
+
+typedef uint8_t md5_hash[16];
+typedef uint64_t condition_counters[2];
+
+void _compcert_write_profiling_table(unsigned int nr_items,
+ md5_hash id_table[],
+ condition_counters counter_table[]);
+
+static md5_hash id_table[42] = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}};
+static condition_counters counter_table[42];
+
+void write_profile(void) {
+ _compcert_write_profiling_table(42, id_table, counter_table);
+}
+
+static _Atomic uint64_t counter;
+
+void incr_counter(void) {
+ counter++;
+}
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index b8353046..ad667e3d 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -552,7 +552,7 @@ let expand_instruction instr =
expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
| EF_annot_val(kind,txt, targ) ->
expand_annot_val kind txt targ args res
- | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+ | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
emit instr
| _ ->
assert false
diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml
index e371380c..38eff731 100644
--- a/x86/TargetPrinter.ml
+++ b/x86/TargetPrinter.ml
@@ -167,7 +167,44 @@ module ELF_System : SYSTEM =
let print_var_info = elf_print_var_info
- let print_epilogue _ = ()
+ let print_atexit oc to_be_called =
+ if Archi.ptr64
+ then
+ begin
+ fprintf oc " leaq %s(%%rip), %%rdi\n" to_be_called;
+ fprintf oc " jmp atexit\n"
+ end
+ else
+ begin
+ fprintf oc " pushl $%s\n" to_be_called;
+ fprintf oc " call atexit\n";
+ fprintf oc " addl $4, %%esp\n";
+ fprintf oc " ret\n"
+ end
+
+ let x86_profiling_stub oc nr_items
+ profiling_id_table_name
+ profiling_counter_table_name =
+ if Archi.ptr64
+ then
+ begin
+ fprintf oc " leaq %s(%%rip), %%rdx\n" profiling_counter_table_name;
+ fprintf oc " leaq %s(%%rip), %%rsi\n" profiling_id_table_name;
+ fprintf oc " movl $%d, %%edi\n" nr_items;
+ fprintf oc " jmp %s\n" profiling_write_table_helper
+ end
+ else
+ begin
+ fprintf oc " pushl $%s\n" profiling_counter_table_name;
+ fprintf oc " pushl $%s\n" profiling_id_table_name;
+ fprintf oc " pushl $%d\n" nr_items;
+ fprintf oc " call %s\n" profiling_write_table_helper ;
+ fprintf oc " addl $12, %%esp\n";
+ fprintf oc " ret\n"
+ end;;
+
+ let print_epilogue oc =
+ print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) x86_profiling_stub oc;;
let print_comm_decl oc name sz al =
fprintf oc " .comm %a, %s, %d\n" symbol name (Z.to_string sz) al
@@ -401,8 +438,28 @@ module Target(System: SYSTEM):TARGET =
fprintf oc "%a(%%rip)" label lbl
end
-
-
+ let print_profiling_logger oc id kind =
+ assert (kind >= 0);
+ assert (kind <= 1);
+ let ofs = profiling_offset id kind in
+ if Archi.ptr64
+ then
+ begin
+ fprintf oc "%s profiling %a %d: atomic increment\n" comment
+ Profilingaux.pp_id id kind;
+ fprintf oc " lock addq $1, %s+%d(%%rip)\n"
+ profiling_counter_table_name ofs
+ end
+ else
+ begin
+ fprintf oc "%s begin profiling %a %d: increment\n" comment
+ Profilingaux.pp_id id kind;
+ fprintf oc " addl $1, %s+%d\n" profiling_counter_table_name ofs;
+ fprintf oc " adcl $1, %s+%d\n" profiling_counter_table_name (ofs+4);
+ fprintf oc "%s end profiling %a %d: increment\n" comment
+ Profilingaux.pp_id id kind;
+ end
+
(* Printing of instructions *)
(* Reminder on X86 assembly syntaxes:
@@ -840,6 +897,8 @@ module Target(System: SYSTEM):TARGET =
fprintf oc "%s begin inline assembly\n\t" comment;
print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
fprintf oc "%s end inline assembly\n" comment
+ | EF_profiling(id, coq_kind) ->
+ print_profiling_logger oc id (Z.to_int coq_kind)
| _ ->
assert false
end