aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--driver/Clflags.ml1
-rw-r--r--driver/Compiler.v11
-rw-r--r--driver/Compopts.v4
-rw-r--r--driver/Driver.ml5
-rw-r--r--extraction/extraction.v2
5 files changed, 17 insertions, 6 deletions
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 9aa4a2bf..67ec9702 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -24,6 +24,7 @@ let option_fpacked_structs = ref false
let option_ffpu = ref true
let option_ffloatconstprop = ref 2
let option_ftailcalls = ref true
+let option_fduplicate = ref true
let option_fconstprop = ref true
let option_fcse = ref true
let option_fredundancy = ref true
diff --git a/driver/Compiler.v b/driver/Compiler.v
index 24964237..9f53a4fc 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -132,7 +132,7 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
@@ print (print_RTL 2)
@@ time "Renumbering" Renumber.transf_program
@@ print (print_RTL 3)
- @@@ time "Duplicating" Duplicate.transf_program
+ @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program)
@@ print (print_RTL 4)
@@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
@@ print (print_RTL 5)
@@ -250,7 +250,7 @@ Definition CompCert's_passes :=
::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog)
::: mkpass Inliningproof.match_prog
::: mkpass Renumberproof.match_prog
- ::: mkpass Duplicateproof.match_prog
+ ::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog)
::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog)
::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog)
::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog)
@@ -296,7 +296,7 @@ Proof.
set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *.
destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; simpl in T; try discriminate.
set (p9 := Renumber.transf_program p8) in *.
- destruct (Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
+ destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
set (p11 := total_if optim_constprop Constprop.transf_program p10) in *.
set (p12 := total_if optim_constprop Renumber.transf_program p11) in *.
destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate.
@@ -320,7 +320,7 @@ Proof.
exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match.
exists p8; split. apply Inliningproof.transf_program_match; auto.
exists p9; split. apply Renumberproof.transf_program_match; auto.
- exists p10; split. apply Duplicateproof.transf_program_match; auto.
+ exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto.
exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match.
exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match.
exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match.
@@ -404,7 +404,8 @@ Ltac DestructM :=
eapply compose_forward_simulations.
eapply Inliningproof.transf_program_correct; eassumption.
eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption.
- eapply compose_forward_simulations. eapply Duplicateproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct.
eapply compose_forward_simulations.
eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct.
eapply compose_forward_simulations.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index fdd2b1d6..a979c69b 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -27,6 +27,10 @@ Parameter generate_float_constants: unit -> bool.
(** For value analysis. Currently always false. *)
Parameter va_strict: unit -> bool.
+(** Flag -fduplicate. For tail duplication optimization. Necessary to have
+ * bigger superblocks *)
+Parameter optim_duplicate: unit -> bool.
+
(** Flag -ftailcalls. For tail call optimization. *)
Parameter optim_tailcalls: unit -> bool.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 992cf8c4..5d08dc6b 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -199,6 +199,7 @@ Processing options:
-fpostpass Perform postpass scheduling (only for K1 architecture) [on]
-fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
(<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+ -fduplicate Perform tail duplication to form superblocks on predicted traces
-fforward-moves Forward moves after CSE
-finline Perform inlining of functions [on]
-finline-functions-called-once Integrate functions only required by their
@@ -260,6 +261,7 @@ let dump_mnemonics destfile =
let optimization_options = [
option_ftailcalls; option_fifconversion; option_fconstprop; option_fcse;
option_fpostpass; option_fredundancy; option_finline_functions_called_once;
+ option_fduplicate
]
let set_all opts () = List.iter (fun r -> r := true) opts
@@ -310,7 +312,7 @@ let cmdline_actions =
[
Exact "-O0", Unit (unset_all optimization_options);
Exact "-O", Unit (set_all optimization_options);
- _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false);
+ _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false; option_fduplicate := false);
_Regexp "-O[123]$", Unit (set_all optimization_options);
Exact "-Os", Set option_Osize;
Exact "-Obranchless", Set option_Obranchless;
@@ -384,6 +386,7 @@ let cmdline_actions =
@ f_opt "cse" option_fcse
@ f_opt "redundancy" option_fredundancy
@ f_opt "postpass" option_fpostpass
+ @ f_opt "duplicate" option_fduplicate
@ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
@ f_opt "inline" option_finline
@ f_opt "inline-functions-called-once" option_finline_functions_called_once
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 0c19ea70..c2f97778 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -105,6 +105,8 @@ Extract Constant Compopts.generate_float_constants =>
"fun _ -> !Clflags.option_ffloatconstprop >= 2".
Extract Constant Compopts.optim_tailcalls =>
"fun _ -> !Clflags.option_ftailcalls".
+Extract Constant Compopts.optim_duplicate =>
+ "fun _ -> !Clflags.option_fduplicate".
Extract Constant Compopts.optim_constprop =>
"fun _ -> !Clflags.option_fconstprop".
Extract Constant Compopts.optim_CSE =>