From bda5ee25ac991c38f5541a234936f1f6e2226072 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Sun, 16 Nov 2014 18:39:43 +0100 Subject: Add flags to control individual optimization passes + flag -O0 for turning them off. --- backend/Tailcall.v | 3 +- backend/Tailcallproof.v | 10 ++--- driver/Clflags.ml | 3 ++ driver/Compiler.v | 98 +++++++++++++++++++++++++++++++++++++------------ driver/Compopts.v | 11 +++++- driver/Driver.ml | 41 +++++++++++++++------ extraction/extraction.v | 8 +++- 7 files changed, 128 insertions(+), 46 deletions(-) diff --git a/backend/Tailcall.v b/backend/Tailcall.v index 25da531c..db246fec 100644 --- a/backend/Tailcall.v +++ b/backend/Tailcall.v @@ -14,7 +14,6 @@ Require Import Coqlib. Require Import Maps. -Require Import Compopts. Require Import AST. Require Import Registers. Require Import Op. @@ -100,7 +99,7 @@ Definition transf_instr (f: function) (pc: node) (instr: instruction) := using a compilation option. *) Definition transf_function (f: function) : function := - if zeq f.(fn_stacksize) 0 && eliminate_tailcalls tt + if zeq f.(fn_stacksize) 0 then RTL.transf_function (transf_instr f) f else f. diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v index 1965b18e..cc4ff55e 100644 --- a/backend/Tailcallproof.v +++ b/backend/Tailcallproof.v @@ -13,7 +13,6 @@ (** Recognition of tail calls: correctness proof *) Require Import Coqlib. -Require Import Compopts. Require Import Maps. Require Import AST. Require Import Integers. @@ -183,11 +182,10 @@ Lemma transf_instr_lookup: exists i', (transf_function f).(fn_code)!pc = Some i' /\ transf_instr_spec f i i'. Proof. intros. unfold transf_function. - destruct (zeq (fn_stacksize f) 0). destruct (eliminate_tailcalls tt). + destruct (zeq (fn_stacksize f) 0). simpl. rewrite PTree.gmap. rewrite H. simpl. exists (transf_instr f pc i); split. auto. apply transf_instr_charact; auto. exists i; split. auto. constructor. - exists i; split. auto. constructor. Qed. (** * Semantic properties of the code transformation *) @@ -263,14 +261,14 @@ Lemma sig_preserved: forall f, funsig (transf_fundef f) = funsig f. Proof. destruct f; auto. simpl. unfold transf_function. - destruct (zeq (fn_stacksize f) 0 && eliminate_tailcalls tt); auto. + destruct (zeq (fn_stacksize f) 0); auto. Qed. Lemma stacksize_preserved: forall f, fn_stacksize (transf_function f) = fn_stacksize f. Proof. unfold transf_function. intros. - destruct (zeq (fn_stacksize f) 0 && eliminate_tailcalls tt); auto. + destruct (zeq (fn_stacksize f) 0); auto. Qed. Lemma find_function_translated: @@ -556,7 +554,7 @@ Proof. assert (fn_stacksize (transf_function f) = fn_stacksize f /\ fn_entrypoint (transf_function f) = fn_entrypoint f /\ fn_params (transf_function f) = fn_params f). - unfold transf_function. destruct (zeq (fn_stacksize f) 0 && eliminate_tailcalls tt); auto. + unfold transf_function. destruct (zeq (fn_stacksize f) 0); auto. destruct H0 as [EQ1 [EQ2 EQ3]]. left. econstructor; split. simpl. eapply exec_function_internal; eauto. rewrite EQ1; eauto. diff --git a/driver/Clflags.ml b/driver/Clflags.ml index c6217ba1..ead27b36 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -24,6 +24,9 @@ let option_fpacked_structs = ref false let option_ffpu = ref true let option_ffloatconstprop = ref 2 let option_ftailcalls = ref true +let option_fconstprop = ref true +let option_fcse = ref true +let option_fredundancy = ref true let option_falignfunctions = ref (None: int option) let option_falignbranchtargets = ref 0 let option_faligncondbranchs = ref 0 diff --git a/driver/Compiler.v b/driver/Compiler.v index ae54f487..fb813c7c 100644 --- a/driver/Compiler.v +++ b/driver/Compiler.v @@ -71,6 +71,8 @@ Require Linearizeproof. Require CleanupLabelsproof. Require Stackingproof. Require Asmgenproof. +(** Command-line flags. *) +Require Import Compopts. (** Pretty-printers (defined in Caml). *) Parameter print_Clight: Clight.program -> unit. @@ -103,6 +105,14 @@ Definition print {A: Type} (printer: A -> unit) (prog: A) : A := Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f. +Definition total_if {A: Type} + (flag: unit -> bool) (f: A -> A) (prog: A) : A := + if flag tt then f prog else prog. + +Definition partial_if {A: Type} + (flag: unit -> bool) (f: A -> res A) (prog: A) : res A := + if flag tt then f prog else OK prog. + (** We define three translation functions for whole programs: one starting with a C program, one with a Cminor program, one with an RTL program. The three translations produce Asm programs ready for @@ -111,24 +121,24 @@ Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f. Definition transf_rtl_program (f: RTL.program) : res Asm.program := OK f @@ print (print_RTL 0) - @@ time "Tail calls" Tailcall.transf_program + @@ total_if Compopts.optim_tailcalls (time "Tail calls" Tailcall.transf_program) @@ print (print_RTL 1) @@@ time "Inlining" Inlining.transf_program @@ print (print_RTL 2) @@ time "Renumbering" Renumber.transf_program @@ print (print_RTL 3) - @@ time "Constant propagation" Constprop.transf_program + @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program) @@ print (print_RTL 4) - @@ time "Renumbering" Renumber.transf_program + @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program) @@ print (print_RTL 5) - @@@ time "CSE" CSE.transf_program + @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program) @@ print (print_RTL 6) - @@@ time "Dead code" Deadcode.transf_program + @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program) @@ print (print_RTL 7) @@@ time "Register allocation" Allocation.transf_program @@ print print_LTL @@ time "Branch tunneling" Tunneling.tunnel_program - @@@ Linearize.transf_program + @@@ time "CFG linearization" Linearize.transf_program @@ time "Label cleanup" CleanupLabels.transf_program @@@ time "Mach generation" Stacking.transf_program @@ print print_Mach @@ -175,6 +185,33 @@ Proof. intros. destruct x; simpl. rewrite print_identity. auto. auto. Qed. +Remark forward_simulation_identity: + forall sem, forward_simulation sem sem. +Proof. + intros. apply forward_simulation_step with (fun s1 s2 => s2 = s1); intros. +- auto. +- exists s1; auto. +- subst s2; auto. +- subst s2. exists s1'; auto. +Qed. + +Lemma total_if_simulation: + forall (A: Type) (sem: A -> semantics) (flag: unit -> bool) (f: A -> A) (prog: A), + (forall p, forward_simulation (sem p) (sem (f p))) -> + forward_simulation (sem prog) (sem (total_if flag f prog)). +Proof. + intros. unfold total_if. destruct (flag tt). auto. apply forward_simulation_identity. +Qed. + +Lemma partial_if_simulation: + forall (A: Type) (sem: A -> semantics) (flag: unit -> bool) (f: A -> res A) (prog tprog: A), + partial_if flag f prog = OK tprog -> + (forall p tp, f p = OK tp -> forward_simulation (sem p) (sem tp)) -> + forward_simulation (sem prog) (sem tprog). +Proof. + intros. unfold partial_if in *. destruct (flag tt). eauto. inv H. apply forward_simulation_identity. +Qed. + (** * Semantic preservation *) (** We prove that the [transf_program] translations preserve semantics @@ -200,31 +237,44 @@ Proof. unfold transf_rtl_program, time in H. repeat rewrite compose_print_identity in H. simpl in H. - set (p1 := Tailcall.transf_program p) in *. + set (p1 := total_if optim_tailcalls Tailcall.transf_program p) in *. destruct (Inlining.transf_program p1) as [p11|] eqn:?; simpl in H; try discriminate. set (p12 := Renumber.transf_program p11) in *. - set (p2 := Constprop.transf_program p12) in *. - set (p21 := Renumber.transf_program p2) in *. - destruct (CSE.transf_program p21) as [p3|] eqn:?; simpl in H; try discriminate. - destruct (Deadcode.transf_program p3) as [p31|] eqn:?; simpl in H; try discriminate. + set (p2 := total_if optim_constprop Constprop.transf_program p12) in *. + set (p21 := total_if optim_constprop Renumber.transf_program p2) in *. + destruct (partial_if optim_CSE CSE.transf_program p21) as [p3|] eqn:?; simpl in H; try discriminate. + destruct (partial_if optim_redundancy Deadcode.transf_program p3) as [p31|] eqn:?; simpl in H; try discriminate. destruct (Allocation.transf_program p31) as [p4|] eqn:?; simpl in H; try discriminate. set (p5 := Tunneling.tunnel_program p4) in *. destruct (Linearize.transf_program p5) as [p6|] eqn:?; simpl in H; try discriminate. set (p7 := CleanupLabels.transf_program p6) in *. destruct (Stacking.transf_program p7) as [p8|] eqn:?; simpl in H; try discriminate. - eapply compose_forward_simulation. apply Tailcallproof.transf_program_correct. - eapply compose_forward_simulation. apply Inliningproof.transf_program_correct. eassumption. - eapply compose_forward_simulation. apply Renumberproof.transf_program_correct. - eapply compose_forward_simulation. apply Constpropproof.transf_program_correct. - eapply compose_forward_simulation. apply Renumberproof.transf_program_correct. - eapply compose_forward_simulation. apply CSEproof.transf_program_correct. eassumption. - eapply compose_forward_simulation. apply Deadcodeproof.transf_program_correct. eassumption. - eapply compose_forward_simulation. apply Allocproof.transf_program_correct. eassumption. - eapply compose_forward_simulation. apply Tunnelingproof.transf_program_correct. - eapply compose_forward_simulation. apply Linearizeproof.transf_program_correct. eassumption. - eapply compose_forward_simulation. apply CleanupLabelsproof.transf_program_correct. - eapply compose_forward_simulation. apply Stackingproof.transf_program_correct. - eexact Asmgenproof.return_address_exists. eassumption. + apply compose_forward_simulation with (RTL.semantics p1). + apply total_if_simulation. apply Tailcallproof.transf_program_correct. + apply compose_forward_simulation with (RTL.semantics p11). + apply Inliningproof.transf_program_correct; auto. + apply compose_forward_simulation with (RTL.semantics p12). + apply Renumberproof.transf_program_correct. + apply compose_forward_simulation with (RTL.semantics p2). + apply total_if_simulation. apply Constpropproof.transf_program_correct. + apply compose_forward_simulation with (RTL.semantics p21). + apply total_if_simulation. apply Renumberproof.transf_program_correct. + apply compose_forward_simulation with (RTL.semantics p3). + eapply partial_if_simulation; eauto. apply CSEproof.transf_program_correct. + apply compose_forward_simulation with (RTL.semantics p31). + eapply partial_if_simulation; eauto. apply Deadcodeproof.transf_program_correct. + apply compose_forward_simulation with (LTL.semantics p4). + apply Allocproof.transf_program_correct; auto. + apply compose_forward_simulation with (LTL.semantics p5). + apply Tunnelingproof.transf_program_correct. + apply compose_forward_simulation with (Linear.semantics p6). + apply Linearizeproof.transf_program_correct; auto. + apply compose_forward_simulation with (Linear.semantics p7). + apply CleanupLabelsproof.transf_program_correct. + apply compose_forward_simulation with (Mach.semantics Asmgenproof0.return_address_offset p8). + apply Stackingproof.transf_program_correct. + exact Asmgenproof.return_address_exists. + auto. apply Asmgenproof.transf_program_correct; eauto. split. auto. apply forward_to_backward_simulation. auto. diff --git a/driver/Compopts.v b/driver/Compopts.v index 01109f52..d0c6686e 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -28,7 +28,16 @@ Parameter generate_float_constants: unit -> bool. Parameter va_strict: unit -> bool. (** Flag -ftailcalls. For tail call optimization. *) -Parameter eliminate_tailcalls: unit -> bool. +Parameter optim_tailcalls: unit -> bool. + +(** Flag -fconstprop. For constant propagation. *) +Parameter optim_constprop: unit -> bool. + +(** Flag -fcse. For common subexpression elimination. *) +Parameter optim_CSE: unit -> bool. + +(** Flag -fredundancy. For dead code elimination. *) +Parameter optim_redundancy: unit -> bool. (** Flag -fthumb. For the ARM back-end. *) Parameter thumb: unit -> bool. diff --git a/driver/Driver.ml b/driver/Driver.ml index 602b01be..76509f41 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -393,15 +393,21 @@ Language support options (use -fno- to turn off -f) : -fnone Turn off all language support options above Debugging options: -g Generate debugging information -Code generation options: (use -fno- to turn off -f) : - -O Optimize for speed [on by default] - -Os Optimize for code size +Optimization options: (use -fno- to turn off -f) + -O Optimize the compiled code [on by default] + -O0 Do not optimize the compiled code + -O1 -O2 -O3 Synonymous for -O + -Os Optimize for code size in preference to code speed + -ftailcalls Optimize function calls in tail position [on] + -fconst-prop Perform global constant propagation [on] + -ffloat-const-prop Control constant propagation of floats + (=0: none, =1: limited, =2: full; default is full) + -fcse Perform common subexpression elimination [on] + -fredundancy Perform redundancy elimination [on] +Code generation options: (use -fno- to turn off -f) -ffpu Use FP registers for some integer operations [on] -fsmall-data Set maximal size for allocation in small data area -fsmall-const Set maximal size for allocation in small constant area - -ffloat-const-prop Control constant propagation of floats - (=0: none, =1: limited, =2: full; default is full) - -ftailcalls Optimize function calls in tail position [on] -falign-functions Set alignment (in bytes) of function entry points -falign-branch-targets Set alignment (in bytes) of branch targets -falign-cond-branches Set alignment (in bytes) of conditional branches @@ -442,6 +448,13 @@ let language_support_options = [ option_fpacked_structs; option_finline_asm ] +let optimization_options = [ + option_ftailcalls; option_fconstprop; option_fcse; option_fredundancy +] + +let set_all opts = List.iter (fun r -> r := true) opts +let unset_all opts = List.iter (fun r -> r := false) opts + let num_source_files = ref 0 let cmdline_actions = @@ -483,15 +496,15 @@ let cmdline_actions = Prefix "-Wp,", Self (fun s -> prepro_options := List.rev_append (explode_comma_option s) !prepro_options); (* Language support options -- more below *) - Exact "-fall", Self (fun _ -> - List.iter (fun r -> r := true) language_support_options); - Exact "-fnone", Self (fun _ -> - List.iter (fun r -> r := false) language_support_options); + Exact "-fall", Self (fun _ -> set_all language_support_options); + Exact "-fnone", Self (fun _ -> unset_all language_support_options); (* Debugging options *) Exact "-g", Self (fun s -> option_g := true; push_linker_arg s); (* Code generation options -- more below *) + Exact "-O0", Self (fun _ -> unset_all optimization_options); + Exact "-O", Self (fun _ -> set_all optimization_options); + _Regexp "-O[123]$", Self (fun _ -> set_all optimization_options); Exact "-Os", Set option_Osize; - Exact "-O", Unset option_Osize; Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); @@ -538,8 +551,12 @@ let cmdline_actions = @ f_opt "unprototyped" option_funprototyped @ f_opt "packed-structs" option_fpacked_structs @ f_opt "inline-asm" option_finline_asm -(* Code generation options *) +(* Optimization options *) @ f_opt "tailcalls" option_ftailcalls + @ f_opt "const-prop" option_fconstprop + @ f_opt "cse" option_fcse + @ f_opt "redundancy" option_fredundancy +(* Code generation options *) @ f_opt "fpu" option_ffpu @ f_opt "sse" option_ffpu (* backward compatibility *) diff --git a/extraction/extraction.v b/extraction/extraction.v index 5b71a150..ee496ffa 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -92,8 +92,14 @@ Extract Constant Compopts.propagate_float_constants => "fun _ -> !Clflags.option_ffloatconstprop >= 1". Extract Constant Compopts.generate_float_constants => "fun _ -> !Clflags.option_ffloatconstprop >= 2". -Extract Constant Compopts.eliminate_tailcalls => +Extract Constant Compopts.optim_tailcalls => "fun _ -> !Clflags.option_ftailcalls". +Extract Constant Compopts.optim_constprop => + "fun _ -> !Clflags.option_fconstprop". +Extract Constant Compopts.optim_CSE => + "fun _ -> !Clflags.option_fcse". +Extract Constant Compopts.optim_redundancy => + "fun _ -> !Clflags.option_fredundancy". Extract Constant Compopts.thumb => "fun _ -> !Clflags.option_mthumb". -- cgit