diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-04-23 19:59:21 +0200 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-04-23 19:59:21 +0200 |
commit | 433c67f9a05e8cab773d1755aa3eb16f8af588e6 (patch) | |
tree | dadda4120b592e3900854133b3222cf4fcf3b4af /driver | |
parent | 7a30a72809448535785cd47d26d9415f6ada93e3 (diff) | |
parent | 63f425b8ca329820e629c109fb0a2454ea7e2f27 (diff) | |
download | compcert-kvx-433c67f9a05e8cab773d1755aa3eb16f8af588e6.tar.gz compcert-kvx-433c67f9a05e8cab773d1755aa3eb16f8af588e6.zip |
Merge branch 'mppa-work' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into mppa-work
Diffstat (limited to 'driver')
-rw-r--r-- | driver/Clflags.ml | 15 | ||||
-rw-r--r-- | driver/Compiler.vexpand (renamed from driver/Compiler.v) | 158 | ||||
-rw-r--r-- | driver/Compopts.v | 21 | ||||
-rw-r--r-- | driver/Driver.ml | 31 |
4 files changed, 73 insertions, 152 deletions
diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 467d41aa..14d15ba6 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -26,7 +26,10 @@ let option_ffloatconstprop = ref 2 let option_ftailcalls = ref true let option_fconstprop = ref true let option_fcse = ref true -let option_fcse2 = ref true +let option_fcse2 = ref false +let option_fcse3 = ref true +let option_fcse3_alias_analysis = ref true +let option_fcse3_across_calls = ref false let option_fredundancy = ref true let option_fduplicate = ref (-1) let option_finvertcond = ref true @@ -77,8 +80,16 @@ let use_standard_headers = ref Configuration.has_standard_headers let option_fglobaladdrtmp = ref false let option_fglobaladdroffset = ref false let option_fxsaddr = ref true -let option_faddx = ref false +let option_faddx = ref false +let option_fmadd = ref true +let option_div_i32 = ref "stsud" +let option_div_i64 = ref "stsud" let option_fcoalesce_mem = ref true let option_fforward_moves = ref false +let option_fmove_loop_invariants = ref true +let option_fnontrap_loads = ref true let option_all_loads_nontrap = ref false let option_inline_auto_threshold = ref 0 +let option_profile_arcs = ref false +let option_fbranch_probabilities = ref true +let option_debug_compcert = ref 0 diff --git a/driver/Compiler.v b/driver/Compiler.vexpand index 002c55fe..0f59aab7 100644 --- a/driver/Compiler.v +++ b/driver/Compiler.vexpand @@ -35,23 +35,7 @@ Require Cshmgen. Require Cminorgen. Require Selection. Require RTLgen. -Require Tailcall. -Require Inlining. -Require Renumber. -Require Duplicate. -Require Constprop. -Require CSE. -Require ForwardMoves. -Require CSE2. -Require Deadcode. -Require Unusedglob. -Require Allnontrap. -Require Allocation. -Require Tunneling. -Require Linearize. -Require CleanupLabels. -Require Debugvar. -Require Stacking. +EXPAND_RTL_REQUIRE Require Asmgen. (** Proofs of semantic preservation. *) Require SimplExprproof. @@ -60,23 +44,7 @@ Require Cshmgenproof. Require Cminorgenproof. Require Selectionproof. Require RTLgenproof. -Require Tailcallproof. -Require Inliningproof. -Require Renumberproof. -Require Duplicateproof. -Require Constpropproof. -Require CSEproof. -Require ForwardMovesproof. -Require CSE2proof. -Require Deadcodeproof. -Require Unusedglobproof. -Require Allnontrapproof. -Require Allocproof. -Require Tunnelingproof. -Require Linearizeproof. -Require CleanupLabelsproof. -Require Debugvarproof. -Require Stackingproof. +EXPAND_RTL_REQUIRE_PROOF Require Import Asmgenproof. (** Command-line flags. *) Require Import Compopts. @@ -128,40 +96,9 @@ Definition partial_if {A: Type} Definition transf_rtl_program (f: RTL.program) : res Asm.program := OK f @@ print (print_RTL 0) - @@ total_if Compopts.optim_tailcalls (time "Tail calls" Tailcall.transf_program) - @@ print (print_RTL 1) - @@@ time "Inlining" Inlining.transf_program - @@ print (print_RTL 2) - @@ time "Renumbering" Renumber.transf_program - @@ print (print_RTL 3) - @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program) - @@ print (print_RTL 4) - @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program) - @@ print (print_RTL 5) - @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program) - @@ print (print_RTL 6) - @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program) - @@ print (print_RTL 7) - @@ total_if Compopts.optim_CSE2 (time "CSE2" CSE2.transf_program) - @@ print (print_RTL 8) - @@ total_if Compopts.optim_forward_moves ForwardMoves.transf_program - @@ print (print_RTL 9) - @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program) - @@ print (print_RTL 10) - @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program - @@ print (print_RTL 11) - @@@ time "Unused globals" Unusedglob.transform_program - @@ print (print_RTL 12) - @@@ time "Register allocation" Allocation.transf_program - @@ print print_LTL - @@ time "Branch tunneling" Tunneling.tunnel_program - @@@ time "CFG linearization" Linearize.transf_program - @@ time "Label cleanup" CleanupLabels.transf_program - @@@ partial_if Compopts.debug (time "Debugging info for local variables" Debugvar.transf_program) - @@@ time "Mach generation" Stacking.transf_program - @@ print print_Mach +EXPAND_RTL_TRANSF_PROGRAM @@@ time "Total Mach->Asm generation" Asmgen.transf_program. - + Definition transf_cminor_program (p: Cminor.program) : res Asm.program := OK p @@ print print_Cminor @@ -251,24 +188,7 @@ Definition CompCert's_passes := ::: mkpass Cminorgenproof.match_prog ::: mkpass Selectionproof.match_prog ::: mkpass RTLgenproof.match_prog - ::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog) - ::: mkpass Inliningproof.match_prog - ::: mkpass Renumberproof.match_prog - ::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog) - ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog) - ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog) - ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog) - ::: mkpass (match_if Compopts.optim_CSE2 CSE2proof.match_prog) - ::: mkpass (match_if Compopts.optim_forward_moves ForwardMovesproof.match_prog) - ::: mkpass (match_if Compopts.optim_redundancy Deadcodeproof.match_prog) - ::: mkpass (match_if Compopts.all_loads_nontrap Allnontrapproof.match_prog) - ::: mkpass Unusedglobproof.match_prog - ::: mkpass Allocproof.match_prog - ::: mkpass Tunnelingproof.match_prog - ::: mkpass Linearizeproof.match_prog - ::: mkpass CleanupLabelsproof.match_prog - ::: mkpass (match_if Compopts.debug Debugvarproof.match_prog) - ::: mkpass Stackingproof.match_prog +EXPAND_RTL_MKPASS ::: mkpass Asmgenproof.match_prog ::: pass_nil _. @@ -299,24 +219,7 @@ Proof. destruct (RTLgen.transl_program p5) as [p6|e] eqn:P6; cbn in T; try discriminate. unfold transf_rtl_program, time in T. rewrite ! compose_print_identity in T. cbn in T. - set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *. - destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; cbn in T; try discriminate. - set (p9 := Renumber.transf_program p8) in *. - destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; cbn in T; try discriminate. - set (p11 := total_if optim_constprop Constprop.transf_program p10) in *. - set (p12 := total_if optim_constprop Renumber.transf_program p11) in *. - destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; cbn in T; try discriminate. - set (p13bis := total_if optim_CSE2 CSE2.transf_program p13) in *. - set (p13ter := total_if optim_forward_moves ForwardMoves.transf_program p13bis) in *. - destruct (partial_if optim_redundancy Deadcode.transf_program p13ter) as [p14|e] eqn:P14; cbn in T; try discriminate. - set (p14bis := total_if all_loads_nontrap Allnontrap.transf_program p14) in *. - destruct (Unusedglob.transform_program p14bis) as [p15|e] eqn:P15; cbn in T; try discriminate. - destruct (Allocation.transf_program p15) as [p16|e] eqn:P16; cbn in T; try discriminate. - set (p17 := Tunneling.tunnel_program p16) in *. - destruct (Linearize.transf_program p17) as [p18|e] eqn:P18; cbn in T; try discriminate. - set (p19 := CleanupLabels.transf_program p18) in *. - destruct (partial_if debug Debugvar.transf_program p19) as [p20|e] eqn:P20; cbn in T; try discriminate. - destruct (Stacking.transf_program p20) as [p21|e] eqn:P21; cbn in T; try discriminate. +EXPAND_RTL_PROOF unfold match_prog; simpl. exists p1; split. apply SimplExprproof.transf_program_match; auto. exists p2; split. apply SimplLocalsproof.match_transf_program; auto. @@ -324,24 +227,7 @@ Proof. exists p4; split. apply Cminorgenproof.transf_program_match; auto. exists p5; split. apply Selectionproof.transf_program_match; auto. exists p6; split. apply RTLgenproof.transf_program_match; auto. - exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match. - exists p8; split. apply Inliningproof.transf_program_match; auto. - exists p9; split. apply Renumberproof.transf_program_match; auto. - exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto. - exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match. - exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match. - exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match. - exists p13bis; split. apply total_if_match. apply CSE2proof.transf_program_match. - exists p13ter; split. eapply total_if_match; eauto. apply ForwardMovesproof.transf_program_match. - exists p14; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match. - exists p14bis; split. eapply total_if_match; eauto. apply Allnontrapproof.transf_program_match. - exists p15; split. apply Unusedglobproof.transf_program_match; auto. - exists p16; split. apply Allocproof.transf_program_match; auto. - exists p17; split. apply Tunnelingproof.transf_program_match. - exists p18; split. apply Linearizeproof.transf_program_match; auto. - exists p19; split. apply CleanupLabelsproof.transf_program_match; auto. - exists p20; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match. - exists p21; split. apply Stackingproof.transf_program_match; auto. +EXPAND_RTL_PROOF2 exists tp; split. apply Asmgenproof.transf_program_match; auto. reflexivity. Qed. @@ -393,7 +279,9 @@ Ltac DestructM := destruct H as (p & M & MM); clear H end. repeat DestructM. subst tp. - assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p25)). + assert (F: forward_simulation (Cstrategy.semantics p) +EXPAND_ASM_SEMANTICS + ). { eapply compose_forward_simulations. eapply SimplExprproof.transl_program_correct; eassumption. @@ -407,31 +295,9 @@ Ltac DestructM := eapply Selectionproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply RTLgenproof.transf_program_correct; eassumption. +EXPAND_RTL_FORWARD_SIMULATIONS eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Tailcallproof.transf_program_correct. - eapply compose_forward_simulations. - eapply Inliningproof.transf_program_correct; eassumption. - eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Renumberproof.transf_program_correct. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact CSEproof.transf_program_correct. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact CSE2proof.transf_program_correct. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact ForwardMovesproof.transf_program_correct; eassumption. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Deadcodeproof.transf_program_correct; eassumption. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Allnontrapproof.transf_program_correct. - eapply compose_forward_simulations. - eapply Unusedglobproof.transf_program_correct; eassumption. - eapply compose_forward_simulations. - eapply Allocproof.transf_program_correct; eassumption. + eapply Allocationproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply Tunnelingproof.transf_program_correct; eassumption. eapply compose_forward_simulations. diff --git a/driver/Compopts.v b/driver/Compopts.v index 848657e5..3c5ccf36 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -42,6 +42,18 @@ Parameter optim_CSE: unit -> bool. (** Flag -fcse2. For DMonniaux's common subexpression elimination. *) Parameter optim_CSE2: unit -> bool. +(** Flag -fcse3. For DMonniaux's common subexpression elimination. *) +Parameter optim_CSE3: unit -> bool. + +(** Flag -fcse3-alias-analysis. For DMonniaux's common subexpression elimination. Perform a simple alias analysis. *) +Parameter optim_CSE3_alias_analysis: unit -> bool. + +(** Flag -fcse3-across-calls. For DMonniaux's common subexpression elimination. Propagate information across function calls (may increase register pressure). *) +Parameter optim_CSE3_across_calls: unit -> bool. + +(** Flag -fmove-loop-invariants. *) +Parameter optim_move_loop_invariants: unit -> bool. + (** Flag -fredundancy. For dead code elimination. *) Parameter optim_redundancy: unit -> bool. @@ -60,6 +72,9 @@ Parameter optim_xsaddr: unit -> bool. (** FIXME TEMPORARY Flag -fcoaelesce-mem. Fuse (default true) *) Parameter optim_coalesce_mem: unit -> bool. +(* FIXME TEMPORARY Flag -faddx. Fuse (default true) *) +Parameter optim_madd: unit -> bool. + (** FIXME TEMPORARY Flag -faddx. Fuse (default false) *) Parameter optim_addx: unit -> bool. @@ -75,6 +90,12 @@ Parameter all_loads_nontrap: unit -> bool. (** Flag -fforward-moves. Forward moves after CSE. *) Parameter optim_forward_moves: unit -> bool. +(** Flag -fprofile-arcs. Add profiling logger. *) +Parameter profile_arcs : unit -> bool. + +(** Flag -fbranch_probabilities. Use profiling information if available *) +Parameter branch_probabilities : unit -> bool. + (* TODO is there a more appropriate place? *) Require Import Coqlib. Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f. diff --git a/driver/Driver.ml b/driver/Driver.ml index 388482a0..b9060ca7 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -196,7 +196,11 @@ Processing options: -ffloat-const-prop <n> Control constant propagation of floats (<n>=0: none, <n>=1: limited, <n>=2: full; default is full) -fcse Perform common subexpression elimination [on] - -fcse2 Perform inter-loop common subexpression elimination [on] + -fcse2 Perform inter-loop common subexpression elimination [off] + -fcse3 Perform inter-loop common subexpression elimination [on] + -fcse3-alias-analysis Perform inter-loop common subexpression elimination with alias analysis [on] + -fcse3-across-calls Propagate CSE3 information across function calls [off] + -fmove-loop-invariants Perform loop-invariant code motion [off] -fredundancy Perform redundancy elimination [on] -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list] @@ -222,7 +226,10 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>) -falign-functions <n> Set alignment (in bytes) of function entry points -falign-branch-targets <n> Set alignment (in bytes) of branch targets -falign-cond-branches <n> Set alignment (in bytes) of conditional branches - -fcommon Put uninitialized globals in the common section [on]. + -fcommon Put uninitialized globals in the common section [on] + -fprofile-arcs Profile branches [off]. + -fprofile-use= filename Use profiling information in filename + -fbranch-probabilities Use profiling information (if available) for branches [on] |} ^ target_help ^ toolchain_help ^ @@ -270,7 +277,7 @@ let dump_mnemonics destfile = let optimization_options = [ option_ftailcalls; option_fifconversion; option_fconstprop; - option_fcse; option_fcse2; + option_fcse; option_fcse2; option_fcse3; option_fpostpass; option_fredundancy; option_finline; option_finline_functions_called_once; ] @@ -289,6 +296,10 @@ let cmdline_actions = [Exact("-f" ^ name ^ "="), String (fun s -> (strref := (if s == "" then "list" else s)); ref := true) ] in + let f_str name strref default = + [Exact("-f" ^ name ^ "="), String + (fun s -> (strref := (if s == "" then default else s))) + ] in let check_align n = if n <= 0 || ((n land (n - 1)) <> 0) then error no_loc "requested alignment %d is not a power of 2" n @@ -327,7 +338,9 @@ let cmdline_actions = _Regexp "-O[123]$", Unit (set_all optimization_options); Exact "-Os", Set option_Osize; Exact "-Obranchless", Set option_Obranchless; + Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s); Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); + Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); @@ -397,6 +410,10 @@ let cmdline_actions = @ f_opt "const-prop" option_fconstprop @ f_opt "cse" option_fcse @ f_opt "cse2" option_fcse2 + @ f_opt "cse3" option_fcse3 + @ f_opt "cse3-alias-analysis" option_fcse3_alias_analysis + @ f_opt "cse3-across-calls" option_fcse3_across_calls + @ f_opt "move-loop-invariants" option_fmove_loop_invariants @ f_opt "redundancy" option_fredundancy @ f_opt "postpass" option_fpostpass @ [ Exact "-fduplicate", Integer (fun n -> option_fduplicate := n) ] @@ -408,11 +425,17 @@ let cmdline_actions = @ f_opt "globaladdrtmp" option_fglobaladdrtmp @ f_opt "globaladdroffset" option_fglobaladdroffset @ f_opt "xsaddr" option_fxsaddr + @ f_str "div-i32" option_div_i32 "stsud" + @ f_str "div-i64" option_div_i64 "stsud" @ f_opt "addx" option_faddx + @ f_opt "madd" option_fmadd + @ f_opt "nontrap-loads" option_fnontrap_loads @ f_opt "coalesce-mem" option_fcoalesce_mem @ f_opt "all-loads-nontrap" option_all_loads_nontrap @ f_opt "forward-moves" option_fforward_moves -(* Code generation options *) + (* Code generation options *) + @ f_opt "profile-arcs" option_profile_arcs + @ f_opt "branch-probabilities" option_fbranch_probabilities @ f_opt "fpu" option_ffpu @ f_opt "sse" option_ffpu (* backward compatibility *) @ [ |