diff options
Diffstat (limited to 'driver')
-rw-r--r-- | driver/Clflags.ml | 15 | ||||
-rw-r--r-- | driver/Compiler.v | 95 | ||||
-rw-r--r-- | driver/Compopts.v | 32 | ||||
-rw-r--r-- | driver/Configuration.ml | 2 | ||||
-rw-r--r-- | driver/Driver.ml | 30 | ||||
-rw-r--r-- | driver/Frontend.ml | 1 |
6 files changed, 138 insertions, 37 deletions
diff --git a/driver/Clflags.ml b/driver/Clflags.ml index b4ab51e7..6d6f1df4 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -15,7 +15,7 @@ let prepro_options = ref ([]: string list) let linker_options = ref ([]: string list) let assembler_options = ref ([]: string list) -let option_flongdouble = ref false +let option_flongdouble = ref (Configuration.arch = "mppa_k1c") let option_fstruct_passing = ref false let option_fbitfields = ref false let option_fvararg_calls = ref true @@ -28,6 +28,11 @@ let option_fconstprop = ref true let option_fcse = ref true let option_fcse2 = ref true let option_fredundancy = ref true +let option_fduplicate = ref false +let option_finvertcond = ref true (* only active if option_fduplicate is also true *) +let option_ftracelinearize = ref false +let option_fpostpass = ref true +let option_fpostpass_sched = ref "list" let option_fifconversion = ref true let option_Obranchless = ref false let option_falignfunctions = ref (None: int option) @@ -68,3 +73,11 @@ let option_small_const = ref (!option_small_data) let option_timings = ref false let stdlib_path = ref Configuration.stdlib_path let use_standard_headers = ref Configuration.has_standard_headers + +let option_fglobaladdrtmp = ref false +let option_fglobaladdroffset = ref false +let option_fxsaddr = ref true +let option_faddx = ref false +let option_fcoalesce_mem = ref true +let option_fforward_moves = ref true +let option_all_loads_nontrap = ref false diff --git a/driver/Compiler.v b/driver/Compiler.v index 0dd413f5..499feff2 100644 --- a/driver/Compiler.v +++ b/driver/Compiler.v @@ -38,11 +38,14 @@ Require RTLgen. Require Tailcall. Require Inlining. Require Renumber. +Require Duplicate. Require Constprop. Require CSE. +Require ForwardMoves. Require CSE2. Require Deadcode. Require Unusedglob. +Require Allnontrap. Require Allocation. Require Tunneling. Require Linearize. @@ -60,18 +63,21 @@ Require RTLgenproof. Require Tailcallproof. Require Inliningproof. 
Require Renumberproof. +Require Duplicateproof. Require Constpropproof. Require CSEproof. +Require ForwardMovesproof. Require CSE2proof. Require Deadcodeproof. Require Unusedglobproof. +Require Allnontrapproof. Require Allocproof. Require Tunnelingproof. Require Linearizeproof. Require CleanupLabelsproof. Require Debugvarproof. Require Stackingproof. -Require Asmgenproof. +Require Import Asmgenproof. (** Command-line flags. *) Require Import Compopts. @@ -128,18 +134,24 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program := @@ print (print_RTL 2) @@ time "Renumbering" Renumber.transf_program @@ print (print_RTL 3) - @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program) + @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program) @@ print (print_RTL 4) - @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program) + @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program) @@ print (print_RTL 5) - @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program) + @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program) @@ print (print_RTL 6) - @@ total_if Compopts.optim_CSE2 (time "CSE2" CSE2.transf_program) + @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program) @@ print (print_RTL 7) - @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program) + @@ total_if Compopts.optim_CSE2 (time "CSE2" CSE2.transf_program) @@ print (print_RTL 8) - @@@ time "Unused globals" Unusedglob.transform_program + @@ total_if Compopts.optim_forward_moves ForwardMoves.transf_program @@ print (print_RTL 9) + @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program) + @@ print (print_RTL 10) + @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program + @@ print (print_RTL 11) + @@@ time "Unused globals" Unusedglob.transform_program 
+ @@ print (print_RTL 12) @@@ time "Register allocation" Allocation.transf_program @@ print print_LTL @@ time "Branch tunneling" Tunneling.tunnel_program @@ -148,7 +160,7 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program := @@@ partial_if Compopts.debug (time "Debugging info for local variables" Debugvar.transf_program) @@@ time "Mach generation" Stacking.transf_program @@ print print_Mach - @@@ time "Asm generation" Asmgen.transf_program. + @@@ time "Total Mach->Asm generation" Asmgen.transf_program. Definition transf_cminor_program (p: Cminor.program) : res Asm.program := OK p @@ -242,11 +254,14 @@ Definition CompCert's_passes := ::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog) ::: mkpass Inliningproof.match_prog ::: mkpass Renumberproof.match_prog + ::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog) ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog) ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog) ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog) ::: mkpass (match_if Compopts.optim_CSE2 CSE2proof.match_prog) + ::: mkpass (match_if Compopts.optim_forward_moves ForwardMovesproof.match_prog) ::: mkpass (match_if Compopts.optim_redundancy Deadcodeproof.match_prog) + ::: mkpass (match_if Compopts.all_loads_nontrap Allnontrapproof.match_prog) ::: mkpass Unusedglobproof.match_prog ::: mkpass Allocproof.match_prog ::: mkpass Tunnelingproof.match_prog @@ -286,18 +301,21 @@ Proof. set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *. destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; simpl in T; try discriminate. set (p9 := Renumber.transf_program p8) in *. - set (p10 := total_if optim_constprop Constprop.transf_program p9) in *. - set (p11 := total_if optim_constprop Renumber.transf_program p10) in *. - destruct (partial_if optim_CSE CSE.transf_program p11) as [p12|e] eqn:P12; simpl in T; try discriminate. 
- set (p12bis := @total_if RTL.program optim_CSE2 CSE2.transf_program p12) in *. - destruct (partial_if optim_redundancy Deadcode.transf_program p12bis) as [p13|e] eqn:P13; simpl in T; try discriminate. - destruct (Unusedglob.transform_program p13) as [p14|e] eqn:P14; simpl in T; try discriminate. - destruct (Allocation.transf_program p14) as [p15|e] eqn:P15; simpl in T; try discriminate. - set (p16 := Tunneling.tunnel_program p15) in *. - destruct (Linearize.transf_program p16) as [p17|e] eqn:P17; simpl in T; try discriminate. - set (p18 := CleanupLabels.transf_program p17) in *. - destruct (partial_if debug Debugvar.transf_program p18) as [p19|e] eqn:P19; simpl in T; try discriminate. - destruct (Stacking.transf_program p19) as [p20|e] eqn:P20; simpl in T; try discriminate. + destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate. + set (p11 := total_if optim_constprop Constprop.transf_program p10) in *. + set (p12 := total_if optim_constprop Renumber.transf_program p11) in *. + destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate. + set (p13bis := total_if optim_CSE2 CSE2.transf_program p13) in *. + set (p13ter := total_if optim_forward_moves ForwardMoves.transf_program p13bis) in *. + destruct (partial_if optim_redundancy Deadcode.transf_program p13ter) as [p14|e] eqn:P14; simpl in T; try discriminate. + set (p14bis := total_if all_loads_nontrap Allnontrap.transf_program p14) in *. + destruct (Unusedglob.transform_program p14bis) as [p15|e] eqn:P15; simpl in T; try discriminate. + destruct (Allocation.transf_program p15) as [p16|e] eqn:P16; simpl in T; try discriminate. + set (p17 := Tunneling.tunnel_program p16) in *. + destruct (Linearize.transf_program p17) as [p18|e] eqn:P18; simpl in T; try discriminate. + set (p19 := CleanupLabels.transf_program p18) in *. 
+ destruct (partial_if debug Debugvar.transf_program p19) as [p20|e] eqn:P20; simpl in T; try discriminate. + destruct (Stacking.transf_program p20) as [p21|e] eqn:P21; simpl in T; try discriminate. unfold match_prog; simpl. exists p1; split. apply SimplExprproof.transf_program_match; auto. exists p2; split. apply SimplLocalsproof.match_transf_program; auto. @@ -308,18 +326,21 @@ Proof. exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match. exists p8; split. apply Inliningproof.transf_program_match; auto. exists p9; split. apply Renumberproof.transf_program_match; auto. - exists p10; split. apply total_if_match. apply Constpropproof.transf_program_match. - exists p11; split. apply total_if_match. apply Renumberproof.transf_program_match. - exists p12; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match. - exists p12bis; split. apply total_if_match. apply CSE2proof.transf_program_match. - exists p13; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match. - exists p14; split. apply Unusedglobproof.transf_program_match; auto. - exists p15; split. apply Allocproof.transf_program_match; auto. - exists p16; split. apply Tunnelingproof.transf_program_match. - exists p17; split. apply Linearizeproof.transf_program_match; auto. - exists p18; split. apply CleanupLabelsproof.transf_program_match; auto. - exists p19; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match. - exists p20; split. apply Stackingproof.transf_program_match; auto. + exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto. + exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match. + exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match. + exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match. + exists p13bis; split. apply total_if_match. apply CSE2proof.transf_program_match. 
+ exists p13ter; split. eapply total_if_match; eauto. apply ForwardMovesproof.transf_program_match. + exists p14; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match. + exists p14bis; split. eapply total_if_match; eauto. apply Allnontrapproof.transf_program_match. + exists p15; split. apply Unusedglobproof.transf_program_match; auto. + exists p16; split. apply Allocproof.transf_program_match; auto. + exists p17; split. apply Tunnelingproof.transf_program_match. + exists p18; split. apply Linearizeproof.transf_program_match; auto. + exists p19; split. apply CleanupLabelsproof.transf_program_match; auto. + exists p20; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match. + exists p21; split. apply Stackingproof.transf_program_match; auto. exists tp; split. apply Asmgenproof.transf_program_match; auto. reflexivity. Qed. @@ -371,7 +392,7 @@ Ltac DestructM := destruct H as (p & M & MM); clear H end. repeat DestructM. subst tp. - assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p22)). + assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p25)). { eapply compose_forward_simulations. eapply SimplExprproof.transl_program_correct; eassumption. @@ -391,16 +412,22 @@ Ltac DestructM := eapply Inliningproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption. eapply compose_forward_simulations. + eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct. + eapply compose_forward_simulations. eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct. eapply compose_forward_simulations. eapply match_if_simulation. eassumption. exact Renumberproof.transf_program_correct. eapply compose_forward_simulations. eapply match_if_simulation. eassumption. exact CSEproof.transf_program_correct. eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. 
exact CSE2proof.transf_program_correct. + eapply match_if_simulation. eassumption. exact CSE2proof.transf_program_correct. + eapply compose_forward_simulations. + eapply match_if_simulation. eassumption. exact ForwardMovesproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply match_if_simulation. eassumption. exact Deadcodeproof.transf_program_correct; eassumption. eapply compose_forward_simulations. + eapply match_if_simulation. eassumption. exact Allnontrapproof.transf_program_correct. + eapply compose_forward_simulations. eapply Unusedglobproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply Allocproof.transf_program_correct; eassumption. diff --git a/driver/Compopts.v b/driver/Compopts.v index 594b74f1..b4b9f30d 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -27,6 +27,10 @@ Parameter generate_float_constants: unit -> bool. (** For value analysis. Currently always false. *) Parameter va_strict: unit -> bool. +(** Flag -fduplicate. For tail duplication optimization. Necessary to have + * bigger superblocks *) +Parameter optim_duplicate: unit -> bool. + (** Flag -ftailcalls. For tail call optimization. *) Parameter optim_tailcalls: unit -> bool. @@ -42,8 +46,36 @@ Parameter optim_CSE2: unit -> bool. (** Flag -fredundancy. For dead code elimination. *) Parameter optim_redundancy: unit -> bool. +(** Flag -fpostpass. Postpass scheduling for K1 architecture *) +Parameter optim_postpass: unit -> bool. + +(** FIXME TEMPORARY Flag -fglobaladdrtmp. Use a temporary register for loading the address of global variables (default false) *) +Parameter optim_globaladdrtmp: unit -> bool. + +(** FIXME TEMPORARY Flag -fglobaladdroffset. Fold offsets into global addresses (default false) *) +Parameter optim_globaladdroffset: unit -> bool. + +(** FIXME TEMPORARY Flag -fxsaddr. Use .xs addressing mode (default true) *) +Parameter optim_xsaddr: unit -> bool. + +(** FIXME TEMPORARY Flag -fcoalesce-mem. 
Fuse (default true) *) +Parameter optim_coalesce_mem: unit -> bool. + +(** FIXME TEMPORARY Flag -faddx. Fuse (default false) *) +Parameter optim_addx: unit -> bool. + (** Flag -fthumb. For the ARM back-end. *) Parameter thumb: unit -> bool. (** Flag -g. For insertion of debugging information. *) Parameter debug: unit -> bool. + +(** Flag -fall-loads-nontrap. Turn user loads into non trapping. *) +Parameter all_loads_nontrap: unit -> bool. + +(** Flag -fforward-moves. Forward moves after CSE. *) +Parameter optim_forward_moves: unit -> bool. + +(* TODO is there a more appropriate place? *) +Require Import Coqlib. +Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f. diff --git a/driver/Configuration.ml b/driver/Configuration.ml index 2188acf0..08084720 100644 --- a/driver/Configuration.ml +++ b/driver/Configuration.ml @@ -123,7 +123,7 @@ let get_bool_config key = let arch = match get_config_string "arch" with - | "powerpc"|"arm"|"x86"|"riscV"|"aarch64" as a -> a + | "powerpc"|"arm"|"x86"|"riscV"|"mppa_k1c"|"aarch64" as a -> a | v -> bad_config "arch" [v] let model = get_config_string "model" let abi = get_config_string "abi" diff --git a/driver/Driver.ml b/driver/Driver.ml index bdf72250..db71aef9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -185,7 +185,8 @@ Processing options: {|Optimization options: (use -fno-<opt> to turn off -f<opt>) -O Optimize the compiled code [on by default] -O0 Do not optimize the compiled code - -O1 -O2 -O3 Synonymous for -O + -O1 Perform all optimization passes except scheduling + -O2 -O3 Synonymous for -O -Os Optimize for code size in preference to code speed -Obranchless Optimize to generate fewer conditional branches; try to produce branch-free instruction sequences as much as possible @@ -196,6 +197,15 @@ Processing options: -fcse Perform common subexpression elimination [on] -fcse2 Perform inter-loop common subexpression elimination [on] -fredundancy Perform redundancy elimination [on] + -fpostpass Perform 
postpass scheduling (only for K1 architecture) [on] + -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list] + (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) + -fduplicate Perform tail duplication to form superblocks on predicted traces + -finvertcond Invert conditions based on predicted paths (to prefer fallthrough). + Requires -fduplicate to be also activated [on] + -ftracelinearize Linearizes based on the traces identified by duplicate phase + It is recommended to also activate -fduplicate with this pass [off] + -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their single caller [on] @@ -256,6 +266,7 @@ let dump_mnemonics destfile = let optimization_options = [ option_ftailcalls; option_fifconversion; option_fconstprop; option_fcse; option_fcse2; + option_fpostpass; option_fredundancy; option_finline; option_finline_functions_called_once; ] @@ -269,6 +280,10 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in + (* f_opt_str name ref strref: action for "-f<name>=" taking a string argument; stores the argument in strref and sets ref. An empty argument falls back to the default list scheduler; the emptiness test must be structural (=), since physical equality (==) on strings is not a reliable emptiness test. *) + let f_opt_str name ref strref = + [Exact("-f" ^ name ^ "="), String + (fun s -> (strref := (if s = "" then "list" else s)); ref := true) + ] in let check_align n = if n <= 0 || ((n land (n - 1)) <> 0) then error no_loc "requested alignment %d is not a power of 2" n @@ -303,6 +318,7 @@ let cmdline_actions = [ Exact "-O0", Unit (unset_all optimization_options); Exact "-O", Unit (set_all optimization_options); + (* -O1: every optimization except postpass scheduling and tail duplication; anchored like the -O[123]$ pattern below *) + _Regexp "-O1$", Self (fun _ -> set_all optimization_options (); option_fpostpass := false; option_fduplicate := false); _Regexp "-O[123]$", Unit (set_all optimization_options); Exact "-Os", Set option_Osize; Exact "-Obranchless", Set option_Obranchless; @@ -376,8 +392,20 @@ let cmdline_actions = @ f_opt "cse" option_fcse @ f_opt "cse2" option_fcse2 @ f_opt "redundancy" option_fredundancy +
@ f_opt "postpass" option_fpostpass + @ f_opt "duplicate" option_fduplicate + @ f_opt "invertcond" option_finvertcond + @ f_opt "tracelinearize" option_ftracelinearize + @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once + @ f_opt "globaladdrtmp" option_fglobaladdrtmp + @ f_opt "globaladdroffset" option_fglobaladdroffset + @ f_opt "xsaddr" option_fxsaddr + @ f_opt "addx" option_faddx + @ f_opt "coalesce-mem" option_fcoalesce_mem + @ f_opt "all-loads-nontrap" option_all_loads_nontrap + @ f_opt "forward-moves" option_fforward_moves (* Code generation options *) @ f_opt "fpu" option_ffpu @ f_opt "sse" option_ffpu (* backward compatibility *) diff --git a/driver/Frontend.ml b/driver/Frontend.ml index 74791247..b9db0d23 100644 --- a/driver/Frontend.ml +++ b/driver/Frontend.ml @@ -116,6 +116,7 @@ let init () = | "riscV" -> if Configuration.model = "64" then Machine.rv64 else Machine.rv32 + | "mppa_k1c" -> Machine.mppa_k1c | "aarch64" -> Machine.aarch64 | _ -> assert false end; |