diff options
Diffstat (limited to 'driver')
-rw-r--r-- | driver/Clflags.ml | 52 | ||||
-rw-r--r-- | driver/Compiler.vexpand (renamed from driver/Compiler.v) | 152 | ||||
-rw-r--r-- | driver/Compopts.v | 64 | ||||
-rw-r--r-- | driver/Configuration.ml | 3 | ||||
-rw-r--r-- | driver/Configuration.mli | 3 | ||||
-rw-r--r-- | driver/Driver.ml | 96 | ||||
-rw-r--r-- | driver/Frontend.ml | 5 |
7 files changed, 247 insertions, 128 deletions
diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 25c3e1dd..db67142e 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -15,7 +15,7 @@ let prepro_options = ref ([]: string list) let linker_options = ref ([]: string list) let assembler_options = ref ([]: string list) -let option_flongdouble = ref false +let option_flongdouble = ref (Configuration.arch = "kvx") let option_fstruct_passing = ref false let option_fvararg_calls = ref true let option_funprototyped = ref true @@ -25,7 +25,36 @@ let option_ffloatconstprop = ref 2 let option_ftailcalls = ref true let option_fconstprop = ref true let option_fcse = ref true +let option_fcse2 = ref false + +let option_fcse3 = ref true +let option_fcse3_alias_analysis = ref true +let option_fcse3_across_calls = ref false +let option_fcse3_across_merges = ref true +let option_fcse3_glb = ref true +let option_fcse3_trivial_ops = ref false +let option_fcse3_refine = ref true +let option_fcse3_conditions = ref true + let option_fredundancy = ref true + +(** Options relative to superblock scheduling *) +let option_fpredict = ref true (* insert static branch prediction information, and swaps ifso/ifnot branches accordingly *) +let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size n *) +let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *) +let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *) +let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *) +let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *) + +(* Scheduling *) +let option_mtune = ref "" + +let option_fprepass = ref true +let option_fprepass_sched = ref "regpres" + +let option_fpostpass = ref true +let option_fpostpass_sched = ref "list" + let option_fifconversion = ref true let option_Obranchless = ref false let option_falignfunctions = ref (None: int option) @@ 
-66,4 +95,25 @@ let option_small_const = ref (!option_small_data) let option_timings = ref false let stdlib_path = ref Configuration.stdlib_path let use_standard_headers = ref Configuration.has_standard_headers + +let option_fglobaladdrtmp = ref false +let option_fglobaladdroffset = ref false +let option_fxsaddr = ref true +let option_faddx = ref false +let option_fmadd = ref true +let option_div_i32 = ref "stsud" +let option_div_i64 = ref "stsud" +let option_fcoalesce_mem = ref true +let option_fexpanse_rtlcond = ref false +let option_fexpanse_others = ref false +let option_fforward_moves = ref false +let option_fmove_loop_invariants = ref false +let option_fnontrap_loads = ref false +let option_all_loads_nontrap = ref false +let option_inline_auto_threshold = ref 0 +let option_profile_arcs = ref false +let option_fbranch_probabilities = ref true +let option_debug_compcert = ref 0 +let option_regpres_threshold = ref 2 +let option_regpres_wait_window = ref false let main_function_name = ref "main" diff --git a/driver/Compiler.v b/driver/Compiler.vexpand index 75247f71..9673267d 100644 --- a/driver/Compiler.v +++ b/driver/Compiler.vexpand @@ -35,19 +35,8 @@ Require Cshmgen. Require Cminorgen. Require Selection. Require RTLgen. -Require Tailcall. -Require Inlining. -Require Renumber. -Require Constprop. -Require CSE. -Require Deadcode. -Require Unusedglob. -Require Allocation. -Require Tunneling. -Require Linearize. -Require CleanupLabels. -Require Debugvar. -Require Stacking. +Require Import Duplicatepasses. +EXPAND_RTL_REQUIRE Require Asmgen. (** Proofs of semantic preservation. *) Require SimplExprproof. @@ -56,20 +45,8 @@ Require Cshmgenproof. Require Cminorgenproof. Require Selectionproof. Require RTLgenproof. -Require Tailcallproof. -Require Inliningproof. -Require Renumberproof. -Require Constpropproof. -Require CSEproof. -Require Deadcodeproof. -Require Unusedglobproof. -Require Allocproof. -Require Tunnelingproof. -Require Linearizeproof. 
-Require CleanupLabelsproof. -Require Debugvarproof. -Require Stackingproof. -Require Asmgenproof. +EXPAND_RTL_REQUIRE_PROOF +Require Import Asmgenproof. (** Command-line flags. *) Require Import Compopts. @@ -77,7 +54,7 @@ Require Import Compopts. Parameter print_Clight: Clight.program -> unit. Parameter print_Cminor: Cminor.program -> unit. Parameter print_RTL: Z -> RTL.program -> unit. -Parameter print_LTL: LTL.program -> unit. +Parameter print_LTL: Z -> LTL.program -> unit. Parameter print_Mach: Mach.program -> unit. Local Open Scope string_scope. @@ -120,32 +97,9 @@ Definition partial_if {A: Type} Definition transf_rtl_program (f: RTL.program) : res Asm.program := OK f @@ print (print_RTL 0) - @@ total_if Compopts.optim_tailcalls (time "Tail calls" Tailcall.transf_program) - @@ print (print_RTL 1) - @@@ time "Inlining" Inlining.transf_program - @@ print (print_RTL 2) - @@ time "Renumbering" Renumber.transf_program - @@ print (print_RTL 3) - @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program) - @@ print (print_RTL 4) - @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program) - @@ print (print_RTL 5) - @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program) - @@ print (print_RTL 6) - @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program) - @@ print (print_RTL 7) - @@@ time "Unused globals" Unusedglob.transform_program - @@ print (print_RTL 8) - @@@ time "Register allocation" Allocation.transf_program - @@ print print_LTL - @@ time "Branch tunneling" Tunneling.tunnel_program - @@@ time "CFG linearization" Linearize.transf_program - @@ time "Label cleanup" CleanupLabels.transf_program - @@@ partial_if Compopts.debug (time "Debugging info for local variables" Debugvar.transf_program) - @@@ time "Mach generation" Stacking.transf_program - @@ print print_Mach - @@@ time "Asm generation" Asmgen.transf_program. 
- +EXPAND_RTL_TRANSF_PROGRAM + @@@ time "Total Mach->Asm generation" Asmgen.transf_program. + Definition transf_cminor_program (p: Cminor.program) : res Asm.program := OK p @@ print print_Cminor @@ -235,20 +189,7 @@ Definition CompCert's_passes := ::: mkpass Cminorgenproof.match_prog ::: mkpass Selectionproof.match_prog ::: mkpass RTLgenproof.match_prog - ::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog) - ::: mkpass Inliningproof.match_prog - ::: mkpass Renumberproof.match_prog - ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog) - ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog) - ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog) - ::: mkpass (match_if Compopts.optim_redundancy Deadcodeproof.match_prog) - ::: mkpass Unusedglobproof.match_prog - ::: mkpass Allocproof.match_prog - ::: mkpass Tunnelingproof.match_prog - ::: mkpass Linearizeproof.match_prog - ::: mkpass CleanupLabelsproof.match_prog - ::: mkpass (match_if Compopts.debug Debugvarproof.match_prog) - ::: mkpass Stackingproof.match_prog +EXPAND_RTL_MKPASS ::: mkpass Asmgenproof.match_prog ::: pass_nil _. @@ -268,30 +209,18 @@ Theorem transf_c_program_match: match_prog p tp. Proof. intros p tp T. - unfold transf_c_program, time in T. simpl in T. - destruct (SimplExpr.transl_program p) as [p1|e] eqn:P1; simpl in T; try discriminate. - unfold transf_clight_program, time in T. rewrite ! compose_print_identity in T. simpl in T. - destruct (SimplLocals.transf_program p1) as [p2|e] eqn:P2; simpl in T; try discriminate. - destruct (Cshmgen.transl_program p2) as [p3|e] eqn:P3; simpl in T; try discriminate. - destruct (Cminorgen.transl_program p3) as [p4|e] eqn:P4; simpl in T; try discriminate. - unfold transf_cminor_program, time in T. rewrite ! compose_print_identity in T. simpl in T. - destruct (Selection.sel_program p4) as [p5|e] eqn:P5; simpl in T; try discriminate. 
- destruct (RTLgen.transl_program p5) as [p6|e] eqn:P6; simpl in T; try discriminate. - unfold transf_rtl_program, time in T. rewrite ! compose_print_identity in T. simpl in T. - set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *. - destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; simpl in T; try discriminate. - set (p9 := Renumber.transf_program p8) in *. - set (p10 := total_if optim_constprop Constprop.transf_program p9) in *. - set (p11 := total_if optim_constprop Renumber.transf_program p10) in *. - destruct (partial_if optim_CSE CSE.transf_program p11) as [p12|e] eqn:P12; simpl in T; try discriminate. - destruct (partial_if optim_redundancy Deadcode.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate. - destruct (Unusedglob.transform_program p13) as [p14|e] eqn:P14; simpl in T; try discriminate. - destruct (Allocation.transf_program p14) as [p15|e] eqn:P15; simpl in T; try discriminate. - set (p16 := Tunneling.tunnel_program p15) in *. - destruct (Linearize.transf_program p16) as [p17|e] eqn:P17; simpl in T; try discriminate. - set (p18 := CleanupLabels.transf_program p17) in *. - destruct (partial_if debug Debugvar.transf_program p18) as [p19|e] eqn:P19; simpl in T; try discriminate. - destruct (Stacking.transf_program p19) as [p20|e] eqn:P20; simpl in T; try discriminate. + unfold transf_c_program, time in T. cbn in T. + destruct (SimplExpr.transl_program p) as [p1|e] eqn:P1; cbn in T; try discriminate. + unfold transf_clight_program, time in T. rewrite ! compose_print_identity in T. cbn in T. + destruct (SimplLocals.transf_program p1) as [p2|e] eqn:P2; cbn in T; try discriminate. + destruct (Cshmgen.transl_program p2) as [p3|e] eqn:P3; cbn in T; try discriminate. + destruct (Cminorgen.transl_program p3) as [p4|e] eqn:P4; cbn in T; try discriminate. + unfold transf_cminor_program, time in T. rewrite ! compose_print_identity in T. cbn in T. 
+ destruct (Selection.sel_program p4) as [p5|e] eqn:P5; cbn in T; try discriminate. + destruct (RTLgen.transl_program p5) as [p6|e] eqn:P6; cbn in T; try discriminate. + unfold transf_rtl_program, time in T. rewrite ! compose_print_identity in T. + cbn in T. +EXPAND_RTL_PROOF unfold match_prog; simpl. exists p1; split. apply SimplExprproof.transf_program_match; auto. exists p2; split. apply SimplLocalsproof.match_transf_program; auto. @@ -299,20 +228,7 @@ Proof. exists p4; split. apply Cminorgenproof.transf_program_match; auto. exists p5; split. apply Selectionproof.transf_program_match; auto. exists p6; split. apply RTLgenproof.transf_program_match; auto. - exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match. - exists p8; split. apply Inliningproof.transf_program_match; auto. - exists p9; split. apply Renumberproof.transf_program_match; auto. - exists p10; split. apply total_if_match. apply Constpropproof.transf_program_match. - exists p11; split. apply total_if_match. apply Renumberproof.transf_program_match. - exists p12; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match. - exists p13; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match. - exists p14; split. apply Unusedglobproof.transf_program_match; auto. - exists p15; split. apply Allocproof.transf_program_match; auto. - exists p16; split. apply Tunnelingproof.transf_program_match. - exists p17; split. apply Linearizeproof.transf_program_match; auto. - exists p18; split. apply CleanupLabelsproof.transf_program_match; auto. - exists p19; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match. - exists p20; split. apply Stackingproof.transf_program_match; auto. +EXPAND_RTL_PROOF2 exists tp; split. apply Asmgenproof.transf_program_match; auto. reflexivity. Qed. @@ -364,7 +280,9 @@ Ltac DestructM := destruct H as (p & M & MM); clear H end. repeat DestructM. subst tp. 
- assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p21)). + assert (F: forward_simulation (Cstrategy.semantics p) +EXPAND_ASM_SEMANTICS + ). { eapply compose_forward_simulations. eapply SimplExprproof.transl_program_correct; eassumption. @@ -378,25 +296,17 @@ Ltac DestructM := eapply Selectionproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply RTLgenproof.transf_program_correct; eassumption. +EXPAND_RTL_FORWARD_SIMULATIONS eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Tailcallproof.transf_program_correct. - eapply compose_forward_simulations. - eapply Inliningproof.transf_program_correct; eassumption. - eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Renumberproof.transf_program_correct. - eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact CSEproof.transf_program_correct. + eapply RTLtoBTLproof.transf_program_correct; eassumption. eapply compose_forward_simulations. - eapply match_if_simulation. eassumption. exact Deadcodeproof.transf_program_correct; eassumption. + eapply BTL_Schedulerproof.transf_program_correct; eassumption. eapply compose_forward_simulations. - eapply Unusedglobproof.transf_program_correct; eassumption. + eapply BTLtoRTLproof.transf_program_correct; eassumption. eapply compose_forward_simulations. - eapply Allocproof.transf_program_correct; eassumption. + eapply Allocationproof.transf_program_correct; eassumption. eapply compose_forward_simulations. - eapply Tunnelingproof.transf_program_correct; eassumption. + eapply LTLTunnelingproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply Linearizeproof.transf_program_correct; eassumption. 
eapply compose_forward_simulations. diff --git a/driver/Compopts.v b/driver/Compopts.v index 2a213350..65264124 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -36,11 +36,75 @@ Parameter optim_constprop: unit -> bool. (** Flag -fcse. For common subexpression elimination. *) Parameter optim_CSE: unit -> bool. +(** Flag -fcse2. For DMonniaux's common subexpression elimination. *) +Parameter optim_CSE2: unit -> bool. + +(** Flag -fcse3. For DMonniaux's common subexpression elimination. *) +Parameter optim_CSE3: unit -> bool. + +(** Flag -fcse3-alias-analysis. For DMonniaux's common subexpression elimination. Perform a simple alias analysis. *) +Parameter optim_CSE3_alias_analysis: unit -> bool. + +(** Flag -fcse3-across-calls. For DMonniaux's common subexpression elimination. Propagate information across function calls (may increase register pressure). *) +Parameter optim_CSE3_across_calls: unit -> bool. + +(** Flag -fcse3-across-merges. For DMonniaux's common subexpression elimination. Propagate information across control-flow merges (may increase register pressure). *) +Parameter optim_CSE3_across_merges: unit -> bool. + +(** Flag -fcse3-glb. For DMonniaux's common subexpression elimination: refine information using greatest lower bounds. *) +Parameter optim_CSE3_glb: unit -> bool. + +(** Flag -fcse3-trivial-ops. For DMonniaux's common subexpression elimination, simplify trivial operations as well. *) +Parameter optim_CSE3_trivial_ops: unit -> bool. + +(** Flag -fcse3-conditions. For DMonniaux's common subexpression elimination: remove redundant conditional branches. *) +Parameter optim_CSE3_conditions: unit -> bool. + +(** Flag -fmove-loop-invariants. For loop-invariant code motion. *) +Parameter optim_move_loop_invariants: unit -> bool. + (** Flag -fredundancy. For dead code elimination. *) Parameter optim_redundancy: unit -> bool. +(** Flag -fpostpass. Postpass scheduling for K1 architecture *) +Parameter optim_postpass: unit -> bool. + +(** FIXME TEMPORARY Flag -fglobaladdrtmp.
Use a temporary register for loading the address of global variables (default false) *) +Parameter optim_globaladdrtmp: unit -> bool. + +(** FIXME TEMPORARY Flag -fglobaladdroffset. Fold offsets into global addresses (default false) *) +Parameter optim_globaladdroffset: unit -> bool. + +(** FIXME TEMPORARY Flag -fxsaddr. Use .xs addressing mode (default true) *) +Parameter optim_xsaddr: unit -> bool. + +(** FIXME TEMPORARY Flag -fcoalesce-mem. Fuse (default true) *) +Parameter optim_coalesce_mem: unit -> bool. + +(** FIXME TEMPORARY Flag -fmadd. Fuse (default true) *) +Parameter optim_madd: unit -> bool. + +(** FIXME TEMPORARY Flag -faddx. Fuse (default false) *) +Parameter optim_addx: unit -> bool. + (** Flag -fthumb. For the ARM back-end. *) Parameter thumb: unit -> bool. (** Flag -g. For insertion of debugging information. *) Parameter debug: unit -> bool. + +(** Flag -fall-loads-nontrap. Turn user loads into non trapping. *) +Parameter all_loads_nontrap: unit -> bool. + +(** Flag -fforward-moves. Forward moves after CSE. *) +Parameter optim_forward_moves: unit -> bool. + +(** Flag -fprofile-arcs. Add profiling logger. *) +Parameter profile_arcs : unit -> bool. + +(** Flag -fbranch-probabilities. Use profiling information if available *) +Parameter branch_probabilities : unit -> bool. + +(* TODO is there a more appropriate place? *) +Require Import Coqlib. +Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
diff --git a/driver/Configuration.ml b/driver/Configuration.ml index 4b0c116e..deca85f2 100644 --- a/driver/Configuration.ml +++ b/driver/Configuration.ml @@ -123,9 +123,10 @@ let get_bool_config key = let arch = match get_config_string "arch" with - | "powerpc"|"arm"|"x86"|"riscV"|"aarch64" as a -> a + | "powerpc"|"arm"|"x86"|"riscV"|"kvx"|"aarch64" as a -> a | v -> bad_config "arch" [v] let model = get_config_string "model" +let os = get_config_string "os" let abi = get_config_string "abi" let is_big_endian = match get_config_string "endianness" with diff --git a/driver/Configuration.mli b/driver/Configuration.mli index a71da72d..75e547ff 100644 --- a/driver/Configuration.mli +++ b/driver/Configuration.mli @@ -19,6 +19,9 @@ val model: string val abi: string (** ABI to use *) +val os: string + (** Operating system to use *) + val is_big_endian: bool (** Endianness to use *) diff --git a/driver/Driver.ml b/driver/Driver.ml index 2b34d538..3f5a4bd9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -185,16 +185,46 @@ Processing options: {|Optimization options: (use -fno-<opt> to turn off -f<opt>) -O Optimize the compiled code [on by default] -O0 Do not optimize the compiled code - -O1 -O2 -O3 Synonymous for -O + -O1 Perform all optimization passes except scheduling + -O2 -O3 Synonymous for -O -Os Optimize for code size in preference to code speed -Obranchless Optimize to generate fewer conditional branches; try to produce branch-free instruction sequences as much as possible + -finline-auto-threshold n Inline functions under size n -ftailcalls Optimize function calls in tail position [on] -fconst-prop Perform global constant propagation [on] -ffloat-const-prop <n> Control constant propagation of floats (<n>=0: none, <n>=1: limited, <n>=2: full; default is full) -fcse Perform common subexpression elimination [on] + -fcse2 Perform inter-loop common subexpression elimination [off] + -fcse3 Perform inter-loop common subexpression elimination [on] + -fcse3-alias-analysis Perform
inter-loop common subexpression elimination with alias analysis [on] + -fcse3-across-calls Propagate CSE3 information across function calls [off] + -fcse3-across-merges Propagate CSE3 information across control-flow merges [on] + -fcse3-glb Refine CSE3 information using greatest lower bounds [on] + -fcse3-trivial-ops Replace trivial operations as well using CSE3 [off] + -fcse3-refine Refine CSE3 invariants by descending iteration [on] + -fcse3-conditions Remove redundant conditions using CSE3 [on] + -fmove-loop-invariants Perform loop-invariant code motion [off] -fredundancy Perform redundancy elimination [on] + -mtune= Type of CPU (for scheduling on some architectures) + -fprepass Perform prepass scheduling (only on some architectures) [on] + -fprepass= <optim> Perform prepass scheduling with the specified optimization [regpres] + (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) + -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure + -fregpres-wait-window When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no) + -fpostpass Perform postpass scheduling (only for K1 architecture) [on] + -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list] + (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) + -fpredict Insert static branch prediction information [on] + Also swaps ifso/ifnot branches accordingly at RTL level + -ftailduplicate n Perform tail duplication for RTL code blocks of size n (not counting Inops) [0] + -ftracelinearize Uses branch prediction information to improve the Linearize [on] + -funrollsingle n Unrolls a single iteration of
innermost loops of size n (not counting Inops) [0] + -funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0] + -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0] + Doesn't duplicate if the size of that header is strictly greater than n + -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their single caller [on] @@ -206,7 +236,10 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>) -falign-functions <n> Set alignment (in bytes) of function entry points -falign-branch-targets <n> Set alignment (in bytes) of branch targets -falign-cond-branches <n> Set alignment (in bytes) of conditional branches - -fcommon Put uninitialized globals in the common section [on]. + -fcommon Put uninitialized globals in the common section [on] + -fprofile-arcs Profile branches [off]. + -fprofile-use= filename Use profiling information in filename + -fbranch-probabilities Use profiling information (if available) for branches [on] |} ^ target_help ^ toolchain_help ^ @@ -248,8 +281,11 @@ let dump_mnemonics destfile = exit 0 let optimization_options = [ - option_ftailcalls; option_fifconversion; option_fconstprop; option_fcse; - option_fredundancy; option_finline; option_finline_functions_called_once; + option_ftailcalls; option_fifconversion; option_fconstprop; + option_fcse; option_fcse2; option_fcse3; + option_fpredict; option_ftracelinearize; + option_fpostpass; + option_fredundancy; option_finline; option_finline_functions_called_once; ] let set_all opts () = List.iter (fun r -> r := true) opts @@ -262,6 +298,14 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in + (* [f_opt_str name default ref strref] builds the action for [-f<name>=]: the string given to the callback is stored in [strref] ([default] when it is empty) and [ref] is set to true. The emptiness test uses structural [=]; physical [==] on strings compares addresses and would practically never succeed. *) + let f_opt_str name default ref strref = + [Exact("-f" ^ name ^ "="), String + (fun s -> (strref := (if s = "" then default else
s)); ref := true) + ] in + (* [f_str name strref default] builds the action for [-f<name>=]: stores the given string in [strref], or [default] when it is empty (structural [=] test). *) + let f_str name strref default = + [Exact("-f" ^ name ^ "="), String + (fun s -> (strref := (if s = "" then default else s))) + ] in let check_align n = if n <= 0 || ((n land (n - 1)) <> 0) then error no_loc "requested alignment %d is not a power of 2" n @@ -293,9 +337,14 @@ let cmdline_actions = [ Exact "-O0", Unit (unset_all optimization_options); Exact "-O", Unit (set_all optimization_options); + _Regexp "-O1$", Self (fun _ -> set_all optimization_options (); option_fpostpass := false); _Regexp "-O[123]$", Unit (set_all optimization_options); Exact "-Os", Set option_Osize; Exact "-Obranchless", Set option_Obranchless; + Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s); + Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); + Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n); + Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); @@ -365,10 +414,47 @@ let cmdline_actions = @ f_opt "if-conversion" option_fifconversion @ f_opt "const-prop" option_fconstprop @ f_opt "cse" option_fcse + @ f_opt "cse2" option_fcse2 + @ f_opt "cse3" option_fcse3 + @ f_opt "cse3-alias-analysis" option_fcse3_alias_analysis + @ f_opt "cse3-across-calls" option_fcse3_across_calls + @ f_opt "cse3-across-merges" option_fcse3_across_merges + @ f_opt "cse3-glb" option_fcse3_glb + @ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops + @ f_opt "cse3-refine" option_fcse3_refine + @ f_opt "cse3-conditions" option_fcse3_conditions + @ f_opt "move-loop-invariants" option_fmove_loop_invariants @ f_opt "redundancy" option_fredundancy + @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] + @ f_opt "prepass" option_fprepass + @ f_opt "regpres-wait-window"
option_regpres_wait_window + @ f_opt "postpass" option_fpostpass + @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] + @ f_opt "predict" option_fpredict + @ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ] + @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] + @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] + @ f_opt "tracelinearize" option_ftracelinearize + (* the prepass default is spelled like the initial value of Clflags.option_fprepass_sched *) + @ f_opt_str "prepass" "regpres" option_fprepass option_fprepass_sched + @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once -(* Code generation options *) + @ f_opt "globaladdrtmp" option_fglobaladdrtmp + @ f_opt "globaladdroffset" option_fglobaladdroffset + @ f_opt "xsaddr" option_fxsaddr + @ f_str "div-i32" option_div_i32 "stsud" + @ f_str "div-i64" option_div_i64 "stsud" + @ f_opt "addx" option_faddx + @ f_opt "madd" option_fmadd + @ f_opt "nontrap-loads" option_fnontrap_loads + @ f_opt "coalesce-mem" option_fcoalesce_mem + @ f_opt "expanse-rtlcond" option_fexpanse_rtlcond + @ f_opt "expanse-others" option_fexpanse_others + @ f_opt "all-loads-nontrap" option_all_loads_nontrap + @ f_opt "forward-moves" option_fforward_moves + (* Code generation options *) + @ f_opt "profile-arcs" option_profile_arcs + @ f_opt "branch-probabilities" option_fbranch_probabilities @ f_opt "fpu" option_ffpu @ f_opt "sse" option_ffpu (* backward compatibility *) @ [ diff --git a/driver/Frontend.ml b/driver/Frontend.ml index 6133291e..ecf3d6a5 100644 --- a/driver/Frontend.ml +++ b/driver/Frontend.ml @@ -116,6 +116,11 @@ let init () = | "riscV" -> if Configuration.model = "64" then Machine.rv64 else Machine.rv32 + | "kvx" -> if Configuration.os = "cos" then Machine.kvxcos + else if Configuration.os = "mbr" then Machine.kvxmbr + else if Configuration.os = "elf" then Machine.kvxelf + else (Printf.eprintf "Configuration
OS = %s\n" Configuration.os; + failwith "Wrong OS configuration for KVX") | "aarch64" -> if Configuration.abi = "apple" then Machine.aarch64_apple else Machine.aarch64 |