diff options
Diffstat (limited to 'aarch64')
-rw-r--r-- | aarch64/Machregsaux.ml | 3 | ||||
-rw-r--r-- | aarch64/Machregsaux.mli | 3 | ||||
-rw-r--r-- | aarch64/PostpassSchedulingOracle.ml | 3 | ||||
-rw-r--r-- | aarch64/PrepassSchedulingOracle.ml | 191 |
4 files changed, 156 insertions, 44 deletions
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 41db3bd4..98e461eb 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -19,3 +19,6 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +(* number of available registers per class *) +let nr_regs = [| 29; 32 |] diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index cde3e7a7..6f784238 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,6 +507,9 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 1935e785..d7e80cd9 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -1,14 +1,26 @@ +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* Nicolas Nardino ENS-Lyon, VERIMAG *) +(* *) +(* *) +(* *************************************************************) + open AST open BTL open Maps open InstructionScheduler open Registers open PrepassSchedulingOracleDeps -open RTLcommonaux - +open PrintBTL +open DebugPrint + let use_alias_analysis () = false -let build_constraints_and_resources (opweights : opweights) insts btl = +let build_constraints_and_resources (opweights : opweights) seqa btl = let last_reg_reads : int list PTree.t ref = ref PTree.empty and last_reg_write : (int * int) PTree.t ref = ref PTree.empty and last_mem_reads : int list ref = ref [] @@ -16,8 +28,7 @@ let build_constraints_and_resources (opweights : opweights) insts btl = and last_branch : int option ref = ref None and last_non_pipelined_op : int array = Array.make opweights.nr_non_pipelined_units (-1) - and latency_constraints : latency_constraint list ref = ref [] - and resources = ref [] in + and latency_constraints : latency_constraint list ref = ref [] in let add_constraint instr_from instr_to latency = assert (instr_from <= instr_to); assert (latency >= 0); @@ -60,7 +71,29 @@ let build_constraints_and_resources (opweights : opweights) insts btl = last_reg_write := PTree.set reg (i, latency) !last_reg_write; last_reg_reads := PTree.remove reg !last_reg_reads in - let add_input_regs i regs = List.iter (add_input_reg i) regs + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> + add_builtin_res i hi; + add_builtin_res i lo + in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack (_, _) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal (_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong (hi, lo) -> + add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr (a1, a2) -> + add_builtin_arg i a1; + add_builtin_arg i a2 and irreversible_action i = match !last_branch with None -> () | Some j -> add_constraint j i 1 in @@ -77,58 +110,122 @@ let build_constraints_and_resources (opweights : opweights) insts btl = resources in Array.iteri - (fun i inst -> + (fun i (inst, other_uses) -> + List.iter (fun use -> add_input_reg i use) (Regset.elements other_uses); match inst with - | Bnop _ -> - let rs = Array.map (fun _ -> 0) opweights.pipelined_resource_bounds in - resources := rs :: !resources + | Bnop _ -> () | Bop (op, lr, rd, _) -> add_non_pipelined_resources i (opweights.non_pipelined_resources_of_op op (List.length lr)); if Op.is_trapping_op op then irreversible_action i; add_input_regs i lr; - add_output_reg i (opweights.latency_of_op op (List.length lr)) rd; - let rs = opweights.resources_of_op op (List.length lr) in - resources := rs :: !resources + add_output_reg i (opweights.latency_of_op op (List.length lr)) rd | Bload (trap, chk, addr, lr, rd, _) -> if trap = TRAP then irreversible_action i; add_input_mem i; add_input_regs i lr; add_output_reg i (opweights.latency_of_load trap chk addr (List.length lr)) - rd; - let rs = opweights.resources_of_load trap chk addr (List.length lr) in - resources := rs :: !resources + rd | Bstore (chk, addr, lr, src, _) -> irreversible_action i; add_input_regs i lr; add_input_reg i src; - add_output_mem i; - let rs = opweights.resources_of_store chk addr (List.length lr) in - resources := rs :: !resources + add_output_mem i | Bcond (cond, lr, BF (Bgoto s, _), ibnot, _) -> - let live = (get_some @@ PTree.get s btl).input_regs in - add_input_regs i (Regset.elements live); set_branch i; add_input_mem i; - add_input_regs i lr; - let rs = opweights.resources_of_cond cond (List.length lr) in - resources := rs :: !resources + add_input_regs i lr | Bcond (_, _, _, _, _) -> - failwith "get_simple_dependencies: invalid Bcond" - | BF (_, _) -> failwith "get_simple_dependencies: BF" - | Bseq (_, _) -> failwith "get_simple_dependencies: Bseq") - insts; - (!latency_constraints, Array.of_list (List.rev !resources)) + failwith "build_constraints_and_resources: invalid Bcond" + | BF (Bcall (signature, ef, lr, rd, _), _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> ()); + add_input_mem i; + add_input_regs i lr; + add_output_reg i (opweights.latency_of_call signature ef) rd; + add_output_mem i; + failwith "build_constraints_and_resources: invalid Bcall" + | BF (Btailcall (signature, ef, lr), _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> ()); + add_input_mem i; + add_input_regs i lr; + failwith "build_constraints_and_resources: invalid Btailcall" + | BF (Bbuiltin (ef, lr, rd, _), _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) lr; + add_builtin_res i rd; + add_output_mem i; + failwith "build_constraints_and_resources: invalid Bbuiltin" + | BF (Bjumptable (lr, _), _) -> + set_branch i; + add_input_reg i lr; + failwith "build_constraints_and_resources: invalid Bjumptable" + | BF (Breturn (Some r), _) -> + set_branch i; + add_input_reg i r; + failwith "build_constraints_and_resources: invalid Breturn Some" + | BF (Breturn None, _) -> + set_branch i; + failwith "build_constraints_and_resources: invalid Breturn None" + | BF (Bgoto _, _) -> + failwith "build_constraints_and_resources: invalid Bgoto" + | Bseq (_, _) -> failwith "build_constraints_and_resources: Bseq") + seqa; + !latency_constraints -let define_problem (opweights : opweights) ibf btl = - let simple_deps, resources = - build_constraints_and_resources opweights ibf btl - in +let resources_of_instruction (opweights : opweights) = function + | Bnop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Bop (op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Bload (trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Bstore (chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | BF (Bcall (signature, ef, inputs, output, _), _) -> + opweights.resources_of_call signature ef + | BF (Bbuiltin (ef, builtin_inputs, builtin_output, _), _) -> + opweights.resources_of_builtin ef + | Bcond (cond, args, _, _, _) -> + opweights.resources_of_cond cond (List.length args) + | BF (Btailcall _, _) | BF (Bjumptable _, _) | BF (Breturn _, _) -> + opweights.pipelined_resource_bounds + | BF (Bgoto _, _) | Bseq (_, _) -> + failwith "resources_of_instruction: invalid btl instruction" + +let print_sequence pp seqa = + Array.iteri + (fun i (inst, other_uses) -> + debug "i=%d\n inst = " i; + print_btl_inst pp inst; + debug "\n other_uses="; + print_regset other_uses; + debug "\n") + seqa + +let length_of_chunk = function + | Mint8signed | Mint8unsigned -> 1 + | Mint16signed | Mint16unsigned -> 2 + | Mint32 | Mfloat32 | Many32 -> 4 + | Mint64 | Mfloat64 | Many64 -> 8 + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa btl = + let simple_deps = build_constraints_and_resources opweights seqa btl in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - instruction_usages = resources; + live_regs_entry = live_entry_regs; + typing; + reference_counting = Some reference_counting; + instruction_usages = + Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = simple_deps; } @@ -154,27 +251,35 @@ let zigzag_scheduler problem early_ones = { problem with latency_constraints = !constraints' } | None -> None -let prepass_scheduler_by_name name problem insts = +let prepass_scheduler_by_name name problem seqa = match name with | "zigzag" -> let early_ones = Array.map - (fun inst -> + (fun (inst, _) -> match inst with Bcond (_, _, _, _, _) -> true | _ -> false) - insts + seqa in zigzag_scheduler problem early_ones | _ -> scheduler_by_name name problem -let schedule_sequence insts btl = +let schedule_sequence seqa btl (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) reference = let opweights = OpWeights.get_opweights () in try - if Array.length insts <= 1 then None + if Array.length seqa <= 1 then None else - let nr_instructions = Array.length insts in - let problem = define_problem opweights insts btl in + let nr_instructions = Array.length seqa in + if !Clflags.option_debug_compcert > 6 then + Printf.printf "prepass scheduling length = %d\n" nr_instructions; + let problem = + define_problem opweights live_regs_entry typing reference seqa btl + in + if !Clflags.option_debug_compcert > 7 then ( + print_sequence stdout seqa; + print_problem stdout problem); match - prepass_scheduler_by_name !Clflags.option_fprepass_sched problem insts + prepass_scheduler_by_name !Clflags.option_fprepass_sched problem seqa with | None -> Printf.printf "no solution in prepass scheduling\n"; @@ -190,5 +295,3 @@ let schedule_sequence insts btl = with Failure s -> Printf.printf "failure in prepass scheduling: %s\n" s; None - - |