From 51668ba258e7b79a1b2b129a404b1eb9981e8e3b Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 18:01:59 +0200 Subject: Make prepass scheduling sensitive to register pressure, by Nicolas Nardino. Squashed commit of the following: commit cf033ec29391d5358dea1d3b25da1738957478c4 Author: David Monniaux Date: Fri Jul 16 18:01:03 2021 +0200 comment for authors commit 2ff766a18432fd75739abab0b5741ded6b67a2a5 Author: David Monniaux Date: Fri Jul 16 15:29:25 2021 +0200 activate register pressure by default commit 67f4ae2b702cc95ed7cef67b726e15abbf18e768 Author: David Monniaux Date: Fri Jul 16 15:26:03 2021 +0200 use a more recognizable option name commit 6121be54b80a55fdadd8b64dfad53357148c9090 Author: David Monniaux Date: Fri Jul 16 14:13:50 2021 +0200 fix for KVX commit 43d4932e8ba9e00eb8c8788c86f56b6bddd46392 Author: David Monniaux Date: Fri Jul 16 13:28:26 2021 +0200 setup registers commit 169a221104c37737f12abe79711009fc0d88ce09 Author: David Monniaux Date: Fri Jul 16 13:00:56 2021 +0200 rm useless code commit d6a846b641787ea6a5ed113b1d7275ffb5028d9c Author: David Monniaux Date: Fri Jul 16 12:54:19 2021 +0200 rm "Admitted" commit fd4d085aa988a6044f89fc17e8422be23bc87f9d Merge: 70f5867e 56498b64 Author: David Monniaux Date: Fri Jul 16 12:30:25 2021 +0200 Merge remote-tracking branch 'origin/kvx-work' into kvx-sched-w-reg-press commit 70f5867e441e253869cb3b432af77636a186d1cb Author: David Monniaux Date: Fri Jul 16 12:26:27 2021 +0200 rm TODO commit f86f5df47b69053702661671340b0fcb31506aa3 Author: nicolas.nardino Date: Thu Jul 8 11:22:17 2021 +0200 add more debug info commit a4a0b36f56a94c19da301265a4e3acad1fbdf6c4 Author: nicolas.nardino Date: Thu Jul 8 11:20:49 2021 +0200 Deactivate sched validator (i think) commit af97fca0f1d824f3becf9c6895f44ad234e262f8 Author: nicolas.nardino Date: Tue Jul 6 15:32:35 2021 +0200 Add debug info commit b96a48de58e1969535865b7b345514a24f7178a6 Author: nicolas.nardino Date: Mon Jun 28 16:04:44 2021 +0200 Change 
temporary solution (see prev commits), and add option for it commit 9ac49c465f9c8969fba00e6242da0c188a6a3080 Author: nicolas.nardino Date: Fri Jun 25 09:42:41 2021 +0200 Changed printfs into debugs commit dfa09586ae40c70769eeda688a0e7f59f611749f Author: nicolas.nardino Date: Thu Jun 24 18:33:20 2021 +0200 Another scheduler commit c5e8595480604c78260017cc771b0e4195fdd182 Merge: 10cbe4b2 cf2aa686 Author: nicolas.nardino Date: Tue Jun 22 15:58:10 2021 +0200 Merge branch 'kvx-sched-w-reg-press' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into kvx-sched-w-reg-press commit 10cbe4b28ef6dc5d02c9a5d4d369484e4943a18d Author: nicolas.nardino Date: Tue Jun 22 15:57:21 2021 +0200 Changed default threshold value following tests commit cf2aa686bcf9a823562fe977df6dd778d5467985 Merge: eddbce33 fe557bf6 Author: David Monniaux Date: Thu Jun 17 17:05:30 2021 +0200 Merge branch 'kvx-sched-w-reg-press' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into kvx-sched-w-reg-press commit eddbce33e28c49bf7b9e83ebd5dbf6cb0d770090 Merge: 8f399dfa fae8d9b5 Author: David Monniaux Date: Thu Jun 17 17:05:20 2021 +0200 Merge branch 'kvx-sched-w-reg-press' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into kvx-sched-w-reg-press commit 8f399dfa9d794f2f728f523ff1aa7788cc3599b2 Author: David Monniaux Date: Thu Jun 17 17:04:52 2021 +0200 fix for Risc-V commit fe557bf65ec738eaa078bc5e398ff690eb1f2b9e Author: nicolas.nardino Date: Thu Jun 17 17:03:53 2021 +0200 changed type of schedule_seq in x86 for compatibility commit fae8d9b5c5f93d5eda36f800eb0ca1837b237cba Author: nicolas.nardino Date: Thu Jun 17 17:00:57 2021 +0200 fix riscv/Machregsaux.mli commit 9759e94256fd09f4995418b67b7aedbcf84b4b10 Merge: 4413c27d 04b2489d Author: David Monniaux Date: Thu Jun 17 16:52:09 2021 +0200 Merge remote-tracking branch 'origin/kvx-work' into kvx-sched-w-reg-press commit 4413c27d6c6a3d69df34955d9d453c38b32174c7 Author: nicolas.nardino Date: Thu Jun 17 15:38:13 2021 +0200 Add option to set 
threshold and support for riscv commit 21278bd87e89210bcc287116f6e35fc1b52d0df2 Author: nicolas.nardino Date: Wed Jun 16 20:27:31 2021 +0200 Now working, tests show a decrease in spillage Should still find a proper way to treat the case mentioned in earlier commits commit 87c82b6fcf2bf825a8c60fc6a95498aac9f826d4 Author: nicolas.nardino Date: Tue Jun 15 14:44:56 2021 +0200 kinda fixed Spills are definitely reduced, but lots of arbitrary in there: See previous commit: need to determine what to do if pressure is too high but no schedulable instruction can reduce it. For now, advance time for at most 5 cycles, if still no suitable instruction, go back to CSP commit 19464b3992eadf7670acc7231896103ab54885e5 Author: nicolas.nardino Date: Tue Jun 15 12:07:43 2021 +0200 fixing Still need to find what to do when pressure is high but there are no instructions available that decrease it commit bff4e6ff0b782619b6fcc18751fa575cbb11de68 Author: nicolas.nardino Date: Mon Jun 14 17:39:58 2021 +0200 was very wrong, fixing commit 3eb3751f84348a20b7ce211fdbf1d01a9c4685a8 Author: nicolas.nardino Date: Mon Jun 14 14:46:01 2021 +0200 One fewer spill with new sched on `test/.../spille_forw.c` commit 66e15205c40de54639387a4c9b1cc78994525d55 Author: nicolas.nardino Date: Mon Jun 14 13:53:08 2021 +0200 scheduler written, need to test now commit 2b814b1f9bb30d9c8b59a713f69bced808bca7c7 Author: nicolas.nardino Date: Sat Jun 12 10:52:59 2021 +0200 work on the scheduler commit 1701e43316ee8e69e794a025a8c9979af6bb8c93 Author: nicolas.nardino Date: Thu Jun 10 16:31:51 2021 +0200 Work on new scheduler Renamed a test file, wrote function to compute pressure deltas, Still need to pass the info in some way; beginning of the actual scheduler function commit 386b9053177bb4ef2801cec00b717c400a828139 Author: nicolas.nardino Date: Tue Jun 8 16:53:19 2021 +0200 Fix RTLpathScheduleraux.get_live_regs_entry commit 9b6247b7996f3e0181d27ec0e20daffd28e0884f Author: nicolas.nardino Date: Tue Jun 8 16:06:36 2021 
+0200 Another test : one spill when scheduled forward, none if not commit 52378f0600652a94edcc8c78e4b426243f717a89 Author: nicolas.nardino Date: Tue Jun 8 15:11:03 2021 +0200 Add some tests commit 2249f3c7771c285ccd25f6e94478be388a741da5 Author: nicolas.nardino Date: Sun Jun 6 20:49:34 2021 +0200 Adding debug info commit 9118878bd14e24cc04c2f36cab7aa7271a0f1852 Author: nicolas.nardino Date: Sun Jun 6 12:11:15 2021 +0200 Fixing scope error, and non-exhaustive pattern matching commit 599823a6410f1629f2b8704291839e0974bce83b Author: nicolas.nardino Date: Sat Jun 5 19:52:59 2021 +0200 function written, now needs testing commit 98a7a04258f2cf6caf9f18925cbeeae2f5b17be4 Author: nicolas.nardino Date: Fri Jun 4 16:56:32 2021 +0200 computing live regs at sb entry from its live output regs commit 7ae1fb0faea68ce5cfe04a232e49659247c244e9 Author: nicolas.nardino Date: Fri Jun 4 14:24:07 2021 +0200 Passing info of live regs to scheduler: beginning --- riscV/Machregsaux.ml | 2 + riscV/Machregsaux.mli | 3 + riscV/PrepassSchedulingOracle.ml | 486 ++++++++++++++++++++++++++++++++++- riscV/PrepassSchedulingOracleDeps.ml | 18 +- 4 files changed, 507 insertions(+), 2 deletions(-) mode change 120000 => 100644 riscV/PrepassSchedulingOracle.ml mode change 120000 => 100644 riscV/PrepassSchedulingOracleDeps.ml (limited to 'riscV') diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml index 840943e7..e3e47946 100644 --- a/riscV/Machregsaux.ml +++ b/riscV/Machregsaux.ml @@ -18,3 +18,5 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 26; 32|] diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..bb3777bf 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git 
a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml deleted file mode 120000 index 912e9ffa..00000000 --- a/riscV/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..53a81095 --- /dev/null +++ b/riscV/PrepassSchedulingOracle.ml @@ -0,0 +1,485 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads 
:= i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last 
-> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg 
i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> 
t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | 
Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to 
latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name 
with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml deleted file mode 120000 index 1e955b85..00000000 --- a/riscV/PrepassSchedulingOracleDeps.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/riscV/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation 
-> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; -- cgit