diff options
Diffstat (limited to 'scheduling')
-rw-r--r-- | scheduling/BTL.v | 8 | ||||
-rw-r--r-- | scheduling/BTLRenumber.ml | 2 | ||||
-rw-r--r-- | scheduling/BTLScheduleraux.ml | 217 | ||||
-rw-r--r-- | scheduling/BTL_SEtheory.v | 4 | ||||
-rw-r--r-- | scheduling/BTLcommonaux.ml | 23 | ||||
-rw-r--r-- | scheduling/BTLtypes.ml | 10 | ||||
-rw-r--r-- | scheduling/InstructionScheduler.ml | 488 | ||||
-rw-r--r-- | scheduling/InstructionScheduler.mli | 16 | ||||
-rw-r--r-- | scheduling/RTLpath.v | 2 | ||||
-rw-r--r-- | scheduling/RTLpathSE_theory.v | 4 | ||||
-rw-r--r-- | scheduling/RTLpathScheduleraux.ml | 187 | ||||
-rw-r--r-- | scheduling/RTLtoBTLaux.ml | 11 | ||||
-rw-r--r-- | scheduling/postpass_lib/Machblock.v | 4 |
13 files changed, 933 insertions, 43 deletions
diff --git a/scheduling/BTL.v b/scheduling/BTL.v index 96377943..f832085c 100644 --- a/scheduling/BTL.v +++ b/scheduling/BTL.v @@ -214,7 +214,7 @@ Inductive has_loaded sp rs m chunk addr args v: trapping_mode -> Prop := : has_loaded sp rs m chunk addr args v trap | has_loaded_default (LOAD: forall a, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None) - (DEFAULT: v = default_notrap_load_value chunk) + (DEFAULT: v = Vundef) : has_loaded sp rs m chunk addr args v NOTRAP . Local Hint Constructors has_loaded: core. @@ -272,12 +272,10 @@ Fixpoint iblock_istep_run sp ib rs m: option outcome := match Mem.loadv chunk m a with | Some v => Some {| _rs := rs#dst <- v; _m:= m; _fin := None |} | None => - let v := default_notrap_load_value chunk in - Some {| _rs := rs#dst <- v; _m:= m; _fin := None |} + Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |} end | None => - let v := default_notrap_load_value chunk in - Some {| _rs := rs#dst <- v; _m:= m; _fin := None |} + Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |} end | Bstore chunk addr args src _ => SOME a <- eval_addressing ge sp addr rs##args IN diff --git a/scheduling/BTLRenumber.ml b/scheduling/BTLRenumber.ml index 6ff42a27..2f4f9203 100644 --- a/scheduling/BTLRenumber.ml +++ b/scheduling/BTLRenumber.ml @@ -94,7 +94,7 @@ let regenerate_btl_tree btl entry = let ib = renumber_iblock old_ibf.entry in let n = get_inumb_or_next ib in let n_pos = i2p n in - let bi = mk_binfo n in + let bi = mk_binfo n old_ibf.binfo.s_output_regs old_ibf.binfo.typing in let ibf = { entry = ib; input_regs = old_ibf.input_regs; binfo = bi } in if old_n = entry then new_entry := n_pos; dm := PTree.set old_n n_pos !dm; diff --git a/scheduling/BTLScheduleraux.ml b/scheduling/BTLScheduleraux.ml index 00be2aa7..75672243 100644 --- a/scheduling/BTLScheduleraux.ml +++ b/scheduling/BTLScheduleraux.ml @@ -4,7 +4,9 @@ open BTLtypes open Machine open DebugPrint open PrintBTL +open Registers open RTLcommonaux +open BTLcommonaux open ExpansionOracle open PrepassSchedulingOracle @@ -88,11 +90,179 @@ let apply_schedule bseq olast positions = in build_iblock ibl +(** the useful one. Returns a hashtable with bindings of shape + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) +let reference_counting (seqa : (iblock * Regset.t) array) (out_regs : Regset.t) + (typing : RTLtyping.regenv) : + (reg, int * int) Hashtbl.t * (reg * bool) list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + * where b = true if seen as arg, false if seen as dest + *) + List.iter + (fun reg -> + Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1)) + (Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n + 1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1) + in + let map_true = List.map (fun r -> (r, true)) in + Array.iteri + (fun i (ins, _) -> + match ins with + | Bop (_, args, dest, _) | Bload (_, _, _, args, dest, _) -> + List.iter add_reg args; + retr.(i) <- (dest, false) :: map_true args + | Bcond (_, args, _, _, _) -> + List.iter add_reg args; + retr.(i) <- map_true args + | Bstore (_, _, args, src, _) -> + List.iter add_reg args; + add_reg src; + retr.(i) <- (src, true) :: map_true args + | BF (Bcall (_, fn, args, dest, _), _) -> + List.iter add_reg args; + retr.(i) <- + (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest, false) :: (reg, true) :: map_true args + | _ -> (dest, false) :: map_true args) + | BF (Btailcall (_, fn, args), _) -> + List.iter add_reg args; + retr.(i) <- + (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (reg, true) :: map_true args + | _ -> map_true args) + | BF (Bbuiltin (_, args, dest, _), _) -> + let rec bar = function + | AST.BA r -> + add_reg r; + retr.(i) <- (r, true) :: retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; + bar lo + | _ -> () + in + List.iter bar args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false) :: retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; + bad lo + | _ -> () + in + bad dest + | BF (Bjumptable (reg, _), _) | BF (Breturn (Some reg), _) -> + add_reg reg; + retr.(i) <- [ (reg, true) ] + | _ -> ()) + seqa; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) + (retl, retr) + +let get_live_regs_entry seqa ibf btl = + if !Clflags.option_debug_compcert > 6 then debug_flag := true; + let ret = + Array.fold_right + (fun (ins, liveins) regset_i -> + let regset = Regset.union liveins regset_i in + match ins with + | Bnop _ -> regset + | Bop (_, args, dest, _) | Bload (_, _, _, args, dest, _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + (Regset.remove dest regset) + args + | Bstore (_, _, args, src, _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + (Regset.add src regset) args + | BF (Bcall (_, fn, args, dest, _), _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> Regset.add reg + | Datatypes.Coq_inr _ -> fun x -> x) + (Regset.remove dest regset)) + args + | BF (Btailcall (_, fn, args), _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> Regset.add reg regset + | Datatypes.Coq_inr _ -> regset) + args + | BF (Bbuiltin (_, args, dest, _), _) -> + List.fold_left + (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Regset.add r set + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + add hi (add lo set) + | _ -> set + in + add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in + rem dest regset) + args + | BF (Bjumptable (reg, _), _) | BF (Breturn (Some reg), _) -> + Regset.add reg regset + | Bcond (_, args, _, _, _) -> + List.fold_left (fun set reg -> Regset.add reg set) regset args + | _ -> regset) + seqa ibf.binfo.s_output_regs + in + debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret + let schedule_blk n ibf btl = if not !Clflags.option_fprepass then btl else let bseq, olast = flatten_blk_basics ibf in - match schedule_sequence bseq btl with + let seqa = + Array.mapi + (fun i inst -> (inst, get_liveins inst)) + bseq + in + let live_regs_entry = get_live_regs_entry seqa ibf btl in + let refcount = + reference_counting seqa ibf.binfo.s_output_regs ibf.binfo.typing + in + match + schedule_sequence seqa btl live_regs_entry ibf.binfo.typing refcount + with | Some positions -> let new_ib = apply_schedule bseq olast positions in let new_ibf = @@ -116,29 +286,52 @@ let turn_all_loads_nontrap n ibf btl = in PTree.set n ibf' btl +let compute_liveins n ibf btl = + let rec traverse_rec ib = + match ib with + | Bseq (ib1, ib2) -> + traverse_rec ib1; + traverse_rec ib2 + | BF (Bgoto succ, iinfo) + | BF (Bcall (_, _, _, _, succ), iinfo) + | BF (Bbuiltin (_, _, _, succ), iinfo) -> + let lives = (get_some @@ PTree.get succ btl).input_regs in + iinfo.liveins <- lives + | BF (Bjumptable (_, tbl), iinfo) -> + List.iter + (fun ex -> + let lives = (get_some @@ PTree.get ex btl).input_regs in + iinfo.liveins <- Regset.union iinfo.liveins lives) + tbl + | Bcond (_, _, BF (Bgoto succ, _), Bnop None, iinfo) -> ( + match iinfo.pcond with + | Some predb -> + assert (predb = false); + let lives = (get_some @@ PTree.get succ btl).input_regs in + iinfo.liveins <- lives + | None -> ()) + | _ -> () + in + traverse_rec ibf.entry + let rec do_schedule btl = function | [] -> btl | (n, ibf) :: blks -> + compute_liveins n ibf btl; let code_exp = expanse n ibf btl in let ibf_exp = get_some @@ PTree.get n code_exp in let code_nt = turn_all_loads_nontrap n ibf_exp code_exp in let ibf_nt = get_some @@ PTree.get n code_nt in let btl' = schedule_blk n ibf_nt code_nt in - begin - (*debug_flag := true;*) - print_btl_code stderr code_nt; - print_btl_code stderr btl'; - (*debug_flag := false;*) - do_schedule btl' blks - end + (*debug_flag := true;*) + print_btl_code stderr btl; + print_btl_code stderr btl'; + (*debug_flag := false;*) + do_schedule btl' blks let btl_scheduler f = let btl = f.fn_code in - (*debug_flag := true;*) let elts = PTree.elements btl in find_last_reg elts; let btl' = do_schedule btl elts in - debug "Scheduled BTL Code:\n"; - print_btl_code stderr btl'; - (*debug_flag := false;*) btl' diff --git a/scheduling/BTL_SEtheory.v b/scheduling/BTL_SEtheory.v index e2e6be6b..033ed843 100644 --- a/scheduling/BTL_SEtheory.v +++ b/scheduling/BTL_SEtheory.v @@ -93,11 +93,11 @@ Fixpoint eval_sval ctx (sv: sval): option val := | NOTRAP => SOME args <- eval_list_sval ctx lsv IN match (eval_addressing (cge ctx) (csp ctx) addr args) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some a => SOME m <- eval_smem ctx sm IN match (Mem.loadv chunk m a) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some val => Some val end end diff --git a/scheduling/BTLcommonaux.ml b/scheduling/BTLcommonaux.ml index 4605d613..577e4828 100644 --- a/scheduling/BTLcommonaux.ml +++ b/scheduling/BTLcommonaux.ml @@ -1,15 +1,22 @@ open Maps open BTL +open Registers open BTLtypes open RTLcommonaux let undef_node = -1 -let mk_iinfo _inumb _pcond = { inumb = _inumb; pcond = _pcond; visited = false } +let mk_iinfo _inumb _pcond = { inumb = _inumb; pcond = _pcond; visited = false; liveins = Regset.empty } -let def_iinfo () = { inumb = undef_node; pcond = None; visited = false } +let def_iinfo () = { inumb = undef_node; pcond = None; visited = false; liveins = Regset.empty } -let mk_binfo _bnumb = { bnumb = _bnumb; visited = false } +let mk_binfo _bnumb _s_output_regs _typing = + { + bnumb = _bnumb; + visited = false; + s_output_regs = _s_output_regs; + typing = _typing; + } let reset_visited_ibf btl = PTree.map @@ -68,3 +75,13 @@ let rec get_inumb_or_next = function iinfo.inumb | Bseq (ib1, _) -> get_inumb_or_next ib1 | _ -> failwith "get_inumb_or_next: Bnop None" + +let get_liveins = function + | BF (_, iinfo) + | Bnop (Some iinfo) + | Bop (_, _, _, iinfo) + | Bload (_, _, _, _, _, iinfo) + | Bstore (_, _, _, _, iinfo) + | Bcond (_, _, _, _, iinfo) -> + iinfo.liveins + | _ -> failwith "get_liveins: invalid iblock" diff --git a/scheduling/BTLtypes.ml b/scheduling/BTLtypes.ml index 3972fd6b..12ca30e8 100644 --- a/scheduling/BTLtypes.ml +++ b/scheduling/BTLtypes.ml @@ -1,7 +1,15 @@ +open Registers + type inst_info = { mutable inumb : int; mutable pcond : bool option; mutable visited : bool; + mutable liveins: Regset.t; } -type block_info = { mutable bnumb : int; mutable visited : bool } +type block_info = { + mutable bnumb : int; + mutable visited : bool; + s_output_regs : Regset.t; + typing : RTLtyping.regenv; +} diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index eab0b21a..e3a421a5 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -33,6 +33,10 @@ type latency_constraint = { type problem = { max_latency : int; resource_bounds : int array; + live_regs_entry : Registers.Regset.t; + typing : RTLtyping.regenv; + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -118,6 +122,13 @@ let vector_less_equal a b = true with Exit -> false;; +(* let vector_add a b = + * assert ((Array.length a) = (Array.length b)); + * for i=0 to (Array.length a)-1 + * do + * b.(i) <- b.(i) + a.(i) + * done;; *) + let vector_subtract a b = assert ((Array.length a) = (Array.length b)); for i=0 to (Array.length a)-1 @@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order) assert(!time >= 0); !time with Exit -> -1 - in + let advance_time() = begin (if !current_time < max_time-1 @@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order) Array.blit problem.resource_bounds 0 current_resources 0 (Array.length current_resources); ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + InstrSet.union (ready.(!current_time)) + (ready.(!current_time + 1)); ready.(!current_time) <- InstrSet.empty; end); incr current_time @@ -334,6 +346,470 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; (* dummy code for placating ocaml's warnings *) let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +(* A scheduler sensitive to register pressure *) +let reg_pres_scheduler (problem : problem) : solution option = + (* DebugPrint.debug_flag := true; *) + let nr_instructions = get_nr_instructions problem in + + if !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; + flush stderr); + + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + let available_regs = Array.copy Machregsaux.nr_regs in + + let nr_types_regs = Array.length available_regs in + + let thres = Array.fold_left (min) + (max !(Clflags.option_regpres_threshold) 0) + Machregsaux.nr_regs + in + + + let regs_thresholds = Array.make nr_types_regs thres in + (* placeholder value *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + available_regs.(classe) + <- available_regs.(classe) - 1; + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + let csr_b = ref false in + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta i = (fun a (r, b) -> + a + + if class_r r <> i then 0 else + (if b then + if (Hashtbl.find counts r = (i, 1)) + then 1 else 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + )) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let module InstrSet = + struct + module MSet = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) + + let empty = MSet.empty + let is_empty = MSet.is_empty + let add = MSet.add + let remove = MSet.remove + let union = MSet.union + let iter = MSet.iter + + let compare_regs i x y = + let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in + (* print_int y; + * print_string " "; + * print_int pyi; + * print_newline (); + * flush stdout; *) + let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in + match pyi - pxi with + | 0 -> (match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z) + | z -> z + + (** t is the register class *) + let sched_CSR t ready usages = + (* print_string "looking for max delta"; + * print_newline (); + * flush stdout; *) + let result = ref (-1) in + iter (fun i -> + if vector_less_equal usages.(i) current_resources + then if !result = -1 || (compare_regs t !result i > 0) + then result := i) ready; + !result + end + in + + let max_time = bound_max_time problem + 5*nr_instructions in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) + let cnt = ref 0 in + + let attempt_scheduling ready usages = + let result = ref (-1) in + DebugPrint.debug "\n\nREADY: "; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready; + DebugPrint.debug "\n\n"; + try + Array.iteri (fun i avlregs -> + DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n" + i avlregs (Hashtbl.length live_regs); + if !cnt < 5 && avlregs <= regs_thresholds.(i) + then ( + csr_b := true; + let maybe = InstrSet.sched_CSR i ready usages in + DebugPrint.debug "maybe %d\n" maybe; + (if maybe >= 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + DebugPrint.debug "delta %d\n" delta; + delta > 0 + then + (vector_subtract usages.(maybe) current_resources; + result := maybe) + else + if not !Clflags.option_regpres_wait_window + then + (InstrSet.iter (fun ins -> + if vector_less_equal usages.(ins) current_resources && + List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0 + then result := ins + ) ready; + if !result <> -1 then + vector_subtract usages.(!result) current_resources; + incr cnt) + else + (incr cnt) + ); + raise Exit)) available_regs; + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then ( + vector_subtract usages.(i) current_resources; + result := i; + raise Exit)) ready; + -1 + with Exit -> + !result in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> (assert(times.(i) < 0); + (DebugPrint.debug "INSTR ISSUED: %d\n" i; + if !csr_b && !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "REGPRES: high pres class %d\n" i; + flush stderr); + csr_b := false; + (* if !Clflags.option_regpres_wait_window then *) + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + DebugPrint.debug "%d " i; + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + (* DebugPrint.debug_flag := false; *) + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + (* DebugPrint.debug_flag := false; *) + None + +;; + + +(********************************************************************) + +let reg_pres_scheduler_bis (problem : problem) : solution option = + (* Printf.printf "\nNEW\n\n"; *) + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + (* let available_regs = Array.copy Machregsaux.nr_regs in *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + (* available_regs.(classe) + * <- available_regs.(classe) - 1; *) + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta a (r, b) = + a + (if b then + match Hashtbl.find_opt counts r with + | Some (_, 1) -> 1 + | _ -> 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + ) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let compare_pres x y = + let pdy = List.fold_left (fold_delta) 0 mentions.(y) in + let pdx = List.fold_left (fold_delta) 0 mentions.(x) in + match pdy - pdx with + | 0 -> x - y + | z -> z + in + + let module InstrSet = + Set.Make (struct + type t = int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + let max_time = bound_max_time problem (* + 5*nr_instructions *) in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (* Printf.printf "ADV\n"; + * flush stdout; *) + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + + let attempt_scheduling ready usages = + let result = ref [] in + try + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then + if !result = [] || priorities.(i) = priorities.(List.hd (!result)) + then + result := i::(!result) + else raise Exit + ) ready; + if !result <> [] then raise Exit; + -1 + with + Exit -> + let mini = List.fold_left (fun a b -> + if a = -1 || compare_pres a b > 0 + then b else a + ) (-1) !result in + vector_subtract usages.(mini) current_resources; + mini + in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> ( + DebugPrint.debug "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) + ready.(!current_time); + DebugPrint.debug "\nSUCC: "; + List.iter (fun (i, l) -> DebugPrint.debug "%d " i) + successors.(i); + DebugPrint.debug "\n\n"; + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + (List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + (* available_regs.(t) + * <- available_regs.(t) + 1 *))) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + (* available_regs.(t) + * <- available_regs.(t) - 1 *)) + | Some i -> () + ) mentions.(i)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + None + +;; + +(********************************************************************) + type bundle = int list;; let rec extract_deps_to index = function @@ -438,6 +914,12 @@ let reverse_problem problem = { max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + (* Not needed for the revlist sched, and for now we wont bother + with creating a reverse scheduler aware of reg press *) + + typing = problem.typing; + reference_counting = problem.reference_counting; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 @@ -1259,5 +1741,7 @@ let scheduler_by_name name = | "ilp" -> validated_scheduler cascaded_scheduler | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler + | "regpres" -> validated_scheduler reg_pres_scheduler + | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index fb7af3f6..48c7bc09 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -23,6 +23,16 @@ type problem = { resource_bounds : int array; (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) + live_regs_entry : Registers.Regset.t; + (** The set of live pseudo-registers at entry. *) + + typing : RTLtyping.regenv; + (** Register type map. *) + + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; + (** See RTLpathScheduleraux.reference_counting. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) @@ -68,6 +78,12 @@ Once a clock tick is full go to the next. @return [Some solution] when a solution is found, [None] if not. *) val list_scheduler : problem -> solution option +(** WIP : Same as list_scheduler, but schedules instructions which decrease +register pressure when it gets too high. *) +val reg_pres_scheduler : problem -> solution option + +val reg_pres_scheduler_bis : problem -> solution option + (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v index a4fce97e..b29a7759 100644 --- a/scheduling/RTLpath.v +++ b/scheduling/RTLpath.v @@ -156,7 +156,7 @@ Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem) SOME v <- Mem.loadv chunk m a IN Some (mk_istate true pc' (rs#dst <- v) m) | Iload NOTRAP chunk addr args dst pc' => - let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in + let default_state := mk_istate true pc' rs#dst <- Vundef m in match (eval_addressing ge sp addr rs##args) with | None => Some default_state | Some a => match (Mem.loadv chunk m a) with diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v index aa8db342..2a791feb 100644 --- a/scheduling/RTLpathSE_theory.v +++ b/scheduling/RTLpathSE_theory.v @@ -87,11 +87,11 @@ Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem): | NOTRAP => SOME args <- seval_list_sval ge sp lsv rs0 m0 IN match (eval_addressing ge sp addr args) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some a => SOME m <- seval_smem ge sp sm rs0 m0 IN match (Mem.loadv chunk m a) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some val => Some val end end diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 3db25d82..659a8ba7 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -18,7 +18,7 @@ let print_superblock (sb: superblock) code = begin debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n"; debug " liveins = "; print_ptree_regset li; debug "\n"; - debug " output_regs = "; print_regset outs; debug "}" + debug " output_regs = "; print_regset outs; debug "\n}" end let print_superblocks lsb code = @@ -73,6 +73,168 @@ let get_superblocks code entry pm typing = lsb end +(** the useful one. Returns a hashtable with bindings of shape + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) +let reference_counting (seqa : (instruction * Regset.t) array) + (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : + (Registers.reg, int * int) Hashtbl.t * + (Registers.reg * bool) list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + * where b = true if seen as arg, false if seen as dest + *) + List.iter (fun reg -> + Hashtbl.add retl + reg (Machregsaux.class_of_type (typing reg), 1) + ) (Registers.Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n+1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type + (typing reg), 1) + in + let map_true = List.map (fun r -> r, true) in + Array.iteri (fun i (ins, _) -> + match ins with + | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (dest, false)::(map_true args) + | Icond(_,args,_,_,_) -> + List.iter (add_reg) args; + retr.(i) <- map_true args + | Istore(_,_,args,src,_) -> + List.iter (add_reg) args; + add_reg src; + retr.(i) <- (src, true)::(map_true args) + | Icall(_,fn,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest,false)::(reg, true)::(map_true args) + | _ -> (dest,false)::(map_true args)) + + | Itailcall(_,fn,args) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (reg, true)::(map_true args) + | _ -> map_true args) + | Ibuiltin(_,args,dest,_) -> + let rec bar = function + | AST.BA r -> add_reg r; + retr.(i) <- (r, true)::retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; bar lo + | _ -> () + in + List.iter (bar) args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false)::retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; bad lo + | _ -> () + in + bad dest; + | Ijumptable (reg,_) | Ireturn (Some reg) -> + add_reg reg; + retr.(i) <- [reg, true] + | _ -> () + ) seqa; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) + retl, retr + + +let get_live_regs_entry (sb : superblock) code = + (if !Clflags.option_debug_compcert > 6 + then debug_flag := true); + debug "getting live regs for superblock:\n"; + print_superblock sb code; + debug "\n"; + let seqa = Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.get_live_regs_entry" + ), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + sb.instructions in + let ret = + Array.fold_right (fun (ins, liveins) regset_i -> + let regset = Registers.Regset.union liveins regset_i in + match ins with + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Registers.Regset.add r set + | AST.BA_splitlong (hi, lo) + | AST.BA_addptr (hi, lo) -> add hi (add lo set) + | _ -> set + in add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Registers.Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in rem dest regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + in debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass @@ -91,15 +253,22 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + debug "hello\n"; + let live_regs_entry = get_live_regs_entry sb code in + let seqa = + Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.schedule_superblock"), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in match PrepassSchedulingOracle.schedule_sequence - (Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "RTLpathScheduleraux.schedule_superblock"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with + seqa + live_regs_entry + sb.typing + (reference_counting seqa sb.s_output_regs sb.typing) with | None -> sb.instructions | Some order -> let ins' = diff --git a/scheduling/RTLtoBTLaux.ml b/scheduling/RTLtoBTLaux.ml index d544e87f..3c06f4cb 100644 --- a/scheduling/RTLtoBTLaux.ml +++ b/scheduling/RTLtoBTLaux.ml @@ -45,7 +45,7 @@ let translate_inst (iinfo : BTL.inst_info) inst is_final = in if is_final then encaps_final btli !osucc else btli -let translate_function code entry join_points liveness = +let translate_function code entry join_points liveness (typing : RTLtyping.regenv) = let reached = ref (PTree.map (fun n i -> false) code) in let btl_code = ref PTree.empty in let rec build_btl_tree e = @@ -53,6 +53,7 @@ let translate_function code entry join_points liveness = else ( reached := PTree.set e true !reached; let next_nodes = ref [] in + let last = ref None in let rec build_btl_block n = let inst = get_some @@ PTree.get n code in let psucc = predicted_successor inst in @@ -78,13 +79,16 @@ let translate_function code entry join_points liveness = | None -> debug "BLOCK END.\n"; next_nodes := !next_nodes @ successors_inst inst; + last := Some inst; translate_inst iinfo inst true in let ib = build_btl_block e in let succs = !next_nodes in let inputs = get_some @@ PTree.get e liveness in - let bi = mk_binfo (p2i e) in + let soutput = get_outputs liveness e (get_some @@ !last) in + + let bi = mk_binfo (p2i e) soutput typing in let ibf = { entry = ib; input_regs = inputs; binfo = bi } in btl_code := PTree.set e ibf !btl_code; List.iter build_btl_tree succs) @@ -97,7 +101,8 @@ let rtl2btl (f : RTL.coq_function) = let entry = f.fn_entrypoint in let join_points = get_join_points code entry in let liveness = analyze f in - let btl = translate_function code entry join_points liveness in + let typing = get_ok @@ RTLtyping.type_function f in + let btl = translate_function code entry join_points liveness typing in let dm = PTree.map (fun n i -> n) btl in (*debug_flag := true;*) debug "Entry %d\n" (p2i entry); diff --git a/scheduling/postpass_lib/Machblock.v b/scheduling/postpass_lib/Machblock.v index c8eadbd7..b588cca8 100644 --- a/scheduling/postpass_lib/Machblock.v +++ b/scheduling/postpass_lib/Machblock.v @@ -237,13 +237,13 @@ Inductive basic_step (s: list stackframe) (fb: block) (sp: val) (rs: regset) (m: | exec_MBload_notrap1: forall addr args rs' chunk dst, eval_addressing ge sp addr rs##args = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBload_notrap2: forall addr args a rs' chunk dst, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBstore: forall chunk addr args src m' a rs', |