diff options
Diffstat (limited to 'scheduling/InstructionScheduler.ml')
-rw-r--r-- | scheduling/InstructionScheduler.ml | 503 |
1 files changed, 501 insertions, 2 deletions
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index eab0b21a..0203d9c8 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -33,6 +33,10 @@ type latency_constraint = { type problem = { max_latency : int; resource_bounds : int array; + live_regs_entry : Registers.Regset.t; + typing : RTLtyping.regenv; + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -118,6 +122,13 @@ let vector_less_equal a b = true with Exit -> false;; +(* let vector_add a b = + * assert ((Array.length a) = (Array.length b)); + * for i=0 to (Array.length a)-1 + * do + * b.(i) <- b.(i) + a.(i) + * done;; *) + let vector_subtract a b = assert ((Array.length a) = (Array.length b)); for i=0 to (Array.length a)-1 @@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order) assert(!time >= 0); !time with Exit -> -1 - in + let advance_time() = begin (if !current_time < max_time-1 @@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order) Array.blit problem.resource_bounds 0 current_resources 0 (Array.length current_resources); ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + InstrSet.union (ready.(!current_time)) + (ready.(!current_time + 1)); ready.(!current_time) <- InstrSet.empty; end); incr current_time @@ -334,6 +346,485 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; (* dummy code for placating ocaml's warnings *) let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +(* A scheduler sensitive to register pressure *) +let reg_pres_scheduler (problem : problem) : solution option = + DebugPrint.debug_flag := true; + + let nr_instructions = get_nr_instructions problem in + + if !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions; + + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + let available_regs = Array.copy Machregsaux.nr_regs in + + let nr_types_regs = Array.length available_regs in + + let thres = Array.fold_left (min) + (max !(Clflags.option_regpres_threshold) 0) + Machregsaux.nr_regs + in + + + let regs_thresholds = Array.make nr_types_regs thres in + (* placeholder value *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + available_regs.(classe) + <- available_regs.(classe) - 1; + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + let csr_b = ref false in + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta i = (fun a (r, b) -> + a + + if class_r r <> i then 0 else + (if b then + if (Hashtbl.find counts r = (i, 1)) + then 1 else 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + )) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let module InstrSet = + struct + module MSet = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) + + let empty = MSet.empty + let is_empty = MSet.is_empty + let add = MSet.add + let remove = MSet.remove + let union = MSet.union + let iter = MSet.iter + + let compare_regs i x y = + let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in + (* print_int y; + * print_string " "; + * print_int pyi; + * print_newline (); + * flush stdout; *) + let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in + match pyi - pxi with + | 0 -> (match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z) + | z -> z + + (** t is the register class *) + let sched_CSR t ready usages = + (* print_string "looking for max delta"; + * print_newline (); + * flush stdout; *) + let result = ref (-1) in + iter (fun i -> + if vector_less_equal usages.(i) current_resources + then if !result = -1 || (compare_regs t !result i > 0) + then result := i) ready; + !result + end + in + + let max_time = bound_max_time problem + 5*nr_instructions in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) + let cnt = ref 0 in + + let attempt_scheduling ready usages = + let result = ref (-1) in + try + Array.iteri (fun i avlregs -> + (* print_string "avlregs: "; + * print_int i; + * print_string " "; + * print_int avlregs; + * print_newline (); + * print_string "live regs: "; + * print_int (Hashtbl.length live_regs); + * print_newline (); + * flush stdout; *) + if avlregs <= regs_thresholds.(i) + then ( + csr_b := true; + let maybe = InstrSet.sched_CSR i ready usages in + (* print_string "maybe\n"; + * print_int maybe; + * print_newline (); + * flush stdout; *) + (if maybe > 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + (* print_string "delta "; + * print_int delta; + * print_newline (); + * flush stdout; *) + delta > 0 + then + (vector_subtract usages.(maybe) current_resources; + result := maybe) + else + if not !Clflags.option_regpres_wait_window + then + (InstrSet.iter (fun ins -> + if vector_less_equal usages.(ins) current_resources && + List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0 + then result := ins + ) ready; + if !result <> -1 then + vector_subtract usages.(!result) current_resources) + else + (incr cnt) + ); + raise Exit)) available_regs; + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then ( + vector_subtract usages.(i) current_resources; + result := i; + raise Exit)) ready; + -1 + with Exit -> + !result in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> (assert(times.(i) < 0); + ((* print_string "INSTR ISSUED: "; + * print_int i; + * print_newline (); + * flush stdout; *) + if !csr_b && !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := false; + if !Clflags.option_regpres_wait_window then + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + + +(********************************************************************) + +let reg_pres_scheduler_bis (problem : problem) : solution option = + DebugPrint.debug_flag := true; + Printf.printf "\nNEW\n\n"; + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + (* let available_regs = Array.copy Machregsaux.nr_regs in *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + (* available_regs.(classe) + * <- available_regs.(classe) - 1; *) + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta a (r, b) = + a + (if b then + match Hashtbl.find_opt counts r with + | Some (_, 1) -> 1 + | _ -> 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + ) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let compare_pres x y = + let pdy = List.fold_left (fold_delta) 0 mentions.(y) in + let pdx = List.fold_left (fold_delta) 0 mentions.(x) in + match pdy - pdx with + | 0 -> x - y + | z -> z + in + + let module InstrSet = + Set.Make (struct + type t = int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + let max_time = bound_max_time problem (* + 5*nr_instructions *) in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (* Printf.printf "ADV\n"; + * flush stdout; *) + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + + let attempt_scheduling ready usages = + let result = ref [] in + try + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then + if !result = [] || priorities.(i) = priorities.(List.hd (!result)) + then + result := i::(!result) + else raise Exit + ) ready; + if !result <> [] then raise Exit; + -1 + with + Exit -> + let mini = List.fold_left (fun a b -> + if a = -1 || compare_pres a b > 0 + then b else a + ) (-1) !result in + vector_subtract usages.(mini) current_resources; + mini + in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> ( + DebugPrint.debug "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) + ready.(!current_time); + DebugPrint.debug "\nSUCC: "; + List.iter (fun (i, l) -> DebugPrint.debug "%d " i) + successors.(i); + DebugPrint.debug "\n\n"; + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + (List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + (* available_regs.(t) + * <- available_regs.(t) + 1 *))) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + (* available_regs.(t) + * <- available_regs.(t) - 1 *)) + | Some i -> () + ) mentions.(i)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + +(********************************************************************) + type bundle = int list;; let rec extract_deps_to index = function @@ -438,6 +929,12 @@ let reverse_problem problem = { max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + (* Not needed for the revlist sched, and for now we wont bother + with creating a reverse scheduler aware of reg press *) + + typing = problem.typing; + reference_counting = problem.reference_counting; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 @@ -1259,5 +1756,7 @@ let scheduler_by_name name = | "ilp" -> validated_scheduler cascaded_scheduler | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler + | "regpres" -> validated_scheduler reg_pres_scheduler + | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; |