aboutsummaryrefslogtreecommitdiffstats
path: root/scheduling/InstructionScheduler.ml
diff options
context:
space:
mode:
Diffstat (limited to 'scheduling/InstructionScheduler.ml')
-rw-r--r--scheduling/InstructionScheduler.ml488
1 files changed, 486 insertions, 2 deletions
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index eab0b21a..e3a421a5 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -33,6 +33,10 @@ type latency_constraint = {
type problem = {
max_latency : int;
resource_bounds : int array;
+ live_regs_entry : Registers.Regset.t;
+ typing : RTLtyping.regenv;
+ reference_counting : ((Registers.reg, int * int) Hashtbl.t
+ * ((Registers.reg * bool) list array)) option;
instruction_usages : int array array;
latency_constraints : latency_constraint list;
};;
@@ -118,6 +122,13 @@ let vector_less_equal a b =
true
with Exit -> false;;
+(* let vector_add a b =
+ * assert ((Array.length a) = (Array.length b));
+ * for i=0 to (Array.length a)-1
+ * do
+ * b.(i) <- b.(i) + a.(i)
+ * done;; *)
+
let vector_subtract a b =
assert ((Array.length a) = (Array.length b));
for i=0 to (Array.length a)-1
@@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order)
assert(!time >= 0);
!time
with Exit -> -1
-
in
+
let advance_time() =
begin
(if !current_time < max_time-1
@@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order)
Array.blit problem.resource_bounds 0 current_resources 0
(Array.length current_resources);
ready.(!current_time + 1) <-
- InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time + 1));
ready.(!current_time) <- InstrSet.empty;
end);
incr current_time
@@ -334,6 +346,470 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
(* dummy code for placating ocaml's warnings *)
let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
+
+(* A scheduler sensitive to register pressure *)
+let reg_pres_scheduler (problem : problem) : solution option =
+ (* DebugPrint.debug_flag := true; *)
+ let nr_instructions = get_nr_instructions problem in
+
+ if !Clflags.option_debug_compcert > 6 then
+ (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions;
+ flush stderr);
+
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+ let live_regs_entry = problem.live_regs_entry in
+
+ let available_regs = Array.copy Machregsaux.nr_regs in
+
+ let nr_types_regs = Array.length available_regs in
+
+ let thres = Array.fold_left (min)
+ (max !(Clflags.option_regpres_threshold) 0)
+ Machregsaux.nr_regs
+ in
+
+
+ let regs_thresholds = Array.make nr_types_regs thres in
+ (* placeholder value *)
+
+ let class_r r =
+ Machregsaux.class_of_type (problem.typing r) in
+
+ let live_regs = Hashtbl.create 42 in
+
+ List.iter (fun r -> let classe = Machregsaux.class_of_type
+ (problem.typing r) in
+ available_regs.(classe)
+ <- available_regs.(classe) - 1;
+ Hashtbl.add live_regs r classe)
+ (Registers.Regset.elements live_regs_entry);
+
+ let csr_b = ref false in
+
+ let counts, mentions =
+ match problem.reference_counting with
+ | Some (l, r) -> l, r
+ | None -> assert false
+ in
+
+ let fold_delta i = (fun a (r, b) ->
+ a +
+ if class_r r <> i then 0 else
+ (if b then
+ if (Hashtbl.find counts r = (i, 1))
+ then 1 else 0
+ else
+ match Hashtbl.find_opt live_regs r with
+ | None -> -1
+ | Some t -> 0
+ )) in
+
+ let priorities = critical_paths successors in
+
+ let current_resources = Array.copy problem.resource_bounds in
+
+ let module InstrSet =
+ struct
+ module MSet =
+ Set.Make (struct
+ type t=int
+ let compare x y =
+ match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z
+ end)
+
+ let empty = MSet.empty
+ let is_empty = MSet.is_empty
+ let add = MSet.add
+ let remove = MSet.remove
+ let union = MSet.union
+ let iter = MSet.iter
+
+ let compare_regs i x y =
+ let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in
+ (* print_int y;
+ * print_string " ";
+ * print_int pyi;
+ * print_newline ();
+ * flush stdout; *)
+ let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in
+ match pyi - pxi with
+ | 0 -> (match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z)
+ | z -> z
+
+ (** t is the register class *)
+ let sched_CSR t ready usages =
+ (* print_string "looking for max delta";
+ * print_newline ();
+ * flush stdout; *)
+ let result = ref (-1) in
+ iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then if !result = -1 || (compare_regs t !result i > 0)
+ then result := i) ready;
+ !result
+ end
+ in
+
+ let max_time = bound_max_time problem + 5*nr_instructions in
+ let ready = Array.make max_time InstrSet.empty in
+
+ Array.iteri (fun i preds ->
+ if i < nr_instructions && preds = []
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert (!time >= 0);
+ !time
+ with Exit -> -1
+ in
+
+ let advance_time () =
+ (if !current_time < max_time-1
+ then (
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time +1));
+ ready.(!current_time) <- InstrSet.empty));
+ incr current_time
+ in
+
+ (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *)
+ let cnt = ref 0 in
+
+ let attempt_scheduling ready usages =
+ let result = ref (-1) in
+ DebugPrint.debug "\n\nREADY: ";
+ InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready;
+ DebugPrint.debug "\n\n";
+ try
+ Array.iteri (fun i avlregs ->
+ DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n"
+ i avlregs (Hashtbl.length live_regs);
+ if !cnt < 5 && avlregs <= regs_thresholds.(i)
+ then (
+ csr_b := true;
+ let maybe = InstrSet.sched_CSR i ready usages in
+ DebugPrint.debug "maybe %d\n" maybe;
+ (if maybe >= 0 &&
+ let delta =
+ List.fold_left (fold_delta i) 0 mentions.(maybe) in
+ DebugPrint.debug "delta %d\n" delta;
+ delta > 0
+ then
+ (vector_subtract usages.(maybe) current_resources;
+ result := maybe)
+ else
+ if not !Clflags.option_regpres_wait_window
+ then
+ (InstrSet.iter (fun ins ->
+ if vector_less_equal usages.(ins) current_resources &&
+ List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0
+ then result := ins
+ ) ready;
+ if !result <> -1 then
+ vector_subtract usages.(!result) current_resources;
+ incr cnt)
+ else
+ (incr cnt)
+ );
+ raise Exit)) available_regs;
+ InstrSet.iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then (
+ vector_subtract usages.(i) current_resources;
+ result := i;
+ raise Exit)) ready;
+ -1
+ with Exit ->
+ !result in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time ()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i -> (assert(times.(i) < 0);
+ (DebugPrint.debug "INSTR ISSUED: %d\n" i;
+ if !csr_b && !Clflags.option_debug_compcert > 6 then
+ (Printf.eprintf "REGPRES: high pres class %d\n" i;
+ flush stderr);
+ csr_b := false;
+ (* if !Clflags.option_regpres_wait_window then *)
+ cnt := 0;
+ List.iter (fun (r,b) ->
+ if b then
+ (match Hashtbl.find_opt counts r with
+ | None -> assert false
+ | Some (t, n) ->
+ Hashtbl.remove counts r;
+ if n = 1 then
+ (Hashtbl.remove live_regs r;
+ available_regs.(t)
+ <- available_regs.(t) + 1))
+ else
+ let t = class_r r in
+ match Hashtbl.find_opt live_regs r with
+ | None -> (Hashtbl.add live_regs r t;
+ available_regs.(t)
+ <- available_regs.(t) - 1)
+ | Some i -> ()
+ ) mentions.(i));
+ times.(i) <- !current_time;
+ ready.(!current_time)
+ <- InstrSet.remove i (ready.(!current_time));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+ * (Array.length ready); *)
+ ready.(to_time)
+ <- InstrSet.add instr_to ready.(to_time))
+ ) successors.(i);
+ successors.(i) <- []
+ )
+ done;
+
+ try
+ let final_time = ref (-1) in
+ for i = 0 to nr_instructions - 1 do
+ DebugPrint.debug "%d " i;
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time then
+ final_time := target_time) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ (* DebugPrint.debug_flag := false; *)
+ Some times
+ with Exit ->
+ DebugPrint.debug "reg_pres_sched failed\n";
+ (* DebugPrint.debug_flag := false; *)
+ None
+
+;;
+
+
+(********************************************************************)
+
+let reg_pres_scheduler_bis (problem : problem) : solution option =
+ (* Printf.printf "\nNEW\n\n"; *)
+ let nr_instructions = get_nr_instructions problem in
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+ let live_regs_entry = problem.live_regs_entry in
+
+ (* let available_regs = Array.copy Machregsaux.nr_regs in *)
+
+ let class_r r =
+ Machregsaux.class_of_type (problem.typing r) in
+
+ let live_regs = Hashtbl.create 42 in
+
+ List.iter (fun r -> let classe = Machregsaux.class_of_type
+ (problem.typing r) in
+ (* available_regs.(classe)
+ * <- available_regs.(classe) - 1; *)
+ Hashtbl.add live_regs r classe)
+ (Registers.Regset.elements live_regs_entry);
+
+
+ let counts, mentions =
+ match problem.reference_counting with
+ | Some (l, r) -> l, r
+ | None -> assert false
+ in
+
+ let fold_delta a (r, b) =
+ a + (if b then
+ match Hashtbl.find_opt counts r with
+ | Some (_, 1) -> 1
+ | _ -> 0
+ else
+ match Hashtbl.find_opt live_regs r with
+ | None -> -1
+ | Some t -> 0
+ ) in
+
+ let priorities = critical_paths successors in
+
+ let current_resources = Array.copy problem.resource_bounds in
+
+ let compare_pres x y =
+ let pdy = List.fold_left (fold_delta) 0 mentions.(y) in
+ let pdx = List.fold_left (fold_delta) 0 mentions.(x) in
+ match pdy - pdx with
+ | 0 -> x - y
+ | z -> z
+ in
+
+ let module InstrSet =
+ Set.Make (struct
+ type t = int
+ let compare x y =
+ match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z
+ end) in
+
+ let max_time = bound_max_time problem (* + 5*nr_instructions *) in
+ let ready = Array.make max_time InstrSet.empty in
+
+ Array.iteri (fun i preds ->
+ if i < nr_instructions && preds = []
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert (!time >= 0);
+ !time
+ with Exit -> -1
+ in
+
+ let advance_time () =
+ (* Printf.printf "ADV\n";
+ * flush stdout; *)
+ (if !current_time < max_time-1
+ then (
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time +1));
+ ready.(!current_time) <- InstrSet.empty));
+ incr current_time
+ in
+
+
+ let attempt_scheduling ready usages =
+ let result = ref [] in
+ try
+ InstrSet.iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then
+ if !result = [] || priorities.(i) = priorities.(List.hd (!result))
+ then
+ result := i::(!result)
+ else raise Exit
+ ) ready;
+ if !result <> [] then raise Exit;
+ -1
+ with
+ Exit ->
+ let mini = List.fold_left (fun a b ->
+ if a = -1 || compare_pres a b > 0
+ then b else a
+ ) (-1) !result in
+ vector_subtract usages.(mini) current_resources;
+ mini
+ in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time ()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i -> (
+ DebugPrint.debug "ISSUED: %d\nREADY: " i;
+ InstrSet.iter (fun i -> DebugPrint.debug "%d " i)
+ ready.(!current_time);
+ DebugPrint.debug "\nSUCC: ";
+ List.iter (fun (i, l) -> DebugPrint.debug "%d " i)
+ successors.(i);
+ DebugPrint.debug "\n\n";
+ assert(times.(i) < 0);
+ times.(i) <- !current_time;
+ ready.(!current_time)
+ <- InstrSet.remove i (ready.(!current_time));
+ (List.iter (fun (r,b) ->
+ if b then
+ (match Hashtbl.find_opt counts r with
+ | None -> assert false
+ | Some (t, n) ->
+ Hashtbl.remove counts r;
+ if n = 1 then
+ (Hashtbl.remove live_regs r;
+ (* available_regs.(t)
+ * <- available_regs.(t) + 1 *)))
+ else
+ let t = class_r r in
+ match Hashtbl.find_opt live_regs r with
+ | None -> (Hashtbl.add live_regs r t;
+ (* available_regs.(t)
+ * <- available_regs.(t) - 1 *))
+ | Some i -> ()
+ ) mentions.(i));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+ * (Array.length ready); *)
+ ready.(to_time)
+ <- InstrSet.add instr_to ready.(to_time))
+ ) successors.(i);
+ successors.(i) <- []
+ )
+ done;
+
+ try
+ let final_time = ref (-1) in
+ for i = 0 to nr_instructions - 1 do
+ (* print_int i;
+ * flush stdout; *)
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time then
+ final_time := target_time) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ Some times
+ with Exit ->
+ DebugPrint.debug "reg_pres_sched failed\n";
+ None
+
+;;
+
+(********************************************************************)
+
type bundle = int list;;
let rec extract_deps_to index = function
@@ -438,6 +914,12 @@ let reverse_problem problem =
{
max_latency = problem.max_latency;
resource_bounds = problem.resource_bounds;
+ live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
+ (* Not needed for the revlist sched, and for now we wont bother
+ with creating a reverse scheduler aware of reg press *)
+
+ typing = problem.typing;
+ reference_counting = problem.reference_counting;
instruction_usages = Array.init (nr_instructions + 1)
(fun i ->
if i=0
@@ -1259,5 +1741,7 @@ let scheduler_by_name name =
| "ilp" -> validated_scheduler cascaded_scheduler
| "list" -> validated_scheduler list_scheduler
| "revlist" -> validated_scheduler reverse_list_scheduler
+ | "regpres" -> validated_scheduler reg_pres_scheduler
+ | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis
| "greedy" -> greedy_scheduler
| s -> failwith ("unknown scheduler: " ^ s);;