aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--aarch64/Machregsaux.ml4
-rw-r--r--aarch64/Machregsaux.mli2
-rw-r--r--aarch64/PostpassSchedulingOracle.ml3
-rw-r--r--aarch64/PrepassSchedulingOracle.ml64
-rw-r--r--common/DebugPrint.ml4
-rw-r--r--driver/Clflags.ml1
-rw-r--r--driver/Driver.ml4
-rw-r--r--riscV/Machregsaux.ml2
-rw-r--r--scheduling/InstructionScheduler.ml286
-rw-r--r--scheduling/InstructionScheduler.mli14
-rw-r--r--scheduling/RTLpathScheduleraux.ml187
-rw-r--r--test/nardino/scheduling/entry_regs.c19
-rw-r--r--test/nardino/scheduling/spille_backw.c114
-rw-r--r--test/nardino/scheduling/spille_forw.c166
14 files changed, 852 insertions, 18 deletions
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index 41db3bd4..15fb08ca 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -19,3 +19,7 @@ let class_of_type = function
| AST.Tint | AST.Tlong -> 0
| AST.Tfloat | AST.Tsingle -> 1
| AST.Tany32 | AST.Tany64 -> assert false
+
+(* number of available registers per class *)
+(* TODO: add this to all archs *)
+let nr_regs = [| 29; 32 |]
diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli
index 01b0f9fd..8487a557 100644
--- a/aarch64/Machregsaux.mli
+++ b/aarch64/Machregsaux.mli
@@ -15,3 +15,5 @@
val is_scratch_register: string -> bool
val class_of_type: AST.typ -> int
+
+val nr_regs : int array
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index cde3e7a7..6f784238 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -507,6 +507,9 @@ let build_problem bb =
{
max_latency = -1;
resource_bounds = opweights.pipelined_resource_bounds;
+ live_regs_entry = Registers.Regset.empty; (* unused here *)
+ typing = (fun x -> AST.Tint); (* unused here *)
+ reference_counting = None;
instruction_usages = instruction_usages bb;
latency_constraints = latency_constraints bb;
}
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 2c3eb14f..fe757c99 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -201,6 +201,54 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
end seqa;
!latency_constraints;;
+
+(** useless *)
+let get_pressure_deltas (seqa : (instruction * Regset.t) array)
+ (typing : RTLtyping.regenv)
+ : int array array =
+ let nr_types_regs = Array.length Machregsaux.nr_regs in
+ let ret = Array.init (Array.length seqa) (fun i ->
+ Array.make nr_types_regs 0) in
+ Array.iteri (fun i (instr, liveins) -> match instr with
+ | Iop (_, args, dest, _) | Iload (_, _, _, args, dest, _) ->
+ ret.(i).(Machregsaux.class_of_type (typing dest)) <-
+ if List.mem dest args then 0
+ else 1
+ | Istore (_, _, _, src, _) ->
+ ret.(i).(Machregsaux.class_of_type (typing src)) <-
+ -1
+ | Icall (_, fn, args, dest, _) ->
+ ret.(i).(Machregsaux.class_of_type (typing dest)) <-
+ if List.mem dest
+ (match fn with
+ | Datatypes.Coq_inl reg -> reg::args
+ | _ -> args)
+ then 0 else 1
+ | Ibuiltin (_, args, dest, _) ->
+ let rec arg_l list = function
+ | AST.BA r -> r::list
+ | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
+ arg_l (arg_l list lo) hi
+ | _ -> list
+ in
+ let l = (List.fold_left arg_l [] args) in
+ let rec dest_l = function
+ | AST.BR r ->
+ let t = Machregsaux.class_of_type (typing r) in
+ ret.(i).(t) <-
+ (if List.mem r l
+ then 0 else 1) + ret.(i).(t)
+ | AST.BR_splitlong (hi, lo) ->
+ dest_l hi;
+ dest_l lo
+ | _ -> ()
+ in
+ dest_l dest
+ | _ -> ()
+ ) seqa;
+ ret
+
+
let resources_of_instruction (opweights : opweights) = function
| Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
| Iop(op, inputs, output, _) ->
@@ -406,11 +454,15 @@ let get_alias_dependencies seqa =
!deps;;
*)
-let define_problem (opweights : opweights) seqa =
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+ (typing : RTLtyping.regenv) reference_counting seqa =
let simple_deps = get_simple_dependencies opweights seqa in
{ max_latency = -1;
resource_bounds = opweights.pipelined_resource_bounds;
- instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ live_regs_entry = live_entry_regs;
+ typing = typing;
+ reference_counting = Some reference_counting;
+ instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
latency_constraints =
(* if (use_alias_analysis ())
then (get_alias_dependencies seqa) @ simple_deps
@@ -439,7 +491,10 @@ let prepass_scheduler_by_name name problem early_ones =
| "zigzag" -> zigzag_scheduler problem early_ones
| _ -> scheduler_by_name name problem
-let schedule_sequence (seqa : (instruction*Regset.t) array) =
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+ (live_regs_entry : Registers.Regset.t)
+ (typing : RTLtyping.regenv)
+ reference =
let opweights = OpWeights.get_opweights () in
try
if (Array.length seqa) <= 1
@@ -449,7 +504,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) =
let nr_instructions = Array.length seqa in
(if !Clflags.option_debug_compcert > 6
then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
- let problem = define_problem opweights seqa in
+ let problem = define_problem opweights live_regs_entry
+ typing reference seqa in
(if !Clflags.option_debug_compcert > 7
then (print_sequence stdout (Array.map fst seqa);
print_problem stdout problem));
diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml
index 6f8449ee..275e6a71 100644
--- a/common/DebugPrint.ml
+++ b/common/DebugPrint.ml
@@ -132,10 +132,10 @@ let print_instructions insts code =
| None -> failwith "Did not get some"
| Some thing -> thing
in if (!debug_flag) then begin
- debug "[ ";
+ debug "[\n";
List.iter (
fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code))
- ) insts; debug "]"
+ ) insts; debug " ]"
end
let print_arrayp arr = begin
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index fa17c2d9..1f31bd3e 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -115,4 +115,5 @@ let option_inline_auto_threshold = ref 0
let option_profile_arcs = ref false
let option_fbranch_probabilities = ref true
let option_debug_compcert = ref 0
+let option_regpres_threshold = ref 5
let main_function_name = ref "main"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 7192ba4b..fa187f26 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -210,7 +210,8 @@ Processing options:
-mtune= Type of CPU (for scheduling on some architectures)
-fprepass Perform prepass scheduling (only on some architectures) [on]
-fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
- (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+ (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+ -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure
-fpostpass Perform postpass scheduling (only for K1 architecture) [on]
-fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
(<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
@@ -342,6 +343,7 @@ let cmdline_actions =
Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s);
Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n);
Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n);
+ Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n);
Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n);
diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml
index 840943e7..e3e47946 100644
--- a/riscV/Machregsaux.ml
+++ b/riscV/Machregsaux.ml
@@ -18,3 +18,5 @@ let class_of_type = function
| AST.Tint | AST.Tlong -> 0
| AST.Tfloat | AST.Tsingle -> 1
| AST.Tany32 | AST.Tany64 -> assert false
+
+let nr_regs = [| 26; 32|]
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index eab0b21a..5b4c87f4 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -33,6 +33,10 @@ type latency_constraint = {
type problem = {
max_latency : int;
resource_bounds : int array;
+ live_regs_entry : Registers.Regset.t;
+ typing : RTLtyping.regenv;
+ reference_counting : ((Registers.reg, int * int) Hashtbl.t
+ * ((Registers.reg * bool) list array)) option;
instruction_usages : int array array;
latency_constraints : latency_constraint list;
};;
@@ -118,6 +122,13 @@ let vector_less_equal a b =
true
with Exit -> false;;
+(* let vector_add a b =
+ * assert ((Array.length a) = (Array.length b));
+ * for i=0 to (Array.length a)-1
+ * do
+ * b.(i) <- b.(i) + a.(i)
+ * done;; *)
+
let vector_subtract a b =
assert ((Array.length a) = (Array.length b));
for i=0 to (Array.length a)-1
@@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order)
assert(!time >= 0);
!time
with Exit -> -1
-
in
+
let advance_time() =
begin
(if !current_time < max_time-1
@@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order)
Array.blit problem.resource_bounds 0 current_resources 0
(Array.length current_resources);
ready.(!current_time + 1) <-
- InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time + 1));
ready.(!current_time) <- InstrSet.empty;
end);
incr current_time
@@ -334,6 +346,269 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
(* dummy code for placating ocaml's warnings *)
let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
+
+(* A scheduler sensitive to register pressure *)
+let reg_pres_scheduler (problem : problem) : solution option =
+ DebugPrint.debug_flag := true;
+ let nr_instructions = get_nr_instructions problem in
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+ let live_regs_entry = problem.live_regs_entry in
+
+ let available_regs = Array.copy Machregsaux.nr_regs in
+
+ let nr_types_regs = Array.length available_regs in
+
+ let thres = Array.fold_left (min)
+ (max !(Clflags.option_regpres_threshold) 0)
+ Machregsaux.nr_regs
+ in
+
+
+ let regs_thresholds = Array.make nr_types_regs thres in
+ (* placeholder value *)
+
+ let class_r r =
+ Machregsaux.class_of_type (problem.typing r) in
+
+ let live_regs = Hashtbl.create 42 in
+
+ List.iter (fun r -> let classe = Machregsaux.class_of_type
+ (problem.typing r) in
+ available_regs.(classe)
+ <- available_regs.(classe) - 1;
+ Hashtbl.add live_regs r classe)
+ (Registers.Regset.elements live_regs_entry);
+
+
+ let counts, mentions =
+ match problem.reference_counting with
+ | Some (l, r) -> l, r
+ | None -> assert false
+ in
+
+ let fold_delta i = (fun a (r, b) ->
+ a +
+ if class_r r <> i then 0 else
+ (if b then
+ if (Hashtbl.find counts r = (i, 1))
+ then 1 else 0
+ else
+ match Hashtbl.find_opt live_regs r with
+ | None -> -1
+ | Some t -> 0
+ )) in
+
+ let priorities = critical_paths successors in
+
+ let current_resources = Array.copy problem.resource_bounds in
+
+ let module InstrSet =
+ struct
+ module MSet =
+ Set.Make (struct
+ type t=int
+ let compare x y =
+ match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z
+ end)
+
+ let empty = MSet.empty
+ let is_empty = MSet.is_empty
+ let add = MSet.add
+ let remove = MSet.remove
+ let union = MSet.union
+ let iter = MSet.iter
+
+ let compare_regs i x y =
+ let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in
+ (* print_int y;
+ * print_string " ";
+ * print_int pyi;
+ * print_newline ();
+ * flush stdout; *)
+ let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in
+ match pyi - pxi with
+ | 0 -> (match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z)
+ | z -> z
+
+ (** t is the register class *)
+ let sched_CSR t ready usages =
+ (* print_string "looking for max delta";
+ * print_newline ();
+ * flush stdout; *)
+ let result = ref (-1) in
+ iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then if !result = -1 || (compare_regs t !result i > 0)
+ then result := i) ready;
+ !result
+ end
+ in
+
+ let max_time = bound_max_time problem + 5*nr_instructions in
+ let ready = Array.make max_time InstrSet.empty in
+
+ Array.iteri (fun i preds ->
+ if i < nr_instructions && preds = []
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert (!time >= 0);
+ !time
+ with Exit -> -1
+ in
+
+ let advance_time () =
+ (if !current_time < max_time-1
+ then (
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time +1));
+ ready.(!current_time) <- InstrSet.empty));
+ incr current_time
+ in
+
+ (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *)
+ let cnt = ref 0 in
+
+ let attempt_scheduling ready usages =
+ let result = ref (-1) in
+ try
+ Array.iteri (fun i avlregs ->
+ (* print_string "avlregs: ";
+ * print_int i;
+ * print_string " ";
+ * print_int avlregs;
+ * print_newline ();
+ * print_string "live regs: ";
+ * print_int (Hashtbl.length live_regs);
+ * print_newline ();
+ * flush stdout; *)
+ if !cnt < 5 && avlregs <= regs_thresholds.(i)
+ then (
+ let maybe = InstrSet.sched_CSR i ready usages in
+ (* print_string "maybe\n";
+ * print_int maybe;
+ * print_newline ();
+ * flush stdout; *)
+ (if maybe > 0 &&
+ let delta =
+ List.fold_left (fold_delta i) 0 mentions.(maybe) in
+ (* print_string "delta ";
+ * print_int delta;
+ * print_newline ();
+ * flush stdout; *)
+ delta > 0
+ then
+ (vector_subtract usages.(maybe) current_resources;
+ result := maybe)
+ else incr cnt);
+ raise Exit)) available_regs;
+ InstrSet.iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then (
+ vector_subtract usages.(i) current_resources;
+ result := i;
+ raise Exit)) ready;
+ -1
+ with Exit ->
+ !result in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time ()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i -> (assert(times.(i) < 0);
+ ((* print_string "INSTR ISSUED: ";
+ * print_int i;
+ * print_newline ();
+ * flush stdout; *)
+ cnt := 0;
+ List.iter (fun (r,b) ->
+ if b then
+ (match Hashtbl.find_opt counts r with
+ | None -> assert false
+ | Some (t, n) ->
+ Hashtbl.remove counts r;
+ if n = 1 then
+ ((* print_string "yaaaaaaaaaaaas ";
+ * print_int (Camlcoq.P.to_int r);
+ * print_newline (); *)
+ Hashtbl.remove live_regs r;
+ available_regs.(t)
+ <- available_regs.(t) + 1))
+ else
+ let t = class_r r in
+ match Hashtbl.find_opt live_regs r with
+ | None -> ((* print_string "noooooooooo ";
+ * print_int (Camlcoq.P.to_int r);
+ * print_newline (); *)
+ Hashtbl.add live_regs r t;
+ available_regs.(t)
+ <- available_regs.(t) - 1)
+ | Some i -> ()
+ ) mentions.(i));
+ times.(i) <- !current_time;
+ ready.(!current_time)
+ <- InstrSet.remove i (ready.(!current_time));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+ * (Array.length ready); *)
+ ready.(to_time)
+ <- InstrSet.add instr_to ready.(to_time))
+ ) successors.(i);
+ successors.(i) <- []
+ )
+ done;
+
+ try
+ let final_time = ref (-1) in
+ for i = 0 to nr_instructions - 1 do
+ (* print_int i;
+ * flush stdout; *)
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time then
+ final_time := target_time) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ DebugPrint.debug_flag := false;
+ Some times
+ with Exit ->
+ DebugPrint.debug "reg_pres_sched failed\n";
+ DebugPrint.debug_flag := false;
+ None
+
+;;
+
+
+
type bundle = int list;;
let rec extract_deps_to index = function
@@ -438,6 +713,12 @@ let reverse_problem problem =
{
max_latency = problem.max_latency;
resource_bounds = problem.resource_bounds;
+ live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
+ (* Not needed for the revlist sched, and for now we wont bother
+ with creating a reverse scheduler aware of reg press *)
+
+ typing = problem.typing;
+ reference_counting = problem.reference_counting;
instruction_usages = Array.init (nr_instructions + 1)
(fun i ->
if i=0
@@ -1259,5 +1540,6 @@ let scheduler_by_name name =
| "ilp" -> validated_scheduler cascaded_scheduler
| "list" -> validated_scheduler list_scheduler
| "revlist" -> validated_scheduler reverse_list_scheduler
+ | "regpres" -> validated_scheduler reg_pres_scheduler
| "greedy" -> greedy_scheduler
| s -> failwith ("unknown scheduler: " ^ s);;
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index fb7af3f6..b5a5463b 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -23,6 +23,16 @@ type problem = {
resource_bounds : int array;
(** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *)
+ live_regs_entry : Registers.Regset.t;
+ (** The set of live pseudo-registers at entry. *)
+
+ typing : RTLtyping.regenv;
+ (** Register type map. *)
+
+ reference_counting : ((Registers.reg, int * int) Hashtbl.t
+ * ((Registers.reg * bool) list array)) option;
+ (** See RTLpathScheduleraux.reference_counting. *)
+
instruction_usages: int array array;
(** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
@@ -68,6 +78,10 @@ Once a clock tick is full go to the next.
@return [Some solution] when a solution is found, [None] if not. *)
val list_scheduler : problem -> solution option
+(** WIP : Same as list_scheduler, but schedules instructions which decrease
+register pressure when it gets too high. *)
+val reg_pres_scheduler : problem -> solution option
+
(** Schedule the problem using the order of instructions without any reordering *)
val greedy_scheduler : problem -> solution option
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index aeed39df..f3f09954 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -17,7 +17,7 @@ let print_superblock (sb: superblock) code =
begin
debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n";
debug " liveins = "; print_ptree_regset li; debug "\n";
- debug " output_regs = "; print_regset outs; debug "}"
+ debug " output_regs = "; print_regset outs; debug "\n}"
end
let print_superblocks lsb code =
@@ -72,6 +72,168 @@ let get_superblocks code entry pm typing =
lsb
end
+(** the useful one. Returns a hashtable with bindings of shape
+ ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg),
+ ** [t] is its class (according to [typing]), and [n] the number of
+ ** times it's referenced as an argument in instructions of [seqa] ;
+ ** and an arrray containg the list of regs referenced by each
+ ** instruction, with a boolean to know whether it's as arg or dest *)
+let reference_counting (seqa : (instruction * Regset.t) array)
+ (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) :
+ (Registers.reg, int * int) Hashtbl.t *
+ (Registers.reg * bool) list array =
+ let retl = Hashtbl.create 42 in
+ let retr = Array.make (Array.length seqa) [] in
+ (* retr.(i) : (r, b) -> (r', b') -> ...
+ * where b = true if seen as arg, false if seen as dest
+ *)
+ List.iter (fun reg ->
+ Hashtbl.add retl
+ reg (Machregsaux.class_of_type (typing reg), 1)
+ ) (Registers.Regset.elements out_regs);
+ let add_reg reg =
+ match Hashtbl.find_opt retl reg with
+ | Some (t, n) -> Hashtbl.add retl reg (t, n+1)
+ | None -> Hashtbl.add retl reg (Machregsaux.class_of_type
+ (typing reg), 1)
+ in
+ let map_true = List.map (fun r -> r, true) in
+ Array.iteri (fun i (ins, _) ->
+ match ins with
+ | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (dest, false)::(map_true args)
+ | Icond(_,args,_,_,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- map_true args
+ | Istore(_,_,args,src,_) ->
+ List.iter (add_reg) args;
+ add_reg src;
+ retr.(i) <- (src, true)::(map_true args)
+ | Icall(_,fn,args,dest,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (dest,false)::(reg, true)::(map_true args)
+ | _ -> (dest,false)::(map_true args))
+
+ | Itailcall(_,fn,args) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (reg, true)::(map_true args)
+ | _ -> map_true args)
+ | Ibuiltin(_,args,dest,_) ->
+ let rec bar = function
+ | AST.BA r -> add_reg r;
+ retr.(i) <- (r, true)::retr.(i)
+ | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
+ bar hi; bar lo
+ | _ -> ()
+ in
+ List.iter (bar) args;
+ let rec bad = function
+ | AST.BR r -> retr.(i) <- (r, false)::retr.(i)
+ | AST.BR_splitlong (hi, lo) ->
+ bad hi; bad lo
+ | _ -> ()
+ in
+ bad dest;
+ | Ijumptable (reg,_) | Ireturn (Some reg) ->
+ add_reg reg;
+ retr.(i) <- [reg, true]
+ | _ -> ()
+ ) seqa;
+ (* print_string "mentions\n";
+ * Array.iteri (fun i l ->
+ * print_int i;
+ * print_string ": [";
+ * List.iter (fun (r, b) ->
+ * print_int (Camlcoq.P.to_int r);
+ * print_string ":";
+ * print_string (if b then "a:" else "d");
+ * if b then print_int (snd (Hashtbl.find retl r));
+ * print_string ", "
+ * ) l;
+ * print_string "]\n";
+ * flush stdout;
+ * ) retr; *)
+ retl, retr
+
+
+let get_live_regs_entry (sb : superblock) code =
+ (if !Clflags.option_debug_compcert > 6
+ then debug_flag := true);
+ debug "getting live regs for superblock:\n";
+ print_superblock sb code;
+ debug "\n";
+ let seqa = Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "RTLpathScheduleraux.get_live_regs_entry"
+ ),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ sb.instructions in
+ let ret =
+ Array.fold_right (fun (ins, liveins) regset_i ->
+ let regset = Registers.Regset.union liveins regset_i in
+ match ins with
+ | Inop _ -> regset
+ | Iop (_, args, dest, _)
+ | Iload (_, _, _, args, dest, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (Registers.Regset.remove dest regset) args
+ | Istore (_, _, args, src, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (Registers.Regset.add src regset) args
+ | Icall (_, fn, args, dest, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ ((match fn with
+ | Datatypes.Coq_inl reg -> (Registers.Regset.add reg)
+ | Datatypes.Coq_inr _ -> (fun x -> x))
+ (Registers.Regset.remove dest regset))
+ args
+ | Itailcall (_, fn, args) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (match fn with
+ | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset)
+ | Datatypes.Coq_inr _ -> regset)
+ args
+ | Ibuiltin (_, args, dest, _) ->
+ List.fold_left (fun set arg ->
+ let rec add reg set =
+ match reg with
+ | AST.BA r -> Registers.Regset.add r set
+ | AST.BA_splitlong (hi, lo)
+ | AST.BA_addptr (hi, lo) -> add hi (add lo set)
+ | _ -> set
+ in add arg set)
+ (let rec rem dest set =
+ match dest with
+ | AST.BR r -> Registers.Regset.remove r set
+ | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set)
+ | _ -> set
+ in rem dest regset)
+ args
+ | Icond (_, args, _, _, _) ->
+ List.fold_left (fun set reg ->
+ Registers.Regset.add reg set)
+ regset args
+ | Ijumptable (reg, _)
+ | Ireturn (Some reg) ->
+ Registers.Regset.add reg regset
+ | _ -> regset
+ ) seqa sb.s_output_regs
+ in debug "live in regs: ";
+ print_regset ret;
+ debug "\n";
+ debug_flag := false;
+ ret
+
(* TODO David *)
let schedule_superblock sb code =
if not !Clflags.option_fprepass
@@ -90,15 +252,22 @@ let schedule_superblock sb code =
match predicted_successor ii with
| Some _ -> 0
| None -> 1 in
+ debug "hello\n";
+ let live_regs_entry = get_live_regs_entry sb code in
+ let seqa =
+ Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in
match PrepassSchedulingOracle.schedule_sequence
- (Array.map (fun i ->
- (match PTree.get i code with
- | Some ii -> ii
- | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
- (match PTree.get i sb.liveins with
- | Some s -> s
- | None -> Regset.empty))
- (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with
+ seqa
+ live_regs_entry
+ sb.typing
+ (reference_counting seqa sb.s_output_regs sb.typing) with
| None -> sb.instructions
| Some order ->
let ins' =
diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c
new file mode 100644
index 00000000..9e6adacb
--- /dev/null
+++ b/test/nardino/scheduling/entry_regs.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+
+int f(int n) {
+ if (n > 0)
+ return 42;
+ else
+ return n;
+}
+
+
+int main(int argc, char *argv[]) {
+ int a=1;
+ float b=2.;
+ int c = f(a);
+ a = 3;
+ int d = f(a);
+ printf("%e, %d, %d, %d", b, a, c, d);
+ return 0;
+}
diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c
new file mode 100644
index 00000000..1c36ee86
--- /dev/null
+++ b/test/nardino/scheduling/spille_backw.c
@@ -0,0 +1,114 @@
+int f(int k) {
+ int a1 = k;
+ int b1 = 2*a1;
+ int c = a1;
+ int a2 = k+1;
+ int b2 = 2*a2;
+ c += a2;
+ int a3 = k+2;
+ int b3 = 2*a3;
+ c += a3;
+ int a4 = k+3;
+ int b4 = 2*a4;
+ c += a4;
+ int a5 = k+4;
+ int b5 = 2*a5;
+ c += a5;
+ int a6 = k+5;
+ int b6 = 2*a6;
+ c += a6;
+ int a7 = k+6;
+ int b7 = 2*a7;
+ c += a7;
+ int a8 = k+7;
+ int b8 = 2*a8;
+ c += a8;
+ int a9 = k+8;
+ int b9 = 2*a9;
+ c += a9;
+ int a10 = k+9;
+ int b10 = 2*a10;
+ c += a10;
+ int a11 = k+10;
+ int b11 = 2*a11;
+ c += a11;
+ int a12 = k+11;
+ int b12 = 2*a12;
+ c += a12;
+ int a13 = k+12;
+ int b13 = 2*a13;
+ c += a13;
+ int a14 = k+13;
+ int b14 = 2*a14;
+ c += a14;
+ int a15 = k+14;
+ int b15 = 2*a15;
+ c += a15;
+ int a16 = k+15;
+ int b16 = 2*a16;
+ c += a16;
+ int a17 = k+16;
+ int b17 = 2*a17;
+ c += a17;
+ int a18 = k+17;
+ int b18 = 2*a18;
+ c += a18;
+ int a19 = k+18;
+ int b19 = 2*a19;
+ c += a19;
+ int a20 = k+19;
+ int b20 = 2*a20;
+ c += a20;
+ int a21 = k+20;
+ int b21 = 2*a21;
+ c += a21;
+ int a22 = k+21;
+ int b22 = 2*a22;
+ c += a22;
+ int a23 = k+22;
+ int b23 = 2*a23;
+ c += a23;
+ int a24 = k+23;
+ int b24 = 2*a24;
+ c += a24;
+ int a25 = k+24;
+ int b25 = 2*a25;
+ c += a25;
+ int a26 = k+25;
+ int b26 = 2*a26;
+ c += a26;
+ return
+ b13+
+ b12+
+ b11+
+ b10+
+ b9+
+ b8+
+ b7+
+ b6+
+ b5+
+ b4+
+ b3+
+ b2+
+ b1+
+ b14+
+ b15+
+ b16+
+ b17+
+ b18+
+ b19+
+ b20+
+ b21+
+ b22+
+ b23+
+ b23+
+ b24+
+ b25+
+ b26+
+ c;
+}
+
+int main(int argc, char *argv[]) {
+ f(3);
+ return 0;
+}
diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c
new file mode 100644
index 00000000..db88588b
--- /dev/null
+++ b/test/nardino/scheduling/spille_forw.c
@@ -0,0 +1,166 @@
+#include <stdio.h>
+
+int f(int n, float * arr) {
+ float a1 = (float) n;
+ float b1 = 2.*a1;
+ float c = a1;
+ float a2 = (float) n+1;
+ float b2 = 2.*a2;
+ c += a2;
+ float a3 = (float) n+2;
+ float b3 = 2.*a3;
+ c += a3;
+ float a4 = (float) n+3;
+ float b4 = 2.*a4;
+ c += a4;
+ float a5 = (float) n+4;
+ float b5 = 2.*a5;
+ c += a5;
+ float a6 = (float) n+5;
+ float b6 = 2.*a6;
+ c += a6;
+ float a7 = (float) n+6;
+ float b7 = 2.*a7;
+ c += a7;
+ float a8 = (float) n+7;
+ float b8 = 2.*a8;
+ c += a8;
+ float a9 = (float) n+8;
+ float b9 = 2.*a9;
+ c += a9;
+ float a10 = (float) n+9;
+ float b10 = 2.*a10;
+ c += a10;
+ float a11 = (float) n+10;
+ float b11 = 2.*a11;
+ c += a11;
+ float a12 = (float) n+11;
+ float b12 = 2.*a12;
+ c += a12;
+ float a13 = (float) n+12;
+ float b13 = 2.*a13;
+ c += a13;
+ float a14 = (float) n+13;
+ float b14 = 2.*a14;
+ c += a14;
+ float a15 = (float) n+14;
+ float b15 = 2.*a15;
+ c += a15;
+ float a16 = (float) n+15;
+ float b16 = 2.*a16;
+ c += a16;
+ float a17 = (float) n+16;
+ float b17 = 2.*a17;
+ c += a17;
+ float a18 = (float) n+17;
+ float b18 = 2.*a18;
+ c += a18;
+ float a19 = (float) n+18;
+ float b19 = 2.*a19;
+ c += a19;
+ float a20 = (float) n+19;
+ float b20 = 2.*a20;
+ c += a20;
+ float a21 = (float) n+20;
+ float b21 = 2.*a21;
+ c += a21;
+ float a22 = (float) n+21;
+ float b22 = 2.*a22;
+ c += a22;
+ float a23 = (float) n+22;
+ float b23 = 2.*a23;
+ c += a23;
+ float a24 = (float) n+23;
+ float b24 = 2.*a24;
+ c += a24;
+ float a25 = (float) n+24;
+ float b25 = 2.*a25;
+ c += a25;
+ float a26 = (float) n+25;
+ float b26 = 2.*a26;
+ c += a26;
+ float a27 = (float) n+26;
+ float b27 = 2.*a27;
+ c += a27;
+ float a28 = (float) n+27;
+ float b28 = 2.*a28;
+ c += a28;
+ float a29 = (float) n+28;
+ float b29 = 2.*a29;
+ c += a29;
+ float a30 = (float) n+29;
+ float b30 = 2.*a30;
+ c += a30;
+ /* arr[0] = a1; */
+ /* arr[1] = a2; */
+ /* arr[2] = a3; */
+ /* arr[3] = a4; */
+ /* arr[4] = a5; */
+ /* arr[5] = a6; */
+ /* arr[6] = a7; */
+ /* arr[7] = a8; */
+ /* arr[8] = a9; */
+ /* arr[9] = a10; */
+ /* arr[10] = a11; */
+ /* arr[11] = a12; */
+ /* arr[12] = a13; */
+ /* arr[13] = a14; */
+ /* arr[14] = a15; */
+ /* arr[15] = a16; */
+ /* arr[16] = a17; */
+ /* arr[17] = a18; */
+ /* arr[18] = a19; */
+ /* arr[19] = a20; */
+ /* arr[20] = a21; */
+ /* arr[21] = a22; */
+ /* arr[22] = a23; */
+ /* arr[23] = a24; */
+ /* arr[24] = a25; */
+ /* arr[25] = a26; */
+ /* arr[26] = a27; */
+ /* arr[27] = a28; */
+ /* arr[28] = a29; */
+ /* arr[29] = a30; */
+ return c +
+ b1+
+ b2+
+ b3+
+ b4+
+ b5+
+ b6+
+ b7+
+ b8+
+ b9+
+ b10+
+ b11+
+ b12+
+ b13+
+ b14+
+ b15+
+ b16+
+ b17+
+ b18+
+ b19+
+ b20+
+ b21+
+ b22+
+ b23+
+ b24+
+ b25+
+ b26+
+ b27+
+ b28+
+ b29+
+ b30;
+}
+
+
+
+
+
+
+int main(int argc, char *argv[]) {
+ float arr[30];
+ f(5, arr);
+ return 0;
+}