aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJustus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr>2021-08-10 23:44:55 +0200
committerJustus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr>2021-08-10 23:44:55 +0200
commit549cc16118d0c1f80084016c4a0d73a5d43dc221 (patch)
tree795c81295026aa0101284f61fe7724ec56f19891
parent9fe2fbc02858632c3611f5a93d0d5525db9fded5 (diff)
downloadcompcert-kvx-549cc16118d0c1f80084016c4a0d73a5d43dc221.tar.gz
compcert-kvx-549cc16118d0c1f80084016c4a0d73a5d43dc221.zip
Version of get_simple_dependencies which differentiates between input
regs and input-because-they-are-live-at-the-side-exit regs
-rw-r--r--aarch64/PrepassSchedulingOracle.ml203
1 files changed, 203 insertions, 0 deletions
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 6dc8711e..49e4870c 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -213,6 +213,209 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
failwith "Ireturn none"
end seqa;
!latency_constraints;;
+let get_simple_dependencies' (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_input_mem_formality i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 0
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_input_reg_formality i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _latency) -> add_constraint j i 0
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ List.iter (fun use ->
+ add_input_reg_formality i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ add_input_mem_formality i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
let get_fake_deps_liveness (opweights : opweights) (seqa : (instruction*Regset.t) array) =
let last_reg_reads : int list PTree.t ref = ref PTree.empty