aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
authorLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2021-09-01 16:08:57 +0200
committerLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2021-09-01 16:08:57 +0200
commit269208723faff37e6f6539b71101515b17a8a36f (patch)
tree2a52dd6fc5ae0b65b2a40a08c8e20c2eb8357ff3 /aarch64
parent1fbe45e2d1f02ef6e8fb6fe7545728a744e047b8 (diff)
parent54a22d92bc18fa3ece958a097844caa5e7b2e0c5 (diff)
downloadcompcert-kvx-269208723faff37e6f6539b71101515b17a8a36f.tar.gz
compcert-kvx-269208723faff37e6f6539b71101515b17a8a36f.zip
[MERGE] BTL into kvx-work (replacing RTLpath)
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/BTL_SEsimplify.v41
-rw-r--r--aarch64/ExpansionOracle.ml6
-rw-r--r--aarch64/PrepassSchedulingOracle.ml638
3 files changed, 261 insertions, 424 deletions
diff --git a/aarch64/BTL_SEsimplify.v b/aarch64/BTL_SEsimplify.v
new file mode 100644
index 00000000..3e930439
--- /dev/null
+++ b/aarch64/BTL_SEsimplify.v
@@ -0,0 +1,41 @@
+Require Import Coqlib Floats Values Memory.
+Require Import Integers.
+Require Import Op Registers.
+Require Import BTL_SEtheory.
+Require Import BTL_SEsimuref.
+
+(** Target op simplifications using "fake" values *)
+
+Definition target_op_simplify (op: operation) (lr: list reg) (hrs: ristate): option sval :=
+ None.
+
+Definition target_cbranch_expanse (prev: ristate) (cond: condition) (args: list reg) : option (condition * list_sval) :=
+ None.
+
+(* Main proof of simplification *)
+
+Lemma target_op_simplify_correct ctx op lr hrs fsv st args: forall
+ (H: target_op_simplify op lr hrs = Some fsv)
+ (REF: ris_refines ctx hrs st)
+ (OK0: ris_ok ctx hrs)
+ (OK1: eval_list_sval ctx (list_sval_inj (map (si_sreg st) lr)) = Some args),
+ eval_sval ctx fsv = eval_operation (cge ctx) (csp ctx) op args (cm0 ctx).
+Proof.
+ unfold target_op_simplify; simpl.
+ intros H ? ? ?.
+ congruence.
+Qed.
+
+Lemma target_cbranch_expanse_correct hrs c l ctx st c' l': forall
+ (TARGET: target_cbranch_expanse hrs c l = Some (c', l'))
+ (REF: ris_refines ctx hrs st)
+ (OK: ris_ok ctx hrs),
+ eval_scondition ctx c' l' =
+ eval_scondition ctx c (list_sval_inj (map (si_sreg st) l)).
+Proof.
+ unfold target_cbranch_expanse; simpl.
+ intros H ? ?.
+ congruence.
+Qed.
+Global Opaque target_op_simplify.
+Global Opaque target_cbranch_expanse.
diff --git a/aarch64/ExpansionOracle.ml b/aarch64/ExpansionOracle.ml
index 3b63b80d..afcb29c2 100644
--- a/aarch64/ExpansionOracle.ml
+++ b/aarch64/ExpansionOracle.ml
@@ -10,8 +10,6 @@
(* *)
(* *************************************************************)
-open RTLpathCommon
+let expanse n ibf btl = btl
-let expanse (sb : superblock) code pm = (code, pm)
-
-let find_last_node_reg c = ()
+let find_last_reg c = ()
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 03a9e202..d7e80cd9 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -1,5 +1,3 @@
-(* *************************************************************)
-(* *)
(* The Compcert verified compiler *)
(* *)
(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
@@ -12,488 +10,288 @@
(* *************************************************************)
open AST
-open RTL
+open BTL
open Maps
open InstructionScheduler
open Registers
open PrepassSchedulingOracleDeps
-
+open PrintBTL
+open DebugPrint
+
let use_alias_analysis () = false
-
-let length_of_chunk = function
-| Mint8signed
-| Mint8unsigned -> 1
-| Mint16signed
-| Mint16unsigned -> 2
-| Mint32
-| Mfloat32
-| Many32 -> 4
-| Mint64
-| Mfloat64
-| Many64 -> 8;;
-let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+let build_constraints_and_resources (opweights : opweights) seqa btl =
let last_reg_reads : int list PTree.t ref = ref PTree.empty
- and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_reg_write : (int * int) PTree.t ref = ref PTree.empty
and last_mem_reads : int list ref = ref []
and last_mem_write : int option ref = ref None
and last_branch : int option ref = ref None
- and last_non_pipelined_op : int array = Array.make
- opweights.nr_non_pipelined_units ( -1 )
+ and last_non_pipelined_op : int array =
+ Array.make opweights.nr_non_pipelined_units (-1)
and latency_constraints : latency_constraint list ref = ref [] in
let add_constraint instr_from instr_to latency =
assert (instr_from <= instr_to);
assert (latency >= 0);
- if instr_from = instr_to
- then (if latency = 0
- then ()
- else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ if instr_from = instr_to then
+ if latency = 0 then ()
+ else
+ failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop"
else
latency_constraints :=
- { instr_from = instr_from;
- instr_to = instr_to;
- latency = latency
- }:: !latency_constraints
+ { instr_from; instr_to; latency } :: !latency_constraints
and get_last_reads reg =
- match PTree.get reg !last_reg_reads
- with Some l -> l
- | None -> [] in
+ match PTree.get reg !last_reg_reads with Some l -> l | None -> []
+ in
let add_input_mem i =
- if not (use_alias_analysis ())
- then
- begin
- begin
- (* Read after write *)
- match !last_mem_write with
- | None -> ()
- | Some j -> add_constraint j i 1
- end;
- last_mem_reads := i :: !last_mem_reads
- end
- and add_output_mem i =
- if not (use_alias_analysis ())
- then
- begin
- begin
- (* Write after write *)
- match !last_mem_write with
- | None -> ()
- | Some j -> add_constraint j i 1
- end;
- (* Write after read *)
- List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
- last_mem_write := Some i;
- last_mem_reads := []
- end
- and add_input_reg i reg =
- begin
+ if not (use_alias_analysis ()) then (
(* Read after write *)
- match PTree.get reg !last_reg_write with
- | None -> ()
- | Some (j, latency) -> add_constraint j i latency
- end;
- last_reg_reads := PTree.set reg
- (i :: get_last_reads reg)
- !last_reg_reads
- and add_output_reg i latency reg =
- begin
+ (match !last_mem_write with None -> () | Some j -> add_constraint j i 1);
+ last_mem_reads := i :: !last_mem_reads)
+ and add_output_mem i =
+ if not (use_alias_analysis ()) then (
(* Write after write *)
- match PTree.get reg !last_reg_write with
- | None -> ()
- | Some (j, _) -> add_constraint j i 1
- end;
- begin
+ (match !last_mem_write with None -> () | Some j -> add_constraint j i 1);
(* Write after read *)
- List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
- end;
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := [])
+ and add_input_reg i reg =
+ (* Read after write *)
+ (match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency);
+ last_reg_reads := PTree.set reg (i :: get_last_reads reg) !last_reg_reads
+ and add_output_reg i latency reg =
+ (* Write after write *)
+ (match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1);
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg);
last_reg_write := PTree.set reg (i, latency) !last_reg_write;
last_reg_reads := PTree.remove reg !last_reg_reads
- in
+ in
let add_input_regs i regs = List.iter (add_input_reg i) regs in
let rec add_builtin_res i (res : reg builtin_res) =
match res with
| BR r -> add_output_reg i 10 r
| BR_none -> ()
- | BR_splitlong (hi, lo) -> add_builtin_res i hi;
- add_builtin_res i lo in
+ | BR_splitlong (hi, lo) ->
+ add_builtin_res i hi;
+ add_builtin_res i lo
+ in
let rec add_builtin_arg i (ba : reg builtin_arg) =
match ba with
| BA r -> add_input_reg i r
| BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
- | BA_loadstack(_,_) -> add_input_mem i
+ | BA_loadstack (_, _) -> add_input_mem i
| BA_addrstack _ -> ()
- | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_loadglobal (_, _, _) -> add_input_mem i
| BA_addrglobal _ -> ()
- | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
- add_builtin_arg i lo
- | BA_addptr(a1, a2) -> add_builtin_arg i a1;
- add_builtin_arg i a2 in
- let irreversible_action i =
- match !last_branch with
- | None -> ()
- | Some j -> add_constraint j i 1 in
+ | BA_splitlong (hi, lo) ->
+ add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr (a1, a2) ->
+ add_builtin_arg i a1;
+ add_builtin_arg i a2
+ and irreversible_action i =
+ match !last_branch with None -> () | Some j -> add_constraint j i 1
+ in
let set_branch i =
irreversible_action i;
- last_branch := Some i in
- let add_non_pipelined_resources i resources =
+ last_branch := Some i
+ and add_non_pipelined_resources i resources =
Array.iter2
(fun latency last ->
if latency >= 0 && last >= 0 then add_constraint last i latency)
resources last_non_pipelined_op;
- Array.iteri (fun rsc latency ->
- if latency >= 0
- then last_non_pipelined_op.(rsc) <- i) resources
+ Array.iteri
+ (fun rsc latency -> if latency >= 0 then last_non_pipelined_op.(rsc) <- i)
+ resources
in
Array.iteri
- begin
- fun i (insn, other_uses) ->
- List.iter (fun use ->
- add_input_reg i use)
- (Regset.elements other_uses);
-
- match insn with
- | Inop _ -> ()
- | Iop(op, inputs, output, _) ->
- add_non_pipelined_resources i
- (opweights.non_pipelined_resources_of_op op (List.length inputs));
- (if Op.is_trapping_op op then irreversible_action i);
- add_input_regs i inputs;
- add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
- | Iload(trap, chunk, addressing, addr_regs, output, _) ->
- (if trap=TRAP then irreversible_action i);
- add_input_mem i;
- add_input_regs i addr_regs;
- add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
- | Istore(chunk, addressing, addr_regs, input, _) ->
- irreversible_action i;
- add_input_regs i addr_regs;
- add_input_reg i input;
- add_output_mem i
- | Icall(signature, ef, inputs, output, _) ->
- set_branch i;
- (match ef with
+ (fun i (inst, other_uses) ->
+ List.iter (fun use -> add_input_reg i use) (Regset.elements other_uses);
+ match inst with
+ | Bnop _ -> ()
+ | Bop (op, lr, rd, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length lr));
+ if Op.is_trapping_op op then irreversible_action i;
+ add_input_regs i lr;
+ add_output_reg i (opweights.latency_of_op op (List.length lr)) rd
+ | Bload (trap, chk, addr, lr, rd, _) ->
+ if trap = TRAP then irreversible_action i;
+ add_input_mem i;
+ add_input_regs i lr;
+ add_output_reg i
+ (opweights.latency_of_load trap chk addr (List.length lr))
+ rd
+ | Bstore (chk, addr, lr, src, _) ->
+ irreversible_action i;
+ add_input_regs i lr;
+ add_input_reg i src;
+ add_output_mem i
+ | Bcond (cond, lr, BF (Bgoto s, _), ibnot, _) ->
+ set_branch i;
+ add_input_mem i;
+ add_input_regs i lr
+ | Bcond (_, _, _, _, _) ->
+ failwith "build_constraints_and_resources: invalid Bcond"
+ | BF (Bcall (signature, ef, lr, rd, _), _) ->
+ set_branch i;
+ (match ef with
| Datatypes.Coq_inl r -> add_input_reg i r
- | Datatypes.Coq_inr symbol -> ()
- );
- add_input_mem i;
- add_input_regs i inputs;
- add_output_reg i (opweights.latency_of_call signature ef) output;
- add_output_mem i;
- failwith "Icall"
- | Itailcall(signature, ef, inputs) ->
- set_branch i;
- (match ef with
+ | Datatypes.Coq_inr symbol -> ());
+ add_input_mem i;
+ add_input_regs i lr;
+ add_output_reg i (opweights.latency_of_call signature ef) rd;
+ add_output_mem i;
+ failwith "build_constraints_and_resources: invalid Bcall"
+ | BF (Btailcall (signature, ef, lr), _) ->
+ set_branch i;
+ (match ef with
| Datatypes.Coq_inl r -> add_input_reg i r
- | Datatypes.Coq_inr symbol -> ()
- );
- add_input_mem i;
- add_input_regs i inputs;
- failwith "Itailcall"
- | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
- set_branch i;
- add_input_mem i;
- List.iter (add_builtin_arg i) builtin_inputs;
- add_builtin_res i builtin_output;
- add_output_mem i;
- failwith "Ibuiltin"
- | Icond(cond, inputs, _, _, _) ->
- set_branch i;
- add_input_mem i;
- add_input_regs i inputs
- | Ijumptable(input, _) ->
- set_branch i;
- add_input_reg i input;
- failwith "Ijumptable"
- | Ireturn(Some input) ->
- set_branch i;
- add_input_reg i input;
- failwith "Ireturn"
- | Ireturn(None) ->
- set_branch i;
- failwith "Ireturn none"
- end seqa;
- !latency_constraints;;
+ | Datatypes.Coq_inr symbol -> ());
+ add_input_mem i;
+ add_input_regs i lr;
+ failwith "build_constraints_and_resources: invalid Btailcall"
+ | BF (Bbuiltin (ef, lr, rd, _), _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) lr;
+ add_builtin_res i rd;
+ add_output_mem i;
+ failwith "build_constraints_and_resources: invalid Bbuiltin"
+ | BF (Bjumptable (lr, _), _) ->
+ set_branch i;
+ add_input_reg i lr;
+ failwith "build_constraints_and_resources: invalid Bjumptable"
+ | BF (Breturn (Some r), _) ->
+ set_branch i;
+ add_input_reg i r;
+ failwith "build_constraints_and_resources: invalid Breturn Some"
+ | BF (Breturn None, _) ->
+ set_branch i;
+ failwith "build_constraints_and_resources: invalid Breturn None"
+ | BF (Bgoto _, _) ->
+ failwith "build_constraints_and_resources: invalid Bgoto"
+ | Bseq (_, _) -> failwith "build_constraints_and_resources: Bseq")
+ seqa;
+ !latency_constraints
let resources_of_instruction (opweights : opweights) = function
- | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
- | Iop(op, inputs, output, _) ->
- opweights.resources_of_op op (List.length inputs)
- | Iload(trap, chunk, addressing, addr_regs, output, _) ->
- opweights.resources_of_load trap chunk addressing (List.length addr_regs)
- | Istore(chunk, addressing, addr_regs, input, _) ->
- opweights.resources_of_store chunk addressing (List.length addr_regs)
- | Icall(signature, ef, inputs, output, _) ->
- opweights.resources_of_call signature ef
- | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
- opweights.resources_of_builtin ef
- | Icond(cond, args, _, _ , _) ->
- opweights.resources_of_cond cond (List.length args)
- | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
-
-let print_sequence pp (seqa : instruction array) =
- Array.iteri (
- fun i (insn : instruction) ->
- PrintRTL.print_instruction pp (i, insn)) seqa;;
-
-type unique_id = int
-
-type 'a symbolic_term_node =
- | STop of Op.operation * 'a list
- | STinitial_reg of int
- | STother of int;;
-
-type symbolic_term = {
- hash_id : unique_id;
- hash_ct : symbolic_term symbolic_term_node
- };;
+ | Bnop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+ | Bop (op, inputs, output, _) ->
+ opweights.resources_of_op op (List.length inputs)
+ | Bload (trap, chunk, addressing, addr_regs, output, _) ->
+ opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+ | Bstore (chunk, addressing, addr_regs, input, _) ->
+ opweights.resources_of_store chunk addressing (List.length addr_regs)
+ | BF (Bcall (signature, ef, inputs, output, _), _) ->
+ opweights.resources_of_call signature ef
+ | BF (Bbuiltin (ef, builtin_inputs, builtin_output, _), _) ->
+ opweights.resources_of_builtin ef
+ | Bcond (cond, args, _, _, _) ->
+ opweights.resources_of_cond cond (List.length args)
+ | BF (Btailcall _, _) | BF (Bjumptable _, _) | BF (Breturn _, _) ->
+ opweights.pipelined_resource_bounds
+ | BF (Bgoto _, _) | Bseq (_, _) ->
+ failwith "resources_of_instruction: invalid btl instruction"
-let rec print_term channel term =
- match term.hash_ct with
- | STop(op, args) ->
- PrintOp.print_operation print_term channel (op, args)
- | STinitial_reg n -> Printf.fprintf channel "x%d" n
- | STother n -> Printf.fprintf channel "y%d" n;;
-
-type symbolic_term_table = {
- st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
- mutable st_next_id : unique_id };;
-
-let hash_init () = {
- st_table = Hashtbl.create 20;
- st_next_id = 0
- };;
-
-let ground_to_id = function
- | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
- | STinitial_reg r -> STinitial_reg r
- | STother i -> STother i;;
-
-let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
- let grounded = ground_to_id term in
- match Hashtbl.find_opt table.st_table grounded with
- | Some x -> x
- | None ->
- let term' = { hash_id = table.st_next_id;
- hash_ct = term } in
- (if table.st_next_id = max_int then failwith "hash: max_int");
- table.st_next_id <- table.st_next_id + 1;
- Hashtbl.add table.st_table grounded term';
- term';;
-
-type access = {
- base : symbolic_term;
- offset : int64;
- length : int
- };;
-
-let term_equal a b = (a.hash_id = b.hash_id);;
-
-let access_of_addressing get_reg chunk addressing args =
- match addressing, args with
- | (Op.Aindexed ofs), [reg] -> Some
- { base = get_reg reg;
- offset = Camlcoq.camlint64_of_ptrofs ofs;
- length = length_of_chunk chunk
- }
- | _, _ -> None ;;
-(* TODO: global *)
-
-let symbolic_execution (seqa : instruction array) =
- let regs = ref PTree.empty
- and table = hash_init() in
- let assign reg term = regs := PTree.set reg term !regs
- and hash term = hash_node table term in
- let get_reg reg =
- match PTree.get reg !regs with
- | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
- | Some x -> x in
- let targets = Array.make (Array.length seqa) None in
- Array.iteri
- begin
- fun i insn ->
- match insn with
- | Iop(Op.Omove, [input], output, _) ->
- assign output (get_reg input)
- | Iop(op, inputs, output, _) ->
- assign output (hash (STop(op, List.map get_reg inputs)))
-
- | Iload(trap, chunk, addressing, args, output, _) ->
- let access = access_of_addressing get_reg chunk addressing args in
- targets.(i) <- access;
- assign output (hash (STother(i)))
-
- | Icall(_, _, _, output, _)
- | Ibuiltin(_, _, BR output, _) ->
- assign output (hash (STother(i)))
-
- | Istore(chunk, addressing, args, va, _) ->
- let access = access_of_addressing get_reg chunk addressing args in
- targets.(i) <- access
-
- | Inop _ -> ()
- | Ibuiltin(_, _, BR_none, _) -> ()
- | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
-
- | Itailcall (_, _, _)
- |Icond (_, _, _, _, _)
- |Ijumptable (_, _)
- |Ireturn _ -> ()
- end seqa;
- targets;;
-
-let print_access channel = function
- | None -> Printf.fprintf channel "any"
- | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
-
-let print_targets channel seqa =
- let targets = symbolic_execution seqa in
+let print_sequence pp seqa =
Array.iteri
- (fun i insn ->
- match insn with
- | Iload _ -> Printf.fprintf channel "%d: load %a\n"
- i print_access targets.(i)
- | Istore _ -> Printf.fprintf channel "%d: store %a\n"
- i print_access targets.(i)
- | _ -> ()
- ) seqa;;
-
-let may_overlap a0 b0 =
- match a0, b0 with
- | (None, _) | (_ , None) -> true
- | (Some a), (Some b) ->
- if term_equal a.base b.base
- then (max a.offset b.offset) <
- (min (Int64.add (Int64.of_int a.length) a.offset)
- (Int64.add (Int64.of_int b.length) b.offset))
- else match a.base.hash_ct, b.base.hash_ct with
- | STop(Op.Oaddrsymbol(ida, ofsa),[]),
- STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
- (ida=idb) &&
- let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
- and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
- (max ao bo) <
- (min (Int64.add (Int64.of_int a.length) ao)
- (Int64.add (Int64.of_int b.length) bo))
- | STop(Op.Oaddrstack _, []),
- STop(Op.Oaddrsymbol _, [])
- | STop(Op.Oaddrsymbol _, []),
- STop(Op.Oaddrstack _, []) -> false
- | STop(Op.Oaddrstack(ofsa),[]),
- STop(Op.Oaddrstack(ofsb),[]) ->
- let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
- and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
- (max ao bo) <
- (min (Int64.add (Int64.of_int a.length) ao)
- (Int64.add (Int64.of_int b.length) bo))
- | _ -> true;;
+ (fun i (inst, other_uses) ->
+ debug "i=%d\n inst = " i;
+ print_btl_inst pp inst;
+ debug "\n other_uses=";
+ print_regset other_uses;
+ debug "\n")
+ seqa
-(*
-(* TODO suboptimal quadratic algorithm *)
-let get_alias_dependencies seqa =
- let targets = symbolic_execution seqa
- and deps = ref [] in
- let add_constraint instr_from instr_to latency =
- deps := { instr_from = instr_from;
- instr_to = instr_to;
- latency = latency
- }:: !deps in
- for i=0 to (Array.length seqa)-1
- do
- for j=0 to i-1
- do
- match seqa.(j), seqa.(i) with
- | (Istore _), ((Iload _) | (Istore _)) ->
- if may_overlap targets.(j) targets.(i)
- then add_constraint j i 1
- | (Iload _), (Istore _) ->
- if may_overlap targets.(j) targets.(i)
- then add_constraint j i 0
- | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
- | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
- add_constraint j i 1
- | (Inop _ | Iop _), _
- | _, (Inop _ | Iop _)
- | (Iload _), (Iload _) -> ()
- done
- done;
- !deps;;
- *)
+let length_of_chunk = function
+ | Mint8signed | Mint8unsigned -> 1
+ | Mint16signed | Mint16unsigned -> 2
+ | Mint32 | Mfloat32 | Many32 -> 4
+ | Mint64 | Mfloat64 | Many64 -> 8
let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
- (typing : RTLtyping.regenv) reference_counting seqa =
- let simple_deps = get_simple_dependencies opweights seqa in
- { max_latency = -1;
+ (typing : RTLtyping.regenv) reference_counting seqa btl =
+ let simple_deps = build_constraints_and_resources opweights seqa btl in
+ {
+ max_latency = -1;
resource_bounds = opweights.pipelined_resource_bounds;
live_regs_entry = live_entry_regs;
- typing = typing;
+ typing;
reference_counting = Some reference_counting;
- instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
- latency_constraints =
- (* if (use_alias_analysis ())
- then (get_alias_dependencies seqa) @ simple_deps
- else *) simple_deps };;
+ instruction_usages =
+ Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ latency_constraints = simple_deps;
+ }
let zigzag_scheduler problem early_ones =
let nr_instructions = get_nr_instructions problem in
- assert(nr_instructions = (Array.length early_ones));
+ assert (nr_instructions = Array.length early_ones);
match list_scheduler problem with
| Some fwd_schedule ->
- let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
- let constraints' = ref problem.latency_constraints in
- Array.iteri (fun i is_early ->
- if is_early then
- constraints' := {
- instr_from = i;
- instr_to = nr_instructions ;
- latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
- early_ones;
- validated_scheduler reverse_list_scheduler
- { problem with latency_constraints = !constraints' }
- | None -> None;;
-
-let prepass_scheduler_by_name name problem early_ones =
+ let fwd_makespan = fwd_schedule.(Array.length fwd_schedule - 1) in
+ let constraints' = ref problem.latency_constraints in
+ Array.iteri
+ (fun i is_early ->
+ if is_early then
+ constraints' :=
+ {
+ instr_from = i;
+ instr_to = nr_instructions;
+ latency = fwd_makespan - fwd_schedule.(i);
+ }
+ :: !constraints')
+ early_ones;
+ validated_scheduler reverse_list_scheduler
+ { problem with latency_constraints = !constraints' }
+ | None -> None
+
+let prepass_scheduler_by_name name problem seqa =
match name with
- | "zigzag" -> zigzag_scheduler problem early_ones
+ | "zigzag" ->
+ let early_ones =
+ Array.map
+ (fun (inst, _) ->
+ match inst with Bcond (_, _, _, _, _) -> true | _ -> false)
+ seqa
+ in
+ zigzag_scheduler problem early_ones
| _ -> scheduler_by_name name problem
-
-let schedule_sequence (seqa : (instruction*Regset.t) array)
- (live_regs_entry : Registers.Regset.t)
- (typing : RTLtyping.regenv)
- reference =
+
+let schedule_sequence seqa btl (live_regs_entry : Registers.Regset.t)
+ (typing : RTLtyping.regenv) reference =
let opweights = OpWeights.get_opweights () in
try
- if (Array.length seqa) <= 1
- then None
+ if Array.length seqa <= 1 then None
else
- begin
let nr_instructions = Array.length seqa in
- (if !Clflags.option_debug_compcert > 6
- then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
- let problem = define_problem opweights live_regs_entry
- typing reference seqa in
- (if !Clflags.option_debug_compcert > 7
- then (print_sequence stdout (Array.map fst seqa);
- print_problem stdout problem));
- match prepass_scheduler_by_name
- (!Clflags.option_fprepass_sched)
- problem
- (Array.map (fun (ins, _) ->
- match ins with
- | Icond _ -> true
- | _ -> false) seqa) with
- | None -> Printf.printf "no solution in prepass scheduling\n";
- Stdlib.exit 1
- (* TODO None *)
+ if !Clflags.option_debug_compcert > 6 then
+ Printf.printf "prepass scheduling length = %d\n" nr_instructions;
+ let problem =
+ define_problem opweights live_regs_entry typing reference seqa btl
+ in
+ if !Clflags.option_debug_compcert > 7 then (
+ print_sequence stdout seqa;
+ print_problem stdout problem);
+ match
+ prepass_scheduler_by_name !Clflags.option_fprepass_sched problem seqa
+ with
+ | None ->
+ Printf.printf "no solution in prepass scheduling\n";
+ None
| Some solution ->
- let positions = Array.init nr_instructions (fun i -> i) in
- Array.sort (fun i j ->
- let si = solution.(i) and sj = solution.(j) in
- if si < sj then -1
- else if si > sj then 1
- else i - j) positions;
- Some positions
- end
- with (Failure s) ->
+ let positions = Array.init nr_instructions (fun i -> i) in
+ Array.sort
+ (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1 else if si > sj then 1 else i - j)
+ positions;
+ Some positions
+ with Failure s ->
Printf.printf "failure in prepass scheduling: %s\n" s;
- None;;
-
+ None