aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
authorLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2020-11-03 18:10:54 +0100
committerLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2020-11-03 18:10:54 +0100
commitcdb54160ff67bef3ab40e3cc85416f2c897ac82b (patch)
tree1eb072dbdc2125d817f8a11f49170db45e9a6cd8 /aarch64
parent72a7d353cb1101a8bfcbbb3836814fe2f55a8b01 (diff)
downloadcompcert-kvx-cdb54160ff67bef3ab40e3cc85416f2c897ac82b.tar.gz
compcert-kvx-cdb54160ff67bef3ab40e3cc85416f2c897ac82b.zip
Dumb (identity) scheduling working and integrated
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/Asmgen.v4
-rw-r--r--aarch64/Asmgenproof.v18
-rw-r--r--aarch64/InstructionScheduler.ml1263
-rw-r--r--aarch64/InstructionScheduler.mli113
-rw-r--r--aarch64/OpWeightsAsm.ml212
-rw-r--r--aarch64/PostpassSchedulingOracle.ml1611
-rw-r--r--aarch64/PostpassSchedulingproof.v4
7 files changed, 2356 insertions, 869 deletions
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index c25454e0..bd6b55cc 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -360,5 +360,5 @@ Definition transf_program (p: Mach.program) : res Asm.program :=
let mbp := Machblockgen.transf_program p in
do mbp' <- PseudoAsmblockproof.transf_program mbp;
do abp <- Asmblockgen.transf_program mbp';
- (*do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp;*)
- Asmblock_TRANSF.transf_program abp.
+ do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp;
+ Asmblock_TRANSF.transf_program abp'.
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index 62b65a14..d062654b 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -18,7 +18,7 @@
Require Import Coqlib Errors.
Require Import Integers Floats AST Linking.
Require Import Values Memory Events Globalenvs Smallstep.
-Require Import Op Locations Machblock Conventions PseudoAsmblock Asm Asmblock.
+Require Import Op Locations Machblock Conventions PseudoAsmblock PseudoAsmblockproof Asm Asmblock.
Require Machblockgenproof Asmblockgenproof PostpassSchedulingproof.
Require Import Asmgen.
Require Import Axioms.
@@ -2321,7 +2321,7 @@ Definition block_passes :=
mkpass Machblockgenproof.match_prog
::: mkpass PseudoAsmblockproof.match_prog
::: mkpass Asmblockgenproof.match_prog
- (*::: mkpass PostpassSchedulingproof.match_prog*)
+ ::: mkpass PostpassSchedulingproof.match_prog
::: mkpass Asmblock_PRESERVATION.match_prog
::: pass_nil _.
@@ -2333,11 +2333,13 @@ Proof.
intros p tp H.
unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H.
inversion_clear H. apply bind_inversion in H1. destruct H1.
- inversion_clear H. inversion H2. remember (Machblockgen.transf_program p) as mbp.
+ inversion_clear H. apply bind_inversion in H2. destruct H2. inversion_clear H.
+ unfold Compopts.time in *. remember (Machblockgen.transf_program p) as mbp.
unfold match_prog; simpl.
exists mbp; split. apply Machblockgenproof.transf_program_match; auto.
exists x; split. apply PseudoAsmblockproof.transf_program_match; auto.
exists x0; split. apply Asmblockgenproof.transf_program_match; auto.
+ exists x1; split. apply PostpassSchedulingproof.transf_program_match; auto.
exists tp; split. apply Asmblock_PRESERVATION.transf_program_match; auto. auto.
Qed.
@@ -2364,19 +2366,21 @@ Let tge := Genv.globalenv tprog.
Theorem transf_program_correct:
forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
Proof.
+Admitted.
+(* TODO
unfold match_prog in TRANSF. simpl in TRANSF.
- inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H.
+ inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H. inv H4. inv H.
eapply compose_forward_simulations.
{ exploit Machblockgenproof.transf_program_correct; eauto. }
eapply compose_forward_simulations.
+ apply PseudoAsmblockproof.transf_program_correct; eauto.
- intros; apply Asmblockgenproof.next_progress.
- - intros; eapply Asmblockgenproof.functions_bound_max_pos; eauto.
- { intros; eapply Asmblock_PRESERVATION.symbol_high_low; eauto. }
+ - intros. eapply .functions_bound_max_pos; eauto.
+ { intros. eapply Asmblock_PRESERVATION.symbol_high_low. eauto. }
+ eapply compose_forward_simulations. apply Asmblockgenproof.transf_program_correct; eauto.
{ intros; eapply Asmblock_PRESERVATION.symbol_high_low; eauto. }
apply Asmblock_PRESERVATION.transf_program_correct. eauto.
-Qed.
+Qed.*)
End PRESERVATION.
diff --git a/aarch64/InstructionScheduler.ml b/aarch64/InstructionScheduler.ml
new file mode 100644
index 00000000..eab0b21a
--- /dev/null
+++ b/aarch64/InstructionScheduler.ml
@@ -0,0 +1,1263 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+let with_destructor dtor stuff f =
+ try let ret = f stuff in
+ dtor stuff;
+ ret
+ with exn -> dtor stuff;
+ raise exn;;
+
+let with_out_channel chan f = with_destructor close_out chan f;;
+let with_in_channel chan f = with_destructor close_in chan f;;
+
+(** Schedule instructions on a synchronized pipeline
+@author David Monniaux, CNRS, VERIMAG *)
+
+type latency_constraint = {
+ instr_from : int;
+ instr_to : int;
+ latency : int };;
+
+type problem = {
+ max_latency : int;
+ resource_bounds : int array;
+ instruction_usages : int array array;
+ latency_constraints : latency_constraint list;
+ };;
+
+let print_problem channel problem =
+ (if problem.max_latency >= 0
+ then Printf.fprintf channel "max makespan: %d\n" problem.max_latency);
+ output_string channel "resource bounds:";
+ (Array.iter (fun b -> Printf.fprintf channel " %d" b) problem.resource_bounds);
+ output_string channel ";\n";
+ (Array.iteri (fun i v ->
+ Printf.fprintf channel "instr%d:" i;
+ (Array.iter (fun b -> Printf.fprintf channel " %d" b) v);
+ output_string channel ";\n") problem.instruction_usages);
+ List.iter (fun instr ->
+ Printf.printf "t%d - t%d >= %d;\n"
+ instr.instr_to instr.instr_from instr.latency)
+ problem.latency_constraints;;
+
+let get_nr_instructions problem = Array.length problem.instruction_usages;;
+let get_nr_resources problem = Array.length problem.resource_bounds;;
+
+type solution = int array
+type scheduler = problem -> solution option
+
+(* DISABLED
+(** Schedule the problem optimally by constraint solving using the Gecode solver. *)
+external gecode_scheduler : problem -> solution option =
+ "caml_gecode_schedule_instr";;
+ *)
+
+let maximum_slot_used times =
+ let maxi = ref (-1) in
+ for i=0 to (Array.length times)-2
+ do
+ maxi := max !maxi times.(i)
+ done;
+ !maxi;;
+
+let check_schedule (problem : problem) (times : solution) =
+ let nr_instructions = get_nr_instructions problem in
+ (if Array.length times <> nr_instructions+1
+ then failwith
+ (Printf.sprintf "check_schedule: %d times expected, got %d"
+ (nr_instructions+1) (Array.length times)));
+ (if problem.max_latency >= 0 && times.(nr_instructions)> problem.max_latency
+ then failwith "check_schedule: max_latency exceeded");
+ (Array.iteri (fun i time ->
+ (if time < 0
+ then failwith (Printf.sprintf "time[%d] < 0" i))) times);
+ let slot_resources = Array.init ((maximum_slot_used times)+1)
+ (fun _ -> Array.copy problem.resource_bounds) in
+ for i=0 to nr_instructions -1
+ do
+ let remaining_resources = slot_resources.(times.(i))
+ and used_resources = problem.instruction_usages.(i) in
+ for resource=0 to (Array.length used_resources)-1
+ do
+ let after = remaining_resources.(resource) - used_resources.(resource) in
+ (if after < 0
+ then failwith (Printf.sprintf "check_schedule: instruction %d exceeds resource %d at slot %d" i resource times.(i)));
+ remaining_resources.(resource) <- after
+ done
+ done;
+ List.iter (fun ctr ->
+ if times.(ctr.instr_to) - times.(ctr.instr_from) < ctr.latency
+ then failwith (Printf.sprintf "check_schedule: time[%d]=%d - time[%d]=%d < %d"
+ ctr.instr_to times.(ctr.instr_to)
+ ctr.instr_from times.(ctr.instr_from)
+ ctr.latency)
+ ) problem.latency_constraints;;
+
+let bound_max_time problem =
+ let total = ref(Array.length problem.instruction_usages) in
+ List.iter (fun ctr -> total := !total + ctr.latency) problem.latency_constraints;
+ !total;;
+
+let vector_less_equal a b =
+ try
+ Array.iter2 (fun x y ->
+ if x>y
+ then raise Exit) a b;
+ true
+ with Exit -> false;;
+
+let vector_subtract a b =
+ assert ((Array.length a) = (Array.length b));
+ for i=0 to (Array.length a)-1
+ do
+ b.(i) <- b.(i) - a.(i)
+ done;;
+
+(* The version with critical path ordering is much better! *)
+type list_scheduler_order =
+ | INSTRUCTION_ORDER
+ | CRITICAL_PATH_ORDER;;
+
+let int_max (x : int) (y : int) =
+ if x > y then x else y;;
+
+let int_min (x : int) (y : int) =
+ if x < y then x else y;;
+
+let get_predecessors problem =
+ let nr_instructions = get_nr_instructions problem in
+ let predecessors = Array.make (nr_instructions+1) [] in
+ List.iter (fun ctr ->
+ predecessors.(ctr.instr_to) <-
+ (ctr.instr_from, ctr.latency)::predecessors.(ctr.instr_to))
+ problem.latency_constraints;
+ predecessors;;
+
+let get_successors problem =
+ let nr_instructions = get_nr_instructions problem in
+ let successors = Array.make nr_instructions [] in
+ List.iter (fun ctr ->
+ successors.(ctr.instr_from) <-
+ (ctr.instr_to, ctr.latency)::successors.(ctr.instr_from))
+ problem.latency_constraints;
+ successors;;
+
+let critical_paths successors =
+ let nr_instructions = Array.length successors in
+ let path_lengths = Array.make nr_instructions (-1) in
+ let rec compute i =
+ if i=nr_instructions then 0 else
+ match path_lengths.(i) with
+ | -2 -> failwith "InstructionScheduler: the dependency graph has cycles"
+ | -1 -> path_lengths.(i) <- -2;
+ let x = List.fold_left
+ (fun cur (j, latency)-> int_max cur (latency+(compute j)))
+ 1 successors.(i)
+ in path_lengths.(i) <- x; x
+ | x -> x
+ in for i = nr_instructions-1 downto 0
+ do
+ ignore (compute i)
+ done;
+ path_lengths;;
+
+let maximum_critical_path problem =
+ let paths = critical_paths (get_successors problem) in
+ Array.fold_left int_max 0 paths;;
+
+let get_earliest_dates predecessors =
+ let nr_instructions = (Array.length predecessors)-1 in
+ let path_lengths = Array.make (nr_instructions+1) (-1) in
+ let rec compute i =
+ match path_lengths.(i) with
+ | -2 -> failwith "InstructionScheduler: the dependency graph has cycles"
+ | -1 -> path_lengths.(i) <- -2;
+ let x = List.fold_left
+ (fun cur (j, latency)-> int_max cur (latency+(compute j)))
+ 0 predecessors.(i)
+ in path_lengths.(i) <- x; x
+ | x -> x
+ in for i = 0 to nr_instructions
+ do
+ ignore (compute i)
+ done;
+ for i = 0 to nr_instructions - 1
+ do
+ path_lengths.(nr_instructions) <- int_max
+ path_lengths.(nr_instructions) (1 + path_lengths.(i))
+ done;
+ path_lengths;;
+
+exception Unschedulable
+
+let get_latest_dates deadline successors =
+ let nr_instructions = Array.length successors
+ and path_lengths = critical_paths successors in
+ Array.init (nr_instructions + 1)
+ (fun i ->
+ if i < nr_instructions then
+ let path_length = path_lengths.(i) in
+ assert (path_length >= 1);
+ (if path_length > deadline
+ then raise Unschedulable);
+ deadline - path_length
+ else deadline);;
+
+let priority_list_scheduler (order : list_scheduler_order)
+ (problem : problem) :
+ solution option =
+ let nr_instructions = get_nr_instructions problem in
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+
+ let priorities = match order with
+ | INSTRUCTION_ORDER -> None
+ | CRITICAL_PATH_ORDER -> Some (critical_paths successors) in
+
+ let module InstrSet =
+ Set.Make (struct type t=int
+ let compare = match priorities with
+ | None -> (fun x y -> x - y)
+ | Some p -> (fun x y ->
+ (match p.(y)-p.(x) with
+ | 0 -> x - y
+ | z -> z))
+ end) in
+
+ let max_time = bound_max_time problem in
+ let ready = Array.make max_time InstrSet.empty in
+ Array.iteri (fun i preds ->
+ if i<nr_instructions && preds=[]
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and current_resources = Array.copy problem.resource_bounds
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert(!time >= 0);
+ !time
+ with Exit -> -1
+
+ in
+ let advance_time() =
+ begin
+ (if !current_time < max_time-1
+ then
+ begin
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
+ ready.(!current_time) <- InstrSet.empty;
+ end);
+ incr current_time
+ end in
+
+ let attempt_scheduling ready usages =
+ let result = ref (-1) in
+ try
+ InstrSet.iter (fun i ->
+ (* Printf.printf "trying scheduling %d\n" i;
+ pr int_vector usages.(i);
+ print _vector current_resources; *)
+ if vector_less_equal usages.(i) current_resources
+ then
+ begin
+ vector_subtract usages.(i) current_resources;
+ result := i;
+ raise Exit
+ end) ready;
+ -1
+ with Exit -> !result in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i ->
+ begin
+ assert(times.(i) < 0);
+ times.(i) <- !current_time;
+ ready.(!current_time) <- InstrSet.remove i (ready.(!current_time));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ready.(to_time) <- InstrSet.add instr_to ready.(to_time))
+ successors.(i);
+ successors.(i) <- []
+ end
+ done;
+ try
+ let final_time = ref (-1) in
+ for i=0 to nr_instructions-1
+ do
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i)+1 then final_time := times.(i)+1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time
+ then final_time := target_time
+ ) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ Some times
+ with Exit -> None;;
+
+let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
+
+(* dummy code for placating ocaml's warnings *)
+let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
+
+type bundle = int list;;
+
+let rec extract_deps_to index = function
+ | [] -> []
+ | dep :: deps -> let extracts = extract_deps_to index deps in
+ if (dep.instr_to == index) then
+ dep :: extracts
+ else
+ extracts
+
+exception InvalidBundle;;
+
+let dependency_check problem bundle index =
+ let index_deps = extract_deps_to index problem.latency_constraints in
+ List.iter (fun i ->
+ List.iter (fun dep ->
+ if (dep.instr_from == i) then raise InvalidBundle
+ ) index_deps
+ ) bundle;;
+
+let rec make_bundle problem resources bundle index =
+ let resources_copy = Array.copy resources in
+ let nr_instructions = get_nr_instructions problem in
+ if (index >= nr_instructions) then (bundle, index+1) else
+ let inst_usage = problem.instruction_usages.(index) in
+ try match vector_less_equal inst_usage resources with
+ | false -> raise InvalidBundle
+ | true -> (
+ dependency_check problem bundle index;
+ vector_subtract problem.instruction_usages.(index) resources_copy;
+ make_bundle problem resources_copy (index::bundle) (index+1)
+ )
+ with InvalidBundle -> (bundle, index);;
+
+let rec make_bundles problem index : bundle list =
+ if index >= get_nr_instructions problem then
+ []
+ else
+ let (bundle, new_index) = make_bundle problem problem.resource_bounds [] index in
+ bundle :: (make_bundles problem new_index);;
+
+let bundles_to_schedule problem bundles : solution =
+ let nr_instructions = get_nr_instructions problem in
+ let schedule = Array.make (nr_instructions+1) (nr_instructions+4) in
+ let time = ref 0 in
+ List.iter (fun bundle ->
+ begin
+ List.iter (fun i ->
+ schedule.(i) <- !time
+ ) bundle;
+ time := !time + 1
+ end
+ ) bundles; schedule;;
+
+let greedy_scheduler (problem : problem) : solution option =
+ let bundles = make_bundles problem 0 in
+ Some (bundles_to_schedule problem bundles);;
+
+(* alternate implementation
+let swap_array_elements a i j =
+ let x = a.(i) in
+ a.(i) <- a.(j);
+ a.(j) <- x;;
+
+let array_reverse_slice a first last =
+ let i = ref first and j = ref last in
+ while i < j
+ do
+ swap_array_elements a !i !j;
+ incr i;
+ decr j
+ done;;
+
+let array_reverse a =
+ let a' = Array.copy a in
+ array_reverse_slice a' 0 ((Array.length a)-1);
+ a';;
+ *)
+
+(* unneeded
+let array_reverse a =
+ let n=Array.length a in
+ Array.init n (fun i -> a.(n-1-i));;
+ *)
+
+let reverse_constraint nr_instructions ctr =
+ { instr_to = nr_instructions -ctr.instr_from;
+ instr_from = nr_instructions - ctr.instr_to;
+ latency = ctr.latency };;
+
+(* unneeded
+let rec list_map_filter f = function
+ | [] -> []
+ | h::t ->
+ (match f h with
+ | None -> list_map_filter f t
+ | Some x -> x :: (list_map_filter f t));;
+ *)
+
+let reverse_problem problem =
+ let nr_instructions = get_nr_instructions problem in
+ {
+ max_latency = problem.max_latency;
+ resource_bounds = problem.resource_bounds;
+ instruction_usages = Array.init (nr_instructions + 1)
+ (fun i ->
+ if i=0
+ then Array.map (fun _ -> 0) problem.resource_bounds else problem.instruction_usages.(nr_instructions - i));
+ latency_constraints = List.map (reverse_constraint nr_instructions)
+ problem.latency_constraints
+ };;
+
+let max_scheduled_time solution =
+ let time = ref (-1) in
+ for i = 0 to ((Array.length solution) - 2)
+ do
+ time := max !time solution.(i)
+ done;
+ !time;;
+
+(*
+let recompute_makespan problem solution =
+ let n = (Array.length solution) - 1 and ms = ref 0 in
+ List.iter (fun cstr ->
+ if cstr.instr_to = n
+ then ms := max !ms (solution.(cstr.instr_from) + cstr.latency)
+ ) problem.latency_constraints;
+ !ms;;
+ *)
+
+let schedule_reversed (scheduler : problem -> solution option)
+ (problem : problem) =
+ match scheduler (reverse_problem problem) with
+ | None -> None
+ | Some solution ->
+ let nr_instructions = get_nr_instructions problem in
+ let makespan = max_scheduled_time solution in
+ let ret = Array.init (nr_instructions + 1)
+ (fun i -> makespan-solution.(nr_instructions-i)) in
+ ret.(nr_instructions) <- max ((max_scheduled_time ret) + 1)
+ (ret.(nr_instructions));
+ Some ret;;
+
+(** Schedule the problem using a greedy list scheduling algorithm, from the end. *)
+let reverse_list_scheduler = schedule_reversed list_scheduler;;
+
+let check_problem problem =
+ (if (Array.length problem.instruction_usages) < 1
+ then failwith "length(problem.instruction_usages) < 1");;
+
+let validated_scheduler (scheduler : problem -> solution option)
+ (problem : problem) =
+ check_problem problem;
+ match scheduler problem with
+ | None -> None
+ | (Some solution) as ret -> check_schedule problem solution; ret;;
+
+let get_max_latency solution =
+ solution.((Array.length solution)-1);;
+
+let show_date_ranges problem =
+ let deadline = problem.max_latency in
+ assert(deadline >= 0);
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem in
+ let earliest_dates : int array = get_earliest_dates predecessors
+ and latest_dates : int array = get_latest_dates deadline successors in
+ assert ((Array.length earliest_dates) =
+ (Array.length latest_dates));
+ Array.iteri (fun i early ->
+ let late = latest_dates.(i) in
+ Printf.printf "t[%d] in %d..%d\n" i early late)
+ earliest_dates;;
+
+type pseudo_boolean_problem_type =
+ | SATISFIABILITY
+ | OPTIMIZATION;;
+
+type pseudo_boolean_mapper = {
+ mapper_pb_type : pseudo_boolean_problem_type;
+ mapper_nr_instructions : int;
+ mapper_nr_pb_variables : int;
+ mapper_earliest_dates : int array;
+ mapper_latest_dates : int array;
+ mapper_var_offsets : int array;
+ mapper_final_predecessors : (int * int) list
+};;
+
+(* Latency constraints are:
+ presence of instr-to at each t <= sum of presences of instr-from at compatible times
+
+ if reverse_encoding
+ presence of instr-from at each t <= sum of presences of instr-to at compatible times *)
+
+(* Experiments show reverse_encoding=true multiplies time by 2 in sat4j
+ without making hard instances easier *)
+let direct_encoding = false
+and reverse_encoding = false
+and delta_encoding = true
+
+let pseudo_boolean_print_problem channel problem pb_type =
+ let deadline = problem.max_latency in
+ assert (deadline > 0);
+ let nr_instructions = get_nr_instructions problem
+ and nr_resources = get_nr_resources problem
+ and successors = get_successors problem
+ and predecessors = get_predecessors problem in
+ let earliest_dates = get_earliest_dates predecessors
+ and latest_dates = get_latest_dates deadline successors in
+ let var_offsets = Array.make
+ (match pb_type with
+ | OPTIMIZATION -> nr_instructions+1
+ | SATISFIABILITY -> nr_instructions) 0 in
+ let nr_pb_variables =
+ (let nr = ref 0 in
+ for i=0 to (match pb_type with
+ | OPTIMIZATION -> nr_instructions
+ | SATISFIABILITY -> nr_instructions-1)
+ do
+ var_offsets.(i) <- !nr;
+ nr := !nr + latest_dates.(i) - earliest_dates.(i) + 1
+ done;
+ !nr)
+ and nr_pb_constraints =
+ (match pb_type with
+ | OPTIMIZATION -> nr_instructions+1
+ | SATISFIABILITY -> nr_instructions) +
+
+ (let count = ref 0 in
+ for t=0 to deadline-1
+ do
+ for j=0 to nr_resources-1
+ do
+ try
+ for i=0 to nr_instructions-1
+ do
+ let usage = problem.instruction_usages.(i).(j) in
+ if t >= earliest_dates.(i) && t <= latest_dates.(i)
+ && usage > 0 then raise Exit
+ done
+ with Exit -> incr count
+ done
+ done;
+ !count) +
+
+ (let count=ref 0 in
+ List.iter
+ (fun ctr ->
+ if ctr.instr_to < nr_instructions
+ then count := !count + 1 + latest_dates.(ctr.instr_to)
+ - earliest_dates.(ctr.instr_to)
+ + (if reverse_encoding
+ then 1 + latest_dates.(ctr.instr_from)
+ - earliest_dates.(ctr.instr_from)
+ else 0)
+ )
+ problem.latency_constraints;
+ !count) +
+
+ (match pb_type with
+ | OPTIMIZATION -> (1 + deadline - earliest_dates.(nr_instructions)) * nr_instructions
+ | SATISFIABILITY -> 0)
+ and measured_nr_constraints = ref 0 in
+
+ let pb_var i t =
+ assert(t >= earliest_dates.(i));
+ assert(t <= latest_dates.(i));
+ let v = 1+var_offsets.(i)+t-earliest_dates.(i) in
+ assert(v <= nr_pb_variables);
+ Printf.sprintf "x%d" v in
+
+ let end_constraint () =
+ begin
+ output_string channel ";\n";
+ incr measured_nr_constraints
+ end in
+
+ let gen_latency_constraint i_to i_from latency t_to =
+ Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n"
+ i_to i_from latency i_to t_to;
+ for t_from=earliest_dates.(i_from) to
+ int_min latest_dates.(i_from) (t_to - latency)
+ do
+ Printf.fprintf channel "+1 %s " (pb_var i_from t_from)
+ done;
+ Printf.fprintf channel "-1 %s " (pb_var i_to t_to);
+ Printf.fprintf channel ">= 0";
+ end_constraint()
+
+ and gen_dual_latency_constraint i_to i_from latency t_from =
+ Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n"
+ i_to i_from latency i_to t_from;
+ for t_to=int_max earliest_dates.(i_to) (t_from + latency)
+ to latest_dates.(i_to)
+ do
+ Printf.fprintf channel "+1 %s " (pb_var i_to t_to)
+ done;
+ Printf.fprintf channel "-1 %s " (pb_var i_from t_from);
+ Printf.fprintf channel ">= 0";
+ end_constraint()
+ in
+
+ Printf.fprintf channel "* #variable= %d #constraint= %d\n" nr_pb_variables nr_pb_constraints;
+ Printf.fprintf channel "* nr_instructions=%d deadline=%d\n" nr_instructions deadline;
+ begin
+ match pb_type with
+ | SATISFIABILITY -> ()
+ | OPTIMIZATION ->
+ output_string channel "min:";
+ for t=earliest_dates.(nr_instructions) to deadline
+ do
+ Printf.fprintf channel " %+d %s" t (pb_var nr_instructions t)
+ done;
+ output_string channel ";\n";
+ end;
+ for i=0 to (match pb_type with
+ | OPTIMIZATION -> nr_instructions
+ | SATISFIABILITY -> nr_instructions-1)
+ do
+ let early = earliest_dates.(i) and late= latest_dates.(i) in
+ Printf.fprintf channel "* t[%d] in %d..%d\n" i early late;
+ for t=early to late
+ do
+ Printf.fprintf channel "+1 %s " (pb_var i t)
+ done;
+ Printf.fprintf channel "= 1";
+ end_constraint()
+ done;
+
+ for t=0 to deadline-1
+ do
+ for j=0 to nr_resources-1
+ do
+ let bound = problem.resource_bounds.(j)
+ and coeffs = ref [] in
+ for i=0 to nr_instructions-1
+ do
+ let usage = problem.instruction_usages.(i).(j) in
+ if t >= earliest_dates.(i) && t <= latest_dates.(i)
+ && usage > 0
+ then coeffs := (i, usage) :: !coeffs
+ done;
+ if !coeffs <> [] then
+ begin
+ Printf.fprintf channel "* resource #%d at t=%d <= %d\n" j t bound;
+ List.iter (fun (i, usage) ->
+ Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs;
+ Printf.fprintf channel ">= %d" (-bound);
+ end_constraint();
+ end
+ done
+ done;
+
+ List.iter
+ (fun ctr ->
+ if ctr.instr_to < nr_instructions then
+ begin
+ for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to)
+ do
+ gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to
+ done;
+ if reverse_encoding
+ then
+ for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from)
+ do
+ gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from
+ done
+ end
+ ) problem.latency_constraints;
+
+ begin
+ match pb_type with
+ | SATISFIABILITY -> ()
+ | OPTIMIZATION ->
+ let final_latencies = Array.make nr_instructions 1 in
+ List.iter (fun (i, latency) ->
+ final_latencies.(i) <- int_max final_latencies.(i) latency)
+ predecessors.(nr_instructions);
+ for t_to=earliest_dates.(nr_instructions) to deadline
+ do
+ for i_from = 0 to nr_instructions -1
+ do
+ gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to
+ done
+ done
+ end;
+ assert (!measured_nr_constraints = nr_pb_constraints);
+ {
+ mapper_pb_type = pb_type;
+ mapper_nr_instructions = nr_instructions;
+ mapper_nr_pb_variables = nr_pb_variables;
+ mapper_earliest_dates = earliest_dates;
+ mapper_latest_dates = latest_dates;
+ mapper_var_offsets = var_offsets;
+ mapper_final_predecessors = predecessors.(nr_instructions)
+ };;
+
+type pb_answer =
+ | Positive
+ | Negative
+ | Unknown
+
+let line_to_pb_solution sol line nr_pb_variables =
+ let assign s v =
+ begin
+ let i = int_of_string s in
+ sol.(i-1) <- v
+ end in
+ List.iter
+ begin
+ function "" -> ()
+ | item ->
+ (match String.get item 0 with
+ | '+' ->
+ assert ((String.length item) >= 3);
+ assert ((String.get item 1) = 'x');
+ assign (String.sub item 2 ((String.length item)-2)) Positive
+ | '-' ->
+ assert ((String.length item) >= 3);
+ assert ((String.get item 1) = 'x');
+ assign (String.sub item 2 ((String.length item)-2)) Negative
+ | 'x' ->
+ assert ((String.length item) >= 2);
+ assign (String.sub item 1 ((String.length item)-1)) Positive
+ | _ -> failwith "syntax error in pseudo Boolean solution: epected + - or x"
+ )
+ end
+ (String.split_on_char ' ' (String.sub line 2 ((String.length line)-2)));;
+
+let pb_solution_to_schedule mapper pb_solution =
+ Array.mapi (fun i offset ->
+ let first = mapper.mapper_earliest_dates.(i)
+ and last = mapper.mapper_latest_dates.(i)
+ and time = ref (-1) in
+ for t=first to last
+ do
+ match pb_solution.(t - first + offset) with
+ | Positive ->
+ (if !time = -1
+ then time:=t
+ else failwith "duplicate time in pseudo boolean solution")
+ | Negative -> ()
+ | Unknown -> failwith "unknown value in pseudo boolean solution"
+ done;
+ (if !time = -1
+ then failwith "no time in pseudo boolean solution");
+ !time
+ ) mapper.mapper_var_offsets;;
+
+let pseudo_boolean_read_solution mapper channel =
+ let optimum = ref (-1)
+ and optimum_found = ref false
+ and solution = Array.make mapper.mapper_nr_pb_variables Unknown in
+ try
+ while true do
+ match input_line channel with
+ | "" -> ()
+ | line ->
+ begin
+ match String.get line 0 with
+ | 'c' -> ()
+ | 'o' ->
+ assert ((String.length line) >= 2);
+ assert ((String.get line 1) = ' ');
+ optimum := int_of_string (String.sub line 2 ((String.length line)-2))
+ | 's' -> (match line with
+ | "s OPTIMUM FOUND" -> optimum_found := true
+ | "s SATISFIABLE" -> ()
+ | "s UNSATISFIABLE" -> close_in channel;
+ raise Unschedulable
+ | _ -> failwith line)
+ | 'v' -> line_to_pb_solution solution line mapper.mapper_nr_pb_variables
+ | x -> Printf.printf "unknown: %s\n" line
+ end
+ done;
+ assert false
+ with End_of_file ->
+ close_in channel;
+ begin
+ let sol = pb_solution_to_schedule mapper solution in
+ sol
+ end;;
+
+let recompute_max_latency mapper solution =
+ let maxi = ref (-1) in
+ for i=0 to (mapper.mapper_nr_instructions-1)
+ do
+ maxi := int_max !maxi (1+solution.(i))
+ done;
+ List.iter (fun (i, latency) ->
+ maxi := int_max !maxi (solution.(i) + latency)) mapper.mapper_final_predecessors;
+ !maxi;;
+
+let adjust_check_solution mapper solution =
+ match mapper.mapper_pb_type with
+ | OPTIMIZATION ->
+ let max_latency = recompute_max_latency mapper solution in
+ assert (max_latency = solution.(mapper.mapper_nr_instructions));
+ solution
+ | SATISFIABILITY ->
+ let max_latency = recompute_max_latency mapper solution in
+ Array.init (mapper.mapper_nr_instructions+1)
+ (fun i -> if i < mapper.mapper_nr_instructions
+ then solution.(i)
+ else max_latency);;
+
+(* let pseudo_boolean_solver = ref "/local/monniaux/progs/naps/naps" *)
+(* let pseudo_boolean_solver = ref "/local/monniaux/packages/sat4j/org.sat4j.pb.jar CuttingPlanes" *)
+
+(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar CuttingPlanes" *)
+(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar" *)
+(* let pseudo_boolean_solver = ref "clasp" *)
+(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/open-wbo/open-wbo_static -formula=1" *)
+(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/naps/naps" *)
+(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/minisatp/build/release/bin/minisatp" *)
+(* let pseudo_boolean_solver = ref "java -jar sat4j-pb.jar CuttingPlanesStar" *)
+let pseudo_boolean_solver = ref "pb_solver"
+
+let pseudo_boolean_scheduler pb_type problem =
+ try
+ let filename_in = "problem.opb" in
+ (* needed only if not using stdout and filename_out = "problem.sol" *)
+ let mapper =
+ with_out_channel (open_out filename_in)
+ (fun opb_problem ->
+ pseudo_boolean_print_problem opb_problem problem pb_type) in
+ Some (with_in_channel
+ (Unix.open_process_in (!pseudo_boolean_solver ^ " " ^ filename_in))
+ (fun opb_solution -> adjust_check_solution mapper (pseudo_boolean_read_solution mapper opb_solution)))
+ with
+ | Unschedulable -> None;;
+
+let rec reoptimizing_scheduler (scheduler : scheduler) (previous_solution : solution) (problem : problem) =
+ if (get_max_latency previous_solution)>1 then
+ begin
+ Printf.printf "reoptimizing < %d\n" (get_max_latency previous_solution);
+ flush stdout;
+ match scheduler
+ { problem with max_latency = (get_max_latency previous_solution)-1 }
+ with
+ | None -> previous_solution
+ | Some solution -> reoptimizing_scheduler scheduler solution problem
+ end
+ else previous_solution;;
+
+let smt_var i = Printf.sprintf "t%d" i
+
+let is_resource_used problem j =
+ try
+ Array.iter (fun usages ->
+ if usages.(j) > 0
+ then raise Exit) problem.instruction_usages;
+ false
+ with Exit -> true;;
+
+let smt_use_quantifiers = false
+
+let smt_print_problem channel problem =
+ let nr_instructions = get_nr_instructions problem in
+ let gen_smt_resource_constraint time j =
+ output_string channel "(<= (+";
+ Array.iteri
+ (fun i usages ->
+ let usage=usages.(j) in
+ if usage > 0
+ then Printf.fprintf channel " (ite (= %s %s) %d 0)"
+ time (smt_var i) usage)
+ problem.instruction_usages;
+ Printf.fprintf channel ") %d)" problem.resource_bounds.(j)
+ in
+ output_string channel "(set-option :produce-models true)\n";
+ for i=0 to nr_instructions
+ do
+ Printf.fprintf channel "(declare-const %s Int)\n" (smt_var i);
+ Printf.fprintf channel "(assert (>= %s 0))\n" (smt_var i)
+ done;
+ for i=0 to nr_instructions-1
+ do
+ Printf.fprintf channel "(assert (< %s %s))\n"
+ (smt_var i) (smt_var nr_instructions)
+ done;
+ (if problem.max_latency > 0
+ then Printf.fprintf channel "(assert (<= %s %d))\n"
+ (smt_var nr_instructions) problem.max_latency);
+ List.iter (fun ctr ->
+ Printf.fprintf channel "(assert (>= (- %s %s) %d))\n"
+ (smt_var ctr.instr_to)
+ (smt_var ctr.instr_from)
+ ctr.latency) problem.latency_constraints;
+ for j=0 to (Array.length problem.resource_bounds)-1
+ do
+ if is_resource_used problem j
+ then
+ begin
+ if smt_use_quantifiers
+ then
+ begin
+ Printf.fprintf channel
+ "; resource #%d <= %d\n(assert (forall ((t Int)) "
+ j problem.resource_bounds.(j);
+ gen_smt_resource_constraint "t" j;
+ output_string channel "))\n"
+ end
+ else
+ begin
+ (if problem.max_latency < 0
+ then failwith "quantifier explosion needs max latency");
+ for t=0 to problem.max_latency
+ do
+ Printf.fprintf channel
+ "; resource #%d <= %d at t=%d\n(assert "
+ j problem.resource_bounds.(j) t;
+ gen_smt_resource_constraint (string_of_int t) j;
+ output_string channel ")\n"
+ done
+ end
+ end
+ done;
+ output_string channel "(check-sat)(get-model)\n";;
+
+
+let ilp_print_problem channel problem pb_type =
+ let deadline = problem.max_latency in
+ assert (deadline > 0);
+ let nr_instructions = get_nr_instructions problem
+ and nr_resources = get_nr_resources problem
+ and successors = get_successors problem
+ and predecessors = get_predecessors problem in
+ let earliest_dates = get_earliest_dates predecessors
+ and latest_dates = get_latest_dates deadline successors in
+
+ let pb_var i t =
+ Printf.sprintf "x%d_%d" i t in
+
+ let gen_latency_constraint i_to i_from latency t_to =
+ Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n"
+ i_to i_from latency i_to t_to;
+ Printf.fprintf channel "c_%d_%d_%d_%d: "
+ i_to i_from latency t_to;
+ for t_from=earliest_dates.(i_from) to
+ int_min latest_dates.(i_from) (t_to - latency)
+ do
+ Printf.fprintf channel "+1 %s " (pb_var i_from t_from)
+ done;
+ Printf.fprintf channel "-1 %s " (pb_var i_to t_to);
+ output_string channel ">= 0\n"
+
+ and gen_dual_latency_constraint i_to i_from latency t_from =
+ Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n"
+ i_to i_from latency i_to t_from;
+ Printf.fprintf channel "d_%d_%d_%d_%d: "
+ i_to i_from latency t_from;
+ for t_to=int_max earliest_dates.(i_to) (t_from + latency)
+ to latest_dates.(i_to)
+ do
+ Printf.fprintf channel "+1 %s " (pb_var i_to t_to)
+ done;
+ Printf.fprintf channel "-1 %s " (pb_var i_from t_from);
+ Printf.fprintf channel ">= 0\n"
+
+ and gen_delta_constraint i_from i_to latency =
+ if delta_encoding
+ then Printf.fprintf channel "l_%d_%d_%d: +1 t%d -1 t%d >= %d\n"
+ i_from i_to latency i_to i_from latency
+
+ in
+
+ Printf.fprintf channel "\\ nr_instructions=%d deadline=%d\n" nr_instructions deadline;
+ begin
+ match pb_type with
+ | SATISFIABILITY -> output_string channel "Minimize dummy: 0\n"
+ | OPTIMIZATION ->
+ Printf.fprintf channel "Minimize\nmakespan: t%d\n" nr_instructions
+ end;
+ output_string channel "Subject To\n";
+ for i=0 to (match pb_type with
+ | OPTIMIZATION -> nr_instructions
+ | SATISFIABILITY -> nr_instructions-1)
+ do
+ let early = earliest_dates.(i) and late= latest_dates.(i) in
+ Printf.fprintf channel "\\ t[%d] in %d..%d\ntimes%d: " i early late i;
+ for t=early to late
+ do
+ Printf.fprintf channel "+1 %s " (pb_var i t)
+ done;
+ Printf.fprintf channel "= 1\n"
+ done;
+
+ for t=0 to deadline-1
+ do
+ for j=0 to nr_resources-1
+ do
+ let bound = problem.resource_bounds.(j)
+ and coeffs = ref [] in
+ for i=0 to nr_instructions-1
+ do
+ let usage = problem.instruction_usages.(i).(j) in
+ if t >= earliest_dates.(i) && t <= latest_dates.(i)
+ && usage > 0
+ then coeffs := (i, usage) :: !coeffs
+ done;
+ if !coeffs <> [] then
+ begin
+ Printf.fprintf channel "\\ resource #%d at t=%d <= %d\nr%d_%d: " j t bound j t;
+ List.iter (fun (i, usage) ->
+ Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs;
+ Printf.fprintf channel ">= %d\n" (-bound)
+ end
+ done
+ done;
+
+ List.iter
+ (fun ctr ->
+ if ctr.instr_to < nr_instructions then
+ begin
+ gen_delta_constraint ctr.instr_from ctr.instr_to ctr.latency;
+ begin
+ if direct_encoding
+ then
+ for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to)
+ do
+ gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to
+ done
+ end;
+ begin
+ if reverse_encoding
+ then
+ for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from)
+ do
+ gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from
+ done
+ end
+ end
+ ) problem.latency_constraints;
+
+ begin
+ match pb_type with
+ | SATISFIABILITY -> ()
+ | OPTIMIZATION ->
+ let final_latencies = Array.make nr_instructions 1 in
+ List.iter (fun (i, latency) ->
+ final_latencies.(i) <- int_max final_latencies.(i) latency)
+ predecessors.(nr_instructions);
+ for i_from = 0 to nr_instructions -1
+ do
+ gen_delta_constraint i_from nr_instructions final_latencies.(i_from)
+ done;
+ for t_to=earliest_dates.(nr_instructions) to deadline
+ do
+ for i_from = 0 to nr_instructions -1
+ do
+ gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to
+ done
+ done
+ end;
+ for i=0 to (match pb_type with
+ | OPTIMIZATION -> nr_instructions
+ | SATISFIABILITY -> nr_instructions-1)
+ do
+ Printf.fprintf channel "ct%d : -1 t%d" i i;
+ let early = earliest_dates.(i) and late= latest_dates.(i) in
+ for t=early to late do
+ Printf.fprintf channel " +%d %s" t (pb_var i t)
+ done;
+ output_string channel " = 0\n"
+ done;
+ output_string channel "Bounds\n";
+ for i=0 to (match pb_type with
+ | OPTIMIZATION -> nr_instructions
+ | SATISFIABILITY -> nr_instructions-1)
+ do
+ let early = earliest_dates.(i) and late= latest_dates.(i) in
+ begin
+ Printf.fprintf channel "%d <= t%d <= %d\n" early i late;
+ if true then
+ for t=early to late do
+ Printf.fprintf channel "0 <= %s <= 1\n" (pb_var i t)
+ done
+ end
+ done;
+ output_string channel "Integer\n";
+ for i=0 to (match pb_type with
+ | OPTIMIZATION -> nr_instructions
+ | SATISFIABILITY -> nr_instructions-1)
+ do
+ Printf.fprintf channel "t%d " i
+ done;
+ output_string channel "\nBinary\n";
+ for i=0 to (match pb_type with
+ | OPTIMIZATION -> nr_instructions
+ | SATISFIABILITY -> nr_instructions-1)
+ do
+ let early = earliest_dates.(i) and late= latest_dates.(i) in
+ for t=early to late do
+ output_string channel (pb_var i t);
+ output_string channel " "
+ done;
+ output_string channel "\n"
+ done;
+ output_string channel "End\n";
+ {
+ mapper_pb_type = pb_type;
+ mapper_nr_instructions = nr_instructions;
+ mapper_nr_pb_variables = 0;
+ mapper_earliest_dates = earliest_dates;
+ mapper_latest_dates = latest_dates;
+ mapper_var_offsets = [| |];
+ mapper_final_predecessors = predecessors.(nr_instructions)
+ };;
+
+(* Guess what? Cplex sometimes outputs 11.000000004 instead of integer 11 *)
+
+let positive_float_round x = truncate (x +. 0.5)
+
+let float_round (x : float) : int =
+ if x > 0.0
+ then positive_float_round x
+ else - (positive_float_round (-. x))
+
+let rounded_int_of_string x = float_round (float_of_string x)
+
+let ilp_read_solution mapper channel =
+ let times = Array.make
+ (match mapper.mapper_pb_type with
+ | OPTIMIZATION -> 1+mapper.mapper_nr_instructions
+ | SATISFIABILITY -> mapper.mapper_nr_instructions) (-1) in
+ try
+ while true do
+ let line = input_line channel in
+ ( if (String.length line) < 3
+ then failwith (Printf.sprintf "bad ilp output: length(line) < 3: %s" line));
+ match String.get line 0 with
+ | 'x' -> ()
+ | 't' -> let space =
+ try String.index line ' '
+ with Not_found ->
+ failwith "bad ilp output: no t variable number"
+ in
+ let tnumber =
+ try int_of_string (String.sub line 1 (space-1))
+ with Failure _ ->
+ failwith "bad ilp output: not a variable number"
+ in
+ (if tnumber < 0 || tnumber >= (Array.length times)
+ then failwith (Printf.sprintf "bad ilp output: not a correct variable number: %d (%d)" tnumber (Array.length times)));
+ let value =
+ let s = String.sub line (space+1) ((String.length line)-space-1) in
+ try rounded_int_of_string s
+ with Failure _ ->
+ failwith (Printf.sprintf "bad ilp output: not a time number (%s)" s)
+ in
+ (if value < 0
+ then failwith "bad ilp output: negative time");
+ times.(tnumber) <- value
+ | '#' -> ()
+ | '0' -> ()
+ | _ -> failwith (Printf.sprintf "bad ilp output: bad variable initial, line = %s" line)
+ done;
+ assert false
+ with End_of_file ->
+ Array.iteri (fun i x ->
+ if i<(Array.length times)-1
+ && x<0 then raise Unschedulable) times;
+ times;;
+
+let ilp_solver = ref "ilp_solver"
+
+let problem_nr = ref 0
+
+let ilp_scheduler pb_type problem =
+ try
+ let filename_in = Printf.sprintf "problem%05d.lp" !problem_nr
+ and filename_out = Printf.sprintf "problem%05d.sol" !problem_nr in
+ incr problem_nr;
+ let mapper = with_out_channel (open_out filename_in)
+ (fun opb_problem -> ilp_print_problem opb_problem problem pb_type) in
+
+ begin
+ match Unix.system (!ilp_solver ^ " " ^ filename_in ^ " " ^ filename_out) with
+ | Unix.WEXITED 0 ->
+ Some (with_in_channel
+ (open_in filename_out)
+ (fun opb_solution ->
+ adjust_check_solution mapper
+ (ilp_read_solution mapper opb_solution)))
+ | Unix.WEXITED _ -> failwith "failed to start ilp solver"
+ | _ -> None
+ end
+ with
+ | Unschedulable -> None;;
+
+let current_utime_all () =
+ let t = Unix.times() in
+ t.Unix.tms_cutime +. t.Unix.tms_utime;;
+
+let utime_all_fn fn arg =
+ let utime_start = current_utime_all () in
+ let output = fn arg in
+ let utime_end = current_utime_all () in
+ (output, utime_end -. utime_start);;
+
+let cascaded_scheduler (problem : problem) =
+ let (some_initial_solution, list_scheduler_time) =
+ utime_all_fn (validated_scheduler list_scheduler) problem in
+ match some_initial_solution with
+ | None -> None
+ | Some initial_solution ->
+ let (solution, reoptimizing_time) = utime_all_fn (reoptimizing_scheduler (validated_scheduler (ilp_scheduler SATISFIABILITY)) initial_solution) problem in
+ begin
+ let latency2 = get_max_latency solution
+ and latency1 = get_max_latency initial_solution in
+ Printf.printf "postpass %s: %d, %d, %d, %g, %g\n"
+ (if latency2 < latency1 then "REOPTIMIZED" else "unchanged")
+ (get_nr_instructions problem)
+ latency1 latency2
+ list_scheduler_time reoptimizing_time;
+ flush stdout
+ end;
+ Some solution;;
+
+let scheduler_by_name name =
+ match name with
+ | "ilp" -> validated_scheduler cascaded_scheduler
+ | "list" -> validated_scheduler list_scheduler
+ | "revlist" -> validated_scheduler reverse_list_scheduler
+ | "greedy" -> greedy_scheduler
+ | s -> failwith ("unknown scheduler: " ^ s);;
diff --git a/aarch64/InstructionScheduler.mli b/aarch64/InstructionScheduler.mli
new file mode 100644
index 00000000..85e2a5c6
--- /dev/null
+++ b/aarch64/InstructionScheduler.mli
@@ -0,0 +1,113 @@
+(** Schedule instructions on a synchronized pipeline
+by David Monniaux, CNRS, VERIMAG *)
+
+(** A latency constraint: instruction number [instr_to] should be scheduled at least [latency] clock ticks before [instr_from].
+
+It is possible to specify [latency]=0, meaning that [instr_to] can be scheduled at the same clock tick as [instr_from], but not before.
+
+[instr_to] can be the special value equal to the number of instructions, meaning that it refers to the final output latency. *)
+type latency_constraint = {
+ instr_from : int;
+ instr_to : int;
+ latency : int;
+ }
+
+(** A scheduling problem.
+
+In addition to the latency constraints, the resource constraints should be satisfied: at every clock tick, the sum of vectors of resources used by the instructions scheduled at that tick does not exceed the resource bounds.
+*)
+type problem = {
+ max_latency : int;
+ (** An optional maximal total latency of the problem, after which the problem is deemed not schedulable. -1 means there should be no maximum. *)
+
+ resource_bounds : int array;
+ (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *)
+
+ instruction_usages: int array array;
+ (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
+
+ latency_constraints : latency_constraint list
+ (** The latency constraints that must be satisfied *)
+ };;
+
+(** Print problem for human readability. *)
+val print_problem : out_channel -> problem -> unit;;
+
+(** Scheduling solution. For {i n} instructions to schedule, and 0≤{i i}<{i n}, position {i i} contains the time to which instruction {i i} should be scheduled. Position {i n} contains the final output latency. *)
+type solution = int array
+
+(** A scheduling algorithm.
+The return value [Some x] is a solution [x].
+[None] means that scheduling failed. *)
+type scheduler = problem -> solution option;;
+
+(* DISABLED
+(** Schedule the problem optimally by constraint solving using the Gecode solver. *)
+external gecode_scheduler : problem -> solution option
+ = "caml_gecode_schedule_instr"
+ *)
+
+(** Get the number the last scheduling time used for an instruction in a solution.
+@return The last clock tick used *)
+val maximum_slot_used : solution -> int
+
+(** Validate that a solution is truly a solution of a scheduling problem.
+@raise Failure if validation fails *)
+val check_schedule : problem -> solution -> unit
+
+(** Schedule the problem using a greedy list scheduling algorithm, from the start.
+The first (according to instruction ordering) instruction that is ready (according to the latency constraints) is scheduled at the current clock tick.
+Once a clock tick is full go to the next.
+
+@return [Some solution] when a solution is found, [None] if not. *)
+val list_scheduler : problem -> solution option
+
+(** Schedule the problem using the order of instructions without any reordering *)
+val greedy_scheduler : problem -> solution option
+
+(** Schedule a problem using a scheduler applied in the opposite direction, e.g. for list scheduling from the end instead of the start. BUGGY *)
+val schedule_reversed : scheduler -> problem -> int array option
+
+(** Schedule a problem from the end using a list scheduler. BUGGY *)
+val reverse_list_scheduler : problem -> int array option
+
+(** Check that a problem is well-formed.
+@raise Failure if validation fails *)
+val check_problem : problem -> unit
+
+(** Apply a scheduler and validate the result against the input problem.
+@return The solution found
+@raise Failure if validation fails *)
+val validated_scheduler : scheduler -> problem -> solution option;;
+
+(** Get max latency from solution
+@return Max latency *)
+val get_max_latency : solution -> int;;
+
+(** Get the length of a maximal critical path
+@return Max length *)
+val maximum_critical_path : problem -> int;;
+
+(** Apply line scheduler then advanced solver
+@return A solution if found *)
+val cascaded_scheduler : problem -> solution option;;
+
+val show_date_ranges : problem -> unit;;
+
+type pseudo_boolean_problem_type =
+ | SATISFIABILITY
+ | OPTIMIZATION;;
+
+type pseudo_boolean_mapper
+val pseudo_boolean_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;;
+val pseudo_boolean_read_solution : pseudo_boolean_mapper -> in_channel -> solution;;
+val pseudo_boolean_scheduler : pseudo_boolean_problem_type -> problem -> solution option;;
+
+val smt_print_problem : out_channel -> problem -> unit;;
+
+val ilp_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;;
+
+val ilp_scheduler : pseudo_boolean_problem_type -> problem -> solution option;;
+
+(** Schedule a problem using a scheduler given by a string name *)
+val scheduler_by_name : string -> problem -> int array option;;
diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml
new file mode 100644
index 00000000..17869a99
--- /dev/null
+++ b/aarch64/OpWeightsAsm.ml
@@ -0,0 +1,212 @@
+open Asmblock;;
+(* TODO remove? open PrepassSchedulingOracleDeps;;*)
+
+module Cortex_A53=
+ struct
+ let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+ let nr_non_pipelined_units = 1;;
+
+ let latency_of_op (i : basic) (nargs : int) =
+ match i with
+ | PArith _
+ | PArithP _ _ -> 1 (* XXX *)
+ | PArithPP
+
+ let resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Omove
+ | Ointconst _
+ | Olongconst _
+ | Ofloatconst _
+ | Osingleconst _
+ | Oaddrsymbol _
+ | Oaddrstack _
+ (* 32-bit integer arithmetic *)
+ | Oshift _
+ | Oadd
+ | Oaddshift _
+ | Oaddimm _
+ | Oneg
+ | Onegshift _
+ | Osub
+ | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+ | Omul
+ | Omuladd
+ | Omulsub -> [| 1; 1; 1; 0 |]
+ | Odiv
+ | Odivu -> [| 1; 0; 0; 0 |]
+ | Oand
+ | Oandshift _
+ | Oandimm _
+ | Oor
+ | Oorshift _
+ | Oorimm _
+ | Oxor
+ | Oxorshift _
+ | Oxorimm _
+ | Onot
+ | Onotshift _
+ | Obic
+ | Obicshift _
+ | Oorn
+ | Oornshift _
+ | Oeqv
+ | Oeqvshift _
+ | Oshl
+ | Oshr
+ | Oshru
+ | Oshrximm _
+ | Ozext _
+ | Osext _
+ | Oshlzext _
+ | Oshlsext _
+ | Ozextshr _
+ | Osextshr _
+
+(* 64-bit integer arithmetic *)
+ | Oshiftl _
+ | Oextend _
+ | Omakelong
+ | Olowlong
+ | Ohighlong
+ | Oaddl
+ | Oaddlshift _
+ | Oaddlext _
+ | Oaddlimm _
+ | Onegl
+ | Oneglshift _
+ | Osubl
+ | Osublshift _
+ | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+ | Omull
+ | Omulladd
+ | Omullsub
+ | Omullhs
+ | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+ | Odivl
+ | Odivlu -> [| 1; 0; 0; 0 |]
+ | Oandl
+ | Oandlshift _
+ | Oandlimm _
+ | Oorl
+ | Oorlshift _
+ | Oorlimm _
+ | Oxorl
+ | Oxorlshift _
+ | Oxorlimm _
+ | Onotl
+ | Onotlshift _
+ | Obicl
+ | Obiclshift _
+ | Oornl
+ | Oornlshift _
+ | Oeqvl
+ | Oeqvlshift _
+ | Oshll
+ | Oshrl
+ | Oshrlu
+ | Oshrlximm _
+ | Ozextl _
+ | Osextl _
+ | Oshllzext _
+ | Oshllsext _
+ | Ozextshrl _
+ | Osextshrl _ -> [| 1; 1; 0; 0 |]
+ (* 64-bit floating-point arithmetic *)
+ | Onegf (* r [rd = - r1] *)
+ | Oabsf (* r [rd = abs(r1)] *)
+ | Oaddf (* r [rd = r1 + r2] *)
+ | Osubf (* r [rd = r1 - r2] *)
+ | Omulf (* r [rd = r1 * r2] *)
+ | Odivf
+ (* 32-bit floating-point arithmetic *)
+ | Onegfs (* r [rd = - r1] *)
+ | Oabsfs (* r [rd = abs(r1)] *)
+ | Oaddfs (* r [rd = r1 + r2] *)
+ | Osubfs (* r [rd = r1 - r2] *)
+ | Omulfs (* r [rd = r1 * r2] *)
+ | Odivfs (* r [rd = r1 / r2] *)
+ | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *)
+(* Conversions between int and float *)
+ | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *)
+ | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *)
+ | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *)
+ | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (* r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *)
+ -> [| 1 ; 1; 1; 0 |]
+
+(* Boolean tests *)
+ | Ocmp cmp | Osel (cmp, _) ->
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Odiv | Odivu -> [| 29 |]
+ | Odivfs -> [| 20 |]
+ | Odivl | Odivlu | Odivf -> [| 50 |]
+ | _ -> [| -1 |];;
+
+ let resources_of_cond (cmp : condition) (nargs : int) =
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+ let latency_of_call _ _ = 6;;
+
+ let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_call _ _ = resource_bounds;;
+ let resources_of_builtin _ = resource_bounds;;
+ end;;
+
+let get_opweights () : opweights =
+ match !Clflags.option_mtune with
+ | "cortex-a53" | "cortex-a35" | "" ->
+ {
+ pipelined_resource_bounds = Cortex_A53.resource_bounds;
+ nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+ latency_of_op = Cortex_A53.latency_of_op;
+ resources_of_op = Cortex_A53.resources_of_op;
+ non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+ latency_of_load = Cortex_A53.latency_of_load;
+ resources_of_load = Cortex_A53.resources_of_load;
+ resources_of_store = Cortex_A53.resources_of_store;
+ resources_of_cond = Cortex_A53.resources_of_cond;
+ latency_of_call = Cortex_A53.latency_of_call;
+ resources_of_call = Cortex_A53.resources_of_call;
+ resources_of_builtin = Cortex_A53.resources_of_builtin
+ }
+ | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index 2107ce22..31864384 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -12,12 +12,12 @@
(* *)
(* *************************************************************)
-open Asmvliw
+(*open Asm*)
open Asmblock
-open Printf
-open Camlcoq
-open InstructionScheduler
-open TargetPrinter.Target
+(*open Printf*)
+(*open Camlcoq*)
+(*open InstructionScheduler*)
+(*open TargetPrinter.Target*)
let debug = false
@@ -25,770 +25,689 @@ let debug = false
* Extracting infos from Asmvliw instructions
*)
-type immediate = I32 of Integers.Int.int | I64 of Integers.Int64.int | Off of offset
-
-type location = Reg of preg | Mem
+(* TODO remove? type immediate = I32 of Integers.Int.int | I64 of Integers.Int64.int | Off of offset*)
+(*type location = Reg of preg | Mem*)
type real_instruction =
- (* ALU *)
- | Addw | Andw | Compw | Mulw | Orw | Sbfw | Sbfxw | Sraw | Srlw | Sllw | Srsw | Rorw | Xorw
- | Addd | Andd | Compd | Muld | Ord | Sbfd | Sbfxd | Srad | Srld | Slld | Srsd | Xord
- | Nandw | Norw | Nxorw | Nandd | Nord | Nxord | Andnw | Ornw | Andnd | Ornd
- | Maddw | Maddd | Msbfw | Msbfd | Cmoved
- | Make | Nop | Extfz | Extfs | Insf
- | Addxw | Addxd
- (* LSU *)
- | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo
- | Sb | Sh | Sw | Sd | Sq | So
- (* BCU *)
- | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set
- (* FPU *)
- | Fabsd | Fabsw | Fnegw | Fnegd
- | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw
- | Fmind | Fminw | Fmaxd | Fmaxw | Finvw
- | Ffmaw | Ffmad | Ffmsw | Ffmsd
- | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz
- | Fcompw | Fcompd
-
-type ab_inst_rec = {
- inst: real_instruction;
- write_locs : location list;
- read_locs : location list;
- read_at_id : location list; (* Must be contained in read_locs *)
- read_at_e1 : location list; (* idem *)
- imm : immediate option;
- is_control : bool;
-}
-
-(** Asmvliw constructor to real instructions *)
+ | Add | Adr | Adrp | And | Asr | B | Bic | Bl | Blr | Br | Cbnz | Cbz | Cls
+ | Clz | Cmn | Cmp | Csel | Cset | Eon | Eor | Fabs | Fadd | Fcmp | Fcsel
+ | Fcvt | Fcvtzs | Fcvtzu | Fdiv | Fmadd | Fmov | Fmsub | Fmul | Fnmadd
+ | Fnmul | Fnmsub | Fneg | Fsqrt | Fsub | Ldaxr | Ldp | Ldr | Ldrb | Ldrh
+ | Ldrsb | Ldrsh | Ldrsw | Lr | Lsl | Lsr | Madd | Mov | Movk | Movn | Movz
+ | Msub | Nop | Orn | Orr | Ret | Rev | Rev16 | Ror | Sbfiz | Sbfx | Scvtf
+ | Sdiv | Smulh | Stlxr | Stp | Str | Strb | Strh | Sub | Sxtb | Sxth | Sxtw
+ | Tbnz | Tbz | Tst | Ubfiz | Ubfx | Ucvtf | Udiv | Umulh | Uxtb | Uxth
+ | Uxtw | Uxtx
+
+(* TODO remove? type ab_inst_rec = {*)
+ (*inst: real_instruction;*)
+ (*write_locs : location list;*)
+ (*read_locs : location list;*)
+ (*read_at_id : location list; (* Must be contained in read_locs *)*)
+ (*read_at_e1 : location list; (* idem *)*)
+ (*imm : immediate option;*)
+ (*is_control : bool;*)
+(*}*)
+
+(** Asm constructor to real instructions *)
exception OpaqueInstruction
-let arith_rr_real = function
- | Pcvtl2w -> Addw
- | Pmv -> Addd
- | Pnegw -> Sbfw
- | Pnegl -> Sbfd
- | Psxwd -> Extfs
- | Pzxwd -> Extfz
- | Pextfz(_,_) -> Extfz
- | Pextfs(_,_) -> Extfs
- | Pextfzl(_,_) -> Extfz
- | Pextfsl(_,_) -> Extfs
- | Pfabsw -> Fabsw
- | Pfabsd -> Fabsd
- | Pfnegw -> Fnegw
- | Pfnegd -> Fnegd
- | Pfinvw -> Finvw
- | Pfnarrowdw -> Fnarrowdw
- | Pfwidenlwd -> Fwidenlwd
- | Pfloatwrnsz -> Floatwz
- | Pfloatuwrnsz -> Floatuwz
- | Pfloatudrnsz -> Floatudz
- | Pfloatdrnsz -> Floatdz
- | Pfixedwrzz -> Fixedwz
- | Pfixeduwrzz -> Fixeduwz
- | Pfixeddrzz -> Fixeddz
- | Pfixedudrzz -> Fixedudz
- | Pfixeddrzz_i32 -> Fixeddz
- | Pfixedudrzz_i32 -> Fixedudz
-
-let arith_rrr_real = function
- | Pcompw it -> Compw
- | Pcompl it -> Compd
- | Pfcompw ft -> Fcompw
- | Pfcompl ft -> Fcompd
- | Paddw -> Addw
- | Paddxw _ -> Addxw
- | Psubw -> Sbfw
- | Prevsubxw _ -> Sbfxw
- | Pmulw -> Mulw
- | Pandw -> Andw
- | Pnandw -> Nandw
- | Porw -> Orw
- | Pnorw -> Norw
- | Pxorw -> Xorw
- | Pnxorw -> Nxorw
- | Pandnw -> Andnw
- | Pornw -> Ornw
- | Psraw -> Sraw
- | Psrlw -> Srlw
- | Psrxw -> Srsw
- | Psllw -> Sllw
- | Paddl -> Addd
- | Paddxl _ -> Addxd
- | Psubl -> Sbfd
- | Prevsubxl _ -> Sbfxd
- | Pandl -> Andd
- | Pnandl -> Nandd
- | Porl -> Ord
- | Pnorl -> Nord
- | Pxorl -> Xord
- | Pnxorl -> Nxord
- | Pandnl -> Andnd
- | Pornl -> Ornd
- | Pmull -> Muld
- | Pslll -> Slld
- | Psrll -> Srld
- | Psrxl -> Srsd
- | Psral -> Srad
- | Pfaddd -> Faddd
- | Pfaddw -> Faddw
- | Pfsbfd -> Fsbfd
- | Pfsbfw -> Fsbfw
- | Pfmuld -> Fmuld
- | Pfmulw -> Fmulw
- | Pfmind -> Fmind
- | Pfminw -> Fminw
- | Pfmaxd -> Fmaxd
- | Pfmaxw -> Fmaxw
-
-let arith_rri32_real = function
- | Pcompiw it -> Compw
- | Paddiw -> Addw
- | Paddxiw _ -> Addxw
- | Prevsubiw -> Sbfw
- | Prevsubxiw _ -> Sbfxw
- | Pmuliw -> Mulw
- | Pandiw -> Andw
- | Pnandiw -> Nandw
- | Poriw -> Orw
- | Pnoriw -> Norw
- | Pxoriw -> Xorw
- | Pnxoriw -> Nxorw
- | Pandniw -> Andnw
- | Porniw -> Ornw
- | Psraiw -> Sraw
- | Psrxiw -> Srsw
- | Psrliw -> Srlw
- | Pslliw -> Sllw
- | Proriw -> Rorw
- | Psllil -> Slld
- | Psrlil -> Srld
- | Psrail -> Srad
- | Psrxil -> Srsd
-
-let arith_rri64_real = function
- | Pcompil it -> Compd
- | Paddil -> Addd
- | Prevsubil -> Sbfd
- | Paddxil _ -> Addxd
- | Prevsubxil _ -> Sbfxd
- | Pmulil -> Muld
- | Pandil -> Andd
- | Pnandil -> Nandd
- | Poril -> Ord
- | Pnoril -> Nord
- | Pxoril -> Xord
- | Pnxoril -> Nxord
- | Pandnil -> Andnd
- | Pornil -> Ornd
-
-
-let arith_arr_real = function
- | Pinsf (_, _) -> Insf
- | Pinsfl (_, _) -> Insf
-
-let arith_arrr_real = function
- | Pfmaddfw -> Ffmaw
- | Pfmaddfl -> Ffmad
- | Pfmsubfw -> Ffmsw
- | Pfmsubfl -> Ffmsd
- | Pmaddw -> Maddw
- | Pmaddl -> Maddd
- | Pmsubw -> Msbfw
- | Pmsubl -> Msbfd
- | Pcmove _ -> Cmoved
- | Pcmoveu _ -> Cmoved
-
-let arith_arri32_real = function
- | Pmaddiw -> Maddw
- | Pcmoveiw _ -> Cmoved
- | Pcmoveuiw _ -> Cmoved
-
-let arith_arri64_real = function
- | Pmaddil -> Maddd
- | Pcmoveil _ -> Cmoved
- | Pcmoveuil _ -> Cmoved
-
-let arith_ri32_real = Make
-
-let arith_ri64_real = Make
-
-let arith_rf32_real = Make
-
-let arith_rf64_real = Make
-
-let store_real = function
- | Psb -> Sb
- | Psh -> Sh
- | Psw -> Sw
- | Psw_a -> Sw
- | Psd -> Sd
- | Psd_a -> Sd
- | Pfss -> Sw
- | Pfsd -> Sd
-
-let load_real = function
- | Plb -> Lbs
- | Plbu -> Lbz
- | Plh -> Lhs
- | Plhu -> Lhz
- | Plw -> Lws
- | Plw_a -> Lws
- | Pld -> Ld
- | Pld_a -> Ld
- | Pfls -> Lws
- | Pfld -> Ld
-
-let set_real = Set
-let get_real = Get
-let nop_real = Nop
-let loadsymbol_real = Make
-let loadqrro_real = Lq
-let loadorro_real = Lo
-let storeqrro_real = Sq
-let storeorro_real = So
-
-let ret_real = Ret
-let call_real = Call
-let icall_real = Icall
-let goto_real = Goto
-let igoto_real = Igoto
-let jl_real = Goto
-let cb_real = Cb
-let cbu_real = Cb
-
-let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;
- read_at_id = []; read_at_e1 = [] }
-
-let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;
- read_at_id = []; read_at_e1 = [] }
-
-let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;
- read_at_id = []; read_at_e1 = [] }
-
-let arith_arri32_rec i rd rs imm32 =
- let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []
- in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;
- read_at_id = [] ; read_at_e1 = rae1 }
-
-let arith_arri64_rec i rd rs imm64 =
- let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []
- in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;
- read_at_id = []; read_at_e1 = rae1 }
-
-let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;
- read_at_id = []; read_at_e1 = [] }
-
-let arith_arrr_rec i rd rs1 rs2 =
- let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []
- in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;
- read_at_id = []; read_at_e1 = rae1 }
-
-let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;
- read_at_id = []; read_at_e1 = [] }
-
-let arith_r_rec i rd = match i with
- (* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *)
- | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed);
- is_control = false; read_at_id = []; read_at_e1 = [] }
-
-let arith_rec i =
- match i with
- | PArithRRI32 (i, rd, rs, imm32) -> arith_rri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
- | PArithRRI64 (i, rd, rs, imm64) -> arith_rri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
- | PArithRRR (i, rd, rs1, rs2) -> arith_rrr_rec i (IR rd) (IR rs1) (IR rs2)
- | PArithARR (i, rd, rs) -> arith_arr_rec i (IR rd) (IR rs)
- (* Seems like single constant constructor types are elided *)
- | PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
- | PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
- | PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2)
- | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false;
- read_at_id = []; read_at_e1 = [] }
- | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false;
- read_at_id = []; read_at_e1 = [] }
- | PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = [];
- imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
- | PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = [];
- imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
- | PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs)
- | PArithR (i, rd) -> arith_r_rec i (IR rd)
-
-let load_rec i = match i with
- | PLoadRRO (trap, i, rs1, rs2, imm) ->
- { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;
- read_at_id = []; read_at_e1 = [] }
- | PLoadQRRO(rs, ra, imm) ->
- let (rs0, rs1) = gpreg_q_expand rs in
- { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;
- read_at_id = []; read_at_e1 = [] }
- | PLoadORRO(rs, ra, imm) ->
- let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
- { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];
- imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}
- | PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) ->
- { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;
- read_at_id = []; read_at_e1 = [] }
-
-let store_rec i = match i with
- | PStoreRRO (i, rs, ra, imm) ->
- { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));
- read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}
- | PStoreQRRO (rs, ra, imm) ->
- let (rs0, rs1) = gpreg_q_expand rs in
- { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));
- read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}
- | PStoreORRO (rs, ra, imm) ->
- let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
- { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];
- imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}
- | PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) ->
- { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;
- read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}
-
-let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;
- read_at_id = []; read_at_e1 = [] }
-
-let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;
- read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
-
-let basic_rec i =
- match i with
- | PArith i -> arith_rec i
- | PLoad i -> load_rec i
- | PStore i -> store_rec i
- | Pallocframe (_, _) -> raise OpaqueInstruction
- | Pfreeframe (_, _) -> raise OpaqueInstruction
- | Pget (rd, rs) -> get_rec rd rs
- | Pset (rd, rs) -> set_rec rd rs
- | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}
-
-let expand_rec = function
- | Pbuiltin _ -> raise OpaqueInstruction
-
-let ctl_flow_rec = function
- | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []}
- | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
- | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true;
- read_at_id = [Reg (IR r)]; read_at_e1 = [] }
- | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
- | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true;
- read_at_id = [Reg (IR r)]; read_at_e1 = [] }
- | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
- | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
- read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
- | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
- read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
- | Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *)
-
-let control_rec i =
- match i with
- | PExpand i -> expand_rec i
- | PCtlFlow i -> ctl_flow_rec i
-
-let rec basic_recs body = match body with
- | [] -> []
- | bi :: body -> (basic_rec bi) :: (basic_recs body)
-
-let exit_rec exit = match exit with
- | None -> []
- | Some ex -> [control_rec ex]
-
-let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit)
+let arith_p_real = function
+ | Padrp(_,_) -> Adrp
+ | Pmovz(_,_,_) -> Movz
+ | Pmovn(_,_,_) -> Movn
+ | Pfmovimms(_) -> Fmov (* TODO not sure about this and below too *)
+ | Pfmovimmd(_) -> Fmov
+
+let arith_pp_real = function
+ | Pmov -> Mov
+ | Pmovk(_,_,_) -> Movk
+ | Paddadr(_,_) -> Add
+ | Psbfiz(_,_,_) -> Sbfiz
+ | Psbfx(_,_,_) -> Sbfx
+ | Pubfiz(_,_,_) -> Ubfiz
+ | Pubfx(_,_,_) -> Ubfx
+ | Pfmov -> Fmov
+ | Pfcvtds -> Fcvt
+ | Pfcvtsd -> Fcvt
+ | Pfabs(_) -> Fabs
+ | Pfneg(_) -> Fneg
+ | Pscvtf(_,_) -> Scvtf
+ | Pucvtf(_,_) -> Ucvtf
+ | Pfcvtzs(_,_) -> Fcvtzs
+ | Pfcvtzu(_,_) -> Fcvtzu
+ | Paddimm(_,_) -> Add
+ | Psubimm(_,_) -> Sub
+
+let arith_ppp_real = function
+ | Pasrv(_) -> Asr
+ | Plslv(_) -> Lsl
+ | Plsrv(_) -> Lsr
+ | Prorv(_) -> Ror
+ | Psmulh -> Smulh
+ | Pumulh -> Umulh
+ | Psdiv(_) -> Sdiv
+ | Pudiv(_) -> Udiv
+ | Paddext(_) -> Add
+ | Psubext(_) -> Sub
+ | Pfadd(_) -> Fadd
+ | Pfdiv(_) -> Fdiv
+ | Pfmul(_) -> Fmul
+ | Pfsub(_) -> Fsub
+
+let arith_comparison_p_real = function
+ | Pfcmp0(_) -> Fcmp
+ | Pcmpimm(_,_) -> Cmp
+ | Pcmnimm(_,_) -> Cmn
+ | Ptstimm(_,_) -> Tst
+
+let arith_comparison_pp_real = function
+ | Pcmpext(_) -> Cmp
+ | Pcmnext(_) -> Cmn
+ | Pfcmp(_) -> Fcmp
+
+let arith_comparison_r0r_real = function
+ | Pcmp(_,_) -> Cmp
+ | Pcmn(_,_) -> Cmn
+ | Ptst(_,_) -> Tst
+
+let arith_comparison_rr0r_real = function
+ | Padd(_,_) -> Add
+ | Psub(_,_) -> Sub
+ | Pand(_,_) -> And
+ | Pbic(_,_) -> Bic
+ | Peon(_,_) -> Eon
+ | Peor(_,_) -> Eor
+ | Porr(_,_) -> Orr
+ | Porn(_,_) -> Orn
+
+let arith_comparison_rr0_real = function
+ | Pandimm(_,_) -> And
+ | Peorimm(_,_) -> Eor
+ | Porrimm(_,_) -> Orr
+
+let arith_comparison_arrrr0_real = function
+ | Pmadd(_) -> Madd
+ | Pmsub(_) -> Msub
+
+let store_rs_a_real = function
+ | Pstrw -> Str
+ | Pstrw_a -> Str
+ | Pstrx -> Str
+ | Pstrx_a -> Str
+ | Pstrb -> Strb
+ | Pstrh -> Strh
+ | Pstrs -> Str
+ | Pstrd -> Str
+ | Pstrd_a -> Str
+
+let load_rs_a_real = function
+ | Pldrw -> Ldr
+ | Pldrw_a -> Ldr
+ | Pldrx -> Ldr
+ | Pldrx_a -> Ldr
+ | Pldrb(_) -> Ldrb
+ | Pldrsb(_) -> Ldrsb
+ | Pldrh(_) -> Ldrh
+ | Pldrsh(_) -> Ldrsh
+ | Pldrzw -> Ldr
+ | Pldrsw -> Ldrsw
+ | Pldrs -> Ldr
+ | Pldrd -> Ldr
+ | Pldrd_a -> Ldr
+
+
+
+(*let set_real = Set*)
+(*let get_real = Get*)
+(*let nop_real = Nop*)
+(*let loadsymbol_real = Make*)
+(*let loadqrro_real = Lq*)
+(*let loadorro_real = Lo*)
+(*let storeqrro_real = Sq*)
+(*let storeorro_real = So*)
+
+(*let ret_real = Ret*)
+(*let call_real = Call*)
+(*let icall_real = Icall*)
+(*let goto_real = Goto*)
+(*let igoto_real = Igoto*)
+(*let jl_real = Goto*)
+(*let cb_real = Cb*)
+(*let cbu_real = Cb*)
+
+(*let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+
+(*let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+
+(*let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+
+(*let arith_arri32_rec i rd rs imm32 = *)
+ (*let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []*)
+ (*in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;*)
+ (*read_at_id = [] ; read_at_e1 = rae1 }*)
+
+(*let arith_arri64_rec i rd rs imm64 = *)
+ (*let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []*)
+ (*in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = rae1 }*)
+
+(*let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+
+(*let arith_arrr_rec i rd rs1 rs2 = *)
+ (*let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []*)
+ (*in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = rae1 }*)
+
+(*let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+
+(*let arith_r_rec i rd = match i with*)
+ (*(* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *)*)
+ (*| Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed);*)
+ (*is_control = false; read_at_id = []; read_at_e1 = [] }*)
+
+(*let arith_rec i =*)
+ (*match i with*)
+ (*| PArithRRI32 (i, rd, rs, imm32) -> arith_rri32_rec i (IR rd) (IR rs) (Some (I32 imm32))*)
+ (*| PArithRRI64 (i, rd, rs, imm64) -> arith_rri64_rec i (IR rd) (IR rs) (Some (I64 imm64))*)
+ (*| PArithRRR (i, rd, rs1, rs2) -> arith_rrr_rec i (IR rd) (IR rs1) (IR rs2)*)
+ (*| PArithARR (i, rd, rs) -> arith_arr_rec i (IR rd) (IR rs)*)
+ (*(* Seems like single constant constructor types are elided *)*)
+ (*| PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32))*)
+ (*| PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64))*)
+ (*| PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2)*)
+ (*| PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+ (*| PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+ (*| PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = [];*)
+ (*imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}*)
+ (*| PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = [];*)
+ (*imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}*)
+ (*| PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs)*)
+ (*| PArithR (i, rd) -> arith_r_rec i (IR rd)*)
+
+(*let load_rec i = match i with*)
+ (*| PLoadRRO (trap, i, rs1, rs2, imm) ->*)
+ (*{ inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+ (*| PLoadQRRO(rs, ra, imm) ->*)
+ (*let (rs0, rs1) = gpreg_q_expand rs in*)
+ (*{ inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+ (*| PLoadORRO(rs, ra, imm) ->*)
+ (*let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in*)
+ (*{ inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];*)
+ (*imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}*)
+ (*| PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) ->*)
+ (*{ inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+
+(*let store_rec i = match i with*)
+ (*| PStoreRRO (i, rs, ra, imm) ->*)
+ (*{ inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));*)
+ (*read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}*)
+ (*| PStoreQRRO (rs, ra, imm) ->*)
+ (*let (rs0, rs1) = gpreg_q_expand rs in*)
+ (*{ inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));*)
+ (*read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}*)
+ (*| PStoreORRO (rs, ra, imm) ->*)
+ (*let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in*)
+ (*{ inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];*)
+ (*imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}*)
+ (*| PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) ->*)
+ (*{ inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;*)
+ (*read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}*)
+
+(*let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;*)
+ (*read_at_id = []; read_at_e1 = [] }*)
+
+(*let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;*)
+ (*read_at_id = [Reg (IR rs)]; read_at_e1 = [] }*)
+
+(*let basic_rec i =*)
+ (*match i with*)
+ (*| PArith i -> arith_rec i*)
+ (*| PLoad i -> load_rec i*)
+ (*| PStore i -> store_rec i*)
+ (*| Pallocframe (_, _) -> raise OpaqueInstruction*)
+ (*| Pfreeframe (_, _) -> raise OpaqueInstruction*)
+ (*| Pget (rd, rs) -> get_rec rd rs*)
+ (*| Pset (rd, rs) -> set_rec rd rs*)
+ (*| Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}*)
+
+(*let expand_rec = function*)
+ (*| Pbuiltin _ -> raise OpaqueInstruction*)
+
+(*let ctl_flow_rec = function*)
+ (*| Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []}*)
+ (*| Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}*)
+ (*| Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true;*)
+ (*read_at_id = [Reg (IR r)]; read_at_e1 = [] }*)
+ (*| Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}*)
+ (*| Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true;*)
+ (*read_at_id = [Reg (IR r)]; read_at_e1 = [] }*)
+ (*| Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}*)
+ (*| Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;*)
+ (*read_at_id = [Reg (IR rs)]; read_at_e1 = [] }*)
+ (*| Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;*)
+ (*read_at_id = [Reg (IR rs)]; read_at_e1 = [] }*)
+ (*| Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *)*)
+
+(*let control_rec i =*)
+ (*match i with*)
+ (*| PExpand i -> expand_rec i*)
+ (*| PCtlFlow i -> ctl_flow_rec i*)
+
+(*let rec basic_recs body = match body with*)
+ (*| [] -> []*)
+ (*| bi :: body -> (basic_rec bi) :: (basic_recs body)*)
+
+(*let exit_rec exit = match exit with*)
+ (*| None -> []*)
+ (*| Some ex -> [control_rec ex]*)
+
+(*let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit)*)
(**
* Providing informations relative to the real instructions
*)
(** Abstraction providing all the necessary informations for solving the scheduling problem *)
-type inst_info = {
- write_locs : location list;
- read_locs : location list;
- reads_at_id : bool;
- reads_at_e1 : bool;
- is_control : bool;
- usage: int array; (* resources consumed by the instruction *)
- latency: int;
-}
-
-(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *)
-type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10
-
-let rec pow a = function
- | 0 -> Int64.one
- | 1 -> Int64.of_int a
- | n -> let b = pow a (n/2) in
- Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a))
-
-let signed_interval n : (int64 * int64) = begin
- assert (n > 0);
- let min = Int64.neg @@ pow 2 (n-1)
- and max = Int64.sub (pow 2 (n-1)) Int64.one
- in (min, max)
-end
-
-let within i interv = match interv with (min, max) -> (i >= min && i <= max)
-
-let signed_length (i:int64) =
- let rec f (i:int64) n =
- let interv = signed_interval n
- in if (within i interv) then n else f i (n+1)
- in f i 1
-
-let unsigned_length (i:int64) = (signed_length i) - 1
-
-let encode_imm (imm:int64) =
- if (Int64.compare imm Int64.zero < 0) then
- let length = signed_length imm
- in if length <= 10 then S10
- else if length <= 32 then U27L5
- else if length <= 37 then U27L10
- else if length <= 64 then E27U27L10
- else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm
- else
- let length = unsigned_length imm
- in if length <= 6 then U6
- else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *)
- else if length <= 32 then U27L5
- else if length <= 37 then U27L10
- else if length <= 64 then E27U27L10
- else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm
-
-(** Resources *)
-type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop
-
-let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop]
-
-let rec find_index elt l =
- match l with
- | [] -> raise Not_found
- | e::l -> if (e == elt) then 0
- else 1 + find_index elt l
-
-let resource_id resource : int = find_index resource resource_names
-
-let resource_bound resource : int =
- match resource with
- | Rissue -> 8
- | Rtiny -> 4
- | Rlite -> 2
- | Rfull -> 1
- | Rlsu -> 1
- | Rmau -> 1
- | Rbcu -> 1
- | Rtca -> 1
- | Rauxr -> 1
- | Rauxw -> 1
- | Rcrrp -> 1
- | Rcrwl -> 1
- | Rcrwh -> 1
- | Rnop -> 4
-
-let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names)
-
-(** Reservation tables *)
-let alu_full : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let alu_lite : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let alu_lite_x : int array = let resmap = fun r -> match r with
- | Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let alu_lite_y : int array = let resmap = fun r -> match r with
- | Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let alu_nop : int array = let resmap = fun r -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let alu_tiny : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let alu_tiny_x : int array = let resmap = fun r -> match r with
- | Rissue -> 2 | Rtiny -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let alu_tiny_y : int array = let resmap = fun r -> match r with
- | Rissue -> 3 | Rtiny -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let bcu : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rbcu -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let lsu_auxr : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let lsu_auxr_x : int array = let resmap = fun r -> match r with
- | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let lsu_auxr_y : int array = let resmap = fun r -> match r with
- | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let lsu_auxw : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let lsu_auxw_x : int array = let resmap = fun r -> match r with
- | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let lsu_auxw_y : int array = let resmap = fun r -> match r with
- | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let mau : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let mau_x : int array = let resmap = fun r -> match r with
- | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let mau_y : int array = let resmap = fun r -> match r with
- | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let mau_auxr : int array = let resmap = fun r -> match r with
- | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let mau_auxr_x : int array = let resmap = fun r -> match r with
- | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-let mau_auxr_y : int array = let resmap = fun r -> match r with
- | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
- in Array.of_list (List.map resmap resource_names)
-
-(** Real instructions *)
-
-exception InvalidEncoding
-
-let rec_to_usage r =
- let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i)
- | Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr)
-
- in match r.inst with
- | Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw
- | Nxorw | Andnw | Ornw ->
- (match encoding with None | Some U6 | Some S10 -> alu_tiny
- | Some U27L5 | Some U27L10 -> alu_tiny_x
- | _ -> raise InvalidEncoding)
- | Sbfxw | Sbfxd ->
- (match encoding with None -> alu_lite
- | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
- | _ -> raise InvalidEncoding)
- | Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord
- | Nxord | Andnd | Ornd ->
- (match encoding with None | Some U6 | Some S10 -> alu_tiny
- | Some U27L5 | Some U27L10 -> alu_tiny_x
- | Some E27U27L10 -> alu_tiny_y)
- |Cmoved ->
- (match encoding with None | Some U6 | Some S10 -> alu_lite
- | Some U27L5 | Some U27L10 -> alu_lite_x
- | Some E27U27L10 -> alu_lite_y)
- | Addxw ->
- (match encoding with None | Some U6 | Some S10 -> alu_lite
- | Some U27L5 | Some U27L10 -> alu_lite_x
- | _ -> raise InvalidEncoding)
- | Addxd ->
- (match encoding with None | Some U6 | Some S10 -> alu_lite
- | Some U27L5 | Some U27L10 -> alu_lite_x
- | Some E27U27L10 -> alu_lite_y)
- | Compw -> (match encoding with None -> alu_tiny
- | Some U6 | Some S10 | Some U27L5 -> alu_tiny_x
- | _ -> raise InvalidEncoding)
- | Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny
- | Some U27L5 | Some U27L10 -> alu_tiny_x
- | Some E27U27L10 -> alu_tiny_y)
- | Fcompw -> (match encoding with None -> alu_lite
- | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
- | _ -> raise InvalidEncoding)
- | Fcompd -> (match encoding with None -> alu_lite
- | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
- | _ -> raise InvalidEncoding)
- | Make -> (match encoding with Some U6 | Some S10 -> alu_tiny
- | Some U27L5 | Some U27L10 -> alu_tiny_x
- | Some E27U27L10 -> alu_tiny_y
- | _ -> raise InvalidEncoding)
- | Maddw | Msbfw -> (match encoding with None -> mau_auxr
- | Some U6 | Some S10 | Some U27L5 -> mau_auxr_x
- | _ -> raise InvalidEncoding)
- | Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr
- | Some U27L5 | Some U27L10 -> mau_auxr_x
- | Some E27U27L10 -> mau_auxr_y)
- | Mulw -> (match encoding with None -> mau
- | Some U6 | Some S10 | Some U27L5 -> mau_x
- | _ -> raise InvalidEncoding)
- | Muld -> (match encoding with None | Some U6 | Some S10 -> mau
- | Some U27L5 | Some U27L10 -> mau_x
- | Some E27U27L10 -> mau_y)
- | Nop -> alu_nop
- | Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding)
- (* TODO: check *)
- | Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding)
- | Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)
- | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau
- | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo ->
- (match encoding with None | Some U6 | Some S10 -> lsu_auxw
- | Some U27L5 | Some U27L10 -> lsu_auxw_x
- | Some E27U27L10 -> lsu_auxw_y)
- | Sb | Sh | Sw | Sd | Sq | So ->
- (match encoding with None | Some U6 | Some S10 -> lsu_auxr
- | Some U27L5 | Some U27L10 -> lsu_auxr_x
- | Some E27U27L10 -> lsu_auxr_y)
- | Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu
- | Get -> bcu_tiny_tiny_mau_xnop
- | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd
- | Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite
- | Fnarrowdw -> alu_full
- | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
- | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau
-
-
-let inst_info_to_dlatency i =
- begin
- assert (not (i.reads_at_id && i.reads_at_e1));
- match i.reads_at_id with
- | true -> +1
- | false -> (match i.reads_at_e1 with
- | true -> -1
- | false -> 0)
- end
-
-let real_inst_to_latency = function
- | Nop -> 0 (* Only goes through ID *)
- | Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw
- (* TODO check rorw *)
- | Rorw | Nandw | Norw | Nxorw | Ornw | Andnw
- | Nandd | Nord | Nxord | Ornd | Andnd
- | Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make
- | Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd
- | Fmind | Fmaxd | Fminw | Fmaxw
- -> 1
- | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4
- | Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)
- | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3
- | Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *)
- | Get -> 1
- | Set -> 4 (* According to the manual should be 3, but I measured 4 *)
- | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)
- | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1
- | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
- | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4
-
-let rec empty_inter la = function
- | [] -> true
- | b::lb -> if (List.mem b la) then false else empty_inter la lb
-
-let rec_to_info r : inst_info =
- let usage = rec_to_usage r
- and latency = real_inst_to_latency r.inst
- and reads_at_id = not (empty_inter r.read_locs r.read_at_id)
- and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1)
- in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control;
- reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 }
-
-let instruction_infos bb = List.map rec_to_info (instruction_recs bb)
-
-let instruction_usages bb =
- let usages = List.map (fun info -> info.usage) (instruction_infos bb)
- in Array.of_list usages
+(*type inst_info = {*)
+ (*write_locs : location list;*)
+ (*read_locs : location list;*)
+ (*reads_at_id : bool;*)
+ (*reads_at_e1 : bool;*)
+ (*is_control : bool;*)
+ (*usage: int array; (* resources consumed by the instruction *)*)
+ (*latency: int;*)
+(*}*)
+
+(*(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *)*)
+(*type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10*)
+
+(*let rec pow a = function*)
+ (*| 0 -> Int64.one*)
+ (*| 1 -> Int64.of_int a*)
+ (*| n -> let b = pow a (n/2) in*)
+ (*Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a))*)
+
+(*let signed_interval n : (int64 * int64) = begin *)
+ (*assert (n > 0);*)
+ (*let min = Int64.neg @@ pow 2 (n-1)*)
+ (*and max = Int64.sub (pow 2 (n-1)) Int64.one*)
+ (*in (min, max)*)
+(*end*)
+
+(*let within i interv = match interv with (min, max) -> (i >= min && i <= max)*)
+
+(*let signed_length (i:int64) =*)
+ (*let rec f (i:int64) n = *)
+ (*let interv = signed_interval n*)
+ (*in if (within i interv) then n else f i (n+1)*)
+ (*in f i 1*)
+
+(*let unsigned_length (i:int64) = (signed_length i) - 1*)
+
+(*let encode_imm (imm:int64) =*)
+ (*if (Int64.compare imm Int64.zero < 0) then*)
+ (*let length = signed_length imm*)
+ (*in if length <= 10 then S10*)
+ (*else if length <= 32 then U27L5*)
+ (*else if length <= 37 then U27L10*)
+ (*else if length <= 64 then E27U27L10*)
+ (*else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm*)
+ (*else*)
+ (*let length = unsigned_length imm*)
+ (*in if length <= 6 then U6*)
+ (*else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *)*)
+ (*else if length <= 32 then U27L5*)
+ (*else if length <= 37 then U27L10*)
+ (*else if length <= 64 then E27U27L10*)
+ (*else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm*)
+
+(*(** Resources *)*)
+(*type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop*)
+
+(*let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop]*)
+
+(*let rec find_index elt l =*)
+ (*match l with*)
+ (*| [] -> raise Not_found*)
+ (*| e::l -> if (e == elt) then 0*)
+ (*else 1 + find_index elt l*)
+
+(*let resource_id resource : int = find_index resource resource_names*)
+
+(*let resource_bound resource : int =*)
+ (*match resource with*)
+ (*| Rissue -> 8*)
+ (*| Rtiny -> 4*)
+ (*| Rlite -> 2*)
+ (*| Rfull -> 1*)
+ (*| Rlsu -> 1*)
+ (*| Rmau -> 1*)
+ (*| Rbcu -> 1*)
+ (*| Rtca -> 1*)
+ (*| Rauxr -> 1*)
+ (*| Rauxw -> 1*)
+ (*| Rcrrp -> 1*)
+ (*| Rcrwl -> 1*)
+ (*| Rcrwh -> 1*)
+ (*| Rnop -> 4*)
+
+(*let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names)*)
+
+(*(** Reservation tables *)*)
+(*let alu_full : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let alu_lite : int array = let resmap = fun r -> match r with *)
+ (*| Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let alu_lite_x : int array = let resmap = fun r -> match r with *)
+ (*| Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let alu_lite_y : int array = let resmap = fun r -> match r with *)
+ (*| Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let alu_nop : int array = let resmap = fun r -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let alu_tiny : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 1 | Rtiny -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let alu_tiny_x : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 2 | Rtiny -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let alu_tiny_y : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 3 | Rtiny -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let bcu : int array = let resmap = fun r -> match r with *)
+ (*| Rissue -> 1 | Rbcu -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with *)
+ (*| Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let lsu_auxr : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let lsu_auxr_x : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let lsu_auxr_y : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let lsu_auxw : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let lsu_auxw_x : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let lsu_auxw_y : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let mau : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let mau_x : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let mau_y : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let mau_auxr : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let mau_auxr_x : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*let mau_auxr_y : int array = let resmap = fun r -> match r with*)
+ (*| Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*)
+ (*in Array.of_list (List.map resmap resource_names)*)
+
+(*(** Real instructions *)*)
+
+(*exception InvalidEncoding*)
+
+(*let rec_to_usage r =*)
+ (*let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i)*)
+ (*| Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr)*)
+
+ (*in match r.inst with*)
+ (*| Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw*)
+ (*| Nxorw | Andnw | Ornw -> *)
+ (*(match encoding with None | Some U6 | Some S10 -> alu_tiny *)
+ (*| Some U27L5 | Some U27L10 -> alu_tiny_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Sbfxw | Sbfxd ->*)
+ (*(match encoding with None -> alu_lite*)
+ (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord*)
+ (*| Nxord | Andnd | Ornd ->*)
+ (*(match encoding with None | Some U6 | Some S10 -> alu_tiny *)
+ (*| Some U27L5 | Some U27L10 -> alu_tiny_x*)
+ (*| Some E27U27L10 -> alu_tiny_y)*)
+ (*|Cmoved ->*)
+ (*(match encoding with None | Some U6 | Some S10 -> alu_lite*)
+ (*| Some U27L5 | Some U27L10 -> alu_lite_x*)
+ (*| Some E27U27L10 -> alu_lite_y)*)
+ (*| Addxw -> *)
+ (*(match encoding with None | Some U6 | Some S10 -> alu_lite *)
+ (*| Some U27L5 | Some U27L10 -> alu_lite_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Addxd -> *)
+ (*(match encoding with None | Some U6 | Some S10 -> alu_lite *)
+ (*| Some U27L5 | Some U27L10 -> alu_lite_x*)
+ (*| Some E27U27L10 -> alu_lite_y)*)
+ (*| Compw -> (match encoding with None -> alu_tiny*)
+ (*| Some U6 | Some S10 | Some U27L5 -> alu_tiny_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny*)
+ (*| Some U27L5 | Some U27L10 -> alu_tiny_x*)
+ (*| Some E27U27L10 -> alu_tiny_y)*)
+ (*| Fcompw -> (match encoding with None -> alu_lite*)
+ (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Fcompd -> (match encoding with None -> alu_lite*)
+ (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Make -> (match encoding with Some U6 | Some S10 -> alu_tiny *)
+ (*| Some U27L5 | Some U27L10 -> alu_tiny_x *)
+ (*| Some E27U27L10 -> alu_tiny_y *)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Maddw | Msbfw -> (match encoding with None -> mau_auxr*)
+ (*| Some U6 | Some S10 | Some U27L5 -> mau_auxr_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr*)
+ (*| Some U27L5 | Some U27L10 -> mau_auxr_x*)
+ (*| Some E27U27L10 -> mau_auxr_y)*)
+ (*| Mulw -> (match encoding with None -> mau*)
+ (*| Some U6 | Some S10 | Some U27L5 -> mau_x*)
+ (*| _ -> raise InvalidEncoding)*)
+ (*| Muld -> (match encoding with None | Some U6 | Some S10 -> mau*)
+ (*| Some U27L5 | Some U27L10 -> mau_x*)
+ (*| Some E27U27L10 -> mau_y)*)
+ (*| Nop -> alu_nop*)
+ (*| Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding)*)
+ (*(* TODO: check *)*)
+ (*| Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding)*)
+ (*| Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)*)
+ (*| Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau*)
+ (*| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> *)
+ (*(match encoding with None | Some U6 | Some S10 -> lsu_auxw*)
+ (*| Some U27L5 | Some U27L10 -> lsu_auxw_x*)
+ (*| Some E27U27L10 -> lsu_auxw_y)*)
+ (*| Sb | Sh | Sw | Sd | Sq | So ->*)
+ (*(match encoding with None | Some U6 | Some S10 -> lsu_auxr*)
+ (*| Some U27L5 | Some U27L10 -> lsu_auxr_x*)
+ (*| Some E27U27L10 -> lsu_auxr_y)*)
+ (*| Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu*)
+ (*| Get -> bcu_tiny_tiny_mau_xnop*)
+ (*| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd*)
+ (*| Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite*)
+ (*| Fnarrowdw -> alu_full*)
+ (*| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw*)
+ (*| Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau*)
+
+
+(*let inst_info_to_dlatency i = *)
+ (*begin*)
+ (*assert (not (i.reads_at_id && i.reads_at_e1));*)
+ (*match i.reads_at_id with*)
+ (*| true -> +1*)
+ (*| false -> (match i.reads_at_e1 with*)
+ (*| true -> -1*)
+ (*| false -> 0)*)
+ (*end*)
+
+(*let real_inst_to_latency = function*)
+ (*| Nop -> 0 (* Only goes through ID *)*)
+ (*| Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw*)
+ (*(* TODO check rorw *)*)
+ (*| Rorw | Nandw | Norw | Nxorw | Ornw | Andnw*)
+ (*| Nandd | Nord | Nxord | Ornd | Andnd*)
+ (*| Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make*)
+ (*| Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd*)
+ (*| Fmind | Fmaxd | Fminw | Fmaxw*)
+ (*-> 1*)
+ (*| Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4*)
+ (*| Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)*)
+ (*| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3*)
+ (*| Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *)*)
+ (*| Get -> 1*)
+ (*| Set -> 4 (* According to the manual should be 3, but I measured 4 *)*)
+ (*| Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)*)
+ (*| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1*)
+ (*| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw*)
+ (*| Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4*)
+
+(*let rec empty_inter la = function*)
+ (*| [] -> true*)
+ (*| b::lb -> if (List.mem b la) then false else empty_inter la lb*)
+
+(*let rec_to_info r : inst_info =*)
+ (*let usage = rec_to_usage r*)
+ (*and latency = real_inst_to_latency r.inst*)
+ (*and reads_at_id = not (empty_inter r.read_locs r.read_at_id)*)
+ (*and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1)*)
+ (*in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control;*)
+ (*reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 }*)
+
+(*let instruction_infos bb = List.map rec_to_info (instruction_recs bb)*)
+
+(*let instruction_usages bb =*)
+ (*let usages = List.map (fun info -> info.usage) (instruction_infos bb)*)
+ (*in Array.of_list usages*)
(**
* Latency constraints building
*)
-(* type access = { inst: int; loc: location } *)
-
-let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr
-
-let loc2int = function
- | Mem -> 1
- | Reg pr -> preg2int pr
-
-(* module HashedLoc = struct
- type t = { loc: location; key: int }
- let equal l1 l2 = (l1.key = l2.key)
- let hash l = l.key
- let create (l:location) : t = { loc=l; key = loc2int l }
-end *)
-
-(* module LocHash = Hashtbl.Make(HashedLoc) *)
-module LocHash = Hashtbl
-
-(* Hash table : location => list of instruction ids *)
-
-let rec intlist n =
- if n < 0 then failwith "intlist: n < 0"
- else if n = 0 then []
- else (n-1) :: (intlist (n-1))
-
-let find_in_hash hashloc loc =
- match LocHash.find_opt hashloc loc with
- | Some idl -> idl
- | None -> []
-
-(* Returns a list of instruction ids *)
-let rec get_accesses hashloc (ll: location list) = match ll with
- | [] -> []
- | loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)
-
-let compute_latency (ifrom: inst_info) (ito: inst_info) =
- let dlat = inst_info_to_dlatency ito
- in let lat = ifrom.latency + dlat
- in assert (lat >= 0); if (lat == 0) then 1 else lat
-
-let latency_constraints bb =
- let written = LocHash.create 70
- and read = LocHash.create 70
- and count = ref 0
- and constraints = ref []
- and instr_infos = instruction_infos bb
- in let step (i: inst_info) =
- let raw = get_accesses written i.read_locs
- and waw = get_accesses written i.write_locs
- and war = get_accesses read i.write_locs
- in begin
- List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
- latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw;
- List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
- latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw;
- List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war;
- if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count);
- (* Updating "read" and "written" hashmaps *)
- List.iter (fun loc ->
- begin
- LocHash.replace written loc [!count];
- LocHash.replace read loc []; (* Clearing all the entries of "read" hashmap when a register is written *)
- end) i.write_locs;
- List.iter (fun loc -> LocHash.replace read loc ((!count) :: (find_in_hash read loc))) i.read_locs;
- count := !count + 1
- end
- in (List.iter step instr_infos; !constraints)
+(*(* type access = { inst: int; loc: location } *)*)
+
+(*let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr*)
+
+(*let loc2int = function*)
+ (*| Mem -> 1*)
+ (*| Reg pr -> preg2int pr*)
+
+(*(* module HashedLoc = struct*)
+ (*type t = { loc: location; key: int }*)
+ (*let equal l1 l2 = (l1.key = l2.key)*)
+ (*let hash l = l.key*)
+ (*let create (l:location) : t = { loc=l; key = loc2int l }*)
+(*end *)*)
+
+(*(* module LocHash = Hashtbl.Make(HashedLoc) *)*)
+(*module LocHash = Hashtbl*)
+
+(*(* Hash table : location => list of instruction ids *)*)
+
+(*let rec intlist n =*)
+ (*if n < 0 then failwith "intlist: n < 0"*)
+ (*else if n = 0 then []*)
+ (*else (n-1) :: (intlist (n-1))*)
+
+(*let find_in_hash hashloc loc =*)
+ (*match LocHash.find_opt hashloc loc with*)
+ (*| Some idl -> idl*)
+ (*| None -> []*)
+
+(*(* Returns a list of instruction ids *)*)
+(*let rec get_accesses hashloc (ll: location list) = match ll with*)
+ (*| [] -> []*)
+ (*| loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)*)
+
+(*let compute_latency (ifrom: inst_info) (ito: inst_info) =*)
+ (*let dlat = inst_info_to_dlatency ito*)
+ (*in let lat = ifrom.latency + dlat*)
+ (*in assert (lat >= 0); if (lat == 0) then 1 else lat*)
+
+(*let latency_constraints bb =*)
+ (*let written = LocHash.create 70*)
+ (*and read = LocHash.create 70*)
+ (*and count = ref 0*)
+ (*and constraints = ref []*)
+ (*and instr_infos = instruction_infos bb*)
+ (*in let step (i: inst_info) =*)
+ (*let raw = get_accesses written i.read_locs*)
+ (*and waw = get_accesses written i.write_locs*)
+ (*and war = get_accesses read i.write_locs*)
+ (*in begin*)
+ (*List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;*)
+ (*latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw;*)
+ (*List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;*)
+ (*latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw;*)
+ (*List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war;*)
+ (*if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count);*)
+ (*(* Updating "read" and "written" hashmaps *)*)
+ (*List.iter (fun loc ->*)
+ (*begin *)
+ (*LocHash.replace written loc [!count];*)
+ (*LocHash.replace read loc []; (* Clearing all the entries of "read" hashmap when a register is written *)*)
+ (*end) i.write_locs;*)
+ (*List.iter (fun loc -> LocHash.replace read loc ((!count) :: (find_in_hash read loc))) i.read_locs;*)
+ (*count := !count + 1*)
+ (*end*)
+ (*in (List.iter step instr_infos; !constraints)*)
(**
* Using the InstructionScheduler
*)
-let build_problem bb =
- { max_latency = -1; resource_bounds = resource_bounds;
- instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb }
+(* TODO RESUME HERE
+(*let build_problem bb = *)
+ (*{ max_latency = -1; resource_bounds = resource_bounds;*)
+ (*instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb }*)
let rec find_min_opt (l: int option list) =
match l with
@@ -810,7 +729,7 @@ let rec filter_indexes predicate = function
let get_from_indexes indexes l = List.map (List.nth l) indexes
let is_basic = function PBasic _ -> true | _ -> false
-let is_control = function PControl _ -> true | _ -> false
+let is_control = function PCtlFlow _ -> true | PExpand _ -> true | _ -> false
let to_basic = function PBasic i -> i | _ -> failwith "to_basic: control instruction found"
let to_control = function PControl i -> i | _ -> failwith "to_control: basic instruction found"
@@ -818,13 +737,10 @@ let bundlize li hd =
let last = List.nth li (List.length li - 1)
in if is_control last then
let cut_li = Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1)
- in let bli = List.map to_basic cut_li
- in { header = hd; body = bli; exit = Some (to_control last) }
+ in { header = hd; body = cut_li; exit = Some (to_control last) }
else
- let bli = List.map to_basic li
- in { header = hd; body = bli; exit = None }
+ { header = hd; body = li; exit = None }
-let apply_pbasic b = PBasic b
let extract_some o = match o with Some e -> e | None -> failwith "extract_some: None found"
let rec find_min = function
@@ -891,7 +807,7 @@ let minpack_list (l: int list) =
in List.map (fun m -> find_all_indices m l) mins
*)
-let bb_to_instrs bb = (List.map apply_pbasic bb.body) @ (match bb.exit with None -> [] | Some e -> [PControl e])
+let bb_to_instrs bb = bb.body @ (match bb.exit with None -> [] | Some e -> [Some e])
let bundlize_solution bb sol =
let tmp = (Array.to_list @@ Array.sub sol 0 (Array.length sol - 1))
@@ -901,69 +817,54 @@ let bundlize_solution bb sol =
| [] -> []
| pack :: packs -> bundlize (get_from_indexes pack instrs) hd :: (bund [] packs)
in bund bb.header packs
-
-let print_inst oc = function
- | Asm.Pallocframe(sz, ofs) -> fprintf oc " Pallocframe\n"
- | Asm.Pfreeframe(sz, ofs) -> fprintf oc " Pfreeframe\n"
- | Asm.Pbuiltin(ef, args, res) -> fprintf oc " Pbuiltin\n"
- | Asm.Pcvtl2w(rd, rs) -> fprintf oc " Pcvtl2w %a = %a\n" ireg rd ireg rs
- | i -> print_instruction oc i
-
-let print_bb oc bb =
- let asm_instructions = Asm.unfold_bblock bb
- in List.iter (print_inst oc) asm_instructions
-
-let print_schedule sched =
- print_string "[ ";
- Array.iter (fun x -> Printf.printf "%d; " x) sched;
- print_endline "]";;
-
-let do_schedule bb =
- let problem = build_problem bb in
- (if debug then print_problem stdout problem);
- let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem
- in match solution with
- | None -> failwith "Could not find a valid schedule"
- | Some sol ->
- ((if debug then print_schedule sol);
- let bundles = bundlize_solution bb sol in
- (if debug then
- begin
- Printf.eprintf "Scheduling the following group of instructions:\n";
- print_bb stderr bb;
- Printf.eprintf "Gave the following solution:\n";
- List.iter (print_bb stderr) bundles;
- Printf.eprintf "--------------------------------\n"
- end;
- bundles))
+*)
+(*let print_inst oc = function*)
+ (*| Asm.Pallocframe(sz, ofs) -> fprintf oc " Pallocframe\n"*)
+ (*| Asm.Pfreeframe(sz, ofs) -> fprintf oc " Pfreeframe\n"*)
+ (*| Asm.Pbuiltin(ef, args, res) -> fprintf oc " Pbuiltin\n"*)
+ (*| i -> print_instruction oc i*)
+
+(*let print_bb oc bb =*)
+ (*let asm_instructions = Asm.unfold_bblock bb*)
+ (*in List.iter (print_inst oc) asm_instructions*)
+
+(*let print_schedule sched =*)
+ (*print_string "[ ";*)
+ (*Array.iter (fun x -> Printf.printf "%d; " x) sched;*)
+ (*print_endline "]";;*)
+
+(*let do_schedule bb =*)
+ (*let problem = build_problem bb in*)
+ (*(if debug then print_problem stdout problem);*)
+ (*let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem*)
+ (*in match solution with*)
+ (*| None -> failwith "Could not find a valid schedule"*)
+ (*| Some sol ->*)
+ (*((if debug then print_schedule sol);*)
+ (*let bundles = bundlize_solution bb sol in *)
+ (*(if debug then*)
+ (*begin*)
+ (*Printf.eprintf "Scheduling the following group of instructions:\n";*)
+ (*print_bb stderr bb;*)
+ (*Printf.eprintf "Gave the following solution:\n";*)
+ (*List.iter (print_bb stderr) bundles;*)
+ (*Printf.eprintf "--------------------------------\n"*)
+ (*end;*)
+ (*bundles))*)
(**
* Dumb schedule if the above doesn't work
*)
-let bundlize_label l =
- match l with
- | [] -> []
- | l -> [{ header = l; body = []; exit = None }]
-
-let rec bundlize_basic l =
- match l with
- | [] -> []
- | b :: l -> { header = []; body = [b]; exit = None } :: bundlize_basic l
-
-let bundlize_exit e =
- match e with
- | Some e -> [{ header = []; body = []; exit = Some e }]
- | None -> []
-
-let dumb_schedule (bb : bblock) : bblock list = bundlize_label bb.header @ bundlize_basic bb.body @ bundlize_exit bb.exit
+(* Identity scheduling *)
+let dumb_schedule (bb : bblock) = (bb.body, bb.exit)
(**
* Separates the opaque instructions such as Pfreeframe and Pallocframe
*)
let is_opaque = function
- | PBasic (Pallocframe _) | PBasic (Pfreeframe _) | PControl (PExpand (Pbuiltin _)) -> true
+ | Pallocframe _ | Pfreeframe _ -> true
| _ -> false
(* Returns : (accumulated instructions, remaining instructions, opaque instruction if found) *)
@@ -972,58 +873,52 @@ let rec biggest_wo_opaque = function
| i :: li -> if is_opaque i then ([], li, Some i)
else let big, rem, opaque = biggest_wo_opaque li in (i :: big, rem, opaque);;
-let separate_opaque bb =
- let instrs = bb_to_instrs bb
- in let rec f hd li =
- match li with
- | [] -> []
- | li -> let big, rem, opaque = biggest_wo_opaque li in
- match opaque with
- | Some i ->
- (match big with
- | [] -> (bundlize [i] hd) :: (f [] rem)
- | big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem)
- )
- | None -> (bundlize big hd) :: (f [] rem)
- in f bb.header instrs
-
-let smart_schedule bb =
- let lbb = separate_opaque bb
- in let rec f = function
- | [] -> []
- | bb :: lbb ->
- let bundles =
- try do_schedule bb
- with OpaqueInstruction -> dumb_schedule bb
- | e ->
- let msg = Printexc.to_string e
- and stack = Printexc.get_backtrace ()
- in begin
- Printf.eprintf "In regards to this group of instructions:\n";
- print_bb stderr bb;
- Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack;
- failwith "Invalid schedule"
- (*
- Printf.eprintf "Issuing one instruction per bundle instead\n\n";
- dumb_schedule bb
- *)
- end
- in bundles @ (f lbb)
- in f lbb
-
-let bblock_to_bundles bb =
- if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb);
+(*let separate_opaque bb =*)
+ (*let instrs = bb_to_instrs bb*)
+ (*in let rec f hd li =*)
+ (*match li with*)
+ (*| [] -> []*)
+ (*| li -> let big, rem, opaque = biggest_wo_opaque li in*)
+ (*match opaque with*)
+ (*| Some i ->*)
+ (*(match big with*)
+ (*| [] -> (bundlize [i] hd) :: (f [] rem)*)
+ (*| big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem)*)
+ (**)
+ (*| None -> (bundlize big hd) :: (f [] rem)*)
+ (*in f bb.header instrs*)
+
+(*let smart_schedule bb =*)
+ (*let lbb = separate_opaque bb*)
+ (*in let rec f = function*)
+ (*| [] -> []*)
+ (*| bb :: lbb -> *)
+ (*let bundles =*)
+ (*try do_schedule bb*)
+ (*with OpaqueInstruction -> dumb_schedule bb*)
+ (*| e -> *)
+ (*let msg = Printexc.to_string e*)
+ (*and stack = Printexc.get_backtrace ()*)
+ (*in begin*)
+ (*Printf.eprintf "In regards to this group of instructions:\n";*)
+ (*print_bb stderr bb;*)
+ (*Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack;*)
+ (*failwith "Invalid schedule"*)
+ (*(**)
+ (*Printf.eprintf "Issuing one instruction per bundle instead\n\n";*)
+ (*dumb_schedule bb*)
+ (**)*)
+ (*end*)
+ (*in bundles @ (f lbb)*)
+ (*in f lbb*)
+
+let bblock_schedule bb =
+ (*if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb);*)
(* print_problem (build_problem bb); *)
- if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb
-
-(** To deal with the Coq Axiom schedule : bblock -> (list (list basic)) * option control *)
-
-let rec bundles_to_coq_schedule = function
- | [] -> ([], None)
- | bb :: [] -> ([bb.body], bb.exit)
- | bb :: lbb -> let (llb, oc) = bundles_to_coq_schedule lbb in (bb.body :: llb, oc)
+ (*TODO if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb*)
+ dumb_schedule bb
(** Called schedule function from Coq *)
-let schedule_notime bb = let toto = bundles_to_coq_schedule @@ bblock_to_bundles bb in toto
-let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) schedule_notime bb
+(*let schedule_notime bb = let toto = bblock_schedule in toto*)
+let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) bblock_schedule bb
diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v
index 3d08232b..86a160bd 100644
--- a/aarch64/PostpassSchedulingproof.v
+++ b/aarch64/PostpassSchedulingproof.v
@@ -23,8 +23,7 @@ Require Import Axioms.
Local Open Scope error_monad_scope.
-(*
-Definition match_prog (p tp: Asmvliw.program) :=
+Definition match_prog (p tp: Asmblock.program) :=
match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
Lemma transf_program_match:
@@ -33,6 +32,7 @@ Proof.
intros. eapply match_transform_partial_program; eauto.
Qed.
+(*
Lemma regset_double_set_id:
forall r (rs: regset) v1 v2,
(rs # r <- v1 # r <- v2) = (rs # r <- v2).