diff options
-rw-r--r-- | aarch64/Asmgen.v | 4 | ||||
-rw-r--r-- | aarch64/Asmgenproof.v | 18 | ||||
-rw-r--r-- | aarch64/InstructionScheduler.ml | 1263 | ||||
-rw-r--r-- | aarch64/InstructionScheduler.mli | 113 | ||||
-rw-r--r-- | aarch64/OpWeightsAsm.ml | 212 | ||||
-rw-r--r-- | aarch64/PostpassSchedulingOracle.ml | 1611 | ||||
-rw-r--r-- | aarch64/PostpassSchedulingproof.v | 4 |
7 files changed, 2356 insertions, 869 deletions
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index c25454e0..bd6b55cc 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -360,5 +360,5 @@ Definition transf_program (p: Mach.program) : res Asm.program := let mbp := Machblockgen.transf_program p in do mbp' <- PseudoAsmblockproof.transf_program mbp; do abp <- Asmblockgen.transf_program mbp'; - (*do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp;*) - Asmblock_TRANSF.transf_program abp. + do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp; + Asmblock_TRANSF.transf_program abp'. diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v index 62b65a14..d062654b 100644 --- a/aarch64/Asmgenproof.v +++ b/aarch64/Asmgenproof.v @@ -18,7 +18,7 @@ Require Import Coqlib Errors. Require Import Integers Floats AST Linking. Require Import Values Memory Events Globalenvs Smallstep. -Require Import Op Locations Machblock Conventions PseudoAsmblock Asm Asmblock. +Require Import Op Locations Machblock Conventions PseudoAsmblock PseudoAsmblockproof Asm Asmblock. Require Machblockgenproof Asmblockgenproof PostpassSchedulingproof. Require Import Asmgen. Require Import Axioms. @@ -2321,7 +2321,7 @@ Definition block_passes := mkpass Machblockgenproof.match_prog ::: mkpass PseudoAsmblockproof.match_prog ::: mkpass Asmblockgenproof.match_prog - (*::: mkpass PostpassSchedulingproof.match_prog*) + ::: mkpass PostpassSchedulingproof.match_prog ::: mkpass Asmblock_PRESERVATION.match_prog ::: pass_nil _. @@ -2333,11 +2333,13 @@ Proof. intros p tp H. unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H. inversion_clear H. apply bind_inversion in H1. destruct H1. - inversion_clear H. inversion H2. remember (Machblockgen.transf_program p) as mbp. + inversion_clear H. apply bind_inversion in H2. destruct H2. inversion_clear H. + unfold Compopts.time in *. remember (Machblockgen.transf_program p) as mbp. unfold match_prog; simpl. exists mbp; split. apply Machblockgenproof.transf_program_match; auto. exists x; split. apply PseudoAsmblockproof.transf_program_match; auto. exists x0; split. apply Asmblockgenproof.transf_program_match; auto. + exists x1; split. apply PostpassSchedulingproof.transf_program_match; auto. exists tp; split. apply Asmblock_PRESERVATION.transf_program_match; auto. auto. Qed. @@ -2364,19 +2366,21 @@ Let tge := Genv.globalenv tprog. Theorem transf_program_correct: forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog). Proof. +Admitted. +(* TODO unfold match_prog in TRANSF. simpl in TRANSF. - inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H. + inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H. inv H4. inv H. eapply compose_forward_simulations. { exploit Machblockgenproof.transf_program_correct; eauto. } eapply compose_forward_simulations. + apply PseudoAsmblockproof.transf_program_correct; eauto. - intros; apply Asmblockgenproof.next_progress. - - intros; eapply Asmblockgenproof.functions_bound_max_pos; eauto. - { intros; eapply Asmblock_PRESERVATION.symbol_high_low; eauto. } + - intros. eapply .functions_bound_max_pos; eauto. + { intros. eapply Asmblock_PRESERVATION.symbol_high_low. eauto. } + eapply compose_forward_simulations. apply Asmblockgenproof.transf_program_correct; eauto. { intros; eapply Asmblock_PRESERVATION.symbol_high_low; eauto. } apply Asmblock_PRESERVATION.transf_program_correct. eauto. -Qed. +Qed.*) End PRESERVATION. diff --git a/aarch64/InstructionScheduler.ml b/aarch64/InstructionScheduler.ml new file mode 100644 index 00000000..eab0b21a --- /dev/null +++ b/aarch64/InstructionScheduler.ml @@ -0,0 +1,1263 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +let with_destructor dtor stuff f = + try let ret = f stuff in + dtor stuff; + ret + with exn -> dtor stuff; + raise exn;; + +let with_out_channel chan f = with_destructor close_out chan f;; +let with_in_channel chan f = with_destructor close_in chan f;; + +(** Schedule instructions on a synchronized pipeline +@author David Monniaux, CNRS, VERIMAG *) + +type latency_constraint = { + instr_from : int; + instr_to : int; + latency : int };; + +type problem = { + max_latency : int; + resource_bounds : int array; + instruction_usages : int array array; + latency_constraints : latency_constraint list; + };; + +let print_problem channel problem = + (if problem.max_latency >= 0 + then Printf.fprintf channel "max makespan: %d\n" problem.max_latency); + output_string channel "resource bounds:"; + (Array.iter (fun b -> Printf.fprintf channel " %d" b) problem.resource_bounds); + output_string channel ";\n"; + (Array.iteri (fun i v -> + Printf.fprintf channel "instr%d:" i; + (Array.iter (fun b -> Printf.fprintf channel " %d" b) v); + output_string channel ";\n") problem.instruction_usages); + List.iter (fun instr -> + Printf.printf "t%d - t%d >= %d;\n" + instr.instr_to instr.instr_from instr.latency) + problem.latency_constraints;; + +let get_nr_instructions problem = Array.length problem.instruction_usages;; +let get_nr_resources problem = Array.length problem.resource_bounds;; + +type solution = int array +type scheduler = problem -> solution option + +(* DISABLED +(** Schedule the problem optimally by constraint solving using the Gecode solver. *) +external gecode_scheduler : problem -> solution option = + "caml_gecode_schedule_instr";; + *) + +let maximum_slot_used times = + let maxi = ref (-1) in + for i=0 to (Array.length times)-2 + do + maxi := max !maxi times.(i) + done; + !maxi;; + +let check_schedule (problem : problem) (times : solution) = + let nr_instructions = get_nr_instructions problem in + (if Array.length times <> nr_instructions+1 + then failwith + (Printf.sprintf "check_schedule: %d times expected, got %d" + (nr_instructions+1) (Array.length times))); + (if problem.max_latency >= 0 && times.(nr_instructions)> problem.max_latency + then failwith "check_schedule: max_latency exceeded"); + (Array.iteri (fun i time -> + (if time < 0 + then failwith (Printf.sprintf "time[%d] < 0" i))) times); + let slot_resources = Array.init ((maximum_slot_used times)+1) + (fun _ -> Array.copy problem.resource_bounds) in + for i=0 to nr_instructions -1 + do + let remaining_resources = slot_resources.(times.(i)) + and used_resources = problem.instruction_usages.(i) in + for resource=0 to (Array.length used_resources)-1 + do + let after = remaining_resources.(resource) - used_resources.(resource) in + (if after < 0 + then failwith (Printf.sprintf "check_schedule: instruction %d exceeds resource %d at slot %d" i resource times.(i))); + remaining_resources.(resource) <- after + done + done; + List.iter (fun ctr -> + if times.(ctr.instr_to) - times.(ctr.instr_from) < ctr.latency + then failwith (Printf.sprintf "check_schedule: time[%d]=%d - time[%d]=%d < %d" + ctr.instr_to times.(ctr.instr_to) + ctr.instr_from times.(ctr.instr_from) + ctr.latency) + ) problem.latency_constraints;; + +let bound_max_time problem = + let total = ref(Array.length problem.instruction_usages) in + List.iter (fun ctr -> total := !total + ctr.latency) problem.latency_constraints; + !total;; + +let vector_less_equal a b = + try + Array.iter2 (fun x y -> + if x>y + then raise Exit) a b; + true + with Exit -> false;; + +let vector_subtract a b = + assert ((Array.length a) = (Array.length b)); + for i=0 to (Array.length a)-1 + do + b.(i) <- b.(i) - a.(i) + done;; + +(* The version with critical path ordering is much better! *) +type list_scheduler_order = + | INSTRUCTION_ORDER + | CRITICAL_PATH_ORDER;; + +let int_max (x : int) (y : int) = + if x > y then x else y;; + +let int_min (x : int) (y : int) = + if x < y then x else y;; + +let get_predecessors problem = + let nr_instructions = get_nr_instructions problem in + let predecessors = Array.make (nr_instructions+1) [] in + List.iter (fun ctr -> + predecessors.(ctr.instr_to) <- + (ctr.instr_from, ctr.latency)::predecessors.(ctr.instr_to)) + problem.latency_constraints; + predecessors;; + +let get_successors problem = + let nr_instructions = get_nr_instructions problem in + let successors = Array.make nr_instructions [] in + List.iter (fun ctr -> + successors.(ctr.instr_from) <- + (ctr.instr_to, ctr.latency)::successors.(ctr.instr_from)) + problem.latency_constraints; + successors;; + +let critical_paths successors = + let nr_instructions = Array.length successors in + let path_lengths = Array.make nr_instructions (-1) in + let rec compute i = + if i=nr_instructions then 0 else + match path_lengths.(i) with + | -2 -> failwith "InstructionScheduler: the dependency graph has cycles" + | -1 -> path_lengths.(i) <- -2; + let x = List.fold_left + (fun cur (j, latency)-> int_max cur (latency+(compute j))) + 1 successors.(i) + in path_lengths.(i) <- x; x + | x -> x + in for i = nr_instructions-1 downto 0 + do + ignore (compute i) + done; + path_lengths;; + +let maximum_critical_path problem = + let paths = critical_paths (get_successors problem) in + Array.fold_left int_max 0 paths;; + +let get_earliest_dates predecessors = + let nr_instructions = (Array.length predecessors)-1 in + let path_lengths = Array.make (nr_instructions+1) (-1) in + let rec compute i = + match path_lengths.(i) with + | -2 -> failwith "InstructionScheduler: the dependency graph has cycles" + | -1 -> path_lengths.(i) <- -2; + let x = List.fold_left + (fun cur (j, latency)-> int_max cur (latency+(compute j))) + 0 predecessors.(i) + in path_lengths.(i) <- x; x + | x -> x + in for i = 0 to nr_instructions + do + ignore (compute i) + done; + for i = 0 to nr_instructions - 1 + do + path_lengths.(nr_instructions) <- int_max + path_lengths.(nr_instructions) (1 + path_lengths.(i)) + done; + path_lengths;; + +exception Unschedulable + +let get_latest_dates deadline successors = + let nr_instructions = Array.length successors + and path_lengths = critical_paths successors in + Array.init (nr_instructions + 1) + (fun i -> + if i < nr_instructions then + let path_length = path_lengths.(i) in + assert (path_length >= 1); + (if path_length > deadline + then raise Unschedulable); + deadline - path_length + else deadline);; + +let priority_list_scheduler (order : list_scheduler_order) + (problem : problem) : + solution option = + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + + let priorities = match order with + | INSTRUCTION_ORDER -> None + | CRITICAL_PATH_ORDER -> Some (critical_paths successors) in + + let module InstrSet = + Set.Make (struct type t=int + let compare = match priorities with + | None -> (fun x y -> x - y) + | Some p -> (fun x y -> + (match p.(y)-p.(x) with + | 0 -> x - y + | z -> z)) + end) in + + let max_time = bound_max_time problem in + let ready = Array.make max_time InstrSet.empty in + Array.iteri (fun i preds -> + if i<nr_instructions && preds=[] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and current_resources = Array.copy problem.resource_bounds + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert(!time >= 0); + !time + with Exit -> -1 + + in + let advance_time() = + begin + (if !current_time < max_time-1 + then + begin + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + ready.(!current_time) <- InstrSet.empty; + end); + incr current_time + end in + + let attempt_scheduling ready usages = + let result = ref (-1) in + try + InstrSet.iter (fun i -> + (* Printf.printf "trying scheduling %d\n" i; + pr int_vector usages.(i); + print _vector current_resources; *) + if vector_less_equal usages.(i) current_resources + then + begin + vector_subtract usages.(i) current_resources; + result := i; + raise Exit + end) ready; + -1 + with Exit -> !result in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time() + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> + begin + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ready.(to_time) <- InstrSet.add instr_to ready.(to_time)) + successors.(i); + successors.(i) <- [] + end + done; + try + let final_time = ref (-1) in + for i=0 to nr_instructions-1 + do + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i)+1 then final_time := times.(i)+1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time + then final_time := target_time + ) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + Some times + with Exit -> None;; + +let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; + +(* dummy code for placating ocaml's warnings *) +let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +type bundle = int list;; + +let rec extract_deps_to index = function + | [] -> [] + | dep :: deps -> let extracts = extract_deps_to index deps in + if (dep.instr_to == index) then + dep :: extracts + else + extracts + +exception InvalidBundle;; + +let dependency_check problem bundle index = + let index_deps = extract_deps_to index problem.latency_constraints in + List.iter (fun i -> + List.iter (fun dep -> + if (dep.instr_from == i) then raise InvalidBundle + ) index_deps + ) bundle;; + +let rec make_bundle problem resources bundle index = + let resources_copy = Array.copy resources in + let nr_instructions = get_nr_instructions problem in + if (index >= nr_instructions) then (bundle, index+1) else + let inst_usage = problem.instruction_usages.(index) in + try match vector_less_equal inst_usage resources with + | false -> raise InvalidBundle + | true -> ( + dependency_check problem bundle index; + vector_subtract problem.instruction_usages.(index) resources_copy; + make_bundle problem resources_copy (index::bundle) (index+1) + ) + with InvalidBundle -> (bundle, index);; + +let rec make_bundles problem index : bundle list = + if index >= get_nr_instructions problem then + [] + else + let (bundle, new_index) = make_bundle problem problem.resource_bounds [] index in + bundle :: (make_bundles problem new_index);; + +let bundles_to_schedule problem bundles : solution = + let nr_instructions = get_nr_instructions problem in + let schedule = Array.make (nr_instructions+1) (nr_instructions+4) in + let time = ref 0 in + List.iter (fun bundle -> + begin + List.iter (fun i -> + schedule.(i) <- !time + ) bundle; + time := !time + 1 + end + ) bundles; schedule;; + +let greedy_scheduler (problem : problem) : solution option = + let bundles = make_bundles problem 0 in + Some (bundles_to_schedule problem bundles);; + +(* alternate implementation +let swap_array_elements a i j = + let x = a.(i) in + a.(i) <- a.(j); + a.(j) <- x;; + +let array_reverse_slice a first last = + let i = ref first and j = ref last in + while i < j + do + swap_array_elements a !i !j; + incr i; + decr j + done;; + +let array_reverse a = + let a' = Array.copy a in + array_reverse_slice a' 0 ((Array.length a)-1); + a';; + *) + +(* unneeded +let array_reverse a = + let n=Array.length a in + Array.init n (fun i -> a.(n-1-i));; + *) + +let reverse_constraint nr_instructions ctr = + { instr_to = nr_instructions -ctr.instr_from; + instr_from = nr_instructions - ctr.instr_to; + latency = ctr.latency };; + +(* unneeded +let rec list_map_filter f = function + | [] -> [] + | h::t -> + (match f h with + | None -> list_map_filter f t + | Some x -> x :: (list_map_filter f t));; + *) + +let reverse_problem problem = + let nr_instructions = get_nr_instructions problem in + { + max_latency = problem.max_latency; + resource_bounds = problem.resource_bounds; + instruction_usages = Array.init (nr_instructions + 1) + (fun i -> + if i=0 + then Array.map (fun _ -> 0) problem.resource_bounds else problem.instruction_usages.(nr_instructions - i)); + latency_constraints = List.map (reverse_constraint nr_instructions) + problem.latency_constraints + };; + +let max_scheduled_time solution = + let time = ref (-1) in + for i = 0 to ((Array.length solution) - 2) + do + time := max !time solution.(i) + done; + !time;; + +(* +let recompute_makespan problem solution = + let n = (Array.length solution) - 1 and ms = ref 0 in + List.iter (fun cstr -> + if cstr.instr_to = n + then ms := max !ms (solution.(cstr.instr_from) + cstr.latency) + ) problem.latency_constraints; + !ms;; + *) + +let schedule_reversed (scheduler : problem -> solution option) + (problem : problem) = + match scheduler (reverse_problem problem) with + | None -> None + | Some solution -> + let nr_instructions = get_nr_instructions problem in + let makespan = max_scheduled_time solution in + let ret = Array.init (nr_instructions + 1) + (fun i -> makespan-solution.(nr_instructions-i)) in + ret.(nr_instructions) <- max ((max_scheduled_time ret) + 1) + (ret.(nr_instructions)); + Some ret;; + +(** Schedule the problem using a greedy list scheduling algorithm, from the end. *) +let reverse_list_scheduler = schedule_reversed list_scheduler;; + +let check_problem problem = + (if (Array.length problem.instruction_usages) < 1 + then failwith "length(problem.instruction_usages) < 1");; + +let validated_scheduler (scheduler : problem -> solution option) + (problem : problem) = + check_problem problem; + match scheduler problem with + | None -> None + | (Some solution) as ret -> check_schedule problem solution; ret;; + +let get_max_latency solution = + solution.((Array.length solution)-1);; + +let show_date_ranges problem = + let deadline = problem.max_latency in + assert(deadline >= 0); + let successors = get_successors problem + and predecessors = get_predecessors problem in + let earliest_dates : int array = get_earliest_dates predecessors + and latest_dates : int array = get_latest_dates deadline successors in + assert ((Array.length earliest_dates) = + (Array.length latest_dates)); + Array.iteri (fun i early -> + let late = latest_dates.(i) in + Printf.printf "t[%d] in %d..%d\n" i early late) + earliest_dates;; + +type pseudo_boolean_problem_type = + | SATISFIABILITY + | OPTIMIZATION;; + +type pseudo_boolean_mapper = { + mapper_pb_type : pseudo_boolean_problem_type; + mapper_nr_instructions : int; + mapper_nr_pb_variables : int; + mapper_earliest_dates : int array; + mapper_latest_dates : int array; + mapper_var_offsets : int array; + mapper_final_predecessors : (int * int) list +};; + +(* Latency constraints are: + presence of instr-to at each t <= sum of presences of instr-from at compatible times + + if reverse_encoding + presence of instr-from at each t <= sum of presences of instr-to at compatible times *) + +(* Experiments show reverse_encoding=true multiplies time by 2 in sat4j + without making hard instances easier *) +let direct_encoding = false +and reverse_encoding = false +and delta_encoding = true + +let pseudo_boolean_print_problem channel problem pb_type = + let deadline = problem.max_latency in + assert (deadline > 0); + let nr_instructions = get_nr_instructions problem + and nr_resources = get_nr_resources problem + and successors = get_successors problem + and predecessors = get_predecessors problem in + let earliest_dates = get_earliest_dates predecessors + and latest_dates = get_latest_dates deadline successors in + let var_offsets = Array.make + (match pb_type with + | OPTIMIZATION -> nr_instructions+1 + | SATISFIABILITY -> nr_instructions) 0 in + let nr_pb_variables = + (let nr = ref 0 in + for i=0 to (match pb_type with + | OPTIMIZATION -> nr_instructions + | SATISFIABILITY -> nr_instructions-1) + do + var_offsets.(i) <- !nr; + nr := !nr + latest_dates.(i) - earliest_dates.(i) + 1 + done; + !nr) + and nr_pb_constraints = + (match pb_type with + | OPTIMIZATION -> nr_instructions+1 + | SATISFIABILITY -> nr_instructions) + + + (let count = ref 0 in + for t=0 to deadline-1 + do + for j=0 to nr_resources-1 + do + try + for i=0 to nr_instructions-1 + do + let usage = problem.instruction_usages.(i).(j) in + if t >= earliest_dates.(i) && t <= latest_dates.(i) + && usage > 0 then raise Exit + done + with Exit -> incr count + done + done; + !count) + + + (let count=ref 0 in + List.iter + (fun ctr -> + if ctr.instr_to < nr_instructions + then count := !count + 1 + latest_dates.(ctr.instr_to) + - earliest_dates.(ctr.instr_to) + + (if reverse_encoding + then 1 + latest_dates.(ctr.instr_from) + - earliest_dates.(ctr.instr_from) + else 0) + ) + problem.latency_constraints; + !count) + + + (match pb_type with + | OPTIMIZATION -> (1 + deadline - earliest_dates.(nr_instructions)) * nr_instructions + | SATISFIABILITY -> 0) + and measured_nr_constraints = ref 0 in + + let pb_var i t = + assert(t >= earliest_dates.(i)); + assert(t <= latest_dates.(i)); + let v = 1+var_offsets.(i)+t-earliest_dates.(i) in + assert(v <= nr_pb_variables); + Printf.sprintf "x%d" v in + + let end_constraint () = + begin + output_string channel ";\n"; + incr measured_nr_constraints + end in + + let gen_latency_constraint i_to i_from latency t_to = + Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n" + i_to i_from latency i_to t_to; + for t_from=earliest_dates.(i_from) to + int_min latest_dates.(i_from) (t_to - latency) + do + Printf.fprintf channel "+1 %s " (pb_var i_from t_from) + done; + Printf.fprintf channel "-1 %s " (pb_var i_to t_to); + Printf.fprintf channel ">= 0"; + end_constraint() + + and gen_dual_latency_constraint i_to i_from latency t_from = + Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n" + i_to i_from latency i_to t_from; + for t_to=int_max earliest_dates.(i_to) (t_from + latency) + to latest_dates.(i_to) + do + Printf.fprintf channel "+1 %s " (pb_var i_to t_to) + done; + Printf.fprintf channel "-1 %s " (pb_var i_from t_from); + Printf.fprintf channel ">= 0"; + end_constraint() + in + + Printf.fprintf channel "* #variable= %d #constraint= %d\n" nr_pb_variables nr_pb_constraints; + Printf.fprintf channel "* nr_instructions=%d deadline=%d\n" nr_instructions deadline; + begin + match pb_type with + | SATISFIABILITY -> () + | OPTIMIZATION -> + output_string channel "min:"; + for t=earliest_dates.(nr_instructions) to deadline + do + Printf.fprintf channel " %+d %s" t (pb_var nr_instructions t) + done; + output_string channel ";\n"; + end; + for i=0 to (match pb_type with + | OPTIMIZATION -> nr_instructions + | SATISFIABILITY -> nr_instructions-1) + do + let early = earliest_dates.(i) and late= latest_dates.(i) in + Printf.fprintf channel "* t[%d] in %d..%d\n" i early late; + for t=early to late + do + Printf.fprintf channel "+1 %s " (pb_var i t) + done; + Printf.fprintf channel "= 1"; + end_constraint() + done; + + for t=0 to deadline-1 + do + for j=0 to nr_resources-1 + do + let bound = problem.resource_bounds.(j) + and coeffs = ref [] in + for i=0 to nr_instructions-1 + do + let usage = problem.instruction_usages.(i).(j) in + if t >= earliest_dates.(i) && t <= latest_dates.(i) + && usage > 0 + then coeffs := (i, usage) :: !coeffs + done; + if !coeffs <> [] then + begin + Printf.fprintf channel "* resource #%d at t=%d <= %d\n" j t bound; + List.iter (fun (i, usage) -> + Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs; + Printf.fprintf channel ">= %d" (-bound); + end_constraint(); + end + done + done; + + List.iter + (fun ctr -> + if ctr.instr_to < nr_instructions then + begin + for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to) + do + gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to + done; + if reverse_encoding + then + for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from) + do + gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from + done + end + ) problem.latency_constraints; + + begin + match pb_type with + | SATISFIABILITY -> () + | OPTIMIZATION -> + let final_latencies = Array.make nr_instructions 1 in + List.iter (fun (i, latency) -> + final_latencies.(i) <- int_max final_latencies.(i) latency) + predecessors.(nr_instructions); + for t_to=earliest_dates.(nr_instructions) to deadline + do + for i_from = 0 to nr_instructions -1 + do + gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to + done + done + end; + assert (!measured_nr_constraints = nr_pb_constraints); + { + mapper_pb_type = pb_type; + mapper_nr_instructions = nr_instructions; + mapper_nr_pb_variables = nr_pb_variables; + mapper_earliest_dates = earliest_dates; + mapper_latest_dates = latest_dates; + mapper_var_offsets = var_offsets; + mapper_final_predecessors = predecessors.(nr_instructions) + };; + +type pb_answer = + | Positive + | Negative + | Unknown + +let line_to_pb_solution sol line nr_pb_variables = + let assign s v = + begin + let i = int_of_string s in + sol.(i-1) <- v + end in + List.iter + begin + function "" -> () + | item -> + (match String.get item 0 with + | '+' -> + assert ((String.length item) >= 3); + assert ((String.get item 1) = 'x'); + assign (String.sub item 2 ((String.length item)-2)) Positive + | '-' -> + assert ((String.length item) >= 3); + assert ((String.get item 1) = 'x'); + assign (String.sub item 2 ((String.length item)-2)) Negative + | 'x' -> + assert ((String.length item) >= 2); + assign (String.sub item 1 ((String.length item)-1)) Positive + | _ -> failwith "syntax error in pseudo Boolean solution: epected + - or x" + ) + end + (String.split_on_char ' ' (String.sub line 2 ((String.length line)-2)));; + +let pb_solution_to_schedule mapper pb_solution = + Array.mapi (fun i offset -> + let first = mapper.mapper_earliest_dates.(i) + and last = mapper.mapper_latest_dates.(i) + and time = ref (-1) in + for t=first to last + do + match pb_solution.(t - first + offset) with + | Positive -> + (if !time = -1 + then time:=t + else failwith "duplicate time in pseudo boolean solution") + | Negative -> () + | Unknown -> failwith "unknown value in pseudo boolean solution" + done; + (if !time = -1 + then failwith "no time in pseudo boolean solution"); + !time + ) mapper.mapper_var_offsets;; + +let pseudo_boolean_read_solution mapper channel = + let optimum = ref (-1) + and optimum_found = ref false + and solution = Array.make mapper.mapper_nr_pb_variables Unknown in + try + while true do + match input_line channel with + | "" -> () + | line -> + begin + match String.get line 0 with + | 'c' -> () + | 'o' -> + assert ((String.length line) >= 2); + assert ((String.get line 1) = ' '); + optimum := int_of_string (String.sub line 2 ((String.length line)-2)) + | 's' -> (match line with + | "s OPTIMUM FOUND" -> optimum_found := true + | "s SATISFIABLE" -> () + | "s UNSATISFIABLE" -> close_in channel; + raise Unschedulable + | _ -> failwith line) + | 'v' -> line_to_pb_solution solution line mapper.mapper_nr_pb_variables + | x -> Printf.printf "unknown: %s\n" line + end + done; + assert false + with End_of_file -> + close_in channel; + begin + let sol = pb_solution_to_schedule mapper solution in + sol + end;; + +let recompute_max_latency mapper solution = + let maxi = ref (-1) in + for i=0 to (mapper.mapper_nr_instructions-1) + do + maxi := int_max !maxi (1+solution.(i)) + done; + List.iter (fun (i, latency) -> + maxi := int_max !maxi (solution.(i) + latency)) mapper.mapper_final_predecessors; + !maxi;; + +let adjust_check_solution mapper solution = + match mapper.mapper_pb_type with + | OPTIMIZATION -> + let max_latency = recompute_max_latency mapper solution in + assert (max_latency = solution.(mapper.mapper_nr_instructions)); + solution + | SATISFIABILITY -> + let max_latency = recompute_max_latency mapper solution in + Array.init (mapper.mapper_nr_instructions+1) + (fun i -> if i < mapper.mapper_nr_instructions + then solution.(i) + else max_latency);; + +(* let pseudo_boolean_solver = ref "/local/monniaux/progs/naps/naps" *) +(* let pseudo_boolean_solver = ref "/local/monniaux/packages/sat4j/org.sat4j.pb.jar CuttingPlanes" *) + +(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar CuttingPlanes" *) +(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar" *) +(* let pseudo_boolean_solver = ref "clasp" *) +(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/open-wbo/open-wbo_static -formula=1" *) +(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/naps/naps" *) +(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/minisatp/build/release/bin/minisatp" *) +(* let pseudo_boolean_solver = ref "java -jar sat4j-pb.jar CuttingPlanesStar" *) +let pseudo_boolean_solver = ref "pb_solver" + +let pseudo_boolean_scheduler pb_type problem = + try + let filename_in = "problem.opb" in + (* needed only if not using stdout and filename_out = "problem.sol" *) + let mapper = + with_out_channel (open_out filename_in) + (fun opb_problem -> + pseudo_boolean_print_problem opb_problem problem pb_type) in + Some (with_in_channel + (Unix.open_process_in (!pseudo_boolean_solver ^ " " ^ filename_in)) + (fun opb_solution -> adjust_check_solution mapper (pseudo_boolean_read_solution mapper opb_solution))) + with + | Unschedulable -> None;; + +let rec reoptimizing_scheduler (scheduler : scheduler) (previous_solution : solution) (problem : problem) = + if (get_max_latency previous_solution)>1 then + begin + Printf.printf "reoptimizing < %d\n" (get_max_latency previous_solution); + flush stdout; + match scheduler + { problem with max_latency = (get_max_latency previous_solution)-1 } + with + | None -> previous_solution + | Some solution -> reoptimizing_scheduler scheduler solution problem + end + else previous_solution;; + +let smt_var i = Printf.sprintf "t%d" i + +let is_resource_used problem j = + try + Array.iter (fun usages -> + if usages.(j) > 0 + then raise Exit) problem.instruction_usages; + false + with Exit -> true;; + +let smt_use_quantifiers = false + +let smt_print_problem channel problem = + let nr_instructions = get_nr_instructions problem in + let gen_smt_resource_constraint time j = + output_string channel "(<= (+"; + Array.iteri + (fun i usages -> + let usage=usages.(j) in + if usage > 0 + then Printf.fprintf channel " (ite (= %s %s) %d 0)" + time (smt_var i) usage) + problem.instruction_usages; + Printf.fprintf channel ") %d)" problem.resource_bounds.(j) + in + output_string channel "(set-option :produce-models true)\n"; + for i=0 to nr_instructions + do + Printf.fprintf channel "(declare-const %s Int)\n" (smt_var i); + Printf.fprintf channel "(assert (>= %s 0))\n" (smt_var i) + done; + for i=0 to nr_instructions-1 + do + Printf.fprintf channel "(assert (< %s %s))\n" + (smt_var i) (smt_var nr_instructions) + done; + (if problem.max_latency > 0 + then Printf.fprintf channel "(assert (<= %s %d))\n" + (smt_var nr_instructions) problem.max_latency); + List.iter (fun ctr -> + Printf.fprintf channel "(assert (>= (- %s %s) %d))\n" + (smt_var ctr.instr_to) + (smt_var ctr.instr_from) + ctr.latency) problem.latency_constraints; + for j=0 to (Array.length problem.resource_bounds)-1 + do + if is_resource_used problem j + then + begin + if smt_use_quantifiers + then + begin + Printf.fprintf channel + "; resource #%d <= %d\n(assert (forall ((t Int)) " + j problem.resource_bounds.(j); + gen_smt_resource_constraint "t" j; + output_string channel "))\n" + end + else + begin + (if problem.max_latency < 0 + then failwith "quantifier explosion needs max latency"); + for t=0 to problem.max_latency + do + Printf.fprintf channel + "; resource #%d <= %d at t=%d\n(assert " + j problem.resource_bounds.(j) t; + gen_smt_resource_constraint (string_of_int t) j; + output_string channel ")\n" + done + end + end + done; + output_string channel "(check-sat)(get-model)\n";; + + +let ilp_print_problem channel problem pb_type = + let deadline = problem.max_latency in + assert (deadline > 0); + let nr_instructions = get_nr_instructions problem + and nr_resources = get_nr_resources problem + and successors = get_successors problem + and predecessors = get_predecessors problem in + let earliest_dates = get_earliest_dates predecessors + and latest_dates = get_latest_dates deadline successors in + + let pb_var i t = + Printf.sprintf "x%d_%d" i t in + + let gen_latency_constraint i_to i_from latency t_to = + Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n" + i_to i_from latency i_to t_to; + Printf.fprintf channel "c_%d_%d_%d_%d: " + i_to i_from latency t_to; + for t_from=earliest_dates.(i_from) to + int_min latest_dates.(i_from) (t_to - latency) + do + Printf.fprintf channel "+1 %s " (pb_var i_from t_from) + done; + Printf.fprintf channel "-1 %s " (pb_var i_to t_to); + output_string channel ">= 0\n" + + and gen_dual_latency_constraint i_to i_from latency t_from = + Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n" + i_to i_from latency i_to t_from; + Printf.fprintf channel "d_%d_%d_%d_%d: " + i_to i_from latency t_from; + for t_to=int_max earliest_dates.(i_to) (t_from + latency) + to latest_dates.(i_to) + do + Printf.fprintf channel "+1 %s " (pb_var i_to t_to) + done; + Printf.fprintf channel "-1 %s " (pb_var i_from t_from); + Printf.fprintf channel ">= 0\n" + + and gen_delta_constraint i_from i_to latency = + if delta_encoding + then Printf.fprintf channel "l_%d_%d_%d: +1 t%d -1 t%d >= %d\n" + i_from i_to latency i_to i_from latency + + in + + Printf.fprintf channel "\\ nr_instructions=%d deadline=%d\n" nr_instructions deadline; + begin + match pb_type with + | SATISFIABILITY -> output_string channel "Minimize dummy: 0\n" + | OPTIMIZATION -> + Printf.fprintf channel "Minimize\nmakespan: t%d\n" nr_instructions + end; + output_string channel "Subject To\n"; + for i=0 to (match pb_type with + | OPTIMIZATION -> nr_instructions + | SATISFIABILITY -> nr_instructions-1) + do + let early = earliest_dates.(i) and late= latest_dates.(i) in + Printf.fprintf channel "\\ t[%d] in %d..%d\ntimes%d: " i early late i; + for t=early to late + do + Printf.fprintf channel "+1 %s " (pb_var i t) + done; + Printf.fprintf channel "= 1\n" + done; + + for t=0 to deadline-1 + do + for j=0 to nr_resources-1 + do + let bound = problem.resource_bounds.(j) + and coeffs = ref [] in + for i=0 to nr_instructions-1 + do + let usage = problem.instruction_usages.(i).(j) in + if t >= earliest_dates.(i) && t <= latest_dates.(i) + && usage > 0 + then coeffs := (i, usage) :: !coeffs + done; + if !coeffs <> [] then + begin + Printf.fprintf channel "\\ resource #%d at t=%d <= %d\nr%d_%d: " j t bound j t; + List.iter (fun (i, usage) -> + Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs; + Printf.fprintf channel ">= %d\n" (-bound) + end + done + done; + + List.iter + (fun ctr -> + if ctr.instr_to < nr_instructions then + begin + gen_delta_constraint ctr.instr_from ctr.instr_to ctr.latency; + begin + if direct_encoding + then + for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to) + do + gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to + done + end; + begin + if reverse_encoding + then + for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from) + do + gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from + done + end + end + ) problem.latency_constraints; + + begin + match pb_type with + | SATISFIABILITY -> () + | OPTIMIZATION -> + let final_latencies = Array.make nr_instructions 1 in + List.iter (fun (i, latency) -> + final_latencies.(i) <- int_max final_latencies.(i) latency) + predecessors.(nr_instructions); + for i_from = 0 to nr_instructions -1 + do + gen_delta_constraint i_from nr_instructions final_latencies.(i_from) + done; + for t_to=earliest_dates.(nr_instructions) to deadline + do + for i_from = 0 to nr_instructions -1 + do + gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to + done + done + end; + for i=0 to (match pb_type with + | OPTIMIZATION -> nr_instructions + | SATISFIABILITY -> nr_instructions-1) + do + Printf.fprintf channel "ct%d : -1 t%d" i i; + let early = earliest_dates.(i) and late= latest_dates.(i) in + for t=early to late do + Printf.fprintf channel " +%d %s" t (pb_var i t) + done; + output_string channel " = 0\n" + done; + output_string channel "Bounds\n"; + for i=0 to (match pb_type with + | OPTIMIZATION -> nr_instructions + | SATISFIABILITY -> nr_instructions-1) + do + let early = earliest_dates.(i) and late= latest_dates.(i) in + begin + Printf.fprintf channel "%d <= t%d <= %d\n" early i late; + if true then + for t=early to late do + Printf.fprintf channel "0 <= %s <= 1\n" (pb_var i t) + done + end + done; + output_string channel "Integer\n"; + for i=0 to (match pb_type with + | OPTIMIZATION -> nr_instructions + | SATISFIABILITY -> nr_instructions-1) + do + Printf.fprintf channel "t%d " i + done; + output_string channel "\nBinary\n"; + for i=0 to (match pb_type with + | OPTIMIZATION -> nr_instructions + | SATISFIABILITY -> nr_instructions-1) + do + let early = earliest_dates.(i) and late= latest_dates.(i) in + for t=early to late do + output_string channel (pb_var i t); + output_string channel " " + done; + output_string channel "\n" + done; + output_string channel "End\n"; + { + mapper_pb_type = pb_type; + mapper_nr_instructions = nr_instructions; + mapper_nr_pb_variables = 0; + mapper_earliest_dates = earliest_dates; + mapper_latest_dates = latest_dates; + mapper_var_offsets = [| |]; + mapper_final_predecessors = predecessors.(nr_instructions) + };; + +(* Guess what? Cplex sometimes outputs 11.000000004 instead of integer 11 *) + +let positive_float_round x = truncate (x +. 0.5) + +let float_round (x : float) : int = + if x > 0.0 + then positive_float_round x + else - (positive_float_round (-. x)) + +let rounded_int_of_string x = float_round (float_of_string x) + +let ilp_read_solution mapper channel = + let times = Array.make + (match mapper.mapper_pb_type with + | OPTIMIZATION -> 1+mapper.mapper_nr_instructions + | SATISFIABILITY -> mapper.mapper_nr_instructions) (-1) in + try + while true do + let line = input_line channel in + ( if (String.length line) < 3 + then failwith (Printf.sprintf "bad ilp output: length(line) < 3: %s" line)); + match String.get line 0 with + | 'x' -> () + | 't' -> let space = + try String.index line ' ' + with Not_found -> + failwith "bad ilp output: no t variable number" + in + let tnumber = + try int_of_string (String.sub line 1 (space-1)) + with Failure _ -> + failwith "bad ilp output: not a variable number" + in + (if tnumber < 0 || tnumber >= (Array.length times) + then failwith (Printf.sprintf "bad ilp output: not a correct variable number: %d (%d)" tnumber (Array.length times))); + let value = + let s = String.sub line (space+1) ((String.length line)-space-1) in + try rounded_int_of_string s + with Failure _ -> + failwith (Printf.sprintf "bad ilp output: not a time number (%s)" s) + in + (if value < 0 + then failwith "bad ilp output: negative time"); + times.(tnumber) <- value + | '#' -> () + | '0' -> () + | _ -> failwith (Printf.sprintf "bad ilp output: bad variable initial, line = %s" line) + done; + assert false + with End_of_file -> + Array.iteri (fun i x -> + if i<(Array.length times)-1 + && x<0 then raise Unschedulable) times; + times;; + +let ilp_solver = ref "ilp_solver" + +let problem_nr = ref 0 + +let ilp_scheduler pb_type problem = + try + let filename_in = Printf.sprintf "problem%05d.lp" !problem_nr + and filename_out = Printf.sprintf "problem%05d.sol" !problem_nr in + incr problem_nr; + let mapper = with_out_channel (open_out filename_in) + (fun opb_problem -> ilp_print_problem opb_problem problem pb_type) in + + begin + match Unix.system (!ilp_solver ^ " " ^ filename_in ^ " " ^ filename_out) with + | Unix.WEXITED 0 -> + Some (with_in_channel + (open_in filename_out) + (fun opb_solution -> + adjust_check_solution mapper + (ilp_read_solution mapper opb_solution))) + | Unix.WEXITED _ -> failwith "failed to start ilp solver" + | _ -> None + end + with + | Unschedulable -> None;; + +let current_utime_all () = + let t = Unix.times() in + t.Unix.tms_cutime +. t.Unix.tms_utime;; + +let utime_all_fn fn arg = + let utime_start = current_utime_all () in + let output = fn arg in + let utime_end = current_utime_all () in + (output, utime_end -. utime_start);; + +let cascaded_scheduler (problem : problem) = + let (some_initial_solution, list_scheduler_time) = + utime_all_fn (validated_scheduler list_scheduler) problem in + match some_initial_solution with + | None -> None + | Some initial_solution -> + let (solution, reoptimizing_time) = utime_all_fn (reoptimizing_scheduler (validated_scheduler (ilp_scheduler SATISFIABILITY)) initial_solution) problem in + begin + let latency2 = get_max_latency solution + and latency1 = get_max_latency initial_solution in + Printf.printf "postpass %s: %d, %d, %d, %g, %g\n" + (if latency2 < latency1 then "REOPTIMIZED" else "unchanged") + (get_nr_instructions problem) + latency1 latency2 + list_scheduler_time reoptimizing_time; + flush stdout + end; + Some solution;; + +let scheduler_by_name name = + match name with + | "ilp" -> validated_scheduler cascaded_scheduler + | "list" -> validated_scheduler list_scheduler + | "revlist" -> validated_scheduler reverse_list_scheduler + | "greedy" -> greedy_scheduler + | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/aarch64/InstructionScheduler.mli b/aarch64/InstructionScheduler.mli new file mode 100644 index 00000000..85e2a5c6 --- /dev/null +++ b/aarch64/InstructionScheduler.mli @@ -0,0 +1,113 @@ +(** Schedule instructions on a synchronized pipeline +by David Monniaux, CNRS, VERIMAG *) + +(** A latency constraint: instruction number [instr_to] should be scheduled at least [latency] clock ticks before [instr_from]. + +It is possible to specify [latency]=0, meaning that [instr_to] can be scheduled at the same clock tick as [instr_from], but not before. + +[instr_to] can be the special value equal to the number of instructions, meaning that it refers to the final output latency. *) +type latency_constraint = { + instr_from : int; + instr_to : int; + latency : int; + } + +(** A scheduling problem. + +In addition to the latency constraints, the resource constraints should be satisfied: at every clock tick, the sum of vectors of resources used by the instructions scheduled at that tick does not exceed the resource bounds. +*) +type problem = { + max_latency : int; + (** An optional maximal total latency of the problem, after which the problem is deemed not schedulable. -1 means there should be no maximum. *) + + resource_bounds : int array; + (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) + + instruction_usages: int array array; + (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) + + latency_constraints : latency_constraint list + (** The latency constraints that must be satisfied *) + };; + +(** Print problem for human readability. *) +val print_problem : out_channel -> problem -> unit;; + +(** Scheduling solution. For {i n} instructions to schedule, and 0≤{i i}<{i n}, position {i i} contains the time to which instruction {i i} should be scheduled. Position {i n} contains the final output latency. *) +type solution = int array + +(** A scheduling algorithm. +The return value [Some x] is a solution [x]. +[None] means that scheduling failed. *) +type scheduler = problem -> solution option;; + +(* DISABLED +(** Schedule the problem optimally by constraint solving using the Gecode solver. *) +external gecode_scheduler : problem -> solution option + = "caml_gecode_schedule_instr" + *) + +(** Get the number the last scheduling time used for an instruction in a solution. +@return The last clock tick used *) +val maximum_slot_used : solution -> int + +(** Validate that a solution is truly a solution of a scheduling problem. +@raise Failure if validation fails *) +val check_schedule : problem -> solution -> unit + +(** Schedule the problem using a greedy list scheduling algorithm, from the start. +The first (according to instruction ordering) instruction that is ready (according to the latency constraints) is scheduled at the current clock tick. +Once a clock tick is full go to the next. + +@return [Some solution] when a solution is found, [None] if not. *) +val list_scheduler : problem -> solution option + +(** Schedule the problem using the order of instructions without any reordering *) +val greedy_scheduler : problem -> solution option + +(** Schedule a problem using a scheduler applied in the opposite direction, e.g. for list scheduling from the end instead of the start. BUGGY *) +val schedule_reversed : scheduler -> problem -> int array option + +(** Schedule a problem from the end using a list scheduler. BUGGY *) +val reverse_list_scheduler : problem -> int array option + +(** Check that a problem is well-formed. +@raise Failure if validation fails *) +val check_problem : problem -> unit + +(** Apply a scheduler and validate the result against the input problem. +@return The solution found +@raise Failure if validation fails *) +val validated_scheduler : scheduler -> problem -> solution option;; + +(** Get max latency from solution +@return Max latency *) +val get_max_latency : solution -> int;; + +(** Get the length of a maximal critical path +@return Max length *) +val maximum_critical_path : problem -> int;; + +(** Apply line scheduler then advanced solver +@return A solution if found *) +val cascaded_scheduler : problem -> solution option;; + +val show_date_ranges : problem -> unit;; + +type pseudo_boolean_problem_type = + | SATISFIABILITY + | OPTIMIZATION;; + +type pseudo_boolean_mapper +val pseudo_boolean_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;; +val pseudo_boolean_read_solution : pseudo_boolean_mapper -> in_channel -> solution;; +val pseudo_boolean_scheduler : pseudo_boolean_problem_type -> problem -> solution option;; + +val smt_print_problem : out_channel -> problem -> unit;; + +val ilp_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;; + +val ilp_scheduler : pseudo_boolean_problem_type -> problem -> solution option;; + +(** Schedule a problem using a scheduler given by a string name *) +val scheduler_by_name : string -> problem -> int array option;; diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml new file mode 100644 index 00000000..17869a99 --- /dev/null +++ b/aarch64/OpWeightsAsm.ml @@ -0,0 +1,212 @@ +open Asmblock;; +(* TODO remove? open PrepassSchedulingOracleDeps;;*) + +module Cortex_A53= + struct + let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) + let nr_non_pipelined_units = 1;; + + let latency_of_op (i : basic) (nargs : int) = + match i with + | PArith _ + | PArithP _ _ -> 1 (* XXX *) + | PArithPP + + let resources_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ + (* 32-bit integer arithmetic *) + | Oshift _ + | Oadd + | Oaddshift _ + | Oaddimm _ + | Oneg + | Onegshift _ + | Osub + | Osubshift _ -> [| 1 ; 1; 0; 0 |] + | Omul + | Omuladd + | Omulsub -> [| 1; 1; 1; 0 |] + | Odiv + | Odivu -> [| 1; 0; 0; 0 |] + | Oand + | Oandshift _ + | Oandimm _ + | Oor + | Oorshift _ + | Oorimm _ + | Oxor + | Oxorshift _ + | Oxorimm _ + | Onot + | Onotshift _ + | Obic + | Obicshift _ + | Oorn + | Oornshift _ + | Oeqv + | Oeqvshift _ + | Oshl + | Oshr + | Oshru + | Oshrximm _ + | Ozext _ + | Osext _ + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ + +(* 64-bit integer arithmetic *) + | Oshiftl _ + | Oextend _ + | Omakelong + | Olowlong + | Ohighlong + | Oaddl + | Oaddlshift _ + | Oaddlext _ + | Oaddlimm _ + | Onegl + | Oneglshift _ + | Osubl + | Osublshift _ + | Osublext _ -> [| 1 ; 1 ; 0; 0 |] + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> [| 1 ; 1 ; 1; 0 |] + | Odivl + | Odivlu -> [| 1; 0; 0; 0 |] + | Oandl + | Oandlshift _ + | Oandlimm _ + | Oorl + | Oorlshift _ + | Oorlimm _ + | Oxorl + | Oxorlshift _ + | Oxorlimm _ + | Onotl + | Onotlshift _ + | Obicl + | Obiclshift _ + | Oornl + | Oornlshift _ + | Oeqvl + | Oeqvlshift _ + | Oshll + | Oshrl + | Oshrlu + | Oshrlximm _ + | Ozextl _ + | Osextl _ + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> [| 1; 1; 0; 0 |] + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf + (* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> [| 1 ; 1; 1; 0 |] + +(* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] + | _ -> [| -1 |];; + + let resources_of_cond (cmp : condition) (nargs : int) = + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; + let latency_of_call _ _ = 6;; + + let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_call _ _ = resource_bounds;; + let resources_of_builtin _ = resource_bounds;; + end;; + +let get_opweights () : opweights = + match !Clflags.option_mtune with + | "cortex-a53" | "cortex-a35" | "" -> + { + pipelined_resource_bounds = Cortex_A53.resource_bounds; + nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units; + latency_of_op = Cortex_A53.latency_of_op; + resources_of_op = Cortex_A53.resources_of_op; + non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op; + latency_of_load = Cortex_A53.latency_of_load; + resources_of_load = Cortex_A53.resources_of_load; + resources_of_store = Cortex_A53.resources_of_store; + resources_of_cond = Cortex_A53.resources_of_cond; + latency_of_call = Cortex_A53.latency_of_call; + resources_of_call = Cortex_A53.resources_of_call; + resources_of_builtin = Cortex_A53.resources_of_builtin + } + | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index 2107ce22..31864384 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -12,12 +12,12 @@ (* *) (* *************************************************************) -open Asmvliw +(*open Asm*) open Asmblock -open Printf -open Camlcoq -open InstructionScheduler -open TargetPrinter.Target +(*open Printf*) +(*open Camlcoq*) +(*open InstructionScheduler*) +(*open TargetPrinter.Target*) let debug = false @@ -25,770 +25,689 @@ let debug = false * Extracting infos from Asmvliw instructions *) -type immediate = I32 of Integers.Int.int | I64 of Integers.Int64.int | Off of offset - -type location = Reg of preg | Mem +(* TODO remove? type immediate = I32 of Integers.Int.int | I64 of Integers.Int64.int | Off of offset*) +(*type location = Reg of preg | Mem*) type real_instruction = - (* ALU *) - | Addw | Andw | Compw | Mulw | Orw | Sbfw | Sbfxw | Sraw | Srlw | Sllw | Srsw | Rorw | Xorw - | Addd | Andd | Compd | Muld | Ord | Sbfd | Sbfxd | Srad | Srld | Slld | Srsd | Xord - | Nandw | Norw | Nxorw | Nandd | Nord | Nxord | Andnw | Ornw | Andnd | Ornd - | Maddw | Maddd | Msbfw | Msbfd | Cmoved - | Make | Nop | Extfz | Extfs | Insf - | Addxw | Addxd - (* LSU *) - | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo - | Sb | Sh | Sw | Sd | Sq | So - (* BCU *) - | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set - (* FPU *) - | Fabsd | Fabsw | Fnegw | Fnegd - | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw - | Fmind | Fminw | Fmaxd | Fmaxw | Finvw - | Ffmaw | Ffmad | Ffmsw | Ffmsd - | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz - | Fcompw | Fcompd - -type ab_inst_rec = { - inst: real_instruction; - write_locs : location list; - read_locs : location list; - read_at_id : location list; (* Must be contained in read_locs *) - read_at_e1 : location list; (* idem *) - imm : immediate option; - is_control : bool; -} - -(** Asmvliw constructor to real instructions *) + | Add | Adr | Adrp | And | Asr | B | Bic | Bl | Blr | Br | Cbnz | Cbz | Cls + | Clz | Cmn | Cmp | Csel | Cset | Eon | Eor | Fabs | Fadd | Fcmp | Fcsel + | Fcvt | Fcvtzs | Fcvtzu | Fdiv | Fmadd | Fmov | Fmsub | Fmul | Fnmadd + | Fnmul | Fnmsub | Fneg | Fsqrt | Fsub | Ldaxr | Ldp | Ldr | Ldrb | Ldrh + | Ldrsb | Ldrsh | Ldrsw | Lr | Lsl | Lsr | Madd | Mov | Movk | Movn | Movz + | Msub | Nop | Orn | Orr | Ret | Rev | Rev16 | Ror | Sbfiz | Sbfx | Scvtf + | Sdiv | Smulh | Stlxr | Stp | Str | Strb | Strh | Sub | Sxtb | Sxth | Sxtw + | Tbnz | Tbz | Tst | Ubfiz | Ubfx | Ucvtf | Udiv | Umulh | Uxtb | Uxth + | Uxtw | Uxtx + +(* TODO remove? type ab_inst_rec = {*) + (*inst: real_instruction;*) + (*write_locs : location list;*) + (*read_locs : location list;*) + (*read_at_id : location list; (* Must be contained in read_locs *)*) + (*read_at_e1 : location list; (* idem *)*) + (*imm : immediate option;*) + (*is_control : bool;*) +(*}*) + +(** Asm constructor to real instructions *) exception OpaqueInstruction -let arith_rr_real = function - | Pcvtl2w -> Addw - | Pmv -> Addd - | Pnegw -> Sbfw - | Pnegl -> Sbfd - | Psxwd -> Extfs - | Pzxwd -> Extfz - | Pextfz(_,_) -> Extfz - | Pextfs(_,_) -> Extfs - | Pextfzl(_,_) -> Extfz - | Pextfsl(_,_) -> Extfs - | Pfabsw -> Fabsw - | Pfabsd -> Fabsd - | Pfnegw -> Fnegw - | Pfnegd -> Fnegd - | Pfinvw -> Finvw - | Pfnarrowdw -> Fnarrowdw - | Pfwidenlwd -> Fwidenlwd - | Pfloatwrnsz -> Floatwz - | Pfloatuwrnsz -> Floatuwz - | Pfloatudrnsz -> Floatudz - | Pfloatdrnsz -> Floatdz - | Pfixedwrzz -> Fixedwz - | Pfixeduwrzz -> Fixeduwz - | Pfixeddrzz -> Fixeddz - | Pfixedudrzz -> Fixedudz - | Pfixeddrzz_i32 -> Fixeddz - | Pfixedudrzz_i32 -> Fixedudz - -let arith_rrr_real = function - | Pcompw it -> Compw - | Pcompl it -> Compd - | Pfcompw ft -> Fcompw - | Pfcompl ft -> Fcompd - | Paddw -> Addw - | Paddxw _ -> Addxw - | Psubw -> Sbfw - | Prevsubxw _ -> Sbfxw - | Pmulw -> Mulw - | Pandw -> Andw - | Pnandw -> Nandw - | Porw -> Orw - | Pnorw -> Norw - | Pxorw -> Xorw - | Pnxorw -> Nxorw - | Pandnw -> Andnw - | Pornw -> Ornw - | Psraw -> Sraw - | Psrlw -> Srlw - | Psrxw -> Srsw - | Psllw -> Sllw - | Paddl -> Addd - | Paddxl _ -> Addxd - | Psubl -> Sbfd - | Prevsubxl _ -> Sbfxd - | Pandl -> Andd - | Pnandl -> Nandd - | Porl -> Ord - | Pnorl -> Nord - | Pxorl -> Xord - | Pnxorl -> Nxord - | Pandnl -> Andnd - | Pornl -> Ornd - | Pmull -> Muld - | Pslll -> Slld - | Psrll -> Srld - | Psrxl -> Srsd - | Psral -> Srad - | Pfaddd -> Faddd - | Pfaddw -> Faddw - | Pfsbfd -> Fsbfd - | Pfsbfw -> Fsbfw - | Pfmuld -> Fmuld - | Pfmulw -> Fmulw - | Pfmind -> Fmind - | Pfminw -> Fminw - | Pfmaxd -> Fmaxd - | Pfmaxw -> Fmaxw - -let arith_rri32_real = function - | Pcompiw it -> Compw - | Paddiw -> Addw - | Paddxiw _ -> Addxw - | Prevsubiw -> Sbfw - | Prevsubxiw _ -> Sbfxw - | Pmuliw -> Mulw - | Pandiw -> Andw - | Pnandiw -> Nandw - | Poriw -> Orw - | Pnoriw -> Norw - | Pxoriw -> Xorw - | Pnxoriw -> Nxorw - | Pandniw -> Andnw - | Porniw -> Ornw - | Psraiw -> Sraw - | Psrxiw -> Srsw - | Psrliw -> Srlw - | Pslliw -> Sllw - | Proriw -> Rorw - | Psllil -> Slld - | Psrlil -> Srld - | Psrail -> Srad - | Psrxil -> Srsd - -let arith_rri64_real = function - | Pcompil it -> Compd - | Paddil -> Addd - | Prevsubil -> Sbfd - | Paddxil _ -> Addxd - | Prevsubxil _ -> Sbfxd - | Pmulil -> Muld - | Pandil -> Andd - | Pnandil -> Nandd - | Poril -> Ord - | Pnoril -> Nord - | Pxoril -> Xord - | Pnxoril -> Nxord - | Pandnil -> Andnd - | Pornil -> Ornd - - -let arith_arr_real = function - | Pinsf (_, _) -> Insf - | Pinsfl (_, _) -> Insf - -let arith_arrr_real = function - | Pfmaddfw -> Ffmaw - | Pfmaddfl -> Ffmad - | Pfmsubfw -> Ffmsw - | Pfmsubfl -> Ffmsd - | Pmaddw -> Maddw - | Pmaddl -> Maddd - | Pmsubw -> Msbfw - | Pmsubl -> Msbfd - | Pcmove _ -> Cmoved - | Pcmoveu _ -> Cmoved - -let arith_arri32_real = function - | Pmaddiw -> Maddw - | Pcmoveiw _ -> Cmoved - | Pcmoveuiw _ -> Cmoved - -let arith_arri64_real = function - | Pmaddil -> Maddd - | Pcmoveil _ -> Cmoved - | Pcmoveuil _ -> Cmoved - -let arith_ri32_real = Make - -let arith_ri64_real = Make - -let arith_rf32_real = Make - -let arith_rf64_real = Make - -let store_real = function - | Psb -> Sb - | Psh -> Sh - | Psw -> Sw - | Psw_a -> Sw - | Psd -> Sd - | Psd_a -> Sd - | Pfss -> Sw - | Pfsd -> Sd - -let load_real = function - | Plb -> Lbs - | Plbu -> Lbz - | Plh -> Lhs - | Plhu -> Lhz - | Plw -> Lws - | Plw_a -> Lws - | Pld -> Ld - | Pld_a -> Ld - | Pfls -> Lws - | Pfld -> Ld - -let set_real = Set -let get_real = Get -let nop_real = Nop -let loadsymbol_real = Make -let loadqrro_real = Lq -let loadorro_real = Lo -let storeqrro_real = Sq -let storeorro_real = So - -let ret_real = Ret -let call_real = Call -let icall_real = Icall -let goto_real = Goto -let igoto_real = Igoto -let jl_real = Goto -let cb_real = Cb -let cbu_real = Cb - -let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false; - read_at_id = []; read_at_e1 = [] } - -let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false; - read_at_id = []; read_at_e1 = [] } - -let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false; - read_at_id = []; read_at_e1 = [] } - -let arith_arri32_rec i rd rs imm32 = - let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> [] - in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false; - read_at_id = [] ; read_at_e1 = rae1 } - -let arith_arri64_rec i rd rs imm64 = - let rae1 = match i with Pmaddil -> [Reg rd] | _ -> [] - in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false; - read_at_id = []; read_at_e1 = rae1 } - -let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false; - read_at_id = []; read_at_e1 = [] } - -let arith_arrr_rec i rd rs1 rs2 = - let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> [] - in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false; - read_at_id = []; read_at_e1 = rae1 } - -let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false; - read_at_id = []; read_at_e1 = [] } - -let arith_r_rec i rd = match i with - (* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *) - | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed); - is_control = false; read_at_id = []; read_at_e1 = [] } - -let arith_rec i = - match i with - | PArithRRI32 (i, rd, rs, imm32) -> arith_rri32_rec i (IR rd) (IR rs) (Some (I32 imm32)) - | PArithRRI64 (i, rd, rs, imm64) -> arith_rri64_rec i (IR rd) (IR rs) (Some (I64 imm64)) - | PArithRRR (i, rd, rs1, rs2) -> arith_rrr_rec i (IR rd) (IR rs1) (IR rs2) - | PArithARR (i, rd, rs) -> arith_arr_rec i (IR rd) (IR rs) - (* Seems like single constant constructor types are elided *) - | PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32)) - | PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64)) - | PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2) - | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false; - read_at_id = []; read_at_e1 = [] } - | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false; - read_at_id = []; read_at_e1 = [] } - | PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = []; - imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []} - | PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = []; - imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []} - | PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs) - | PArithR (i, rd) -> arith_r_rec i (IR rd) - -let load_rec i = match i with - | PLoadRRO (trap, i, rs1, rs2, imm) -> - { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false; - read_at_id = []; read_at_e1 = [] } - | PLoadQRRO(rs, ra, imm) -> - let (rs0, rs1) = gpreg_q_expand rs in - { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false; - read_at_id = []; read_at_e1 = [] } - | PLoadORRO(rs, ra, imm) -> - let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in - { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)]; - imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []} - | PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) -> - { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false; - read_at_id = []; read_at_e1 = [] } - -let store_rec i = match i with - | PStoreRRO (i, rs, ra, imm) -> - { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm)); - read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false} - | PStoreQRRO (rs, ra, imm) -> - let (rs0, rs1) = gpreg_q_expand rs in - { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm)); - read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false} - | PStoreORRO (rs, ra, imm) -> - let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in - { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)]; - imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false} - | PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) -> - { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None; - read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false} - -let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false; - read_at_id = []; read_at_e1 = [] } - -let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false; - read_at_id = [Reg (IR rs)]; read_at_e1 = [] } - -let basic_rec i = - match i with - | PArith i -> arith_rec i - | PLoad i -> load_rec i - | PStore i -> store_rec i - | Pallocframe (_, _) -> raise OpaqueInstruction - | Pfreeframe (_, _) -> raise OpaqueInstruction - | Pget (rd, rs) -> get_rec rd rs - | Pset (rd, rs) -> set_rec rd rs - | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []} - -let expand_rec = function - | Pbuiltin _ -> raise OpaqueInstruction - -let ctl_flow_rec = function - | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []} - | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []} - | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true; - read_at_id = [Reg (IR r)]; read_at_e1 = [] } - | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []} - | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true; - read_at_id = [Reg (IR r)]; read_at_e1 = [] } - | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []} - | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true; - read_at_id = [Reg (IR rs)]; read_at_e1 = [] } - | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true; - read_at_id = [Reg (IR rs)]; read_at_e1 = [] } - | Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *) - -let control_rec i = - match i with - | PExpand i -> expand_rec i - | PCtlFlow i -> ctl_flow_rec i - -let rec basic_recs body = match body with - | [] -> [] - | bi :: body -> (basic_rec bi) :: (basic_recs body) - -let exit_rec exit = match exit with - | None -> [] - | Some ex -> [control_rec ex] - -let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit) +let arith_p_real = function + | Padrp(_,_) -> Adrp + | Pmovz(_,_,_) -> Movz + | Pmovn(_,_,_) -> Movn + | Pfmovimms(_) -> Fmov (* TODO not sure about this and below too *) + | Pfmovimmd(_) -> Fmov + +let arith_pp_real = function + | Pmov -> Mov + | Pmovk(_,_,_) -> Movk + | Paddadr(_,_) -> Add + | Psbfiz(_,_,_) -> Sbfiz + | Psbfx(_,_,_) -> Sbfx + | Pubfiz(_,_,_) -> Ubfiz + | Pubfx(_,_,_) -> Ubfx + | Pfmov -> Fmov + | Pfcvtds -> Fcvt + | Pfcvtsd -> Fcvt + | Pfabs(_) -> Fabs + | Pfneg(_) -> Fneg + | Pscvtf(_,_) -> Scvtf + | Pucvtf(_,_) -> Ucvtf + | Pfcvtzs(_,_) -> Fcvtzs + | Pfcvtzu(_,_) -> Fcvtzu + | Paddimm(_,_) -> Add + | Psubimm(_,_) -> Sub + +let arith_ppp_real = function + | Pasrv(_) -> Asr + | Plslv(_) -> Lsl + | Plsrv(_) -> Lsr + | Prorv(_) -> Ror + | Psmulh -> Smulh + | Pumulh -> Umulh + | Psdiv(_) -> Sdiv + | Pudiv(_) -> Udiv + | Paddext(_) -> Add + | Psubext(_) -> Sub + | Pfadd(_) -> Fadd + | Pfdiv(_) -> Fdiv + | Pfmul(_) -> Fmul + | Pfsub(_) -> Fsub + +let arith_comparison_p_real = function + | Pfcmp0(_) -> Fcmp + | Pcmpimm(_,_) -> Cmp + | Pcmnimm(_,_) -> Cmn + | Ptstimm(_,_) -> Tst + +let arith_comparison_pp_real = function + | Pcmpext(_) -> Cmp + | Pcmnext(_) -> Cmn + | Pfcmp(_) -> Fcmp + +let arith_comparison_r0r_real = function + | Pcmp(_,_) -> Cmp + | Pcmn(_,_) -> Cmn + | Ptst(_,_) -> Tst + +let arith_comparison_rr0r_real = function + | Padd(_,_) -> Add + | Psub(_,_) -> Sub + | Pand(_,_) -> And + | Pbic(_,_) -> Bic + | Peon(_,_) -> Eon + | Peor(_,_) -> Eor + | Porr(_,_) -> Orr + | Porn(_,_) -> Orn + +let arith_comparison_rr0_real = function + | Pandimm(_,_) -> And + | Peorimm(_,_) -> Eor + | Porrimm(_,_) -> Orr + +let arith_comparison_arrrr0_real = function + | Pmadd(_) -> Madd + | Pmsub(_) -> Msub + +let store_rs_a_real = function + | Pstrw -> Str + | Pstrw_a -> Str + | Pstrx -> Str + | Pstrx_a -> Str + | Pstrb -> Strb + | Pstrh -> Strh + | Pstrs -> Str + | Pstrd -> Str + | Pstrd_a -> Str + +let load_rs_a_real = function + | Pldrw -> Ldr + | Pldrw_a -> Ldr + | Pldrx -> Ldr + | Pldrx_a -> Ldr + | Pldrb(_) -> Ldrb + | Pldrsb(_) -> Ldrsb + | Pldrh(_) -> Ldrh + | Pldrsh(_) -> Ldrsh + | Pldrzw -> Ldr + | Pldrsw -> Ldrsw + | Pldrs -> Ldr + | Pldrd -> Ldr + | Pldrd_a -> Ldr + + + +(*let set_real = Set*) +(*let get_real = Get*) +(*let nop_real = Nop*) +(*let loadsymbol_real = Make*) +(*let loadqrro_real = Lq*) +(*let loadorro_real = Lo*) +(*let storeqrro_real = Sq*) +(*let storeorro_real = So*) + +(*let ret_real = Ret*) +(*let call_real = Call*) +(*let icall_real = Icall*) +(*let goto_real = Goto*) +(*let igoto_real = Igoto*) +(*let jl_real = Goto*) +(*let cb_real = Cb*) +(*let cbu_real = Cb*) + +(*let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + +(*let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + +(*let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + +(*let arith_arri32_rec i rd rs imm32 = *) + (*let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []*) + (*in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;*) + (*read_at_id = [] ; read_at_e1 = rae1 }*) + +(*let arith_arri64_rec i rd rs imm64 = *) + (*let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []*) + (*in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;*) + (*read_at_id = []; read_at_e1 = rae1 }*) + +(*let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + +(*let arith_arrr_rec i rd rs1 rs2 = *) + (*let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []*) + (*in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;*) + (*read_at_id = []; read_at_e1 = rae1 }*) + +(*let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + +(*let arith_r_rec i rd = match i with*) + (*(* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *)*) + (*| Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed);*) + (*is_control = false; read_at_id = []; read_at_e1 = [] }*) + +(*let arith_rec i =*) + (*match i with*) + (*| PArithRRI32 (i, rd, rs, imm32) -> arith_rri32_rec i (IR rd) (IR rs) (Some (I32 imm32))*) + (*| PArithRRI64 (i, rd, rs, imm64) -> arith_rri64_rec i (IR rd) (IR rs) (Some (I64 imm64))*) + (*| PArithRRR (i, rd, rs1, rs2) -> arith_rrr_rec i (IR rd) (IR rs1) (IR rs2)*) + (*| PArithARR (i, rd, rs) -> arith_arr_rec i (IR rd) (IR rs)*) + (*(* Seems like single constant constructor types are elided *)*) + (*| PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32))*) + (*| PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64))*) + (*| PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2)*) + (*| PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + (*| PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + (*| PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = [];*) + (*imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}*) + (*| PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = [];*) + (*imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}*) + (*| PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs)*) + (*| PArithR (i, rd) -> arith_r_rec i (IR rd)*) + +(*let load_rec i = match i with*) + (*| PLoadRRO (trap, i, rs1, rs2, imm) ->*) + (*{ inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + (*| PLoadQRRO(rs, ra, imm) ->*) + (*let (rs0, rs1) = gpreg_q_expand rs in*) + (*{ inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + (*| PLoadORRO(rs, ra, imm) ->*) + (*let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in*) + (*{ inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];*) + (*imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}*) + (*| PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) ->*) + (*{ inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + +(*let store_rec i = match i with*) + (*| PStoreRRO (i, rs, ra, imm) ->*) + (*{ inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));*) + (*read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}*) + (*| PStoreQRRO (rs, ra, imm) ->*) + (*let (rs0, rs1) = gpreg_q_expand rs in*) + (*{ inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));*) + (*read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}*) + (*| PStoreORRO (rs, ra, imm) ->*) + (*let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in*) + (*{ inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];*) + (*imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}*) + (*| PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) ->*) + (*{ inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;*) + (*read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}*) + +(*let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;*) + (*read_at_id = []; read_at_e1 = [] }*) + +(*let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;*) + (*read_at_id = [Reg (IR rs)]; read_at_e1 = [] }*) + +(*let basic_rec i =*) + (*match i with*) + (*| PArith i -> arith_rec i*) + (*| PLoad i -> load_rec i*) + (*| PStore i -> store_rec i*) + (*| Pallocframe (_, _) -> raise OpaqueInstruction*) + (*| Pfreeframe (_, _) -> raise OpaqueInstruction*) + (*| Pget (rd, rs) -> get_rec rd rs*) + (*| Pset (rd, rs) -> set_rec rd rs*) + (*| Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}*) + +(*let expand_rec = function*) + (*| Pbuiltin _ -> raise OpaqueInstruction*) + +(*let ctl_flow_rec = function*) + (*| Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []}*) + (*| Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}*) + (*| Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true;*) + (*read_at_id = [Reg (IR r)]; read_at_e1 = [] }*) + (*| Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}*) + (*| Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true;*) + (*read_at_id = [Reg (IR r)]; read_at_e1 = [] }*) + (*| Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}*) + (*| Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;*) + (*read_at_id = [Reg (IR rs)]; read_at_e1 = [] }*) + (*| Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;*) + (*read_at_id = [Reg (IR rs)]; read_at_e1 = [] }*) + (*| Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *)*) + +(*let control_rec i =*) + (*match i with*) + (*| PExpand i -> expand_rec i*) + (*| PCtlFlow i -> ctl_flow_rec i*) + +(*let rec basic_recs body = match body with*) + (*| [] -> []*) + (*| bi :: body -> (basic_rec bi) :: (basic_recs body)*) + +(*let exit_rec exit = match exit with*) + (*| None -> []*) + (*| Some ex -> [control_rec ex]*) + +(*let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit)*) (** * Providing informations relative to the real instructions *) (** Abstraction providing all the necessary informations for solving the scheduling problem *) -type inst_info = { - write_locs : location list; - read_locs : location list; - reads_at_id : bool; - reads_at_e1 : bool; - is_control : bool; - usage: int array; (* resources consumed by the instruction *) - latency: int; -} - -(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *) -type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10 - -let rec pow a = function - | 0 -> Int64.one - | 1 -> Int64.of_int a - | n -> let b = pow a (n/2) in - Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a)) - -let signed_interval n : (int64 * int64) = begin - assert (n > 0); - let min = Int64.neg @@ pow 2 (n-1) - and max = Int64.sub (pow 2 (n-1)) Int64.one - in (min, max) -end - -let within i interv = match interv with (min, max) -> (i >= min && i <= max) - -let signed_length (i:int64) = - let rec f (i:int64) n = - let interv = signed_interval n - in if (within i interv) then n else f i (n+1) - in f i 1 - -let unsigned_length (i:int64) = (signed_length i) - 1 - -let encode_imm (imm:int64) = - if (Int64.compare imm Int64.zero < 0) then - let length = signed_length imm - in if length <= 10 then S10 - else if length <= 32 then U27L5 - else if length <= 37 then U27L10 - else if length <= 64 then E27U27L10 - else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm - else - let length = unsigned_length imm - in if length <= 6 then U6 - else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *) - else if length <= 32 then U27L5 - else if length <= 37 then U27L10 - else if length <= 64 then E27U27L10 - else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm - -(** Resources *) -type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop - -let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop] - -let rec find_index elt l = - match l with - | [] -> raise Not_found - | e::l -> if (e == elt) then 0 - else 1 + find_index elt l - -let resource_id resource : int = find_index resource resource_names - -let resource_bound resource : int = - match resource with - | Rissue -> 8 - | Rtiny -> 4 - | Rlite -> 2 - | Rfull -> 1 - | Rlsu -> 1 - | Rmau -> 1 - | Rbcu -> 1 - | Rtca -> 1 - | Rauxr -> 1 - | Rauxw -> 1 - | Rcrrp -> 1 - | Rcrwl -> 1 - | Rcrwh -> 1 - | Rnop -> 4 - -let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names) - -(** Reservation tables *) -let alu_full : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let alu_lite : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let alu_lite_x : int array = let resmap = fun r -> match r with - | Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let alu_lite_y : int array = let resmap = fun r -> match r with - | Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let alu_nop : int array = let resmap = fun r -> 0 - in Array.of_list (List.map resmap resource_names) - -let alu_tiny : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let alu_tiny_x : int array = let resmap = fun r -> match r with - | Rissue -> 2 | Rtiny -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let alu_tiny_y : int array = let resmap = fun r -> match r with - | Rissue -> 3 | Rtiny -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let bcu : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rbcu -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let lsu_auxr : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let lsu_auxr_x : int array = let resmap = fun r -> match r with - | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let lsu_auxr_y : int array = let resmap = fun r -> match r with - | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let lsu_auxw : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let lsu_auxw_x : int array = let resmap = fun r -> match r with - | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let lsu_auxw_y : int array = let resmap = fun r -> match r with - | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let mau : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let mau_x : int array = let resmap = fun r -> match r with - | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let mau_y : int array = let resmap = fun r -> match r with - | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let mau_auxr : int array = let resmap = fun r -> match r with - | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let mau_auxr_x : int array = let resmap = fun r -> match r with - | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -let mau_auxr_y : int array = let resmap = fun r -> match r with - | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0 - in Array.of_list (List.map resmap resource_names) - -(** Real instructions *) - -exception InvalidEncoding - -let rec_to_usage r = - let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i) - | Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr) - - in match r.inst with - | Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw - | Nxorw | Andnw | Ornw -> - (match encoding with None | Some U6 | Some S10 -> alu_tiny - | Some U27L5 | Some U27L10 -> alu_tiny_x - | _ -> raise InvalidEncoding) - | Sbfxw | Sbfxd -> - (match encoding with None -> alu_lite - | Some U6 | Some S10 | Some U27L5 -> alu_lite_x - | _ -> raise InvalidEncoding) - | Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord - | Nxord | Andnd | Ornd -> - (match encoding with None | Some U6 | Some S10 -> alu_tiny - | Some U27L5 | Some U27L10 -> alu_tiny_x - | Some E27U27L10 -> alu_tiny_y) - |Cmoved -> - (match encoding with None | Some U6 | Some S10 -> alu_lite - | Some U27L5 | Some U27L10 -> alu_lite_x - | Some E27U27L10 -> alu_lite_y) - | Addxw -> - (match encoding with None | Some U6 | Some S10 -> alu_lite - | Some U27L5 | Some U27L10 -> alu_lite_x - | _ -> raise InvalidEncoding) - | Addxd -> - (match encoding with None | Some U6 | Some S10 -> alu_lite - | Some U27L5 | Some U27L10 -> alu_lite_x - | Some E27U27L10 -> alu_lite_y) - | Compw -> (match encoding with None -> alu_tiny - | Some U6 | Some S10 | Some U27L5 -> alu_tiny_x - | _ -> raise InvalidEncoding) - | Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny - | Some U27L5 | Some U27L10 -> alu_tiny_x - | Some E27U27L10 -> alu_tiny_y) - | Fcompw -> (match encoding with None -> alu_lite - | Some U6 | Some S10 | Some U27L5 -> alu_lite_x - | _ -> raise InvalidEncoding) - | Fcompd -> (match encoding with None -> alu_lite - | Some U6 | Some S10 | Some U27L5 -> alu_lite_x - | _ -> raise InvalidEncoding) - | Make -> (match encoding with Some U6 | Some S10 -> alu_tiny - | Some U27L5 | Some U27L10 -> alu_tiny_x - | Some E27U27L10 -> alu_tiny_y - | _ -> raise InvalidEncoding) - | Maddw | Msbfw -> (match encoding with None -> mau_auxr - | Some U6 | Some S10 | Some U27L5 -> mau_auxr_x - | _ -> raise InvalidEncoding) - | Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr - | Some U27L5 | Some U27L10 -> mau_auxr_x - | Some E27U27L10 -> mau_auxr_y) - | Mulw -> (match encoding with None -> mau - | Some U6 | Some S10 | Some U27L5 -> mau_x - | _ -> raise InvalidEncoding) - | Muld -> (match encoding with None | Some U6 | Some S10 -> mau - | Some U27L5 | Some U27L10 -> mau_x - | Some E27U27L10 -> mau_y) - | Nop -> alu_nop - | Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding) - (* TODO: check *) - | Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding) - | Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding) - | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau - | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> - (match encoding with None | Some U6 | Some S10 -> lsu_auxw - | Some U27L5 | Some U27L10 -> lsu_auxw_x - | Some E27U27L10 -> lsu_auxw_y) - | Sb | Sh | Sw | Sd | Sq | So -> - (match encoding with None | Some U6 | Some S10 -> lsu_auxr - | Some U27L5 | Some U27L10 -> lsu_auxr_x - | Some E27U27L10 -> lsu_auxr_y) - | Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu - | Get -> bcu_tiny_tiny_mau_xnop - | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd - | Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite - | Fnarrowdw -> alu_full - | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw - | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau - - -let inst_info_to_dlatency i = - begin - assert (not (i.reads_at_id && i.reads_at_e1)); - match i.reads_at_id with - | true -> +1 - | false -> (match i.reads_at_e1 with - | true -> -1 - | false -> 0) - end - -let real_inst_to_latency = function - | Nop -> 0 (* Only goes through ID *) - | Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw - (* TODO check rorw *) - | Rorw | Nandw | Norw | Nxorw | Ornw | Andnw - | Nandd | Nord | Nxord | Ornd | Andnd - | Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make - | Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd - | Fmind | Fmaxd | Fminw | Fmaxw - -> 1 - | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4 - | Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *) - | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3 - | Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *) - | Get -> 1 - | Set -> 4 (* According to the manual should be 3, but I measured 4 *) - | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *) - | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1 - | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw - | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4 - -let rec empty_inter la = function - | [] -> true - | b::lb -> if (List.mem b la) then false else empty_inter la lb - -let rec_to_info r : inst_info = - let usage = rec_to_usage r - and latency = real_inst_to_latency r.inst - and reads_at_id = not (empty_inter r.read_locs r.read_at_id) - and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1) - in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control; - reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 } - -let instruction_infos bb = List.map rec_to_info (instruction_recs bb) - -let instruction_usages bb = - let usages = List.map (fun info -> info.usage) (instruction_infos bb) - in Array.of_list usages +(*type inst_info = {*) + (*write_locs : location list;*) + (*read_locs : location list;*) + (*reads_at_id : bool;*) + (*reads_at_e1 : bool;*) + (*is_control : bool;*) + (*usage: int array; (* resources consumed by the instruction *)*) + (*latency: int;*) +(*}*) + +(*(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *)*) +(*type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10*) + +(*let rec pow a = function*) + (*| 0 -> Int64.one*) + (*| 1 -> Int64.of_int a*) + (*| n -> let b = pow a (n/2) in*) + (*Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a))*) + +(*let signed_interval n : (int64 * int64) = begin *) + (*assert (n > 0);*) + (*let min = Int64.neg @@ pow 2 (n-1)*) + (*and max = Int64.sub (pow 2 (n-1)) Int64.one*) + (*in (min, max)*) +(*end*) + +(*let within i interv = match interv with (min, max) -> (i >= min && i <= max)*) + +(*let signed_length (i:int64) =*) + (*let rec f (i:int64) n = *) + (*let interv = signed_interval n*) + (*in if (within i interv) then n else f i (n+1)*) + (*in f i 1*) + +(*let unsigned_length (i:int64) = (signed_length i) - 1*) + +(*let encode_imm (imm:int64) =*) + (*if (Int64.compare imm Int64.zero < 0) then*) + (*let length = signed_length imm*) + (*in if length <= 10 then S10*) + (*else if length <= 32 then U27L5*) + (*else if length <= 37 then U27L10*) + (*else if length <= 64 then E27U27L10*) + (*else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm*) + (*else*) + (*let length = unsigned_length imm*) + (*in if length <= 6 then U6*) + (*else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *)*) + (*else if length <= 32 then U27L5*) + (*else if length <= 37 then U27L10*) + (*else if length <= 64 then E27U27L10*) + (*else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm*) + +(*(** Resources *)*) +(*type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop*) + +(*let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop]*) + +(*let rec find_index elt l =*) + (*match l with*) + (*| [] -> raise Not_found*) + (*| e::l -> if (e == elt) then 0*) + (*else 1 + find_index elt l*) + +(*let resource_id resource : int = find_index resource resource_names*) + +(*let resource_bound resource : int =*) + (*match resource with*) + (*| Rissue -> 8*) + (*| Rtiny -> 4*) + (*| Rlite -> 2*) + (*| Rfull -> 1*) + (*| Rlsu -> 1*) + (*| Rmau -> 1*) + (*| Rbcu -> 1*) + (*| Rtca -> 1*) + (*| Rauxr -> 1*) + (*| Rauxw -> 1*) + (*| Rcrrp -> 1*) + (*| Rcrwl -> 1*) + (*| Rcrwh -> 1*) + (*| Rnop -> 4*) + +(*let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names)*) + +(*(** Reservation tables *)*) +(*let alu_full : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let alu_lite : int array = let resmap = fun r -> match r with *) + (*| Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let alu_lite_x : int array = let resmap = fun r -> match r with *) + (*| Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let alu_lite_y : int array = let resmap = fun r -> match r with *) + (*| Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let alu_nop : int array = let resmap = fun r -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let alu_tiny : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 1 | Rtiny -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let alu_tiny_x : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 2 | Rtiny -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let alu_tiny_y : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 3 | Rtiny -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let bcu : int array = let resmap = fun r -> match r with *) + (*| Rissue -> 1 | Rbcu -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with *) + (*| Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let lsu_auxr : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let lsu_auxr_x : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let lsu_auxr_y : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let lsu_auxw : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let lsu_auxw_x : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let lsu_auxw_y : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let mau : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let mau_x : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let mau_y : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let mau_auxr : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let mau_auxr_x : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*let mau_auxr_y : int array = let resmap = fun r -> match r with*) + (*| Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*) + (*in Array.of_list (List.map resmap resource_names)*) + +(*(** Real instructions *)*) + +(*exception InvalidEncoding*) + +(*let rec_to_usage r =*) + (*let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i)*) + (*| Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr)*) + + (*in match r.inst with*) + (*| Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw*) + (*| Nxorw | Andnw | Ornw -> *) + (*(match encoding with None | Some U6 | Some S10 -> alu_tiny *) + (*| Some U27L5 | Some U27L10 -> alu_tiny_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Sbfxw | Sbfxd ->*) + (*(match encoding with None -> alu_lite*) + (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord*) + (*| Nxord | Andnd | Ornd ->*) + (*(match encoding with None | Some U6 | Some S10 -> alu_tiny *) + (*| Some U27L5 | Some U27L10 -> alu_tiny_x*) + (*| Some E27U27L10 -> alu_tiny_y)*) + (*|Cmoved ->*) + (*(match encoding with None | Some U6 | Some S10 -> alu_lite*) + (*| Some U27L5 | Some U27L10 -> alu_lite_x*) + (*| Some E27U27L10 -> alu_lite_y)*) + (*| Addxw -> *) + (*(match encoding with None | Some U6 | Some S10 -> alu_lite *) + (*| Some U27L5 | Some U27L10 -> alu_lite_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Addxd -> *) + (*(match encoding with None | Some U6 | Some S10 -> alu_lite *) + (*| Some U27L5 | Some U27L10 -> alu_lite_x*) + (*| Some E27U27L10 -> alu_lite_y)*) + (*| Compw -> (match encoding with None -> alu_tiny*) + (*| Some U6 | Some S10 | Some U27L5 -> alu_tiny_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny*) + (*| Some U27L5 | Some U27L10 -> alu_tiny_x*) + (*| Some E27U27L10 -> alu_tiny_y)*) + (*| Fcompw -> (match encoding with None -> alu_lite*) + (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Fcompd -> (match encoding with None -> alu_lite*) + (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Make -> (match encoding with Some U6 | Some S10 -> alu_tiny *) + (*| Some U27L5 | Some U27L10 -> alu_tiny_x *) + (*| Some E27U27L10 -> alu_tiny_y *) + (*| _ -> raise InvalidEncoding)*) + (*| Maddw | Msbfw -> (match encoding with None -> mau_auxr*) + (*| Some U6 | Some S10 | Some U27L5 -> mau_auxr_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr*) + (*| Some U27L5 | Some U27L10 -> mau_auxr_x*) + (*| Some E27U27L10 -> mau_auxr_y)*) + (*| Mulw -> (match encoding with None -> mau*) + (*| Some U6 | Some S10 | Some U27L5 -> mau_x*) + (*| _ -> raise InvalidEncoding)*) + (*| Muld -> (match encoding with None | Some U6 | Some S10 -> mau*) + (*| Some U27L5 | Some U27L10 -> mau_x*) + (*| Some E27U27L10 -> mau_y)*) + (*| Nop -> alu_nop*) + (*| Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding)*) + (*(* TODO: check *)*) + (*| Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding)*) + (*| Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)*) + (*| Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau*) + (*| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> *) + (*(match encoding with None | Some U6 | Some S10 -> lsu_auxw*) + (*| Some U27L5 | Some U27L10 -> lsu_auxw_x*) + (*| Some E27U27L10 -> lsu_auxw_y)*) + (*| Sb | Sh | Sw | Sd | Sq | So ->*) + (*(match encoding with None | Some U6 | Some S10 -> lsu_auxr*) + (*| Some U27L5 | Some U27L10 -> lsu_auxr_x*) + (*| Some E27U27L10 -> lsu_auxr_y)*) + (*| Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu*) + (*| Get -> bcu_tiny_tiny_mau_xnop*) + (*| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd*) + (*| Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite*) + (*| Fnarrowdw -> alu_full*) + (*| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw*) + (*| Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau*) + + +(*let inst_info_to_dlatency i = *) + (*begin*) + (*assert (not (i.reads_at_id && i.reads_at_e1));*) + (*match i.reads_at_id with*) + (*| true -> +1*) + (*| false -> (match i.reads_at_e1 with*) + (*| true -> -1*) + (*| false -> 0)*) + (*end*) + +(*let real_inst_to_latency = function*) + (*| Nop -> 0 (* Only goes through ID *)*) + (*| Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw*) + (*(* TODO check rorw *)*) + (*| Rorw | Nandw | Norw | Nxorw | Ornw | Andnw*) + (*| Nandd | Nord | Nxord | Ornd | Andnd*) + (*| Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make*) + (*| Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd*) + (*| Fmind | Fmaxd | Fminw | Fmaxw*) + (*-> 1*) + (*| Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4*) + (*| Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)*) + (*| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3*) + (*| Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *)*) + (*| Get -> 1*) + (*| Set -> 4 (* According to the manual should be 3, but I measured 4 *)*) + (*| Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)*) + (*| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1*) + (*| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw*) + (*| Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4*) + +(*let rec empty_inter la = function*) + (*| [] -> true*) + (*| b::lb -> if (List.mem b la) then false else empty_inter la lb*) + +(*let rec_to_info r : inst_info =*) + (*let usage = rec_to_usage r*) + (*and latency = real_inst_to_latency r.inst*) + (*and reads_at_id = not (empty_inter r.read_locs r.read_at_id)*) + (*and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1)*) + (*in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control;*) + (*reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 }*) + +(*let instruction_infos bb = List.map rec_to_info (instruction_recs bb)*) + +(*let instruction_usages bb =*) + (*let usages = List.map (fun info -> info.usage) (instruction_infos bb)*) + (*in Array.of_list usages*) (** * Latency constraints building *) -(* type access = { inst: int; loc: location } *) - -let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr - -let loc2int = function - | Mem -> 1 - | Reg pr -> preg2int pr - -(* module HashedLoc = struct - type t = { loc: location; key: int } - let equal l1 l2 = (l1.key = l2.key) - let hash l = l.key - let create (l:location) : t = { loc=l; key = loc2int l } -end *) - -(* module LocHash = Hashtbl.Make(HashedLoc) *) -module LocHash = Hashtbl - -(* Hash table : location => list of instruction ids *) - -let rec intlist n = - if n < 0 then failwith "intlist: n < 0" - else if n = 0 then [] - else (n-1) :: (intlist (n-1)) - -let find_in_hash hashloc loc = - match LocHash.find_opt hashloc loc with - | Some idl -> idl - | None -> [] - -(* Returns a list of instruction ids *) -let rec get_accesses hashloc (ll: location list) = match ll with - | [] -> [] - | loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs) - -let compute_latency (ifrom: inst_info) (ito: inst_info) = - let dlat = inst_info_to_dlatency ito - in let lat = ifrom.latency + dlat - in assert (lat >= 0); if (lat == 0) then 1 else lat - -let latency_constraints bb = - let written = LocHash.create 70 - and read = LocHash.create 70 - and count = ref 0 - and constraints = ref [] - and instr_infos = instruction_infos bb - in let step (i: inst_info) = - let raw = get_accesses written i.read_locs - and waw = get_accesses written i.write_locs - and war = get_accesses read i.write_locs - in begin - List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; - latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw; - List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; - latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw; - List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war; - if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count); - (* Updating "read" and "written" hashmaps *) - List.iter (fun loc -> - begin - LocHash.replace written loc [!count]; - LocHash.replace read loc []; (* Clearing all the entries of "read" hashmap when a register is written *) - end) i.write_locs; - List.iter (fun loc -> LocHash.replace read loc ((!count) :: (find_in_hash read loc))) i.read_locs; - count := !count + 1 - end - in (List.iter step instr_infos; !constraints) +(*(* type access = { inst: int; loc: location } *)*) + +(*let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr*) + +(*let loc2int = function*) + (*| Mem -> 1*) + (*| Reg pr -> preg2int pr*) + +(*(* module HashedLoc = struct*) + (*type t = { loc: location; key: int }*) + (*let equal l1 l2 = (l1.key = l2.key)*) + (*let hash l = l.key*) + (*let create (l:location) : t = { loc=l; key = loc2int l }*) +(*end *)*) + +(*(* module LocHash = Hashtbl.Make(HashedLoc) *)*) +(*module LocHash = Hashtbl*) + +(*(* Hash table : location => list of instruction ids *)*) + +(*let rec intlist n =*) + (*if n < 0 then failwith "intlist: n < 0"*) + (*else if n = 0 then []*) + (*else (n-1) :: (intlist (n-1))*) + +(*let find_in_hash hashloc loc =*) + (*match LocHash.find_opt hashloc loc with*) + (*| Some idl -> idl*) + (*| None -> []*) + +(*(* Returns a list of instruction ids *)*) +(*let rec get_accesses hashloc (ll: location list) = match ll with*) + (*| [] -> []*) + (*| loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)*) + +(*let compute_latency (ifrom: inst_info) (ito: inst_info) =*) + (*let dlat = inst_info_to_dlatency ito*) + (*in let lat = ifrom.latency + dlat*) + (*in assert (lat >= 0); if (lat == 0) then 1 else lat*) + +(*let latency_constraints bb =*) + (*let written = LocHash.create 70*) + (*and read = LocHash.create 70*) + (*and count = ref 0*) + (*and constraints = ref []*) + (*and instr_infos = instruction_infos bb*) + (*in let step (i: inst_info) =*) + (*let raw = get_accesses written i.read_locs*) + (*and waw = get_accesses written i.write_locs*) + (*and war = get_accesses read i.write_locs*) + (*in begin*) + (*List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;*) + (*latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw;*) + (*List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;*) + (*latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw;*) + (*List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war;*) + (*if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count);*) + (*(* Updating "read" and "written" hashmaps *)*) + (*List.iter (fun loc ->*) + (*begin *) + (*LocHash.replace written loc [!count];*) + (*LocHash.replace read loc []; (* Clearing all the entries of "read" hashmap when a register is written *)*) + (*end) i.write_locs;*) + (*List.iter (fun loc -> LocHash.replace read loc ((!count) :: (find_in_hash read loc))) i.read_locs;*) + (*count := !count + 1*) + (*end*) + (*in (List.iter step instr_infos; !constraints)*) (** * Using the InstructionScheduler *) -let build_problem bb = - { max_latency = -1; resource_bounds = resource_bounds; - instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb } +(* TODO RESUME HERE +(*let build_problem bb = *) + (*{ max_latency = -1; resource_bounds = resource_bounds;*) + (*instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb }*) let rec find_min_opt (l: int option list) = match l with @@ -810,7 +729,7 @@ let rec filter_indexes predicate = function let get_from_indexes indexes l = List.map (List.nth l) indexes let is_basic = function PBasic _ -> true | _ -> false -let is_control = function PControl _ -> true | _ -> false +let is_control = function PCtlFlow _ -> true | PExpand _ -> true | _ -> false let to_basic = function PBasic i -> i | _ -> failwith "to_basic: control instruction found" let to_control = function PControl i -> i | _ -> failwith "to_control: basic instruction found" @@ -818,13 +737,10 @@ let bundlize li hd = let last = List.nth li (List.length li - 1) in if is_control last then let cut_li = Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1) - in let bli = List.map to_basic cut_li - in { header = hd; body = bli; exit = Some (to_control last) } + in { header = hd; body = cut_li; exit = Some (to_control last) } else - let bli = List.map to_basic li - in { header = hd; body = bli; exit = None } + { header = hd; body = li; exit = None } -let apply_pbasic b = PBasic b let extract_some o = match o with Some e -> e | None -> failwith "extract_some: None found" let rec find_min = function @@ -891,7 +807,7 @@ let minpack_list (l: int list) = in List.map (fun m -> find_all_indices m l) mins *) -let bb_to_instrs bb = (List.map apply_pbasic bb.body) @ (match bb.exit with None -> [] | Some e -> [PControl e]) +let bb_to_instrs bb = bb.body @ (match bb.exit with None -> [] | Some e -> [Some e]) let bundlize_solution bb sol = let tmp = (Array.to_list @@ Array.sub sol 0 (Array.length sol - 1)) @@ -901,69 +817,54 @@ let bundlize_solution bb sol = | [] -> [] | pack :: packs -> bundlize (get_from_indexes pack instrs) hd :: (bund [] packs) in bund bb.header packs - -let print_inst oc = function - | Asm.Pallocframe(sz, ofs) -> fprintf oc " Pallocframe\n" - | Asm.Pfreeframe(sz, ofs) -> fprintf oc " Pfreeframe\n" - | Asm.Pbuiltin(ef, args, res) -> fprintf oc " Pbuiltin\n" - | Asm.Pcvtl2w(rd, rs) -> fprintf oc " Pcvtl2w %a = %a\n" ireg rd ireg rs - | i -> print_instruction oc i - -let print_bb oc bb = - let asm_instructions = Asm.unfold_bblock bb - in List.iter (print_inst oc) asm_instructions - -let print_schedule sched = - print_string "[ "; - Array.iter (fun x -> Printf.printf "%d; " x) sched; - print_endline "]";; - -let do_schedule bb = - let problem = build_problem bb in - (if debug then print_problem stdout problem); - let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem - in match solution with - | None -> failwith "Could not find a valid schedule" - | Some sol -> - ((if debug then print_schedule sol); - let bundles = bundlize_solution bb sol in - (if debug then - begin - Printf.eprintf "Scheduling the following group of instructions:\n"; - print_bb stderr bb; - Printf.eprintf "Gave the following solution:\n"; - List.iter (print_bb stderr) bundles; - Printf.eprintf "--------------------------------\n" - end; - bundles)) +*) +(*let print_inst oc = function*) + (*| Asm.Pallocframe(sz, ofs) -> fprintf oc " Pallocframe\n"*) + (*| Asm.Pfreeframe(sz, ofs) -> fprintf oc " Pfreeframe\n"*) + (*| Asm.Pbuiltin(ef, args, res) -> fprintf oc " Pbuiltin\n"*) + (*| i -> print_instruction oc i*) + +(*let print_bb oc bb =*) + (*let asm_instructions = Asm.unfold_bblock bb*) + (*in List.iter (print_inst oc) asm_instructions*) + +(*let print_schedule sched =*) + (*print_string "[ ";*) + (*Array.iter (fun x -> Printf.printf "%d; " x) sched;*) + (*print_endline "]";;*) + +(*let do_schedule bb =*) + (*let problem = build_problem bb in*) + (*(if debug then print_problem stdout problem);*) + (*let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem*) + (*in match solution with*) + (*| None -> failwith "Could not find a valid schedule"*) + (*| Some sol ->*) + (*((if debug then print_schedule sol);*) + (*let bundles = bundlize_solution bb sol in *) + (*(if debug then*) + (*begin*) + (*Printf.eprintf "Scheduling the following group of instructions:\n";*) + (*print_bb stderr bb;*) + (*Printf.eprintf "Gave the following solution:\n";*) + (*List.iter (print_bb stderr) bundles;*) + (*Printf.eprintf "--------------------------------\n"*) + (*end;*) + (*bundles))*) (** * Dumb schedule if the above doesn't work *) -let bundlize_label l = - match l with - | [] -> [] - | l -> [{ header = l; body = []; exit = None }] - -let rec bundlize_basic l = - match l with - | [] -> [] - | b :: l -> { header = []; body = [b]; exit = None } :: bundlize_basic l - -let bundlize_exit e = - match e with - | Some e -> [{ header = []; body = []; exit = Some e }] - | None -> [] - -let dumb_schedule (bb : bblock) : bblock list = bundlize_label bb.header @ bundlize_basic bb.body @ bundlize_exit bb.exit +(* Identity scheduling *) +let dumb_schedule (bb : bblock) = (bb.body, bb.exit) (** * Separates the opaque instructions such as Pfreeframe and Pallocframe *) let is_opaque = function - | PBasic (Pallocframe _) | PBasic (Pfreeframe _) | PControl (PExpand (Pbuiltin _)) -> true + | Pallocframe _ | Pfreeframe _ -> true | _ -> false (* Returns : (accumulated instructions, remaining instructions, opaque instruction if found) *) @@ -972,58 +873,52 @@ let rec biggest_wo_opaque = function | i :: li -> if is_opaque i then ([], li, Some i) else let big, rem, opaque = biggest_wo_opaque li in (i :: big, rem, opaque);; -let separate_opaque bb = - let instrs = bb_to_instrs bb - in let rec f hd li = - match li with - | [] -> [] - | li -> let big, rem, opaque = biggest_wo_opaque li in - match opaque with - | Some i -> - (match big with - | [] -> (bundlize [i] hd) :: (f [] rem) - | big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem) - ) - | None -> (bundlize big hd) :: (f [] rem) - in f bb.header instrs - -let smart_schedule bb = - let lbb = separate_opaque bb - in let rec f = function - | [] -> [] - | bb :: lbb -> - let bundles = - try do_schedule bb - with OpaqueInstruction -> dumb_schedule bb - | e -> - let msg = Printexc.to_string e - and stack = Printexc.get_backtrace () - in begin - Printf.eprintf "In regards to this group of instructions:\n"; - print_bb stderr bb; - Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack; - failwith "Invalid schedule" - (* - Printf.eprintf "Issuing one instruction per bundle instead\n\n"; - dumb_schedule bb - *) - end - in bundles @ (f lbb) - in f lbb - -let bblock_to_bundles bb = - if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb); +(*let separate_opaque bb =*) + (*let instrs = bb_to_instrs bb*) + (*in let rec f hd li =*) + (*match li with*) + (*| [] -> []*) + (*| li -> let big, rem, opaque = biggest_wo_opaque li in*) + (*match opaque with*) + (*| Some i ->*) + (*(match big with*) + (*| [] -> (bundlize [i] hd) :: (f [] rem)*) + (*| big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem)*) + (**) + (*| None -> (bundlize big hd) :: (f [] rem)*) + (*in f bb.header instrs*) + +(*let smart_schedule bb =*) + (*let lbb = separate_opaque bb*) + (*in let rec f = function*) + (*| [] -> []*) + (*| bb :: lbb -> *) + (*let bundles =*) + (*try do_schedule bb*) + (*with OpaqueInstruction -> dumb_schedule bb*) + (*| e -> *) + (*let msg = Printexc.to_string e*) + (*and stack = Printexc.get_backtrace ()*) + (*in begin*) + (*Printf.eprintf "In regards to this group of instructions:\n";*) + (*print_bb stderr bb;*) + (*Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack;*) + (*failwith "Invalid schedule"*) + (*(**) + (*Printf.eprintf "Issuing one instruction per bundle instead\n\n";*) + (*dumb_schedule bb*) + (**)*) + (*end*) + (*in bundles @ (f lbb)*) + (*in f lbb*) + +let bblock_schedule bb = + (*if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb);*) (* print_problem (build_problem bb); *) - if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb - -(** To deal with the Coq Axiom schedule : bblock -> (list (list basic)) * option control *) - -let rec bundles_to_coq_schedule = function - | [] -> ([], None) - | bb :: [] -> ([bb.body], bb.exit) - | bb :: lbb -> let (llb, oc) = bundles_to_coq_schedule lbb in (bb.body :: llb, oc) + (*TODO if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb*) + dumb_schedule bb (** Called schedule function from Coq *) -let schedule_notime bb = let toto = bundles_to_coq_schedule @@ bblock_to_bundles bb in toto -let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) schedule_notime bb +(*let schedule_notime bb = let toto = bblock_schedule in toto*) +let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) bblock_schedule bb diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v index 3d08232b..86a160bd 100644 --- a/aarch64/PostpassSchedulingproof.v +++ b/aarch64/PostpassSchedulingproof.v @@ -23,8 +23,7 @@ Require Import Axioms. Local Open Scope error_monad_scope. -(* -Definition match_prog (p tp: Asmvliw.program) := +Definition match_prog (p tp: Asmblock.program) := match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp. Lemma transf_program_match: @@ -33,6 +32,7 @@ Proof. intros. eapply match_transform_partial_program; eauto. Qed. +(* Lemma regset_double_set_id: forall r (rs: regset) v1 v2, (rs # r <- v1 # r <- v2) = (rs # r <- v2). |