diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-11-27 17:56:53 +0100 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-11-27 18:04:35 +0100 |
commit | ed78594947276264beea0b608c2a101d9f31b18f (patch) | |
tree | 2b4c04d9a6fc5e531f38e6b7f4810241cfe49896 /aarch64 | |
parent | 63942e04b0fcb84d54f066122c31ca4c3aa99ad4 (diff) | |
parent | 43a7cc2a7305395b20d92b240362ddfdb43963ff (diff) | |
download | compcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.tar.gz compcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.zip |
Merge branch 'kvx-test-prepass' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into aarch64-prepass+postpass
Diffstat (limited to 'aarch64')
-rw-r--r-- | aarch64/ConstpropOpproof.v | 121 | ||||
-rw-r--r-- | aarch64/InstructionScheduler.ml | 1263 | ||||
-rw-r--r-- | aarch64/InstructionScheduler.mli | 113 | ||||
-rw-r--r-- | aarch64/Op.v | 179 | ||||
-rw-r--r-- | aarch64/OpWeights.ml | 353 | ||||
-rw-r--r-- | aarch64/PrepassSchedulingOracle.ml | 477 | ||||
-rw-r--r-- | aarch64/PrepassSchedulingOracleDeps.ml | 17 | ||||
-rw-r--r-- | aarch64/SelectLongproof.v | 26 | ||||
-rw-r--r-- | aarch64/SelectOpproof.v | 28 | ||||
-rw-r--r-- | aarch64/ValueAOp.v | 24 |
10 files changed, 1056 insertions, 1545 deletions
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v index deab7cd4..c777062c 100644 --- a/aarch64/ConstpropOpproof.v +++ b/aarch64/ConstpropOpproof.v @@ -335,40 +335,63 @@ Qed. Lemma make_divimm_correct: forall n r1 r2 v, - Val.divs e#r1 e#r2 = Some v -> + Val.maketotal (Val.divs e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - destruct (Int.ltu i (Int.repr 31)) eqn:?. - exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence. - exists v; auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divs_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 31)) eqn:iLT31. + { + cbn. + exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto. + destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto. + rewrite Val.divs_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. Qed. + Lemma make_divuimm_correct: forall n r1 r2 v, - Val.divu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divu e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divuimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divuimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - econstructor; split. simpl; eauto. - rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). - rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divu_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + cbn. + rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). + exists (Val.shru e # r1 (Vint i)); split; auto. + destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto. + rewrite Val.divu_pow2 with (y:=v) (n:=n). + all: auto. + } + exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence. Qed. Lemma make_andimm_correct: @@ -503,34 +526,60 @@ Qed. Lemma make_divlimm_correct: forall n r1 r2 v, - Val.divls e#r1 e#r2 = Some v -> + Val.maketotal (Val.divls e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divlimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divlimm. - destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?. - rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto. - exists v; auto. - exists v; auto. + destruct (Int64.is_power2' n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 63)) eqn:iLT63. + { + cbn. + exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto. + rewrite H0 in H. + destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto. + { + subst v0. + rewrite Val.divls_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + subst. auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. Qed. + Lemma make_divluimm_correct: forall n r1 r2 v, - Val.divlu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divlu e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divluimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divluimm. destruct (Int64.is_power2' n) eqn:?. + { econstructor; split. simpl; eauto. - rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). - rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. - simpl. - erewrite Int64.is_power2'_range by eauto. - erewrite Int64.divu_pow2' by eauto. auto. - exists v; auto. + rewrite H0 in H. destruct (e#r1); inv H. + all: cbn; auto. + { + rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). + destruct (Int64.eq n Int64.zero); cbn; auto. + erewrite Int64.is_power2'_range by eauto. + erewrite Int64.divu_pow2' by eauto. auto. + } + } + exists v; split; auto. + cbn. + rewrite H. + reflexivity. Qed. Lemma make_andlimm_correct: @@ -679,10 +728,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto. - (* divs *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divimm_correct; auto. + apply make_divimm_correct; auto. congruence. - (* divu *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divuimm_correct; auto. + apply make_divuimm_correct; auto. congruence. - (* and 1 *) rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto. - (* and 2 *) @@ -745,10 +794,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. - (* divl *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divlimm_correct; auto. + apply make_divlimm_correct; auto. congruence. - (* divlu *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divluimm_correct; auto. + apply make_divluimm_correct; auto. congruence. - (* andl 1 *) rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto. - (* andl 2 *) diff --git a/aarch64/InstructionScheduler.ml b/aarch64/InstructionScheduler.ml deleted file mode 100644 index eab0b21a..00000000 --- a/aarch64/InstructionScheduler.ml +++ /dev/null @@ -1,1263 +0,0 @@ -(* *************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* David Monniaux CNRS, VERIMAG *) -(* Cyril Six Kalray *) -(* *) -(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) -(* This file is distributed under the terms of the INRIA *) -(* Non-Commercial License Agreement. *) -(* *) -(* *************************************************************) - -let with_destructor dtor stuff f = - try let ret = f stuff in - dtor stuff; - ret - with exn -> dtor stuff; - raise exn;; - -let with_out_channel chan f = with_destructor close_out chan f;; -let with_in_channel chan f = with_destructor close_in chan f;; - -(** Schedule instructions on a synchronized pipeline -@author David Monniaux, CNRS, VERIMAG *) - -type latency_constraint = { - instr_from : int; - instr_to : int; - latency : int };; - -type problem = { - max_latency : int; - resource_bounds : int array; - instruction_usages : int array array; - latency_constraints : latency_constraint list; - };; - -let print_problem channel problem = - (if problem.max_latency >= 0 - then Printf.fprintf channel "max makespan: %d\n" problem.max_latency); - output_string channel "resource bounds:"; - (Array.iter (fun b -> Printf.fprintf channel " %d" b) problem.resource_bounds); - output_string channel ";\n"; - (Array.iteri (fun i v -> - Printf.fprintf channel "instr%d:" i; - (Array.iter (fun b -> Printf.fprintf channel " %d" b) v); - output_string channel ";\n") problem.instruction_usages); - List.iter (fun instr -> - Printf.printf "t%d - t%d >= %d;\n" - instr.instr_to instr.instr_from instr.latency) - problem.latency_constraints;; - -let get_nr_instructions problem = Array.length problem.instruction_usages;; -let get_nr_resources problem = Array.length problem.resource_bounds;; - -type solution = int array -type scheduler = problem -> solution option - -(* DISABLED -(** Schedule the problem optimally by constraint solving using the Gecode solver. *) -external gecode_scheduler : problem -> solution option = - "caml_gecode_schedule_instr";; - *) - -let maximum_slot_used times = - let maxi = ref (-1) in - for i=0 to (Array.length times)-2 - do - maxi := max !maxi times.(i) - done; - !maxi;; - -let check_schedule (problem : problem) (times : solution) = - let nr_instructions = get_nr_instructions problem in - (if Array.length times <> nr_instructions+1 - then failwith - (Printf.sprintf "check_schedule: %d times expected, got %d" - (nr_instructions+1) (Array.length times))); - (if problem.max_latency >= 0 && times.(nr_instructions)> problem.max_latency - then failwith "check_schedule: max_latency exceeded"); - (Array.iteri (fun i time -> - (if time < 0 - then failwith (Printf.sprintf "time[%d] < 0" i))) times); - let slot_resources = Array.init ((maximum_slot_used times)+1) - (fun _ -> Array.copy problem.resource_bounds) in - for i=0 to nr_instructions -1 - do - let remaining_resources = slot_resources.(times.(i)) - and used_resources = problem.instruction_usages.(i) in - for resource=0 to (Array.length used_resources)-1 - do - let after = remaining_resources.(resource) - used_resources.(resource) in - (if after < 0 - then failwith (Printf.sprintf "check_schedule: instruction %d exceeds resource %d at slot %d" i resource times.(i))); - remaining_resources.(resource) <- after - done - done; - List.iter (fun ctr -> - if times.(ctr.instr_to) - times.(ctr.instr_from) < ctr.latency - then failwith (Printf.sprintf "check_schedule: time[%d]=%d - time[%d]=%d < %d" - ctr.instr_to times.(ctr.instr_to) - ctr.instr_from times.(ctr.instr_from) - ctr.latency) - ) problem.latency_constraints;; - -let bound_max_time problem = - let total = ref(Array.length problem.instruction_usages) in - List.iter (fun ctr -> total := !total + ctr.latency) problem.latency_constraints; - !total;; - -let vector_less_equal a b = - try - Array.iter2 (fun x y -> - if x>y - then raise Exit) a b; - true - with Exit -> false;; - -let vector_subtract a b = - assert ((Array.length a) = (Array.length b)); - for i=0 to (Array.length a)-1 - do - b.(i) <- b.(i) - a.(i) - done;; - -(* The version with critical path ordering is much better! *) -type list_scheduler_order = - | INSTRUCTION_ORDER - | CRITICAL_PATH_ORDER;; - -let int_max (x : int) (y : int) = - if x > y then x else y;; - -let int_min (x : int) (y : int) = - if x < y then x else y;; - -let get_predecessors problem = - let nr_instructions = get_nr_instructions problem in - let predecessors = Array.make (nr_instructions+1) [] in - List.iter (fun ctr -> - predecessors.(ctr.instr_to) <- - (ctr.instr_from, ctr.latency)::predecessors.(ctr.instr_to)) - problem.latency_constraints; - predecessors;; - -let get_successors problem = - let nr_instructions = get_nr_instructions problem in - let successors = Array.make nr_instructions [] in - List.iter (fun ctr -> - successors.(ctr.instr_from) <- - (ctr.instr_to, ctr.latency)::successors.(ctr.instr_from)) - problem.latency_constraints; - successors;; - -let critical_paths successors = - let nr_instructions = Array.length successors in - let path_lengths = Array.make nr_instructions (-1) in - let rec compute i = - if i=nr_instructions then 0 else - match path_lengths.(i) with - | -2 -> failwith "InstructionScheduler: the dependency graph has cycles" - | -1 -> path_lengths.(i) <- -2; - let x = List.fold_left - (fun cur (j, latency)-> int_max cur (latency+(compute j))) - 1 successors.(i) - in path_lengths.(i) <- x; x - | x -> x - in for i = nr_instructions-1 downto 0 - do - ignore (compute i) - done; - path_lengths;; - -let maximum_critical_path problem = - let paths = critical_paths (get_successors problem) in - Array.fold_left int_max 0 paths;; - -let get_earliest_dates predecessors = - let nr_instructions = (Array.length predecessors)-1 in - let path_lengths = Array.make (nr_instructions+1) (-1) in - let rec compute i = - match path_lengths.(i) with - | -2 -> failwith "InstructionScheduler: the dependency graph has cycles" - | -1 -> path_lengths.(i) <- -2; - let x = List.fold_left - (fun cur (j, latency)-> int_max cur (latency+(compute j))) - 0 predecessors.(i) - in path_lengths.(i) <- x; x - | x -> x - in for i = 0 to nr_instructions - do - ignore (compute i) - done; - for i = 0 to nr_instructions - 1 - do - path_lengths.(nr_instructions) <- int_max - path_lengths.(nr_instructions) (1 + path_lengths.(i)) - done; - path_lengths;; - -exception Unschedulable - -let get_latest_dates deadline successors = - let nr_instructions = Array.length successors - and path_lengths = critical_paths successors in - Array.init (nr_instructions + 1) - (fun i -> - if i < nr_instructions then - let path_length = path_lengths.(i) in - assert (path_length >= 1); - (if path_length > deadline - then raise Unschedulable); - deadline - path_length - else deadline);; - -let priority_list_scheduler (order : list_scheduler_order) - (problem : problem) : - solution option = - let nr_instructions = get_nr_instructions problem in - let successors = get_successors problem - and predecessors = get_predecessors problem - and times = Array.make (nr_instructions+1) (-1) in - - let priorities = match order with - | INSTRUCTION_ORDER -> None - | CRITICAL_PATH_ORDER -> Some (critical_paths successors) in - - let module InstrSet = - Set.Make (struct type t=int - let compare = match priorities with - | None -> (fun x y -> x - y) - | Some p -> (fun x y -> - (match p.(y)-p.(x) with - | 0 -> x - y - | z -> z)) - end) in - - let max_time = bound_max_time problem in - let ready = Array.make max_time InstrSet.empty in - Array.iteri (fun i preds -> - if i<nr_instructions && preds=[] - then ready.(0) <- InstrSet.add i ready.(0)) predecessors; - - let current_time = ref 0 - and current_resources = Array.copy problem.resource_bounds - and earliest_time i = - try - let time = ref (-1) in - List.iter (fun (j, latency) -> - if times.(j) < 0 - then raise Exit - else let t = times.(j) + latency in - if t > !time - then time := t) predecessors.(i); - assert(!time >= 0); - !time - with Exit -> -1 - - in - let advance_time() = - begin - (if !current_time < max_time-1 - then - begin - Array.blit problem.resource_bounds 0 current_resources 0 - (Array.length current_resources); - ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); - ready.(!current_time) <- InstrSet.empty; - end); - incr current_time - end in - - let attempt_scheduling ready usages = - let result = ref (-1) in - try - InstrSet.iter (fun i -> - (* Printf.printf "trying scheduling %d\n" i; - pr int_vector usages.(i); - print _vector current_resources; *) - if vector_less_equal usages.(i) current_resources - then - begin - vector_subtract usages.(i) current_resources; - result := i; - raise Exit - end) ready; - -1 - with Exit -> !result in - - while !current_time < max_time - do - if (InstrSet.is_empty ready.(!current_time)) - then advance_time() - else - match attempt_scheduling ready.(!current_time) - problem.instruction_usages with - | -1 -> advance_time() - | i -> - begin - assert(times.(i) < 0); - times.(i) <- !current_time; - ready.(!current_time) <- InstrSet.remove i (ready.(!current_time)); - List.iter (fun (instr_to, latency) -> - if instr_to < nr_instructions then - match earliest_time instr_to with - | -1 -> () - | to_time -> - ready.(to_time) <- InstrSet.add instr_to ready.(to_time)) - successors.(i); - successors.(i) <- [] - end - done; - try - let final_time = ref (-1) in - for i=0 to nr_instructions-1 - do - (if times.(i) < 0 then raise Exit); - (if !final_time < times.(i)+1 then final_time := times.(i)+1) - done; - List.iter (fun (i, latency) -> - let target_time = latency + times.(i) in - if target_time > !final_time - then final_time := target_time - ) predecessors.(nr_instructions); - times.(nr_instructions) <- !final_time; - Some times - with Exit -> None;; - -let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; - -(* dummy code for placating ocaml's warnings *) -let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; - -type bundle = int list;; - -let rec extract_deps_to index = function - | [] -> [] - | dep :: deps -> let extracts = extract_deps_to index deps in - if (dep.instr_to == index) then - dep :: extracts - else - extracts - -exception InvalidBundle;; - -let dependency_check problem bundle index = - let index_deps = extract_deps_to index problem.latency_constraints in - List.iter (fun i -> - List.iter (fun dep -> - if (dep.instr_from == i) then raise InvalidBundle - ) index_deps - ) bundle;; - -let rec make_bundle problem resources bundle index = - let resources_copy = Array.copy resources in - let nr_instructions = get_nr_instructions problem in - if (index >= nr_instructions) then (bundle, index+1) else - let inst_usage = problem.instruction_usages.(index) in - try match vector_less_equal inst_usage resources with - | false -> raise InvalidBundle - | true -> ( - dependency_check problem bundle index; - vector_subtract problem.instruction_usages.(index) resources_copy; - make_bundle problem resources_copy (index::bundle) (index+1) - ) - with InvalidBundle -> (bundle, index);; - -let rec make_bundles problem index : bundle list = - if index >= get_nr_instructions problem then - [] - else - let (bundle, new_index) = make_bundle problem problem.resource_bounds [] index in - bundle :: (make_bundles problem new_index);; - -let bundles_to_schedule problem bundles : solution = - let nr_instructions = get_nr_instructions problem in - let schedule = Array.make (nr_instructions+1) (nr_instructions+4) in - let time = ref 0 in - List.iter (fun bundle -> - begin - List.iter (fun i -> - schedule.(i) <- !time - ) bundle; - time := !time + 1 - end - ) bundles; schedule;; - -let greedy_scheduler (problem : problem) : solution option = - let bundles = make_bundles problem 0 in - Some (bundles_to_schedule problem bundles);; - -(* alternate implementation -let swap_array_elements a i j = - let x = a.(i) in - a.(i) <- a.(j); - a.(j) <- x;; - -let array_reverse_slice a first last = - let i = ref first and j = ref last in - while i < j - do - swap_array_elements a !i !j; - incr i; - decr j - done;; - -let array_reverse a = - let a' = Array.copy a in - array_reverse_slice a' 0 ((Array.length a)-1); - a';; - *) - -(* unneeded -let array_reverse a = - let n=Array.length a in - Array.init n (fun i -> a.(n-1-i));; - *) - -let reverse_constraint nr_instructions ctr = - { instr_to = nr_instructions -ctr.instr_from; - instr_from = nr_instructions - ctr.instr_to; - latency = ctr.latency };; - -(* unneeded -let rec list_map_filter f = function - | [] -> [] - | h::t -> - (match f h with - | None -> list_map_filter f t - | Some x -> x :: (list_map_filter f t));; - *) - -let reverse_problem problem = - let nr_instructions = get_nr_instructions problem in - { - max_latency = problem.max_latency; - resource_bounds = problem.resource_bounds; - instruction_usages = Array.init (nr_instructions + 1) - (fun i -> - if i=0 - then Array.map (fun _ -> 0) problem.resource_bounds else problem.instruction_usages.(nr_instructions - i)); - latency_constraints = List.map (reverse_constraint nr_instructions) - problem.latency_constraints - };; - -let max_scheduled_time solution = - let time = ref (-1) in - for i = 0 to ((Array.length solution) - 2) - do - time := max !time solution.(i) - done; - !time;; - -(* -let recompute_makespan problem solution = - let n = (Array.length solution) - 1 and ms = ref 0 in - List.iter (fun cstr -> - if cstr.instr_to = n - then ms := max !ms (solution.(cstr.instr_from) + cstr.latency) - ) problem.latency_constraints; - !ms;; - *) - -let schedule_reversed (scheduler : problem -> solution option) - (problem : problem) = - match scheduler (reverse_problem problem) with - | None -> None - | Some solution -> - let nr_instructions = get_nr_instructions problem in - let makespan = max_scheduled_time solution in - let ret = Array.init (nr_instructions + 1) - (fun i -> makespan-solution.(nr_instructions-i)) in - ret.(nr_instructions) <- max ((max_scheduled_time ret) + 1) - (ret.(nr_instructions)); - Some ret;; - -(** Schedule the problem using a greedy list scheduling algorithm, from the end. *) -let reverse_list_scheduler = schedule_reversed list_scheduler;; - -let check_problem problem = - (if (Array.length problem.instruction_usages) < 1 - then failwith "length(problem.instruction_usages) < 1");; - -let validated_scheduler (scheduler : problem -> solution option) - (problem : problem) = - check_problem problem; - match scheduler problem with - | None -> None - | (Some solution) as ret -> check_schedule problem solution; ret;; - -let get_max_latency solution = - solution.((Array.length solution)-1);; - -let show_date_ranges problem = - let deadline = problem.max_latency in - assert(deadline >= 0); - let successors = get_successors problem - and predecessors = get_predecessors problem in - let earliest_dates : int array = get_earliest_dates predecessors - and latest_dates : int array = get_latest_dates deadline successors in - assert ((Array.length earliest_dates) = - (Array.length latest_dates)); - Array.iteri (fun i early -> - let late = latest_dates.(i) in - Printf.printf "t[%d] in %d..%d\n" i early late) - earliest_dates;; - -type pseudo_boolean_problem_type = - | SATISFIABILITY - | OPTIMIZATION;; - -type pseudo_boolean_mapper = { - mapper_pb_type : pseudo_boolean_problem_type; - mapper_nr_instructions : int; - mapper_nr_pb_variables : int; - mapper_earliest_dates : int array; - mapper_latest_dates : int array; - mapper_var_offsets : int array; - mapper_final_predecessors : (int * int) list -};; - -(* Latency constraints are: - presence of instr-to at each t <= sum of presences of instr-from at compatible times - - if reverse_encoding - presence of instr-from at each t <= sum of presences of instr-to at compatible times *) - -(* Experiments show reverse_encoding=true multiplies time by 2 in sat4j - without making hard instances easier *) -let direct_encoding = false -and reverse_encoding = false -and delta_encoding = true - -let pseudo_boolean_print_problem channel problem pb_type = - let deadline = problem.max_latency in - assert (deadline > 0); - let nr_instructions = get_nr_instructions problem - and nr_resources = get_nr_resources problem - and successors = get_successors problem - and predecessors = get_predecessors problem in - let earliest_dates = get_earliest_dates predecessors - and latest_dates = get_latest_dates deadline successors in - let var_offsets = Array.make - (match pb_type with - | OPTIMIZATION -> nr_instructions+1 - | SATISFIABILITY -> nr_instructions) 0 in - let nr_pb_variables = - (let nr = ref 0 in - for i=0 to (match pb_type with - | OPTIMIZATION -> nr_instructions - | SATISFIABILITY -> nr_instructions-1) - do - var_offsets.(i) <- !nr; - nr := !nr + latest_dates.(i) - earliest_dates.(i) + 1 - done; - !nr) - and nr_pb_constraints = - (match pb_type with - | OPTIMIZATION -> nr_instructions+1 - | SATISFIABILITY -> nr_instructions) + - - (let count = ref 0 in - for t=0 to deadline-1 - do - for j=0 to nr_resources-1 - do - try - for i=0 to nr_instructions-1 - do - let usage = problem.instruction_usages.(i).(j) in - if t >= earliest_dates.(i) && t <= latest_dates.(i) - && usage > 0 then raise Exit - done - with Exit -> incr count - done - done; - !count) + - - (let count=ref 0 in - List.iter - (fun ctr -> - if ctr.instr_to < nr_instructions - then count := !count + 1 + latest_dates.(ctr.instr_to) - - earliest_dates.(ctr.instr_to) - + (if reverse_encoding - then 1 + latest_dates.(ctr.instr_from) - - earliest_dates.(ctr.instr_from) - else 0) - ) - problem.latency_constraints; - !count) + - - (match pb_type with - | OPTIMIZATION -> (1 + deadline - earliest_dates.(nr_instructions)) * nr_instructions - | SATISFIABILITY -> 0) - and measured_nr_constraints = ref 0 in - - let pb_var i t = - assert(t >= earliest_dates.(i)); - assert(t <= latest_dates.(i)); - let v = 1+var_offsets.(i)+t-earliest_dates.(i) in - assert(v <= nr_pb_variables); - Printf.sprintf "x%d" v in - - let end_constraint () = - begin - output_string channel ";\n"; - incr measured_nr_constraints - end in - - let gen_latency_constraint i_to i_from latency t_to = - Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n" - i_to i_from latency i_to t_to; - for t_from=earliest_dates.(i_from) to - int_min latest_dates.(i_from) (t_to - latency) - do - Printf.fprintf channel "+1 %s " (pb_var i_from t_from) - done; - Printf.fprintf channel "-1 %s " (pb_var i_to t_to); - Printf.fprintf channel ">= 0"; - end_constraint() - - and gen_dual_latency_constraint i_to i_from latency t_from = - Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n" - i_to i_from latency i_to t_from; - for t_to=int_max earliest_dates.(i_to) (t_from + latency) - to latest_dates.(i_to) - do - Printf.fprintf channel "+1 %s " (pb_var i_to t_to) - done; - Printf.fprintf channel "-1 %s " (pb_var i_from t_from); - Printf.fprintf channel ">= 0"; - end_constraint() - in - - Printf.fprintf channel "* #variable= %d #constraint= %d\n" nr_pb_variables nr_pb_constraints; - Printf.fprintf channel "* nr_instructions=%d deadline=%d\n" nr_instructions deadline; - begin - match pb_type with - | SATISFIABILITY -> () - | OPTIMIZATION -> - output_string channel "min:"; - for t=earliest_dates.(nr_instructions) to deadline - do - Printf.fprintf channel " %+d %s" t (pb_var nr_instructions t) - done; - output_string channel ";\n"; - end; - for i=0 to (match pb_type with - | OPTIMIZATION -> nr_instructions - | SATISFIABILITY -> nr_instructions-1) - do - let early = earliest_dates.(i) and late= latest_dates.(i) in - Printf.fprintf channel "* t[%d] in %d..%d\n" i early late; - for t=early to late - do - Printf.fprintf channel "+1 %s " (pb_var i t) - done; - Printf.fprintf channel "= 1"; - end_constraint() - done; - - for t=0 to deadline-1 - do - for j=0 to nr_resources-1 - do - let bound = problem.resource_bounds.(j) - and coeffs = ref [] in - for i=0 to nr_instructions-1 - do - let usage = problem.instruction_usages.(i).(j) in - if t >= earliest_dates.(i) && t <= latest_dates.(i) - && usage > 0 - then coeffs := (i, usage) :: !coeffs - done; - if !coeffs <> [] then - begin - Printf.fprintf channel "* resource #%d at t=%d <= %d\n" j t bound; - List.iter (fun (i, usage) -> - Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs; - Printf.fprintf channel ">= %d" (-bound); - end_constraint(); - end - done - done; - - List.iter - (fun ctr -> - if ctr.instr_to < nr_instructions then - begin - for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to) - do - gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to - done; - if reverse_encoding - then - for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from) - do - gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from - done - end - ) problem.latency_constraints; - - begin - match pb_type with - | SATISFIABILITY -> () - | OPTIMIZATION -> - let final_latencies = Array.make nr_instructions 1 in - List.iter (fun (i, latency) -> - final_latencies.(i) <- int_max final_latencies.(i) latency) - predecessors.(nr_instructions); - for t_to=earliest_dates.(nr_instructions) to deadline - do - for i_from = 0 to nr_instructions -1 - do - gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to - done - done - end; - assert (!measured_nr_constraints = nr_pb_constraints); - { - mapper_pb_type = pb_type; - mapper_nr_instructions = nr_instructions; - mapper_nr_pb_variables = nr_pb_variables; - mapper_earliest_dates = earliest_dates; - mapper_latest_dates = latest_dates; - mapper_var_offsets = var_offsets; - mapper_final_predecessors = predecessors.(nr_instructions) - };; - -type pb_answer = - | Positive - | Negative - | Unknown - -let line_to_pb_solution sol line nr_pb_variables = - let assign s v = - begin - let i = int_of_string s in - sol.(i-1) <- v - end in - List.iter - begin - function "" -> () - | item -> - (match String.get item 0 with - | '+' -> - assert ((String.length item) >= 3); - assert ((String.get item 1) = 'x'); - assign (String.sub item 2 ((String.length item)-2)) Positive - | '-' -> - assert ((String.length item) >= 3); - assert ((String.get item 1) = 'x'); - assign (String.sub item 2 ((String.length item)-2)) Negative - | 'x' -> - assert ((String.length item) >= 2); - assign (String.sub item 1 ((String.length item)-1)) Positive - | _ -> failwith "syntax error in pseudo Boolean solution: epected + - or x" - ) - end - (String.split_on_char ' ' (String.sub line 2 ((String.length line)-2)));; - -let pb_solution_to_schedule mapper pb_solution = - Array.mapi (fun i offset -> - let first = mapper.mapper_earliest_dates.(i) - and last = mapper.mapper_latest_dates.(i) - and time = ref (-1) in - for t=first to last - do - match pb_solution.(t - first + offset) with - | Positive -> - (if !time = -1 - then time:=t - else failwith "duplicate time in pseudo boolean solution") - | Negative -> () - | Unknown -> failwith "unknown value in pseudo boolean solution" - done; - (if !time = -1 - then failwith "no time in pseudo boolean solution"); - !time - ) mapper.mapper_var_offsets;; - -let pseudo_boolean_read_solution mapper channel = - let optimum = ref (-1) - and optimum_found = ref false - and solution = Array.make mapper.mapper_nr_pb_variables Unknown in - try - while true do - match input_line channel with - | "" -> () - | line -> - begin - match String.get line 0 with - | 'c' -> () - | 'o' -> - assert ((String.length line) >= 2); - assert ((String.get line 1) = ' '); - optimum := int_of_string (String.sub line 2 ((String.length line)-2)) - | 's' -> (match line with - | "s OPTIMUM FOUND" -> optimum_found := true - | "s SATISFIABLE" -> () - | "s UNSATISFIABLE" -> close_in channel; - raise Unschedulable - | _ -> failwith line) - | 'v' -> line_to_pb_solution solution line mapper.mapper_nr_pb_variables - | x -> Printf.printf "unknown: %s\n" line - end - done; - assert false - with End_of_file -> - close_in channel; - begin - let sol = pb_solution_to_schedule mapper solution in - sol - end;; - -let recompute_max_latency mapper solution = - let maxi = ref (-1) in - for i=0 to (mapper.mapper_nr_instructions-1) - do - maxi := int_max !maxi (1+solution.(i)) - done; - List.iter (fun (i, latency) -> - maxi := int_max !maxi (solution.(i) + latency)) mapper.mapper_final_predecessors; - !maxi;; - -let adjust_check_solution mapper solution = - match mapper.mapper_pb_type with - | OPTIMIZATION -> - let max_latency = recompute_max_latency mapper solution in - assert (max_latency = solution.(mapper.mapper_nr_instructions)); - solution - | SATISFIABILITY -> - let max_latency = recompute_max_latency mapper solution in - Array.init (mapper.mapper_nr_instructions+1) - (fun i -> if i < mapper.mapper_nr_instructions - then solution.(i) - else max_latency);; - -(* let pseudo_boolean_solver = ref "/local/monniaux/progs/naps/naps" *) -(* let pseudo_boolean_solver = ref "/local/monniaux/packages/sat4j/org.sat4j.pb.jar CuttingPlanes" *) - -(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar CuttingPlanes" *) -(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar" *) -(* let pseudo_boolean_solver = ref "clasp" *) -(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/open-wbo/open-wbo_static -formula=1" *) -(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/naps/naps" *) -(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/minisatp/build/release/bin/minisatp" *) -(* let pseudo_boolean_solver = ref "java -jar sat4j-pb.jar CuttingPlanesStar" *) -let pseudo_boolean_solver = ref "pb_solver" - -let pseudo_boolean_scheduler pb_type problem = - try - let filename_in = "problem.opb" in - (* needed only if not using stdout and filename_out = "problem.sol" *) - let mapper = - with_out_channel (open_out filename_in) - (fun opb_problem -> - pseudo_boolean_print_problem opb_problem problem pb_type) in - Some (with_in_channel - (Unix.open_process_in (!pseudo_boolean_solver ^ " " ^ filename_in)) - (fun opb_solution -> adjust_check_solution mapper (pseudo_boolean_read_solution mapper opb_solution))) - with - | Unschedulable -> None;; - -let rec reoptimizing_scheduler (scheduler : scheduler) (previous_solution : solution) (problem : problem) = - if (get_max_latency previous_solution)>1 then - begin - Printf.printf "reoptimizing < %d\n" (get_max_latency previous_solution); - flush stdout; - match scheduler - { problem with max_latency = (get_max_latency previous_solution)-1 } - with - | None -> previous_solution - | Some solution -> reoptimizing_scheduler scheduler solution problem - end - else previous_solution;; - -let smt_var i = Printf.sprintf "t%d" i - -let is_resource_used problem j = - try - Array.iter (fun usages -> - if usages.(j) > 0 - then raise Exit) problem.instruction_usages; - false - with Exit -> true;; - -let smt_use_quantifiers = false - -let smt_print_problem channel problem = - let nr_instructions = get_nr_instructions problem in - let gen_smt_resource_constraint time j = - output_string channel "(<= (+"; - Array.iteri - (fun i usages -> - let usage=usages.(j) in - if usage > 0 - then Printf.fprintf channel " (ite (= %s %s) %d 0)" - time (smt_var i) usage) - problem.instruction_usages; - Printf.fprintf channel ") %d)" problem.resource_bounds.(j) - in - output_string channel "(set-option :produce-models true)\n"; - for i=0 to nr_instructions - do - Printf.fprintf channel "(declare-const %s Int)\n" (smt_var i); - Printf.fprintf channel "(assert (>= %s 0))\n" (smt_var i) - done; - for i=0 to nr_instructions-1 - do - Printf.fprintf channel "(assert (< %s %s))\n" - (smt_var i) (smt_var nr_instructions) - done; - (if problem.max_latency > 0 - then Printf.fprintf channel "(assert (<= %s %d))\n" - (smt_var nr_instructions) problem.max_latency); - List.iter (fun ctr -> - Printf.fprintf channel "(assert (>= (- %s %s) %d))\n" - (smt_var ctr.instr_to) - (smt_var ctr.instr_from) - ctr.latency) problem.latency_constraints; - for j=0 to (Array.length problem.resource_bounds)-1 - do - if is_resource_used problem j - then - begin - if smt_use_quantifiers - then - begin - Printf.fprintf channel - "; resource #%d <= %d\n(assert (forall ((t Int)) " - j problem.resource_bounds.(j); - gen_smt_resource_constraint "t" j; - output_string channel "))\n" - end - else - begin - (if problem.max_latency < 0 - then failwith "quantifier explosion needs max latency"); - for t=0 to problem.max_latency - do - Printf.fprintf channel - "; resource #%d <= %d at t=%d\n(assert " - j problem.resource_bounds.(j) t; - gen_smt_resource_constraint (string_of_int t) j; - output_string channel ")\n" - done - end - end - done; - output_string channel "(check-sat)(get-model)\n";; - - -let ilp_print_problem channel problem pb_type = - let deadline = problem.max_latency in - assert (deadline > 0); - let nr_instructions = get_nr_instructions problem - and nr_resources = get_nr_resources problem - and successors = get_successors problem - and predecessors = get_predecessors problem in - let earliest_dates = get_earliest_dates predecessors - and latest_dates = get_latest_dates deadline successors in - - let pb_var i t = - Printf.sprintf "x%d_%d" i t in - - let gen_latency_constraint i_to i_from latency t_to = - Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n" - i_to i_from latency i_to t_to; - Printf.fprintf channel "c_%d_%d_%d_%d: " - i_to i_from latency t_to; - for t_from=earliest_dates.(i_from) to - int_min latest_dates.(i_from) (t_to - latency) - do - Printf.fprintf channel "+1 %s " (pb_var i_from t_from) - done; - Printf.fprintf channel "-1 %s " (pb_var i_to t_to); - output_string channel ">= 0\n" - - and gen_dual_latency_constraint i_to i_from latency t_from = - Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n" - i_to i_from latency i_to t_from; - Printf.fprintf channel "d_%d_%d_%d_%d: " - i_to i_from latency t_from; - for t_to=int_max earliest_dates.(i_to) (t_from + latency) - to latest_dates.(i_to) - do - Printf.fprintf channel "+1 %s " (pb_var i_to t_to) - done; - Printf.fprintf channel "-1 %s " (pb_var i_from t_from); - Printf.fprintf channel ">= 0\n" - - and gen_delta_constraint i_from i_to latency = - if delta_encoding - then Printf.fprintf channel "l_%d_%d_%d: +1 t%d -1 t%d >= %d\n" - i_from i_to latency i_to i_from latency - - in - - Printf.fprintf channel "\\ nr_instructions=%d deadline=%d\n" nr_instructions deadline; - begin - match pb_type with - | SATISFIABILITY -> output_string channel "Minimize dummy: 0\n" - | OPTIMIZATION -> - Printf.fprintf channel "Minimize\nmakespan: t%d\n" nr_instructions - end; - output_string channel "Subject To\n"; - for i=0 to (match pb_type with - | OPTIMIZATION -> nr_instructions - | SATISFIABILITY -> nr_instructions-1) - do - let early = earliest_dates.(i) and late= latest_dates.(i) in - Printf.fprintf channel "\\ t[%d] in %d..%d\ntimes%d: " i early late i; - for t=early to late - do - Printf.fprintf channel "+1 %s " (pb_var i t) - done; - Printf.fprintf channel "= 1\n" - done; - - for t=0 to deadline-1 - do - for j=0 to nr_resources-1 - do - let bound = problem.resource_bounds.(j) - and coeffs = ref [] in - for i=0 to nr_instructions-1 - do - let usage = problem.instruction_usages.(i).(j) in - if t >= earliest_dates.(i) && t <= latest_dates.(i) - && usage > 0 - then coeffs := (i, usage) :: !coeffs - done; - if !coeffs <> [] then - begin - Printf.fprintf channel "\\ resource #%d at t=%d <= %d\nr%d_%d: " j t bound j t; - List.iter (fun (i, usage) -> - Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs; - Printf.fprintf channel ">= %d\n" (-bound) - end - done - done; - - List.iter - (fun ctr -> - if ctr.instr_to < nr_instructions then - begin - gen_delta_constraint ctr.instr_from ctr.instr_to ctr.latency; - begin - if direct_encoding - then - for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to) - do - gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to - done - end; - begin - if reverse_encoding - then - for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from) - do - gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from - done - end - end - ) problem.latency_constraints; - - begin - match pb_type with - | SATISFIABILITY -> () - | OPTIMIZATION -> - let final_latencies = Array.make nr_instructions 1 in - List.iter (fun (i, latency) -> - final_latencies.(i) <- int_max final_latencies.(i) latency) - predecessors.(nr_instructions); - for i_from = 0 to nr_instructions -1 - do - gen_delta_constraint i_from nr_instructions final_latencies.(i_from) - done; - for t_to=earliest_dates.(nr_instructions) to deadline - do - for i_from = 0 to nr_instructions -1 - do - gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to - done - done - end; - for i=0 to (match pb_type with - | OPTIMIZATION -> nr_instructions - | SATISFIABILITY -> nr_instructions-1) - do - Printf.fprintf channel "ct%d : -1 t%d" i i; - let early = earliest_dates.(i) and late= latest_dates.(i) in - for t=early to late do - Printf.fprintf channel " +%d %s" t (pb_var i t) - done; - output_string channel " = 0\n" - done; - output_string channel "Bounds\n"; - for i=0 to (match pb_type with - | OPTIMIZATION -> nr_instructions - | SATISFIABILITY -> nr_instructions-1) - do - let early = earliest_dates.(i) and late= latest_dates.(i) in - begin - Printf.fprintf channel "%d <= t%d <= %d\n" early i late; - if true then - for t=early to late do - Printf.fprintf channel "0 <= %s <= 1\n" (pb_var i t) - done - end - done; - output_string channel "Integer\n"; - for i=0 to (match pb_type with - | OPTIMIZATION -> nr_instructions - | SATISFIABILITY -> nr_instructions-1) - do - Printf.fprintf channel "t%d " i - done; - output_string channel "\nBinary\n"; - for i=0 to (match pb_type with - | OPTIMIZATION -> nr_instructions - | SATISFIABILITY -> nr_instructions-1) - do - let early = earliest_dates.(i) and late= latest_dates.(i) in - for t=early to late do - output_string channel (pb_var i t); - output_string channel " " - done; - output_string channel "\n" - done; - output_string channel "End\n"; - { - mapper_pb_type = pb_type; - mapper_nr_instructions = nr_instructions; - mapper_nr_pb_variables = 0; - mapper_earliest_dates = earliest_dates; - mapper_latest_dates = latest_dates; - mapper_var_offsets = [| |]; - mapper_final_predecessors = predecessors.(nr_instructions) - };; - -(* Guess what? Cplex sometimes outputs 11.000000004 instead of integer 11 *) - -let positive_float_round x = truncate (x +. 0.5) - -let float_round (x : float) : int = - if x > 0.0 - then positive_float_round x - else - (positive_float_round (-. x)) - -let rounded_int_of_string x = float_round (float_of_string x) - -let ilp_read_solution mapper channel = - let times = Array.make - (match mapper.mapper_pb_type with - | OPTIMIZATION -> 1+mapper.mapper_nr_instructions - | SATISFIABILITY -> mapper.mapper_nr_instructions) (-1) in - try - while true do - let line = input_line channel in - ( if (String.length line) < 3 - then failwith (Printf.sprintf "bad ilp output: length(line) < 3: %s" line)); - match String.get line 0 with - | 'x' -> () - | 't' -> let space = - try String.index line ' ' - with Not_found -> - failwith "bad ilp output: no t variable number" - in - let tnumber = - try int_of_string (String.sub line 1 (space-1)) - with Failure _ -> - failwith "bad ilp output: not a variable number" - in - (if tnumber < 0 || tnumber >= (Array.length times) - then failwith (Printf.sprintf "bad ilp output: not a correct variable number: %d (%d)" tnumber (Array.length times))); - let value = - let s = String.sub line (space+1) ((String.length line)-space-1) in - try rounded_int_of_string s - with Failure _ -> - failwith (Printf.sprintf "bad ilp output: not a time number (%s)" s) - in - (if value < 0 - then failwith "bad ilp output: negative time"); - times.(tnumber) <- value - | '#' -> () - | '0' -> () - | _ -> failwith (Printf.sprintf "bad ilp output: bad variable initial, line = %s" line) - done; - assert false - with End_of_file -> - Array.iteri (fun i x -> - if i<(Array.length times)-1 - && x<0 then raise Unschedulable) times; - times;; - -let ilp_solver = ref "ilp_solver" - -let problem_nr = ref 0 - -let ilp_scheduler pb_type problem = - try - let filename_in = Printf.sprintf "problem%05d.lp" !problem_nr - and filename_out = Printf.sprintf "problem%05d.sol" !problem_nr in - incr problem_nr; - let mapper = with_out_channel (open_out filename_in) - (fun opb_problem -> ilp_print_problem opb_problem problem pb_type) in - - begin - match Unix.system (!ilp_solver ^ " " ^ filename_in ^ " " ^ filename_out) with - | Unix.WEXITED 0 -> - Some (with_in_channel - (open_in filename_out) - (fun opb_solution -> - adjust_check_solution mapper - (ilp_read_solution mapper opb_solution))) - | Unix.WEXITED _ -> failwith "failed to start ilp solver" - | _ -> None - end - with - | Unschedulable -> None;; - -let current_utime_all () = - let t = Unix.times() in - t.Unix.tms_cutime +. t.Unix.tms_utime;; - -let utime_all_fn fn arg = - let utime_start = current_utime_all () in - let output = fn arg in - let utime_end = current_utime_all () in - (output, utime_end -. utime_start);; - -let cascaded_scheduler (problem : problem) = - let (some_initial_solution, list_scheduler_time) = - utime_all_fn (validated_scheduler list_scheduler) problem in - match some_initial_solution with - | None -> None - | Some initial_solution -> - let (solution, reoptimizing_time) = utime_all_fn (reoptimizing_scheduler (validated_scheduler (ilp_scheduler SATISFIABILITY)) initial_solution) problem in - begin - let latency2 = get_max_latency solution - and latency1 = get_max_latency initial_solution in - Printf.printf "postpass %s: %d, %d, %d, %g, %g\n" - (if latency2 < latency1 then "REOPTIMIZED" else "unchanged") - (get_nr_instructions problem) - latency1 latency2 - list_scheduler_time reoptimizing_time; - flush stdout - end; - Some solution;; - -let scheduler_by_name name = - match name with - | "ilp" -> validated_scheduler cascaded_scheduler - | "list" -> validated_scheduler list_scheduler - | "revlist" -> validated_scheduler reverse_list_scheduler - | "greedy" -> greedy_scheduler - | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/aarch64/InstructionScheduler.mli b/aarch64/InstructionScheduler.mli deleted file mode 100644 index 85e2a5c6..00000000 --- a/aarch64/InstructionScheduler.mli +++ /dev/null @@ -1,113 +0,0 @@ -(** Schedule instructions on a synchronized pipeline -by David Monniaux, CNRS, VERIMAG *) - -(** A latency constraint: instruction number [instr_to] should be scheduled at least [latency] clock ticks before [instr_from]. - -It is possible to specify [latency]=0, meaning that [instr_to] can be scheduled at the same clock tick as [instr_from], but not before. - -[instr_to] can be the special value equal to the number of instructions, meaning that it refers to the final output latency. *) -type latency_constraint = { - instr_from : int; - instr_to : int; - latency : int; - } - -(** A scheduling problem. - -In addition to the latency constraints, the resource constraints should be satisfied: at every clock tick, the sum of vectors of resources used by the instructions scheduled at that tick does not exceed the resource bounds. -*) -type problem = { - max_latency : int; - (** An optional maximal total latency of the problem, after which the problem is deemed not schedulable. -1 means there should be no maximum. *) - - resource_bounds : int array; - (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) - - instruction_usages: int array array; - (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) - - latency_constraints : latency_constraint list - (** The latency constraints that must be satisfied *) - };; - -(** Print problem for human readability. *) -val print_problem : out_channel -> problem -> unit;; - -(** Scheduling solution. For {i n} instructions to schedule, and 0≤{i i}<{i n}, position {i i} contains the time to which instruction {i i} should be scheduled. Position {i n} contains the final output latency. *) -type solution = int array - -(** A scheduling algorithm. -The return value [Some x] is a solution [x]. -[None] means that scheduling failed. *) -type scheduler = problem -> solution option;; - -(* DISABLED -(** Schedule the problem optimally by constraint solving using the Gecode solver. *) -external gecode_scheduler : problem -> solution option - = "caml_gecode_schedule_instr" - *) - -(** Get the number the last scheduling time used for an instruction in a solution. -@return The last clock tick used *) -val maximum_slot_used : solution -> int - -(** Validate that a solution is truly a solution of a scheduling problem. -@raise Failure if validation fails *) -val check_schedule : problem -> solution -> unit - -(** Schedule the problem using a greedy list scheduling algorithm, from the start. -The first (according to instruction ordering) instruction that is ready (according to the latency constraints) is scheduled at the current clock tick. -Once a clock tick is full go to the next. - -@return [Some solution] when a solution is found, [None] if not. *) -val list_scheduler : problem -> solution option - -(** Schedule the problem using the order of instructions without any reordering *) -val greedy_scheduler : problem -> solution option - -(** Schedule a problem using a scheduler applied in the opposite direction, e.g. for list scheduling from the end instead of the start. BUGGY *) -val schedule_reversed : scheduler -> problem -> int array option - -(** Schedule a problem from the end using a list scheduler. BUGGY *) -val reverse_list_scheduler : problem -> int array option - -(** Check that a problem is well-formed. -@raise Failure if validation fails *) -val check_problem : problem -> unit - -(** Apply a scheduler and validate the result against the input problem. -@return The solution found -@raise Failure if validation fails *) -val validated_scheduler : scheduler -> problem -> solution option;; - -(** Get max latency from solution -@return Max latency *) -val get_max_latency : solution -> int;; - -(** Get the length of a maximal critical path -@return Max length *) -val maximum_critical_path : problem -> int;; - -(** Apply line scheduler then advanced solver -@return A solution if found *) -val cascaded_scheduler : problem -> solution option;; - -val show_date_ranges : problem -> unit;; - -type pseudo_boolean_problem_type = - | SATISFIABILITY - | OPTIMIZATION;; - -type pseudo_boolean_mapper -val pseudo_boolean_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;; -val pseudo_boolean_read_solution : pseudo_boolean_mapper -> in_channel -> solution;; -val pseudo_boolean_scheduler : pseudo_boolean_problem_type -> problem -> solution option;; - -val smt_print_problem : out_channel -> problem -> unit;; - -val ilp_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;; - -val ilp_scheduler : pseudo_boolean_problem_type -> problem -> solution option;; - -(** Schedule a problem using a scheduler given by a string name *) -val scheduler_by_name : string -> problem -> int array option;; diff --git a/aarch64/Op.v b/aarch64/Op.v index f720e545..f2a8e6fb 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -386,8 +386,8 @@ Definition eval_operation | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2) | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3)) | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3)) - | Odiv, v1 :: v2 :: nil => Val.divs v1 v2 - | Odivu, v1 :: v2 :: nil => Val.divu v1 v2 + | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2)) + | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2)) | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2) | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a)) | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n)) @@ -408,7 +408,7 @@ Definition eval_operation | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2) | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2) | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2) - | Oshrximm n, v1::nil => Val.shrx v1 (Vint n) + | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n))) | Ozext s, v1 :: nil => Some (Val.zero_ext s v1) | Osext s, v1 :: nil => Some (Val.sign_ext s v1) | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a)) @@ -435,8 +435,8 @@ Definition eval_operation | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3)) | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2) | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2) - | Odivl, v1 :: v2 :: nil => Val.divls v1 v2 - | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2 + | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2)) + | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2)) | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2) | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a)) | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n)) @@ -457,7 +457,7 @@ Definition eval_operation | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2) | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2) | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2) - | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n) + | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n))) | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1) | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1) | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a)) @@ -481,22 +481,22 @@ Definition eval_operation | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1) | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1) - | Ointoffloat, v1::nil => Val.intoffloat v1 - | Ointuoffloat, v1::nil => Val.intuoffloat v1 - | Ofloatofint, v1::nil => Val.floatofint v1 - | Ofloatofintu, v1::nil => Val.floatofintu v1 - | Ointofsingle, v1::nil => Val.intofsingle v1 - | Ointuofsingle, v1::nil => Val.intuofsingle v1 - | Osingleofint, v1::nil => Val.singleofint v1 - | Osingleofintu, v1::nil => Val.singleofintu v1 - | Olongoffloat, v1::nil => Val.longoffloat v1 - | Olonguoffloat, v1::nil => Val.longuoffloat v1 - | Ofloatoflong, v1::nil => Val.floatoflong v1 - | Ofloatoflongu, v1::nil => Val.floatoflongu v1 - | Olongofsingle, v1::nil => Val.longofsingle v1 - | Olonguofsingle, v1::nil => Val.longuofsingle v1 - | Osingleoflong, v1::nil => Val.singleoflong v1 - | Osingleoflongu, v1::nil => Val.singleoflongu v1 + | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1)) + | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1)) + | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1)) + | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1)) + | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1)) + | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1)) + | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1)) + | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1)) + | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1)) + | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1)) + | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1)) + | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1)) + | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1)) + | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1)) + | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1)) + | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1)) | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m)) | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty) @@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... destruct v1... - apply type_add. - apply type_sub. - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero); inv H2... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (_ || _); trivial... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (Int.eq i0 Int.zero); constructor. - destruct v0... destruct v1... - destruct v0... destruct (eval_shift s v1 a)... - destruct v0... @@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... @@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - apply type_subl. - destruct v0... destruct v1... - destruct v0... destruct v1... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero); inv H2... + - destruct v0; destruct v1; cbn; trivial. + destruct (_ || _); cbn; trivial. + - destruct v0; destruct v1; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. - destruct v0... destruct v1... - destruct v0... destruct (eval_shiftl s v1 a)... - destruct v0... @@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... @@ -893,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... - destruct v0... (* intoffloat, intuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial. (* floatofint, floatofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* intofsingle, intuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial. (* singleofint, singleofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longoffloat, longuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial. (* floatoflong, floatoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longofsingle, longuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial. (* singleoflong, singleoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* cmp *) - destruct (eval_condition cond vl m) as [[]|]... - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I. @@ -924,16 +926,7 @@ Qed. Definition is_trapping_op (op : operation) := match op with - | Odiv | Odivu | Odivl | Odivlu - | Oshrximm _ | Oshrlximm _ - | Ointoffloat | Ointuoffloat - | Ointofsingle | Ointuofsingle - | Ofloatofint | Ofloatofintu - | Osingleofint | Osingleofintu - | Olongoffloat | Olonguoffloat - | Olongofsingle | Olonguofsingle - | Ofloatoflong | Ofloatoflongu - | Osingleoflong | Osingleoflongu => true + | Omove => false | _ => false end. @@ -1423,12 +1416,12 @@ Proof. - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto. - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto. (* div, divu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero - || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero); inv H2. TrivialExists. + - inv H4; inv H2; trivial. cbn. + destruct (_ || _); cbn; + constructor. + - inv H4; inv H2; trivial. cbn. + destruct (Int.eq i0 Int.zero); cbn; + constructor. (* and*) - inv H4; inv H2; simpl; auto. - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1460,8 +1453,8 @@ Proof. (* shru *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto. (* shrx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. @@ -1496,12 +1489,10 @@ Proof. - inv H4; inv H2; simpl; auto. - inv H4; inv H2; simpl; auto. (* divl, divlu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero - || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists. + - inv H4; inv H2; cbn; trivial. + destruct (_ || _); cbn; trivial. + - inv H4; inv H2; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. (* andl *) - inv H4; inv H2; simpl; auto. - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1533,8 +1524,8 @@ Proof. (* shrlu *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. (* shrlx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. @@ -1565,37 +1556,29 @@ Proof. - inv H4; simpl; auto. - inv H4; simpl; auto. (* intoffloat, intuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial. (* floatofint, floatofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* intofsingle, intuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial. (* singleofint, singleofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longoffloat, longuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial. (* floatoflong, floatoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longofsingle, longuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial. (* singleoflong, singleoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* cmp, sel *) - subst v1. destruct (eval_condition cond vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml new file mode 100644 index 00000000..5cdd002c --- /dev/null +++ b/aarch64/OpWeights.ml @@ -0,0 +1,353 @@ +open Op;; +open PrepassSchedulingOracleDeps;; + +module Cortex_A53= + struct + let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) + let nr_non_pipelined_units = 1;; + + let latency_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ -> 1 + | Oshift _ -> 2 + | Oadd -> 1 + | Oaddshift _ -> 2 + | Oaddimm _ + | Oneg -> 1 + | Onegshift _ -> 2 + | Osub -> 1 + | Osubshift _ -> 2 + | Omul + | Omuladd + | Omulsub -> 4 + | Odiv + | Odivu -> 29 + | Oand -> 1 + | Oandshift _ -> 2 + | Oandimm _ -> 1 + | Oor -> 1 + | Oorshift _ -> 2 + | Oorimm _ -> 1 + | Oxor -> 1 + | Oxorshift _ -> 2 + | Oxorimm _ -> 1 + | Onot -> 1 + | Onotshift _ -> 2 + | Obic -> 1 + | Obicshift _ -> 2 + | Oorn -> 1 + | Oornshift _ -> 2 + | Oeqv -> 1 + | Oeqvshift _ -> 2 + | Oshl + | Oshr + | Oshru -> 2 + | Oshrximm _ -> 6 + | Ozext _ + | Osext _ -> 1 + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ -> 2 + + (* 64-bit integer arithmetic *) + | Oshiftl _ -> 2 + | Oextend _ -> 1 + | Omakelong + | Olowlong + | Ohighlong + | Oaddl -> 1 + | Oaddlshift _ + | Oaddlext _ -> 2 + | Oaddlimm _ + | Onegl -> 1 + | Oneglshift _ -> 2 + | Osubl -> 1 + | Osublshift _ + | Osublext _ -> 2 + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> 4 + | Odivl -> 50 + | Odivlu -> 50 + | Oandl -> 1 + | Oandlshift _ -> 2 + | Oandlimm _ + | Oorl -> 1 + | Oorlshift _ -> 2 + | Oorlimm _ + | Oxorl -> 1 + | Oxorlshift _ -> 2 + | Oxorlimm _ + | Onotl -> 1 + | Onotlshift _ -> 2 + | Obicl -> 1 + | Obiclshift _ -> 2 + | Oornl -> 1 + | Oornlshift _ -> 2 + | Oeqvl -> 1 + | Oeqvlshift _ -> 2 + | Oshll + | Oshrl + | Oshrlu -> 2 + | Oshrlximm _ -> 6 + | Ozextl _ + | Osextl _ -> 1 + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> 2 + + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) +(* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> 6 + | Odivf -> 50 (* r [rd = r1 / r2] *) + | Odivfs -> 20 + (* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6 + | _ -> 1);; + + let resources_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ + (* 32-bit integer arithmetic *) + | Oshift _ + | Oadd + | Oaddshift _ + | Oaddimm _ + | Oneg + | Onegshift _ + | Osub + | Osubshift _ -> [| 1 ; 1; 0; 0 |] + | Omul + | Omuladd + | Omulsub -> [| 1; 1; 1; 0 |] + | Odiv + | Odivu -> [| 1; 0; 0; 0 |] + | Oand + | Oandshift _ + | Oandimm _ + | Oor + | Oorshift _ + | Oorimm _ + | Oxor + | Oxorshift _ + | Oxorimm _ + | Onot + | Onotshift _ + | Obic + | Obicshift _ + | Oorn + | Oornshift _ + | Oeqv + | Oeqvshift _ + | Oshl + | Oshr + | Oshru + | Oshrximm _ + | Ozext _ + | Osext _ + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ + +(* 64-bit integer arithmetic *) + | Oshiftl _ + | Oextend _ + | Omakelong + | Olowlong + | Ohighlong + | Oaddl + | Oaddlshift _ + | Oaddlext _ + | Oaddlimm _ + | Onegl + | Oneglshift _ + | Osubl + | Osublshift _ + | Osublext _ -> [| 1 ; 1 ; 0; 0 |] + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> [| 1 ; 1 ; 1; 0 |] + | Odivl + | Odivlu -> [| 1; 0; 0; 0 |] + | Oandl + | Oandlshift _ + | Oandlimm _ + | Oorl + | Oorlshift _ + | Oorlimm _ + | Oxorl + | Oxorlshift _ + | Oxorlimm _ + | Onotl + | Onotlshift _ + | Obicl + | Obiclshift _ + | Oornl + | Oornlshift _ + | Oeqvl + | Oeqvlshift _ + | Oshll + | Oshrl + | Oshrlu + | Oshrlximm _ + | Ozextl _ + | Osextl _ + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> [| 1; 1; 0; 0 |] + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf + (* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> [| 1 ; 1; 1; 0 |] + +(* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] + | _ -> [| -1 |];; + + let resources_of_cond (cmp : condition) (nargs : int) = + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; + let latency_of_call _ _ = 6;; + + let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_call _ _ = resource_bounds;; + let resources_of_builtin _ = resource_bounds;; + end;; + +let get_opweights () : opweights = + match !Clflags.option_mtune with + | "cortex-a53" | "cortex-a35" | "" -> + { + pipelined_resource_bounds = Cortex_A53.resource_bounds; + nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units; + latency_of_op = Cortex_A53.latency_of_op; + resources_of_op = Cortex_A53.resources_of_op; + non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op; + latency_of_load = Cortex_A53.latency_of_load; + resources_of_load = Cortex_A53.resources_of_load; + resources_of_store = Cortex_A53.resources_of_store; + resources_of_cond = Cortex_A53.resources_of_cond; + latency_of_call = Cortex_A53.latency_of_call; + resources_of_call = Cortex_A53.resources_of_call; + resources_of_builtin = Cortex_A53.resources_of_builtin + } + | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..2c3eb14f --- /dev/null +++ b/aarch64/PrepassSchedulingOracle.ml @@ -0,0 +1,477 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/aarch64/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v index 60dc1a12..513ee9bd 100644 --- a/aarch64/SelectLongproof.v +++ b/aarch64/SelectLongproof.v @@ -559,25 +559,29 @@ Qed. Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls. Proof. red; intros; unfold divls_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls. Proof. red; intros; unfold modls_base, modl_aux. exploit Val.modls_divls; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu. Proof. red; intros; unfold divlu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu. Proof. red; intros; unfold modlu_base, modl_aux. exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrxlimm: @@ -592,7 +596,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0. rewrite Int64.shrx'_zero. auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) @@ -726,42 +730,42 @@ Qed. Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. End CMCONSTR. diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v index 3379cbd8..9ce7a8bf 100644 --- a/aarch64/SelectOpproof.v +++ b/aarch64/SelectOpproof.v @@ -666,7 +666,8 @@ Theorem eval_divs_base: Val.divs x y = Some z -> exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v. Proof. - intros; unfold divs_base; TrivialExists. + intros; unfold divs_base; TrivialExists; cbn. + rewrite H1. reflexivity. Qed. Theorem eval_mods_base: @@ -678,7 +679,8 @@ Theorem eval_mods_base: Proof. intros; unfold mods_base, mod_aux. exploit Val.mods_divs; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + cbn. rewrite A. reflexivity. Qed. Theorem eval_divu_base: @@ -689,6 +691,7 @@ Theorem eval_divu_base: exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v. Proof. intros; unfold divu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modu_base: @@ -700,7 +703,8 @@ Theorem eval_modu_base: Proof. intros; unfold modu_base, mod_aux. exploit Val.modu_divu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrximm: @@ -715,7 +719,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0. rewrite Int.shrx_zero by (compute; auto). auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) @@ -928,7 +932,7 @@ Theorem eval_intoffloat: Val.intoffloat x = Some y -> exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofint: @@ -939,7 +943,7 @@ Theorem eval_floatofint: Proof. intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuoffloat: @@ -948,7 +952,7 @@ Theorem eval_intuoffloat: Val.intuoffloat x = Some y -> exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofintu: @@ -959,7 +963,7 @@ Theorem eval_floatofintu: Proof. intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intofsingle: @@ -968,7 +972,7 @@ Theorem eval_intofsingle: Val.intofsingle x = Some y -> exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofint: @@ -979,7 +983,7 @@ Theorem eval_singleofint: Proof. intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuofsingle: @@ -988,7 +992,7 @@ Theorem eval_intuofsingle: Val.intuofsingle x = Some y -> exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofintu: @@ -999,7 +1003,7 @@ Theorem eval_singleofintu: Proof. intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** Selection *) diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v index e0d98c85..e6a60d4e 100644 --- a/aarch64/ValueAOp.v +++ b/aarch64/ValueAOp.v @@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omul, v1::v2::nil => mul v1 v2 | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3) | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3) - | Odiv, v1::v2::nil => divs v1 v2 - | Odivu, v1::v2::nil => divu v1 v2 + | Odiv, v1::v2::nil => divs_total v1 v2 + | Odivu, v1::v2::nil => divu_total v1 v2 | Oand, v1::v2::nil => and v1 v2 | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a) | Oandimm n, v1::nil => and v1 (I n) @@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3) | Omullhs, v1::v2::nil => mullhs v1 v2 | Omullhu, v1::v2::nil => mullhu v1 v2 - | Odivl, v1::v2::nil => divls v1 v2 - | Odivlu, v1::v2::nil => divlu v1 v2 + | Odivl, v1::v2::nil => divls_total v1 v2 + | Odivlu, v1::v2::nil => divlu_total v1 v2 | Oandl, v1::v2::nil => andl v1 v2 | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a) | Oandlimm n, v1::nil => andl v1 (L n) @@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Osingleoffloat, v1::nil => singleoffloat v1 | Ofloatofsingle, v1::nil => floatofsingle v1 - | Ointoffloat, v1::nil => intoffloat v1 - | Ointuoffloat, v1::nil => intuoffloat v1 + | Ointoffloat, v1::nil => intoffloat_total v1 + | Ointuoffloat, v1::nil => intuoffloat_total v1 | Ofloatofint, v1::nil => floatofint v1 | Ofloatofintu, v1::nil => floatofintu v1 - | Ointofsingle, v1::nil => intofsingle v1 - | Ointuofsingle, v1::nil => intuofsingle v1 + | Ointofsingle, v1::nil => intofsingle_total v1 + | Ointuofsingle, v1::nil => intuofsingle_total v1 | Osingleofint, v1::nil => singleofint v1 | Osingleofintu, v1::nil => singleofintu v1 - | Olongoffloat, v1::nil => longoffloat v1 - | Olonguoffloat, v1::nil => longuoffloat v1 + | Olongoffloat, v1::nil => longoffloat_total v1 + | Olonguoffloat, v1::nil => longuoffloat_total v1 | Ofloatoflong, v1::nil => floatoflong v1 | Ofloatoflongu, v1::nil => floatoflongu v1 - | Olongofsingle, v1::nil => longofsingle v1 - | Olonguofsingle, v1::nil => longuofsingle v1 + | Olongofsingle, v1::nil => longofsingle_total v1 + | Olonguofsingle, v1::nil => longuofsingle_total v1 | Osingleoflong, v1::nil => singleoflong v1 | Osingleoflongu, v1::nil => singleoflongu v1 |