aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-27 17:56:53 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-27 18:04:35 +0100
commited78594947276264beea0b608c2a101d9f31b18f (patch)
tree2b4c04d9a6fc5e531f38e6b7f4810241cfe49896 /aarch64
parent63942e04b0fcb84d54f066122c31ca4c3aa99ad4 (diff)
parent43a7cc2a7305395b20d92b240362ddfdb43963ff (diff)
downloadcompcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.tar.gz
compcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.zip
Merge branch 'kvx-test-prepass' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into aarch64-prepass+postpass
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/ConstpropOpproof.v121
-rw-r--r--aarch64/InstructionScheduler.ml1263
-rw-r--r--aarch64/InstructionScheduler.mli113
-rw-r--r--aarch64/Op.v179
-rw-r--r--aarch64/OpWeights.ml353
-rw-r--r--aarch64/PrepassSchedulingOracle.ml477
-rw-r--r--aarch64/PrepassSchedulingOracleDeps.ml17
-rw-r--r--aarch64/SelectLongproof.v26
-rw-r--r--aarch64/SelectOpproof.v28
-rw-r--r--aarch64/ValueAOp.v24
10 files changed, 1056 insertions, 1545 deletions
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index deab7cd4..c777062c 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
Lemma make_divimm_correct:
forall n r1 r2 v,
- Val.divs e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divs e#r1 e#r2) = v ->
e#r2 = Vint n ->
let (op, args) := make_divimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divimm.
- predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
- destruct (e#r1) eqn:?;
- try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
- inv H; auto.
- destruct (Int.is_power2 n) eqn:?.
- destruct (Int.ltu i (Int.repr 31)) eqn:?.
- exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
- exists v; auto.
- exists v; auto.
+ predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+ { destruct (e # r1) eqn:Er1.
+ all: try (cbn; exists (e # r1); split; auto; fail).
+ rewrite Val.divs_one.
+ cbn.
+ rewrite Er1.
+ exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+ destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+ {
+ cbn.
+ exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+ destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+ rewrite Val.divs_pow2 with (y:=v) (n:=n).
+ cbn.
+ all: auto.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
Qed.
+
Lemma make_divuimm_correct:
forall n r1 r2 v,
- Val.divu e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divu e#r1 e#r2) = v ->
e#r2 = Vint n ->
let (op, args) := make_divuimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divuimm.
- predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
- destruct (e#r1) eqn:?;
- try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
- inv H; auto.
- destruct (Int.is_power2 n) eqn:?.
- econstructor; split. simpl; eauto.
- rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
- rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
- exists v; auto.
+ predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+ { destruct (e # r1) eqn:Er1.
+ all: try (cbn; exists (e # r1); split; auto; fail).
+ rewrite Val.divu_one.
+ cbn.
+ rewrite Er1.
+ exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+ cbn.
+ rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+ exists (Val.shru e # r1 (Vint i)); split; auto.
+ destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+ rewrite Val.divu_pow2 with (y:=v) (n:=n).
+ all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
Qed.
Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
Lemma make_divlimm_correct:
forall n r1 r2 v,
- Val.divls e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divls e#r1 e#r2) = v ->
e#r2 = Vlong n ->
let (op, args) := make_divlimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divlimm.
- destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
- rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
- exists v; auto.
- exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+ destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+ {
+ cbn.
+ exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+ rewrite H0 in H.
+ destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+ {
+ subst v0.
+ rewrite Val.divls_pow2 with (y:=v) (n:=n).
+ cbn.
+ all: auto.
+ }
+ subst. auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
Qed.
+
Lemma make_divluimm_correct:
forall n r1 r2 v,
- Val.divlu e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divlu e#r1 e#r2) = v ->
e#r2 = Vlong n ->
let (op, args) := make_divluimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divluimm.
destruct (Int64.is_power2' n) eqn:?.
+ {
econstructor; split. simpl; eauto.
- rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
- rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
- simpl.
- erewrite Int64.is_power2'_range by eauto.
- erewrite Int64.divu_pow2' by eauto. auto.
- exists v; auto.
+ rewrite H0 in H. destruct (e#r1); inv H.
+ all: cbn; auto.
+ {
+ rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+ destruct (Int64.eq n Int64.zero); cbn; auto.
+ erewrite Int64.is_power2'_range by eauto.
+ erewrite Int64.divu_pow2' by eauto. auto.
+ }
+ }
+ exists v; split; auto.
+ cbn.
+ rewrite H.
+ reflexivity.
Qed.
Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
- (* divs *)
assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divimm_correct; auto.
+ apply make_divimm_correct; auto. congruence.
- (* divu *)
assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divuimm_correct; auto.
+ apply make_divuimm_correct; auto. congruence.
- (* and 1 *)
rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
- (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
- (* divl *)
assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divlimm_correct; auto.
+ apply make_divlimm_correct; auto. congruence.
- (* divlu *)
assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divluimm_correct; auto.
+ apply make_divluimm_correct; auto. congruence.
- (* andl 1 *)
rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
- (* andl 2 *)
diff --git a/aarch64/InstructionScheduler.ml b/aarch64/InstructionScheduler.ml
deleted file mode 100644
index eab0b21a..00000000
--- a/aarch64/InstructionScheduler.ml
+++ /dev/null
@@ -1,1263 +0,0 @@
-(* *************************************************************)
-(* *)
-(* The Compcert verified compiler *)
-(* *)
-(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
-(* David Monniaux CNRS, VERIMAG *)
-(* Cyril Six Kalray *)
-(* *)
-(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
-(* This file is distributed under the terms of the INRIA *)
-(* Non-Commercial License Agreement. *)
-(* *)
-(* *************************************************************)
-
-let with_destructor dtor stuff f =
- try let ret = f stuff in
- dtor stuff;
- ret
- with exn -> dtor stuff;
- raise exn;;
-
-let with_out_channel chan f = with_destructor close_out chan f;;
-let with_in_channel chan f = with_destructor close_in chan f;;
-
-(** Schedule instructions on a synchronized pipeline
-@author David Monniaux, CNRS, VERIMAG *)
-
-type latency_constraint = {
- instr_from : int;
- instr_to : int;
- latency : int };;
-
-type problem = {
- max_latency : int;
- resource_bounds : int array;
- instruction_usages : int array array;
- latency_constraints : latency_constraint list;
- };;
-
-let print_problem channel problem =
- (if problem.max_latency >= 0
- then Printf.fprintf channel "max makespan: %d\n" problem.max_latency);
- output_string channel "resource bounds:";
- (Array.iter (fun b -> Printf.fprintf channel " %d" b) problem.resource_bounds);
- output_string channel ";\n";
- (Array.iteri (fun i v ->
- Printf.fprintf channel "instr%d:" i;
- (Array.iter (fun b -> Printf.fprintf channel " %d" b) v);
- output_string channel ";\n") problem.instruction_usages);
- List.iter (fun instr ->
- Printf.printf "t%d - t%d >= %d;\n"
- instr.instr_to instr.instr_from instr.latency)
- problem.latency_constraints;;
-
-let get_nr_instructions problem = Array.length problem.instruction_usages;;
-let get_nr_resources problem = Array.length problem.resource_bounds;;
-
-type solution = int array
-type scheduler = problem -> solution option
-
-(* DISABLED
-(** Schedule the problem optimally by constraint solving using the Gecode solver. *)
-external gecode_scheduler : problem -> solution option =
- "caml_gecode_schedule_instr";;
- *)
-
-let maximum_slot_used times =
- let maxi = ref (-1) in
- for i=0 to (Array.length times)-2
- do
- maxi := max !maxi times.(i)
- done;
- !maxi;;
-
-let check_schedule (problem : problem) (times : solution) =
- let nr_instructions = get_nr_instructions problem in
- (if Array.length times <> nr_instructions+1
- then failwith
- (Printf.sprintf "check_schedule: %d times expected, got %d"
- (nr_instructions+1) (Array.length times)));
- (if problem.max_latency >= 0 && times.(nr_instructions)> problem.max_latency
- then failwith "check_schedule: max_latency exceeded");
- (Array.iteri (fun i time ->
- (if time < 0
- then failwith (Printf.sprintf "time[%d] < 0" i))) times);
- let slot_resources = Array.init ((maximum_slot_used times)+1)
- (fun _ -> Array.copy problem.resource_bounds) in
- for i=0 to nr_instructions -1
- do
- let remaining_resources = slot_resources.(times.(i))
- and used_resources = problem.instruction_usages.(i) in
- for resource=0 to (Array.length used_resources)-1
- do
- let after = remaining_resources.(resource) - used_resources.(resource) in
- (if after < 0
- then failwith (Printf.sprintf "check_schedule: instruction %d exceeds resource %d at slot %d" i resource times.(i)));
- remaining_resources.(resource) <- after
- done
- done;
- List.iter (fun ctr ->
- if times.(ctr.instr_to) - times.(ctr.instr_from) < ctr.latency
- then failwith (Printf.sprintf "check_schedule: time[%d]=%d - time[%d]=%d < %d"
- ctr.instr_to times.(ctr.instr_to)
- ctr.instr_from times.(ctr.instr_from)
- ctr.latency)
- ) problem.latency_constraints;;
-
-let bound_max_time problem =
- let total = ref(Array.length problem.instruction_usages) in
- List.iter (fun ctr -> total := !total + ctr.latency) problem.latency_constraints;
- !total;;
-
-let vector_less_equal a b =
- try
- Array.iter2 (fun x y ->
- if x>y
- then raise Exit) a b;
- true
- with Exit -> false;;
-
-let vector_subtract a b =
- assert ((Array.length a) = (Array.length b));
- for i=0 to (Array.length a)-1
- do
- b.(i) <- b.(i) - a.(i)
- done;;
-
-(* The version with critical path ordering is much better! *)
-type list_scheduler_order =
- | INSTRUCTION_ORDER
- | CRITICAL_PATH_ORDER;;
-
-let int_max (x : int) (y : int) =
- if x > y then x else y;;
-
-let int_min (x : int) (y : int) =
- if x < y then x else y;;
-
-let get_predecessors problem =
- let nr_instructions = get_nr_instructions problem in
- let predecessors = Array.make (nr_instructions+1) [] in
- List.iter (fun ctr ->
- predecessors.(ctr.instr_to) <-
- (ctr.instr_from, ctr.latency)::predecessors.(ctr.instr_to))
- problem.latency_constraints;
- predecessors;;
-
-let get_successors problem =
- let nr_instructions = get_nr_instructions problem in
- let successors = Array.make nr_instructions [] in
- List.iter (fun ctr ->
- successors.(ctr.instr_from) <-
- (ctr.instr_to, ctr.latency)::successors.(ctr.instr_from))
- problem.latency_constraints;
- successors;;
-
-let critical_paths successors =
- let nr_instructions = Array.length successors in
- let path_lengths = Array.make nr_instructions (-1) in
- let rec compute i =
- if i=nr_instructions then 0 else
- match path_lengths.(i) with
- | -2 -> failwith "InstructionScheduler: the dependency graph has cycles"
- | -1 -> path_lengths.(i) <- -2;
- let x = List.fold_left
- (fun cur (j, latency)-> int_max cur (latency+(compute j)))
- 1 successors.(i)
- in path_lengths.(i) <- x; x
- | x -> x
- in for i = nr_instructions-1 downto 0
- do
- ignore (compute i)
- done;
- path_lengths;;
-
-let maximum_critical_path problem =
- let paths = critical_paths (get_successors problem) in
- Array.fold_left int_max 0 paths;;
-
-let get_earliest_dates predecessors =
- let nr_instructions = (Array.length predecessors)-1 in
- let path_lengths = Array.make (nr_instructions+1) (-1) in
- let rec compute i =
- match path_lengths.(i) with
- | -2 -> failwith "InstructionScheduler: the dependency graph has cycles"
- | -1 -> path_lengths.(i) <- -2;
- let x = List.fold_left
- (fun cur (j, latency)-> int_max cur (latency+(compute j)))
- 0 predecessors.(i)
- in path_lengths.(i) <- x; x
- | x -> x
- in for i = 0 to nr_instructions
- do
- ignore (compute i)
- done;
- for i = 0 to nr_instructions - 1
- do
- path_lengths.(nr_instructions) <- int_max
- path_lengths.(nr_instructions) (1 + path_lengths.(i))
- done;
- path_lengths;;
-
-exception Unschedulable
-
-let get_latest_dates deadline successors =
- let nr_instructions = Array.length successors
- and path_lengths = critical_paths successors in
- Array.init (nr_instructions + 1)
- (fun i ->
- if i < nr_instructions then
- let path_length = path_lengths.(i) in
- assert (path_length >= 1);
- (if path_length > deadline
- then raise Unschedulable);
- deadline - path_length
- else deadline);;
-
-let priority_list_scheduler (order : list_scheduler_order)
- (problem : problem) :
- solution option =
- let nr_instructions = get_nr_instructions problem in
- let successors = get_successors problem
- and predecessors = get_predecessors problem
- and times = Array.make (nr_instructions+1) (-1) in
-
- let priorities = match order with
- | INSTRUCTION_ORDER -> None
- | CRITICAL_PATH_ORDER -> Some (critical_paths successors) in
-
- let module InstrSet =
- Set.Make (struct type t=int
- let compare = match priorities with
- | None -> (fun x y -> x - y)
- | Some p -> (fun x y ->
- (match p.(y)-p.(x) with
- | 0 -> x - y
- | z -> z))
- end) in
-
- let max_time = bound_max_time problem in
- let ready = Array.make max_time InstrSet.empty in
- Array.iteri (fun i preds ->
- if i<nr_instructions && preds=[]
- then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
-
- let current_time = ref 0
- and current_resources = Array.copy problem.resource_bounds
- and earliest_time i =
- try
- let time = ref (-1) in
- List.iter (fun (j, latency) ->
- if times.(j) < 0
- then raise Exit
- else let t = times.(j) + latency in
- if t > !time
- then time := t) predecessors.(i);
- assert(!time >= 0);
- !time
- with Exit -> -1
-
- in
- let advance_time() =
- begin
- (if !current_time < max_time-1
- then
- begin
- Array.blit problem.resource_bounds 0 current_resources 0
- (Array.length current_resources);
- ready.(!current_time + 1) <-
- InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
- ready.(!current_time) <- InstrSet.empty;
- end);
- incr current_time
- end in
-
- let attempt_scheduling ready usages =
- let result = ref (-1) in
- try
- InstrSet.iter (fun i ->
- (* Printf.printf "trying scheduling %d\n" i;
- pr int_vector usages.(i);
- print _vector current_resources; *)
- if vector_less_equal usages.(i) current_resources
- then
- begin
- vector_subtract usages.(i) current_resources;
- result := i;
- raise Exit
- end) ready;
- -1
- with Exit -> !result in
-
- while !current_time < max_time
- do
- if (InstrSet.is_empty ready.(!current_time))
- then advance_time()
- else
- match attempt_scheduling ready.(!current_time)
- problem.instruction_usages with
- | -1 -> advance_time()
- | i ->
- begin
- assert(times.(i) < 0);
- times.(i) <- !current_time;
- ready.(!current_time) <- InstrSet.remove i (ready.(!current_time));
- List.iter (fun (instr_to, latency) ->
- if instr_to < nr_instructions then
- match earliest_time instr_to with
- | -1 -> ()
- | to_time ->
- ready.(to_time) <- InstrSet.add instr_to ready.(to_time))
- successors.(i);
- successors.(i) <- []
- end
- done;
- try
- let final_time = ref (-1) in
- for i=0 to nr_instructions-1
- do
- (if times.(i) < 0 then raise Exit);
- (if !final_time < times.(i)+1 then final_time := times.(i)+1)
- done;
- List.iter (fun (i, latency) ->
- let target_time = latency + times.(i) in
- if target_time > !final_time
- then final_time := target_time
- ) predecessors.(nr_instructions);
- times.(nr_instructions) <- !final_time;
- Some times
- with Exit -> None;;
-
-let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
-
-(* dummy code for placating ocaml's warnings *)
-let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
-
-type bundle = int list;;
-
-let rec extract_deps_to index = function
- | [] -> []
- | dep :: deps -> let extracts = extract_deps_to index deps in
- if (dep.instr_to == index) then
- dep :: extracts
- else
- extracts
-
-exception InvalidBundle;;
-
-let dependency_check problem bundle index =
- let index_deps = extract_deps_to index problem.latency_constraints in
- List.iter (fun i ->
- List.iter (fun dep ->
- if (dep.instr_from == i) then raise InvalidBundle
- ) index_deps
- ) bundle;;
-
-let rec make_bundle problem resources bundle index =
- let resources_copy = Array.copy resources in
- let nr_instructions = get_nr_instructions problem in
- if (index >= nr_instructions) then (bundle, index+1) else
- let inst_usage = problem.instruction_usages.(index) in
- try match vector_less_equal inst_usage resources with
- | false -> raise InvalidBundle
- | true -> (
- dependency_check problem bundle index;
- vector_subtract problem.instruction_usages.(index) resources_copy;
- make_bundle problem resources_copy (index::bundle) (index+1)
- )
- with InvalidBundle -> (bundle, index);;
-
-let rec make_bundles problem index : bundle list =
- if index >= get_nr_instructions problem then
- []
- else
- let (bundle, new_index) = make_bundle problem problem.resource_bounds [] index in
- bundle :: (make_bundles problem new_index);;
-
-let bundles_to_schedule problem bundles : solution =
- let nr_instructions = get_nr_instructions problem in
- let schedule = Array.make (nr_instructions+1) (nr_instructions+4) in
- let time = ref 0 in
- List.iter (fun bundle ->
- begin
- List.iter (fun i ->
- schedule.(i) <- !time
- ) bundle;
- time := !time + 1
- end
- ) bundles; schedule;;
-
-let greedy_scheduler (problem : problem) : solution option =
- let bundles = make_bundles problem 0 in
- Some (bundles_to_schedule problem bundles);;
-
-(* alternate implementation
-let swap_array_elements a i j =
- let x = a.(i) in
- a.(i) <- a.(j);
- a.(j) <- x;;
-
-let array_reverse_slice a first last =
- let i = ref first and j = ref last in
- while i < j
- do
- swap_array_elements a !i !j;
- incr i;
- decr j
- done;;
-
-let array_reverse a =
- let a' = Array.copy a in
- array_reverse_slice a' 0 ((Array.length a)-1);
- a';;
- *)
-
-(* unneeded
-let array_reverse a =
- let n=Array.length a in
- Array.init n (fun i -> a.(n-1-i));;
- *)
-
-let reverse_constraint nr_instructions ctr =
- { instr_to = nr_instructions -ctr.instr_from;
- instr_from = nr_instructions - ctr.instr_to;
- latency = ctr.latency };;
-
-(* unneeded
-let rec list_map_filter f = function
- | [] -> []
- | h::t ->
- (match f h with
- | None -> list_map_filter f t
- | Some x -> x :: (list_map_filter f t));;
- *)
-
-let reverse_problem problem =
- let nr_instructions = get_nr_instructions problem in
- {
- max_latency = problem.max_latency;
- resource_bounds = problem.resource_bounds;
- instruction_usages = Array.init (nr_instructions + 1)
- (fun i ->
- if i=0
- then Array.map (fun _ -> 0) problem.resource_bounds else problem.instruction_usages.(nr_instructions - i));
- latency_constraints = List.map (reverse_constraint nr_instructions)
- problem.latency_constraints
- };;
-
-let max_scheduled_time solution =
- let time = ref (-1) in
- for i = 0 to ((Array.length solution) - 2)
- do
- time := max !time solution.(i)
- done;
- !time;;
-
-(*
-let recompute_makespan problem solution =
- let n = (Array.length solution) - 1 and ms = ref 0 in
- List.iter (fun cstr ->
- if cstr.instr_to = n
- then ms := max !ms (solution.(cstr.instr_from) + cstr.latency)
- ) problem.latency_constraints;
- !ms;;
- *)
-
-let schedule_reversed (scheduler : problem -> solution option)
- (problem : problem) =
- match scheduler (reverse_problem problem) with
- | None -> None
- | Some solution ->
- let nr_instructions = get_nr_instructions problem in
- let makespan = max_scheduled_time solution in
- let ret = Array.init (nr_instructions + 1)
- (fun i -> makespan-solution.(nr_instructions-i)) in
- ret.(nr_instructions) <- max ((max_scheduled_time ret) + 1)
- (ret.(nr_instructions));
- Some ret;;
-
-(** Schedule the problem using a greedy list scheduling algorithm, from the end. *)
-let reverse_list_scheduler = schedule_reversed list_scheduler;;
-
-let check_problem problem =
- (if (Array.length problem.instruction_usages) < 1
- then failwith "length(problem.instruction_usages) < 1");;
-
-let validated_scheduler (scheduler : problem -> solution option)
- (problem : problem) =
- check_problem problem;
- match scheduler problem with
- | None -> None
- | (Some solution) as ret -> check_schedule problem solution; ret;;
-
-let get_max_latency solution =
- solution.((Array.length solution)-1);;
-
-let show_date_ranges problem =
- let deadline = problem.max_latency in
- assert(deadline >= 0);
- let successors = get_successors problem
- and predecessors = get_predecessors problem in
- let earliest_dates : int array = get_earliest_dates predecessors
- and latest_dates : int array = get_latest_dates deadline successors in
- assert ((Array.length earliest_dates) =
- (Array.length latest_dates));
- Array.iteri (fun i early ->
- let late = latest_dates.(i) in
- Printf.printf "t[%d] in %d..%d\n" i early late)
- earliest_dates;;
-
-type pseudo_boolean_problem_type =
- | SATISFIABILITY
- | OPTIMIZATION;;
-
-type pseudo_boolean_mapper = {
- mapper_pb_type : pseudo_boolean_problem_type;
- mapper_nr_instructions : int;
- mapper_nr_pb_variables : int;
- mapper_earliest_dates : int array;
- mapper_latest_dates : int array;
- mapper_var_offsets : int array;
- mapper_final_predecessors : (int * int) list
-};;
-
-(* Latency constraints are:
- presence of instr-to at each t <= sum of presences of instr-from at compatible times
-
- if reverse_encoding
- presence of instr-from at each t <= sum of presences of instr-to at compatible times *)
-
-(* Experiments show reverse_encoding=true multiplies time by 2 in sat4j
- without making hard instances easier *)
-let direct_encoding = false
-and reverse_encoding = false
-and delta_encoding = true
-
-let pseudo_boolean_print_problem channel problem pb_type =
- let deadline = problem.max_latency in
- assert (deadline > 0);
- let nr_instructions = get_nr_instructions problem
- and nr_resources = get_nr_resources problem
- and successors = get_successors problem
- and predecessors = get_predecessors problem in
- let earliest_dates = get_earliest_dates predecessors
- and latest_dates = get_latest_dates deadline successors in
- let var_offsets = Array.make
- (match pb_type with
- | OPTIMIZATION -> nr_instructions+1
- | SATISFIABILITY -> nr_instructions) 0 in
- let nr_pb_variables =
- (let nr = ref 0 in
- for i=0 to (match pb_type with
- | OPTIMIZATION -> nr_instructions
- | SATISFIABILITY -> nr_instructions-1)
- do
- var_offsets.(i) <- !nr;
- nr := !nr + latest_dates.(i) - earliest_dates.(i) + 1
- done;
- !nr)
- and nr_pb_constraints =
- (match pb_type with
- | OPTIMIZATION -> nr_instructions+1
- | SATISFIABILITY -> nr_instructions) +
-
- (let count = ref 0 in
- for t=0 to deadline-1
- do
- for j=0 to nr_resources-1
- do
- try
- for i=0 to nr_instructions-1
- do
- let usage = problem.instruction_usages.(i).(j) in
- if t >= earliest_dates.(i) && t <= latest_dates.(i)
- && usage > 0 then raise Exit
- done
- with Exit -> incr count
- done
- done;
- !count) +
-
- (let count=ref 0 in
- List.iter
- (fun ctr ->
- if ctr.instr_to < nr_instructions
- then count := !count + 1 + latest_dates.(ctr.instr_to)
- - earliest_dates.(ctr.instr_to)
- + (if reverse_encoding
- then 1 + latest_dates.(ctr.instr_from)
- - earliest_dates.(ctr.instr_from)
- else 0)
- )
- problem.latency_constraints;
- !count) +
-
- (match pb_type with
- | OPTIMIZATION -> (1 + deadline - earliest_dates.(nr_instructions)) * nr_instructions
- | SATISFIABILITY -> 0)
- and measured_nr_constraints = ref 0 in
-
- let pb_var i t =
- assert(t >= earliest_dates.(i));
- assert(t <= latest_dates.(i));
- let v = 1+var_offsets.(i)+t-earliest_dates.(i) in
- assert(v <= nr_pb_variables);
- Printf.sprintf "x%d" v in
-
- let end_constraint () =
- begin
- output_string channel ";\n";
- incr measured_nr_constraints
- end in
-
- let gen_latency_constraint i_to i_from latency t_to =
- Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n"
- i_to i_from latency i_to t_to;
- for t_from=earliest_dates.(i_from) to
- int_min latest_dates.(i_from) (t_to - latency)
- do
- Printf.fprintf channel "+1 %s " (pb_var i_from t_from)
- done;
- Printf.fprintf channel "-1 %s " (pb_var i_to t_to);
- Printf.fprintf channel ">= 0";
- end_constraint()
-
- and gen_dual_latency_constraint i_to i_from latency t_from =
- Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n"
- i_to i_from latency i_to t_from;
- for t_to=int_max earliest_dates.(i_to) (t_from + latency)
- to latest_dates.(i_to)
- do
- Printf.fprintf channel "+1 %s " (pb_var i_to t_to)
- done;
- Printf.fprintf channel "-1 %s " (pb_var i_from t_from);
- Printf.fprintf channel ">= 0";
- end_constraint()
- in
-
- Printf.fprintf channel "* #variable= %d #constraint= %d\n" nr_pb_variables nr_pb_constraints;
- Printf.fprintf channel "* nr_instructions=%d deadline=%d\n" nr_instructions deadline;
- begin
- match pb_type with
- | SATISFIABILITY -> ()
- | OPTIMIZATION ->
- output_string channel "min:";
- for t=earliest_dates.(nr_instructions) to deadline
- do
- Printf.fprintf channel " %+d %s" t (pb_var nr_instructions t)
- done;
- output_string channel ";\n";
- end;
- for i=0 to (match pb_type with
- | OPTIMIZATION -> nr_instructions
- | SATISFIABILITY -> nr_instructions-1)
- do
- let early = earliest_dates.(i) and late= latest_dates.(i) in
- Printf.fprintf channel "* t[%d] in %d..%d\n" i early late;
- for t=early to late
- do
- Printf.fprintf channel "+1 %s " (pb_var i t)
- done;
- Printf.fprintf channel "= 1";
- end_constraint()
- done;
-
- for t=0 to deadline-1
- do
- for j=0 to nr_resources-1
- do
- let bound = problem.resource_bounds.(j)
- and coeffs = ref [] in
- for i=0 to nr_instructions-1
- do
- let usage = problem.instruction_usages.(i).(j) in
- if t >= earliest_dates.(i) && t <= latest_dates.(i)
- && usage > 0
- then coeffs := (i, usage) :: !coeffs
- done;
- if !coeffs <> [] then
- begin
- Printf.fprintf channel "* resource #%d at t=%d <= %d\n" j t bound;
- List.iter (fun (i, usage) ->
- Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs;
- Printf.fprintf channel ">= %d" (-bound);
- end_constraint();
- end
- done
- done;
-
- List.iter
- (fun ctr ->
- if ctr.instr_to < nr_instructions then
- begin
- for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to)
- do
- gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to
- done;
- if reverse_encoding
- then
- for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from)
- do
- gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from
- done
- end
- ) problem.latency_constraints;
-
- begin
- match pb_type with
- | SATISFIABILITY -> ()
- | OPTIMIZATION ->
- let final_latencies = Array.make nr_instructions 1 in
- List.iter (fun (i, latency) ->
- final_latencies.(i) <- int_max final_latencies.(i) latency)
- predecessors.(nr_instructions);
- for t_to=earliest_dates.(nr_instructions) to deadline
- do
- for i_from = 0 to nr_instructions -1
- do
- gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to
- done
- done
- end;
- assert (!measured_nr_constraints = nr_pb_constraints);
- {
- mapper_pb_type = pb_type;
- mapper_nr_instructions = nr_instructions;
- mapper_nr_pb_variables = nr_pb_variables;
- mapper_earliest_dates = earliest_dates;
- mapper_latest_dates = latest_dates;
- mapper_var_offsets = var_offsets;
- mapper_final_predecessors = predecessors.(nr_instructions)
- };;
-
-type pb_answer =
- | Positive
- | Negative
- | Unknown
-
-let line_to_pb_solution sol line nr_pb_variables =
- let assign s v =
- begin
- let i = int_of_string s in
- sol.(i-1) <- v
- end in
- List.iter
- begin
- function "" -> ()
- | item ->
- (match String.get item 0 with
- | '+' ->
- assert ((String.length item) >= 3);
- assert ((String.get item 1) = 'x');
- assign (String.sub item 2 ((String.length item)-2)) Positive
- | '-' ->
- assert ((String.length item) >= 3);
- assert ((String.get item 1) = 'x');
- assign (String.sub item 2 ((String.length item)-2)) Negative
- | 'x' ->
- assert ((String.length item) >= 2);
- assign (String.sub item 1 ((String.length item)-1)) Positive
- | _ -> failwith "syntax error in pseudo Boolean solution: epected + - or x"
- )
- end
- (String.split_on_char ' ' (String.sub line 2 ((String.length line)-2)));;
-
-let pb_solution_to_schedule mapper pb_solution =
- Array.mapi (fun i offset ->
- let first = mapper.mapper_earliest_dates.(i)
- and last = mapper.mapper_latest_dates.(i)
- and time = ref (-1) in
- for t=first to last
- do
- match pb_solution.(t - first + offset) with
- | Positive ->
- (if !time = -1
- then time:=t
- else failwith "duplicate time in pseudo boolean solution")
- | Negative -> ()
- | Unknown -> failwith "unknown value in pseudo boolean solution"
- done;
- (if !time = -1
- then failwith "no time in pseudo boolean solution");
- !time
- ) mapper.mapper_var_offsets;;
-
-let pseudo_boolean_read_solution mapper channel =
- let optimum = ref (-1)
- and optimum_found = ref false
- and solution = Array.make mapper.mapper_nr_pb_variables Unknown in
- try
- while true do
- match input_line channel with
- | "" -> ()
- | line ->
- begin
- match String.get line 0 with
- | 'c' -> ()
- | 'o' ->
- assert ((String.length line) >= 2);
- assert ((String.get line 1) = ' ');
- optimum := int_of_string (String.sub line 2 ((String.length line)-2))
- | 's' -> (match line with
- | "s OPTIMUM FOUND" -> optimum_found := true
- | "s SATISFIABLE" -> ()
- | "s UNSATISFIABLE" -> close_in channel;
- raise Unschedulable
- | _ -> failwith line)
- | 'v' -> line_to_pb_solution solution line mapper.mapper_nr_pb_variables
- | x -> Printf.printf "unknown: %s\n" line
- end
- done;
- assert false
- with End_of_file ->
- close_in channel;
- begin
- let sol = pb_solution_to_schedule mapper solution in
- sol
- end;;
-
-let recompute_max_latency mapper solution =
- let maxi = ref (-1) in
- for i=0 to (mapper.mapper_nr_instructions-1)
- do
- maxi := int_max !maxi (1+solution.(i))
- done;
- List.iter (fun (i, latency) ->
- maxi := int_max !maxi (solution.(i) + latency)) mapper.mapper_final_predecessors;
- !maxi;;
-
-let adjust_check_solution mapper solution =
- match mapper.mapper_pb_type with
- | OPTIMIZATION ->
- let max_latency = recompute_max_latency mapper solution in
- assert (max_latency = solution.(mapper.mapper_nr_instructions));
- solution
- | SATISFIABILITY ->
- let max_latency = recompute_max_latency mapper solution in
- Array.init (mapper.mapper_nr_instructions+1)
- (fun i -> if i < mapper.mapper_nr_instructions
- then solution.(i)
- else max_latency);;
-
-(* let pseudo_boolean_solver = ref "/local/monniaux/progs/naps/naps" *)
-(* let pseudo_boolean_solver = ref "/local/monniaux/packages/sat4j/org.sat4j.pb.jar CuttingPlanes" *)
-
-(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar CuttingPlanes" *)
-(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar" *)
-(* let pseudo_boolean_solver = ref "clasp" *)
-(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/open-wbo/open-wbo_static -formula=1" *)
-(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/naps/naps" *)
-(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/minisatp/build/release/bin/minisatp" *)
-(* let pseudo_boolean_solver = ref "java -jar sat4j-pb.jar CuttingPlanesStar" *)
-let pseudo_boolean_solver = ref "pb_solver"
-
-let pseudo_boolean_scheduler pb_type problem =
- try
- let filename_in = "problem.opb" in
- (* needed only if not using stdout and filename_out = "problem.sol" *)
- let mapper =
- with_out_channel (open_out filename_in)
- (fun opb_problem ->
- pseudo_boolean_print_problem opb_problem problem pb_type) in
- Some (with_in_channel
- (Unix.open_process_in (!pseudo_boolean_solver ^ " " ^ filename_in))
- (fun opb_solution -> adjust_check_solution mapper (pseudo_boolean_read_solution mapper opb_solution)))
- with
- | Unschedulable -> None;;
-
-let rec reoptimizing_scheduler (scheduler : scheduler) (previous_solution : solution) (problem : problem) =
- if (get_max_latency previous_solution)>1 then
- begin
- Printf.printf "reoptimizing < %d\n" (get_max_latency previous_solution);
- flush stdout;
- match scheduler
- { problem with max_latency = (get_max_latency previous_solution)-1 }
- with
- | None -> previous_solution
- | Some solution -> reoptimizing_scheduler scheduler solution problem
- end
- else previous_solution;;
-
-let smt_var i = Printf.sprintf "t%d" i
-
-let is_resource_used problem j =
- try
- Array.iter (fun usages ->
- if usages.(j) > 0
- then raise Exit) problem.instruction_usages;
- false
- with Exit -> true;;
-
-let smt_use_quantifiers = false
-
-let smt_print_problem channel problem =
- let nr_instructions = get_nr_instructions problem in
- let gen_smt_resource_constraint time j =
- output_string channel "(<= (+";
- Array.iteri
- (fun i usages ->
- let usage=usages.(j) in
- if usage > 0
- then Printf.fprintf channel " (ite (= %s %s) %d 0)"
- time (smt_var i) usage)
- problem.instruction_usages;
- Printf.fprintf channel ") %d)" problem.resource_bounds.(j)
- in
- output_string channel "(set-option :produce-models true)\n";
- for i=0 to nr_instructions
- do
- Printf.fprintf channel "(declare-const %s Int)\n" (smt_var i);
- Printf.fprintf channel "(assert (>= %s 0))\n" (smt_var i)
- done;
- for i=0 to nr_instructions-1
- do
- Printf.fprintf channel "(assert (< %s %s))\n"
- (smt_var i) (smt_var nr_instructions)
- done;
- (if problem.max_latency > 0
- then Printf.fprintf channel "(assert (<= %s %d))\n"
- (smt_var nr_instructions) problem.max_latency);
- List.iter (fun ctr ->
- Printf.fprintf channel "(assert (>= (- %s %s) %d))\n"
- (smt_var ctr.instr_to)
- (smt_var ctr.instr_from)
- ctr.latency) problem.latency_constraints;
- for j=0 to (Array.length problem.resource_bounds)-1
- do
- if is_resource_used problem j
- then
- begin
- if smt_use_quantifiers
- then
- begin
- Printf.fprintf channel
- "; resource #%d <= %d\n(assert (forall ((t Int)) "
- j problem.resource_bounds.(j);
- gen_smt_resource_constraint "t" j;
- output_string channel "))\n"
- end
- else
- begin
- (if problem.max_latency < 0
- then failwith "quantifier explosion needs max latency");
- for t=0 to problem.max_latency
- do
- Printf.fprintf channel
- "; resource #%d <= %d at t=%d\n(assert "
- j problem.resource_bounds.(j) t;
- gen_smt_resource_constraint (string_of_int t) j;
- output_string channel ")\n"
- done
- end
- end
- done;
- output_string channel "(check-sat)(get-model)\n";;
-
-
-let ilp_print_problem channel problem pb_type =
- let deadline = problem.max_latency in
- assert (deadline > 0);
- let nr_instructions = get_nr_instructions problem
- and nr_resources = get_nr_resources problem
- and successors = get_successors problem
- and predecessors = get_predecessors problem in
- let earliest_dates = get_earliest_dates predecessors
- and latest_dates = get_latest_dates deadline successors in
-
- let pb_var i t =
- Printf.sprintf "x%d_%d" i t in
-
- let gen_latency_constraint i_to i_from latency t_to =
- Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n"
- i_to i_from latency i_to t_to;
- Printf.fprintf channel "c_%d_%d_%d_%d: "
- i_to i_from latency t_to;
- for t_from=earliest_dates.(i_from) to
- int_min latest_dates.(i_from) (t_to - latency)
- do
- Printf.fprintf channel "+1 %s " (pb_var i_from t_from)
- done;
- Printf.fprintf channel "-1 %s " (pb_var i_to t_to);
- output_string channel ">= 0\n"
-
- and gen_dual_latency_constraint i_to i_from latency t_from =
- Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n"
- i_to i_from latency i_to t_from;
- Printf.fprintf channel "d_%d_%d_%d_%d: "
- i_to i_from latency t_from;
- for t_to=int_max earliest_dates.(i_to) (t_from + latency)
- to latest_dates.(i_to)
- do
- Printf.fprintf channel "+1 %s " (pb_var i_to t_to)
- done;
- Printf.fprintf channel "-1 %s " (pb_var i_from t_from);
- Printf.fprintf channel ">= 0\n"
-
- and gen_delta_constraint i_from i_to latency =
- if delta_encoding
- then Printf.fprintf channel "l_%d_%d_%d: +1 t%d -1 t%d >= %d\n"
- i_from i_to latency i_to i_from latency
-
- in
-
- Printf.fprintf channel "\\ nr_instructions=%d deadline=%d\n" nr_instructions deadline;
- begin
- match pb_type with
- | SATISFIABILITY -> output_string channel "Minimize dummy: 0\n"
- | OPTIMIZATION ->
- Printf.fprintf channel "Minimize\nmakespan: t%d\n" nr_instructions
- end;
- output_string channel "Subject To\n";
- for i=0 to (match pb_type with
- | OPTIMIZATION -> nr_instructions
- | SATISFIABILITY -> nr_instructions-1)
- do
- let early = earliest_dates.(i) and late= latest_dates.(i) in
- Printf.fprintf channel "\\ t[%d] in %d..%d\ntimes%d: " i early late i;
- for t=early to late
- do
- Printf.fprintf channel "+1 %s " (pb_var i t)
- done;
- Printf.fprintf channel "= 1\n"
- done;
-
- for t=0 to deadline-1
- do
- for j=0 to nr_resources-1
- do
- let bound = problem.resource_bounds.(j)
- and coeffs = ref [] in
- for i=0 to nr_instructions-1
- do
- let usage = problem.instruction_usages.(i).(j) in
- if t >= earliest_dates.(i) && t <= latest_dates.(i)
- && usage > 0
- then coeffs := (i, usage) :: !coeffs
- done;
- if !coeffs <> [] then
- begin
- Printf.fprintf channel "\\ resource #%d at t=%d <= %d\nr%d_%d: " j t bound j t;
- List.iter (fun (i, usage) ->
- Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs;
- Printf.fprintf channel ">= %d\n" (-bound)
- end
- done
- done;
-
- List.iter
- (fun ctr ->
- if ctr.instr_to < nr_instructions then
- begin
- gen_delta_constraint ctr.instr_from ctr.instr_to ctr.latency;
- begin
- if direct_encoding
- then
- for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to)
- do
- gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to
- done
- end;
- begin
- if reverse_encoding
- then
- for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from)
- do
- gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from
- done
- end
- end
- ) problem.latency_constraints;
-
- begin
- match pb_type with
- | SATISFIABILITY -> ()
- | OPTIMIZATION ->
- let final_latencies = Array.make nr_instructions 1 in
- List.iter (fun (i, latency) ->
- final_latencies.(i) <- int_max final_latencies.(i) latency)
- predecessors.(nr_instructions);
- for i_from = 0 to nr_instructions -1
- do
- gen_delta_constraint i_from nr_instructions final_latencies.(i_from)
- done;
- for t_to=earliest_dates.(nr_instructions) to deadline
- do
- for i_from = 0 to nr_instructions -1
- do
- gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to
- done
- done
- end;
- for i=0 to (match pb_type with
- | OPTIMIZATION -> nr_instructions
- | SATISFIABILITY -> nr_instructions-1)
- do
- Printf.fprintf channel "ct%d : -1 t%d" i i;
- let early = earliest_dates.(i) and late= latest_dates.(i) in
- for t=early to late do
- Printf.fprintf channel " +%d %s" t (pb_var i t)
- done;
- output_string channel " = 0\n"
- done;
- output_string channel "Bounds\n";
- for i=0 to (match pb_type with
- | OPTIMIZATION -> nr_instructions
- | SATISFIABILITY -> nr_instructions-1)
- do
- let early = earliest_dates.(i) and late= latest_dates.(i) in
- begin
- Printf.fprintf channel "%d <= t%d <= %d\n" early i late;
- if true then
- for t=early to late do
- Printf.fprintf channel "0 <= %s <= 1\n" (pb_var i t)
- done
- end
- done;
- output_string channel "Integer\n";
- for i=0 to (match pb_type with
- | OPTIMIZATION -> nr_instructions
- | SATISFIABILITY -> nr_instructions-1)
- do
- Printf.fprintf channel "t%d " i
- done;
- output_string channel "\nBinary\n";
- for i=0 to (match pb_type with
- | OPTIMIZATION -> nr_instructions
- | SATISFIABILITY -> nr_instructions-1)
- do
- let early = earliest_dates.(i) and late= latest_dates.(i) in
- for t=early to late do
- output_string channel (pb_var i t);
- output_string channel " "
- done;
- output_string channel "\n"
- done;
- output_string channel "End\n";
- {
- mapper_pb_type = pb_type;
- mapper_nr_instructions = nr_instructions;
- mapper_nr_pb_variables = 0;
- mapper_earliest_dates = earliest_dates;
- mapper_latest_dates = latest_dates;
- mapper_var_offsets = [| |];
- mapper_final_predecessors = predecessors.(nr_instructions)
- };;
-
-(* Guess what? Cplex sometimes outputs 11.000000004 instead of integer 11 *)
-
-let positive_float_round x = truncate (x +. 0.5)
-
-let float_round (x : float) : int =
- if x > 0.0
- then positive_float_round x
- else - (positive_float_round (-. x))
-
-let rounded_int_of_string x = float_round (float_of_string x)
-
-let ilp_read_solution mapper channel =
- let times = Array.make
- (match mapper.mapper_pb_type with
- | OPTIMIZATION -> 1+mapper.mapper_nr_instructions
- | SATISFIABILITY -> mapper.mapper_nr_instructions) (-1) in
- try
- while true do
- let line = input_line channel in
- ( if (String.length line) < 3
- then failwith (Printf.sprintf "bad ilp output: length(line) < 3: %s" line));
- match String.get line 0 with
- | 'x' -> ()
- | 't' -> let space =
- try String.index line ' '
- with Not_found ->
- failwith "bad ilp output: no t variable number"
- in
- let tnumber =
- try int_of_string (String.sub line 1 (space-1))
- with Failure _ ->
- failwith "bad ilp output: not a variable number"
- in
- (if tnumber < 0 || tnumber >= (Array.length times)
- then failwith (Printf.sprintf "bad ilp output: not a correct variable number: %d (%d)" tnumber (Array.length times)));
- let value =
- let s = String.sub line (space+1) ((String.length line)-space-1) in
- try rounded_int_of_string s
- with Failure _ ->
- failwith (Printf.sprintf "bad ilp output: not a time number (%s)" s)
- in
- (if value < 0
- then failwith "bad ilp output: negative time");
- times.(tnumber) <- value
- | '#' -> ()
- | '0' -> ()
- | _ -> failwith (Printf.sprintf "bad ilp output: bad variable initial, line = %s" line)
- done;
- assert false
- with End_of_file ->
- Array.iteri (fun i x ->
- if i<(Array.length times)-1
- && x<0 then raise Unschedulable) times;
- times;;
-
-let ilp_solver = ref "ilp_solver"
-
-let problem_nr = ref 0
-
-let ilp_scheduler pb_type problem =
- try
- let filename_in = Printf.sprintf "problem%05d.lp" !problem_nr
- and filename_out = Printf.sprintf "problem%05d.sol" !problem_nr in
- incr problem_nr;
- let mapper = with_out_channel (open_out filename_in)
- (fun opb_problem -> ilp_print_problem opb_problem problem pb_type) in
-
- begin
- match Unix.system (!ilp_solver ^ " " ^ filename_in ^ " " ^ filename_out) with
- | Unix.WEXITED 0 ->
- Some (with_in_channel
- (open_in filename_out)
- (fun opb_solution ->
- adjust_check_solution mapper
- (ilp_read_solution mapper opb_solution)))
- | Unix.WEXITED _ -> failwith "failed to start ilp solver"
- | _ -> None
- end
- with
- | Unschedulable -> None;;
-
-let current_utime_all () =
- let t = Unix.times() in
- t.Unix.tms_cutime +. t.Unix.tms_utime;;
-
-let utime_all_fn fn arg =
- let utime_start = current_utime_all () in
- let output = fn arg in
- let utime_end = current_utime_all () in
- (output, utime_end -. utime_start);;
-
-let cascaded_scheduler (problem : problem) =
- let (some_initial_solution, list_scheduler_time) =
- utime_all_fn (validated_scheduler list_scheduler) problem in
- match some_initial_solution with
- | None -> None
- | Some initial_solution ->
- let (solution, reoptimizing_time) = utime_all_fn (reoptimizing_scheduler (validated_scheduler (ilp_scheduler SATISFIABILITY)) initial_solution) problem in
- begin
- let latency2 = get_max_latency solution
- and latency1 = get_max_latency initial_solution in
- Printf.printf "postpass %s: %d, %d, %d, %g, %g\n"
- (if latency2 < latency1 then "REOPTIMIZED" else "unchanged")
- (get_nr_instructions problem)
- latency1 latency2
- list_scheduler_time reoptimizing_time;
- flush stdout
- end;
- Some solution;;
-
-let scheduler_by_name name =
- match name with
- | "ilp" -> validated_scheduler cascaded_scheduler
- | "list" -> validated_scheduler list_scheduler
- | "revlist" -> validated_scheduler reverse_list_scheduler
- | "greedy" -> greedy_scheduler
- | s -> failwith ("unknown scheduler: " ^ s);;
diff --git a/aarch64/InstructionScheduler.mli b/aarch64/InstructionScheduler.mli
deleted file mode 100644
index 85e2a5c6..00000000
--- a/aarch64/InstructionScheduler.mli
+++ /dev/null
@@ -1,113 +0,0 @@
-(** Schedule instructions on a synchronized pipeline
-by David Monniaux, CNRS, VERIMAG *)
-
-(** A latency constraint: instruction number [instr_to] should be scheduled at least [latency] clock ticks before [instr_from].
-
-It is possible to specify [latency]=0, meaning that [instr_to] can be scheduled at the same clock tick as [instr_from], but not before.
-
-[instr_to] can be the special value equal to the number of instructions, meaning that it refers to the final output latency. *)
-type latency_constraint = {
- instr_from : int;
- instr_to : int;
- latency : int;
- }
-
-(** A scheduling problem.
-
-In addition to the latency constraints, the resource constraints should be satisfied: at every clock tick, the sum of vectors of resources used by the instructions scheduled at that tick does not exceed the resource bounds.
-*)
-type problem = {
- max_latency : int;
- (** An optional maximal total latency of the problem, after which the problem is deemed not schedulable. -1 means there should be no maximum. *)
-
- resource_bounds : int array;
- (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *)
-
- instruction_usages: int array array;
- (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
-
- latency_constraints : latency_constraint list
- (** The latency constraints that must be satisfied *)
- };;
-
-(** Print problem for human readability. *)
-val print_problem : out_channel -> problem -> unit;;
-
-(** Scheduling solution. For {i n} instructions to schedule, and 0≤{i i}<{i n}, position {i i} contains the time to which instruction {i i} should be scheduled. Position {i n} contains the final output latency. *)
-type solution = int array
-
-(** A scheduling algorithm.
-The return value [Some x] is a solution [x].
-[None] means that scheduling failed. *)
-type scheduler = problem -> solution option;;
-
-(* DISABLED
-(** Schedule the problem optimally by constraint solving using the Gecode solver. *)
-external gecode_scheduler : problem -> solution option
- = "caml_gecode_schedule_instr"
- *)
-
-(** Get the number the last scheduling time used for an instruction in a solution.
-@return The last clock tick used *)
-val maximum_slot_used : solution -> int
-
-(** Validate that a solution is truly a solution of a scheduling problem.
-@raise Failure if validation fails *)
-val check_schedule : problem -> solution -> unit
-
-(** Schedule the problem using a greedy list scheduling algorithm, from the start.
-The first (according to instruction ordering) instruction that is ready (according to the latency constraints) is scheduled at the current clock tick.
-Once a clock tick is full go to the next.
-
-@return [Some solution] when a solution is found, [None] if not. *)
-val list_scheduler : problem -> solution option
-
-(** Schedule the problem using the order of instructions without any reordering *)
-val greedy_scheduler : problem -> solution option
-
-(** Schedule a problem using a scheduler applied in the opposite direction, e.g. for list scheduling from the end instead of the start. BUGGY *)
-val schedule_reversed : scheduler -> problem -> int array option
-
-(** Schedule a problem from the end using a list scheduler. BUGGY *)
-val reverse_list_scheduler : problem -> int array option
-
-(** Check that a problem is well-formed.
-@raise Failure if validation fails *)
-val check_problem : problem -> unit
-
-(** Apply a scheduler and validate the result against the input problem.
-@return The solution found
-@raise Failure if validation fails *)
-val validated_scheduler : scheduler -> problem -> solution option;;
-
-(** Get max latency from solution
-@return Max latency *)
-val get_max_latency : solution -> int;;
-
-(** Get the length of a maximal critical path
-@return Max length *)
-val maximum_critical_path : problem -> int;;
-
-(** Apply line scheduler then advanced solver
-@return A solution if found *)
-val cascaded_scheduler : problem -> solution option;;
-
-val show_date_ranges : problem -> unit;;
-
-type pseudo_boolean_problem_type =
- | SATISFIABILITY
- | OPTIMIZATION;;
-
-type pseudo_boolean_mapper
-val pseudo_boolean_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;;
-val pseudo_boolean_read_solution : pseudo_boolean_mapper -> in_channel -> solution;;
-val pseudo_boolean_scheduler : pseudo_boolean_problem_type -> problem -> solution option;;
-
-val smt_print_problem : out_channel -> problem -> unit;;
-
-val ilp_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;;
-
-val ilp_scheduler : pseudo_boolean_problem_type -> problem -> solution option;;
-
-(** Schedule a problem using a scheduler given by a string name *)
-val scheduler_by_name : string -> problem -> int array option;;
diff --git a/aarch64/Op.v b/aarch64/Op.v
index f720e545..f2a8e6fb 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
| Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
| Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
| Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
- | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
- | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+ | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+ | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
| Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
| Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
| Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
| Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
| Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
| Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
- | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+ | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
| Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
| Osext s, v1 :: nil => Some (Val.sign_ext s v1)
| Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
| Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
| Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
| Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
- | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
- | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+ | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+ | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
| Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
| Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
| Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
| Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
| Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
| Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
- | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+ | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
| Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
| Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
| Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -481,22 +481,22 @@ Definition eval_operation
| Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
| Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
- | Ointoffloat, v1::nil => Val.intoffloat v1
- | Ointuoffloat, v1::nil => Val.intuoffloat v1
- | Ofloatofint, v1::nil => Val.floatofint v1
- | Ofloatofintu, v1::nil => Val.floatofintu v1
- | Ointofsingle, v1::nil => Val.intofsingle v1
- | Ointuofsingle, v1::nil => Val.intuofsingle v1
- | Osingleofint, v1::nil => Val.singleofint v1
- | Osingleofintu, v1::nil => Val.singleofintu v1
- | Olongoffloat, v1::nil => Val.longoffloat v1
- | Olonguoffloat, v1::nil => Val.longuoffloat v1
- | Ofloatoflong, v1::nil => Val.floatoflong v1
- | Ofloatoflongu, v1::nil => Val.floatoflongu v1
- | Olongofsingle, v1::nil => Val.longofsingle v1
- | Olonguofsingle, v1::nil => Val.longuofsingle v1
- | Osingleoflong, v1::nil => Val.singleoflong v1
- | Osingleoflongu, v1::nil => Val.singleoflongu v1
+ | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+ | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+ | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+ | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+ | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+ | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+ | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+ | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+ | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+ | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+ | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+ | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+ | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+ | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+ | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+ | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
| Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
| Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0... destruct v1...
- apply type_add.
- apply type_sub.
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int.eq i0 Int.zero); inv H2...
+ - destruct v0; destruct v1; cbn in *; trivial.
+ destruct (_ || _); trivial...
+ - destruct v0; destruct v1; cbn in *; trivial.
+ destruct (Int.eq i0 Int.zero); constructor.
- destruct v0... destruct v1...
- destruct v0... destruct (eval_shift s v1 a)...
- destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
- destruct v0...
- destruct v0...
- destruct (Val.zero_ext s v0)... simpl; rewrite a32_range...
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- apply type_subl.
- destruct v0... destruct v1...
- destruct v0... destruct v1...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int64.eq i0 Int64.zero); inv H2...
+ - destruct v0; destruct v1; cbn; trivial.
+ destruct (_ || _); cbn; trivial.
+ - destruct v0; destruct v1; cbn; trivial.
+ destruct (Int64.eq i0 Int64.zero); cbn; trivial.
- destruct v0... destruct v1...
- destruct v0... destruct (eval_shiftl s v1 a)...
- destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
- destruct v0...
- destruct v0...
- destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range...
@@ -893,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0...
- destruct v0...
(* intoffloat, intuoffloat *)
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
(* floatofint, floatofintu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* intofsingle, intuofsingle *)
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
(* singleofint, singleofintu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* longoffloat, longuoffloat *)
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
(* floatoflong, floatoflongu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* longofsingle, longuofsingle *)
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
(* singleoflong, singleoflongu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* cmp *)
- destruct (eval_condition cond vl m) as [[]|]...
- unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
@@ -924,16 +926,7 @@ Qed.
Definition is_trapping_op (op : operation) :=
match op with
- | Odiv | Odivu | Odivl | Odivlu
- | Oshrximm _ | Oshrlximm _
- | Ointoffloat | Ointuoffloat
- | Ointofsingle | Ointuofsingle
- | Ofloatofint | Ofloatofintu
- | Osingleofint | Osingleofintu
- | Olongoffloat | Olonguoffloat
- | Olongofsingle | Olonguofsingle
- | Ofloatoflong | Ofloatoflongu
- | Osingleoflong | Osingleoflongu => true
+ | Omove => false
| _ => false
end.
@@ -1423,12 +1416,12 @@ Proof.
- apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
- apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
(* div, divu *)
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int.eq i0 Int.zero
- || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
- TrivialExists.
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+ - inv H4; inv H2; trivial. cbn.
+ destruct (_ || _); cbn;
+ constructor.
+ - inv H4; inv H2; trivial. cbn.
+ destruct (Int.eq i0 Int.zero); cbn;
+ constructor.
(* and*)
- inv H4; inv H2; simpl; auto.
- generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1460,8 +1453,8 @@ Proof.
(* shru *)
- inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
(* shrx *)
- - inv H4; simpl in H1; try discriminate. simpl.
- destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
(* shift-ext *)
- inv H4; simpl; auto.
- inv H4; simpl; auto.
@@ -1496,12 +1489,10 @@ Proof.
- inv H4; inv H2; simpl; auto.
- inv H4; inv H2; simpl; auto.
(* divl, divlu *)
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int64.eq i0 Int64.zero
- || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
- TrivialExists.
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+ - inv H4; inv H2; cbn; trivial.
+ destruct (_ || _); cbn; trivial.
+ - inv H4; inv H2; cbn; trivial.
+ destruct (Int64.eq i0 Int64.zero); cbn; trivial.
(* andl *)
- inv H4; inv H2; simpl; auto.
- generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1533,8 +1524,8 @@ Proof.
(* shrlu *)
- inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
(* shrlx *)
- - inv H4; simpl in H1; try discriminate. simpl.
- destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
(* shift-ext *)
- inv H4; simpl; auto.
- inv H4; simpl; auto.
@@ -1565,37 +1556,29 @@ Proof.
- inv H4; simpl; auto.
- inv H4; simpl; auto.
(* intoffloat, intuoffloat *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
- exists (Vint i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
- exists (Vint i); auto.
+ - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
(* floatofint, floatofintu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* intofsingle, intuofsingle *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
- exists (Vint i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
- exists (Vint i); auto.
+ - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
(* singleofint, singleofintu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* longoffloat, longuoffloat *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
+ - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
(* floatoflong, floatoflongu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* longofsingle, longuofsingle *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
+ - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
(* singleoflong, singleoflongu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* cmp, sel *)
- subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..5cdd002c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,353 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Cortex_A53=
+ struct
+ let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+ let nr_non_pipelined_units = 1;;
+
+ let latency_of_op (op : operation) (nargs : int) =
+ match op with
+ | Omove
+ | Ointconst _
+ | Olongconst _
+ | Ofloatconst _
+ | Osingleconst _
+ | Oaddrsymbol _
+ | Oaddrstack _ -> 1
+ | Oshift _ -> 2
+ | Oadd -> 1
+ | Oaddshift _ -> 2
+ | Oaddimm _
+ | Oneg -> 1
+ | Onegshift _ -> 2
+ | Osub -> 1
+ | Osubshift _ -> 2
+ | Omul
+ | Omuladd
+ | Omulsub -> 4
+ | Odiv
+ | Odivu -> 29
+ | Oand -> 1
+ | Oandshift _ -> 2
+ | Oandimm _ -> 1
+ | Oor -> 1
+ | Oorshift _ -> 2
+ | Oorimm _ -> 1
+ | Oxor -> 1
+ | Oxorshift _ -> 2
+ | Oxorimm _ -> 1
+ | Onot -> 1
+ | Onotshift _ -> 2
+ | Obic -> 1
+ | Obicshift _ -> 2
+ | Oorn -> 1
+ | Oornshift _ -> 2
+ | Oeqv -> 1
+ | Oeqvshift _ -> 2
+ | Oshl
+ | Oshr
+ | Oshru -> 2
+ | Oshrximm _ -> 6
+ | Ozext _
+ | Osext _ -> 1
+ | Oshlzext _
+ | Oshlsext _
+ | Ozextshr _
+ | Osextshr _ -> 2
+
+ (* 64-bit integer arithmetic *)
+ | Oshiftl _ -> 2
+ | Oextend _ -> 1
+ | Omakelong
+ | Olowlong
+ | Ohighlong
+ | Oaddl -> 1
+ | Oaddlshift _
+ | Oaddlext _ -> 2
+ | Oaddlimm _
+ | Onegl -> 1
+ | Oneglshift _ -> 2
+ | Osubl -> 1
+ | Osublshift _
+ | Osublext _ -> 2
+ | Omull
+ | Omulladd
+ | Omullsub
+ | Omullhs
+ | Omullhu -> 4
+ | Odivl -> 50
+ | Odivlu -> 50
+ | Oandl -> 1
+ | Oandlshift _ -> 2
+ | Oandlimm _
+ | Oorl -> 1
+ | Oorlshift _ -> 2
+ | Oorlimm _
+ | Oxorl -> 1
+ | Oxorlshift _ -> 2
+ | Oxorlimm _
+ | Onotl -> 1
+ | Onotlshift _ -> 2
+ | Obicl -> 1
+ | Obiclshift _ -> 2
+ | Oornl -> 1
+ | Oornlshift _ -> 2
+ | Oeqvl -> 1
+ | Oeqvlshift _ -> 2
+ | Oshll
+ | Oshrl
+ | Oshrlu -> 2
+ | Oshrlximm _ -> 6
+ | Ozextl _
+ | Osextl _ -> 1
+ | Oshllzext _
+ | Oshllsext _
+ | Ozextshrl _
+ | Osextshrl _ -> 2
+
+ (* 64-bit floating-point arithmetic *)
+ | Onegf (* r [rd = - r1] *)
+ | Oabsf (* r [rd = abs(r1)] *)
+ | Oaddf (* r [rd = r1 + r2] *)
+ | Osubf (* r [rd = r1 - r2] *)
+ | Omulf (* r [rd = r1 * r2] *)
+(* 32-bit floating-point arithmetic *)
+ | Onegfs (* r [rd = - r1] *)
+ | Oabsfs (* r [rd = abs(r1)] *)
+ | Oaddfs (* r [rd = r1 + r2] *)
+ | Osubfs (* r [rd = r1 - r2] *)
+ | Omulfs (* r [rd = r1 * r2] *)
+ | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *)
+(* Conversions between int and float *)
+ | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *)
+ | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *)
+ | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *)
+ | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (* r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *)
+ -> 6
+ | Odivf -> 50 (* r [rd = r1 / r2] *)
+ | Odivfs -> 20
+ (* Boolean tests *)
+ | Ocmp cmp | Osel (cmp, _) ->
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6
+ | _ -> 1);;
+
+ let resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Omove
+ | Ointconst _
+ | Olongconst _
+ | Ofloatconst _
+ | Osingleconst _
+ | Oaddrsymbol _
+ | Oaddrstack _
+ (* 32-bit integer arithmetic *)
+ | Oshift _
+ | Oadd
+ | Oaddshift _
+ | Oaddimm _
+ | Oneg
+ | Onegshift _
+ | Osub
+ | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+ | Omul
+ | Omuladd
+ | Omulsub -> [| 1; 1; 1; 0 |]
+ | Odiv
+ | Odivu -> [| 1; 0; 0; 0 |]
+ | Oand
+ | Oandshift _
+ | Oandimm _
+ | Oor
+ | Oorshift _
+ | Oorimm _
+ | Oxor
+ | Oxorshift _
+ | Oxorimm _
+ | Onot
+ | Onotshift _
+ | Obic
+ | Obicshift _
+ | Oorn
+ | Oornshift _
+ | Oeqv
+ | Oeqvshift _
+ | Oshl
+ | Oshr
+ | Oshru
+ | Oshrximm _
+ | Ozext _
+ | Osext _
+ | Oshlzext _
+ | Oshlsext _
+ | Ozextshr _
+ | Osextshr _
+
+(* 64-bit integer arithmetic *)
+ | Oshiftl _
+ | Oextend _
+ | Omakelong
+ | Olowlong
+ | Ohighlong
+ | Oaddl
+ | Oaddlshift _
+ | Oaddlext _
+ | Oaddlimm _
+ | Onegl
+ | Oneglshift _
+ | Osubl
+ | Osublshift _
+ | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+ | Omull
+ | Omulladd
+ | Omullsub
+ | Omullhs
+ | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+ | Odivl
+ | Odivlu -> [| 1; 0; 0; 0 |]
+ | Oandl
+ | Oandlshift _
+ | Oandlimm _
+ | Oorl
+ | Oorlshift _
+ | Oorlimm _
+ | Oxorl
+ | Oxorlshift _
+ | Oxorlimm _
+ | Onotl
+ | Onotlshift _
+ | Obicl
+ | Obiclshift _
+ | Oornl
+ | Oornlshift _
+ | Oeqvl
+ | Oeqvlshift _
+ | Oshll
+ | Oshrl
+ | Oshrlu
+ | Oshrlximm _
+ | Ozextl _
+ | Osextl _
+ | Oshllzext _
+ | Oshllsext _
+ | Ozextshrl _
+ | Osextshrl _ -> [| 1; 1; 0; 0 |]
+ (* 64-bit floating-point arithmetic *)
+ | Onegf (* r [rd = - r1] *)
+ | Oabsf (* r [rd = abs(r1)] *)
+ | Oaddf (* r [rd = r1 + r2] *)
+ | Osubf (* r [rd = r1 - r2] *)
+ | Omulf (* r [rd = r1 * r2] *)
+ | Odivf
+ (* 32-bit floating-point arithmetic *)
+ | Onegfs (* r [rd = - r1] *)
+ | Oabsfs (* r [rd = abs(r1)] *)
+ | Oaddfs (* r [rd = r1 + r2] *)
+ | Osubfs (* r [rd = r1 - r2] *)
+ | Omulfs (* r [rd = r1 * r2] *)
+ | Odivfs (* r [rd = r1 / r2] *)
+ | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *)
+(* Conversions between int and float *)
+ | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *)
+ | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *)
+ | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *)
+ | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (* r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *)
+ -> [| 1 ; 1; 1; 0 |]
+
+(* Boolean tests *)
+ | Ocmp cmp | Osel (cmp, _) ->
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Odiv | Odivu -> [| 29 |]
+ | Odivfs -> [| 20 |]
+ | Odivl | Odivlu | Odivf -> [| 50 |]
+ | _ -> [| -1 |];;
+
+ let resources_of_cond (cmp : condition) (nargs : int) =
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+ let latency_of_call _ _ = 6;;
+
+ let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_call _ _ = resource_bounds;;
+ let resources_of_builtin _ = resource_bounds;;
+ end;;
+
+let get_opweights () : opweights =
+ match !Clflags.option_mtune with
+ | "cortex-a53" | "cortex-a35" | "" ->
+ {
+ pipelined_resource_bounds = Cortex_A53.resource_bounds;
+ nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+ latency_of_op = Cortex_A53.latency_of_op;
+ resources_of_op = Cortex_A53.resources_of_op;
+ non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+ latency_of_load = Cortex_A53.latency_of_load;
+ resources_of_load = Cortex_A53.resources_of_load;
+ resources_of_store = Cortex_A53.resources_of_store;
+ resources_of_cond = Cortex_A53.resources_of_cond;
+ latency_of_call = Cortex_A53.latency_of_call;
+ resources_of_call = Cortex_A53.resources_of_call;
+ resources_of_builtin = Cortex_A53.resources_of_builtin
+ }
+ | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..2c3eb14f
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,477 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+
+let use_alias_analysis () = false
+
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ List.iter (fun use ->
+ add_input_reg i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ add_input_mem i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+ | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+ | Iop(op, inputs, output, _) ->
+ opweights.resources_of_op op (List.length inputs)
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ opweights.resources_of_store chunk addressing (List.length addr_regs)
+ | Icall(signature, ef, inputs, output, _) ->
+ opweights.resources_of_call signature ef
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ opweights.resources_of_builtin ef
+ | Icond(cond, args, _, _ , _) ->
+ opweights.resources_of_cond cond (List.length args)
+ | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+
+let print_sequence pp (seqa : instruction array) =
+ Array.iteri (
+ fun i (insn : instruction) ->
+ PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+
+type 'a symbolic_term_node =
+ | STop of Op.operation * 'a list
+ | STinitial_reg of int
+ | STother of int;;
+
+type symbolic_term = {
+ hash_id : unique_id;
+ hash_ct : symbolic_term symbolic_term_node
+ };;
+
+let rec print_term channel term =
+ match term.hash_ct with
+ | STop(op, args) ->
+ PrintOp.print_operation print_term channel (op, args)
+ | STinitial_reg n -> Printf.fprintf channel "x%d" n
+ | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+ st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+ mutable st_next_id : unique_id };;
+
+let hash_init () = {
+ st_table = Hashtbl.create 20;
+ st_next_id = 0
+ };;
+
+let ground_to_id = function
+ | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+ | STinitial_reg r -> STinitial_reg r
+ | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+ let grounded = ground_to_id term in
+ match Hashtbl.find_opt table.st_table grounded with
+ | Some x -> x
+ | None ->
+ let term' = { hash_id = table.st_next_id;
+ hash_ct = term } in
+ (if table.st_next_id = max_int then failwith "hash: max_int");
+ table.st_next_id <- table.st_next_id + 1;
+ Hashtbl.add table.st_table grounded term';
+ term';;
+
+type access = {
+ base : symbolic_term;
+ offset : int64;
+ length : int
+ };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+ match addressing, args with
+ | (Op.Aindexed ofs), [reg] -> Some
+ { base = get_reg reg;
+ offset = Camlcoq.camlint64_of_ptrofs ofs;
+ length = length_of_chunk chunk
+ }
+ | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+ let regs = ref PTree.empty
+ and table = hash_init() in
+ let assign reg term = regs := PTree.set reg term !regs
+ and hash term = hash_node table term in
+ let get_reg reg =
+ match PTree.get reg !regs with
+ | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+ | Some x -> x in
+ let targets = Array.make (Array.length seqa) None in
+ Array.iteri
+ begin
+ fun i insn ->
+ match insn with
+ | Iop(Op.Omove, [input], output, _) ->
+ assign output (get_reg input)
+ | Iop(op, inputs, output, _) ->
+ assign output (hash (STop(op, List.map get_reg inputs)))
+
+ | Iload(trap, chunk, addressing, args, output, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access;
+ assign output (hash (STother(i)))
+
+ | Icall(_, _, _, output, _)
+ | Ibuiltin(_, _, BR output, _) ->
+ assign output (hash (STother(i)))
+
+ | Istore(chunk, addressing, args, va, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access
+
+ | Inop _ -> ()
+ | Ibuiltin(_, _, BR_none, _) -> ()
+ | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+ | Itailcall (_, _, _)
+ |Icond (_, _, _, _, _)
+ |Ijumptable (_, _)
+ |Ireturn _ -> ()
+ end seqa;
+ targets;;
+
+let print_access channel = function
+ | None -> Printf.fprintf channel "any"
+ | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+ let targets = symbolic_execution seqa in
+ Array.iteri
+ (fun i insn ->
+ match insn with
+ | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+ i print_access targets.(i)
+ | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+ i print_access targets.(i)
+ | _ -> ()
+ ) seqa;;
+
+let may_overlap a0 b0 =
+ match a0, b0 with
+ | (None, _) | (_ , None) -> true
+ | (Some a), (Some b) ->
+ if term_equal a.base b.base
+ then (max a.offset b.offset) <
+ (min (Int64.add (Int64.of_int a.length) a.offset)
+ (Int64.add (Int64.of_int b.length) b.offset))
+ else match a.base.hash_ct, b.base.hash_ct with
+ | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+ STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+ (ida=idb) &&
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | STop(Op.Oaddrstack _, []),
+ STop(Op.Oaddrsymbol _, [])
+ | STop(Op.Oaddrsymbol _, []),
+ STop(Op.Oaddrstack _, []) -> false
+ | STop(Op.Oaddrstack(ofsa),[]),
+ STop(Op.Oaddrstack(ofsb),[]) ->
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+ let targets = symbolic_execution seqa
+ and deps = ref [] in
+ let add_constraint instr_from instr_to latency =
+ deps := { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !deps in
+ for i=0 to (Array.length seqa)-1
+ do
+ for j=0 to i-1
+ do
+ match seqa.(j), seqa.(i) with
+ | (Istore _), ((Iload _) | (Istore _)) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 1
+ | (Iload _), (Istore _) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 0
+ | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+ | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+ add_constraint j i 1
+ | (Inop _ | Iop _), _
+ | _, (Inop _ | Iop _)
+ | (Iload _), (Iload _) -> ()
+ done
+ done;
+ !deps;;
+ *)
+
+let define_problem (opweights : opweights) seqa =
+ let simple_deps = get_simple_dependencies opweights seqa in
+ { max_latency = -1;
+ resource_bounds = opweights.pipelined_resource_bounds;
+ instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ latency_constraints =
+ (* if (use_alias_analysis ())
+ then (get_alias_dependencies seqa) @ simple_deps
+ else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+ let nr_instructions = get_nr_instructions problem in
+ assert(nr_instructions = (Array.length early_ones));
+ match list_scheduler problem with
+ | Some fwd_schedule ->
+ let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+ let constraints' = ref problem.latency_constraints in
+ Array.iteri (fun i is_early ->
+ if is_early then
+ constraints' := {
+ instr_from = i;
+ instr_to = nr_instructions ;
+ latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+ early_ones;
+ validated_scheduler reverse_list_scheduler
+ { problem with latency_constraints = !constraints' }
+ | None -> None;;
+
+let prepass_scheduler_by_name name problem early_ones =
+ match name with
+ | "zigzag" -> zigzag_scheduler problem early_ones
+ | _ -> scheduler_by_name name problem
+
+let schedule_sequence (seqa : (instruction*Regset.t) array) =
+ let opweights = OpWeights.get_opweights () in
+ try
+ if (Array.length seqa) <= 1
+ then None
+ else
+ begin
+ let nr_instructions = Array.length seqa in
+ (if !Clflags.option_debug_compcert > 6
+ then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+ let problem = define_problem opweights seqa in
+ (if !Clflags.option_debug_compcert > 7
+ then (print_sequence stdout (Array.map fst seqa);
+ print_problem stdout problem));
+ match prepass_scheduler_by_name
+ (!Clflags.option_fprepass_sched)
+ problem
+ (Array.map (fun (ins, _) ->
+ match ins with
+ | Icond _ -> true
+ | _ -> false) seqa) with
+ | None -> Printf.printf "no solution in prepass scheduling\n";
+ None
+ | Some solution ->
+ let positions = Array.init nr_instructions (fun i -> i) in
+ Array.sort (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1
+ else if si > sj then 1
+ else i - j) positions;
+ Some positions
+ end
+ with (Failure s) ->
+ Printf.printf "failure in prepass scheduling: %s\n" s;
+ None;;
+
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+
+type opweights =
+ {
+ pipelined_resource_bounds : int array;
+ nr_non_pipelined_units : int;
+ latency_of_op : Op.operation -> int -> int;
+ resources_of_op : Op.operation -> int -> int array;
+ non_pipelined_resources_of_op : Op.operation -> int -> int array;
+ latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+ resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_cond : Op.condition -> int -> int array;
+ latency_of_call : AST.signature -> called_function -> int;
+ resources_of_call : AST.signature -> called_function -> int array;
+ resources_of_builtin : AST.external_function -> int array
+ };;
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 60dc1a12..513ee9bd 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -559,25 +559,29 @@ Qed.
Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
Proof.
red; intros; unfold divls_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
Proof.
red; intros; unfold modls_base, modl_aux.
exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
Proof.
red; intros; unfold divlu_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
Proof.
red; intros; unfold modlu_base, modl_aux.
exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_shrxlimm:
@@ -592,7 +596,7 @@ Proof.
destruct x; simpl in H0; try discriminate.
change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** General shifts *)
@@ -726,42 +730,42 @@ Qed.
Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
End CMCONSTR.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 3379cbd8..9ce7a8bf 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -666,7 +666,8 @@ Theorem eval_divs_base:
Val.divs x y = Some z ->
exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
Proof.
- intros; unfold divs_base; TrivialExists.
+ intros; unfold divs_base; TrivialExists; cbn.
+ rewrite H1. reflexivity.
Qed.
Theorem eval_mods_base:
@@ -678,7 +679,8 @@ Theorem eval_mods_base:
Proof.
intros; unfold mods_base, mod_aux.
exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ cbn. rewrite A. reflexivity.
Qed.
Theorem eval_divu_base:
@@ -689,6 +691,7 @@ Theorem eval_divu_base:
exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
Proof.
intros; unfold divu_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modu_base:
@@ -700,7 +703,8 @@ Theorem eval_modu_base:
Proof.
intros; unfold modu_base, mod_aux.
exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_shrximm:
@@ -715,7 +719,7 @@ Proof.
destruct x; simpl in H0; try discriminate.
change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** General shifts *)
@@ -928,7 +932,7 @@ Theorem eval_intoffloat:
Val.intoffloat x = Some y ->
exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatofint:
@@ -939,7 +943,7 @@ Theorem eval_floatofint:
Proof.
intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intuoffloat:
@@ -948,7 +952,7 @@ Theorem eval_intuoffloat:
Val.intuoffloat x = Some y ->
exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatofintu:
@@ -959,7 +963,7 @@ Theorem eval_floatofintu:
Proof.
intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intofsingle:
@@ -968,7 +972,7 @@ Theorem eval_intofsingle:
Val.intofsingle x = Some y ->
exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleofint:
@@ -979,7 +983,7 @@ Theorem eval_singleofint:
Proof.
intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intuofsingle:
@@ -988,7 +992,7 @@ Theorem eval_intuofsingle:
Val.intuofsingle x = Some y ->
exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleofintu:
@@ -999,7 +1003,7 @@ Theorem eval_singleofintu:
Proof.
intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** Selection *)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Omul, v1::v2::nil => mul v1 v2
| Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
| Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
- | Odiv, v1::v2::nil => divs v1 v2
- | Odivu, v1::v2::nil => divu v1 v2
+ | Odiv, v1::v2::nil => divs_total v1 v2
+ | Odivu, v1::v2::nil => divu_total v1 v2
| Oand, v1::v2::nil => and v1 v2
| Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
| Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
| Omullhs, v1::v2::nil => mullhs v1 v2
| Omullhu, v1::v2::nil => mullhu v1 v2
- | Odivl, v1::v2::nil => divls v1 v2
- | Odivlu, v1::v2::nil => divlu v1 v2
+ | Odivl, v1::v2::nil => divls_total v1 v2
+ | Odivlu, v1::v2::nil => divlu_total v1 v2
| Oandl, v1::v2::nil => andl v1 v2
| Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
| Oandlimm n, v1::nil => andl v1 (L n)
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Osingleoffloat, v1::nil => singleoffloat v1
| Ofloatofsingle, v1::nil => floatofsingle v1
- | Ointoffloat, v1::nil => intoffloat v1
- | Ointuoffloat, v1::nil => intuoffloat v1
+ | Ointoffloat, v1::nil => intoffloat_total v1
+ | Ointuoffloat, v1::nil => intuoffloat_total v1
| Ofloatofint, v1::nil => floatofint v1
| Ofloatofintu, v1::nil => floatofintu v1
- | Ointofsingle, v1::nil => intofsingle v1
- | Ointuofsingle, v1::nil => intuofsingle v1
+ | Ointofsingle, v1::nil => intofsingle_total v1
+ | Ointuofsingle, v1::nil => intuofsingle_total v1
| Osingleofint, v1::nil => singleofint v1
| Osingleofintu, v1::nil => singleofintu v1
- | Olongoffloat, v1::nil => longoffloat v1
- | Olonguoffloat, v1::nil => longuoffloat v1
+ | Olongoffloat, v1::nil => longoffloat_total v1
+ | Olonguoffloat, v1::nil => longuoffloat_total v1
| Ofloatoflong, v1::nil => floatoflong v1
| Ofloatoflongu, v1::nil => floatoflongu v1
- | Olongofsingle, v1::nil => longofsingle v1
- | Olonguofsingle, v1::nil => longuofsingle v1
+ | Olongofsingle, v1::nil => longofsingle_total v1
+ | Olonguofsingle, v1::nil => longuofsingle_total v1
| Osingleoflong, v1::nil => singleoflong v1
| Osingleoflongu, v1::nil => singleoflongu v1