Merge branch 'kvx-test-prepass' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into aarch64-prepass+postpass

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-11-27 17:56:53 +0100
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-11-27 18:04:35 +0100
commit: ed78594947276264beea0b608c2a101d9f31b18f (patch)
tree: 2b4c04d9a6fc5e531f38e6b7f4810241cfe49896 /aarch64
parent: 63942e04b0fcb84d54f066122c31ca4c3aa99ad4 (diff)
parent: 43a7cc2a7305395b20d92b240362ddfdb43963ff (diff)
download: compcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.tar.gz
compcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.zip
10 files changed, 1056 insertions, 1545 deletions
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index deab7cd4..c777062c 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
+
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
+
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  {
+    rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
 - (* divs *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
+  apply make_divimm_correct; auto. congruence.
 - (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divuimm_correct; auto.
+  apply make_divuimm_correct; auto. congruence.
 - (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
 - (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
 - (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divlimm_correct; auto.
+  apply make_divlimm_correct; auto. congruence.
 - (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divluimm_correct; auto.
+  apply make_divluimm_correct; auto. congruence.
 - (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
 - (* andl 2 *)
diff --git a/aarch64/InstructionScheduler.ml b/aarch64/InstructionScheduler.ml
deleted file mode 100644
index eab0b21a..00000000
--- a/aarch64/InstructionScheduler.ml
+++ /dev/null
@@ -1,1263 +0,0 @@
-(* *************************************************************)
-(*                                                             *)
-(*             The Compcert verified compiler                  *)
-(*                                                             *)
-(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
-(*           David Monniaux     CNRS, VERIMAG                  *)
-(*           Cyril Six          Kalray                         *)
-(*                                                             *)
-(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
-(*  This file is distributed under the terms of the INRIA      *)
-(*  Non-Commercial License Agreement.                          *)
-(*                                                             *)
-(* *************************************************************)
-
-let with_destructor dtor stuff f =
-  try let ret = f stuff in
-      dtor stuff;
-      ret
-  with exn -> dtor stuff;
-              raise exn;;
-
-let with_out_channel chan f = with_destructor close_out chan f;;
-let with_in_channel chan f = with_destructor close_in chan f;;
-
-(** Schedule instructions on a synchronized pipeline
-@author David Monniaux, CNRS, VERIMAG *)
-
-type latency_constraint = {
-    instr_from : int;
-    instr_to : int;
-    latency : int };;
-
-type problem = {
-  max_latency : int;
-  resource_bounds : int array;
-  instruction_usages : int array array;
-  latency_constraints : latency_constraint list;
-  };;
-
-let print_problem channel problem =
-  (if problem.max_latency >= 0
-   then Printf.fprintf channel "max makespan: %d\n" problem.max_latency);
-  output_string channel "resource bounds:";
-  (Array.iter (fun b -> Printf.fprintf channel " %d" b) problem.resource_bounds);
-  output_string channel ";\n";
-  (Array.iteri (fun i v ->
-       Printf.fprintf channel "instr%d:" i;
-       (Array.iter (fun b -> Printf.fprintf channel " %d" b) v);
-       output_string channel ";\n") problem.instruction_usages);
-  List.iter (fun instr ->
-      Printf.printf "t%d - t%d >= %d;\n"
-        instr.instr_to instr.instr_from instr.latency)
-    problem.latency_constraints;;
-
-let get_nr_instructions problem = Array.length problem.instruction_usages;;
-let get_nr_resources problem = Array.length problem.resource_bounds;;
-
-type solution = int array
-type scheduler = problem -> solution option
-
-(* DISABLED				    
-(** Schedule the problem optimally by constraint solving using the Gecode solver. *)
-external gecode_scheduler : problem -> solution option =
-  "caml_gecode_schedule_instr";;
- *)
-				     
-let maximum_slot_used times =
-  let maxi = ref (-1) in
-  for i=0 to (Array.length times)-2
-  do
-    maxi := max !maxi times.(i)
-  done;
-  !maxi;;
-
-let check_schedule (problem : problem) (times : solution) =
-  let nr_instructions = get_nr_instructions problem in
-  (if Array.length times <> nr_instructions+1
-   then failwith
-          (Printf.sprintf "check_schedule: %d times expected, got %d"
-            (nr_instructions+1) (Array.length times)));
-  (if problem.max_latency >= 0 && times.(nr_instructions)> problem.max_latency
-   then failwith "check_schedule: max_latency exceeded");
-  (Array.iteri (fun i time ->
-       (if time < 0
-        then failwith (Printf.sprintf "time[%d] < 0" i))) times);
-  let slot_resources = Array.init ((maximum_slot_used times)+1)
-                         (fun _ -> Array.copy problem.resource_bounds) in
-  for i=0 to nr_instructions -1
-  do
-    let remaining_resources = slot_resources.(times.(i))
-    and used_resources = problem.instruction_usages.(i) in
-    for resource=0 to (Array.length used_resources)-1
-    do
-      let after = remaining_resources.(resource) - used_resources.(resource) in
-      (if after < 0
-       then failwith (Printf.sprintf "check_schedule: instruction %d exceeds resource %d at slot %d" i resource times.(i)));
-      remaining_resources.(resource) <- after
-    done
-  done;
-  List.iter (fun ctr ->
-      if times.(ctr.instr_to) - times.(ctr.instr_from) < ctr.latency
-      then failwith (Printf.sprintf "check_schedule: time[%d]=%d - time[%d]=%d < %d"
-                       ctr.instr_to times.(ctr.instr_to)
-                       ctr.instr_from times.(ctr.instr_from)
-                       ctr.latency)
-    ) problem.latency_constraints;;
-
-let bound_max_time problem =
-  let total = ref(Array.length problem.instruction_usages) in
-  List.iter (fun ctr -> total := !total + ctr.latency) problem.latency_constraints;
-  !total;;
-
-let vector_less_equal a b =
-  try
-    Array.iter2 (fun x y ->
-        if x>y
-        then raise Exit) a b;
-    true
-  with Exit -> false;;
-
-let vector_subtract a b =
-  assert ((Array.length a) = (Array.length b));
-  for i=0 to (Array.length a)-1
-  do
-    b.(i) <- b.(i) - a.(i)
-  done;;
-
-(* The version with critical path ordering is much better! *)
-type list_scheduler_order =
-  | INSTRUCTION_ORDER
-  | CRITICAL_PATH_ORDER;;
-
-let int_max (x : int) (y : int) =
-  if x > y then x else y;;
-
-let int_min (x : int) (y : int) =
-  if x < y then x else y;;
-
-let get_predecessors problem =
-  let nr_instructions = get_nr_instructions problem in
-  let predecessors = Array.make (nr_instructions+1) [] in
-  List.iter (fun ctr ->
-      predecessors.(ctr.instr_to) <-
-        (ctr.instr_from, ctr.latency)::predecessors.(ctr.instr_to))
-    problem.latency_constraints;
-  predecessors;;
-
-let get_successors problem =
-  let nr_instructions = get_nr_instructions problem in
-  let successors = Array.make nr_instructions [] in
-  List.iter (fun ctr ->
-      successors.(ctr.instr_from) <-
-             (ctr.instr_to, ctr.latency)::successors.(ctr.instr_from))
-    problem.latency_constraints;
-  successors;;
-
-let critical_paths successors =
-  let nr_instructions = Array.length successors in
-  let path_lengths =  Array.make nr_instructions (-1) in
-  let rec compute i =
-    if i=nr_instructions then 0 else
-      match path_lengths.(i) with
-      | -2 -> failwith "InstructionScheduler: the dependency graph has cycles"
-      | -1 -> path_lengths.(i) <- -2;
-              let x = List.fold_left
-                        (fun cur (j, latency)-> int_max cur (latency+(compute j)))
-                        1 successors.(i)
-              in path_lengths.(i) <- x; x
-      | x -> x
-  in for i = nr_instructions-1 downto 0
-     do
-       ignore (compute i)
-     done;
-     path_lengths;;
-
-let maximum_critical_path problem =
-  let paths = critical_paths (get_successors problem) in
-  Array.fold_left int_max 0 paths;;
-
-let get_earliest_dates predecessors =
-  let nr_instructions = (Array.length predecessors)-1 in
-  let path_lengths =  Array.make (nr_instructions+1) (-1) in
-  let rec compute i =
-    match path_lengths.(i) with
-    | -2 -> failwith "InstructionScheduler: the dependency graph has cycles"
-    | -1 -> path_lengths.(i) <- -2;
-            let x = List.fold_left
-                      (fun cur (j, latency)-> int_max cur (latency+(compute j)))
-                      0 predecessors.(i)
-            in path_lengths.(i) <- x; x
-    | x -> x
-  in for i = 0 to nr_instructions
-     do
-       ignore (compute i)
-     done;
-     for i = 0 to nr_instructions - 1
-     do
-       path_lengths.(nr_instructions) <- int_max
-	 path_lengths.(nr_instructions) (1 + path_lengths.(i))
-     done;
-     path_lengths;;
-
-exception Unschedulable
-        
-let get_latest_dates deadline successors =
-  let nr_instructions = Array.length successors
-  and path_lengths = critical_paths successors in
-  Array.init (nr_instructions + 1)
-	     (fun i ->
-	      if i < nr_instructions then
-		let path_length = path_lengths.(i) in
-		assert (path_length >= 1);
-		(if path_length > deadline
-                 then raise Unschedulable);
-		deadline - path_length
-	      else deadline);;
-  
-let priority_list_scheduler (order : list_scheduler_order)
-      (problem : problem) :
-      solution option =
-  let nr_instructions = get_nr_instructions problem in
-  let successors = get_successors problem
-  and predecessors = get_predecessors problem
-  and times = Array.make (nr_instructions+1) (-1) in
-
-  let priorities = match order with
-    | INSTRUCTION_ORDER -> None
-    | CRITICAL_PATH_ORDER -> Some (critical_paths successors) in
-  
-  let module InstrSet =
-    Set.Make (struct type t=int
-                     let compare = match priorities with
-                       | None -> (fun x y -> x - y)
-                       | Some p -> (fun x y ->
-                         (match p.(y)-p.(x) with
-                          | 0 -> x - y
-                          | z -> z))
-              end) in
-
-  let max_time = bound_max_time problem in
-  let ready = Array.make max_time InstrSet.empty in
-  Array.iteri (fun i preds ->
-      if i<nr_instructions && preds=[]
-      then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
-  
-  let current_time = ref 0
-  and current_resources = Array.copy problem.resource_bounds
-  and earliest_time i =
-    try
-      let time = ref (-1) in
-      List.iter (fun (j, latency) ->
-          if times.(j) < 0
-          then raise Exit
-          else let t = times.(j) + latency in
-               if t > !time
-               then time := t) predecessors.(i);
-      assert(!time >= 0);
-      !time
-    with Exit -> -1
-               
-  in
-  let advance_time() =
-    begin
-      (if !current_time < max_time-1
-      then
-        begin
-          Array.blit problem.resource_bounds 0 current_resources 0
-            (Array.length current_resources);
-          ready.(!current_time + 1) <-
-            InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
-          ready.(!current_time) <- InstrSet.empty;
-        end);
-      incr current_time
-    end in
-
-  let attempt_scheduling ready usages =
-    let result = ref (-1) in
-    try
-      InstrSet.iter (fun i ->
-          (* Printf.printf "trying scheduling %d\n" i;
-        pr   int_vector usages.(i);
-        print           _vector current_resources; *)
-          if vector_less_equal usages.(i) current_resources
-          then
-            begin
-              vector_subtract usages.(i) current_resources;
-              result := i;
-              raise Exit
-            end) ready;
-      -1
-    with Exit -> !result in
-  
-  while !current_time < max_time
-  do
-    if (InstrSet.is_empty ready.(!current_time))
-     then advance_time()
-    else
-      match attempt_scheduling ready.(!current_time)
-              problem.instruction_usages with
-    | -1 -> advance_time()
-    | i ->
-       begin
-         assert(times.(i) < 0);
-         times.(i) <- !current_time;
-         ready.(!current_time) <- InstrSet.remove i (ready.(!current_time));
-         List.iter (fun (instr_to, latency) ->
-             if instr_to < nr_instructions then
-               match earliest_time instr_to with
-               | -1 -> ()
-               | to_time ->
-                  ready.(to_time) <- InstrSet.add instr_to ready.(to_time))
-           successors.(i);
-         successors.(i) <- []
-       end
-  done;
-  try
-    let final_time = ref (-1) in
-    for i=0 to nr_instructions-1
-    do
-      (if times.(i) < 0 then raise Exit);
-      (if !final_time < times.(i)+1 then final_time := times.(i)+1)
-    done;
-    List.iter (fun (i, latency) ->
-        let target_time = latency + times.(i) in
-        if target_time > !final_time
-        then final_time := target_time
-      ) predecessors.(nr_instructions);
-    times.(nr_instructions) <- !final_time;
-    Some times
-  with Exit -> None;;
-
-let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
-
-(* dummy code for placating ocaml's warnings *)
-let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
-
-type bundle = int list;;
-
-let rec extract_deps_to index = function
-  | [] -> []
-  | dep :: deps -> let extracts = extract_deps_to index deps in
-      if (dep.instr_to == index) then 
-        dep :: extracts
-      else
-        extracts
-
-exception InvalidBundle;;
-
-let dependency_check problem bundle index =
-  let index_deps = extract_deps_to index problem.latency_constraints in
-  List.iter (fun i -> 
-    List.iter (fun dep ->
-      if (dep.instr_from == i) then raise InvalidBundle
-    ) index_deps
-  ) bundle;;
-
-let rec make_bundle problem resources bundle index =
-  let resources_copy = Array.copy resources in
-  let nr_instructions = get_nr_instructions problem in
-  if (index >= nr_instructions) then (bundle, index+1) else
-  let inst_usage = problem.instruction_usages.(index) in
-  try match vector_less_equal inst_usage resources with
-  | false -> raise InvalidBundle
-  | true -> (
-      dependency_check problem bundle index;
-      vector_subtract problem.instruction_usages.(index) resources_copy;
-      make_bundle problem resources_copy (index::bundle) (index+1)
-      )
-  with InvalidBundle -> (bundle, index);;
-
-let rec make_bundles problem index : bundle list =
-  if index >= get_nr_instructions problem then
-    []
-  else
-    let (bundle, new_index) = make_bundle problem problem.resource_bounds [] index in
-    bundle :: (make_bundles problem new_index);;
-
-let bundles_to_schedule problem bundles : solution =
-  let nr_instructions = get_nr_instructions problem in
-  let schedule = Array.make (nr_instructions+1) (nr_instructions+4) in
-  let time = ref 0 in
-  List.iter (fun bundle ->
-    begin
-    List.iter (fun i ->
-      schedule.(i) <- !time
-    ) bundle;
-    time := !time + 1
-    end
-  ) bundles; schedule;;
-
-let greedy_scheduler (problem : problem) : solution option =
-  let bundles = make_bundles problem 0 in
-  Some (bundles_to_schedule problem bundles);;
-  
-(* alternate implementation
-let swap_array_elements a i j =
-  let x = a.(i) in
-  a.(i) <- a.(j);
-  a.(j) <- x;;
-
-let array_reverse_slice a first last =
-  let i = ref first and j = ref last in
-  while i < j
-  do
-    swap_array_elements a !i !j;
-    incr i;
-    decr j
-  done;;
-
-let array_reverse a =
-  let a' = Array.copy a in
-  array_reverse_slice a' 0 ((Array.length a)-1);
-  a';;
- *)
-
-(* unneeded
-let array_reverse a =
-  let n=Array.length a in
-  Array.init n (fun i -> a.(n-1-i));;
- *)
-
-let reverse_constraint nr_instructions ctr =
-  { instr_to = nr_instructions -ctr.instr_from;
-    instr_from =  nr_instructions - ctr.instr_to;
-    latency = ctr.latency };;
-
-(* unneeded
-let rec list_map_filter f = function
-  | [] ->  []                                      
-  | h::t ->
-     (match f h with
-      | None ->  list_map_filter f t
-      | Some x -> x :: (list_map_filter f t));;
- *)
-
-let reverse_problem problem =
-  let nr_instructions = get_nr_instructions problem in
-  {
-    max_latency = problem.max_latency;
-    resource_bounds = problem.resource_bounds;
-    instruction_usages = Array.init (nr_instructions + 1)
-      (fun i ->
-        if i=0
-        then Array.map (fun _ -> 0) problem.resource_bounds                             else problem.instruction_usages.(nr_instructions - i));
-    latency_constraints = List.map (reverse_constraint nr_instructions)
-                            problem.latency_constraints
-  };;
-
-let max_scheduled_time solution =
-  let time = ref (-1) in
-  for i = 0 to ((Array.length solution) - 2)
-  do
-    time := max !time solution.(i)
-  done;
-  !time;;
-
-(*
-let recompute_makespan problem solution =
-  let n = (Array.length solution) - 1 and ms = ref 0 in
-  List.iter (fun cstr ->
-      if cstr.instr_to = n
-      then ms := max !ms (solution.(cstr.instr_from) + cstr.latency) 
-    ) problem.latency_constraints;
-  !ms;;
- *)
-
-let schedule_reversed (scheduler : problem -> solution option)
-    (problem : problem) =
-  match scheduler (reverse_problem problem) with
-  | None -> None
-  | Some solution ->
-     let nr_instructions = get_nr_instructions problem in
-     let makespan = max_scheduled_time solution in
-     let ret = Array.init (nr_instructions + 1)
-                 (fun i -> makespan-solution.(nr_instructions-i)) in
-     ret.(nr_instructions) <- max ((max_scheduled_time ret) + 1)
-                                (ret.(nr_instructions));
-     Some ret;;
-
-(** Schedule the problem using a greedy list scheduling algorithm, from the end. *)
-let reverse_list_scheduler = schedule_reversed list_scheduler;;
-
-let check_problem problem =
-  (if (Array.length problem.instruction_usages) < 1
-   then failwith "length(problem.instruction_usages) < 1");;
-
-let validated_scheduler (scheduler : problem -> solution option)
-      (problem : problem) =
-  check_problem problem;
-  match scheduler problem with
-  | None -> None
-  | (Some solution) as ret -> check_schedule problem solution; ret;;
-
-let get_max_latency solution =
-  solution.((Array.length solution)-1);;
-  
-let show_date_ranges problem =
-  let deadline = problem.max_latency in
-  assert(deadline >= 0);
-  let successors = get_successors problem
-  and predecessors = get_predecessors problem in
-  let earliest_dates : int array = get_earliest_dates predecessors
-  and latest_dates : int array = get_latest_dates deadline successors in
-  assert ((Array.length earliest_dates) =
-            (Array.length latest_dates));
-  Array.iteri (fun i early ->
-      let late = latest_dates.(i) in
-      Printf.printf "t[%d] in %d..%d\n" i early late)
-    earliest_dates;;
-
-type pseudo_boolean_problem_type =
-  | SATISFIABILITY
-  | OPTIMIZATION;;
-
-type pseudo_boolean_mapper = {
-  mapper_pb_type : pseudo_boolean_problem_type;
-  mapper_nr_instructions : int;
-  mapper_nr_pb_variables : int;
-  mapper_earliest_dates : int array;
-  mapper_latest_dates : int array;
-  mapper_var_offsets : int array;
-  mapper_final_predecessors : (int * int) list
-};;
-
-(* Latency constraints are:
-  presence of instr-to at each t <= sum of presences of instr-from at compatible times
-  
-  if reverse_encoding
-  presence of instr-from at each t <= sum of presences of instr-to at compatible times *)
-
-(* Experiments show reverse_encoding=true multiplies time by 2 in sat4j
-   without making hard instances easier *)
-let direct_encoding = false
-and reverse_encoding = false
-and delta_encoding = true
-                   
-let pseudo_boolean_print_problem channel problem pb_type =
-  let deadline = problem.max_latency in
-  assert (deadline > 0);
-  let nr_instructions = get_nr_instructions problem
-  and nr_resources = get_nr_resources problem
-  and successors = get_successors problem
-  and predecessors = get_predecessors problem in
-  let earliest_dates = get_earliest_dates predecessors
-  and latest_dates = get_latest_dates deadline successors in
-  let var_offsets = Array.make
-                      (match pb_type with
-                       | OPTIMIZATION -> nr_instructions+1
-                       | SATISFIABILITY -> nr_instructions) 0 in
-  let nr_pb_variables =
-    (let nr = ref 0 in
-     for i=0 to (match pb_type with
-                 | OPTIMIZATION -> nr_instructions
-                 | SATISFIABILITY -> nr_instructions-1)
-     do
-        var_offsets.(i) <- !nr;
-        nr := !nr + latest_dates.(i) - earliest_dates.(i) + 1
-     done;
-     !nr)
-  and nr_pb_constraints =
-    (match pb_type with
-     | OPTIMIZATION -> nr_instructions+1
-     | SATISFIABILITY -> nr_instructions) +
-
-    (let count = ref 0 in
-     for t=0 to deadline-1
-     do
-       for j=0 to nr_resources-1
-       do
-         try
-           for i=0 to nr_instructions-1
-           do
-             let usage = problem.instruction_usages.(i).(j) in
-             if t >= earliest_dates.(i) && t <= latest_dates.(i)
-                && usage > 0 then raise Exit
-           done
-         with Exit -> incr count
-       done
-     done;
-     !count) +
-      
-     (let count=ref 0 in
-      List.iter
-        (fun ctr ->
-          if ctr.instr_to < nr_instructions
-          then count := !count + 1 + latest_dates.(ctr.instr_to)
-                                   - earliest_dates.(ctr.instr_to)
-                    + (if reverse_encoding
-                       then 1 + latest_dates.(ctr.instr_from)
-                -      earliest_dates.(ctr.instr_from)
-                       else 0)
-        )
-        problem.latency_constraints;
-      !count) +
-      
-      (match pb_type with
-       | OPTIMIZATION -> (1 + deadline - earliest_dates.(nr_instructions)) * nr_instructions
-       | SATISFIABILITY -> 0)
-  and measured_nr_constraints = ref 0 in
-
-  let pb_var i t =
-    assert(t >= earliest_dates.(i));
-    assert(t <= latest_dates.(i));
-    let v = 1+var_offsets.(i)+t-earliest_dates.(i) in
-    assert(v <= nr_pb_variables);
-    Printf.sprintf "x%d" v in
-  
-  let end_constraint () =
-    begin
-      output_string channel ";\n";
-      incr measured_nr_constraints
-    end in
-
-  let gen_latency_constraint i_to i_from latency t_to =
-    Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n"
-		   i_to i_from latency i_to t_to;
-        for t_from=earliest_dates.(i_from) to
-              int_min latest_dates.(i_from) (t_to - latency)
-        do
-          Printf.fprintf channel "+1 %s " (pb_var i_from t_from)
-        done;
-        Printf.fprintf channel "-1 %s " (pb_var i_to t_to);
-        Printf.fprintf channel ">= 0";
-        end_constraint()
-
-  and gen_dual_latency_constraint i_to i_from latency t_from =
-    Printf.fprintf channel "* t[%d] - t[%d] >= %d when t[%d]=%d\n"
-		   i_to i_from latency i_to t_from;
-        for t_to=int_max earliest_dates.(i_to) (t_from + latency)
-            to latest_dates.(i_to)
-        do
-          Printf.fprintf channel "+1 %s " (pb_var i_to t_to)
-        done;
-        Printf.fprintf channel "-1 %s " (pb_var i_from t_from);
-        Printf.fprintf channel ">= 0";
-        end_constraint()
-  in
-  
-  Printf.fprintf channel "* #variable= %d #constraint= %d\n" nr_pb_variables nr_pb_constraints;
-  Printf.fprintf channel "* nr_instructions=%d deadline=%d\n" nr_instructions deadline;
-  begin
-    match pb_type with
-    | SATISFIABILITY -> ()
-    | OPTIMIZATION ->
-       output_string channel "min:";
-       for t=earliest_dates.(nr_instructions) to deadline
-       do
-         Printf.fprintf channel " %+d %s" t (pb_var nr_instructions t)
-       done;
-       output_string channel ";\n";
-  end;
-  for i=0 to (match pb_type with
-              | OPTIMIZATION -> nr_instructions
-              | SATISFIABILITY -> nr_instructions-1)
-  do
-    let early = earliest_dates.(i) and late= latest_dates.(i) in
-    Printf.fprintf channel "* t[%d] in %d..%d\n" i early late;
-    for t=early to late
-    do
-      Printf.fprintf channel "+1 %s " (pb_var i t)
-    done;
-    Printf.fprintf channel "= 1";
-    end_constraint()
-  done;
-
-  for t=0 to deadline-1
-  do
-    for j=0 to nr_resources-1
-    do
-      let bound = problem.resource_bounds.(j)
-      and coeffs = ref [] in
-      for i=0 to nr_instructions-1
-      do
-        let usage = problem.instruction_usages.(i).(j) in
-        if t >= earliest_dates.(i) && t <= latest_dates.(i)
-           && usage > 0
-        then coeffs := (i, usage) :: !coeffs 
-      done;
-      if !coeffs <> [] then
-        begin
-          Printf.fprintf channel "* resource #%d at t=%d <= %d\n" j t bound;
-          List.iter (fun (i, usage) ->
-              Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs;
-          Printf.fprintf channel ">= %d" (-bound);
-          end_constraint();
-        end
-    done
-  done;
-  
-  List.iter
-    (fun ctr ->
-      if ctr.instr_to < nr_instructions then
-        begin
-          for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to)
-          do
-	    gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to
-          done;
-          if reverse_encoding
-          then
-            for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from)
-            do
-	      gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from
-            done
-        end
-    ) problem.latency_constraints;
-  
-  begin
-    match pb_type with
-    | SATISFIABILITY -> ()
-    | OPTIMIZATION ->
-       let final_latencies = Array.make nr_instructions 1 in
-       List.iter (fun (i, latency) ->
-	   final_latencies.(i) <- int_max final_latencies.(i) latency)
-	 predecessors.(nr_instructions);
-       for t_to=earliest_dates.(nr_instructions) to deadline
-       do
-         for i_from = 0 to nr_instructions -1
-         do
-	   gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to
-         done
-       done
-  end;
-  assert (!measured_nr_constraints = nr_pb_constraints);
-  {
-    mapper_pb_type = pb_type;
-    mapper_nr_instructions = nr_instructions;
-    mapper_nr_pb_variables = nr_pb_variables;
-    mapper_earliest_dates = earliest_dates;
-    mapper_latest_dates = latest_dates;
-    mapper_var_offsets = var_offsets;
-    mapper_final_predecessors = predecessors.(nr_instructions)
-  };;
-
-type pb_answer =
-  | Positive
-  | Negative
-  | Unknown
-      
-let line_to_pb_solution sol line nr_pb_variables =
-  let assign s v =
-    begin
-      let i = int_of_string s in
-      sol.(i-1) <- v
-    end in
-  List.iter
-    begin
-      function "" -> ()
-	     | item ->
-		(match String.get item 0 with
-		 | '+' ->
-		    assert ((String.length item) >= 3);
-		    assert ((String.get item 1) = 'x');
-		    assign (String.sub item 2 ((String.length item)-2)) Positive
-		 | '-' ->
-		    assert ((String.length item) >= 3);
-		    assert ((String.get item 1) = 'x');
-		    assign (String.sub item 2 ((String.length item)-2)) Negative
-		 | 'x' ->
-		    assert ((String.length item) >= 2);
-		    assign (String.sub item 1 ((String.length item)-1)) Positive
-                 | _ -> failwith "syntax error in pseudo Boolean solution: epected + - or x" 
-		)
-    end
-    (String.split_on_char ' ' (String.sub line 2 ((String.length line)-2)));;
-
-let pb_solution_to_schedule mapper pb_solution =
-  Array.mapi (fun i offset ->
-	      let first = mapper.mapper_earliest_dates.(i)
-	      and last = mapper.mapper_latest_dates.(i)
-	      and time = ref (-1) in
-	      for t=first to last
-	      do
-		match pb_solution.(t - first + offset) with
-		| Positive ->
-		   (if !time = -1
-		    then time:=t
-		    else failwith "duplicate time in pseudo boolean solution")
-		| Negative -> ()
-		| Unknown -> failwith "unknown value in pseudo boolean solution"
-	      done;
-	      (if !time = -1
-	       then failwith "no time in pseudo boolean solution");
-	      !time
-	    ) mapper.mapper_var_offsets;;
-  
-let pseudo_boolean_read_solution mapper channel =
-  let optimum = ref (-1)
-  and optimum_found = ref false
-  and solution = Array.make mapper.mapper_nr_pb_variables Unknown in
-  try
-    while true do
-      match input_line channel with
-      | "" -> ()
-      | line ->
-	 begin
-	   match String.get line 0 with
-	   | 'c' -> ()
-	   | 'o' ->
-	      assert ((String.length line) >= 2);
-	      assert ((String.get line 1) = ' ');
-	      optimum := int_of_string (String.sub line 2 ((String.length line)-2))
-	   | 's' -> (match line with
-		     | "s OPTIMUM FOUND" -> optimum_found := true
-		     | "s SATISFIABLE" -> ()
-		     | "s UNSATISFIABLE" -> close_in channel;
-		                            raise Unschedulable
-                     | _ -> failwith line)
-	   | 'v' -> line_to_pb_solution solution line mapper.mapper_nr_pb_variables
-	   | x -> Printf.printf "unknown: %s\n" line
-	 end
-    done;
-    assert false
-  with End_of_file ->
-       close_in channel;
-       begin
-	 let sol = pb_solution_to_schedule mapper solution in
-	 sol
-       end;;
-
-let recompute_max_latency mapper solution =
-  let maxi = ref (-1) in
-  for i=0 to (mapper.mapper_nr_instructions-1)
-  do
-    maxi := int_max !maxi (1+solution.(i))
-  done;
-  List.iter (fun (i, latency) ->
-      maxi := int_max !maxi (solution.(i) + latency)) mapper.mapper_final_predecessors;
-  !maxi;;
-  
-let adjust_check_solution mapper solution =
-  match mapper.mapper_pb_type with
-  | OPTIMIZATION ->
-     let max_latency = recompute_max_latency mapper solution in
-     assert (max_latency = solution.(mapper.mapper_nr_instructions));
-     solution
-  | SATISFIABILITY ->
-     let max_latency = recompute_max_latency mapper solution in
-     Array.init (mapper.mapper_nr_instructions+1)
-       (fun i -> if i < mapper.mapper_nr_instructions
-                 then solution.(i)
-                 else max_latency);;
-
-(* let pseudo_boolean_solver = ref "/local/monniaux/progs/naps/naps" *)
-(* let pseudo_boolean_solver = ref "/local/monniaux/packages/sat4j/org.sat4j.pb.jar CuttingPlanes" *)
-
-(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar CuttingPlanes" *)
-(* let pseudo_boolean_solver = ref "java -jar /usr/share/java/org.sat4j.pb.jar" *)
-(* let pseudo_boolean_solver = ref "clasp" *)
-(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/open-wbo/open-wbo_static -formula=1" *)
-(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/naps/naps" *)
-(* let pseudo_boolean_solver = ref "/home/monniaux/progs/CP/minisatp/build/release/bin/minisatp" *)
-(* let pseudo_boolean_solver = ref "java -jar sat4j-pb.jar CuttingPlanesStar" *)
-let pseudo_boolean_solver = ref "pb_solver"
-
-let pseudo_boolean_scheduler pb_type problem =
-  try
-    let filename_in = "problem.opb" in
-    (* needed only if not using stdout and filename_out = "problem.sol" *)
-    let mapper =
-      with_out_channel (open_out filename_in)
-        (fun opb_problem ->
-          pseudo_boolean_print_problem opb_problem problem pb_type) in
-    Some (with_in_channel
-      (Unix.open_process_in (!pseudo_boolean_solver ^ " " ^ filename_in))
-      (fun opb_solution -> adjust_check_solution mapper (pseudo_boolean_read_solution mapper opb_solution)))
-  with
-  | Unschedulable -> None;;
-				 
-let rec reoptimizing_scheduler (scheduler : scheduler) (previous_solution : solution) (problem : problem) =
-  if (get_max_latency previous_solution)>1 then
-    begin
-      Printf.printf "reoptimizing < %d\n" (get_max_latency previous_solution);
-      flush stdout;
-      match scheduler
-              { problem with max_latency = (get_max_latency previous_solution)-1 }
-      with
-      | None -> previous_solution
-      | Some solution -> reoptimizing_scheduler scheduler solution problem
-    end
-  else previous_solution;;
-
-let smt_var i = Printf.sprintf "t%d" i
-
-let is_resource_used problem j =
-  try
-    Array.iter (fun usages ->
-        if usages.(j) > 0
-        then raise Exit) problem.instruction_usages;
-    false
-  with Exit -> true;;
-
-let smt_use_quantifiers = false
-                        
-let smt_print_problem channel problem =
-  let nr_instructions = get_nr_instructions problem in
-  let gen_smt_resource_constraint time j =
-        output_string channel "(<= (+";
-        Array.iteri
-          (fun i usages ->
-            let usage=usages.(j) in
-            if usage > 0
-            then Printf.fprintf channel " (ite (= %s %s) %d 0)"
-                   time (smt_var i) usage)
-          problem.instruction_usages;
-        Printf.fprintf channel ") %d)" problem.resource_bounds.(j)
-  in
-  output_string channel "(set-option :produce-models true)\n"; 
-  for i=0 to nr_instructions
-  do
-    Printf.fprintf channel "(declare-const %s Int)\n" (smt_var i);
-    Printf.fprintf channel "(assert (>= %s 0))\n" (smt_var i)
-  done;
-  for i=0 to nr_instructions-1
-  do
-    Printf.fprintf channel "(assert (< %s %s))\n"
-      (smt_var i) (smt_var nr_instructions)
-  done;
-  (if problem.max_latency > 0
-   then Printf.fprintf channel "(assert (<= %s %d))\n"
-          (smt_var nr_instructions) problem.max_latency);
-  List.iter (fun ctr ->
-      Printf.fprintf channel "(assert (>= (- %s %s) %d))\n"
-        (smt_var ctr.instr_to)
-        (smt_var ctr.instr_from)
-        ctr.latency) problem.latency_constraints;
-  for j=0 to (Array.length problem.resource_bounds)-1
-  do
-    if is_resource_used problem j
-    then
-      begin
-        if smt_use_quantifiers
-        then
-          begin
-            Printf.fprintf channel
-              "; resource #%d <= %d\n(assert (forall ((t Int)) "
-              j problem.resource_bounds.(j);
-            gen_smt_resource_constraint "t" j;
-            output_string channel "))\n"
-          end
-        else
-          begin
-            (if problem.max_latency < 0
-             then failwith "quantifier explosion needs max latency");
-            for t=0 to problem.max_latency
-            do
-              Printf.fprintf channel
-                "; resource #%d <= %d at t=%d\n(assert "
-                j problem.resource_bounds.(j) t;
-              gen_smt_resource_constraint (string_of_int t) j;
-              output_string channel ")\n"
-            done
-          end
-      end
-  done;
-  output_string channel "(check-sat)(get-model)\n";;
-      
-                  
-let ilp_print_problem channel problem pb_type =
-  let deadline = problem.max_latency in
-  assert (deadline > 0);
-  let nr_instructions = get_nr_instructions problem
-  and nr_resources = get_nr_resources problem
-  and successors = get_successors problem
-  and predecessors = get_predecessors problem in
-  let earliest_dates = get_earliest_dates predecessors
-  and latest_dates = get_latest_dates deadline successors in
-
-  let pb_var i t =
-    Printf.sprintf "x%d_%d" i t in
-
-  let gen_latency_constraint i_to i_from latency t_to =
-    Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n"
-		   i_to i_from latency i_to t_to;
-    Printf.fprintf channel "c_%d_%d_%d_%d: "
-		   i_to i_from latency t_to;
-        for t_from=earliest_dates.(i_from) to
-              int_min latest_dates.(i_from) (t_to - latency)
-        do
-          Printf.fprintf channel "+1 %s " (pb_var i_from t_from)
-        done;
-        Printf.fprintf channel "-1 %s " (pb_var i_to t_to);
-        output_string channel ">= 0\n"
-
-  and gen_dual_latency_constraint i_to i_from latency t_from =
-    Printf.fprintf channel "\\ t[%d] - t[%d] >= %d when t[%d]=%d\n"
-      i_to i_from latency i_to t_from;
-    Printf.fprintf channel "d_%d_%d_%d_%d: "
-      i_to i_from latency t_from;
-        for t_to=int_max earliest_dates.(i_to) (t_from + latency)
-            to latest_dates.(i_to)
-        do
-          Printf.fprintf channel "+1 %s " (pb_var i_to t_to)
-        done;
-        Printf.fprintf channel "-1 %s " (pb_var i_from t_from);
-        Printf.fprintf channel ">= 0\n"
-
-  and gen_delta_constraint i_from i_to latency =
-    if delta_encoding
-    then  Printf.fprintf channel "l_%d_%d_%d: +1 t%d -1 t%d >= %d\n"
-            i_from i_to latency i_to i_from latency
-
-  in
-  
-  Printf.fprintf channel "\\ nr_instructions=%d deadline=%d\n" nr_instructions deadline;
-  begin
-    match pb_type with
-    | SATISFIABILITY -> output_string channel "Minimize dummy: 0\n"
-    | OPTIMIZATION ->
-       Printf.fprintf channel "Minimize\nmakespan: t%d\n" nr_instructions
-  end;
-  output_string channel "Subject To\n";
-  for i=0 to (match pb_type with
-              | OPTIMIZATION -> nr_instructions
-              | SATISFIABILITY -> nr_instructions-1)
-  do
-    let early = earliest_dates.(i) and late= latest_dates.(i) in
-    Printf.fprintf channel "\\ t[%d] in %d..%d\ntimes%d: " i early late i;
-    for t=early to late
-    do
-      Printf.fprintf channel "+1 %s " (pb_var i t)
-    done;
-    Printf.fprintf channel "= 1\n"
-  done;
-
-  for t=0 to deadline-1
-  do
-    for j=0 to nr_resources-1
-    do
-      let bound = problem.resource_bounds.(j)
-      and coeffs = ref [] in
-      for i=0 to nr_instructions-1
-      do
-        let usage = problem.instruction_usages.(i).(j) in
-        if t >= earliest_dates.(i) && t <= latest_dates.(i)
-           && usage > 0
-        then coeffs := (i, usage) :: !coeffs 
-      done;
-      if !coeffs <> [] then
-        begin
-          Printf.fprintf channel "\\ resource #%d at t=%d <= %d\nr%d_%d: " j t bound j t;
-          List.iter (fun (i, usage) ->
-              Printf.fprintf channel "%+d %s " (-usage) (pb_var i t)) !coeffs;
-          Printf.fprintf channel ">= %d\n" (-bound)
-        end
-    done
-  done;
-  
-  List.iter
-    (fun ctr ->
-      if ctr.instr_to < nr_instructions then
-        begin
-          gen_delta_constraint ctr.instr_from ctr.instr_to ctr.latency;
-          begin
-            if direct_encoding
-            then
-              for t_to=earliest_dates.(ctr.instr_to) to latest_dates.(ctr.instr_to)
-              do
-	        gen_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_to
-              done
-          end;
-          begin
-            if reverse_encoding
-            then
-              for t_from=earliest_dates.(ctr.instr_from) to latest_dates.(ctr.instr_from)
-              do
-	        gen_dual_latency_constraint ctr.instr_to ctr.instr_from ctr.latency t_from
-              done
-          end
-        end
-    ) problem.latency_constraints;
-  
-  begin
-    match pb_type with
-    | SATISFIABILITY -> ()
-    | OPTIMIZATION ->
-       let final_latencies = Array.make nr_instructions 1 in
-       List.iter (fun (i, latency) ->
-	   final_latencies.(i) <- int_max final_latencies.(i) latency)
-	 predecessors.(nr_instructions);
-       for i_from = 0 to nr_instructions -1
-       do
-	 gen_delta_constraint i_from nr_instructions final_latencies.(i_from)
-       done;
-       for t_to=earliest_dates.(nr_instructions) to deadline
-       do
-         for i_from = 0 to nr_instructions -1
-         do
-	   gen_latency_constraint nr_instructions i_from final_latencies.(i_from) t_to
-         done
-       done
-  end;
-  for i=0 to (match pb_type with
-              | OPTIMIZATION -> nr_instructions
-              | SATISFIABILITY -> nr_instructions-1)
-  do
-    Printf.fprintf channel "ct%d : -1 t%d" i i;
-    let early = earliest_dates.(i) and late= latest_dates.(i) in
-    for t=early to late do
-      Printf.fprintf channel " +%d %s" t (pb_var i t)
-    done;
-    output_string channel " = 0\n"
-  done;
-  output_string channel "Bounds\n";
-  for i=0 to (match pb_type with
-              | OPTIMIZATION -> nr_instructions
-              | SATISFIABILITY -> nr_instructions-1)
-  do
-    let early = earliest_dates.(i) and late= latest_dates.(i) in
-    begin
-      Printf.fprintf channel "%d <= t%d <= %d\n" early i late;
-      if true then
-        for t=early to late do
-          Printf.fprintf channel "0 <= %s <= 1\n" (pb_var i t)
-        done
-    end
-  done;
-  output_string channel "Integer\n";
-  for i=0 to (match pb_type with
-              | OPTIMIZATION -> nr_instructions
-              | SATISFIABILITY -> nr_instructions-1)
-  do
-    Printf.fprintf channel "t%d " i
-  done;
-  output_string channel "\nBinary\n";
-  for i=0 to (match pb_type with
-              | OPTIMIZATION -> nr_instructions
-              | SATISFIABILITY -> nr_instructions-1)
-  do
-    let early = earliest_dates.(i) and late= latest_dates.(i) in
-    for t=early to late do
-      output_string channel (pb_var i t);
-      output_string channel " "
-    done;
-    output_string channel "\n"
-  done;
-  output_string channel "End\n";
-  {
-    mapper_pb_type = pb_type;
-    mapper_nr_instructions = nr_instructions;
-    mapper_nr_pb_variables = 0;
-    mapper_earliest_dates = earliest_dates;
-    mapper_latest_dates = latest_dates;
-    mapper_var_offsets = [| |];
-    mapper_final_predecessors = predecessors.(nr_instructions)
-  };;
-
-(* Guess what? Cplex sometimes outputs 11.000000004 instead of integer 11 *)
-
-let positive_float_round x = truncate (x +. 0.5)
-                           
-let float_round (x : float) : int =
-  if x > 0.0
-  then positive_float_round x
-  else - (positive_float_round (-. x))
-  
-let rounded_int_of_string x =  float_round (float_of_string x)
-                             
-let ilp_read_solution mapper channel =
-  let times = Array.make
-                (match mapper.mapper_pb_type with
-                 | OPTIMIZATION -> 1+mapper.mapper_nr_instructions
-                 | SATISFIABILITY -> mapper.mapper_nr_instructions) (-1) in
-  try
-    while true do
-      let line = input_line channel in
-      ( if (String.length line) < 3
-        then failwith (Printf.sprintf "bad ilp output: length(line) < 3: %s" line));
-      match String.get line 0 with
-      | 'x' -> ()
-      | 't' -> let space =
-                 try String.index line ' '
-                 with Not_found ->
-                   failwith "bad ilp output: no t variable number"
-               in
-               let tnumber =
-                 try int_of_string (String.sub line 1 (space-1))
-                 with Failure _ ->
-                   failwith "bad ilp output: not a variable number"
-               in
-               (if tnumber < 0 || tnumber >= (Array.length times)
-                then failwith (Printf.sprintf "bad ilp output: not a correct variable number: %d (%d)" tnumber (Array.length times)));
-               let value =
-                 let s = String.sub line (space+1) ((String.length line)-space-1) in
-                 try rounded_int_of_string s
-                 with Failure _ ->
-                   failwith (Printf.sprintf "bad ilp output: not a time number (%s)" s)
-               in
-               (if value < 0
-                then failwith "bad ilp output: negative time");
-               times.(tnumber) <- value
-      | '#' -> ()
-      | '0' -> ()
-      | _ -> failwith (Printf.sprintf "bad ilp output: bad variable initial, line = %s" line)
-    done;
-    assert false
-  with End_of_file ->
-    Array.iteri (fun i x ->
-        if i<(Array.length times)-1
-           && x<0 then raise Unschedulable) times;
-    times;;
-
-let ilp_solver = ref "ilp_solver"
-
-let problem_nr = ref 0
-
-let ilp_scheduler pb_type problem =
-  try
-    let filename_in = Printf.sprintf  "problem%05d.lp" !problem_nr
-    and filename_out = Printf.sprintf "problem%05d.sol" !problem_nr in
-    incr problem_nr;
-    let mapper = with_out_channel (open_out filename_in)
-      (fun opb_problem -> ilp_print_problem opb_problem problem pb_type) in
-
-    begin
-      match Unix.system (!ilp_solver ^ " " ^ filename_in ^ " " ^ filename_out) with
-      | Unix.WEXITED 0 ->
-         Some (with_in_channel
-                 (open_in filename_out)
-                 (fun opb_solution ->
-                   adjust_check_solution mapper
-                     (ilp_read_solution mapper opb_solution)))
-      | Unix.WEXITED _ -> failwith "failed to start ilp solver"
-      | _ -> None
-    end
-  with
-  | Unschedulable -> None;;
-
-let current_utime_all () =
-  let t = Unix.times() in
-  t.Unix.tms_cutime +. t.Unix.tms_utime;;
-
-let utime_all_fn fn arg =
-  let utime_start = current_utime_all () in
-  let output = fn arg in
-  let utime_end = current_utime_all () in
-  (output, utime_end -. utime_start);;
-    
-let cascaded_scheduler (problem : problem) =
-  let (some_initial_solution, list_scheduler_time) =
-    utime_all_fn (validated_scheduler list_scheduler) problem in
-  match some_initial_solution with
-  | None -> None
-  | Some initial_solution ->
-     let (solution, reoptimizing_time) = utime_all_fn (reoptimizing_scheduler (validated_scheduler (ilp_scheduler SATISFIABILITY)) initial_solution) problem in
-     begin
-       let latency2 = get_max_latency solution
-       and latency1 = get_max_latency initial_solution in
-       Printf.printf "postpass %s: %d, %d, %d, %g, %g\n"
-		     (if latency2 < latency1 then "REOPTIMIZED" else "unchanged")
-		     (get_nr_instructions problem)
-		     latency1 latency2
-		     list_scheduler_time reoptimizing_time;
-       flush stdout
-     end;
-     Some solution;;
-
-let scheduler_by_name name =
-  match name with
-  | "ilp" -> validated_scheduler cascaded_scheduler
-  | "list" -> validated_scheduler list_scheduler
-  | "revlist" -> validated_scheduler reverse_list_scheduler
-  | "greedy" -> greedy_scheduler
-  | s -> failwith ("unknown scheduler: " ^ s);;
diff --git a/aarch64/InstructionScheduler.mli b/aarch64/InstructionScheduler.mli
deleted file mode 100644
index 85e2a5c6..00000000
--- a/aarch64/InstructionScheduler.mli
+++ /dev/null
@@ -1,113 +0,0 @@
-(** Schedule instructions on a synchronized pipeline
-by David Monniaux, CNRS, VERIMAG *)
-
-(** A latency constraint: instruction number [instr_to] should be scheduled at least [latency] clock ticks before [instr_from].
-
-It is possible to specify [latency]=0, meaning that [instr_to] can be scheduled at the same clock tick as [instr_from], but not before.
-
-[instr_to] can be the special value equal to the number of instructions, meaning that it refers to the final output latency. *)
-type latency_constraint = {
-  instr_from : int;
-  instr_to : int;
-  latency : int;
-  }
-                        
-(** A scheduling problem.
-
-In addition to the latency constraints, the resource constraints should be satisfied: at every clock tick, the sum of vectors of resources used by the instructions scheduled at that tick does not exceed the resource bounds.
-*)
-type problem = {
-    max_latency : int;
-    (** An optional maximal total latency of the problem, after which the problem is deemed not schedulable. -1 means there should be no maximum. *)
-
-    resource_bounds : int array;
-    (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *)
-
-    instruction_usages: int array array;
-    (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
-
-    latency_constraints : latency_constraint list
-    (** The latency constraints that must be satisfied *)
-  };;
-
-(** Print problem for human readability. *)
-val print_problem : out_channel -> problem -> unit;;
-
-(** Scheduling solution. For {i n} instructions to schedule, and 0≤{i i}<{i n}, position {i i} contains the time to which instruction {i i} should be scheduled. Position {i n} contains the final output latency. *)
-type solution = int array
-              
-(** A scheduling algorithm.
-The return value [Some x] is a solution [x].
-[None] means that scheduling failed. *)
-type scheduler = problem -> solution option;;
-
-(* DISABLED
-(** Schedule the problem optimally by constraint solving using the Gecode solver. *)
-external gecode_scheduler : problem -> solution option
-  = "caml_gecode_schedule_instr"
- *)
-  
-(** Get the number the last scheduling time used for an instruction in a solution.
-@return The last clock tick used *)
-val maximum_slot_used : solution -> int
-
-(** Validate that a solution is truly a solution of a scheduling problem.
-@raise Failure if validation fails *)
-val check_schedule : problem -> solution -> unit
-
-(** Schedule the problem using a greedy list scheduling algorithm, from the start.
-The first (according to instruction ordering) instruction that is ready (according to the latency constraints) is scheduled at the current clock tick.
-Once a clock tick is full go to the next.
-
-@return [Some solution] when a solution is found, [None] if not. *)
-val list_scheduler : problem -> solution option
-
-(** Schedule the problem using the order of instructions without any reordering *)
-val greedy_scheduler : problem -> solution option
-
-(** Schedule a problem using a scheduler applied in the opposite direction, e.g. for list scheduling from the end instead of the start. BUGGY *)
-val schedule_reversed : scheduler -> problem -> int array option
-
-(** Schedule a problem from the end using a list scheduler. BUGGY *)
-val reverse_list_scheduler : problem -> int array option
-
-(** Check that a problem is well-formed.
-@raise Failure if validation fails *)
-val check_problem : problem -> unit
-  
-(** Apply a scheduler and validate the result against the input problem.
-@return The solution found
-@raise Failure if validation fails *)
-val validated_scheduler : scheduler -> problem -> solution option;;
-
-(** Get max latency from solution
-@return Max latency *)
-val get_max_latency : solution -> int;;
-
-(** Get the length of a maximal critical path
-@return Max length *)
-val maximum_critical_path : problem -> int;;
-
-(** Apply line scheduler then advanced solver
-@return A solution if found *)
-val cascaded_scheduler : problem -> solution option;;
-
-val show_date_ranges : problem -> unit;;
-
-type pseudo_boolean_problem_type =
-  | SATISFIABILITY
-  | OPTIMIZATION;;
-
-type pseudo_boolean_mapper
-val pseudo_boolean_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;;
-val pseudo_boolean_read_solution : pseudo_boolean_mapper -> in_channel -> solution;;
-val pseudo_boolean_scheduler : pseudo_boolean_problem_type -> problem -> solution option;;
-
-val smt_print_problem : out_channel -> problem -> unit;;
-
-val ilp_print_problem : out_channel -> problem -> pseudo_boolean_problem_type -> pseudo_boolean_mapper;;
-
-val ilp_scheduler : pseudo_boolean_problem_type -> problem -> solution option;;
-
-(** Schedule a problem using a scheduler given by a string name *)
-val scheduler_by_name : string -> problem -> int array option;;
diff --git a/aarch64/Op.v b/aarch64/Op.v
index f720e545..f2a8e6fb 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
   | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
-  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
-  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
   | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
   | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
   | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
   | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
   | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
   | Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
   | Osext s, v1 :: nil => Some (Val.sign_ext s v1)
   | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
   | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
-  | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+  | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
   | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
   | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
   | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
   | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
   | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
   | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
-  | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
   | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
   | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
   | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -481,22 +481,22 @@ Definition eval_operation
 
   | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Ointuofsingle, v1::nil => Val.intuofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Osingleofintu, v1::nil => Val.singleofintu v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Olonguoffloat, v1::nil => Val.longuoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Olonguofsingle, v1::nil => Val.longuofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
 
   | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
   | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0... destruct v1...
   - apply type_add.
   - apply type_sub.
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (_  || _); trivial...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (Int.eq i0 Int.zero); constructor.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shift s v1 a)...
   - destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... 
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - apply type_subl.
   - destruct v0... destruct v1...
   - destruct v0... destruct v1...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shiftl s v1 a)...
   - destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... 
@@ -893,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
   (* floatofint, floatofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* cmp *)
   - destruct (eval_condition cond vl m) as [[]|]...
   - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
@@ -924,16 +926,7 @@ Qed.
 
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivu | Odivl | Odivlu
-  | Oshrximm _  | Oshrlximm _
-  | Ointoffloat | Ointuoffloat
-  | Ointofsingle | Ointuofsingle
-  | Ofloatofint | Ofloatofintu
-  | Osingleofint | Osingleofintu
-  | Olongoffloat | Olonguoffloat
-  | Olongofsingle | Olonguofsingle
-  | Ofloatoflong | Ofloatoflongu
-  | Osingleoflong | Osingleoflongu => true
+  | Omove => false
   | _ => false
   end.
                 
@@ -1423,12 +1416,12 @@ Proof.
   - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
   - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (_ || _); cbn;
+    constructor.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (Int.eq i0 Int.zero); cbn;
+    constructor.
   (* and*)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1460,8 +1453,8 @@ Proof.
   (* shru *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1496,12 +1489,10 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* andl *)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1533,8 +1524,8 @@ Proof.
   (* shrlu *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   (* shrlx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1565,37 +1556,29 @@ Proof.
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
   (* floatofint, floatofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
   (* singleofint, singleofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* cmp, sel *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..5cdd002c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,353 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Cortex_A53=
+  struct
+    let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+    let nr_non_pipelined_units = 1;;
+
+    let latency_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _ -> 1
+      | Oshift _ -> 2
+      | Oadd -> 1
+      | Oaddshift _ -> 2
+      | Oaddimm _
+        | Oneg -> 1                   
+      | Onegshift _ -> 2
+      | Osub -> 1
+      | Osubshift _ -> 2
+      | Omul
+        | Omuladd  
+        | Omulsub -> 4
+      | Odiv
+        | Odivu -> 29
+      | Oand -> 1
+      | Oandshift _ -> 2
+      | Oandimm _ -> 1
+      | Oor -> 1
+      | Oorshift _ -> 2
+      | Oorimm _ -> 1
+      | Oxor -> 1
+      | Oxorshift _ -> 2
+      | Oxorimm _ -> 1
+      | Onot -> 1
+      | Onotshift _ -> 2
+      | Obic -> 1
+      | Obicshift _ -> 2
+      | Oorn -> 1
+      | Oornshift _ -> 2
+      | Oeqv -> 1
+      | Oeqvshift _ -> 2
+      | Oshl 
+        | Oshr
+        | Oshru -> 2
+      | Oshrximm _ -> 6
+      | Ozext _
+        | Osext _ -> 1
+      | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _ -> 2
+                      
+      (* 64-bit integer arithmetic *)
+      | Oshiftl _ -> 2
+      | Oextend _ -> 1
+      | Omakelong
+        | Olowlong 
+        | Ohighlong 
+        | Oaddl -> 1
+      | Oaddlshift _
+        | Oaddlext _ -> 2
+      | Oaddlimm _
+        | Onegl -> 1         
+      | Oneglshift _ -> 2
+      | Osubl -> 1
+      | Osublshift _
+        | Osublext _ -> 2
+      | Omull
+        | Omulladd 
+        | Omullsub 
+        | Omullhs 
+        | Omullhu -> 4
+      | Odivl -> 50
+      | Odivlu -> 50
+      | Oandl -> 1
+      | Oandlshift _ -> 2
+      | Oandlimm _
+        | Oorl -> 1 
+      | Oorlshift _ -> 2
+      | Oorlimm _
+        | Oxorl -> 1
+      | Oxorlshift _ -> 2
+      | Oxorlimm _
+        | Onotl -> 1
+      | Onotlshift _ -> 2
+      | Obicl -> 1
+      | Obiclshift _ -> 2
+      | Oornl -> 1
+      | Oornlshift _ -> 2
+      | Oeqvl -> 1
+      | Oeqvlshift _ -> 2
+      | Oshll 
+        | Oshrl
+        | Oshrlu -> 2
+      | Oshrlximm _ -> 6
+      | Ozextl _
+        | Osextl _ -> 1
+      | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> 2
+                       
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+(* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> 6
+      | Odivf -> 50                                         (* r [rd = r1 / r2] *)
+      | Odivfs -> 20
+  (*  Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
+          | _ -> 1);;
+    
+    let resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _
+        (* 32-bit integer arithmetic *)
+        | Oshift _
+        | Oadd 
+        | Oaddshift _
+        | Oaddimm _
+        | Oneg         
+        | Onegshift _
+        | Osub
+        | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+      | Omul 
+        | Omuladd
+        | Omulsub -> [| 1; 1; 1; 0 |]
+      | Odiv 
+        | Odivu -> [| 1; 0; 0; 0 |]
+      | Oand 
+        | Oandshift _
+        | Oandimm _
+        | Oor
+        | Oorshift _
+        | Oorimm _
+        | Oxor
+        | Oxorshift _
+        | Oxorimm _
+        | Onot
+        | Onotshift _
+        | Obic
+        | Obicshift _
+        | Oorn
+        | Oornshift _
+        | Oeqv
+        | Oeqvshift _
+        | Oshl
+        | Oshr
+        | Oshru
+        | Oshrximm _
+        | Ozext _
+        | Osext  _
+        | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _
+  
+(* 64-bit integer arithmetic *)
+        | Oshiftl _
+        | Oextend _
+        | Omakelong
+        | Olowlong 
+        | Ohighlong
+        | Oaddl 
+        | Oaddlshift _
+        | Oaddlext _
+        | Oaddlimm _
+        | Onegl        
+        | Oneglshift _
+        | Osubl
+        | Osublshift _
+        | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+      | Omull 
+        | Omulladd 
+        | Omullsub
+        | Omullhs 
+        | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+      | Odivl 
+        | Odivlu -> [| 1; 0; 0; 0 |]
+      | Oandl 
+        | Oandlshift _
+        | Oandlimm _
+        | Oorl
+        | Oorlshift _
+        | Oorlimm _
+        | Oxorl
+        | Oxorlshift _
+        | Oxorlimm _
+        | Onotl
+        | Onotlshift _
+        | Obicl
+        | Obiclshift _
+        | Oornl
+        | Oornlshift _
+        | Oeqvl
+        | Oeqvlshift _
+        | Oshll
+        | Oshrl
+        | Oshrlu 
+        | Oshrlximm _
+        | Ozextl _
+        | Osextl _
+        | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> [| 1; 1; 0; 0 |]
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+        | Odivf
+        (* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Odivfs                                              (* r [rd = r1 / r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> [| 1 ; 1; 1; 0 |]
+         
+(* Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+             [| 1; 1; 1; 0 |]
+          | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Odiv | Odivu -> [| 29 |]
+      | Odivfs -> [| 20 |]
+      | Odivl | Odivlu | Odivf -> [| 50 |]
+      | _ -> [| -1 |];;
+    
+    let resources_of_cond (cmp : condition) (nargs : int) =
+      (match cmp with
+       | Ccompf _                              (* r FP comparison *)
+         | Cnotcompf _                           (* r negation of an FP comparison *)
+         | Ccompfzero _                          (* r comparison with 0.0 *)
+         | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+         | Ccompfs _                             (* r FP comparison *)
+         | Cnotcompfs _                          (* r negation of an FP comparison *)
+         | Ccompfszero _                         (* r equal to 0.0 *)
+         | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+          [| 1; 1; 1; 0 |]
+       | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+    let latency_of_call _ _ = 6;;
+    
+    let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_call _ _ = resource_bounds;;
+    let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "cortex-a53" | "cortex-a35" | "" ->
+     {
+       pipelined_resource_bounds = Cortex_A53.resource_bounds;
+       nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+       latency_of_op = Cortex_A53.latency_of_op;
+       resources_of_op = Cortex_A53.resources_of_op;
+       non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+       latency_of_load = Cortex_A53.latency_of_load;
+       resources_of_load = Cortex_A53.resources_of_load;
+       resources_of_store = Cortex_A53.resources_of_store;
+       resources_of_cond = Cortex_A53.resources_of_cond;
+       latency_of_call = Cortex_A53.latency_of_call;
+       resources_of_call = Cortex_A53.resources_of_call;
+       resources_of_builtin = Cortex_A53.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..2c3eb14f
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,477 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+   
+let use_alias_analysis () = false
+                          
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+  and last_mem_reads : int list ref = ref []
+  and last_mem_write : int option ref = ref None
+  and last_branch : int option ref = ref None
+  and last_non_pipelined_op : int array = Array.make
+                                        opweights.nr_non_pipelined_units ( -1 )
+  and latency_constraints : latency_constraint list ref = ref [] in
+  let add_constraint instr_from instr_to latency =
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then ()
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg =
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := []
+      end
+  and add_input_reg i reg =
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg =
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) =
+    match res with
+    | BR r -> add_output_reg i 10 r
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) =
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i =
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i =
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources =
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses);
+      
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i);
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall"
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall"
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin"
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i;
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable"
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn"
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none"
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+  | Iop(op, inputs, output, _) ->
+     opweights.resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     opweights.resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     opweights.resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+               
+type 'a symbolic_term_node =
+  | STop of Op.operation * 'a list
+  | STinitial_reg of int
+  | STother of int;;
+
+type symbolic_term = {
+    hash_id : unique_id;
+    hash_ct : symbolic_term symbolic_term_node
+  };;
+
+let rec print_term channel term =
+  match term.hash_ct with
+  | STop(op, args) ->
+     PrintOp.print_operation print_term channel (op, args)
+  | STinitial_reg n -> Printf.fprintf channel "x%d" n
+  | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+    mutable st_next_id : unique_id };;
+
+let hash_init () = {
+    st_table = Hashtbl.create 20;
+    st_next_id = 0
+  };;
+
+let ground_to_id = function
+  | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+  | STinitial_reg r -> STinitial_reg r
+  | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let grounded = ground_to_id term in
+  match Hashtbl.find_opt table.st_table grounded with
+  | Some x -> x
+  | None ->
+     let term' = { hash_id = table.st_next_id;
+                   hash_ct = term } in
+     (if table.st_next_id = max_int then failwith "hash: max_int");
+     table.st_next_id <- table.st_next_id + 1;
+     Hashtbl.add table.st_table grounded term';
+     term';;
+
+type access = {
+    base : symbolic_term;
+    offset : int64;
+    length : int
+  };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | (Op.Aindexed ofs), [reg] -> Some
+     { base = get_reg reg;
+       offset = Camlcoq.camlint64_of_ptrofs ofs;
+       length = length_of_chunk chunk
+     }
+  | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+  let regs = ref PTree.empty
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) ->
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+        
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+        
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+                                          
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel = function
+  | None -> Printf.fprintf channel "any"
+  | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  Array.iteri
+    (fun i insn ->
+      match insn with
+      | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+                      i print_access targets.(i) 
+      | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+                       i print_access targets.(i)
+      | _ -> ()
+    ) seqa;;
+
+let may_overlap a0 b0 =
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then (max a.offset b.offset) <
+          (min (Int64.add (Int64.of_int a.length) a.offset)
+               (Int64.add (Int64.of_int b.length) b.offset))
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (ida=idb) &&
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | STop(Op.Oaddrstack _, []),
+            STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []),
+            STop(Op.Oaddrstack _, []) -> false
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem (opweights : opweights) seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
+  { max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with
+  | Some fwd_schedule ->
+     let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+     let constraints' = ref problem.latency_constraints in
+     Array.iteri (fun i is_early ->
+         if is_early then
+           constraints' :=  {
+             instr_from = i;
+             instr_to = nr_instructions ;
+             latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+       early_ones;
+     validated_scheduler reverse_list_scheduler
+       { problem with latency_constraints = !constraints' }
+  | None -> None;;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
+let schedule_sequence (seqa : (instruction*Regset.t) array) =
+  let opweights = OpWeights.get_opweights () in
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+      let problem = define_problem opweights seqa in
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+  
+type opweights =
+  {
+    pipelined_resource_bounds : int array;
+    nr_non_pipelined_units : int;
+    latency_of_op : Op.operation -> int -> int;
+    resources_of_op : Op.operation -> int -> int array;
+    non_pipelined_resources_of_op : Op.operation -> int -> int array;
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_cond : Op.condition -> int -> int array;
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 60dc1a12..513ee9bd 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -559,25 +559,29 @@ Qed.
 Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
 Proof.
   red; intros; unfold divls_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
 Proof.
   red; intros; unfold modls_base, modl_aux.
   exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
 Proof.
   red; intros; unfold divlu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
 Proof.
   red; intros; unfold modlu_base, modl_aux.
   exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrxlimm:
@@ -592,7 +596,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
   rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -726,42 +730,42 @@ Qed.
 
 Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists.  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 3379cbd8..9ce7a8bf 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -666,7 +666,8 @@ Theorem eval_divs_base:
   Val.divs x y = Some z ->
   exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros; unfold divs_base; TrivialExists.
+  intros; unfold divs_base; TrivialExists; cbn.
+  rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_mods_base:
@@ -678,7 +679,8 @@ Theorem eval_mods_base:
 Proof.
   intros; unfold mods_base, mod_aux.
   exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  cbn. rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divu_base:
@@ -689,6 +691,7 @@ Theorem eval_divu_base:
   exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
   intros; unfold divu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modu_base:
@@ -700,7 +703,8 @@ Theorem eval_modu_base:
 Proof.
   intros; unfold modu_base, mod_aux.
   exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrximm:
@@ -715,7 +719,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
   rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -928,7 +932,7 @@ Theorem eval_intoffloat:
   Val.intoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofint:
@@ -939,7 +943,7 @@ Theorem eval_floatofint:
 Proof.
   intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -948,7 +952,7 @@ Theorem eval_intuoffloat:
   Val.intuoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -959,7 +963,7 @@ Theorem eval_floatofintu:
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -968,7 +972,7 @@ Theorem eval_intofsingle:
   Val.intofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -979,7 +983,7 @@ Theorem eval_singleofint:
 Proof.
   intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -988,7 +992,7 @@ Theorem eval_intuofsingle:
   Val.intuofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -999,7 +1003,7 @@ Theorem eval_singleofintu:
 Proof.
   intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 (** Selection *)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omul, v1::v2::nil => mul v1 v2
   | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
   | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
   | Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
   | Oandlimm n, v1::nil => andl v1 (L n)
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
 
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
-  | Ointofsingle, v1::nil => intofsingle v1
-  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
   | Osingleofintu, v1::nil => singleofintu v1
-  | Olongoffloat, v1::nil => longoffloat v1
-  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
   | Ofloatoflongu, v1::nil => floatoflongu v1
-  | Olongofsingle, v1::nil => longofsingle v1
-  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
   | Osingleoflongu, v1::nil => singleoflongu v1
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-11-27 17:56:53 +0100
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-11-27 18:04:35 +0100
commit	ed78594947276264beea0b608c2a101d9f31b18f (patch)
tree	2b4c04d9a6fc5e531f38e6b7f4810241cfe49896 /aarch64
parent	63942e04b0fcb84d54f066122c31ca4c3aa99ad4 (diff)
parent	43a7cc2a7305395b20d92b240362ddfdb43963ff (diff)
download	compcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.tar.gz compcert-kvx-ed78594947276264beea0b608c2a101d9f31b18f.zip