Merge branch 'kvx-work' into kvx-work-merge3.8

Conflicts: Makefile configure
author: Cyril SIX <cyril.six@kalray.eu> 2020-12-04 17:41:14 +0100
committer: Cyril SIX <cyril.six@kalray.eu> 2020-12-04 17:41:14 +0100
commit: 60ff1e39bac5ab35c46698cbc1ed7a76fc936cab (patch)
tree: 87ff5f3b209e5659e967f862dab1517bb2b32baa /aarch64
parent: f2fb8540c94ceb9892510f83bd7d6734fe9d422f (diff)
parent: d2197102d6b81e225865cfac5f1d319d168e1e23 (diff)
download: compcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.tar.gz
compcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.zip
11 files changed, 1231 insertions, 175 deletions
diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
index 0e36bd05..35f1f2d7 100644
--- a/aarch64/Asmgenproof1.v
+++ b/aarch64/Asmgenproof1.v
@@ -881,7 +881,40 @@ Proof.
     split. subst v; Simpl.
     split; intros; Simpl.
 Qed.
- 
+
+
+Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+  Val.shrx rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx32; intros.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split.
+  + intros. Simpl.
+  + Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.add _ _); cbn; Simpl.
+    ** Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.shr _ _); cbn; Simpl.
+    ** Simpl.
+Qed.
+
 Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
   Val.shrxl rs#r1 (Vint n) = Some v ->
   r1 <> X16 ->
@@ -918,6 +951,38 @@ Proof.
     split; intros; Simpl.
 Qed.
 
+Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+  Val.shrxl rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx64; intros.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split.
+  + intros. Simpl.
+  + Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.addl _ _); cbn; Simpl.
+    ** Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.shrl _ _); cbn; Simpl.
+    ** Simpl.
+Qed.
+
 (** Condition bits *)
 
 Lemma compare_int_spec: forall rs v1 v2 m,
@@ -1660,10 +1725,19 @@ Local Transparent Val.add.
   TranslOpBase.
   destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
 - (* shrx *)
-  exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL.
+  {
+    exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
   intros (rs' & A & B & C & D).
   econstructor; split. eexact A. split. rewrite B; auto.
   split; auto.
+  }
+  exploit (exec_shrx32_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C).
+  econstructor; split. { eexact A. }
+  split. { cbn. constructor. }
+  split; auto.           
+  
 - (* zero-ext *)
   TranslOpBase.
   destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
@@ -1736,9 +1810,18 @@ Local Transparent Val.add.
   TranslOpBase.
   destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
 - (* shrx *)
+  destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL.
+  {
   exploit (exec_shrx64 x x0 n); eauto with asmgen.
   apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ).
   econstructor; split. eexact A. split. rewrite B; auto. auto.
+  }
+  exploit (exec_shrx64_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C).
+  econstructor; split. { eexact A. }
+  split. { cbn. constructor. }
+  split; auto.           
+
 - (* zero-ext-l *)
   TranslOpBase.
   destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v
index a23e41a8..d5c7ee0f 100644
--- a/aarch64/CSE2deps.v
+++ b/aarch64/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v
index dbd46142..653c88f4 100644
--- a/aarch64/CSE2depsproof.v
+++ b/aarch64/CSE2depsproof.v
@@ -104,9 +104,71 @@ Section MEMORY_WRITE.
   Qed.
   End INDEXED_AWAY.
 End MEMORY_WRITE.
-End SOUNDNESS.
 
 
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    
+    all: try rewrite ptrofs_modulus in *.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
+End SOUNDNESS.
+
 Section SOUNDNESS.
   Variable F V : Type.
   Variable genv: Genv.t F V.
@@ -124,7 +186,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+ - (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -134,7 +196,14 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned ofs0) chunk' (Ptrofs.unsigned ofs) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index deab7cd4..c777062c 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
+
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
+
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  {
+    rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
 - (* divs *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
+  apply make_divimm_correct; auto. congruence.
 - (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divuimm_correct; auto.
+  apply make_divuimm_correct; auto. congruence.
 - (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
 - (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
 - (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divlimm_correct; auto.
+  apply make_divlimm_correct; auto. congruence.
 - (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divluimm_correct; auto.
+  apply make_divluimm_correct; auto. congruence.
 - (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
 - (* andl 2 *)
diff --git a/aarch64/Op.v b/aarch64/Op.v
index afc25aa6..f2a8e6fb 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
   | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
-  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
-  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
   | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
   | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
   | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
   | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
   | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
   | Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
   | Osext s, v1 :: nil => Some (Val.sign_ext s v1)
   | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
   | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
-  | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+  | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
   | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
   | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
   | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
   | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
   | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
   | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
-  | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
   | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
   | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
   | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -481,22 +481,22 @@ Definition eval_operation
 
   | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Ointuofsingle, v1::nil => Val.intuofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Osingleofintu, v1::nil => Val.singleofintu v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Olonguoffloat, v1::nil => Val.longuoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Olonguofsingle, v1::nil => Val.longuofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
 
   | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
   | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0... destruct v1...
   - apply type_add.
   - apply type_sub.
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (_  || _); trivial...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (Int.eq i0 Int.zero); constructor.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shift s v1 a)...
   - destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... 
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - apply type_subl.
   - destruct v0... destruct v1...
   - destruct v0... destruct v1...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shiftl s v1 a)...
   - destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... 
@@ -893,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
   (* floatofint, floatofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* cmp *)
   - destruct (eval_condition cond vl m) as [[]|]...
   - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
@@ -924,16 +926,7 @@ Qed.
 
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivu | Odivl | Odivlu
-  | Oshrximm _  | Oshrlximm _
-  | Ointoffloat | Ointuoffloat
-  | Ointofsingle | Ointuofsingle
-  | Ofloatofint | Ofloatofintu
-  | Osingleofint | Osingleofintu
-  | Olongoffloat | Olonguoffloat
-  | Olongofsingle | Olonguofsingle
-  | Ofloatoflong | Ofloatoflongu
-  | Osingleoflong | Osingleoflongu => true
+  | Omove => false
   | _ => false
   end.
                 
@@ -1209,6 +1202,20 @@ Proof.
   rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
 Qed.
 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct cond; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct cond; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
@@ -1409,12 +1416,12 @@ Proof.
   - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
   - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (_ || _); cbn;
+    constructor.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (Int.eq i0 Int.zero); cbn;
+    constructor.
   (* and*)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1446,8 +1453,8 @@ Proof.
   (* shru *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1482,12 +1489,10 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* andl *)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1519,8 +1524,8 @@ Proof.
   (* shrlu *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   (* shrlx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1551,37 +1556,29 @@ Proof.
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
   (* floatofint, floatofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
   (* singleofint, singleofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* cmp, sel *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..5cdd002c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,353 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Cortex_A53=
+  struct
+    let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+    let nr_non_pipelined_units = 1;;
+
+    let latency_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _ -> 1
+      | Oshift _ -> 2
+      | Oadd -> 1
+      | Oaddshift _ -> 2
+      | Oaddimm _
+        | Oneg -> 1                   
+      | Onegshift _ -> 2
+      | Osub -> 1
+      | Osubshift _ -> 2
+      | Omul
+        | Omuladd  
+        | Omulsub -> 4
+      | Odiv
+        | Odivu -> 29
+      | Oand -> 1
+      | Oandshift _ -> 2
+      | Oandimm _ -> 1
+      | Oor -> 1
+      | Oorshift _ -> 2
+      | Oorimm _ -> 1
+      | Oxor -> 1
+      | Oxorshift _ -> 2
+      | Oxorimm _ -> 1
+      | Onot -> 1
+      | Onotshift _ -> 2
+      | Obic -> 1
+      | Obicshift _ -> 2
+      | Oorn -> 1
+      | Oornshift _ -> 2
+      | Oeqv -> 1
+      | Oeqvshift _ -> 2
+      | Oshl 
+        | Oshr
+        | Oshru -> 2
+      | Oshrximm _ -> 6
+      | Ozext _
+        | Osext _ -> 1
+      | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _ -> 2
+                      
+      (* 64-bit integer arithmetic *)
+      | Oshiftl _ -> 2
+      | Oextend _ -> 1
+      | Omakelong
+        | Olowlong 
+        | Ohighlong 
+        | Oaddl -> 1
+      | Oaddlshift _
+        | Oaddlext _ -> 2
+      | Oaddlimm _
+        | Onegl -> 1         
+      | Oneglshift _ -> 2
+      | Osubl -> 1
+      | Osublshift _
+        | Osublext _ -> 2
+      | Omull
+        | Omulladd 
+        | Omullsub 
+        | Omullhs 
+        | Omullhu -> 4
+      | Odivl -> 50
+      | Odivlu -> 50
+      | Oandl -> 1
+      | Oandlshift _ -> 2
+      | Oandlimm _
+        | Oorl -> 1 
+      | Oorlshift _ -> 2
+      | Oorlimm _
+        | Oxorl -> 1
+      | Oxorlshift _ -> 2
+      | Oxorlimm _
+        | Onotl -> 1
+      | Onotlshift _ -> 2
+      | Obicl -> 1
+      | Obiclshift _ -> 2
+      | Oornl -> 1
+      | Oornlshift _ -> 2
+      | Oeqvl -> 1
+      | Oeqvlshift _ -> 2
+      | Oshll 
+        | Oshrl
+        | Oshrlu -> 2
+      | Oshrlximm _ -> 6
+      | Ozextl _
+        | Osextl _ -> 1
+      | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> 2
+                       
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+(* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> 6
+      | Odivf -> 50                                         (* r [rd = r1 / r2] *)
+      | Odivfs -> 20
+  (*  Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
+          | _ -> 1);;
+    
+    let resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _
+        (* 32-bit integer arithmetic *)
+        | Oshift _
+        | Oadd 
+        | Oaddshift _
+        | Oaddimm _
+        | Oneg         
+        | Onegshift _
+        | Osub
+        | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+      | Omul 
+        | Omuladd
+        | Omulsub -> [| 1; 1; 1; 0 |]
+      | Odiv 
+        | Odivu -> [| 1; 0; 0; 0 |]
+      | Oand 
+        | Oandshift _
+        | Oandimm _
+        | Oor
+        | Oorshift _
+        | Oorimm _
+        | Oxor
+        | Oxorshift _
+        | Oxorimm _
+        | Onot
+        | Onotshift _
+        | Obic
+        | Obicshift _
+        | Oorn
+        | Oornshift _
+        | Oeqv
+        | Oeqvshift _
+        | Oshl
+        | Oshr
+        | Oshru
+        | Oshrximm _
+        | Ozext _
+        | Osext  _
+        | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _
+  
+(* 64-bit integer arithmetic *)
+        | Oshiftl _
+        | Oextend _
+        | Omakelong
+        | Olowlong 
+        | Ohighlong
+        | Oaddl 
+        | Oaddlshift _
+        | Oaddlext _
+        | Oaddlimm _
+        | Onegl        
+        | Oneglshift _
+        | Osubl
+        | Osublshift _
+        | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+      | Omull 
+        | Omulladd 
+        | Omullsub
+        | Omullhs 
+        | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+      | Odivl 
+        | Odivlu -> [| 1; 0; 0; 0 |]
+      | Oandl 
+        | Oandlshift _
+        | Oandlimm _
+        | Oorl
+        | Oorlshift _
+        | Oorlimm _
+        | Oxorl
+        | Oxorlshift _
+        | Oxorlimm _
+        | Onotl
+        | Onotlshift _
+        | Obicl
+        | Obiclshift _
+        | Oornl
+        | Oornlshift _
+        | Oeqvl
+        | Oeqvlshift _
+        | Oshll
+        | Oshrl
+        | Oshrlu 
+        | Oshrlximm _
+        | Ozextl _
+        | Osextl _
+        | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> [| 1; 1; 0; 0 |]
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+        | Odivf
+        (* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Odivfs                                              (* r [rd = r1 / r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> [| 1 ; 1; 1; 0 |]
+         
+(* Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+             [| 1; 1; 1; 0 |]
+          | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Odiv | Odivu -> [| 29 |]
+      | Odivfs -> [| 20 |]
+      | Odivl | Odivlu | Odivf -> [| 50 |]
+      | _ -> [| -1 |];;
+    
+    let resources_of_cond (cmp : condition) (nargs : int) =
+      (match cmp with
+       | Ccompf _                              (* r FP comparison *)
+         | Cnotcompf _                           (* r negation of an FP comparison *)
+         | Ccompfzero _                          (* r comparison with 0.0 *)
+         | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+         | Ccompfs _                             (* r FP comparison *)
+         | Cnotcompfs _                          (* r negation of an FP comparison *)
+         | Ccompfszero _                         (* r equal to 0.0 *)
+         | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+          [| 1; 1; 1; 0 |]
+       | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+    let latency_of_call _ _ = 6;;
+    
+    let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_call _ _ = resource_bounds;;
+    let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "cortex-a53" | "cortex-a35" | "" ->
+     {
+       pipelined_resource_bounds = Cortex_A53.resource_bounds;
+       nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+       latency_of_op = Cortex_A53.latency_of_op;
+       resources_of_op = Cortex_A53.resources_of_op;
+       non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+       latency_of_load = Cortex_A53.latency_of_load;
+       resources_of_load = Cortex_A53.resources_of_load;
+       resources_of_store = Cortex_A53.resources_of_store;
+       resources_of_cond = Cortex_A53.resources_of_cond;
+       latency_of_call = Cortex_A53.latency_of_call;
+       resources_of_call = Cortex_A53.resources_of_call;
+       resources_of_builtin = Cortex_A53.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..2c3eb14f
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,477 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+   
+let use_alias_analysis () = false
+                          
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+  and last_mem_reads : int list ref = ref []
+  and last_mem_write : int option ref = ref None
+  and last_branch : int option ref = ref None
+  and last_non_pipelined_op : int array = Array.make
+                                        opweights.nr_non_pipelined_units ( -1 )
+  and latency_constraints : latency_constraint list ref = ref [] in
+  let add_constraint instr_from instr_to latency =
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then ()
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg =
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := []
+      end
+  and add_input_reg i reg =
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg =
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) =
+    match res with
+    | BR r -> add_output_reg i 10 r
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) =
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i =
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i =
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources =
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses);
+      
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i);
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall"
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall"
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin"
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i;
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable"
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn"
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none"
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+  | Iop(op, inputs, output, _) ->
+     opweights.resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     opweights.resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     opweights.resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+               
+type 'a symbolic_term_node =
+  | STop of Op.operation * 'a list
+  | STinitial_reg of int
+  | STother of int;;
+
+type symbolic_term = {
+    hash_id : unique_id;
+    hash_ct : symbolic_term symbolic_term_node
+  };;
+
+let rec print_term channel term =
+  match term.hash_ct with
+  | STop(op, args) ->
+     PrintOp.print_operation print_term channel (op, args)
+  | STinitial_reg n -> Printf.fprintf channel "x%d" n
+  | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+    mutable st_next_id : unique_id };;
+
+let hash_init () = {
+    st_table = Hashtbl.create 20;
+    st_next_id = 0
+  };;
+
+let ground_to_id = function
+  | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+  | STinitial_reg r -> STinitial_reg r
+  | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let grounded = ground_to_id term in
+  match Hashtbl.find_opt table.st_table grounded with
+  | Some x -> x
+  | None ->
+     let term' = { hash_id = table.st_next_id;
+                   hash_ct = term } in
+     (if table.st_next_id = max_int then failwith "hash: max_int");
+     table.st_next_id <- table.st_next_id + 1;
+     Hashtbl.add table.st_table grounded term';
+     term';;
+
+type access = {
+    base : symbolic_term;
+    offset : int64;
+    length : int
+  };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | (Op.Aindexed ofs), [reg] -> Some
+     { base = get_reg reg;
+       offset = Camlcoq.camlint64_of_ptrofs ofs;
+       length = length_of_chunk chunk
+     }
+  | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+  let regs = ref PTree.empty
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) ->
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+        
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+        
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+                                          
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel = function
+  | None -> Printf.fprintf channel "any"
+  | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  Array.iteri
+    (fun i insn ->
+      match insn with
+      | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+                      i print_access targets.(i) 
+      | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+                       i print_access targets.(i)
+      | _ -> ()
+    ) seqa;;
+
+let may_overlap a0 b0 =
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then (max a.offset b.offset) <
+          (min (Int64.add (Int64.of_int a.length) a.offset)
+               (Int64.add (Int64.of_int b.length) b.offset))
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (ida=idb) &&
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | STop(Op.Oaddrstack _, []),
+            STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []),
+            STop(Op.Oaddrstack _, []) -> false
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem (opweights : opweights) seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
+  { max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with
+  | Some fwd_schedule ->
+     let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+     let constraints' = ref problem.latency_constraints in
+     Array.iteri (fun i is_early ->
+         if is_early then
+           constraints' :=  {
+             instr_from = i;
+             instr_to = nr_instructions ;
+             latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+       early_ones;
+     validated_scheduler reverse_list_scheduler
+       { problem with latency_constraints = !constraints' }
+  | None -> None;;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
+let schedule_sequence (seqa : (instruction*Regset.t) array) =
+  let opweights = OpWeights.get_opweights () in
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+      let problem = define_problem opweights seqa in
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+  
+type opweights =
+  {
+    pipelined_resource_bounds : int array;
+    nr_non_pipelined_units : int;
+    latency_of_op : Op.operation -> int -> int;
+    resources_of_op : Op.operation -> int -> int array;
+    non_pipelined_resources_of_op : Op.operation -> int -> int array;
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_cond : Op.condition -> int -> int array;
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 60dc1a12..513ee9bd 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -559,25 +559,29 @@ Qed.
 Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
 Proof.
   red; intros; unfold divls_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
 Proof.
   red; intros; unfold modls_base, modl_aux.
   exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
 Proof.
   red; intros; unfold divlu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
 Proof.
   red; intros; unfold modlu_base, modl_aux.
   exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrxlimm:
@@ -592,7 +596,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
   rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -726,42 +730,42 @@ Qed.
 
 Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists.  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 3379cbd8..9ce7a8bf 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -666,7 +666,8 @@ Theorem eval_divs_base:
   Val.divs x y = Some z ->
   exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros; unfold divs_base; TrivialExists.
+  intros; unfold divs_base; TrivialExists; cbn.
+  rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_mods_base:
@@ -678,7 +679,8 @@ Theorem eval_mods_base:
 Proof.
   intros; unfold mods_base, mod_aux.
   exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  cbn. rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divu_base:
@@ -689,6 +691,7 @@ Theorem eval_divu_base:
   exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
   intros; unfold divu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modu_base:
@@ -700,7 +703,8 @@ Theorem eval_modu_base:
 Proof.
   intros; unfold modu_base, mod_aux.
   exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrximm:
@@ -715,7 +719,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
   rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -928,7 +932,7 @@ Theorem eval_intoffloat:
   Val.intoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofint:
@@ -939,7 +943,7 @@ Theorem eval_floatofint:
 Proof.
   intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -948,7 +952,7 @@ Theorem eval_intuoffloat:
   Val.intuoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -959,7 +963,7 @@ Theorem eval_floatofintu:
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -968,7 +972,7 @@ Theorem eval_intofsingle:
   Val.intofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -979,7 +983,7 @@ Theorem eval_singleofint:
 Proof.
   intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -988,7 +992,7 @@ Theorem eval_intuofsingle:
   Val.intuofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -999,7 +1003,7 @@ Theorem eval_singleofintu:
 Proof.
   intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 (** Selection *)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omul, v1::v2::nil => mul v1 v2
   | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
   | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
   | Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
   | Oandlimm n, v1::nil => andl v1 (L n)
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
 
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
-  | Ointofsingle, v1::nil => intofsingle v1
-  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
   | Osingleofintu, v1::nil => singleofintu v1
-  | Olongoffloat, v1::nil => longoffloat v1
-  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
   | Ofloatoflongu, v1::nil => floatoflongu v1
-  | Olongofsingle, v1::nil => longofsingle v1
-  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
   | Osingleoflongu, v1::nil => singleoflongu v1
author	Cyril SIX <cyril.six@kalray.eu>	2020-12-04 17:41:14 +0100
committer	Cyril SIX <cyril.six@kalray.eu>	2020-12-04 17:41:14 +0100
commit	60ff1e39bac5ab35c46698cbc1ed7a76fc936cab (patch)
tree	87ff5f3b209e5659e967f862dab1517bb2b32baa /aarch64
parent	f2fb8540c94ceb9892510f83bd7d6734fe9d422f (diff)
parent	d2197102d6b81e225865cfac5f1d319d168e1e23 (diff)
download	compcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.tar.gz compcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.zip