104 files changed, 11993 insertions, 1053 deletions
diff --git a/Makefile b/Makefile
index f3e5aef5..8b2fb534 100644
--- a/Makefile
+++ b/Makefile
@@ -29,17 +29,23 @@ endif
 
 BACKENDLIB?=Asmgenproof0.v
 
-DIRS := lib common $(ARCHDIRS) backend cfrontend driver exportclight cparser
+DIRS := lib lib/Impure common $(ARCHDIRS) scheduling backend cfrontend driver \
+	exportclight cparser
+
+RECDIRS:=lib common $(ARCHDIRS) scheduling backend cfrontend driver flocq exportclight \
+  cparser
 
 COQINCLUDES := $(foreach d, $(DIRS), -R $(d) compcert.$(subst /,.,$d))
 
 ifeq ($(LIBRARY_FLOCQ),local)
 DIRS += flocq/Core flocq/Prop flocq/Calc flocq/IEEE754
+RECDIRS += flocq
 COQINCLUDES += -R flocq Flocq
 endif
 
 ifeq ($(LIBRARY_MENHIRLIB),local)
 DIRS += MenhirLib
+RECDIRS += MenhirLib
 COQINCLUDES += -R MenhirLib MenhirLib
 endif
 
@@ -75,7 +81,9 @@ VLIB=Axioms.v Coqlib.v Intv.v Maps.v Heaps.v Lattice.v Ordered.v \
   HashedSet.v \
   Iteration.v Zbits.v Integers.v Archi.v IEEE754_extra.v Floats.v \
   Parmov.v UnionFind.v Wfsimpl.v \
-  Postorder.v FSetAVLplus.v IntvSets.v Decidableplus.v BoolEqual.v
+  Postorder.v FSetAVLplus.v IntvSets.v Decidableplus.v BoolEqual.v \
+  ImpConfig.v  ImpExtern.v  ImpIO.v     ImpMonads.v \
+  ImpCore.v    ImpHCons.v   ImpLoops.v  ImpPrelude.v
 
 # Parts common to the front-ends and the back-end (in common/)
 
@@ -131,6 +139,13 @@ ifneq ($(ARCH),kvx)
   BACKEND += Asmgenproof1.v 
 endif
 
+SCHEDULING= \
+  RTLpathLivegenproof.v    RTLpathSE_simu_specs.v \
+  RTLpathLivegen.v         RTLpathSE_impl.v \
+  RTLpathproof.v           RTLpathSE_theory.v \
+  RTLpathSchedulerproof.v  RTLpath.v \
+  RTLpathScheduler.v
+
 # C front-end modules (in cfrontend/)
 
 CFRONTEND=Ctypes.v Cop.v Csyntax.v Csem.v Ctyping.v Cstrategy.v Cexec.v \
@@ -160,7 +175,7 @@ DRIVER=Compopts.v Compiler.v Complements.v
 
 # All source files
 
-FILES=$(VLIB) $(COMMON) $(BACKEND) $(CFRONTEND) $(DRIVER) $(FLOCQ) \
+FILES=$(VLIB) $(COMMON) $(BACKEND) $(SCHEDULING) $(CFRONTEND) $(DRIVER) $(FLOCQ) \
   $(MENHIRLIB) $(PARSER)
 
 # Generated source files
@@ -206,6 +221,10 @@ ccomp: .depend.extr compcert.ini driver/Version.ml FORCE
 ccomp.byte: .depend.extr compcert.ini driver/Version.ml FORCE
 	$(MAKE) -f Makefile.extr ccomp.byte
 
+# DM force compilation without checking dependencies
+ccomp.force: .depend.extr compcert.ini driver/Version.ml FORCE
+	$(MAKE) -f Makefile.extr ccomp.force
+
 clightgen: .depend.extr compcert.ini exportclight/Clightdefs.vo driver/Version.ml FORCE
 	$(MAKE) -f Makefile.extr clightgen
 clightgen.byte: .depend.extr compcert.ini exportclight/Clightdefs.vo driver/Version.ml FORCE
diff --git a/Makefile.extr b/Makefile.extr
index 4f1c3c65..f4ab74b5 100644
--- a/Makefile.extr
+++ b/Makefile.extr
@@ -43,8 +43,9 @@ cparser/pre_parser_messages.ml:
 # Directories containing plain Caml code
 
 DIRS=extraction \
-  lib common $(ARCH) backend cfrontend cparser driver \
-  exportclight debug kvx/unittest kvx/abstractbb/Impure/ocaml
+  lib common $(ARCH) scheduling backend cfrontend cparser driver \
+  exportclight debug kvx/unittest lib/Impure/ocaml \
+  kvx/lib
 
 INCLUDES=$(patsubst %,-I %, $(DIRS))
 
@@ -109,6 +110,10 @@ ccomp: ccomp.byte
 	@$(COPY) $+ $@
 endif
 
+# DM force compilation without checking dependencies
+ccomp.force:
+	$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $(CCOMP_OBJS)
+	
 ccomp.byte: $(CCOMP_OBJS:.cmx=.cmo)
 	@echo "Linking $@"
 	@$(OCAMLC) -o $@ $(LIBS_BYTE) $+
diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
index 0e36bd05..35f1f2d7 100644
--- a/aarch64/Asmgenproof1.v
+++ b/aarch64/Asmgenproof1.v
@@ -881,7 +881,40 @@ Proof.
     split. subst v; Simpl.
     split; intros; Simpl.
 Qed.
- 
+
+
+Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+  Val.shrx rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx32; intros.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split.
+  + intros. Simpl.
+  + Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.add _ _); cbn; Simpl.
+    ** Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.shr _ _); cbn; Simpl.
+    ** Simpl.
+Qed.
+
 Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
   Val.shrxl rs#r1 (Vint n) = Some v ->
   r1 <> X16 ->
@@ -918,6 +951,38 @@ Proof.
     split; intros; Simpl.
 Qed.
 
+Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+  Val.shrxl rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx64; intros.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split.
+  + intros. Simpl.
+  + Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.addl _ _); cbn; Simpl.
+    ** Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.shrl _ _); cbn; Simpl.
+    ** Simpl.
+Qed.
+
 (** Condition bits *)
 
 Lemma compare_int_spec: forall rs v1 v2 m,
@@ -1660,10 +1725,19 @@ Local Transparent Val.add.
   TranslOpBase.
   destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
 - (* shrx *)
-  exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL.
+  {
+    exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
   intros (rs' & A & B & C & D).
   econstructor; split. eexact A. split. rewrite B; auto.
   split; auto.
+  }
+  exploit (exec_shrx32_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C).
+  econstructor; split. { eexact A. }
+  split. { cbn. constructor. }
+  split; auto.           
+  
 - (* zero-ext *)
   TranslOpBase.
   destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
@@ -1736,9 +1810,18 @@ Local Transparent Val.add.
   TranslOpBase.
   destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
 - (* shrx *)
+  destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL.
+  {
   exploit (exec_shrx64 x x0 n); eauto with asmgen.
   apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ).
   econstructor; split. eexact A. split. rewrite B; auto. auto.
+  }
+  exploit (exec_shrx64_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C).
+  econstructor; split. { eexact A. }
+  split. { cbn. constructor. }
+  split; auto.           
+
 - (* zero-ext-l *)
   TranslOpBase.
   destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v
index a23e41a8..d5c7ee0f 100644
--- a/aarch64/CSE2deps.v
+++ b/aarch64/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v
index dbd46142..653c88f4 100644
--- a/aarch64/CSE2depsproof.v
+++ b/aarch64/CSE2depsproof.v
@@ -104,9 +104,71 @@ Section MEMORY_WRITE.
   Qed.
   End INDEXED_AWAY.
 End MEMORY_WRITE.
-End SOUNDNESS.
 
 
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    
+    all: try rewrite ptrofs_modulus in *.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
+End SOUNDNESS.
+
 Section SOUNDNESS.
   Variable F V : Type.
   Variable genv: Genv.t F V.
@@ -124,7 +186,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+ - (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -134,7 +196,14 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned ofs0) chunk' (Ptrofs.unsigned ofs) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index deab7cd4..c777062c 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
+
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
+
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  {
+    rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
 - (* divs *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
+  apply make_divimm_correct; auto. congruence.
 - (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divuimm_correct; auto.
+  apply make_divuimm_correct; auto. congruence.
 - (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
 - (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
 - (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divlimm_correct; auto.
+  apply make_divlimm_correct; auto. congruence.
 - (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divluimm_correct; auto.
+  apply make_divluimm_correct; auto. congruence.
 - (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
 - (* andl 2 *)
diff --git a/aarch64/Op.v b/aarch64/Op.v
index afc25aa6..f2a8e6fb 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
   | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
-  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
-  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
   | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
   | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
   | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
   | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
   | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
   | Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
   | Osext s, v1 :: nil => Some (Val.sign_ext s v1)
   | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
   | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
-  | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+  | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
   | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
   | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
   | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
   | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
   | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
   | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
-  | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
   | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
   | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
   | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -481,22 +481,22 @@ Definition eval_operation
 
   | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Ointuofsingle, v1::nil => Val.intuofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Osingleofintu, v1::nil => Val.singleofintu v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Olonguoffloat, v1::nil => Val.longuoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Olonguofsingle, v1::nil => Val.longuofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
 
   | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
   | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0... destruct v1...
   - apply type_add.
   - apply type_sub.
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (_  || _); trivial...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (Int.eq i0 Int.zero); constructor.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shift s v1 a)...
   - destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... 
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - apply type_subl.
   - destruct v0... destruct v1...
   - destruct v0... destruct v1...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shiftl s v1 a)...
   - destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... 
@@ -893,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
   (* floatofint, floatofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* cmp *)
   - destruct (eval_condition cond vl m) as [[]|]...
   - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
@@ -924,16 +926,7 @@ Qed.
 
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivu | Odivl | Odivlu
-  | Oshrximm _  | Oshrlximm _
-  | Ointoffloat | Ointuoffloat
-  | Ointofsingle | Ointuofsingle
-  | Ofloatofint | Ofloatofintu
-  | Osingleofint | Osingleofintu
-  | Olongoffloat | Olonguoffloat
-  | Olongofsingle | Olonguofsingle
-  | Ofloatoflong | Ofloatoflongu
-  | Osingleoflong | Osingleoflongu => true
+  | Omove => false
   | _ => false
   end.
                 
@@ -1209,6 +1202,20 @@ Proof.
   rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
 Qed.
 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct cond; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct cond; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
@@ -1409,12 +1416,12 @@ Proof.
   - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
   - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (_ || _); cbn;
+    constructor.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (Int.eq i0 Int.zero); cbn;
+    constructor.
   (* and*)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1446,8 +1453,8 @@ Proof.
   (* shru *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1482,12 +1489,10 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* andl *)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1519,8 +1524,8 @@ Proof.
   (* shrlu *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   (* shrlx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1551,37 +1556,29 @@ Proof.
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
   (* floatofint, floatofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
   (* singleofint, singleofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* cmp, sel *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..5cdd002c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,353 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Cortex_A53=
+  struct
+    let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+    let nr_non_pipelined_units = 1;;
+
+    let latency_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _ -> 1
+      | Oshift _ -> 2
+      | Oadd -> 1
+      | Oaddshift _ -> 2
+      | Oaddimm _
+        | Oneg -> 1                   
+      | Onegshift _ -> 2
+      | Osub -> 1
+      | Osubshift _ -> 2
+      | Omul
+        | Omuladd  
+        | Omulsub -> 4
+      | Odiv
+        | Odivu -> 29
+      | Oand -> 1
+      | Oandshift _ -> 2
+      | Oandimm _ -> 1
+      | Oor -> 1
+      | Oorshift _ -> 2
+      | Oorimm _ -> 1
+      | Oxor -> 1
+      | Oxorshift _ -> 2
+      | Oxorimm _ -> 1
+      | Onot -> 1
+      | Onotshift _ -> 2
+      | Obic -> 1
+      | Obicshift _ -> 2
+      | Oorn -> 1
+      | Oornshift _ -> 2
+      | Oeqv -> 1
+      | Oeqvshift _ -> 2
+      | Oshl 
+        | Oshr
+        | Oshru -> 2
+      | Oshrximm _ -> 6
+      | Ozext _
+        | Osext _ -> 1
+      | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _ -> 2
+                      
+      (* 64-bit integer arithmetic *)
+      | Oshiftl _ -> 2
+      | Oextend _ -> 1
+      | Omakelong
+        | Olowlong 
+        | Ohighlong 
+        | Oaddl -> 1
+      | Oaddlshift _
+        | Oaddlext _ -> 2
+      | Oaddlimm _
+        | Onegl -> 1         
+      | Oneglshift _ -> 2
+      | Osubl -> 1
+      | Osublshift _
+        | Osublext _ -> 2
+      | Omull
+        | Omulladd 
+        | Omullsub 
+        | Omullhs 
+        | Omullhu -> 4
+      | Odivl -> 50
+      | Odivlu -> 50
+      | Oandl -> 1
+      | Oandlshift _ -> 2
+      | Oandlimm _
+        | Oorl -> 1 
+      | Oorlshift _ -> 2
+      | Oorlimm _
+        | Oxorl -> 1
+      | Oxorlshift _ -> 2
+      | Oxorlimm _
+        | Onotl -> 1
+      | Onotlshift _ -> 2
+      | Obicl -> 1
+      | Obiclshift _ -> 2
+      | Oornl -> 1
+      | Oornlshift _ -> 2
+      | Oeqvl -> 1
+      | Oeqvlshift _ -> 2
+      | Oshll 
+        | Oshrl
+        | Oshrlu -> 2
+      | Oshrlximm _ -> 6
+      | Ozextl _
+        | Osextl _ -> 1
+      | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> 2
+                       
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+(* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> 6
+      | Odivf -> 50                                         (* r [rd = r1 / r2] *)
+      | Odivfs -> 20
+  (*  Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
+          | _ -> 1);;
+    
+    let resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _
+        (* 32-bit integer arithmetic *)
+        | Oshift _
+        | Oadd 
+        | Oaddshift _
+        | Oaddimm _
+        | Oneg         
+        | Onegshift _
+        | Osub
+        | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+      | Omul 
+        | Omuladd
+        | Omulsub -> [| 1; 1; 1; 0 |]
+      | Odiv 
+        | Odivu -> [| 1; 0; 0; 0 |]
+      | Oand 
+        | Oandshift _
+        | Oandimm _
+        | Oor
+        | Oorshift _
+        | Oorimm _
+        | Oxor
+        | Oxorshift _
+        | Oxorimm _
+        | Onot
+        | Onotshift _
+        | Obic
+        | Obicshift _
+        | Oorn
+        | Oornshift _
+        | Oeqv
+        | Oeqvshift _
+        | Oshl
+        | Oshr
+        | Oshru
+        | Oshrximm _
+        | Ozext _
+        | Osext  _
+        | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _
+  
+(* 64-bit integer arithmetic *)
+        | Oshiftl _
+        | Oextend _
+        | Omakelong
+        | Olowlong 
+        | Ohighlong
+        | Oaddl 
+        | Oaddlshift _
+        | Oaddlext _
+        | Oaddlimm _
+        | Onegl        
+        | Oneglshift _
+        | Osubl
+        | Osublshift _
+        | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+      | Omull 
+        | Omulladd 
+        | Omullsub
+        | Omullhs 
+        | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+      | Odivl 
+        | Odivlu -> [| 1; 0; 0; 0 |]
+      | Oandl 
+        | Oandlshift _
+        | Oandlimm _
+        | Oorl
+        | Oorlshift _
+        | Oorlimm _
+        | Oxorl
+        | Oxorlshift _
+        | Oxorlimm _
+        | Onotl
+        | Onotlshift _
+        | Obicl
+        | Obiclshift _
+        | Oornl
+        | Oornlshift _
+        | Oeqvl
+        | Oeqvlshift _
+        | Oshll
+        | Oshrl
+        | Oshrlu 
+        | Oshrlximm _
+        | Ozextl _
+        | Osextl _
+        | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> [| 1; 1; 0; 0 |]
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+        | Odivf
+        (* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Odivfs                                              (* r [rd = r1 / r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> [| 1 ; 1; 1; 0 |]
+         
+(* Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+             [| 1; 1; 1; 0 |]
+          | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Odiv | Odivu -> [| 29 |]
+      | Odivfs -> [| 20 |]
+      | Odivl | Odivlu | Odivf -> [| 50 |]
+      | _ -> [| -1 |];;
+    
+    let resources_of_cond (cmp : condition) (nargs : int) =
+      (match cmp with
+       | Ccompf _                              (* r FP comparison *)
+         | Cnotcompf _                           (* r negation of an FP comparison *)
+         | Ccompfzero _                          (* r comparison with 0.0 *)
+         | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+         | Ccompfs _                             (* r FP comparison *)
+         | Cnotcompfs _                          (* r negation of an FP comparison *)
+         | Ccompfszero _                         (* r equal to 0.0 *)
+         | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+          [| 1; 1; 1; 0 |]
+       | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+    let latency_of_call _ _ = 6;;
+    
+    let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_call _ _ = resource_bounds;;
+    let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "cortex-a53" | "cortex-a35" | "" ->
+     {
+       pipelined_resource_bounds = Cortex_A53.resource_bounds;
+       nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+       latency_of_op = Cortex_A53.latency_of_op;
+       resources_of_op = Cortex_A53.resources_of_op;
+       non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+       latency_of_load = Cortex_A53.latency_of_load;
+       resources_of_load = Cortex_A53.resources_of_load;
+       resources_of_store = Cortex_A53.resources_of_store;
+       resources_of_cond = Cortex_A53.resources_of_cond;
+       latency_of_call = Cortex_A53.latency_of_call;
+       resources_of_call = Cortex_A53.resources_of_call;
+       resources_of_builtin = Cortex_A53.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..2c3eb14f
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,477 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+   
+let use_alias_analysis () = false
+                          
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+  and last_mem_reads : int list ref = ref []
+  and last_mem_write : int option ref = ref None
+  and last_branch : int option ref = ref None
+  and last_non_pipelined_op : int array = Array.make
+                                        opweights.nr_non_pipelined_units ( -1 )
+  and latency_constraints : latency_constraint list ref = ref [] in
+  let add_constraint instr_from instr_to latency =
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then ()
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg =
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := []
+      end
+  and add_input_reg i reg =
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg =
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) =
+    match res with
+    | BR r -> add_output_reg i 10 r
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) =
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i =
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i =
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources =
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses);
+      
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i);
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall"
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall"
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin"
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i;
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable"
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn"
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none"
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+  | Iop(op, inputs, output, _) ->
+     opweights.resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     opweights.resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     opweights.resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+               
+type 'a symbolic_term_node =
+  | STop of Op.operation * 'a list
+  | STinitial_reg of int
+  | STother of int;;
+
+type symbolic_term = {
+    hash_id : unique_id;
+    hash_ct : symbolic_term symbolic_term_node
+  };;
+
+let rec print_term channel term =
+  match term.hash_ct with
+  | STop(op, args) ->
+     PrintOp.print_operation print_term channel (op, args)
+  | STinitial_reg n -> Printf.fprintf channel "x%d" n
+  | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+    mutable st_next_id : unique_id };;
+
+let hash_init () = {
+    st_table = Hashtbl.create 20;
+    st_next_id = 0
+  };;
+
+let ground_to_id = function
+  | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+  | STinitial_reg r -> STinitial_reg r
+  | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let grounded = ground_to_id term in
+  match Hashtbl.find_opt table.st_table grounded with
+  | Some x -> x
+  | None ->
+     let term' = { hash_id = table.st_next_id;
+                   hash_ct = term } in
+     (if table.st_next_id = max_int then failwith "hash: max_int");
+     table.st_next_id <- table.st_next_id + 1;
+     Hashtbl.add table.st_table grounded term';
+     term';;
+
+type access = {
+    base : symbolic_term;
+    offset : int64;
+    length : int
+  };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | (Op.Aindexed ofs), [reg] -> Some
+     { base = get_reg reg;
+       offset = Camlcoq.camlint64_of_ptrofs ofs;
+       length = length_of_chunk chunk
+     }
+  | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+  let regs = ref PTree.empty
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) ->
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+        
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+        
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+                                          
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel = function
+  | None -> Printf.fprintf channel "any"
+  | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  Array.iteri
+    (fun i insn ->
+      match insn with
+      | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+                      i print_access targets.(i) 
+      | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+                       i print_access targets.(i)
+      | _ -> ()
+    ) seqa;;
+
+let may_overlap a0 b0 =
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then (max a.offset b.offset) <
+          (min (Int64.add (Int64.of_int a.length) a.offset)
+               (Int64.add (Int64.of_int b.length) b.offset))
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (ida=idb) &&
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | STop(Op.Oaddrstack _, []),
+            STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []),
+            STop(Op.Oaddrstack _, []) -> false
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem (opweights : opweights) seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
+  { max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with
+  | Some fwd_schedule ->
+     let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+     let constraints' = ref problem.latency_constraints in
+     Array.iteri (fun i is_early ->
+         if is_early then
+           constraints' :=  {
+             instr_from = i;
+             instr_to = nr_instructions ;
+             latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+       early_ones;
+     validated_scheduler reverse_list_scheduler
+       { problem with latency_constraints = !constraints' }
+  | None -> None;;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
+let schedule_sequence (seqa : (instruction*Regset.t) array) =
+  let opweights = OpWeights.get_opweights () in
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+      let problem = define_problem opweights seqa in
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+  
+type opweights =
+  {
+    pipelined_resource_bounds : int array;
+    nr_non_pipelined_units : int;
+    latency_of_op : Op.operation -> int -> int;
+    resources_of_op : Op.operation -> int -> int array;
+    non_pipelined_resources_of_op : Op.operation -> int -> int array;
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_cond : Op.condition -> int -> int array;
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 60dc1a12..513ee9bd 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -559,25 +559,29 @@ Qed.
 Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
 Proof.
   red; intros; unfold divls_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
 Proof.
   red; intros; unfold modls_base, modl_aux.
   exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
 Proof.
   red; intros; unfold divlu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
 Proof.
   red; intros; unfold modlu_base, modl_aux.
   exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrxlimm:
@@ -592,7 +596,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
   rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -726,42 +730,42 @@ Qed.
 
 Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists.  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 3379cbd8..9ce7a8bf 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -666,7 +666,8 @@ Theorem eval_divs_base:
   Val.divs x y = Some z ->
   exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros; unfold divs_base; TrivialExists.
+  intros; unfold divs_base; TrivialExists; cbn.
+  rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_mods_base:
@@ -678,7 +679,8 @@ Theorem eval_mods_base:
 Proof.
   intros; unfold mods_base, mod_aux.
   exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  cbn. rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divu_base:
@@ -689,6 +691,7 @@ Theorem eval_divu_base:
   exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
   intros; unfold divu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modu_base:
@@ -700,7 +703,8 @@ Theorem eval_modu_base:
 Proof.
   intros; unfold modu_base, mod_aux.
   exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrximm:
@@ -715,7 +719,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
   rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -928,7 +932,7 @@ Theorem eval_intoffloat:
   Val.intoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofint:
@@ -939,7 +943,7 @@ Theorem eval_floatofint:
 Proof.
   intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -948,7 +952,7 @@ Theorem eval_intuoffloat:
   Val.intuoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -959,7 +963,7 @@ Theorem eval_floatofintu:
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -968,7 +972,7 @@ Theorem eval_intofsingle:
   Val.intofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -979,7 +983,7 @@ Theorem eval_singleofint:
 Proof.
   intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -988,7 +992,7 @@ Theorem eval_intuofsingle:
   Val.intuofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -999,7 +1003,7 @@ Theorem eval_singleofintu:
 Proof.
   intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 (** Selection *)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omul, v1::v2::nil => mul v1 v2
   | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
   | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
   | Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
   | Oandlimm n, v1::nil => andl v1 (L n)
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
 
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
-  | Ointofsingle, v1::nil => intofsingle v1
-  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
   | Osingleofintu, v1::nil => singleofintu v1
-  | Olongoffloat, v1::nil => longoffloat v1
-  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
   | Ofloatoflongu, v1::nil => floatoflongu v1
-  | Olongofsingle, v1::nil => longofsingle v1
-  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
   | Osingleoflongu, v1::nil => singleoflongu v1
 
diff --git a/arm/CSE2deps.v b/arm/CSE2deps.v
index d48dabf3..4592f408 100644
--- a/arm/CSE2deps.v
+++ b/arm/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/arm/CSE2depsproof.v b/arm/CSE2depsproof.v
index 28ef41ca..7dd0914e 100644
--- a/arm/CSE2depsproof.v
+++ b/arm/CSE2depsproof.v
@@ -105,6 +105,68 @@ Section MEMORY_WRITE.
   Qed.
   End INDEXED_AWAY.
 End MEMORY_WRITE.
+
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    
+    all: try rewrite ptrofs_modulus in *.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
 End SOUNDNESS.
 
 
@@ -125,7 +187,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -135,7 +197,15 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/arm/Op.v b/arm/Op.v
index 25e48ce1..6f22cece 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -751,6 +751,20 @@ Proof.
   auto.
 Qed.
 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct c; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct c; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_operation (op: operation) : list ident :=
diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..9885fd52
--- /dev/null
+++ b/arm/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../x86/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v
index 5ed04bc4..8b7f1190 100644
--- a/backend/CSE3analysis.v
+++ b/backend/CSE3analysis.v
@@ -181,12 +181,24 @@ Definition eq_depends_on_mem eq :=
   | SOp op => op_depends_on_memory op
   end.
 
+Definition eq_depends_on_store eq :=
+  match eq_op eq with
+  | SLoad _ _ => true
+  | SOp op => false
+  end.
+
 Definition get_mem_kills (eqs : PTree.t equation) : PSet.t :=
   PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
                 if eq_depends_on_mem eq
                 then PSet.add eqno already
                 else already) eqs PSet.empty.
 
+Definition get_store_kills (eqs : PTree.t equation) : PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
+                if eq_depends_on_store eq
+                then PSet.add eqno already
+                else already) eqs PSet.empty.
+
 Definition is_move (op : operation) :
   { op = Omove } + { op <> Omove }.
 Proof.
@@ -216,6 +228,7 @@ Record eq_context := mkeqcontext
                          eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t;
                          eq_kill_reg : reg -> PSet.t;
                          eq_kill_mem : unit -> PSet.t;
+                         eq_kill_store : unit -> PSet.t;
                          eq_moves : reg -> PSet.t }.
 
 Section OPERATIONS.
@@ -342,6 +355,9 @@ Section OPERATIONS.
                   (oper1 dst op args' rel)
       end.
   
+  Definition kill_store (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel (eq_kill_store ctx tt).
+
   Definition clever_kill_store
              (chunk : memory_chunk) (addr: addressing) (args : list reg)
              (src : reg)
@@ -358,7 +374,7 @@ Section OPERATIONS.
                 may_overlap chunk addr args chunk' addr' (eq_args eq)
               end
             end)
-         (PSet.inter rel (eq_kill_mem ctx tt))).
+         (PSet.inter rel (eq_kill_store ctx tt))).
 
   Definition store2
              (chunk : memory_chunk) (addr: addressing) (args : list reg)
@@ -366,7 +382,7 @@ Section OPERATIONS.
              (rel : RELATION.t) : RELATION.t :=
     if Compopts.optim_CSE3_alias_analysis tt
     then clever_kill_store chunk addr args src rel
-    else kill_mem rel.
+    else kill_store rel.
 
   Definition store1
              (chunk : memory_chunk) (addr: addressing) (args : list reg)
@@ -501,6 +517,7 @@ Definition context_from_hints (hints : analysis_hints) :=
   let eqs := hint_eq_catalog hints in
   let reg_kills := get_reg_kills eqs in 
   let mem_kills := get_mem_kills eqs in
+  let store_kills := get_store_kills eqs in
   let moves := get_moves eqs in
   {|
     eq_catalog := fun eq_id => PTree.get eq_id eqs;
@@ -508,5 +525,6 @@ Definition context_from_hints (hints : analysis_hints) :=
     eq_rhs_oracle  := hint_eq_rhs_oracle hints;
     eq_kill_reg := fun reg => PMap.get reg reg_kills;
     eq_kill_mem := fun _ => mem_kills;
+    eq_kill_store := fun _ => store_kills;
     eq_moves    := fun reg => PMap.get reg moves
   |}.
diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml
index e37ef61f..e038331c 100644
--- a/backend/CSE3analysisaux.ml
+++ b/backend/CSE3analysisaux.ml
@@ -174,6 +174,7 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
   and rhs_table = Hashtbl.create 100
   and cur_kill_reg = ref (PMap.init PSet.empty)
   and cur_kill_mem = ref PSet.empty
+  and cur_kill_store = ref PSet.empty
   and cur_moves = ref (PMap.init PSet.empty) in
   let eq_find_oracle node eq =
     assert (not (is_trivial eq));
@@ -216,6 +217,8 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
            (eq.eq_lhs :: eq.eq_args);
          (if eq_depends_on_mem eq
           then cur_kill_mem := PSet.add coq_id !cur_kill_mem);
+         (if eq_depends_on_store eq
+          then cur_kill_store := PSet.add coq_id !cur_kill_store);
          (match eq.eq_op, eq.eq_args with
           | (SOp Op.Omove), [rhs] -> imp_add_i_j cur_moves eq.eq_lhs coq_id
           | _, _ -> ());
@@ -232,6 +235,7 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
               eq_rhs_oracle  = rhs_find_oracle ;
               eq_kill_reg    = (fun reg -> PMap.get reg !cur_kill_reg);
               eq_kill_mem    = (fun () -> !cur_kill_mem);
+              eq_kill_store  = (fun () -> !cur_kill_store);
               eq_moves       = (fun reg -> PMap.get reg !cur_moves)
             } in
   match internal_analysis ctx tenv f
diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v
index 1e5b88c3..b298ea65 100644
--- a/backend/CSE3analysisproof.v
+++ b/backend/CSE3analysisproof.v
@@ -133,13 +133,18 @@ Definition xlget_kills (eqs : list (eq_id * equation)) (m :  Regmap.t PSet.t) :
     add_i_j (eq_lhs (snd item)) (fst item)
             (add_ilist_j (eq_args (snd item)) (fst item) already)) eqs m.
 
-
 Definition xlget_mem_kills (eqs : list (positive * equation)) (m : PSet.t) : PSet.t :=
 (fold_left
        (fun (a : PSet.t) (p : positive * equation) =>
         if eq_depends_on_mem (snd p) then PSet.add (fst p) a else a)
        eqs m).
 
+Definition xlget_store_kills (eqs : list (positive * equation)) (m : PSet.t) : PSet.t :=
+(fold_left
+       (fun (a : PSet.t) (p : positive * equation) =>
+        if eq_depends_on_store (snd p) then PSet.add (fst p) a else a)
+       eqs m).
+
 Lemma xlget_kills_monotone :
   forall eqs m i j,
     PSet.contains (Regmap.get i m) j = true ->
@@ -170,6 +175,24 @@ Qed.
 
 Hint Resolve xlget_mem_kills_monotone : cse3.
 
+Lemma xlget_store_kills_monotone :
+  forall eqs m j,
+    PSet.contains m j = true ->
+    PSet.contains (xlget_store_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl; trivial.
+  intros.
+  destruct eq_depends_on_store.
+  - apply IHeqs.
+    destruct (peq (fst a) j).
+    + subst j. apply PSet.gadds.
+    + rewrite PSet.gaddo by congruence.
+      trivial.
+  - auto.
+Qed.
+
+Hint Resolve xlget_store_kills_monotone : cse3.
+
 Lemma xlget_kills_has_lhs :
   forall eqs m lhs sop args j,
     In (j, {| eq_lhs := lhs;
@@ -333,6 +356,60 @@ Qed.
 
 Hint Resolve context_from_hints_get_kills_has_eq_depends_on_mem : cse3.
 
+Lemma xlget_kills_has_eq_depends_on_store :
+  forall eqs eq j m,
+    In (j, eq) eqs ->
+    eq_depends_on_store eq = true ->
+    PSet.contains (xlget_store_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros.
+  destruct H.
+  { subst a.
+    simpl.
+    rewrite H0.
+    apply xlget_store_kills_monotone.
+    apply PSet.gadds.
+  }
+  eauto.
+Qed.
+
+Hint Resolve xlget_kills_has_eq_depends_on_store : cse3.
+
+Lemma get_kills_has_eq_depends_on_store :
+  forall eqs eq j,
+    PTree.get j eqs = Some eq ->
+    eq_depends_on_store eq = true ->
+    PSet.contains (get_store_kills eqs) j = true.
+Proof.
+  intros.
+  unfold get_store_kills.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : PSet.t) (p : positive * equation) =>
+        if eq_depends_on_store (snd p) then PSet.add (fst p) a else a)
+       (PTree.elements eqs) PSet.empty)
+    with (xlget_store_kills (PTree.elements eqs) PSet.empty).
+  eapply xlget_kills_has_eq_depends_on_store.
+  apply PTree.elements_correct.
+  eassumption.
+  trivial.
+Qed.
+  
+Lemma context_from_hints_get_kills_has_eq_depends_on_store :
+  forall hints eq j,
+    PTree.get j (hint_eq_catalog hints) = Some eq ->
+    eq_depends_on_store eq = true ->
+    PSet.contains (eq_kill_store (context_from_hints hints) tt) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_kills_has_eq_depends_on_store; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_eq_depends_on_store : cse3.
+
 Definition eq_involves (eq : equation) (i : reg) :=
   i = (eq_lhs eq) \/ In i (eq_args eq).
 
@@ -418,6 +495,12 @@ Section SOUNDNESS.
       eq_depends_on_mem eq = true ->
       PSet.contains (eq_kill_mem ctx tt) j = true.
 
+  Hypothesis ctx_kill_store_has_depends_on_store :
+    forall eq j,
+      eq_catalog ctx j = Some eq ->
+      eq_depends_on_store eq = true ->
+      PSet.contains (eq_kill_store ctx tt) j = true.
+
   Theorem kill_reg_sound :
     forall rel rs m dst v,
       (sem_rel rel rs m) ->
@@ -574,6 +657,55 @@ Section SOUNDNESS.
 
   Hint Resolve kill_mem_sound : cse3.
 
+  (* TODO: shouldn't this already be proved somewhere else? *)
+  Lemma store_preserves_validity:
+    forall m m' wchunk a v
+      (STORE : Mem.storev wchunk m a v = Some m')
+      (b : block) (z : Z),
+      Mem.valid_pointer m' b z = Mem.valid_pointer m b z.
+  Proof.
+    unfold Mem.storev.
+    intros.
+    destruct a; try discriminate.
+    Local Transparent Mem.store.
+    unfold Mem.store in STORE.
+    destruct Mem.valid_access_dec in STORE.
+    2: discriminate.
+    inv STORE.
+    reflexivity.
+  Qed.
+
+  Hint Resolve store_preserves_validity : cse3.
+  
+  Theorem kill_store_sound :
+    forall rel rs m m' wchunk a v,
+      (sem_rel rel rs m) ->
+      (Mem.storev wchunk m a v = Some m') ->
+      (sem_rel (kill_store (ctx:=ctx) rel) rs m').
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_store.
+    intros until v.
+    intros REL STORE i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    intros SUBTRACT CATALOG.
+    rewrite PSet.gsubtract in SUBTRACT.
+    rewrite andb_true_iff in SUBTRACT.
+    intuition.
+    destruct (eq_op eq) as [op | chunk addr] eqn:OP.
+    - rewrite op_valid_pointer_eq with (m2 := m).
+      assumption.
+      eapply store_preserves_validity; eauto.
+    - specialize ctx_kill_store_has_depends_on_store with (eq0 := eq) (j := i).
+      destruct eq as [lhs op args]; simpl in *.
+      rewrite OP in ctx_kill_store_has_depends_on_store.
+      rewrite negb_true_iff in H0.
+      rewrite OP in CATALOG.
+      intuition.
+      congruence.
+  Qed.
+ 
+  Hint Resolve kill_store_sound : cse3.
+ 
   Theorem eq_find_sound:
     forall no eq id,
       eq_find (ctx := ctx) no eq = Some id ->
@@ -895,13 +1027,12 @@ Section SOUNDNESS.
     unfold sem_eq in *.
     simpl in *.
     destruct eq_op as [op' | chunk' addr']; simpl.
-    - destruct (op_depends_on_memory op') eqn:DEPENDS.
-      + erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto.
-        discriminate.
-      + rewrite op_depends_on_memory_correct with (m2:=m); trivial.
+    - rewrite op_valid_pointer_eq with (m2 := m).
+      + cbn in *.
         apply REL; auto.
+      + eapply store_preserves_validity; eauto.
     - simpl in REL.
-      erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto.
+      erewrite ctx_kill_store_has_depends_on_store in CONTAINS by eauto.
       simpl in CONTAINS.
       rewrite negb_true_iff in CONTAINS.
       destruct (eval_addressing genv sp addr' rs ## eq_args) as [a'|] eqn:ADDR'.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 76b5616b..c9985dc4 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -115,16 +115,18 @@ let ptree_printbool pt =
 
 (* Looks ahead (until a branch) to see if a node further down verifies
  * the given predicate *)
-let rec look_ahead code node is_loop_header predicate =
+let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
   if (predicate node) then true
-  else match (rtl_successors @@ get_some @@ PTree.get node code) with
+  else match (successors @@ get_some @@ PTree.get node code) with
     | [n] -> if (predicate n) then true
         else (
           if (get_some @@ PTree.get n is_loop_header) then false
-          else look_ahead code n is_loop_header predicate
+          else look_ahead_gen successors code n is_loop_header predicate
         )
     | _ -> false
 
+let look_ahead = look_ahead_gen rtl_successors
+
 (** 
  * Heuristics mostly based on the paper Branch Prediction for Free 
  *)
@@ -545,7 +547,8 @@ let is_a_nop code n =
  * preds: mapping node -> predecessors
  * ptree: the revmap
  * trace: the trace to follow tail duplication on *)
-let tail_duplicate code preds ptree trace =
+let tail_duplicate code preds is_loop_header ptree trace =
+  debug "Tail_duplicate on that trace: %a\n" print_trace trace;
   (* next_int: unused integer that can be used for the next duplication *)
   let next_int = ref (next_free_pc code)
   (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
@@ -560,7 +563,12 @@ let tail_duplicate code preds ptree trace =
           if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
           else
             let node_preds = ptree_get_some n preds
-            in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds
+            in let node_preds_nolast = 
+              (* We traverse loop headers without initiating tail duplication 
+               * (see case of two imbricated loops) *)
+              if (get_some @@ PTree.get n is_loop_header) then []
+              else List.filter (fun e -> e <> get_some !last_node) node_preds
+            (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *)
             in let final_node_preds = match !last_duplicate with
               | None -> node_preds_nolast
               | Some n' -> n' :: node_preds_nolast
@@ -581,12 +589,12 @@ let tail_duplicate code preds ptree trace =
   in let new_code, new_ptree = f code ptree true trace
   in (new_code, new_ptree, !nb_duplicated)
 
-let superblockify_traces code preds traces ptree =
+let superblockify_traces code preds is_loop_header traces ptree =
   let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
   in let rec f code ptree = function
     | [] -> (code, ptree, 0)
     | trace :: traces ->
-        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace
         in if (nb_duplicated < max_nb_duplicated)
           then (debug "End duplication\n"; f new_code new_ptree traces)
           else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
@@ -615,15 +623,29 @@ type innerLoop = {
   preds: P.t list;
   body: P.t list;
   head: P.t; (* head of the loop *)
-  finals: P.t list (* the final instructions, which loops back to the head *)
+  finals: P.t list; (* the final instructions, which loops back to the head *)
   (* There may be more than one ; for instance if there is an if inside the loop with both
-   * branches leading to a goto backedge *)
+   * branches leading to a goto backedge
+   * Such cases usually happen after a tail-duplication *)
+  sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+    (* may be None if we predict that we do not loop *)
 }
 
 let print_pset = LICMaux.pp_pset
 
+let print_option_pint oc o =
+  if !debug_flag then
+    match o with
+    | None -> Printf.fprintf oc "None"
+    | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
+
 let print_inner_loop iloop =
-  debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body
+  debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
+    print_intlist iloop.preds
+    print_intlist iloop.body
+    (P.to_int iloop.head)
+    print_intlist iloop.finals
+    print_option_pint iloop.sb_final
 
 let rec print_inner_loops = function
 | [] -> ()
@@ -692,6 +714,34 @@ let get_inner_loops f code is_loop_header =
   !iloops
   *)
 
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+  | Some true -> [n1]
+  | Some false -> [n2]
+  | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+  let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+  let headers = List.filter (fun n -> 
+    get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in 
+  match headers with
+  | [] -> (
+      let next_nodes = List.filter (fun n -> HashedSet.PSet.contains trace n)
+        (rtl_successors_pref @@ get_some @@ PTree.get node code) in
+      match next_nodes with
+      | [n] -> find_last_node_before_loop code n trace is_loop_header
+      | _ -> None (* May happen when we predict that a loop is not taken *)
+    )
+  | [h] -> Some node
+  | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
 let get_inner_loops f code is_loop_header =
   let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; 
     fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
@@ -718,7 +768,10 @@ let get_inner_loops f code is_loop_header =
           debug "HEADPREDS: %a\n" print_intlist head_preds;
           filtered
         end in
-      { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals }
+      let sb_final = find_last_node_before_loop code head body is_loop_header in
+      let body = HashedSet.PSet.elements body in
+      { preds = preds; body = body; head = head; finals = finals;
+        sb_final = sb_final; }
     ) 
     (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
      * We remove those to get just the inner loops *)
@@ -836,31 +889,36 @@ let unroll_inner_loops_single f code revmap =
     (!code', !revmap')
   end
 
+let is_some o = match o with Some _ -> true | None -> false
+
 (* Unrolls the body of the inner loop once - duplicating the exit condition as well 
  * 1) Clones body into body'
- * 2) Links the last instruction of body into the first of body' 
+ * 2) Links the last instruction of body (sb_final) into the first of body' 
  * 3) Links the last instruction of body' into the first of body
  *)
 let unroll_inner_loop_body code revmap iloop =
+  debug "iloop = "; print_inner_loop iloop;
   let body = iloop.body in
   let limit = !Clflags.option_funrollbody in
   if count_ignore_nops code body > limit then begin
     debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
     (code, revmap)
+  end else if not @@ is_some iloop.sb_final then begin
+    debug "The loop body does not form a superblock OR we have predicted that we do not loop";
+    (code, revmap)
   end else
     let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
     let code' = ref code2 in
     let head' = apply_map fwmap (iloop.head) in
     let finals' = apply_map_list fwmap (iloop.finals) in
     begin
-      code' := change_pointers !code' iloop.head head' iloop.finals;
+      code' := change_pointers !code' iloop.head head' [get_some @@ iloop.sb_final];
       code' := change_pointers !code' head' iloop.head finals'; 
       (!code', revmap2)
     end
 
 let unroll_inner_loops_body f code revmap =
   let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
-  (* debug_flag := true; *)
   let inner_loops = get_inner_loops f code is_loop_header in
   debug "Number of loops found: %d\n" (List.length inner_loops);
   let code' = ref code in
@@ -870,7 +928,7 @@ let unroll_inner_loops_body f code revmap =
     List.iter (fun iloop ->
       let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
       code' := new_code; revmap' := new_revmap
-    ) inner_loops; (* debug_flag := false; *)
+    ) inner_loops;
     (!code', !revmap')
   end
 
@@ -966,6 +1024,7 @@ let tail_duplicate f =
     if !Clflags.option_ftailduplicate > 0 then
       let traces = select_traces code entrypoint in
       let preds = get_predecessors_rtl code in
-      superblockify_traces code preds traces revmap
+      let is_loop_header = get_loop_headers code entrypoint in
+      superblockify_traces code preds is_loop_header traces revmap
     else (code, revmap) in
   ((code, entrypoint), revmap)
diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml
index 6283e129..602d078d 100644
--- a/backend/LICMaux.ml
+++ b/backend/LICMaux.ml
@@ -255,8 +255,8 @@ let rewrite_loop_body (last_alloc : reg ref)
                                (List.map (map_reg mapper) args),
                                new_res));
                 PTree.set res new_res mapper
-             | Iload(_, chunk, addr, args, res, pc')
-             | Istore(chunk, addr, args, res, pc')
+             | Iload(_, chunk, addr, args, v, pc')
+             | Istore(chunk, addr, args, v, pc')
                   when Archi.has_notrap_loads &&
                        !Clflags.option_fnontrap_loads ->
                 let new_res = P.succ !last_alloc in
@@ -264,7 +264,7 @@ let rewrite_loop_body (last_alloc : reg ref)
                 add_inj (INJload(chunk, addr,
                                  (List.map (map_reg mapper) args),
                                  new_res));
-                PTree.set res new_res mapper
+                PTree.set v new_res mapper
              | _ -> mapper in
            List.iter (fun x ->
                if PSet.contains loop_body x
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 3fe61470..22658fb7 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -324,7 +324,6 @@ Local Opaque mreg_type.
  
     apply wt_setreg; auto; try (apply wt_undef_regs; auto).
     eapply Val.has_subtype; eauto.
-    
     change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto.
     red; intros; subst op. simpl in ISMOVE.
     destruct args; try discriminate. destruct args; discriminate.
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index 4c172020..87e8a1fc 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -133,10 +133,10 @@ let print_program pp (prog: LTL.program) =
 
 let destination : string option ref = ref None
 
-let print_if prog =
+let print_if passno prog =
   match !destination with
   | None -> ()
   | Some f ->
-      let oc = open_out f in
+      let oc = open_out (f ^ "." ^ Z.to_string passno) in
       print_program oc prog;
       close_out oc
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index 78458582..269ebb6f 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -3,6 +3,7 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -12,7 +13,7 @@
 
 (** Branch tunneling (optimization of branches to branches). *)
 
-Require Import Coqlib Maps UnionFind.
+Require Import Coqlib Maps Errors.
 Require Import AST.
 Require Import LTL.
 
@@ -21,10 +22,10 @@ Require Import LTL.
   so that they jump directly to the end of the branch sequence.
   For example:
 <<
-     L1: nop L2;                          L1: nop L3;
-     L2; nop L3;               becomes    L2: nop L3;
+     L1: if (cond) nop L2;                L1: nop L3;
+     L2: nop L3;               becomes    L2: nop L3;
      L3: instr;                           L3: instr;
-     L4: if (cond) goto L1;               L4: if (cond) goto L3;
+     L4: if (cond) goto L1;               L4: if (cond) nop L1;
 >>
   This optimization can be applied to several of our intermediate
   languages.  We choose to perform it on the [LTL] language,
@@ -37,11 +38,14 @@ Require Import LTL.
   dead code (as the "nop L3" in the example above).
 *)
 
-(** The naive implementation of branch tunneling would replace
-  any branch to a node [pc] by a branch to the node
-  [branch_target f pc], defined as follows:
+(** The implementation consists in two passes: the first pass
+    records the branch t of each "nop"
+    and the second pass replace any "nop" node to [pc]
+    by a branch to a "nop" at [branch_t f pc]
+
+Naively, we may define [branch_t f pc] as follows:
 <<
-  branch_target f pc = branch_target f pc'  if f(pc) = nop pc'
+  branch_t f pc = branch_t f pc'  if f(pc) = nop pc'
                      = pc                   otherwise
 >>
   However, this definition can fail to terminate if
@@ -50,56 +54,114 @@ Require Import LTL.
      L1: nop L1;
 >>
   or
-<<   L1: nop L2;
+<<
+     L1: nop L2;
      L2: nop L1;
 >>
   Coq warns us of this fact by not accepting the definition
-  of [branch_target] above.
+  of [branch_t] above.
+
+  To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
+  for every instruction [pc: nop pc'] in the function.
+
+  Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
+  we need to iterate until we reach a fixpoint.
+
+  Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
+  in order to help the proof.
+
+  A verifier checks that this data-structure is correct.
+*)
+
+Definition UF := PTree.t (node * Z).
 
-  To handle this problem, we proceed in two passes.  The first pass
-  populates a union-find data structure, adding equalities [pc = pc']
-  for every instruction [pc: nop pc'] in the function. *)
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: LTL.function -> UF. 
+Extract Constant branch_target => "Tunnelingaux.branch_target".
 
-Module U := UnionFind.UF(PTree).
+Local Open Scope error_monad_scope.
 
-Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t :=
-  match b with
-  | Lbranch s :: _ => U.union uf pc s
-  | _ => uf
+Definition get (td: UF) pc:node*Z :=
+  match td!pc with
+  | Some (t,d) => (t,Z.abs d)
+  | _ => (pc,0)
   end.
 
-Definition record_gotos (f: LTL.function) : U.t :=
-  PTree.fold record_goto f.(fn_code) U.empty.
+Definition target (td: UF) (pc:node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+Definition check_included (td: UF) (c: code): option bblock
+  := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
+
+(* we check the validity of targets and their bound:
+   the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
+ := match td!pc with
+    | None => OK tt
+    | Some (tpc, dpc) =>
+       let dpc := Z.abs dpc in
+       match bb with
+       | Lbranch s ::_ =>
+         let (ts, ds) := get td s in 
+         if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Lbranch")
+         else Error (msg "invalid skip of Lbranch")
+       | Lcond _ _ s1 s2 _ :: _ =>
+          let (ts1, ds1) := get td s1 in
+          let (ts2, ds2) := get td s2 in
+          if peq tpc ts1 then
+            if peq tpc ts2 then
+              if zlt ds1 dpc then
+                if zlt ds2 dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this block")
+      end
+   end.
+
+Definition check_code (td: UF) (c:code): res unit
+  := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
 
 (** The second pass rewrites all LTL instructions, replacing every
-  successor [s] of every instruction by the canonical representative
+  successor [s] of every instruction by [t s], the canonical representative
   of its equivalence class in the union-find data structure. *)
 
-Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
   match i with
-  | Lbranch s => Lbranch (U.repr uf s)
+  | Lbranch s => Lbranch (t s)
   | Lcond cond args s1 s2 info =>
-      let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
+      let s1' := t s1 in let s2' := t s2 in
       if peq s1' s2'
       then Lbranch s1'
       else Lcond cond args s1' s2' info
-  | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
+  | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
   | _ => i
   end.
 
-Definition tunnel_block (uf: U.t) (b: bblock) : bblock :=
-  List.map (tunnel_instr uf) b.
+Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
+  List.map (tunnel_instr t) b.
 
-Definition tunnel_function (f: LTL.function) : LTL.function :=
-  let uf := record_gotos f in
-  mkfunction
-    (fn_sig f)
-    (fn_stacksize f)
-    (PTree.map1 (tunnel_block uf) (fn_code f))
-    (U.repr uf (fn_entrypoint f)).
+Definition tunnel_function (f: LTL.function) : res LTL.function :=
+  let td := branch_target f in
+  let c := (fn_code f) in
+  if check_included td c then 
+    do _ <- check_code td c ; OK
+    (mkfunction
+      (fn_sig f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_block td) c)
+      (td (fn_entrypoint f)))
+  else 
+   Error (msg "Some node of the union-find is not in the CFG")
+   .
 
-Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
-  transf_fundef tunnel_function f.
+Definition tunnel_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef tunnel_function f.
 
-Definition transf_program (p: LTL.program) : LTL.program :=
-  transform_program tunnel_fundef p.
+Definition transf_program (p: program) : res program :=
+  transform_partial_program tunnel_fundef p.
diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml
new file mode 100644
index 00000000..87e6d303
--- /dev/null
+++ b/backend/Tunnelingaux.ml
@@ -0,0 +1,283 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [Tunneling.v]
+
+*)
+
+open Coqlib
+open LTL
+open Maps
+open Camlcoq
+
+let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *)
+let debug_flag = ref false
+let final_dump = false   (* set to true to have a more verbose debugging *)
+
+let debug fmt =
+  if !debug_flag then Printf.eprintf fmt
+  else Printf.ifprintf stderr fmt
+
+exception BugOnPC of int
+
+(* type of labels in the cfg *)
+type label = int * P.t
+
+(* instructions under analyzis *)
+type simple_inst = (* a simplified view of LTL instructions *)
+  LBRANCH of node
+| LCOND of node * node
+| OTHER
+and node = {
+    lab : label;
+    mutable inst: simple_inst;
+    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
+    mutable dist: int;
+    mutable tag: int
+  }
+
+(* type of the (simplified) CFG *)
+type cfg = {
+    nodes: (int, node) Hashtbl.t;
+    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
+    mutable num_rems: int;
+    mutable iter_num: int (* number of iterations in elimination of conditions *)
+  }
+
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
+
+let rec target c n = (* inspired from the "find" of union-find algorithm *)
+  match n.inst with
+  | LCOND(s1,s2) ->
+     if n.link != n
+     then update c n
+     else if n.tag < c.iter_num then (
+       (* we try to change the condition ... *)
+       n.tag <- c.iter_num; (* ... but at most once by iteration *)
+       let ts1 = target c s1 in
+       let ts2 = target c s2 in
+       if ts1 == ts2 then (n.link <- ts1; ts1) else n
+     ) else n
+  | _ ->
+     if n.link != n
+     then update c n
+     else n
+and update c n =
+  let t = target c n.link in
+  n.link <- t; t
+
+let get_node c p =
+  let li = P.to_int p in
+  try
+    Hashtbl.find c.nodes li
+  with
+    Not_found ->
+      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
+      Hashtbl.add c.nodes li n;
+      n
+
+let set_branch c p s =
+  let li = P.to_int p in
+  try
+    let n = Hashtbl.find c.nodes li in
+    n.inst <- LBRANCH s;
+    n.link <- target c s
+  with
+    Not_found ->
+      let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in
+      Hashtbl.add c.nodes li n
+
+
+(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
+let build_simplified_cfg c acc pc bb =
+  match bb with
+  | Lbranch s :: _ ->
+     let ns = get_node c s in
+     set_branch c pc ns;
+     acc
+  | Lcond (_, _, s1, s2, _) :: _ ->
+     c.num_rems <- c.num_rems + 1;
+     let ns1 = get_node c s1 in
+     let ns2 = get_node c s2 in
+     let npc = get_node c pc in
+     npc.inst <- LCOND(ns1, ns2);
+     npc::acc
+  | _ -> acc
+
+(* try to change a condition into a branch
+[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
+*)
+let try_change_cond c acc pc =
+  match pc.inst with
+  | LCOND(s1,s2) ->
+     let ts1 = target c s1 in
+     let ts2 = target c s2 in
+     if ts1 == ts2 then (
+       pc.link <- ts1;
+       c.num_rems <- c.num_rems - 1;
+       acc
+     ) else
+       pc::acc
+  | _  -> raise (BugOnPC (lab_i pc)) (* LCOND expected *)
+
+(* repeat [try_change_cond] until no condition is changed into a branch *)
+let rec repeat_change_cond c =
+  c.iter_num <- c.iter_num + 1;
+  debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems);
+  let old =  c.num_rems in
+  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+  let curr = c.num_rems in
+  let continue =
+    match limit_tunneling with
+    | Some n -> curr < old && c.iter_num < n
+    | None -> curr < old
+  in
+  if continue
+  then repeat_change_cond c
+
+
+(* compute the final distance of each nop nodes to its target *)
+let undef_dist = -1
+let self_dist = undef_dist-1
+let rec dist n =
+  if n.dist = undef_dist
+  then (
+    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
+    n.dist <-
+      (match n.inst with
+       | OTHER -> 0
+       | LBRANCH p -> 1 + dist p
+       | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
+    n.dist
+  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
+    else n.dist
+
+let final_export f c =
+  let count = ref 0 in
+  let filter_nops_init_dist _ n acc =
+    let tn = target c n in
+    if tn == n
+    then (
+      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+      acc
+    ) else (
+      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+      count := !count+1;
+      n::acc
+    )
+  in
+  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
+  debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count;
+  res
+
+(*********************************************)
+(*** START: printing and debugging functions *)
+
+let string_of_labeli nodes ipc =
+  try
+    let pc = Hashtbl.find nodes ipc in
+    if pc.link == pc
+    then Printf.sprintf "(Target@%d)" (dist pc)
+    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
+  with
+    Not_found -> ""
+
+let print_bblock c println (pc, bb) =
+  match bb with
+  | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+  | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+  | _ -> debug "%d " pc; true
+
+
+let print_cfg f c  =
+  let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in
+  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+  let ep = P.to_int f.fn_entrypoint in
+  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+  let println = Array.fold_left (print_bblock c) false a in
+  (if println then debug "\n");debug "remaining cond:";
+  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+  debug "\n"
+
+(*************************************************************)
+(* Copy-paste of the extracted code of the verifier          *)
+(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+let get td pc =
+  match PTree.get pc td with
+  | Some p -> let (t0, d) = p in (t0, d)
+  | None -> (pc, Z.of_uint 0)
+
+let check_bblock td pc bb =
+  match PTree.get pc td with
+  | Some p ->
+    let (tpc, dpc) = p in
+    let dpc0 = dpc in
+    (match bb with
+     | [] ->
+       raise (BugOnPC (P.to_int pc))
+     | i :: _ ->
+       (match i with
+        | Lbranch s ->
+          let (ts, ds) = get td s in
+          if peq tpc ts
+          then if zlt ds dpc0
+               then ()
+               else raise (BugOnPC (P.to_int pc))
+          else raise (BugOnPC (P.to_int pc))
+        | Lcond (_, _, s1, s2, _) ->
+          let (ts1, ds1) = get td s1 in
+          let (ts2, ds2) = get td s2 in
+          if peq tpc ts1
+          then if peq tpc ts2
+               then if zlt ds1 dpc0
+                    then if zlt ds2 dpc0
+                         then ()
+                         else raise (BugOnPC (P.to_int pc))
+                    else raise (BugOnPC (P.to_int pc))
+               else raise (BugOnPC (P.to_int pc))
+          else raise (BugOnPC (P.to_int pc))
+        | _ ->
+          raise (BugOnPC (P.to_int pc))))
+  | None -> ()
+
+(** val check_code : coq_UF -> code -> unit res **)
+
+let check_code td c =
+  PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (())
+
+(*** END: copy-paste & debugging functions *******)
+
+let branch_target f =
+  debug "* Tunneling.branch_target: starting on a new function\n";
+  if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+  c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code [];
+  repeat_change_cond c;
+  let res = final_export f c in
+  if !debug_flag then (
+    try
+      check_code res f.fn_code;
+      if final_dump then print_cfg f c;
+    with e -> (
+      print_cfg f c;
+      check_code res f.fn_code
+    )
+  );
+  res
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index cdf6c800..126b7b87 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -3,6 +3,7 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -12,131 +13,163 @@
 
 (** Correctness proof for the branch tunneling optimization. *)
 
-Require Import Coqlib Maps UnionFind.
+Require Import Coqlib Maps Errors.
 Require Import AST Linking.
 Require Import Values Memory Events Globalenvs Smallstep.
 Require Import Op Locations LTL.
 Require Import Tunneling.
 
-Definition match_prog (p tp: program) :=
-  match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp.
+Local Open Scope nat.
 
-Lemma transf_program_match:
-  forall p, match_prog p (transf_program p).
+
+(** * Properties of the branch_target, when the verifier succeeds *)
+
+Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
+   ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td: UF) (c: code): 
+  check_included_spec c td (check_included td c).
+Proof. 
+  apply PTree_Properties.fold_rec with (P := check_included_spec c).
+- (* extensionality *)
+  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_included_spec.
+  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
+  intros; eapply IND; try congruence.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
+ | TB_default (TB: target pc = pc) ob
+     : target_bounds target bound pc ob
+ | TB_branch s bb
+     (EQ: target pc = target s)
+     (DECREASE: bound s < bound pc)
+     : target_bounds target bound pc (Some (Lbranch s::bb))
+ | TB_cond cond args s1 s2 info bb
+     (EQ1: target pc = target s1)
+     (EQ2: target pc = target s2)
+     (DEC1: bound s1 < bound pc)
+     (DEC2: bound s2 < bound pc)
+     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
+ .
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
 Proof.
-  intros. eapply match_transform_program; eauto.
+  unfold target, get. intros H; rewrite H; auto.
 Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
 
-(** * Properties of the branch map computed using union-find. *)
+Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; omega || auto.
+Qed.
+Local Hint Resolve get_nonneg: core.
 
-(** A variant of [record_goto] that also incrementally computes a measure [f: node -> nat]
-  counting the number of [Lnop] instructions starting at a given [pc] that were eliminated. *)
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
 
-Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
-  fun x => if peq (U.repr u s) pc then f x
-           else if peq (U.repr u x) pc then (f x + f s + 1)%nat
-           else f x.
+Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
+  check_bblock td pc bb = OK tt -> 
+  target_bounds (target td) (bound td) pc (Some bb).
+Proof.
+  unfold check_bblock, bound.
+  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
+  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
+  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
+  destruct bb as [|[ ] bb]; simpl; try congruence.
+  + destruct (get td s) as (ts, ds) eqn:Hs.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_branch.
+    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
+    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
+  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
+    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_cond.
+    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
+    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
+    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
+    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
+Qed.
 
-Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (b: bblock) : U.t * (node -> nat) :=
-  match b with
-  | Lbranch s :: b' => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f)
-  | _ => uf
-  end.
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
 
-Definition branch_map_correct (c: code) (uf: U.t * (node -> nat)): Prop :=
-  forall pc,
-  match c!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr (fst uf) pc = pc \/ (U.repr (fst uf) pc = U.repr (fst uf) s /\ snd uf s < snd uf pc)%nat
-  | _ =>
-      U.repr (fst uf) pc = pc
-  end.
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P := check_code_spec td).
+- (* extensionality *)
+  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_code_spec.
+  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
+  intros X; inversion X; subst.
+  apply check_bblock_correct; auto.
+Qed.
 
-Lemma record_gotos'_correct:
-  forall c,
-  branch_map_correct c (PTree.fold record_goto' c (U.empty, fun (x: node) => O)).
+Theorem branch_target_bounds:
+  forall f tf pc, 
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
 Proof.
-  intros.
-  apply PTree_Properties.fold_rec with (P := fun c uf => branch_map_correct c uf).
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros _.
+  destruct ((fn_code f)!pc) eqn:X.
+  - exploit check_code_correct; eauto.
+  - exploit check_included_correct; eauto.
+    congruence.
+Qed.
 
-- (* extensionality *)
-  intros. red; intros. rewrite <- H. apply H0.
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf -> 
+  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros X; inversion X; clear X; subst.
+  simpl. rewrite PTree.gmap1. auto.
+Qed.
 
-- (* base case *)
-  red; intros; simpl. rewrite PTree.gempty. apply U.repr_empty.
+Lemma tunnel_fundef_Internal:
+  forall f tf, tunnel_fundef (Internal f) = OK tf
+  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
+Proof.
+  intros f tf; simpl.
+  destruct (tunnel_function f) eqn:X; simpl; try congruence.
+  intros EQ; inversion EQ.
+  eexists; split; eauto.
+Qed.
 
-- (* inductive case *)
-  intros m uf pc bb; intros. destruct uf as [u f].
-  assert (PC: U.repr u pc = pc).
-    generalize (H1 pc). rewrite H. auto.
-  assert (record_goto' (u, f) pc bb = (u, f)
-          \/ exists s, exists bb', bb = Lbranch s :: bb' /\ record_goto' (u, f) pc bb = (U.union u pc s, measure_edge u pc s f)).
-    unfold record_goto'; simpl. destruct bb; auto. destruct i; auto. right. exists s; exists bb; auto.
-  destruct H2 as [B | [s [bb' [EQ B]]]].
-
-+ (* u and f are unchanged *)
-  rewrite B.
-  red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'.
-  destruct bb; auto. destruct i; auto.
-  apply H1.
-
-+ (* b is Lbranch s, u becomes union u pc s, f becomes measure_edge u pc s f *)
-  rewrite B.
-  red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. rewrite EQ.
-
-* (* The new instruction *)
-  rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3.
-  unfold measure_edge. destruct (peq (U.repr u s) pc). auto. right. split. auto.
-  rewrite PC. rewrite peq_true. omega.
-
-* (* An old instruction *)
-  assert (U.repr u pc' = pc' -> U.repr (U.union u pc s) pc' = pc').
-  { intro. rewrite <- H2 at 2. apply U.repr_union_1. congruence. }
-  generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct b; auto. destruct i; auto.
-  intros [P | [P Q]]. left; auto. right.
-  split. apply U.sameclass_union_2. auto.
-  unfold measure_edge. destruct (peq (U.repr u s) pc). auto.
-  rewrite P. destruct (peq (U.repr u s0) pc). omega. auto.
-Qed.
-
-Definition record_gotos' (f: function) :=
-  PTree.fold record_goto' f.(fn_code) (U.empty, fun (x: node) => O).
-
-Lemma record_gotos_gotos':
-  forall f, fst (record_gotos' f) = record_gotos f.
-Proof.
-  intros. unfold record_gotos', record_gotos.
-  repeat rewrite PTree.fold_spec.
-  generalize (PTree.elements (fn_code f)) (U.empty) (fun _ : node => O).
-  induction l; intros; simpl.
-  auto.
-  unfold record_goto' at 2. unfold record_goto at 2.
-  destruct (snd a). apply IHl. destruct i; apply IHl.
-Qed.
-
-Definition branch_target (f: function) (pc: node) : node :=
-  U.repr (record_gotos f) pc.
-
-Definition count_gotos (f: function) (pc: node) : nat :=
-  snd (record_gotos' f) pc.
-
-Theorem record_gotos_correct:
-  forall f pc,
-  match f.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-       branch_target f pc = pc \/
-       (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat
-  | _ => branch_target f pc = pc
-  end.
+Lemma tunnel_fundef_External:
+  forall tf ef, tunnel_fundef (External ef) = OK tf
+  -> tf = External ef.
 Proof.
-  intros.
-  generalize (record_gotos'_correct f.(fn_code) pc). simpl.
-  fold (record_gotos' f). unfold branch_map_correct, branch_target, count_gotos.
-  rewrite record_gotos_gotos'. auto.
+  intros tf ef; simpl. intros H; inversion H; auto.
 Qed.
 
 (** * Preservation of semantics *)
 
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
 Section PRESERVATION.
 
 Variables prog tprog: program.
@@ -145,32 +178,65 @@ Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
 
 Lemma functions_translated:
-  forall v f,
+  forall (v: val) (f: fundef),
   Genv.find_funct ge v = Some f ->
-  Genv.find_funct tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_transf TRANSL).
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+Qed.
 
 Lemma function_ptr_translated:
   forall v f,
   Genv.find_funct_ptr ge v = Some f ->
-  Genv.find_funct_ptr tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_ptr_transf TRANSL).
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
 
-Lemma symbols_preserved:
-  forall id,
-  Genv.find_symbol tge id = Genv.find_symbol ge id.
-Proof (Genv.find_symbol_transf TRANSL).
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
 
 Lemma senv_preserved:
   Senv.equiv ge tge.
-Proof (Genv.senv_transf TRANSL).
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
 
 Lemma sig_preserved:
-  forall f, funsig (tunnel_fundef f) = funsig f.
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
 Proof.
-  destruct f; reflexivity.
+  intros. destruct f.
+  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
+    destruct (check_included _ _); try congruence.
+    monadInv EQ. simpl; auto.
+  - simpl in H. monadInv H. reflexivity.
 Qed.
 
+Lemma fn_stacksize_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+
 (** The proof of semantic preservation is a simulation argument
   based on diagrams of the following form:
 <<
@@ -185,7 +251,7 @@ Qed.
   between states [st1] and [st2], as well as the postcondition between
   [st1'] and [st2'].  One transition in the source code (left) can correspond
   to zero or one transition in the transformed code (right).  The
-  "zero transition" case occurs when executing a [Lgoto] instruction
+  "zero transition" case occurs when executing a [Lnop] instruction
   in the source code that has been removed by tunneling.
 
   In the definition of [match_states], what changes between the original and
@@ -194,52 +260,52 @@ Qed.
   and memory states, since some [Vundef] values can become more defined
   as a consequence of eliminating useless [Lcond] instructions. *)
 
-Definition tunneled_block (f: function) (b: bblock) :=
-  tunnel_block (record_gotos f) b.
-
-Definition tunneled_code (f: function) :=
-  PTree.map1 (tunneled_block f) (fn_code f).
-
 Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
   forall l, Val.lessdef (ls1 l) (ls2 l).
 
 Inductive match_stackframes: stackframe -> stackframe -> Prop :=
   | match_stackframes_intro:
-      forall f sp ls0 bb tls0,
+      forall f tf sp ls0 bb tls0,
       locmap_lessdef ls0 tls0 ->
+      tunnel_function f = OK tf ->
       match_stackframes
          (Stackframe f sp ls0 bb)
-         (Stackframe (tunnel_function f) sp tls0 (tunneled_block f bb)).
+         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
 
 Inductive match_states: state -> state -> Prop :=
   | match_states_intro:
-      forall s f sp pc ls m ts tls tm
+      forall s f tf sp pc ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
       match_states (State s f sp pc ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+                   (State ts tf sp (branch_target f pc) tls tm)
   | match_states_block:
-      forall s f sp bb ls m ts tls tm
+      forall s f tf sp bb ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
       match_states (Block s f sp bb ls m)
-                   (Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm)
+                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
   | match_states_interm:
-      forall s f sp pc bb ls m ts tls tm
+      forall s f tf sp pc i bb ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Block s f sp (Lbranch pc :: bb) ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+        (MEM: Mem.extends m tm)
+        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp (i :: bb) ls m)
+                   (State ts tf sp pc tls tm)
   | match_states_call:
-      forall s f ls m ts tls tm
+      forall s f tf ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_fundef f = OK tf),
       match_states (Callstate s f ls m)
-                   (Callstate ts (tunnel_fundef f) tls tm)
+                   (Callstate ts tf tls tm)
   | match_states_return:
       forall s ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
@@ -289,22 +355,6 @@ Proof.
   induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
 Qed.
 
-(*
-Lemma locmap_undef_lessdef:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) (Locmap.undef ll ls2).
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_lessdef; auto. 
-Qed.
-
-Lemma locmap_undef_lessdef_1:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) ls2.
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_undef_lessdef; auto. 
-Qed.
-*)
-
 Lemma locmap_getpair_lessdef:
   forall p ls1 ls2,
   locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
@@ -348,15 +398,16 @@ Lemma find_function_translated:
   forall ros ls tls fd,
   locmap_lessdef ls tls ->
   find_function ge ros ls = Some fd ->
-  find_function tge ros tls = Some (tunnel_fundef fd).
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
 Proof.
   intros. destruct ros; simpl in *.
 - assert (E: tls (R m) = ls (R m)).
   { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
     generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
-  rewrite E. apply functions_translated; auto.
+  rewrite E. exploit functions_translated; eauto.
 - rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
-  apply function_ptr_translated; auto.
+  exploit function_ptr_translated; eauto.
+  intros (tf & X1 & X2). exists tf; intuition.
 Qed.
 
 Lemma call_regs_lessdef:
@@ -383,11 +434,12 @@ Qed.
 
 Definition measure (st: state) : nat :=
   match st with
-  | State s f sp pc ls m => (count_gotos f pc * 2)%nat
-  | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat
-  | Block s f sp bb ls m => 0%nat
-  | Callstate s f ls m => 0%nat
-  | Returnstate s ls m => 0%nat
+  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
+  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
+  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
+  | Block s f sp bb ls m => 0
+  | Callstate s f ls m => 0
+  | Returnstate s ls m => 0
   end.
 
 Lemma match_parent_locset:
@@ -406,24 +458,23 @@ Lemma tunnel_step_correct:
   (exists st2', step tge st1' t st2' /\ match_states st2 st2')
   \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
 Proof.
-  induction 1; intros; try inv MS.
+  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
 
 - (* entering a block *)
-  assert (DEFAULT: branch_target f pc = pc ->
-    (exists st2' : state,
-     step tge (State ts (tunnel_function f) sp (branch_target f pc) tls tm) E0 st2'
-     /\ match_states (Block s f sp bb rs m) st2')).
-  { intros. rewrite H0. econstructor; split.
-    econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto.
-    econstructor; eauto. }
-
-  generalize (record_gotos_correct f pc). rewrite H.
-  destruct bb; auto. destruct i; auto.
-  intros [A | [B C]]. auto.
-  right. split. simpl. omega.
-  split. auto.
-  rewrite B. econstructor; eauto.
-
+  exploit (branch_target_bounds f tf pc); eauto.
+  rewrite H. intros X; inversion X.
+  + (* TB_default *) 
+    rewrite TB; left. econstructor; split.
+    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
+    * econstructor; eauto.
+  + (* FT_branch *)
+    simpl; right.
+    rewrite EQ; repeat (econstructor; omega || eauto).
+  + (* FT_cond *)
+    simpl; right.
+    repeat (econstructor; omega || eauto); simpl.
+    apply Nat.max_case; omega.
+    destruct (peq _ _); try congruence.
 - (* Lop *)
   exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
   intros (tv & EV & LD).
@@ -485,20 +536,25 @@ Proof.
   eauto. eauto.
   econstructor; eauto using locmap_undef_regs_lessdef.
 - (* Lcall *)
-  left; simpl; econstructor; split.
-  eapply exec_Lcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto.
-  rewrite sig_preserved. auto.
-  econstructor; eauto.
-  constructor; auto.
-  constructor; auto.
+  left; simpl.
+  exploit find_function_translated; eauto.
+  intros (tfd & Htfd & FIND).
+  econstructor; split.
+  + eapply exec_Lcall; eauto.
+    erewrite sig_preserved; eauto.
+  + econstructor; eauto.
+    constructor; auto.
+    constructor; auto.
 - (* Ltailcall *)
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
+  exploit find_function_translated. 2: eauto.
+  { eauto using return_regs_lessdef, match_parent_locset. }
+  intros (tfd & Htfd & FIND).
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
   left; simpl; econstructor; split.
-  eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto using return_regs_lessdef, match_parent_locset.
-  apply sig_preserved.
-  econstructor; eauto using return_regs_lessdef, match_parent_locset.
+  + eapply exec_Ltailcall; eauto.
+    * eapply sig_preserved; eauto.
+    * erewrite fn_stacksize_preserved; eauto.
+  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
 - (* Lbuiltin *)
   exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
   exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
@@ -513,45 +569,58 @@ Proof.
   fold (branch_target f pc). econstructor; eauto.
 - (* Lbranch (eliminated) *)
   right; split. simpl. omega. split. auto. constructor; auto.
-
-- (* Lcond *)
-  simpl tunneled_block.
-  set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2).
-  destruct (peq s1 s2).
-+ left; econstructor; split.
-  eapply exec_Lbranch. 
-  destruct b.
-* constructor; eauto using locmap_undef_regs_lessdef_1.
-* rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
-+ left; econstructor; split.
-  eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
-  destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
-
+- (* Lcond (preserved) *)
+  simpl; left; destruct (peq _ _) eqn: EQ.
+  + econstructor; split.
+    eapply exec_Lbranch. 
+    destruct b.
+    * constructor; eauto using locmap_undef_regs_lessdef_1.
+    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+  + econstructor; split.
+    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
+    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcond (eliminated) *)
+  destruct (peq _ _) eqn: EQ; try inv H1.
+  right; split; simpl. 
+  + destruct b.
+    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega.
+    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega.
+  + destruct b.
+    -- repeat (constructor; auto).
+    -- rewrite e; repeat (constructor; auto).
 - (* Ljumptable *)
   assert (tls (R arg) = Vint n).
   { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
   left; simpl; econstructor; split.
   eapply exec_Ljumptable.
-  eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto.
+  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
   econstructor; eauto using locmap_undef_regs_lessdef.
 - (* Lreturn *)
   exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
   left; simpl; econstructor; split.
-  eapply exec_Lreturn; eauto.
-  constructor; eauto using return_regs_lessdef, match_parent_locset.
+  + eapply exec_Lreturn; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + constructor; eauto using return_regs_lessdef, match_parent_locset.
 - (* internal function *)
+  exploit tunnel_fundef_Internal; eauto.
+  intros (tf' & TF' & ITF). subst.
   exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros (tm' & ALLOC & MEM'). 
-  left; simpl; econstructor; split.
-  eapply exec_function_internal; eauto.
-  simpl. econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
+  intros (tm' & ALLOC & MEM').
+  left; simpl.
+  econstructor; split.
+  + eapply exec_function_internal; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + simpl.
+    erewrite (fn_entrypoint_preserved f tf'); auto.
+    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
 - (* external function *)
   exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
   intros (tvres & tm' & A & B & C & D).
   left; simpl; econstructor; split.
-  eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
+  + erewrite (tunnel_fundef_External tf ef); eauto.
+    eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
 - (* return *)
   inv STK. inv H1.
   left; econstructor; split.
@@ -564,14 +633,15 @@ Lemma transf_initial_states:
   exists st2, initial_state tprog st2 /\ match_states st1 st2.
 Proof.
   intros. inversion H.
-  exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split.
+  exploit function_ptr_translated; eauto.
+  intros (tf & Htf & Hf).
+  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
   econstructor; eauto.
-  apply (Genv.init_mem_transf TRANSL); auto.
+  apply (Genv.init_mem_transf_partial TRANSL); auto.
   rewrite (match_program_main TRANSL).
   rewrite symbols_preserved. eauto.
-  apply function_ptr_translated; auto.
-  rewrite <- H3. apply sig_preserved.
-  constructor. constructor. red; simpl; auto. apply Mem.extends_refl.
+  rewrite <- H3. apply sig_preserved. auto.
+  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
 Qed.
 
 Lemma transf_final_states:
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index 779e7bb9..f1a46baa 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -2069,7 +2069,6 @@ Definition divfs := binop_single Float32.div.
 Lemma divfs_sound:
   forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divfs v w) (divfs x y).
 Proof (binop_single_sound Float32.div).
-
 (** Conversions *)
 
 Definition zero_ext (nbits: Z) (v: aval) :=
@@ -2483,6 +2482,468 @@ Proof.
   destruct 1; simpl; auto with va.
 Qed.
 
+
+(* Extensions for KVX and Risc-V *)
+
+Definition intoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_int f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intuoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_intu f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_int f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intuofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_intu f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_long f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longuoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_longu f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_long f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longuofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_longu f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Lemma intoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intoffloat v)) (intoffloat_total x).
+Proof.
+  unfold Val.intoffloat, intoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intuoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intuoffloat v)) (intuoffloat_total x).
+Proof.
+  unfold Val.intoffloat, intoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intofsingle v)) (intofsingle_total x).
+Proof.
+  unfold Val.intofsingle, intofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intuofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intuofsingle v)) (intuofsingle_total x).
+Proof.
+  unfold Val.intofsingle, intofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma singleofint_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleofint v)) (singleofint x).
+Proof.
+  unfold Val.singleofint, singleofint; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma singleofintu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleofintu v)) (singleofintu x).
+Proof.
+  unfold Val.singleofintu, singleofintu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma longoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longoffloat v)) (longoffloat_total x).
+Proof.
+  unfold Val.longoffloat, longoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longuoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longuoffloat v)) (longuoffloat_total x).
+Proof.
+  unfold Val.longoffloat, longoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longofsingle v)) (longofsingle_total x).
+Proof.
+  unfold Val.longofsingle, longofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longuofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longuofsingle v)) (longuofsingle_total x).
+Proof.
+  unfold Val.longofsingle, longofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma singleoflong_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleoflong v)) (singleoflong x).
+Proof.
+  unfold Val.singleoflong, singleoflong; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma singleoflongu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleoflongu v)) (singleoflongu x).
+Proof.
+  unfold Val.singleoflongu, singleoflongu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatoflong_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatoflong v)) (floatoflong x).
+Proof.
+  unfold Val.floatoflong, floatoflong; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatoflongu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatoflongu v)) (floatoflongu x).
+Proof.
+  unfold Val.floatoflongu, floatoflongu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatofint_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatofint v)) (floatofint x).
+Proof.
+  unfold Val.floatofint, floatofint; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatofintu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatofintu v)) (floatofintu x).
+Proof.
+  unfold Val.floatofintu, floatofintu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+
+Definition divs_total (v w: aval) := 
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone
+      then ntop
+      else I (Int.divs i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divs_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divs v w)) (divs_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition divu_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+       then ntop
+      else I (Int.divu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divu v w)) (divu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct Int.eq eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct Int.eq eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition mods_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone
+      then ntop
+      else I (Int.mods i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma mods_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.mods v w)) (mods_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition modu_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      then ntop
+      else I (Int.modu i1 i2)
+  | I i2, _ => uns (provenance v) (usize i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modu v w)) (modu_total x y).
+Proof.
+  assert (UNS: forall i j, j <> Int.zero -> is_uns (usize j) (Int.modu i j)).
+  {
+    intros. apply is_uns_mon with (usize (Int.modu i j)).
+    { apply is_uns_usize.
+    }
+    unfold usize, Int.size.
+    apply Zsize_monotone.
+    generalize (Int.unsigned_range_2 j); intros RANGE.
+    assert (Int.unsigned j <> 0).
+    { red; intros; elim H. rewrite <- (Int.repr_unsigned j). rewrite H0. auto. }
+    exploit (Z_mod_lt (Int.unsigned i) (Int.unsigned j)). omega. intros MOD.
+    unfold Int.modu. rewrite Int.unsigned_repr. omega. omega.
+  }
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct Int.eq eqn:E; unfold ntop; cbn; auto with va.
+  }
+  all: try discriminate.
+  all: unfold ntop2; auto with va.
+  all: try (destruct Int.eq eqn:E; cbn; unfold ntop2; auto with va; fail).
+  all: try apply vmatch_uns_undef.
+  
+  all:
+    generalize (Int.eq_spec i0 Int.zero);
+    destruct (Int.eq i0 Int.zero);
+    cbn;
+    intro.
+  all: try apply vmatch_uns_undef.
+  all: apply vmatch_uns; auto.
+Qed.
+
+
+Lemma shrx_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrx v w)) (shrx x y).
+Proof.
+  intros until y. intros HX HY.
+  inv HX; inv HY; cbn.
+  all: unfold ntop1; auto with va.
+  all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va.
+Qed.
+
+
+Definition divls_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone
+      then ntop
+      else L (Int64.divs i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divls_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divls v w)) (divls_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+Definition divlu_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+       then ntop
+      else L (Int64.divu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divlu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divlu v w)) (divlu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct Int64.eq eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+
+Definition modls_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone
+      then ntop
+      else L (Int64.mods i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modls_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modls v w)) (modls_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+
+Definition modlu_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      then ntop
+      else L (Int64.modu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modlu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modlu v w)) (modlu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct Int64.eq eqn:E; cbn; unfold ntop2, ntop; auto with va.
+Qed.
+
+Lemma shrxl_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrxl v w)) (shrxl x y).
+Proof.
+  intros until y. intros HX HY.
+  inv HX; inv HY; cbn.
+  all: unfold ntop1; auto with va.
+  all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va.
+Qed.
+
 (** Comparisons and variation intervals *)
 
 Definition cmp_intv (c: comparison) (i: Z * Z) (n: Z) : abool :=
@@ -4734,6 +5195,26 @@ Hint Resolve cnot_sound symbol_address_sound
        longoffloat_sound longuoffloat_sound floatoflong_sound floatoflongu_sound
        longofsingle_sound longuofsingle_sound singleoflong_sound singleoflongu_sound
        longofwords_sound loword_sound hiword_sound
+       intoffloat_total_sound
+       intuoffloat_total_sound
+       intofsingle_total_sound
+       intuofsingle_total_sound
+       singleofint_total_sound
+       singleofintu_total_sound
+       longoffloat_total_sound
+       longuoffloat_total_sound
+       longofsingle_total_sound
+       longuofsingle_total_sound
+       singleoflong_total_sound
+       singleoflongu_total_sound
+       floatoflong_total_sound
+       floatoflongu_total_sound
+       floatofint_total_sound
+       floatofintu_total_sound
+       divu_total_sound divs_total_sound
+       modu_total_sound mods_total_sound shrx_total_sound
+       divlu_total_sound divls_total_sound
+       modlu_total_sound modls_total_sound shrxl_total_sound
        cmpu_bool_sound cmp_bool_sound cmplu_bool_sound cmpl_bool_sound
        cmpf_bool_sound cmpfs_bool_sound
        maskzero_sound : va.
diff --git a/common/Memory.v b/common/Memory.v
index cd8a2001..65f36966 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -1322,6 +1322,18 @@ Proof.
   eapply load_store_same.
   eassumption.
 Qed.
+
+Theorem storev_preserv_valid (b : block) (ofs: Z): valid_pointer m1 b ofs = valid_pointer m2 b ofs.
+Proof.
+  unfold storev in STORE.
+  cut (valid_pointer m1 b ofs = true <-> valid_pointer m2 b ofs = true).
+  { destruct (valid_pointer _ _ _), (valid_pointer _ _ _); intuition congruence. }
+  destruct addr; try congruence.
+  rewrite! valid_pointer_valid_access. split.
+  - intros; eapply store_valid_access_1; eauto.
+  - intros; eapply store_valid_access_2; eauto.
+Qed.
+
 End STOREV.
 
 Lemma load_store_overlap:
diff --git a/common/Values.v b/common/Values.v
index 6401ba52..41138e8e 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -2706,3 +2706,24 @@ Proof.
   unfold compose_meminj; rewrite H1; rewrite H3; eauto.
   rewrite Ptrofs.add_assoc. decEq. unfold Ptrofs.add. apply Ptrofs.eqm_samerepr. auto with ints.
 Qed.
+
+
+(** Particular cases of extensionality lemma *)
+
+Lemma cmpu_bool_valid_pointer_eq vptr1 vptr2 c v1 v2:
+   (forall (b : block) (z : Z), vptr1 b z = vptr2 b z) ->
+   Val.cmpu_bool vptr1 c v1 v2 = Val.cmpu_bool vptr2 c v1 v2.
+Proof.
+  intros EQ; unfold Val.cmpu_bool; destruct v1; try congruence;
+  destruct v2; try congruence;
+  rewrite !EQ; auto.
+Qed.
+
+Lemma cmplu_bool_valid_pointer_eq  vptr1 vptr2 c v1 v2:
+   (forall (b : block) (z : Z), vptr1 b z = vptr2 b z) ->
+   Val.cmplu_bool vptr1 c v1 v2 = Val.cmplu_bool vptr2 c v1 v2.
+Proof.
+  intros EQ; unfold Val.cmplu_bool; destruct v1; try congruence;
+  destruct v2; try congruence;
+  rewrite !EQ; auto.
+Qed.
diff --git a/configure b/configure
index 801f182d..415dbb03 100755
--- a/configure
+++ b/configure
@@ -815,7 +815,7 @@ fi
 
 if [ "$arch" = "kvx" ]; then
 cat >> Makefile.config <<EOF
-ARCHDIRS=$arch $arch/lib $arch/abstractbb $arch/abstractbb/Impure
+ARCHDIRS=$arch $arch/lib $arch/abstractbb
 EXECUTE=kvx-cluster --syscall=libstd_scalls.so --
 CFLAGS= -D __KVX_COS__
 SIMU=kvx-cluster -- 
@@ -823,8 +823,7 @@ BACKENDLIB=Machblock.v Machblockgen.v Machblockgenproof.v\\
     Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v Asmvliw.v Asmblockprops.v\\
     ForwardSimulationBlock.v PostpassScheduling.v PostpassSchedulingproof.v\\
     Asmblockdeps.v DecBoolOps.v Chunks.v Peephole.v ExtValues.v ExtFloats.v\\
-    AbstractBasicBlocksDef.v SeqSimuTheory.v ImpSimuTest.v Parallelizability.v\\
-    ImpConfig.v ImpCore.v ImpExtern.v ImpHCons.v ImpIO.v ImpLoops.v ImpMonads.v ImpPrelude.v
+    AbstractBasicBlocksDef.v SeqSimuTheory.v ImpSimuTest.v Parallelizability.v
 EOF
 fi
 
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 989a7096..a5b47264 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -45,8 +45,15 @@ let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops
 let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *)
 let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *)
 
+(* Scheduling *)
+let option_mtune = ref ""
+                
+let option_fprepass = ref false
+let option_fprepass_sched = ref "list"
+
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
+
 let option_fifconversion = ref true
 let option_Obranchless = ref false
 let option_falignfunctions = ref (None: int option)
diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand
index 00db9b36..a751b232 100644
--- a/driver/Compiler.vexpand
+++ b/driver/Compiler.vexpand
@@ -54,7 +54,7 @@ Require Import Compopts.
 Parameter print_Clight: Clight.program -> unit.
 Parameter print_Cminor: Cminor.program -> unit.
 Parameter print_RTL: Z -> RTL.program -> unit.
-Parameter print_LTL: LTL.program -> unit.
+Parameter print_LTL: Z -> LTL.program -> unit.
 Parameter print_Mach: Mach.program -> unit.
 
 Local Open Scope string_scope.
@@ -298,6 +298,14 @@ EXPAND_ASM_SEMANTICS
     eapply RTLgenproof.transf_program_correct; eassumption.
 EXPAND_RTL_FORWARD_SIMULATIONS
   eapply compose_forward_simulations.
+    eapply RTLpathLivegenproof.transf_program_correct; eassumption.
+  pose proof RTLpathLivegenproof.all_fundef_liveness_ok as X.
+    refine (modusponens _ _ (X _ _ _) _); eauto. intro.
+  eapply compose_forward_simulations.
+    eapply RTLpathSchedulerproof.transf_program_correct; eassumption.
+  eapply compose_forward_simulations.
+    eapply RTLpathproof.transf_program_correct; eassumption.
+  eapply compose_forward_simulations.
     eapply Allocationproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply Tunnelingproof.transf_program_correct; eassumption.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 07edf2d1..7d23c695 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -206,7 +206,11 @@ Processing options:
   -fcse3-refine         Refine CSE3 invariants by descending iteration [on]
   -fmove-loop-invariants Perform loop-invariant code motion [off]
   -fredundancy   Perform redundancy elimination [on]
-  -fpostpass     Perform postpass scheduling (only for K1 architecture) [on]
+  -mtune=         Type of CPU (for scheduling on some architectures)
+  -fprepass      Perform prepass scheduling (only on some architectures) [off]
+  -fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
+                   (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+  -fpostpass     Perform postpass scheduling (only for K1 architecture) [on] 
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
   -fpredict      Insert static branch prediction information [on]
@@ -416,6 +420,8 @@ let cmdline_actions =
   @ f_opt "cse3-refine" option_fcse3_refine
   @ f_opt "move-loop-invariants" option_fmove_loop_invariants
   @ f_opt "redundancy" option_fredundancy
+  @ [ Exact "-mtune", String (fun s -> option_mtune := s) ]
+  @ f_opt "prepass" option_fprepass
   @ f_opt "postpass" option_fpostpass
   @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ]
   @ f_opt "predict" option_fpredict
@@ -423,6 +429,7 @@ let cmdline_actions =
   @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
   @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
   @ f_opt "tracelinearize" option_ftracelinearize
+  @ f_opt_str "prepass" option_fprepass option_fprepass_sched
   @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
   @ f_opt "inline" option_finline
   @ f_opt "inline-functions-called-once" option_finline_functions_called_once
diff --git a/kvx/CSE2deps.v b/kvx/CSE2deps.v
index b4b80e2f..c0deacf0 100644
--- a/kvx/CSE2deps.v
+++ b/kvx/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/kvx/CSE2depsproof.v b/kvx/CSE2depsproof.v
index 6c584450..a5f7b317 100644
--- a/kvx/CSE2depsproof.v
+++ b/kvx/CSE2depsproof.v
@@ -123,17 +123,24 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
-  destruct args as [ | base [ | ]]. 1,3: discriminate.
-  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
-  cbn in OVERLAP.
-  destruct (peq base base'). 2: discriminate.
-  subst base'.
-  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
-  2: discriminate.
-  cbn in *.
-  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+  - (* Aindexed / Aindexed *)
+    destruct args as [ | base [ | ]]. 1,3: discriminate.
+    destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+    cbn in OVERLAP.
+    destruct (peq base base'). 2: discriminate.
+    subst base'.
+    destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+    2: discriminate.
+    cbn in *.
+    eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  - (* Ainstack / Ainstack *)
+    destruct args. 2: discriminate.
+    destruct args'. 2: discriminate.
+    cbn in OVERLAP.
+    destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+    2: discriminate.
+    cbn in *.
+    eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/kvx/Op.v b/kvx/Op.v
index e2ffa3e5..794bc87b 100644
--- a/kvx/Op.v
+++ b/kvx/Op.v
@@ -1238,6 +1238,26 @@ Proof.
       unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
 Qed.
 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op; simpl; try congruence.
+  - intros MEM; destruct cond; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intros MEM; destruct c0; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intros MEM; destruct c0; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intros MEM; destruct c0; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
diff --git a/kvx/OpWeights.ml b/kvx/OpWeights.ml
new file mode 100644
index 00000000..23c2e5d3
--- /dev/null
+++ b/kvx/OpWeights.ml
@@ -0,0 +1,115 @@
+open Op;;
+open PostpassSchedulingOracle;;
+open PrepassSchedulingOracleDeps;;
+
+module KV3 =
+  struct
+let resource_bounds = PostpassSchedulingOracle.resource_bounds;;
+let nr_non_pipelined_units = 0;;
+
+let rec nlist_rec x l = function
+  | 0 -> l
+  | n when n > 0 -> nlist_rec x (x :: l) (n-1)
+  | _ -> failwith "nlist_rec";;
+let nlist x n = nlist_rec x [] n;;
+
+let bogus_register = Machregs.R0;;
+let bogus_inputs n = nlist bogus_register n;;
+
+let insns_of_op (op : operation) (nargs : int) =
+  match Asmblockgen.transl_op op
+          (bogus_inputs nargs) bogus_register [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_op"
+  | Errors.OK insns -> insns;;
+
+let insn_of_op op nargs =
+  match insns_of_op op nargs with
+  | [] -> failwith "OpWeights.insn_of_op"
+  | h::_ -> h;;
+
+let insns_of_cond (cond : condition) (nargs : int) =
+  match Asmblockgen.transl_cond_op cond
+           Asmvliw.GPR0 (bogus_inputs nargs) [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_cond"
+  | Errors.OK insns -> insns;;
+
+let insn_of_cond cond nargs =
+  match insns_of_cond cond nargs with
+  | [] -> failwith "OpWeights.insn_of_cond"
+  | h::_ -> h;;
+
+let insns_of_load trap chunk addressing (nargs : int) =
+  match Asmblockgen.transl_load trap chunk addressing
+          (bogus_inputs nargs) bogus_register [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_load"
+  | Errors.OK insns -> insns;;
+
+let insn_of_load trap chunk addressing nargs =
+  match insns_of_load trap chunk addressing nargs with
+  | [] -> failwith "OpWeights.insn_of_load"
+  | h::_ -> h;;
+
+let insns_of_store chunk addressing (nargs : int) =
+  match Asmblockgen.transl_store chunk addressing
+          (bogus_inputs nargs) bogus_register [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_store"
+  | Errors.OK insns -> insns;;
+
+let insn_of_store chunk addressing nargs =
+  match insns_of_store chunk addressing nargs with
+  | [] -> failwith "OpWeights.insn_of_store"
+  | h::_ -> h;;
+
+let latency_of_op (op : operation) (nargs : int) =
+  let insn = insn_of_op op nargs in
+  let record = basic_rec insn in
+  let latency = real_inst_to_latency record.inst in
+  latency;;
+
+let resources_of_op (op : operation) (nargs : int) =
+  let insn = insn_of_op op nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+  
+let non_pipelined_resources_of_op (op : operation) (nargs : int) = [| |]
+
+let resources_of_cond (cond : condition) (nargs : int) =
+  let insn = insn_of_cond cond nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+
+let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+let latency_of_call _ _ = 6;;
+
+let resources_of_load trap chunk addressing nargs =
+  let insn = insn_of_load trap chunk addressing nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+
+let resources_of_store chunk addressing nargs =
+  let insn = insn_of_store chunk addressing nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+
+let resources_of_call _ _ = resource_bounds;;
+let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "kv3" | "" ->
+     {
+       pipelined_resource_bounds = KV3.resource_bounds;
+       nr_non_pipelined_units = KV3.nr_non_pipelined_units;
+       latency_of_op = KV3.latency_of_op;
+       resources_of_op = KV3.resources_of_op;
+       non_pipelined_resources_of_op = KV3.non_pipelined_resources_of_op;
+       latency_of_load = KV3.latency_of_load;
+       resources_of_load = KV3.resources_of_load;
+       resources_of_store = KV3.resources_of_store;
+       resources_of_cond = KV3.resources_of_cond;
+       latency_of_call = KV3.latency_of_call;
+       resources_of_call = KV3.resources_of_call;
+       resources_of_builtin = KV3.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..912e9ffa
--- /dev/null
+++ b/kvx/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml
new file mode 120000
index 00000000..1e955b85
--- /dev/null
+++ b/kvx/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracleDeps.ml
+\ No newline at end of file
diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v
index 8c834de5..7a301929 100644
--- a/kvx/SelectOpproof.v
+++ b/kvx/SelectOpproof.v
@@ -1199,7 +1199,6 @@ Qed.
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
   red; intros until x. unfold cast8unsigned.
-
   rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate.
 Qed.
 
diff --git a/kvx/ValueAOp.v b/kvx/ValueAOp.v
index 122c9a60..87554258 100644
--- a/kvx/ValueAOp.v
+++ b/kvx/ValueAOp.v
@@ -16,87 +16,6 @@
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats Values Memory Globalenvs.
 Require Import Op ExtValues ExtFloats RTL ValueDomain.
-
-Definition intoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_int f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition intuoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_intu f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition intofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_int f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition intuofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_intu f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_long f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longuoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_longu f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_long f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longuofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_longu f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
 Definition minf := binop_float ExtFloat.min.
 Definition maxf := binop_float ExtFloat.max.
 Definition minfs := binop_single ExtFloat32.min.
@@ -400,196 +319,6 @@ Hypothesis GENV: genv_match bc ge.
 Variable sp: block.
 Hypothesis STACK: bc sp = BCstack.
 
-Lemma intoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intoffloat v)) (intoffloat_total x).
-Proof.
-  unfold Val.intoffloat, intoffloat_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float.to_int f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intoffloat_total_sound : va.
-
-Lemma intuoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intuoffloat v)) (intuoffloat_total x).
-Proof.
-  unfold Val.intoffloat, intoffloat_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float.to_intu f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intuoffloat_total_sound : va.
-
-Lemma intofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intofsingle v)) (intofsingle_total x).
-Proof.
-  unfold Val.intofsingle, intofsingle_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float32.to_int f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intofsingle_total_sound : va.
-
-Lemma intuofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intuofsingle v)) (intuofsingle_total x).
-Proof.
-  unfold Val.intofsingle, intofsingle_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float32.to_intu f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intuofsingle_total_sound : va.
-
-Lemma singleofint_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleofint v)) (singleofint x).
-Proof.
-  unfold Val.singleofint, singleofint; intros.
-  inv H; cbn.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleofint_total_sound : va.
-
-Lemma singleofintu_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleofintu v)) (singleofintu x).
-Proof.
-  unfold Val.singleofintu, singleofintu; intros.
-  inv H; cbn.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleofintu_total_sound : va.
-
-Lemma longoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longoffloat v)) (longoffloat_total x).
-Proof.
-  unfold Val.longoffloat, longoffloat_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float.to_long f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longoffloat_total_sound : va.
-
-Lemma longuoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longuoffloat v)) (longuoffloat_total x).
-Proof.
-  unfold Val.longoffloat, longoffloat_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float.to_longu f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longuoffloat_total_sound : va.
-
-Lemma longofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longofsingle v)) (longofsingle_total x).
-Proof.
-  unfold Val.longofsingle, longofsingle_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float32.to_long f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longofsingle_total_sound : va.
-
-Lemma longuofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longuofsingle v)) (longuofsingle_total x).
-Proof.
-  unfold Val.longofsingle, longofsingle_total. intros.
-  inv MATCH; cbn in *; try constructor.
-  all: destruct (Float32.to_longu f) as [i|] eqn:E; cbn; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longuofsingle_total_sound : va.
-
-Lemma singleoflong_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleoflong v)) (singleoflong x).
-Proof.
-  unfold Val.singleoflong, singleoflong; intros.
-  inv H; cbn.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleoflong_total_sound : va.
-
-Lemma singleoflongu_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleoflongu v)) (singleoflongu x).
-Proof.
-  unfold Val.singleoflongu, singleoflongu; intros.
-  inv H; cbn.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleoflongu_total_sound : va.
-
-Lemma floatoflong_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.floatoflong v)) (floatoflong x).
-Proof.
-  unfold Val.floatoflong, floatoflong; intros.
-  inv H; cbn.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve floatoflong_total_sound : va.
-
-Lemma floatoflongu_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.floatoflongu v)) (floatoflongu x).
-Proof.
-  unfold Val.floatoflongu, floatoflongu; intros.
-  inv H; cbn.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve floatoflongu_total_sound : va.
-
 Lemma minf_sound:
   forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.minf v w) (minf x y).
 Proof.
@@ -815,16 +544,6 @@ Proof.
     inv H1; cbn; try constructor.
     all: destruct Int.ltu; [cbn | constructor; fail].
     all: auto with va.
-  - replace (match Val.shll a1 (Vint (int_of_shift1_4 shift)) with
-    | Vlong n2 => Vlong (Int64.add n n2)
-    | Vptr b2 ofs2 =>
-        if Archi.ptr64
-        then Vptr b2 (Ptrofs.add ofs2 (Ptrofs.of_int64 n))
-        else Vundef
-    | _ => Vundef
-             end) with (Val.addl (Vlong n) (Val.shll a1 (Vint (int_of_shift1_4 shift)))).
-    + eauto with va.
-    + destruct a1; destruct shift; reflexivity.
   - inv H1; constructor.
   - replace (match Val.shll a1 (Vint (int_of_shift1_4 shift)) with
     | Vlong n2 => Vlong (Int64.sub n n2)
@@ -832,10 +551,6 @@ Proof.
              end) with (Val.subl (Vlong n) (Val.shll a1 (Vint (int_of_shift1_4 shift)))).
     + eauto with va.
     + destruct a1; destruct shift; reflexivity.
-  - (* shrxl *)
-    inv H1; cbn; try constructor.
-    all: destruct Int.ltu; [cbn | constructor; fail].
-    all: auto with va.
   - apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
 
   (* extfz *)
diff --git a/kvx/abstractbb/Impure/ImpConfig.v b/lib/Impure/ImpConfig.v
index dd9785b5..dd9785b5 100644
--- a/kvx/abstractbb/Impure/ImpConfig.v
+++ b/lib/Impure/ImpConfig.v
diff --git a/kvx/abstractbb/Impure/ImpCore.v b/lib/Impure/ImpCore.v
index 508b3f19..508b3f19 100644
--- a/kvx/abstractbb/Impure/ImpCore.v
+++ b/lib/Impure/ImpCore.v
diff --git a/kvx/abstractbb/Impure/ImpExtern.v b/lib/Impure/ImpExtern.v
index 8fb3cf3b..8fb3cf3b 100644
--- a/kvx/abstractbb/Impure/ImpExtern.v
+++ b/lib/Impure/ImpExtern.v
diff --git a/kvx/abstractbb/Impure/ImpHCons.v b/lib/Impure/ImpHCons.v
index 637116cc..637116cc 100644
--- a/kvx/abstractbb/Impure/ImpHCons.v
+++ b/lib/Impure/ImpHCons.v
diff --git a/kvx/abstractbb/Impure/ImpIO.v b/lib/Impure/ImpIO.v
index 6c02c395..6c02c395 100644
--- a/kvx/abstractbb/Impure/ImpIO.v
+++ b/lib/Impure/ImpIO.v
diff --git a/kvx/abstractbb/Impure/ImpLoops.v b/lib/Impure/ImpLoops.v
index 33376c19..33376c19 100644
--- a/kvx/abstractbb/Impure/ImpLoops.v
+++ b/lib/Impure/ImpLoops.v
diff --git a/kvx/abstractbb/Impure/ImpMonads.v b/lib/Impure/ImpMonads.v
index f01a2755..f01a2755 100644
--- a/kvx/abstractbb/Impure/ImpMonads.v
+++ b/lib/Impure/ImpMonads.v
diff --git a/kvx/abstractbb/Impure/ImpPrelude.v b/lib/Impure/ImpPrelude.v
index de4c7973..de4c7973 100644
--- a/kvx/abstractbb/Impure/ImpPrelude.v
+++ b/lib/Impure/ImpPrelude.v
diff --git a/kvx/abstractbb/Impure/LICENSE b/lib/Impure/LICENSE
index 65c5ca88..65c5ca88 100644
--- a/kvx/abstractbb/Impure/LICENSE
+++ b/lib/Impure/LICENSE
diff --git a/kvx/abstractbb/Impure/README.md b/lib/Impure/README.md
index 2b19d14a..2b19d14a 100644
--- a/kvx/abstractbb/Impure/README.md
+++ b/lib/Impure/README.md
diff --git a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.ml b/lib/Impure/ocaml/ImpHConsOracles.ml
index 2b66899b..68a33a91 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.ml
+++ b/lib/Impure/ocaml/ImpHConsOracles.ml
@@ -38,12 +38,18 @@ let xhCons (type a) (hp:a hashP) =
   let t = MyHashtbl.create 1000 in
   let logs = ref [] in
   {
-   hC = (fun (k:a hashinfo) ->
+    hC = (fun (k:a hashinfo) ->
+     (* DEBUG:
+     Printf.printf "*in %d -- look for hcodes= " (Obj.magic t);
+     List.iter (fun i -> Printf.printf "%d " i) k.hcodes;
+     print_newline();
+     *)
      match MyHashtbl.find_opt t k with
      | Some d -> d
-     | None -> (*print_string "+";*)
-        let d = hp.set_hid k.hdata (MyHashtbl.length t) in
-        MyHashtbl.add t {k with hdata = d } d; d);
+     | None ->
+         (* DEBUG: Printf.printf "*in %d -- new hid:%d" (Obj.magic t) (MyHashtbl.length t); print_newline(); *)
+         let d = hp.set_hid k.hdata (MyHashtbl.length t) in
+         MyHashtbl.add t {k with hdata = d } d; d);
    next_log = (fun info -> logs := (MyHashtbl.length t, info)::(!logs));
    next_hid = (fun () -> MyHashtbl.length t);
    remove = (fun (x:a hashinfo) -> MyHashtbl.remove t x);
diff --git a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.mli b/lib/Impure/ocaml/ImpHConsOracles.mli
index 5075d176..5075d176 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.mli
+++ b/lib/Impure/ocaml/ImpHConsOracles.mli
diff --git a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.ml b/lib/Impure/ocaml/ImpIOOracles.ml
index 9e63c12d..9e63c12d 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.ml
+++ b/lib/Impure/ocaml/ImpIOOracles.ml
diff --git a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.mli b/lib/Impure/ocaml/ImpIOOracles.mli
index 6064286a..6064286a 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.mli
+++ b/lib/Impure/ocaml/ImpIOOracles.mli
diff --git a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.ml b/lib/Impure/ocaml/ImpLoopOracles.ml
index cb7625e5..cb7625e5 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.ml
+++ b/lib/Impure/ocaml/ImpLoopOracles.ml
diff --git a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.mli b/lib/Impure/ocaml/ImpLoopOracles.mli
index 194696a1..194696a1 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.mli
+++ b/lib/Impure/ocaml/ImpLoopOracles.mli
diff --git a/lib/UnionFind.v b/lib/UnionFind.v
index 20bb91cd..bd1b763b 100644
--- a/lib/UnionFind.v
+++ b/lib/UnionFind.v
@@ -124,6 +124,15 @@ Module Type UNIONFIND.
         pathlen uf x + pathlen uf b + 1
       else
         pathlen uf x.
+   Axiom pathlen_union:
+    forall uf a b x,
+    pathlen (union uf a b) x =
+      if elt_eq (repr uf a) (repr uf b) then
+        pathlen uf x
+     else if elt_eq (repr uf x) (repr uf a) then
+        (pathlen uf x)+1
+      else
+        (pathlen uf x).
   Axiom pathlen_gt_merge:
     forall uf a b x y,
     repr uf x = repr uf y ->
@@ -531,6 +540,7 @@ Qed.
 
 End PATHLEN.
 
+
 (* Path length and merge *)
 
 Lemma pathlen_merge:
@@ -549,16 +559,49 @@ Proof.
   set (uf' := identify uf (repr uf a) b (repr_res_none uf a) (not_eq_sym n)).
   pattern x. apply (well_founded_ind (mwf uf')); intros.
   rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G.
-  rewrite H; auto. simpl in G. rewrite M.gsspec in G.
-  destruct (M.elt_eq x0 (repr uf a)). rewrite e. rewrite repr_canonical. rewrite dec_eq_true.
-  inversion G. subst x'. rewrite dec_eq_false; auto.
-  replace (pathlen uf (repr uf a)) with 0. omega.
-  symmetry. apply pathlen_none. apply repr_res_none.
-  rewrite (repr_unroll uf x0), (pathlen_unroll uf x0); rewrite G.
-  destruct (M.elt_eq (repr uf x') (repr uf a)); omega.
-  simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate.
-  rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto.
-  symmetry. apply pathlen_zero; auto. apply repr_none; auto.
+  + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G.
+    destruct (M.elt_eq x0 (repr uf a)).
+    - rewrite e, repr_canonical, dec_eq_true.
+      inversion G. subst x'. rewrite dec_eq_false; auto.
+      replace (pathlen uf (repr uf a)) with 0; try omega.
+      symmetry. apply pathlen_none. apply repr_res_none.
+    - rewrite (repr_unroll uf x0), (pathlen_unroll uf x0), G.
+      destruct (M.elt_eq (repr uf x') (repr uf a)); omega.
+  + clear H; simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate.
+    rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto.
+    symmetry. apply pathlen_zero; auto. apply repr_none; auto.
+Qed.
+
+Lemma pathlen_union:
+  forall uf a b x,
+  pathlen (union uf a b) x =
+    if M.elt_eq (repr uf a) (repr uf b) then
+      pathlen uf x
+    else if M.elt_eq (repr uf x) (repr uf a) then
+      (pathlen uf x)+1
+    else
+      (pathlen uf x).
+Proof.
+  intros. unfold union.
+  destruct (M.elt_eq (repr uf a) (repr uf b)).
+  auto.
+  set (uf' := identify uf _ _ _ _).
+  assert (LENa: pathlen uf (repr uf a) = 0).
+  { apply pathlen_none. apply repr_res_none. }
+  pattern x. apply (well_founded_ind (mwf uf')); intros.
+  rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G.
+  + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G.
+    destruct (M.elt_eq x0 (repr uf a)).
+    - inversion G; clear G. subst.
+      rewrite !repr_canonical, dec_eq_true.
+      rewrite dec_eq_false; auto.
+      rewrite LENa. rewrite (pathlen_none uf (repr uf b)); try omega.
+      apply repr_res_none.
+    - rewrite (repr_unroll uf x0), G, ! (pathlen_some _ _ _ G).
+      destruct (M.elt_eq _ _); auto.
+  + clear H. simpl in G. rewrite M.gsspec in G.
+    destruct (M.elt_eq _ (repr uf a)); try discriminate.
+    rewrite (repr_none _ _ G), !(pathlen_none _ _ G), dec_eq_false; auto.
 Qed.
 
 Lemma pathlen_gt_merge:
diff --git a/powerpc/CSE2deps.v b/powerpc/CSE2deps.v
index d48dabf3..4592f408 100644
--- a/powerpc/CSE2deps.v
+++ b/powerpc/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/powerpc/CSE2depsproof.v b/powerpc/CSE2depsproof.v
index 123341da..ede09dd6 100644
--- a/powerpc/CSE2depsproof.v
+++ b/powerpc/CSE2depsproof.v
@@ -111,6 +111,66 @@ Section MEMORY_WRITE.
   Qed.
   End INDEXED_AWAY.
 End MEMORY_WRITE.
+
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: try rewrite ptrofs_modulus in *.
+    all: destruct Archi.ptr64.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
 End SOUNDNESS.
 
 
@@ -131,7 +191,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -141,7 +201,14 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/powerpc/Op.v b/powerpc/Op.v
index a0ee5bb8..4f14bfac 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -797,6 +797,20 @@ Proof.
   auto.
 Qed.
 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct c; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct c; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_operation (op: operation) : list ident :=
diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..9885fd52
--- /dev/null
+++ b/powerpc/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../x86/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/riscV/Asmgenproof1.v b/riscV/Asmgenproof1.v
index 8678a5dc..d2255e66 100644
--- a/riscV/Asmgenproof1.v
+++ b/riscV/Asmgenproof1.v
@@ -1035,7 +1035,9 @@ Opaque Int.eq.
   intros (rs' & A & B & C).
   exists rs'; split; eauto. rewrite B; auto with asmgen.
 - (* shrximm *)
-  clear H. exploit Val.shrx_shr_3; eauto. intros E; subst v; clear EV.
+  destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL; cbn.
+  {
+  exploit Val.shrx_shr_3; eauto. intros E; subst v.
   destruct (Int.eq n Int.zero).
 + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
   split; intros; Simpl. 
@@ -1052,6 +1054,24 @@ Opaque Int.eq.
     eapply exec_straight_step. simpl; reflexivity. auto. 
     apply exec_straight_one. simpl; reflexivity. auto. 
     split; intros; Simpl.
+  }
+  destruct (Int.eq n Int.zero).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros; Simpl. 
++ destruct (Int.eq n Int.one).
+  * econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    apply exec_straight_one. simpl; reflexivity. auto.
+    split; intros; Simpl.
+  * change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
+    econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    apply exec_straight_one. simpl; reflexivity. auto. 
+    split; intros; Simpl.
+  
 - (* longofintu *)
   econstructor; split.
   eapply exec_straight_three. simpl; eauto. simpl; eauto. simpl; eauto. auto. auto. auto.
@@ -1076,7 +1096,27 @@ Opaque Int.eq.
   intros (rs' & A & B & C).
   exists rs'; split; eauto. rewrite B; auto with asmgen.
 - (* shrxlimm *)
-  clear H. exploit Val.shrxl_shrl_3; eauto. intros E; subst v; clear EV.
+  destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL.
+  {
+   exploit Val.shrxl_shrl_3; eauto. intros E; subst v.
+  destruct (Int.eq n Int.zero).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros; Simpl. 
++ destruct (Int.eq n Int.one).
+  * econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    apply exec_straight_one. simpl; reflexivity. auto.
+    split; intros; Simpl.
+
+  * change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
+    econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    apply exec_straight_one. simpl; reflexivity. auto. 
+    split; intros; Simpl.
+  }
   destruct (Int.eq n Int.zero).
 + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
   split; intros; Simpl. 
@@ -1094,6 +1134,7 @@ Opaque Int.eq.
     eapply exec_straight_step. simpl; reflexivity. auto. 
     apply exec_straight_one. simpl; reflexivity. auto. 
     split; intros; Simpl.
+  
 - (* cond *)
   exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
   exists rs'; split. eexact A. eauto with asmgen.
diff --git a/riscV/CSE2deps.v b/riscV/CSE2deps.v
index b4b80e2f..c0deacf0 100644
--- a/riscV/CSE2deps.v
+++ b/riscV/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/riscV/CSE2depsproof.v b/riscV/CSE2depsproof.v
index f283c8ac..cf9e62b1 100644
--- a/riscV/CSE2depsproof.v
+++ b/riscV/CSE2depsproof.v
@@ -123,7 +123,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -133,7 +133,15 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/riscV/ConstpropOpproof.v b/riscV/ConstpropOpproof.v
index 765aa035..26a50317 100644
--- a/riscV/ConstpropOpproof.v
+++ b/riscV/ConstpropOpproof.v
@@ -265,52 +265,84 @@ Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_moduimm_correct:
   forall n r1 r2 v,
-  Val.modu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.modu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_moduimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_moduimm.
   destruct (Int.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq. eapply Val.modu_pow2; eauto. congruence.
-  exists v; auto.
+  { destruct (Val.modu e # r1 e # r2) eqn:MODU; cbn in H.
+    { subst v0.
+      exists v; split; auto.
+      cbn. decEq. eapply Val.modu_pow2; eauto. congruence.
+    }
+    subst v.
+    eexists; split; auto.
+    cbn. reflexivity.
+  }
+  exists v; split; auto.
+  cbn.
+  congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -444,48 +476,82 @@ Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  { 
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_modluimm_correct:
   forall n r1 r2 v,
-  Val.modlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.modlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_modluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_modluimm.
   destruct (Int64.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq.
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. 
-  simpl. erewrite Int64.modu_and by eauto. auto.
-  exists v; auto.
+  {
+  econstructor; split. simpl; eauto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  { 
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.modu_and by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -633,14 +699,17 @@ Proof.
 - (* mul 2*)
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
 - (* divs *)
-  assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
+  assert (e#r2 = Vint n2). { clear H0. InvApproxRegs; SimplVM; auto. }
+                           apply make_divimm_correct; auto.
+  congruence.
 - (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divuimm_correct; auto.
+  congruence.
 - (* modu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_moduimm_correct; auto.
+  congruence.
 - (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
 - (* and 2 *)
@@ -680,12 +749,15 @@ Proof.
 - (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divlimm_correct; auto.
+  congruence.
 - (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divluimm_correct; auto.
+  congruence.
 - (* modlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_modluimm_correct; auto.
+  congruence.
 - (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
 - (* andl 2 *)
diff --git a/riscV/Op.v b/riscV/Op.v
index 14d07e0b..62999a2f 100644
--- a/riscV/Op.v
+++ b/riscV/Op.v
@@ -241,10 +241,10 @@ Definition eval_operation
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omulhs, v1::v2::nil => Some (Val.mulhs v1 v2)
   | Omulhu, v1::v2::nil => Some (Val.mulhu v1 v2)
-  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
-  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
-  | Omod, v1 :: v2 :: nil => Val.mods v1 v2
-  | Omodu, v1 :: v2 :: nil => Val.modu v1 v2
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
+  | Omod, v1 :: v2 :: nil => Some (Val.maketotal (Val.mods v1 v2))
+  | Omodu, v1 :: v2 :: nil => Some (Val.maketotal (Val.modu v1 v2))
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
   | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
   | Oor, v1 :: v2 :: nil => Some (Val.or v1 v2)
@@ -257,7 +257,7 @@ Definition eval_operation
   | Oshrimm n, v1 :: nil => Some (Val.shr v1 (Vint n))
   | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
   | Oshruimm n, v1 :: nil => Some (Val.shru v1 (Vint n))
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
   | Omakelong, v1::v2::nil => Some (Val.longofwords v1 v2)
   | Olowlong, v1::nil => Some (Val.loword v1)
   | Ohighlong, v1::nil => Some (Val.hiword v1)
@@ -270,10 +270,10 @@ Definition eval_operation
   | Omull, v1::v2::nil => Some (Val.mull v1 v2)
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1::v2::nil => Val.divls v1 v2
-  | Odivlu, v1::v2::nil => Val.divlu v1 v2
-  | Omodl, v1::v2::nil => Val.modls v1 v2
-  | Omodlu, v1::v2::nil => Val.modlu v1 v2
+  | Odivl, v1::v2::nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1::v2::nil => Some (Val.maketotal (Val.divlu v1 v2))
+  | Omodl, v1::v2::nil => Some (Val.maketotal (Val.modls v1 v2))
+  | Omodlu, v1::v2::nil => Some (Val.maketotal (Val.modlu v1 v2))
   | Oandl, v1::v2::nil => Some(Val.andl v1 v2)
   | Oandlimm n, v1::nil => Some (Val.andl v1 (Vlong n))
   | Oorl, v1::v2::nil => Some(Val.orl v1 v2)
@@ -286,7 +286,7 @@ Definition eval_operation
   | Oshrlimm n, v1::nil => Some (Val.shrl v1 (Vint n))
   | Oshrlu, v1::v2::nil => Some (Val.shrlu v1 v2)
   | Oshrluimm n, v1::nil => Some (Val.shrlu v1 (Vint n))
-  | Oshrxlimm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrxlimm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
   | Onegf, v1::nil => Some (Val.negf v1)
   | Oabsf, v1::nil => Some (Val.absf v1)
   | Oaddf, v1::v2::nil => Some (Val.addf v1 v2)
@@ -301,22 +301,22 @@ Definition eval_operation
   | Odivfs, v1::v2::nil => Some (Val.divfs v1 v2)
   | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Ointuofsingle, v1::nil => Val.intuofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Osingleofintu, v1::nil => Val.singleofintu v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Olonguoffloat, v1::nil => Val.longuoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Olonguofsingle, v1::nil => Val.longuofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
   | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
   | _, _ => None
   end.
@@ -539,15 +539,17 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1...
   - destruct v0; destruct v1...
   (* div, divu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero
+         || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero); cbn; trivial.
   (* mod, modu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero
+         || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero); cbn; trivial.
   (* and, andimm *)
   - destruct v0; destruct v1...
   - destruct v0...
@@ -567,7 +569,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
   (* shrx *)
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   (* makelong, lowlong, highlong *)
   - destruct v0; destruct v1...
   - destruct v0...
@@ -588,15 +591,19 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1...
   - destruct v0; destruct v1...
   (* divl, divlu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* modl, modlu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* andl, andlimm *)
   - destruct v0; destruct v1...
   - destruct v0...
@@ -616,7 +623,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
   (* shrxl *)
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   (* negf, absf *)
   - destruct v0...
   - destruct v0...
@@ -639,50 +647,47 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_intu f); cbn; trivial.
   (* floatofint, floatofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* cmp *)
   - destruct (eval_condition cond vl m)... destruct b...
 Qed.
 
-
+(* This should not be simplified to "false" because it breaks proofs elsewhere. *)
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivl | Odivu | Odivlu
-  | Omod | Omodl | Omodu | Omodlu
-  | Oshrximm _ | Oshrxlimm _
-  | Ointoffloat | Ointuoffloat
-  | Ointofsingle | Ointuofsingle
-  | Olongoffloat | Olonguoffloat
-  | Olongofsingle | Olonguofsingle
-  | Osingleofint | Osingleofintu
-  | Osingleoflong | Osingleoflongu
-  | Ofloatofint | Ofloatofintu
-  | Ofloatoflong | Ofloatoflongu => true
+  | Omove => false
   | _ => false
   end.
-                
 
 Definition args_of_operation op :=
   if eq_operation op Omove
@@ -872,6 +877,16 @@ Proof.
   unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
 Qed.
 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op; simpl; try congruence.
+  intros MEM; destruct cond; repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
@@ -1033,19 +1048,29 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+              || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
   (* mod, modu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int.eq i0 Int.zero
-                     || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+              || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
   (* and, andimm *)
   - inv H4; inv H2; simpl; auto.
   - inv H4; simpl; auto.
@@ -1065,8 +1090,10 @@ Proof.
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  - inv H4; cbn; try apply Val.val_inject_undef.
+    destruct (Int.ltu n (Int.repr 31)); cbn.
+    apply Val.inject_int.
+    apply Val.val_inject_undef.
   (* makelong, highlong, lowlong *)
   - inv H4; inv H2; simpl; auto.
   - inv H4; simpl; auto.
@@ -1085,19 +1112,31 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
   (* modl, modlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int64.eq i0 Int64.zero
-                     || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
   (* andl, andlimm *)
   - inv H4; inv H2; simpl; auto.
   - inv H4; simpl; auto.
@@ -1116,9 +1155,11 @@ Proof.
   (* shru, shruimm *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
-  (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+    (* shrx *)
+  - inv H4; cbn; try apply Val.val_inject_undef.
+    destruct (Int.ltu n (Int.repr 63)); cbn.
+    apply Val.inject_long.
+    apply Val.val_inject_undef.
   (* negf, absf *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1141,37 +1182,37 @@ Proof.
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_int f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_intu f0); cbn; auto.
   (* floatofint, floatofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_int f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_intu f0); cbn; auto.
   (* singleofint, singleofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_long f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_longu f0); cbn; auto.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_long f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_longu f0); cbn; auto.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* cmp *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/riscV/OpWeights.ml b/riscV/OpWeights.ml
new file mode 100644
index 00000000..75a913c6
--- /dev/null
+++ b/riscV/OpWeights.ml
@@ -0,0 +1,161 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Rocket =
+  struct
+    (* Attempt at modeling the Rocket core *)
+    
+    let resource_bounds = [| 1 |];;
+    let nr_non_pipelined_units = 1;; (* divider *)
+    
+    let latency_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omul  | Omulhs  | Omulhu
+        | Omull | Omullhs | Omullhu -> 4
+                                     
+      | Onegf -> 1               (*r [rd = - r1] *)
+      | Oabsf                    (*r [rd = abs(r1)] *)
+        | Oaddf                    (*r [rd = r1 + r2] *)
+        | Osubf                    (*r [rd = r1 - r2] *)
+        | Omulf -> 6               (*r [rd = r1 * r2] *)
+      | Onegfs -> 1              (*r [rd = - r1] *)
+      | Oabsfs                   (*r [rd = abs(r1)] *)
+        | Oaddfs                   (*r [rd = r1 + r2] *)
+        | Osubfs                   (*r [rd = r1 - r2] *)
+        | Omulfs -> 4              (*r [rd = r1 * r2] *)
+      | Osingleoffloat           (*r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle -> 4      (*r [rd] is [r1] extended to double-precision float *)
+      (*c Conversions between int and float: *)
+      | Ointoffloat              (*r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat             (*r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint              (*r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu -> 6        (*r [rd = float64_of_unsigned_int(r1)] *)
+      | Ointofsingle             (*r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle            (*r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint             (*r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu -> 4       (*r [rd = float32_of_unsigned_int(r1)] *)
+      | Olongoffloat             (*r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat            (*r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong             (*r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu -> 6       (*r [rd = float64_of_unsigned_long(r1)] *)
+      | Olongofsingle            (*r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle           (*r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong            (*r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu -> 4      (*r [rd = float32_of_unsigned_int(r1)] *)
+                          
+      | Odiv | Odivu | Odivl | Odivlu -> 16  
+      | Odivfs -> 35
+      | Odivf -> 50
+
+      | Ocmp cond ->
+         (match cond with
+          | Ccomp _
+            | Ccompu _
+            | Ccompimm _
+            | Ccompuimm _
+            | Ccompl _
+            | Ccomplu _
+            | Ccomplimm _
+            | Ccompluimm _ -> 1
+          | Ccompf _
+            | Cnotcompf _ -> 6
+          | Ccompfs _
+            | Cnotcompfs _ -> 4)
+      | _ -> 1;;
+    
+    let resources_of_op (op : operation) (nargs : int) = resource_bounds;;
+  
+    let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Odiv | Odivu -> [| 29 |]
+      | Odivfs -> [| 20 |]
+      | Odivl | Odivlu | Odivf -> [| 50 |]
+      | _ -> [| -1 |];;
+    
+    let resources_of_cond (cond : condition) (nargs : int) = resource_bounds;;
+
+    let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+    let latency_of_call _ _ = 6;;
+    
+    let resources_of_load trap chunk addressing nargs = resource_bounds;;
+    
+    let resources_of_store chunk addressing nargs = resource_bounds;;
+    
+    let resources_of_call _ _ = resource_bounds;;
+    let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+module SweRV_EH1 =
+  struct
+    (* Attempt at modeling SweRV EH1
+       [| issues ; LSU ; multiplier |] *)
+    let resource_bounds = [| 2 ; 1; 1 |];;
+    let nr_non_pipelined_units = 1;; (* divider *)
+    
+    let latency_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omul  | Omulhs  | Omulhu
+        | Omull | Omullhs | Omullhu -> 3
+      | Odiv | Odivu | Odivl | Odivlu -> 16  
+      | _ -> 1;;
+    
+    let resources_of_op (op : operation) (nargs : int) = 
+      match op with
+      | Omul  | Omulhs  | Omulhu
+        | Omull | Omullhs | Omullhu -> [| 1 ; 0 ; 1 |]
+      | Odiv | Odivu | Odivl | Odivlu -> [| 0 ; 0; 0 |]                        
+      | _ -> [| 1; 0; 0; 0 |];;
+    
+    let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Odiv | Odivu -> [| 29 |]
+      | Odivfs -> [| 20 |]
+      | Odivl | Odivlu | Odivf -> [| 50 |]
+      | _ -> [| -1 |];;
+    
+    let resources_of_cond (cond : condition) (nargs : int) = [| 1; 0; 0 |];;
+    
+    let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+    let latency_of_call _ _ = 6;;
+    
+    let resources_of_load trap chunk addressing nargs = [| 1; 1; 0 |];;
+    
+    let resources_of_store chunk addressing nargs = [| 1; 1; 0 |];;
+    
+    let resources_of_call _ _ = resource_bounds;;
+    let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "rocket" | "" ->
+     {
+       pipelined_resource_bounds = Rocket.resource_bounds;
+       nr_non_pipelined_units = Rocket.nr_non_pipelined_units;
+       latency_of_op = Rocket.latency_of_op;
+       resources_of_op = Rocket.resources_of_op;
+       non_pipelined_resources_of_op = Rocket.non_pipelined_resources_of_op;
+       latency_of_load = Rocket.latency_of_load;
+       resources_of_load = Rocket.resources_of_load;
+       resources_of_store = Rocket.resources_of_store;
+       resources_of_cond = Rocket.resources_of_cond;
+       latency_of_call = Rocket.latency_of_call;
+       resources_of_call = Rocket.resources_of_call;
+       resources_of_builtin = Rocket.resources_of_builtin
+     }
+  | "SweRV_EH1" | "EH1" ->
+     {
+       pipelined_resource_bounds = SweRV_EH1.resource_bounds;
+       nr_non_pipelined_units = SweRV_EH1.nr_non_pipelined_units;
+       latency_of_op = SweRV_EH1.latency_of_op;
+       resources_of_op = SweRV_EH1.resources_of_op;
+       non_pipelined_resources_of_op = SweRV_EH1.non_pipelined_resources_of_op;
+       latency_of_load = SweRV_EH1.latency_of_load;
+       resources_of_load = SweRV_EH1.resources_of_load;
+       resources_of_store = SweRV_EH1.resources_of_store;
+       resources_of_cond = SweRV_EH1.resources_of_cond;
+       latency_of_call = SweRV_EH1.latency_of_call;
+       resources_of_call = SweRV_EH1.resources_of_call;
+       resources_of_builtin = SweRV_EH1.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..912e9ffa
--- /dev/null
+++ b/riscV/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml
new file mode 120000
index 00000000..1e955b85
--- /dev/null
+++ b/riscV/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracleDeps.ml
+\ No newline at end of file
diff --git a/riscV/SelectLongproof.v b/riscV/SelectLongproof.v
index d47b6d64..0fc578bf 100644
--- a/riscV/SelectLongproof.v
+++ b/riscV/SelectLongproof.v
@@ -455,6 +455,10 @@ Proof.
   unfold divls_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_divls_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
@@ -462,6 +466,10 @@ Proof.
   unfold modls_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_modls_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
@@ -469,6 +477,10 @@ Proof.
   unfold divlu_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_divlu_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
@@ -476,6 +488,10 @@ Proof.
   unfold modlu_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_modlu_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_shrxlimm:
@@ -490,33 +506,9 @@ Proof.
 - subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto.
   change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto.
 - TrivialExists.
-(*
-  intros. unfold shrxlimm. destruct Archi.splitlong eqn:SL.
-+ eapply SplitLongproof.eval_shrxlimm; eauto using Archi.splitlong_ptr32.
-+ destruct x; simpl in H0; try discriminate. 
-  destruct (Int.ltu n (Int.repr 63)) eqn:LTU; inv H0.
-  predSpec Int.eq Int.eq_spec n Int.zero.
-  - subst n. exists (Vlong i); split; auto. rewrite Int64.shrx'_zero. auto.
-  - assert (NZ: Int.unsigned n <> 0).
-    { intro EQ; elim H0. rewrite <- (Int.repr_unsigned n). rewrite EQ; auto. }
-    assert (LT: 0 <= Int.unsigned n < 63) by (apply Int.ltu_inv in LTU; assumption).
-    assert (LTU2: Int.ltu (Int.sub Int64.iwordsize' n) Int64.iwordsize' = true).
-    { unfold Int.ltu; apply zlt_true.
-      unfold Int.sub. change (Int.unsigned Int64.iwordsize') with 64. 
-      rewrite Int.unsigned_repr. omega. 
-      assert (64 < Int.max_unsigned) by reflexivity. omega. }
-    assert (X: eval_expr ge sp e m le
-               (Eop (Oshrlimm (Int.repr (Int64.zwordsize - 1))) (a ::: Enil))
-               (Vlong (Int64.shr' i (Int.repr (Int64.zwordsize - 1))))).
-    { EvalOp. }
-    assert (Y: eval_expr ge sp e m le (shrxlimm_inner a n)
-               (Vlong (Int64.shru' (Int64.shr' i (Int.repr (Int64.zwordsize - 1))) (Int.sub Int64.iwordsize' n)))).
-    { EvalOp. simpl. rewrite LTU2. auto. }
-    TrivialExists. 
-    constructor. EvalOp. simpl; eauto. constructor. 
-    simpl. unfold Int.ltu; rewrite zlt_true. rewrite Int64.shrx'_shr_2 by auto. reflexivity. 
-    change (Int.unsigned Int64.iwordsize') with 64; omega.
-*)
+  cbn.
+  rewrite H0.
+  reflexivity.
 Qed.
 
 Theorem eval_cmplu:
@@ -566,6 +558,7 @@ Proof.
   unfold longoffloat; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longoffloat; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
@@ -573,6 +566,7 @@ Proof.
   unfold longuoffloat; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longuoffloat; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
@@ -580,6 +574,7 @@ Proof.
   unfold floatoflong; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_floatoflong; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
@@ -587,6 +582,7 @@ Proof.
   unfold floatoflongu; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_floatoflongu; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
@@ -594,6 +590,7 @@ Proof.
   unfold longofsingle; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longofsingle; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
@@ -601,6 +598,7 @@ Proof.
   unfold longuofsingle; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longuofsingle; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
@@ -608,6 +606,7 @@ Proof.
   unfold singleoflong; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_singleoflong; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
@@ -615,6 +614,7 @@ Proof.
   unfold singleoflongu; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_singleoflongu; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/riscV/SelectOpproof.v b/riscV/SelectOpproof.v
index 7f2014dc..1d13702a 100644
--- a/riscV/SelectOpproof.v
+++ b/riscV/SelectOpproof.v
@@ -506,7 +506,12 @@ Theorem eval_divs_base:
     Val.divs x y = Some z ->
     exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold divs_base. exists z; split. EvalOp. auto.
+  intros. unfold divs_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_mods_base:
@@ -516,7 +521,12 @@ Theorem eval_mods_base:
     Val.mods x y = Some z ->
     exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold mods_base. exists z; split. EvalOp. auto.
+  intros. unfold mods_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_divu_base:
@@ -526,7 +536,12 @@ Theorem eval_divu_base:
     Val.divu x y = Some z ->
     exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold divu_base. exists z; split. EvalOp. auto.
+  intros. unfold divu_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modu_base:
@@ -536,7 +551,12 @@ Theorem eval_modu_base:
     Val.modu x y = Some z ->
     exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold modu_base. exists z; split. EvalOp. auto.
+  intros. unfold modu_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_shrximm:
@@ -553,34 +573,12 @@ Proof.
   replace (Int.shrx i Int.zero) with i. auto.
   unfold Int.shrx, Int.divs. rewrite Int.shl_zero.
   change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed; auto.
-  econstructor; split. EvalOp. auto.
-(*
-  intros. destruct x; simpl in H0; try discriminate. 
-  destruct (Int.ltu n (Int.repr 31)) eqn:LTU; inv H0.
-  unfold shrximm.
-  predSpec Int.eq Int.eq_spec n Int.zero.
-  - subst n. exists (Vint i); split; auto.
-    unfold Int.shrx, Int.divs. rewrite Z.quot_1_r. rewrite Int.repr_signed. auto.
-  - assert (NZ: Int.unsigned n <> 0).
-    { intro EQ; elim H0. rewrite <- (Int.repr_unsigned n). rewrite EQ; auto. }
-    assert (LT: 0 <= Int.unsigned n < 31) by (apply Int.ltu_inv in LTU; assumption).
-    assert (LTU2: Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize = true).
-    { unfold Int.ltu; apply zlt_true.
-      unfold Int.sub. change (Int.unsigned Int.iwordsize) with 32. 
-      rewrite Int.unsigned_repr. omega. 
-      assert (32 < Int.max_unsigned) by reflexivity. omega. }
-    assert (X: eval_expr ge sp e m le
-               (Eop (Oshrimm (Int.repr (Int.zwordsize - 1))) (a ::: Enil))
-               (Vint (Int.shr i (Int.repr (Int.zwordsize - 1))))).
-    { EvalOp. }
-    assert (Y: eval_expr ge sp e m le (shrximm_inner a n)
-               (Vint (Int.shru (Int.shr i (Int.repr (Int.zwordsize - 1))) (Int.sub Int.iwordsize n)))).
-    { EvalOp. simpl. rewrite LTU2. auto. }
-    TrivialExists. 
-    constructor. EvalOp. simpl; eauto. constructor. 
-    simpl. unfold Int.ltu; rewrite zlt_true. rewrite Int.shrx_shr_2 by auto. reflexivity. 
-    change (Int.unsigned Int.iwordsize) with 32; omega.
-*)
+  econstructor; split. EvalOp.
+  cbn.
+  rewrite H0.
+  cbn.
+  reflexivity.
+  apply Val.lessdef_refl.
 Qed.
 
 Theorem eval_shl: binary_constructor_sound shl Val.shl.
@@ -790,6 +788,7 @@ Theorem eval_intoffloat:
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intoffloat. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -799,6 +798,7 @@ Theorem eval_intuoffloat:
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intuoffloat. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -810,6 +810,7 @@ Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros.
   InvEval. simpl in H0. TrivialExists.
   TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofint:
@@ -821,6 +822,7 @@ Proof.
   intros until y; unfold floatofint. case (floatofint_match a); intros.
   InvEval. simpl in H0. TrivialExists.
   TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -830,6 +832,7 @@ Theorem eval_intofsingle:
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -839,6 +842,7 @@ Theorem eval_singleofint:
   exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold singleofint; TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -848,6 +852,7 @@ Theorem eval_intuofsingle:
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intuofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -857,6 +862,7 @@ Theorem eval_singleofintu:
   exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intuofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
diff --git a/riscV/ValueAOp.v b/riscV/ValueAOp.v
index 5670b5fe..f4b7b4d6 100644
--- a/riscV/ValueAOp.v
+++ b/riscV/ValueAOp.v
@@ -13,6 +13,7 @@
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats Values Memory Globalenvs.
 Require Import Op RTL ValueDomain.
+Require Import Zbits.
 
 (** Value analysis for RISC V operators *)
 
@@ -59,10 +60,10 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omul, v1::v2::nil => mul v1 v2
   | Omulhs, v1::v2::nil => mulhs v1 v2
   | Omulhu, v1::v2::nil => mulhu v1 v2
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
-  | Omod, v1::v2::nil => mods v1 v2
-  | Omodu, v1::v2::nil => modu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
+  | Omod, v1::v2::nil => mods_total v1 v2
+  | Omodu, v1::v2::nil => modu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandimm n, v1::nil => and v1 (I n)
   | Oor, v1::v2::nil => or v1 v2
@@ -88,10 +89,10 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omull, v1::v2::nil => mull v1 v2
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
-  | Omodl, v1::v2::nil => modls v1 v2
-  | Omodlu, v1::v2::nil => modlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
+  | Omodl, v1::v2::nil => modls_total v1 v2
+  | Omodlu, v1::v2::nil => modlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlimm n, v1::nil => andl v1 (L n)
   | Oorl, v1::v2::nil => orl v1 v2
@@ -119,20 +120,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
-  | Ointofsingle, v1::nil => intofsingle v1
-  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
   | Osingleofintu, v1::nil => singleofintu v1
-  | Olongoffloat, v1::nil => longoffloat v1
-  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
   | Ofloatoflongu, v1::nil => floatoflongu v1
-  | Olongofsingle, v1::nil => longofsingle v1
-  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
   | Osingleoflongu, v1::nil => singleoflongu v1
   | Ocmp c, _ => of_optbool (eval_static_condition c vl)
diff --git a/runtime/Makefile b/runtime/Makefile
index ea3c914f..6f70fa87 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -38,6 +38,8 @@ OBJS=i64_dtos.o i64_dtou.o i64_sar.o i64_sdiv.o i64_shl.o \
   vararg.o
 endif
 
+AR=ar
+
 OBJS+=write_profiling_table.o
 
 LIB=libcompcert.a
@@ -59,7 +61,7 @@ endif
 
 $(LIB): $(OBJS)
 	rm -f $(LIB)
-	ar rcs $(LIB) $(OBJS)
+	$(AR) rcs $(LIB) $(OBJS)
 
 %.o: %.s
 	$(CASMRUNTIME) -o $@ $^
diff --git a/kvx/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index eab0b21a..eab0b21a 100644
--- a/kvx/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
diff --git a/kvx/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index 85e2a5c6..fb7af3f6 100644
--- a/kvx/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -33,6 +33,12 @@ type problem = {
 (** Print problem for human readability. *)
 val print_problem : out_channel -> problem -> unit;;
 
+(** Get the number of instructions in a problem *)
+val get_nr_instructions : problem -> int;;
+
+(** Get the number of resources in a problem *)
+val get_nr_resources : problem -> int;;
+
 (** Scheduling solution. For {i n} instructions to schedule, and 0≤{i i}<{i n}, position {i i} contains the time to which instruction {i i} should be scheduled. Position {i n} contains the final output latency. *)
 type solution = int array
               
diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v
new file mode 100644
index 00000000..35512652
--- /dev/null
+++ b/scheduling/RTLpath.v
@@ -0,0 +1,1067 @@
+(** We introduce a data-structure extending the RTL CFG into a control-flow graph over "traces" (in the sense of "trace-scheduling")
+    Here, we use the word "path" instead of "trace" because "trace" has already a meaning in CompCert: 
+      a "path" is simply a list of successive nodes in the CFG (modulo some additional wellformness conditions).
+
+    Actually, we extend syntactically the notion of RTL programs with a structure of "path_map": 
+    this gives an alternative view of the CFG -- where "nodes" are paths instead of simple instructions.
+    Our wellformness condition on paths express that:
+      - the CFG on paths is wellformed: any successor of a given path points to another path (possibly the same).
+      - execution of a paths only emit single events.
+
+    We represent each path only by a natural: the number of nodes in the path. These nodes are recovered from a static notion of "default successor".
+    This notion of path is thus incomplete. For example, if a path contains a whole loop (and for example, unrools it several times), 
+    then this loop must be a suffix of the path.
+
+    However: it is sufficient in order to represent superblocks (each superblock being represented as a path).
+    A superblock decomposition of the CFG exactly corresponds to the case where each node is in at most one path.
+
+    Our goal is to provide two bisimulable semantics:
+      - one is simply the RTL semantics
+      - the other is based on a notion of "path-step": each path is executed in a single step.
+
+    Remark that all analyses on RTL programs should thus be appliable for "free" also for RTLpath programs !
+*)
+
+Require Import Coqlib Maps.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL Linking.
+
+Declare Scope option_monad_scope.
+
+Notation "'SOME' X <- A 'IN' B" := (match A with Some X => B | None => None end)
+         (at level 200, X ident, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Notation "'ASSERT' A 'IN' B" := (if A then B else None)
+         (at level 200, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Local Open Scope option_monad_scope.
+
+(** * Syntax of RTLpath programs *)
+
+(** Internal instruction = instruction with a default successor in a path. *)
+
+Definition default_succ (i: instruction): option node :=
+  match i with
+  | Inop s => Some s
+  | Iop op args res s => Some s
+  | Iload _ chunk addr args dst s => Some s
+  | Istore chunk addr args src s => Some s
+  | Icond cond args ifso ifnot _ => Some ifnot
+  | _ => None (* TODO: we could choose a successor for jumptable ? *)
+  end.
+
+Definition early_exit (i: instruction): option node := (* FIXME: for jumptable, replace [node] by [list node] *)
+  match i with
+  | Icond cond args ifso ifnot _ => Some ifso
+  | _ => None
+  end.
+
+(** Our notion of path. 
+
+    We do not formally require that the set of path is a partition of the CFG.
+    path may have intersections !
+
+    Moreover, we do not formally require that path have a single entry-point (a superblock structure) 
+
+    But, in practice, these properties are probably necessary in order to ensure the success of dynamic verification of scheduling.
+
+    Here: we only require that each exit-point of a path is the entry-point of a path
+     (and that internal node of a path are internal instructions)
+*)
+
+
+(* By convention, we say that node [n] is the entry-point of a path if it is a key of the path_map.
+
+   Such a path of entry [n] is defined from a natural [path] representing the [path] default-successors of [n].
+
+   Remark: a path can loop several times in the CFG.
+
+*)
+
+Record path_info := { 
+    psize: nat; (* number minus 1 of instructions in the path *)
+    input_regs: Regset.t;
+    (** Registers that are used (as input_regs) by the "fallthrough successors" of the path *)
+    (** This field is not used by the verificator, but is helpful for the superblock scheduler *)
+    output_regs: Regset.t 
+}.
+
+Definition path_map: Type := PTree.t path_info.
+
+Definition path_entry (*c:code*) (pm: path_map) (n: node): Prop 
+  := pm!n <> None (*/\ c!n <> None*).
+
+Inductive wellformed_path (c:code) (pm: path_map): nat -> node -> Prop :=
+  | wf_last_node i pc:
+      c!pc = Some i ->
+      (forall n, List.In n (successors_instr i) -> path_entry (*c*) pm n) ->
+      wellformed_path c pm 0 pc
+  | wf_internal_node path i pc pc':
+      c!pc = Some i ->
+      default_succ i = Some pc' ->
+      (forall n, early_exit i = Some n -> path_entry (*c*) pm n) ->
+      wellformed_path c pm path pc' ->
+      wellformed_path c pm (S path) pc.
+
+(* all paths defined from the path_map are wellformed *)
+Definition wellformed_path_map (c:code) (pm: path_map): Prop :=
+  forall n path, pm!n = Some path -> wellformed_path c pm path.(psize) n.
+
+(** We "extend" the notion of RTL program with the additional structure for path.
+
+    There is thus a trivial "forgetful functor" from RTLpath programs to RTL ones.
+*)
+
+Record function : Type := 
+ { fn_RTL:> RTL.function;
+   fn_path: path_map;
+   (* condition 1 below: the entry-point of the code is an entry-point of a path *) 
+   fn_entry_point_wf: path_entry (*fn_RTL.(fn_code)*) fn_path fn_RTL.(fn_entrypoint); 
+   (* condition 2 below: the path_map is well-formed *) 
+   fn_path_wf: wellformed_path_map fn_RTL.(fn_code) fn_path
+ }.
+
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+Definition genv := Genv.t fundef unit.
+
+Definition fundef_RTL (fu: fundef) : RTL.fundef := 
+  match fu with
+  | Internal f => Internal f.(fn_RTL)
+  | External ef => External ef
+  end.
+Coercion fundef_RTL: fundef >-> RTL.fundef.
+
+Definition transf_program (p: program) : RTL.program := transform_program fundef_RTL p.
+Coercion transf_program: program >-> RTL.program.
+
+(** * Path-step semantics of RTLpath programs *)
+
+(* Semantics of internal instructions (mimicking RTL semantics) *)
+
+Record istate := mk_istate { icontinue: bool; ipc: node; irs: regset; imem: mem }.
+
+(* FIXME - prediction *)
+(* Internal step through the path *)
+Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem): option istate :=
+  match i with
+  | Inop pc' => Some (mk_istate true pc' rs m)
+  | Iop op args res pc' =>
+      SOME v <- eval_operation ge sp op rs##args m IN
+      Some (mk_istate true pc' (rs#res <- v) m)
+  | Iload TRAP chunk addr args dst pc' =>
+      SOME a <- eval_addressing ge sp addr rs##args IN
+      SOME v <- Mem.loadv chunk m a IN
+      Some (mk_istate true pc' (rs#dst <- v) m)
+  | Iload NOTRAP chunk addr args dst pc' =>
+      let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in
+      match (eval_addressing ge sp addr rs##args) with
+      | None => Some default_state
+      | Some a => match (Mem.loadv chunk m a) with
+          | None => Some default_state
+          | Some v => Some (mk_istate true pc' (rs#dst <- v) m)
+          end
+      end
+  | Istore chunk addr args src pc' =>
+      SOME a <- eval_addressing ge sp addr rs##args IN
+      SOME m' <- Mem.storev chunk m a rs#src IN
+      Some (mk_istate true pc' rs m')
+  | Icond cond args ifso ifnot _ =>
+      SOME b <- eval_condition cond rs##args m IN
+      Some (mk_istate (negb b) (if b then ifso else ifnot) rs m)
+  | _ => None (* TODO jumptable ? *)
+  end.
+
+(** Execution of a path in a single step *)
+
+(* Executes until a state [st] is reached where st.(continue) is false *)
+Fixpoint isteps ge (path:nat) (f: function) sp rs m pc: option istate :=
+  match path with
+  | O => Some (mk_istate true pc rs m)
+  | S p =>
+    SOME i <- (fn_code f)!pc IN
+    SOME st <- istep ge i sp rs m IN
+    if (icontinue st) then
+      isteps ge p f sp (irs st) (imem st) (ipc st)
+    else
+      Some st
+  end.
+
+Definition find_function (pge: genv) (ros: reg + ident) (rs: regset) : option fundef :=
+  match ros with
+  | inl r => Genv.find_funct pge rs#r
+  | inr symb =>
+      match Genv.find_symbol pge symb with
+      | None => None
+      | Some b => Genv.find_funct_ptr pge b
+      end
+  end.
+
+Inductive stackframe : Type :=
+  | Stackframe
+             (res: reg)            (**r where to store the result *)
+             (f: function)         (**r calling function *)
+             (sp: val)             (**r stack pointer in calling function *)
+             (pc: node)            (**r program point in calling function *)
+             (rs: regset)          (**r register state in calling function *)
+  .
+
+Definition stf_RTL (st: stackframe): RTL.stackframe :=
+  match st with
+  | Stackframe res f sp pc rs => RTL.Stackframe res f sp pc rs
+  end.
+
+Fixpoint stack_RTL (stack: list stackframe): list RTL.stackframe :=
+  match stack with
+  | nil => nil
+  | cons stf stack' => cons (stf_RTL stf) (stack_RTL stack')
+  end.
+
+Inductive state : Type :=
+  | State
+             (stack: list stackframe) (**r call stack *)
+             (f: function)            (**r current function *)
+             (sp: val)                (**r stack pointer *)
+             (pc: node)               (**r current program point in [c] *)
+             (rs: regset)             (**r register state *)
+             (m: mem)                 (**r memory state *)
+  | Callstate
+             (stack: list stackframe) (**r call stack *)
+             (f: fundef)              (**r function to call *)
+             (args: list val)         (**r arguments to the call *)
+             (m: mem)                 (**r memory state *)
+  | Returnstate
+             (stack: list stackframe) (**r call stack *)
+             (v: val)                 (**r return value for the call *)
+             (m: mem)                 (**r memory state *)
+  .
+
+Definition state_RTL (s: state): RTL.state :=
+  match s with
+  | State stack f sp pc rs m => RTL.State (stack_RTL stack) f sp pc rs m
+  | Callstate stack f args m => RTL.Callstate (stack_RTL stack) f args m
+  | Returnstate stack v m => RTL.Returnstate (stack_RTL stack) v m
+  end.
+Coercion state_RTL: state >-> RTL.state.
+
+(* Used to execute the last instruction of a path (isteps is only in charge of executing the instructions before the last) *)
+Inductive path_last_step ge pge stack (f: function): val -> node -> regset -> mem -> trace -> state -> Prop :=
+  | exec_istate i sp pc rs m st:
+     (fn_code f)!pc = Some i ->
+     istep ge i sp rs m = Some st ->
+     path_last_step ge pge stack f sp pc rs m
+                    E0 (State stack f sp (ipc st) (irs st) (imem st))
+  | exec_Icall sp pc rs m sig ros args res pc' fd:
+      (fn_code f)!pc = Some(Icall sig ros args res pc') ->
+      find_function pge ros rs = Some fd ->
+      funsig fd = sig ->
+      path_last_step ge pge stack f sp pc rs m
+        E0 (Callstate (Stackframe res f sp pc' rs :: stack) fd rs##args m)
+  | exec_Itailcall stk pc rs m sig ros args fd m':
+      (fn_code f)!pc = Some(Itailcall sig ros args) ->
+      find_function pge ros rs = Some fd ->
+      funsig fd = sig ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      path_last_step ge pge stack f (Vptr stk Ptrofs.zero) pc rs m
+        E0 (Callstate stack fd rs##args m')
+  | exec_Ibuiltin sp pc rs m ef args res pc' vargs t vres m':
+      (fn_code f)!pc = Some(Ibuiltin ef args res pc') ->
+      eval_builtin_args ge (fun r => rs#r) sp m args vargs ->
+      external_call ef ge vargs m t vres m' ->
+      path_last_step ge pge stack f sp pc rs m
+         t (State stack f sp pc' (regmap_setres res vres rs) m')
+  | exec_Ijumptable sp pc rs m arg tbl n pc': (* TODO remove jumptable from here ? *)
+      (fn_code f)!pc = Some(Ijumptable arg tbl) ->
+      rs#arg = Vint n ->
+      list_nth_z tbl (Int.unsigned n) = Some pc' ->
+      path_last_step ge pge stack f sp pc rs m
+        E0 (State stack f sp pc' rs m)
+  | exec_Ireturn stk pc rs m or m':
+      (fn_code f)!pc = Some(Ireturn or) ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      path_last_step ge pge stack f (Vptr stk Ptrofs.zero) pc rs m
+        E0 (Returnstate stack (regmap_optget or Vundef rs) m').
+
+(* Executes an entire path *)
+Inductive path_step ge pge (path:nat) stack f sp rs m pc: trace -> state -> Prop :=
+  | exec_early_exit st:
+     isteps ge path f sp rs m pc = Some st ->
+     (icontinue st) = false ->
+     path_step ge pge path stack f sp rs m pc E0 (State stack f sp (ipc st) (irs st) (imem st))
+  | exec_normal_exit st t s:
+     isteps ge path f sp rs m pc = Some st ->
+     (icontinue st) = true ->
+     path_last_step ge pge stack f sp (ipc st) (irs st) (imem st) t s ->
+     path_step ge pge path stack f sp rs m pc t s.
+
+(* Either internal path execution, or the usual exec_function / exec_return borrowed from RTL *)
+Inductive step ge pge: state -> trace -> state -> Prop :=
+  | exec_path path stack f sp rs m pc t s: 
+      (fn_path f)!pc = Some path ->
+      path_step ge pge path.(psize) stack f sp rs m pc t s ->
+      step ge pge (State stack f sp pc rs m) t s
+  | exec_function_internal s f args m m' stk:
+      Mem.alloc m 0 (fn_RTL f).(fn_stacksize) = (m', stk) ->
+      step ge pge (Callstate s (Internal f) args m)
+        E0 (State s
+                  f
+                  (Vptr stk Ptrofs.zero)
+                  f.(fn_entrypoint)
+                  (init_regs args f.(fn_params))
+                  m')
+  | exec_function_external s ef args res t m m':
+      external_call ef ge args m t res m' ->
+      step ge pge (Callstate s (External ef) args m) 
+            t (Returnstate s res m')
+  | exec_return res f sp pc rs s vres m:
+      step ge pge (Returnstate (Stackframe res f sp pc rs :: s) vres m)
+        E0 (State s f sp pc (rs#res <- vres) m).
+
+Inductive initial_state (p:program) : state -> Prop :=
+    initial_state_intro  (b : block) (f : fundef) (m0 : mem):
+                          Genv.init_mem p = Some m0 ->
+                          Genv.find_symbol (Genv.globalenv p) (prog_main p) = Some b ->
+                          Genv.find_funct_ptr (Genv.globalenv p) b = Some f ->
+                          funsig f = signature_main -> initial_state p (Callstate nil f nil m0).
+
+Definition final_state (st: state) (i:int): Prop 
+ := RTL.final_state st i.
+
+Definition semantics (p: program) :=
+  Semantics (step (Genv.globalenv (transf_program p))) (initial_state p) final_state (Genv.globalenv p).
+
+(** * Proving the bisimulation between (semantics p) and (RTL.semantics p). *)
+
+(** ** Preliminaries: simple tactics for option-monad *)
+
+Lemma destruct_SOME A B (P: option B -> Prop) (e: option A) (f: A -> option B):
+  (forall x, e = Some x -> P (f x)) -> (e = None -> P None) -> (P (SOME x <- e IN f x)).
+Proof.
+  intros; destruct e; simpl; auto.
+Qed.
+
+Lemma destruct_ASSERT B (P: option B -> Prop) (e: bool) (x: option B):
+  (e = true -> P x) -> (e = false -> P None) -> (P (ASSERT e IN x)).
+Proof.
+  intros; destruct e; simpl; auto.
+Qed.
+
+Ltac inversion_SOME x :=
+  try (eapply destruct_SOME; [ let x := fresh x in intro x | simpl; try congruence ]).
+
+Ltac inversion_ASSERT :=
+  try (eapply destruct_ASSERT; [ idtac | simpl; try congruence ]).
+
+Ltac simplify_someHyp :=
+  match goal with
+  | H: None = Some _ |- _  => inversion H; clear H; subst
+  | H: Some _ = None |- _  => inversion H; clear H; subst
+  | H: ?t = ?t |- _ => clear H
+  | H: Some _ = Some _ |- _  => inversion H; clear H; subst
+  | H: Some _ <> None |- _ => clear H
+  | H: None <> Some _ |- _ => clear H
+  | H: _ = Some _ |- _ => (try rewrite !H in * |- *); generalize H; clear H
+  | H: _ = None |- _ => (try rewrite !H in * |- *); generalize H; clear H
+  end.
+
+Ltac explore_destruct :=
+  repeat (match goal with
+  | [H: ?expr = ?val |- context[match ?expr with | _ => _ end]] => rewrite H
+  | [H: match ?var with | _ => _ end |- _] => destruct var
+  | [ |- context[match ?m with | _ => _ end] ] => destruct m
+  | _ => discriminate
+  end).
+
+Ltac simplify_someHyps := 
+  repeat (simplify_someHyp; simpl in * |- *).
+
+Ltac try_simplify_someHyps := 
+  try (intros; simplify_someHyps; eauto).
+
+(* TODO: try to improve this tactic with a better control over names and inversion *)
+Ltac simplify_SOME x :=
+  (repeat inversion_SOME x); try_simplify_someHyps.
+
+(** ** The easy way: Forward simulation of RTLpath by RTL 
+
+This way can be viewed as a correctness property: all transitions in RTLpath are valid RTL transitions !
+
+*)
+
+Local Hint Resolve RTL.exec_Inop RTL.exec_Iop RTL.exec_Iload RTL.exec_Istore RTL.exec_Icond RTL.exec_Iload_notrap1 RTL.exec_Iload_notrap2: core.
+
+(* istep reflects RTL.step *)
+Lemma istep_correct ge i stack (f:function) sp rs m st :
+  istep ge i sp rs m = Some st ->
+  forall pc, (fn_code f)!pc = Some i ->
+  RTL.step ge (State stack f sp pc rs m) E0 (State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  destruct i; simpl; try congruence; simplify_SOME x.
+  1-3: explore_destruct; simplify_SOME x.
+Qed.
+
+Local Hint Resolve star_refl: core.
+
+(* isteps reflects a star relation on RTL.step *)
+Lemma isteps_correct ge path stack f sp: forall rs m pc st,
+  isteps ge path f sp rs m pc = Some st ->
+  star RTL.step ge (State stack f sp pc rs m) E0 (State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  induction path; simpl; try_simplify_someHyps.
+  inversion_SOME i; intros Hi.
+  inversion_SOME st0; intros Hst0.
+  destruct (icontinue st0) eqn:cont.
+  + intros; eapply star_step.
+    - eapply istep_correct; eauto.
+    - simpl; eauto.
+    - auto.
+  + intros; simplify_someHyp; eapply star_step.
+    - eapply istep_correct; eauto.
+    - simpl; eauto.
+    - auto.
+Qed.
+
+Lemma isteps_correct_early_exit ge path stack f sp: forall rs m pc st,
+  isteps ge path f sp rs m pc = Some st ->
+  st.(icontinue) = false ->
+  plus RTL.step ge (State stack f sp pc rs m) E0 (State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  destruct path; simpl; try_simplify_someHyps; try congruence.
+  inversion_SOME i; intros Hi.
+  inversion_SOME st0; intros Hst0.
+  destruct (icontinue st0) eqn:cont.
+  + intros; eapply plus_left.
+    - eapply istep_correct; eauto.
+    - eapply isteps_correct; eauto.
+    - auto.
+  + intros X; inversion X; subst.
+    eapply plus_one.
+    eapply istep_correct; eauto.
+Qed.
+
+Local Hint Resolve list_forall2_nil match_globdef_fun linkorder_refl match_globvar_intro: core.
+
+Section CORRECTNESS.
+
+Variable p: program.
+
+Lemma match_prog_RTL: match_program (fun _ f tf => tf = fundef_RTL f) eq p (transf_program p).
+Proof.
+  eapply match_transform_program; eauto.
+Qed.
+
+Let pge := Genv.globalenv p.
+Let ge := Genv.globalenv (transf_program p).
+
+Lemma senv_preserved: Senv.equiv pge ge.
+Proof (Genv.senv_match match_prog_RTL).
+
+Lemma symbols_preserved s: Genv.find_symbol ge s = Genv.find_symbol pge s.
+Proof (Genv.find_symbol_match match_prog_RTL s).
+
+Lemma find_function_RTL_match ros rs fd:
+  find_function pge ros rs = Some fd -> RTL.find_function ge ros rs = Some (fundef_RTL fd).
+Proof.
+  destruct ros; simpl.
+  + intro; exploit (Genv.find_funct_match match_prog_RTL); eauto.
+    intros (cuint & tf & H1 & H2 & H3); subst; auto.
+  + rewrite symbols_preserved.
+    destruct (Genv.find_symbol pge i); simpl; try congruence.
+    intro; exploit (Genv.find_funct_ptr_match match_prog_RTL); eauto. 
+    intros (cuint & tf & H1 & H2 & H3); subst; auto.
+Qed.
+
+Local Hint Resolve istep_correct RTL.exec_Ibuiltin RTL.exec_Ijumptable RTL.exec_Ireturn RTL.exec_Icall RTL.exec_Itailcall find_function_RTL_match: core.
+
+Lemma path_last_step_correct stack f sp pc rs m t s:
+  path_last_step ge pge stack f sp pc rs m t s ->
+  RTL.step ge (State stack f sp pc rs m) t s.
+Proof.
+  destruct 1; try (eapply istep_correct); simpl; eauto.
+Qed.
+
+Lemma path_step_correct path stack f sp pc rs m t s:
+  path_step ge pge path stack f sp rs m pc t s ->
+  plus RTL.step ge (State stack f sp pc rs m) t s.
+Proof.
+  destruct 1.
+  + eapply isteps_correct_early_exit; eauto.
+  + eapply plus_right.
+    eapply isteps_correct; eauto.
+    eapply path_last_step_correct; eauto.
+    auto.
+Qed.
+
+Local Hint Resolve plus_one RTL.exec_function_internal RTL.exec_function_external RTL.exec_return: core.
+
+Lemma step_correct s t s': step ge pge s t s' -> plus RTL.step ge s t s'.
+Proof.
+  destruct 1; try (eapply path_step_correct); simpl; eauto.
+Qed.
+
+Theorem RTLpath_correct: forward_simulation (semantics p) (RTL.semantics p).
+Proof.
+  eapply forward_simulation_plus with (match_states := fun s1 s2 => s2 = state_RTL s1); simpl; auto.
+  - apply senv_preserved.
+  - destruct 1; intros; eexists; intuition eauto. econstructor; eauto.
+    + apply (Genv.init_mem_match match_prog_RTL); auto.
+    + rewrite (Genv.find_symbol_match match_prog_RTL).
+      rewrite (match_program_main match_prog_RTL); eauto.
+    + exploit (Genv.find_funct_ptr_match match_prog_RTL); eauto.
+      intros (cunit & tf0 & XX); intuition subst; eauto.
+  - unfold final_state; intros; subst; eauto.
+  - intros; subst. eexists; intuition.
+    eapply step_correct; eauto.
+Qed.
+
+End CORRECTNESS.
+
+Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
+  prog_defs p1 = prog_defs p2 ->
+  prog_public p1 = prog_public p2 ->
+  prog_main p1 = prog_main p2 ->
+  p1 = p2.
+Proof.
+  intros. destruct p1. destruct p2. simpl in *. subst. auto.
+Qed.
+
+Lemma cons_extract {A: Type} : forall (l: list A) a b, a = b -> a::l = b::l.
+Proof.
+  intros. congruence.
+Qed.
+
+(* Definition transf_program : RTLpath.program -> RTL.program := transform_program fundef_RTL.
+
+Lemma transf_program_proj: forall p, transf_program (transf_program p) = p.
+Proof.
+  intros p. destruct p as [defs pub main]. unfold program_proj. simpl.
+  apply program_equals; simpl; auto.
+  induction defs.
+  - simpl; auto.
+  - simpl. rewrite IHdefs. 
+    destruct a as [id gd]; simpl.
+    destruct gd as [f|v]; simpl; auto.
+    rewrite transf_fundef_proj. auto.
+Qed. *)
+
+
+(** The hard way: Forward simulation of RTL by RTLpath 
+
+This way can be viewed as a completeness property: all transitions in RTL can be represented as RTLpath transitions !
+
+*)
+
+(* This lemma is probably needed to compose a pass from RTL -> RTLpath with other passes.*)
+Lemma match_RTL_prog {LA: Linker fundef} {LV: Linker unit} p: match_program (fun _ f tf => f = fundef_RTL tf) eq (transf_program p) p.
+Proof. 
+  unfold match_program, match_program_gen; intuition.
+  unfold transf_program at 2; simpl.
+  generalize (prog_defs p).
+  induction l as [|a l]; simpl; eauto.
+  destruct a; simpl.
+  intros; eapply list_forall2_cons; eauto.
+  unfold match_ident_globdef; simpl; intuition; destruct g as [f|v]; simpl; eauto.
+  eapply match_globdef_var. destruct v; eauto.
+Qed.
+
+(* Theory of wellformed paths *)
+
+Fixpoint nth_default_succ (c: code) (path:nat) (pc: node): option node :=
+  match path with
+  | O => Some pc
+  | S path' => 
+     SOME i <- c!pc IN
+     SOME pc' <- default_succ i IN
+     nth_default_succ c path' pc' 
+  end.
+
+Lemma wellformed_suffix_path c pm path path':
+   (path' <= path)%nat -> 
+  forall pc, wellformed_path c pm path pc -> 
+   exists pc', nth_default_succ c (path-path') pc = Some pc' /\ wellformed_path c pm path' pc'.
+Proof.
+  induction 1 as [|m].
+  + intros. enough (path'-path'=0)%nat as ->; [simpl;eauto|omega].
+  + intros pc WF; enough (S m-path'=S (m-path'))%nat as ->; [simpl;eauto|omega].
+    inversion WF; subst; clear WF; intros; simplify_someHyps.
+    intros; simplify_someHyps; eauto.
+Qed.
+
+Definition nth_default_succ_inst (c: code) (path:nat) pc: option instruction :=
+   SOME pc <- nth_default_succ c path pc IN
+   c!pc.
+
+Lemma final_node_path f path pc:
+   (fn_path f)!pc = Some path -> 
+   exists i, nth_default_succ_inst (fn_code f) path.(psize) pc = Some i 
+             /\ (forall n, List.In n (successors_instr i) -> path_entry (*fn_code f*) (fn_path f) n).
+Proof.
+  intros; exploit fn_path_wf; eauto.
+  intro WF.
+  set (ps:=path.(psize)).
+  exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps O); omega || eauto.
+  destruct 1 as (pc' & NTH_SUCC & WF'); auto.
+  assert (ps - 0 = ps)%nat as HH by omega. rewrite HH in NTH_SUCC. clear HH.
+  unfold nth_default_succ_inst.
+  inversion WF'; clear WF'; subst. simplify_someHyps; eauto.
+Qed.
+
+Lemma internal_node_path path f path0 pc:
+   (fn_path f)!pc = (Some path0) -> 
+   (path < path0.(psize))%nat ->
+   exists i pc',
+      nth_default_succ_inst (fn_code f) path pc = Some i /\
+      default_succ i = Some pc' /\
+      (forall n, early_exit i = Some n -> path_entry (*fn_code f*) (fn_path f) n).
+Proof.
+  intros; exploit fn_path_wf; eauto.
+  set (ps:=path0.(psize)).
+  intro WF; exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps (ps-path)); eauto. { omega. }
+  destruct 1 as (pc' & NTH_SUCC & WF').
+  assert (ps - (ps - path) = path)%nat as HH by omega. rewrite HH in NTH_SUCC. clear HH.
+  unfold nth_default_succ_inst. 
+  inversion WF'; clear WF'; subst. { omega. }
+  simplify_someHyps; eauto.
+Qed.
+
+Lemma initialize_path (*c*) pm n: path_entry (*c*) pm n -> exists path, pm!n = Some path.
+Proof.
+  unfold path_entry; destruct pm!n; eauto. intuition congruence.
+Qed.
+Local Hint Resolve fn_entry_point_wf: core.
+Local Opaque path_entry.
+
+Lemma istep_successors ge i sp rs m st:
+  istep ge i sp rs m = Some st -> 
+  In (ipc st) (successors_instr i).
+Proof.
+  destruct i; simpl; try congruence; simplify_SOME x.
+  all: explore_destruct; simplify_SOME x.
+Qed.
+
+Lemma istep_normal_exit ge i sp rs m st:
+  istep ge i sp rs m = Some st ->
+  st.(icontinue) = true ->
+  default_succ i = Some st.(ipc).
+Proof.
+  destruct i; simpl; try congruence; simplify_SOME x.
+  all: explore_destruct; simplify_SOME x.
+Qed.
+
+Lemma isteps_normal_exit ge path f sp: forall rs m pc st,
+  st.(icontinue) = true ->
+  isteps ge path f sp rs m pc = Some st ->
+  nth_default_succ (fn_code f) path pc = Some st.(ipc).
+Proof.
+  induction path; simpl. { try_simplify_someHyps. }
+  intros rs m pc st CONT; try_simplify_someHyps.
+  inversion_SOME i; intros Hi.
+  inversion_SOME st0; intros Hst0.
+  destruct (icontinue st0) eqn:X; try congruence.
+  try_simplify_someHyps.
+  intros; erewrite istep_normal_exit; eauto.
+Qed.
+
+
+(* TODO: the three following lemmas could maybe simplified by introducing an auxiliary
+    left-recursive definition equivalent to isteps ?
+*)
+Lemma isteps_step_right ge path f sp: forall rs m pc st i,
+  isteps ge path f sp rs m pc = Some st ->
+  st.(icontinue) = true ->
+  (fn_code f)!(st.(ipc)) = Some i ->
+  istep ge i sp st.(irs) st.(imem) = isteps ge (S path) f sp rs m pc.
+Proof.
+  induction path.
+  + simpl; intros; try_simplify_someHyps. simplify_SOME st.
+    destruct st as [b]; destruct b; simpl; auto.
+  + intros rs m pc st i H.
+    simpl in H.
+    generalize H; clear H; simplify_SOME xx.
+    destruct (icontinue xx0) eqn: CONTxx0.
+    * intros; erewrite IHpath; eauto.
+    * intros; congruence.
+Qed.
+
+Lemma isteps_inversion_early ge path f sp: forall rs m pc st,
+  isteps ge path f sp rs m pc = Some st ->
+  (icontinue st)=false ->
+  exists st0 i path0,
+    (path > path0)%nat /\
+    isteps ge path0 f sp rs m pc = Some st0 /\ 
+    st0.(icontinue) = true /\ 
+    (fn_code f)!(st0.(ipc)) = Some i /\
+    istep ge i sp st0.(irs) st0.(imem) = Some st.
+Proof.
+  induction path as [|path]; simpl.
+  - intros; try_simplify_someHyps; try congruence.
+  - intros rs m pc st; inversion_SOME i; inversion_SOME st0.
+    destruct (icontinue st0) eqn: CONT.
+    + intros STEP PC STEPS CONT0. exploit IHpath; eauto.
+      clear STEPS.
+      intros (st1 & i0 & path0 & BOUND & STEP1 & CONT1 & X1 & X2); auto.
+      exists st1. exists i0. exists (S path0). intuition.
+      simpl; try_simplify_someHyps. 
+      rewrite CONT. auto.
+    + intros; try_simplify_someHyps; try congruence.
+      eexists. exists i. exists O; simpl. intuition eauto.
+      omega.
+Qed.
+
+Lemma isteps_resize ge path0 path1 f sp rs m pc st: 
+ (path0 <= path1)%nat ->
+  isteps ge path0 f sp rs m pc = Some st ->
+  (icontinue st)=false ->
+  isteps ge path1 f sp rs m pc = Some st.
+Proof.
+  induction 1 as [|path1]; simpl; auto.
+  intros PSTEP CONT. exploit IHle; auto. clear PSTEP IHle H path0.
+  generalize rs m pc st CONT; clear rs m pc st CONT. 
+  induction path1 as [|path]; simpl; auto.
+  - intros; try_simplify_someHyps; try congruence.
+  - intros rs m pc st; inversion_SOME i; inversion_SOME st0; intros; try_simplify_someHyps.
+    destruct (icontinue st0) eqn: CONT0; eauto.
+Qed.
+
+(* FIXME - add prediction *)
+Inductive is_early_exit pc: instruction -> Prop :=
+  | Icond_early_exit cond args ifnot predict:
+     is_early_exit pc (Icond cond args pc ifnot predict)
+ . (* TODO add jumptable here ? *)
+
+Lemma istep_early_exit ge i sp rs m st :
+  istep ge i sp rs m = Some st -> 
+  st.(icontinue) = false -> 
+  st.(irs) = rs /\ st.(imem) = m /\ is_early_exit st.(ipc) i.
+Proof.
+  Local Hint Resolve Icond_early_exit: core.
+  destruct i; simpl; try congruence; simplify_SOME b; simpl; try congruence.
+  all: explore_destruct; simplify_SOME b; try discriminate.
+Qed.
+
+Section COMPLETENESS.
+
+Variable p: program.
+
+Let pge := Genv.globalenv p.
+Let ge := Genv.globalenv (transf_program p).
+
+Lemma find_funct_ptr_RTL_preserv b f:
+   Genv.find_funct_ptr ge b = Some f -> (exists f0, Genv.find_funct_ptr pge b = Some f0 /\ f = f0).
+Proof.
+  intros; exploit (Genv.find_funct_ptr_match (match_RTL_prog p)); eauto.
+  destruct 1 as (cunit & tf & X & Y & Z); subst.
+  eauto.
+Qed.
+
+Lemma find_RTL_function_match ros rs fd:
+  RTL.find_function ge ros rs = Some fd -> exists fd', fd = fundef_RTL fd' /\ find_function pge ros rs = Some fd'.
+Proof.
+  destruct ros; simpl.
+  + intro; exploit (Genv.find_funct_match (match_RTL_prog p)); eauto.
+    intros (cuint & tf & H1 & H2 & H3); subst; eauto.
+  + rewrite (symbols_preserved p); unfold pge.
+    destruct (Genv.find_symbol (Genv.globalenv p) i); simpl; try congruence.
+    intro; exploit find_funct_ptr_RTL_preserv; eauto.
+    intros (tf & H1 & H2); subst; eauto.
+Qed.
+
+
+(** *** Definition of well-formed stacks and of match_states *)
+Definition wf_stf (st: stackframe): Prop :=
+  match st with
+  | Stackframe res f sp pc rs => path_entry (*f.(fn_code)*) f.(fn_path) pc
+  end.
+
+Definition wf_stackframe (stack: list stackframe): Prop :=
+  forall st, List.In st stack -> wf_stf st.
+
+Lemma wf_stackframe_nil: wf_stackframe nil.
+Proof.
+  unfold wf_stackframe; simpl. tauto.
+Qed.
+Local Hint Resolve wf_stackframe_nil: core.
+
+Lemma wf_stackframe_cons st stack:
+  wf_stackframe (st::stack) <-> (wf_stf st) /\ wf_stackframe stack.
+Proof.
+  unfold wf_stackframe; simpl; intuition (subst; auto).
+Qed.
+
+Definition stack_of (s: state):  list stackframe :=
+  match s with
+  | State stack f sp pc rs m => stack
+  | Callstate stack f args m => stack
+  | Returnstate stack v m => stack
+  end.
+
+Definition is_inst (s: RTL.state): bool :=
+  match s with
+  | RTL.State stack f sp pc rs m => true
+  | _ => false
+  end.
+
+Inductive match_inst_states_goal (idx: nat) (s1:RTL.state): state -> Prop :=
+  | State_match path stack f sp pc rs m s2:
+    (fn_path f)!pc = Some path ->
+    (idx <= path.(psize))%nat ->
+      isteps ge (path.(psize)-idx) f sp rs m pc = Some s2 ->
+      s1 = State stack f sp s2.(ipc) s2.(irs) s2.(imem) ->
+      match_inst_states_goal idx s1 (State stack f sp pc rs m).
+
+Definition match_inst_states (idx: nat) (s1:RTL.state) (s2:state): Prop :=
+  if is_inst s1 then match_inst_states_goal idx s1 s2 else s1 = state_RTL s2.
+
+Definition match_states (idx: nat) (s1:RTL.state) (s2:state): Prop :=
+    match_inst_states idx s1 s2
+ /\ wf_stackframe (stack_of s2).
+
+(** *** Auxiliary lemmas of completeness *)
+Lemma istep_complete t i stack f sp rs m pc s':
+  RTL.step ge (State stack f sp pc rs m) t s' ->
+  (fn_code f)!pc = Some i -> 
+  default_succ i <> None -> 
+  t = E0 /\ exists st, istep ge i sp rs m = Some st /\ s'=(State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  intros H X; inversion H; simpl; subst; try rewrite X in * |-; clear X; simplify_someHyps; try congruence;
+  (split; auto); simplify_someHyps; eexists; split; simplify_someHyps; eauto.
+  all: explore_destruct; simplify_SOME a.
+Qed.
+
+Lemma stuttering path idx stack f sp rs m pc st t s1':
+   isteps ge (path.(psize)-(S idx)) f sp rs m pc = Some st ->
+   (fn_path f)!pc = Some path ->
+   (S idx <= path.(psize))%nat ->
+   st.(icontinue) = true ->
+   RTL.step ge (State stack f sp st.(ipc) st.(irs) st.(imem)) t s1' ->
+   t = E0 /\ match_inst_states idx s1' (State stack f sp pc rs m).
+Proof.
+  intros PSTEP PATH BOUND CONT RSTEP; exploit (internal_node_path (path.(psize)-(S idx))); omega || eauto.
+  intros (i & pc' & Hi & Hpc & DUM).
+  unfold nth_default_succ_inst in Hi.
+  erewrite isteps_normal_exit in Hi; eauto.
+  exploit istep_complete; congruence || eauto.
+  intros (SILENT & st0 & STEP0 & EQ).
+  intuition; subst; unfold match_inst_states; simpl.
+  intros; refine (State_match _ _ path stack f sp pc rs m _ PATH _ _ _); simpl; omega || eauto.
+  set (ps:=path.(psize)). enough (ps - idx = S (ps - (S idx)))%nat as ->; try omega.
+  erewrite <- isteps_step_right; eauto.
+Qed.
+
+Lemma normal_exit path stack f sp rs m pc st t s1':
+   isteps ge path.(psize) f sp rs m pc = Some st ->
+   (fn_path f)!pc = Some path ->
+   st.(icontinue) = true ->
+   RTL.step ge (State stack f sp st.(ipc) st.(irs) st.(imem)) t s1' ->
+   wf_stackframe stack ->
+   exists s2', 
+      (path_last_step ge pge stack f sp st.(ipc) st.(irs) st.(imem)) t s2' 
+       /\ (exists idx', match_states idx' s1' s2').
+Proof.
+  Local Hint Resolve istep_successors list_nth_z_in: core. (* Hint for path_entry proofs *)
+  intros PSTEP PATH CONT RSTEP WF; exploit (final_node_path f path); eauto.
+  intros (i & Hi & SUCCS).
+  unfold nth_default_succ_inst in Hi.
+  erewrite isteps_normal_exit in Hi; eauto.
+  destruct (default_succ i) eqn:Hn0.
+  + (* exec_istate *)
+    exploit istep_complete; congruence || eauto.
+    intros (SILENT & st0 & STEP0 & EQ); subst.
+    exploit (exec_istate ge pge); eauto.
+    eexists; intuition eauto.
+    unfold match_states, match_inst_states; simpl.
+    destruct (initialize_path (*fn_code f*) (fn_path f) (ipc st0)) as (path0 & Hpath0); eauto.
+    exists (path0.(psize)); intuition eauto.
+    econstructor; eauto.
+    * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega.
+    * simpl; eauto.
+  + generalize Hi; inversion RSTEP; clear RSTEP; subst; (repeat (simplify_someHyp; simpl in * |- * )); try congruence; eauto.
+    - (* Icall *)
+      intros; exploit find_RTL_function_match; eauto.
+      intros (fd' & MATCHfd & Hfd'); subst.
+      exploit (exec_Icall ge pge); eauto.
+      eexists; intuition eauto.
+      eexists O; unfold match_states, match_inst_states; simpl; intuition eauto.
+      rewrite wf_stackframe_cons; intuition simpl; eauto.
+    - (* Itailcall *)
+      intros; exploit find_RTL_function_match; eauto.
+      intros (fd' & MATCHfd & Hfd'); subst.
+      exploit (exec_Itailcall ge pge); eauto.
+      eexists; intuition eauto.
+      eexists O; unfold match_states, match_inst_states; simpl; intuition eauto.
+    - (* Ibuiltin *)
+      intros; exploit exec_Ibuiltin; eauto.
+      eexists; intuition eauto.
+      unfold match_states, match_inst_states; simpl.
+      destruct (initialize_path (*fn_code f*) (fn_path f) pc') as (path0 & Hpath0); eauto.
+      exists path0.(psize); intuition eauto.
+      econstructor; eauto.
+      * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega.
+      * simpl; eauto.
+   - (* Ijumptable *)
+      intros; exploit exec_Ijumptable; eauto.
+      eexists; intuition eauto.
+      unfold match_states, match_inst_states; simpl.
+      destruct (initialize_path (*fn_code f*) (fn_path f) pc') as (path0 & Hpath0); eauto.
+      exists path0.(psize); intuition eauto.
+      econstructor; eauto.
+      * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega.
+      * simpl; eauto.
+  - (* Ireturn *)
+      intros; exploit exec_Ireturn; eauto.
+      eexists; intuition eauto.
+      eexists O; unfold match_states, match_inst_states; simpl; intuition eauto.
+Qed.
+
+Lemma path_step_complete stack f sp rs m pc t s1' idx path st:
+  isteps ge (path.(psize)-idx) f sp rs m pc = Some st ->
+  (fn_path f)!pc = Some path ->
+  (idx <= path.(psize))%nat ->
+  RTL.step ge (State stack f sp st.(ipc) st.(irs) st.(imem)) t s1' ->
+  wf_stackframe stack ->
+  exists idx' s2', 
+      (path_step ge pge path.(psize) stack f sp rs m pc t s2' 
+       \/  (t = E0 /\ s2'=(State stack f sp pc rs m) /\ (idx' < idx)%nat)
+       \/ (exists path', path_step ge pge path.(psize) stack f sp rs m pc E0 (State stack f sp st.(ipc) st.(irs) st.(imem))
+                         /\ (fn_path f)!(ipc st) = Some path' /\ path'.(psize) = O
+                         /\ path_step ge pge path'.(psize) stack f sp st.(irs) st.(imem) st.(ipc) t s2')
+       )
+      /\ match_states idx' s1' s2'.
+Proof.
+  Local Hint Resolve exec_early_exit exec_normal_exit: core.
+  intros PSTEP PATH BOUND RSTEP WF; destruct (st.(icontinue)) eqn: CONT.
+  destruct idx as [ | idx].
+  + (* path_step on normal_exit *)
+     assert (path.(psize)-0=path.(psize))%nat as HH by omega. rewrite HH in PSTEP. clear HH.
+     exploit normal_exit; eauto.
+     intros (s2' & LSTEP & (idx' & MATCH)).
+     exists idx'; exists s2'; intuition eauto.
+  + (* stuttering step *)
+    exploit stuttering; eauto.
+    unfold match_states; exists idx; exists (State stack f sp pc rs m); 
+    intuition.
+  + (* one or two path_step on early_exit *)
+    exploit (isteps_resize ge (path.(psize) - idx)%nat path.(psize)); eauto; try omega.
+    clear PSTEP; intros PSTEP.
+    (* TODO for clarification: move the assert below into a separate lemma *)
+    assert (HPATH0: exists path0, (fn_path f)!(ipc st) = Some path0).
+    { clear RSTEP.
+      exploit isteps_inversion_early; eauto.
+      intros (st0 & i & path0 & BOUND0 & PSTEP0 & CONT0 & PC0 & STEP0).
+      exploit istep_early_exit; eauto.
+      intros (X1 & X2 & EARLY_EXIT).
+      destruct st as [cont pc0 rs0 m0]; simpl in * |- *; intuition subst.
+      exploit (internal_node_path path0); omega || eauto.
+      intros (i' & pc' & Hi' & Hpc' & ENTRY).
+      unfold nth_default_succ_inst in Hi'.
+      erewrite isteps_normal_exit in Hi'; eauto.
+      clear pc' Hpc' STEP0 PSTEP0 BOUND0; try_simplify_someHyps; intros.
+      destruct EARLY_EXIT as [cond args ifnot]; simpl in ENTRY;
+      destruct (initialize_path (*fn_code f*) (fn_path f) pc0); eauto.
+    }
+    destruct HPATH0 as (path1  & Hpath1).
+    destruct (path1.(psize)) as [|ps] eqn:Hpath1size.
+    * (* two step case *)
+      exploit (normal_exit path1); try rewrite Hpath1size; simpl; eauto.
+      simpl; intros (s2' & LSTEP & (idx' & MATCH)).
+      exists idx'. exists s2'. constructor; auto.
+      right. right. eexists; intuition eauto.
+      (* now, prove the last step *)
+      rewrite Hpath1size; exploit exec_normal_exit. 4:{ eauto. } 
+      - simpl; eauto.
+      - simpl; eauto.
+      - simpl; eauto.
+    * (* single step case *)
+      exploit (stuttering path1 ps stack f sp (irs st) (imem st) (ipc st)); simpl; auto.
+      - { rewrite Hpath1size; enough (S ps-S ps=0)%nat as ->; try omega.  simpl; eauto. }
+      - omega.
+      - simpl; eauto.
+      - simpl; eauto.
+      - intuition subst.
+        repeat eexists; intuition eauto.
+Qed.
+
+Lemma step_noninst_complete s1 t s1' s2:
+  is_inst s1 = false ->
+  s1 = state_RTL s2 ->
+  RTL.step ge s1 t s1' ->
+  wf_stackframe (stack_of s2) ->
+  exists s2', step ge pge s2 t s2' /\ exists idx, match_states idx s1' s2'.
+Proof.
+  intros H0 H1 H2 WFSTACK; destruct s2; subst; simpl in * |- *; try congruence;
+  inversion H2; clear H2; subst; try_simplify_someHyps; try congruence.
+  + (* exec_function_internal *)
+    destruct f; simpl in H3; inversion H3; subst; clear H3.
+    eexists; constructor 1.
+    * eapply exec_function_internal; eauto.
+    * unfold match_states, match_inst_states; simpl.
+      destruct (initialize_path (*fn_code f*) (fn_path f) (fn_entrypoint (fn_RTL f))) as (path & Hpath); eauto.
+      exists path.(psize). constructor; auto.
+      econstructor; eauto.
+      - set (ps:=path.(psize)). enough (ps-ps=O)%nat as ->; simpl; eauto.
+        omega.
+      - simpl; auto.
+  + (* exec_function_external *)
+    destruct f; simpl in H3 |-; inversion H3; subst; clear H3.
+    eexists; constructor 1.
+    * apply exec_function_external; eauto.
+    * unfold match_states, match_inst_states; simpl. exists O; auto.
+  + (* exec_return *)
+    destruct stack eqn: Hstack; simpl in H1; inversion H1; clear H1; subst.
+    destruct s0 eqn: Hs0; simpl in H0; inversion H0; clear H0; subst.
+    eexists; constructor 1.
+    * apply exec_return.
+    * unfold match_states, match_inst_states; simpl.
+      rewrite wf_stackframe_cons in WFSTACK.
+      destruct WFSTACK as (H0 & H1); simpl in H0.
+      destruct (initialize_path (*fn_code f0*) (fn_path f0) pc0) as (path & Hpath); eauto.
+      exists path.(psize). constructor; auto.
+      econstructor; eauto.
+      - set (ps:=path.(psize)). enough (ps-ps=O)%nat as ->; simpl; eauto.
+        omega.
+      - simpl; auto.
+Qed.
+
+(** *** The main completeness lemma and the simulation theorem...*)
+Lemma step_complete s1 t s1' idx s2:
+ match_states idx s1 s2 ->
+ RTL.step ge s1 t s1' ->
+ exists idx' s2', (plus (step ge) pge s2 t s2' \/ (t = E0 /\ s2=s2' /\ (idx' < idx)%nat)) /\ match_states idx' s1' s2'.
+Proof.
+  Local Hint Resolve plus_one plus_two exec_path: core.
+  unfold match_states at 1, match_inst_states. intros (IS_INST & WFSTACK). destruct (is_inst s1) eqn: His1.
+  - clear His1; destruct IS_INST as [path stack f sp pc rs m s2 X X0 X1 X2]; auto; subst; simpl in * |- *. 
+    intros STEP; exploit path_step_complete; eauto.
+    intros (idx' & s2' & H0 & H1).
+    eexists; eexists; eauto.
+    destruct H0 as [H0|[H0|(path'&H0)]]; intuition subst; eauto.
+  - intros; exploit step_noninst_complete; eauto.
+    intros (s2' & STEP & (idx0 & MATCH)).
+    exists idx0; exists s2'; intuition auto.
+Qed.
+
+Theorem RTLpath_complete: forward_simulation (RTL.semantics p) (semantics p).
+Proof.
+  eapply (Forward_simulation (L1:=RTL.semantics p) (L2:=semantics p) lt match_states).
+  constructor 1; simpl.
+  - apply lt_wf.
+  - unfold match_states, match_inst_states. destruct 1; simpl; exists O.
+    destruct (find_funct_ptr_RTL_preserv b f) as (f0 & X1 & X2); subst; eauto.
+    exists (Callstate nil f0 nil m0). simpl; split; try econstructor; eauto.
+    + apply (Genv.init_mem_match (match_RTL_prog p)); auto.
+    + rewrite (Genv.find_symbol_match (match_RTL_prog p)).
+      rewrite (match_program_main (match_RTL_prog p)); eauto.
+  - unfold final_state, match_states, match_inst_states. intros i s1 s2 r (H0 & H1) H2; destruct H2.
+    destruct s2; simpl in * |- *; inversion H0; subst.
+    constructor.
+  - Local Hint Resolve star_refl: core.
+    intros; exploit step_complete; eauto.
+    destruct 1 as (idx' & s2' & X).
+    exists idx'. exists s2'. intuition (subst; eauto). 
+  - intros id; destruct (senv_preserved p); simpl in * |-. intuition.
+Qed.
+
+End COMPLETENESS.
diff --git a/scheduling/RTLpathLivegen.v b/scheduling/RTLpathLivegen.v
new file mode 100644
index 00000000..1f0ebe3c
--- /dev/null
+++ b/scheduling/RTLpathLivegen.v
@@ -0,0 +1,290 @@
+(** Building a RTLpath program with liveness annotation.
+*)
+
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import Lattice.
+Require Import AST.
+Require Import Op.
+Require Import Registers.
+Require Import Globalenvs Smallstep RTL RTLpath.
+Require Import Bool Errors.
+Require Import Program.
+
+Local Open Scope lazy_bool_scope.
+
+Local Open Scope option_monad_scope.
+
+Axiom build_path_map: RTL.function -> path_map.
+
+Extract Constant build_path_map => "RTLpathLivegenaux.build_path_map".
+
+Fixpoint list_mem (rl: list reg) (alive: Regset.t) {struct rl}: bool :=
+  match rl with
+  | nil => true
+  | r1 :: rs => Regset.mem r1 alive &&& list_mem rs alive
+  end.
+
+Definition exit_checker {A} (pm: path_map) (alive: Regset.t) (pc: node) (v:A): option A :=
+   SOME path <- pm!pc IN
+   ASSERT Regset.subset path.(input_regs) alive IN
+   Some v.
+
+Lemma exit_checker_path_entry A (pm: path_map) (alive: Regset.t) (pc: node) (v:A) res:
+  exit_checker pm alive pc v = Some res -> path_entry pm pc.
+Proof.
+  unfold exit_checker, path_entry.
+  inversion_SOME path; simpl; congruence.
+Qed.
+
+Lemma exit_checker_res A (pm: path_map) (alive: Regset.t) (pc: node) (v:A) res:
+  exit_checker pm alive pc v = Some res -> v=res.
+Proof.
+  unfold exit_checker, path_entry.
+  inversion_SOME path; try_simplify_someHyps.
+  inversion_ASSERT; try_simplify_someHyps.
+Qed.
+
+(* FIXME - what about trap? *)
+Definition iinst_checker (pm: path_map) (alive: Regset.t) (i: instruction): option (Regset.t * node)  :=
+  match i with
+  | Inop pc' => Some (alive, pc')
+  | Iop op args dst pc' => 
+      ASSERT list_mem args alive IN
+      Some (Regset.add dst alive, pc')
+  | Iload _ chunk addr args dst pc' =>
+      ASSERT list_mem args alive IN
+      Some (Regset.add dst alive, pc')
+  | Istore chunk addr args src pc' =>
+      ASSERT Regset.mem src alive IN
+      ASSERT list_mem args alive IN
+      Some (alive, pc')
+  | Icond cond args ifso ifnot _ =>
+      ASSERT list_mem args alive IN
+      exit_checker pm alive ifso (alive, ifnot)
+  | _ => None (* TODO jumptable ? *)
+  end.
+
+
+Local Hint Resolve exit_checker_path_entry: core.
+
+Lemma iinst_checker_path_entry (pm: path_map) (alive: Regset.t) (i: instruction) res pc:
+  iinst_checker pm alive i = Some res -> 
+  early_exit i = Some pc -> path_entry pm pc.
+Proof.
+  destruct i; simpl; try_simplify_someHyps; subst.
+  inversion_ASSERT; try_simplify_someHyps.
+Qed.
+
+Lemma iinst_checker_default_succ (pm: path_map) (alive: Regset.t) (i: instruction) res pc:
+  iinst_checker pm alive i = Some res -> 
+  pc = snd res ->
+  default_succ i = Some pc.
+Proof.
+  destruct i; simpl; try_simplify_someHyps; subst;
+  repeat (inversion_ASSERT); try_simplify_someHyps.
+  intros; exploit exit_checker_res; eauto.
+  intros; subst. simpl; auto.
+Qed.
+
+Fixpoint ipath_checker (ps:nat) (f: RTL.function) (pm: path_map) (alive: Regset.t) (pc:node): option (Regset.t * node) :=
+  match ps with
+  | O => Some (alive, pc)
+  | S p =>
+    SOME i <- f.(fn_code)!pc IN
+    SOME res <- iinst_checker pm alive i IN
+    ipath_checker p f pm (fst res) (snd res)
+  end.
+
+Lemma ipath_checker_wellformed f pm ps: forall alive pc res,
+   ipath_checker ps f pm alive pc = Some res -> 
+   wellformed_path f.(fn_code) pm 0 (snd res) ->
+   wellformed_path f.(fn_code) pm ps pc.
+Proof.
+  induction ps; simpl; try_simplify_someHyps.
+  inversion_SOME i; inversion_SOME res'.
+  intros. eapply wf_internal_node; eauto.
+  * eapply iinst_checker_default_succ; eauto.
+  * intros; eapply iinst_checker_path_entry; eauto.
+Qed.
+
+Definition reg_option_mem (or: option reg) (alive: Regset.t) :=
+  match or with None => true | Some r => Regset.mem r alive end.
+
+Definition reg_sum_mem (ros: reg + ident) (alive: Regset.t) :=
+  match ros with inl r => Regset.mem r alive | inr s => true end.
+
+(* NB: definition following [regmap_setres] in [RTL.step] semantics *)
+Definition reg_builtin_res (res: builtin_res reg) (alive: Regset.t): Regset.t :=
+  match res with
+  | BR r => Regset.add r alive
+  | _ => alive
+  end.
+
+Fixpoint exit_list_checker (pm: path_map) (alive: Regset.t) (l: list node): bool :=
+   match l with
+   | nil => true
+   | pc::l' => exit_checker pm alive pc tt &&& exit_list_checker pm alive l'
+   end.
+
+Lemma lazy_and_Some_true A (o: option A) (b: bool): o &&& b = true <-> (exists v, o = Some v) /\ b = true.
+Proof.
+  destruct o; simpl; intuition. 
+  - eauto.
+  - firstorder. try_simplify_someHyps.
+Qed.
+
+Lemma lazy_and_Some_tt_true (o: option unit) (b: bool): o &&& b = true <-> o = Some tt /\ b = true.
+Proof.
+   intros; rewrite lazy_and_Some_true; firstorder.
+   destruct x; auto.
+Qed.
+
+
+Lemma exit_list_checker_correct pm alive l pc:
+  exit_list_checker pm alive l = true -> List.In pc l -> exit_checker pm alive pc tt = Some tt.
+Proof.
+  intros EXIT PC; induction l; intuition.
+  simpl in * |-. rewrite lazy_and_Some_tt_true in EXIT.
+  firstorder (subst; eauto).
+Qed.
+
+Local Hint Resolve exit_list_checker_correct: core.
+
+Definition inst_checker (pm: path_map) (alive: Regset.t) (i: instruction): option unit :=
+   match i with
+   | Icall sig ros args res pc' =>
+      ASSERT list_mem args alive IN
+      ASSERT reg_sum_mem ros alive IN
+      exit_checker pm (Regset.add res alive) pc' tt
+   | Itailcall sig ros args =>
+      ASSERT list_mem args alive IN
+      ASSERT reg_sum_mem ros alive IN
+      Some tt
+   | Ibuiltin ef args res pc' =>
+      ASSERT list_mem (params_of_builtin_args args) alive IN 
+      exit_checker pm (reg_builtin_res res alive) pc' tt
+   | Ijumptable arg tbl =>
+      ASSERT Regset.mem arg alive IN
+      ASSERT exit_list_checker pm alive tbl IN
+      Some tt
+   | Ireturn optarg =>
+      ASSERT (reg_option_mem optarg) alive IN 
+      Some tt
+   | _ => 
+      SOME res <- iinst_checker pm alive i IN
+      exit_checker pm (fst res) (snd res) tt
+   end.
+
+Lemma inst_checker_wellformed (c:code) pc (pm: path_map) (alive: Regset.t) (i: instruction):
+  inst_checker pm alive i = Some tt ->
+  c!pc = Some i -> wellformed_path c pm 0 pc.
+Proof.
+  intros CHECK PC. eapply wf_last_node; eauto.
+  clear c pc PC. intros pc PC.
+  destruct i; simpl in * |- *; intuition (subst; eauto);
+  try (generalize CHECK; clear CHECK; try (inversion_SOME path); repeat inversion_ASSERT; try_simplify_someHyps).
+  intros X; exploit exit_checker_res; eauto.
+  clear X. intros; subst; eauto.
+Qed.
+
+Definition path_checker (f: RTL.function) pm (pc: node) (path:path_info): option unit :=
+   SOME res <- ipath_checker (path.(psize)) f pm (path.(input_regs)) pc  IN
+   SOME i <- f.(fn_code)!(snd res) IN
+   inst_checker pm (fst res) i.
+
+Lemma path_checker_wellformed f pm pc path:
+   path_checker f pm pc path = Some tt -> wellformed_path (f.(fn_code)) pm (path.(psize)) pc.
+Proof.
+  unfold path_checker.
+  inversion_SOME res.
+  inversion_SOME i.
+  intros; eapply ipath_checker_wellformed; eauto.
+  eapply inst_checker_wellformed; eauto.
+Qed.
+
+Fixpoint list_path_checker f pm (l:list (node*path_info)): bool :=
+  match l with
+  | nil => true
+  | (pc, path)::l' =>
+      path_checker f pm pc path &&& list_path_checker f pm l'
+  end.
+
+Lemma list_path_checker_correct f pm l: 
+  list_path_checker f pm l = true -> forall e, List.In e l -> path_checker f pm (fst e) (snd e) = Some tt.
+Proof.
+  intros CHECKER e H; induction l as [|(pc & path) l]; intuition.
+  simpl in * |- *. rewrite lazy_and_Some_tt_true in CHECKER. intuition (subst; auto).
+Qed.
+
+Definition function_checker (f: RTL.function) pm: bool := 
+  pm!(f.(fn_entrypoint)) &&& list_path_checker f pm (PTree.elements pm).
+
+Lemma function_checker_correct f pm pc path: 
+  function_checker f pm = true -> 
+  pm!pc = Some path -> 
+  path_checker f pm pc path = Some tt.
+Proof.
+  unfold function_checker; rewrite lazy_and_Some_true.
+  intros (ENTRY & PATH) PC.
+  exploit list_path_checker_correct; eauto.
+  - eapply PTree.elements_correct; eauto.
+  - simpl; auto.
+Qed.
+
+Lemma function_checker_wellformed_path_map f pm:
+  function_checker f pm = true -> wellformed_path_map f.(fn_code) pm.
+Proof.
+  unfold wellformed_path_map.
+  intros; eapply path_checker_wellformed; eauto.
+  intros; eapply function_checker_correct; eauto.
+Qed.
+
+Lemma function_checker_path_entry f pm:
+  function_checker f pm = true -> path_entry pm (f.(fn_entrypoint)).
+Proof.
+  unfold function_checker; rewrite lazy_and_Some_true;
+  unfold path_entry. firstorder congruence.
+Qed.
+
+Definition liveness_ok_function (f: function): Prop :=
+  forall pc path, f.(fn_path)!pc = Some path -> path_checker f f.(fn_path) pc path = Some tt.
+
+Program Definition transf_function (f: RTL.function): { r: res function | forall f', r = OK f' -> liveness_ok_function f' /\ f'.(fn_RTL) = f } :=
+  let pm := build_path_map f in
+  match function_checker f pm with
+  | true => OK {| fn_RTL := f; fn_path := pm |}
+  | false => Error(msg "RTLpathGen: function_checker failed")
+  end.
+Obligation 1.
+  apply function_checker_path_entry; auto.
+Qed.
+Obligation 2.
+  apply function_checker_wellformed_path_map; auto.
+Qed.
+Obligation 3.
+  unfold liveness_ok_function; simpl; intros; intuition.
+  apply function_checker_correct; auto.
+Qed.
+
+Definition transf_fundef (f: RTL.fundef) : res fundef :=
+  transf_partial_fundef (fun f => ` (transf_function f)) f.
+
+Inductive liveness_ok_fundef: fundef -> Prop :=
+  | liveness_ok_Internal f: liveness_ok_function f -> liveness_ok_fundef (Internal f)
+  | liveness_ok_External ef: liveness_ok_fundef (External ef).
+
+Lemma transf_fundef_correct f f':
+  transf_fundef f = OK f' -> (liveness_ok_fundef f') /\ fundef_RTL f' = f.
+Proof.
+  intros TRANSF; destruct f; simpl; monadInv TRANSF.
+  - destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+    destruct (H _ EQ).
+    intuition subst; auto. apply liveness_ok_Internal; auto.
+  - intuition. apply liveness_ok_External; auto.
+Qed.
+
+Definition transf_program (p: RTL.program) : res program :=
+  transform_partial_program transf_fundef p.
+
diff --git a/scheduling/RTLpathLivegenaux.ml b/scheduling/RTLpathLivegenaux.ml
new file mode 100644
index 00000000..dd971db8
--- /dev/null
+++ b/scheduling/RTLpathLivegenaux.ml
@@ -0,0 +1,309 @@
+open RTL
+open RTLpath
+open Registers
+open Maps
+open Camlcoq
+open Datatypes
+open Kildall
+open Lattice
+
+let debug_flag = ref false
+
+let dprintf fmt = let open Printf in
+  match !debug_flag with
+  | true -> printf fmt
+  | false -> ifprintf stdout fmt
+
+let get_some = function
+| None -> failwith "Got None instead of Some _"
+| Some thing -> thing
+
+let successors_inst = function
+| Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,_) -> [n1; n2]
+| Ijumptable (_,l) -> l
+| Itailcall _ | Ireturn _ -> []
+
+let predicted_successor = function
+| Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> Some n
+| Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> None
+| Icond (_,_,n1,n2,p) -> (
+    match p with
+    | Some true -> Some n1
+    | Some false -> Some n2
+    | None -> None )
+| Ijumptable _ | Itailcall _ | Ireturn _ -> None
+
+let non_predicted_successors i =
+  match predicted_successor i with
+  | None -> successors_inst i
+  | Some n -> List.filter (fun n' -> n != n') (successors_inst i)
+
+let rec list_to_regset = function
+  | [] -> Regset.empty
+  | r::l -> Regset.add r (list_to_regset l)
+
+let get_input_regs i = 
+  let empty = Regset.empty in
+  match i with
+  | Inop _ -> empty
+  | Iop (_,lr,_,_) | Iload (_,_,_,lr,_,_) | Icond (_,lr,_,_,_) -> list_to_regset lr
+  | Istore (_,_,lr,r,_) -> Regset.add r (list_to_regset lr)
+  | Icall (_, ri, lr, _, _) | Itailcall (_, ri, lr) -> begin
+      let rs = list_to_regset lr in
+      match ri with
+      | Coq_inr _ -> rs
+      | Coq_inl r -> Regset.add r rs
+    end
+  | Ibuiltin (_, lbr, _, _) -> list_to_regset @@ AST.params_of_builtin_args lbr
+  | Ijumptable (r, _) -> Regset.add r empty
+  | Ireturn opr -> (match opr with Some r -> Regset.add r empty | None -> empty)
+
+let get_output_reg i =
+  match i with
+  | Inop _ | Istore _ | Icond _ | Itailcall _ | Ijumptable _ | Ireturn _ -> None
+  | Iop (_, _, r, _) | Iload (_, _, _, _, r, _) | Icall (_, _, _, r, _) -> Some r
+  | Ibuiltin (_, _, brr, _) -> (match brr with AST.BR r -> Some r | _ -> None)
+
+(* adapted from Linearizeaux.get_join_points *)
+let get_join_points code entry =
+  let reached = ref (PTree.map (fun n i -> false) code) in
+  let reached_twice = ref (PTree.map (fun n i -> false) code) in
+  let rec traverse pc =
+    if get_some @@ PTree.get pc !reached then begin
+      if not (get_some @@ PTree.get pc !reached_twice) then
+        reached_twice := PTree.set pc true !reached_twice
+    end else begin
+      reached := PTree.set pc true !reached;
+      traverse_succs (successors_inst @@ get_some @@ PTree.get pc code)
+    end
+  and traverse_succs = function
+    | [] -> ()
+    | [pc] -> traverse pc
+    | pc :: l -> traverse pc; traverse_succs l
+  in traverse entry; !reached_twice
+
+(* Does not set the input_regs and liveouts field *)
+let get_path_map code entry join_points =
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let path_map = ref PTree.empty in
+  let rec dig_path e =
+    let psize = ref (-1) in
+    let path_successors = ref [] in
+    let rec dig_path_rec n : (path_info * node list) option =
+      if not (get_some @@ PTree.get n !visited) then 
+        let inst = get_some @@ PTree.get n code in
+        begin
+          visited := PTree.set n true !visited;
+          psize := !psize + 1;
+          let successor = match predicted_successor inst with
+          | None -> None
+          | Some n' -> if get_some @@ PTree.get n' join_points then None else Some n'
+          in match successor with
+          | Some n' -> begin
+                path_successors := !path_successors @ non_predicted_successors inst;
+                dig_path_rec n'
+              end
+          | None -> Some ({ psize = (Camlcoq.Nat.of_int !psize);
+                            input_regs = Regset.empty; output_regs = Regset.empty },
+                        !path_successors @ successors_inst inst)
+        end
+      else None
+    in match dig_path_rec e with
+    | None -> ()
+    | Some ret ->
+        let (path_info, succs) = ret in
+        begin
+          path_map := PTree.set e path_info !path_map;
+          List.iter dig_path succs
+        end
+  in begin
+    dig_path entry;
+    !path_map
+  end
+
+let print_regset rs = begin
+  dprintf "[";
+  List.iter (fun n -> dprintf "%d " (P.to_int n)) (Regset.elements rs);
+  dprintf "]"
+end
+
+let print_ptree_regset pt = begin
+  dprintf "[";
+  List.iter (fun (n, rs) ->
+    dprintf "\n\t";
+    dprintf "%d: " (P.to_int n);
+    print_regset rs
+  ) (PTree.elements pt);
+  dprintf "]"
+end
+
+let transfer f pc after = let open Liveness in
+  match PTree.get pc f.fn_code with
+  | Some i ->
+    (match i with
+    | Inop _ -> after
+    | Iop (_, args, res, _) ->
+        reg_list_live args (Regset.remove res after)
+    | Iload (_, _, _, args, dst, _) ->
+        reg_list_live args (Regset.remove dst after)
+    | Istore (_, _, args, src, _) ->
+        reg_list_live args (Regset.add src after)
+    | Icall (_, ros, args, res, _) ->
+        reg_list_live args (reg_sum_live ros (Regset.remove res after))
+    | Itailcall (_, ros, args) ->
+        reg_list_live args (reg_sum_live ros Regset.empty)
+    | Ibuiltin (_, args, res, _) ->
+        reg_list_live (AST.params_of_builtin_args args)
+          (reg_list_dead (AST.params_of_builtin_res res) after)
+    | Icond (_, args, _, _, _) ->
+        reg_list_live args after
+    | Ijumptable (arg, _) ->
+        Regset.add arg after
+    | Ireturn optarg ->
+        reg_option_live optarg Regset.empty)
+  | None -> Regset.empty
+
+module RegsetLat = LFSet(Regset)
+
+module DS = Backward_Dataflow_Solver(RegsetLat)(NodeSetBackward)
+
+let analyze f =
+  let liveouts = get_some @@ DS.fixpoint f.fn_code successors_instr (transfer f) in
+  PTree.map (fun n _ -> let lo = PMap.get n liveouts in transfer f n lo) f.fn_code
+
+(** OLD CODE - If needed to have our own kildall
+
+let transfer after = let open Liveness in function
+  | Inop _ -> after
+  | Iop (_, args, res, _) ->
+      reg_list_live args (Regset.remove res after)
+  | Iload (_, _, _, args, dst, _) ->
+      reg_list_live args (Regset.remove dst after)
+  | Istore (_, _, args, src, _) ->
+      reg_list_live args (Regset.add src after)
+  | Icall (_, ros, args, res, _) ->
+      reg_list_live args (reg_sum_live ros (Regset.remove res after))
+  | Itailcall (_, ros, args) ->
+      reg_list_live args (reg_sum_live ros Regset.empty)
+  | Ibuiltin (_, args, res, _) ->
+      reg_list_live (AST.params_of_builtin_args args)
+        (reg_list_dead (AST.params_of_builtin_res res) after)
+  | Icond (_, args, _, _, _) ->
+      reg_list_live args after
+  | Ijumptable (arg, _) ->
+      Regset.add arg after
+  | Ireturn optarg ->
+      reg_option_live optarg Regset.empty
+
+let get_last_nodes f =
+  let visited = ref (PTree.map (fun n i -> false) f.fn_code) in
+  let rec step n =
+    let inst = get_some @@ PTree.get n f.fn_code in
+    let successors = successors_inst inst in
+    if get_some @@ PTree.get n !visited then []
+    else begin
+
+let analyze f =
+  let liveness = ref (PTree.map (fun n i -> None) f.fn_code) in
+  let predecessors = Duplicateaux.get_predecessors_rtl f.fn_code in
+  let last_nodes = get_last_nodes f in
+  let rec step liveout n = (* liveout is the input_regs from the successor *)
+    let inst = get_some @@ PTree.get n f.fn_code in
+    let continue = ref true in
+    let alive = match get_some @@ PTree.get n !liveness with
+    | None -> transfer liveout inst
+    | Some pre_alive -> begin
+        let union = Regset.union pre_alive liveout in
+        let new_alive = transfer union inst in
+        (if Regset.equal pre_alive new_alive then continue := false);
+        new_alive
+      end
+    in begin
+      liveness := PTree.set n (Some alive) !liveness;
+      if !continue then
+        let preds = get_some @@ PTree.get n predecessors in
+        List.iter (step alive) preds
+    end
+  in begin
+    List.iter (step Regset.empty) last_nodes;
+    let liveness_noopt = PTree.map (fun n i -> get_some i) !liveness in
+    begin
+      debug_flag := true;
+      dprintf "Liveness: "; print_ptree_regset liveness_noopt; dprintf "\n";
+      debug_flag := false;
+      liveness_noopt
+    end
+  end
+*)
+
+let rec traverse code n size =
+  let inst = get_some @@ PTree.get n code in
+  if (size == 0) then inst
+  else 
+    let n' = get_some @@ predicted_successor inst in
+    traverse code n' (size-1)
+
+let get_outputs liveness code n pi =
+  let last_instruction = traverse code n (Camlcoq.Nat.to_int pi.psize) in
+  let path_last_successors = successors_inst last_instruction in
+  let list_input_regs = List.map (
+      fun n -> get_some @@ PTree.get n liveness
+    ) path_last_successors in
+  List.fold_left Regset.union Regset.empty list_input_regs
+
+let set_pathmap_liveness f pm =
+  let liveness = analyze f in
+  let new_pm = ref PTree.empty in
+  let code = f.fn_code in
+  begin
+    dprintf "Liveness: "; print_ptree_regset liveness; dprintf "\n";
+    List.iter (fun (n, pi) ->
+      let inputs = get_some @@ PTree.get n liveness in
+      let outputs = get_outputs liveness code n pi in
+      new_pm := PTree.set n
+        {psize=pi.psize; input_regs=inputs; output_regs=outputs} !new_pm
+    ) (PTree.elements pm);
+    !new_pm
+  end
+
+let print_true_nodes booltree = begin
+  dprintf "[";
+  List.iter (fun (n,b) ->
+    if b then dprintf "%d " (P.to_int n)
+  ) (PTree.elements booltree);
+  dprintf "]";
+end
+
+let print_path_info pi = begin
+  dprintf "(psize=%d; " (Camlcoq.Nat.to_int pi.psize);
+  dprintf "input_regs=";
+  print_regset pi.input_regs;
+  dprintf "; output_regs=";
+  print_regset pi.output_regs;
+  dprintf ")"
+end
+
+let print_path_map path_map = begin
+  dprintf "[";
+  List.iter (fun (n,pi) ->
+    dprintf "\n\t";
+    dprintf "%d: " (P.to_int n);
+    print_path_info pi
+  ) (PTree.elements path_map);
+  dprintf "]"
+end
+
+let build_path_map f = 
+  let code = f.fn_code in
+  let entry = f.fn_entrypoint in
+  let join_points = get_join_points code entry in
+  let path_map = set_pathmap_liveness f @@ get_path_map code entry join_points in
+  begin
+    dprintf "Join points: ";
+    print_true_nodes join_points;
+    dprintf "\nPath map: ";
+    print_path_map path_map;
+    dprintf "\n";
+    path_map
+  end
diff --git a/scheduling/RTLpathLivegenproof.v b/scheduling/RTLpathLivegenproof.v
new file mode 100644
index 00000000..c6125985
--- /dev/null
+++ b/scheduling/RTLpathLivegenproof.v
@@ -0,0 +1,736 @@
+(** Proofs of the liveness properties from the liveness checker of RTLpathLivengen.
+*)
+
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import Lattice.
+Require Import AST.
+Require Import Op.
+Require Import Registers.
+Require Import Globalenvs Smallstep RTL RTLpath RTLpathLivegen.
+Require Import Bool Errors Linking Values Events.
+Require Import Program.
+
+Definition match_prog (p: RTL.program) (tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog: RTL.program.
+Variables tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tpge := Genv.globalenv tprog.
+Let tge := Genv.globalenv (RTLpath.transf_program tprog).
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL).
+  apply (Genv.find_symbol_match (match_prog_RTL tprog)).
+Qed.
+
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_preserved: Senv.equiv ge tge.
+Proof.
+  eapply senv_transitivity. { eapply (Genv.senv_match TRANSL). }
+  eapply RTLpath.senv_preserved.
+Qed.
+
+Lemma function_ptr_preserved v f: Genv.find_funct_ptr ge v = Some f -> 
+  exists tf, Genv.find_funct_ptr tpge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros; apply (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+
+Lemma function_ptr_RTL_preserved v f: Genv.find_funct_ptr ge v = Some f -> Genv.find_funct_ptr tge v = Some f.
+Proof.
+  intros; exploit function_ptr_preserved; eauto.
+  intros (tf & Htf & TRANS).
+  exploit (Genv.find_funct_ptr_match (match_prog_RTL tprog)); eauto.
+  intros (cunit & tf0 & X & Y & DUM); subst.
+  unfold tge. rewrite X. 
+  exploit transf_fundef_correct; eauto. 
+  intuition subst; auto.
+Qed.
+
+Lemma find_function_preserved ros rs fd:
+  RTL.find_function ge ros rs = Some fd -> RTL.find_function tge ros rs = Some fd.
+Proof.
+  intros H; assert (X: exists tfd, find_function tpge ros rs = Some tfd /\ fd = fundef_RTL tfd). 
+  * destruct ros; simpl in * |- *.
+    + intros; exploit (Genv.find_funct_match TRANSL); eauto.
+      intros (cuint & tf & H1 & H2 & H3); subst; repeat econstructor; eauto.
+      exploit transf_fundef_correct; eauto. 
+      intuition auto.
+    + rewrite <- (Genv.find_symbol_match TRANSL) in H.
+    unfold tpge. destruct (Genv.find_symbol _ i); simpl; try congruence.
+    exploit function_ptr_preserved; eauto.
+    intros (tf & H1 & H2); subst; repeat econstructor; eauto.
+    exploit transf_fundef_correct; eauto. 
+    intuition auto.
+ * destruct X as (tf & X1 & X2); subst.
+   eapply find_function_RTL_match; eauto.
+Qed.
+
+
+Local Hint Resolve symbols_preserved senv_preserved: core.
+
+Lemma transf_program_RTL_correct: 
+  forward_simulation (RTL.semantics prog) (RTL.semantics (RTLpath.transf_program tprog)).
+Proof.
+  eapply forward_simulation_step with (match_states:=fun (s1 s2:RTL.state) => s1=s2); simpl; eauto.
+  - eapply senv_preserved.
+  - (* initial states *)
+    intros s1 INIT. destruct INIT as [b f m0 ge0 INIT SYMB PTR SIG]. eexists; intuition eauto.
+    econstructor; eauto.
+    + intros; eapply (Genv.init_mem_match (match_prog_RTL tprog)). apply (Genv.init_mem_match TRANSL); auto.
+    + rewrite symbols_preserved. 
+      replace (prog_main (RTLpath.transf_program tprog)) with (prog_main prog).
+      * eapply SYMB.
+      * erewrite (match_program_main (match_prog_RTL tprog)). erewrite (match_program_main TRANSL); auto.
+    + exploit function_ptr_RTL_preserved; eauto.
+  - intros; subst; auto.
+  - intros s t s2 STEP s1 H; subst.
+    eexists; intuition.
+    destruct STEP. 
+    + (* Inop *) eapply exec_Inop; eauto.
+    + (* Iop *) eapply exec_Iop; eauto.
+      erewrite eval_operation_preserved; eauto.
+    + (* Iload *) eapply exec_Iload; eauto. 
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Iload notrap1 *) eapply exec_Iload_notrap1; eauto.
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Iload notrap2 *) eapply exec_Iload_notrap2; eauto.
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Istore *) eapply exec_Istore; eauto.
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Icall *)
+        eapply RTL.exec_Icall; eauto.
+        eapply find_function_preserved; eauto.
+    + (* Itailcall *)
+        eapply RTL.exec_Itailcall; eauto.
+        eapply find_function_preserved; eauto.
+    + (* Ibuiltin *)
+      eapply RTL.exec_Ibuiltin; eauto.
+      * eapply eval_builtin_args_preserved; eauto.
+      * eapply external_call_symbols_preserved; eauto.
+    + (* Icond *)
+      eapply exec_Icond; eauto.
+    + (* Ijumptable *)
+      eapply RTL.exec_Ijumptable; eauto.
+    + (* Ireturn *)
+      eapply RTL.exec_Ireturn; eauto.
+    + (* exec_function_internal *)
+      eapply RTL.exec_function_internal; eauto.
+    + (* exec_function_external *)
+      eapply RTL.exec_function_external; eauto.
+      eapply external_call_symbols_preserved; eauto.
+    + (* exec_return *)
+      eapply RTL.exec_return; eauto.
+Qed.
+
+Theorem transf_program_correct: 
+  forward_simulation (RTL.semantics prog) (RTLpath.semantics tprog).
+Proof.
+  eapply compose_forward_simulations.
+  + eapply transf_program_RTL_correct.
+  + eapply RTLpath_complete.
+Qed.
+
+
+(* Properties used in hypothesis of [RTLpathLiveproofs.step_eqlive] theorem *)
+Theorem all_fundef_liveness_ok b f:
+  Genv.find_funct_ptr tpge b = Some f -> liveness_ok_fundef f.
+Proof.
+  unfold match_prog, match_program in TRANSL.
+  unfold Genv.find_funct_ptr, tpge; simpl; intro X.
+  destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence.
+  destruct y as [tf0|]; try congruence.
+  inversion X as [H1]. subst. clear X.
+  remember (@Gfun fundef unit f) as f2.
+  destruct H as [ctx' f1 f2 H0|]; try congruence.
+  inversion Heqf2 as [H2]. subst; clear Heqf2.
+  exploit transf_fundef_correct; eauto.
+  intuition.
+Qed.
+
+End PRESERVATION.
+
+Local Open Scope lazy_bool_scope.
+Local Open Scope option_monad_scope.
+
+Local Notation ext alive := (fun r => Regset.In r alive).
+
+Lemma regset_add_spec live r1 r2: Regset.In r1 (Regset.add r2 live) <-> (r1 = r2 \/ Regset.In r1 live).
+Proof.
+  destruct (Pos.eq_dec r1 r2).
+  - subst. intuition; eapply Regset.add_1; auto.
+  - intuition. 
+    * right. eapply Regset.add_3; eauto.
+    * eapply Regset.add_2; auto.
+Qed.
+
+Definition eqlive_reg (alive: Regset.elt -> Prop) (rs1 rs2: regset): Prop :=
+ forall r, (alive r) -> rs1#r = rs2#r. 
+
+Lemma eqlive_reg_refl alive rs: eqlive_reg alive rs rs.
+Proof.
+  unfold eqlive_reg; auto.
+Qed.
+
+Lemma eqlive_reg_symmetry alive rs1 rs2: eqlive_reg alive rs1 rs2 -> eqlive_reg alive rs2 rs1.
+Proof.
+  unfold eqlive_reg; intros; symmetry; auto.
+Qed.
+
+Lemma eqlive_reg_trans alive rs1 rs2 rs3: eqlive_reg alive rs1 rs2 -> eqlive_reg alive rs2 rs3 -> eqlive_reg alive rs1 rs3.
+Proof.
+  unfold eqlive_reg; intros H0 H1 r H. rewrite H0; eauto.
+Qed.
+
+Lemma eqlive_reg_update (alive: Regset.elt -> Prop) rs1 rs2 r v: eqlive_reg (fun r1 => r1 <> r /\ alive r1) rs1 rs2 -> eqlive_reg alive (rs1 # r <- v) (rs2 # r <- v).
+Proof.
+  unfold eqlive_reg; intros EQLIVE r0 ALIVE.
+  destruct (Pos.eq_dec r r0) as [H|H].
+  - subst. rewrite! Regmap.gss. auto.
+  - rewrite! Regmap.gso; auto.
+Qed.
+
+Lemma eqlive_reg_monotonic (alive1 alive2: Regset.elt -> Prop) rs1 rs2: eqlive_reg alive2 rs1 rs2 -> (forall r, alive1 r -> alive2 r) ->  eqlive_reg alive1 rs1 rs2.
+Proof.
+  unfold eqlive_reg; intuition.
+Qed.
+
+Lemma eqlive_reg_triv rs1 rs2: (forall r, rs1#r = rs2#r) <-> eqlive_reg (fun _ => True) rs1 rs2.
+Proof.
+  unfold eqlive_reg; intuition.
+Qed.
+
+Lemma eqlive_reg_triv_trans alive rs1 rs2 rs3: eqlive_reg alive rs1 rs2 -> (forall r, rs2#r = rs3#r) -> eqlive_reg alive rs1 rs3.
+Proof.
+  rewrite eqlive_reg_triv; intros; eapply eqlive_reg_trans; eauto.
+  eapply eqlive_reg_monotonic; eauto.
+  simpl; eauto.
+Qed.
+
+Local Hint Resolve Regset.mem_2 Regset.subset_2: core.
+
+Lemma lazy_and_true (b1 b2: bool): b1 &&& b2 = true <-> b1 = true /\ b2 = true.
+Proof.
+  destruct b1; simpl; intuition.
+Qed.
+
+Lemma list_mem_correct (rl: list reg) (alive: Regset.t):
+  list_mem rl alive = true -> forall r, List.In r rl -> ext alive r.
+Proof.
+  induction rl; simpl; try rewrite lazy_and_true; intuition subst; auto.
+Qed.
+
+Lemma eqlive_reg_list (alive: Regset.elt -> Prop) args rs1 rs2: eqlive_reg alive rs1 rs2 -> (forall r, List.In r args -> (alive r)) -> rs1##args = rs2##args.
+Proof.
+  induction args; simpl; auto.
+  intros EQLIVE ALIVE; rewrite IHargs; auto.
+  unfold eqlive_reg in EQLIVE.
+  rewrite EQLIVE; auto.
+Qed.
+
+Lemma eqlive_reg_listmem (alive: Regset.t) args rs1 rs2: eqlive_reg (ext alive) rs1 rs2 -> list_mem args alive = true -> rs1##args = rs2##args.
+Proof.
+  intros; eapply eqlive_reg_list; eauto.
+  intros; eapply list_mem_correct; eauto.
+Qed.
+
+Record eqlive_istate alive (st1 st2: istate): Prop :=
+   { eqlive_continue: icontinue st1 = icontinue st2;
+     eqlive_ipc: ipc st1 = ipc st2;
+     eqlive_irs: eqlive_reg alive (irs st1) (irs st2);
+     eqlive_imem: (imem st1) = (imem st2) }.
+
+Lemma iinst_checker_eqlive ge sp pm alive i res rs1 rs2 m st1: 
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  iinst_checker pm alive i = Some res -> 
+  istep ge i sp rs1 m = Some st1 -> 
+  exists st2, istep ge i sp rs2 m = Some st2 /\ eqlive_istate (ext (fst res)) st1 st2.
+Proof.
+  intros EQLIVE.
+  destruct i; simpl; try_simplify_someHyps.
+  - (* Inop *)
+    repeat (econstructor; eauto).
+  - (* Iop *)
+    inversion_ASSERT; try_simplify_someHyps.
+    inversion_SOME v. intros EVAL.
+    erewrite <- eqlive_reg_listmem; eauto.
+    try_simplify_someHyps.
+    repeat (econstructor; simpl; eauto).
+    eapply eqlive_reg_update.
+    eapply eqlive_reg_monotonic; eauto.
+    intros r0; rewrite regset_add_spec. 
+    intuition.
+  - (* Iload *)
+    inversion_ASSERT; try_simplify_someHyps.
+    destruct t. (* TODO - simplify that proof ? *)
+    + inversion_SOME a0. intros EVAL.
+      erewrite <- eqlive_reg_listmem; eauto.
+      try_simplify_someHyps.
+      inversion_SOME v; try_simplify_someHyps.
+      repeat (econstructor; simpl; eauto).
+      eapply eqlive_reg_update.
+      eapply eqlive_reg_monotonic; eauto.
+      intros r0; rewrite regset_add_spec.
+      intuition.
+    + erewrite <- (eqlive_reg_listmem _ _ rs1 rs2); eauto.
+      destruct (eval_addressing _ _ _ _).
+      * destruct (Memory.Mem.loadv _ _ _).
+        ** intros. inv H1. repeat (econstructor; simpl; eauto).
+           eapply eqlive_reg_update.
+           eapply eqlive_reg_monotonic; eauto.
+           intros r0; rewrite regset_add_spec.
+           intuition.
+        ** intros. inv H1. repeat (econstructor; simpl; eauto).
+           eapply eqlive_reg_update.
+           eapply eqlive_reg_monotonic; eauto.
+           intros r0; rewrite regset_add_spec.
+           intuition.
+      * intros. inv H1. repeat (econstructor; simpl; eauto).
+        eapply eqlive_reg_update.
+        eapply eqlive_reg_monotonic; eauto.
+        intros r0; rewrite regset_add_spec.
+        intuition.
+  - (* Istore *)
+    (repeat inversion_ASSERT); try_simplify_someHyps.
+    inversion_SOME a0. intros EVAL.
+    erewrite <- eqlive_reg_listmem; eauto.
+    rewrite <- (EQLIVE r); auto.
+    inversion_SOME v; try_simplify_someHyps.
+    try_simplify_someHyps.
+    repeat (econstructor; simpl; eauto).
+  - (* Icond *)
+    inversion_ASSERT.
+    inversion_SOME b. intros EVAL.
+    intros ARGS; erewrite <- eqlive_reg_listmem; eauto.
+    try_simplify_someHyps.
+    repeat (econstructor; simpl; eauto).
+    exploit exit_checker_res; eauto.
+    intro; subst; simpl. auto.
+Qed.
+
+Lemma iinst_checker_istep_continue ge sp pm alive i res rs m st: 
+  iinst_checker pm alive i = Some res ->
+  istep ge i sp rs m = Some st ->
+  icontinue st = true ->
+  (snd res)=(ipc st).
+Proof.
+  intros; exploit iinst_checker_default_succ; eauto.
+  erewrite istep_normal_exit; eauto.
+  congruence.
+Qed.
+
+Lemma exit_checker_eqlive A (pm: path_map) (alive: Regset.t) (pc: node) (v:A) res rs1 rs2:
+  exit_checker pm alive pc v = Some res ->  
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  exists path, pm!pc = Some path /\ eqlive_reg (ext path.(input_regs)) rs1 rs2.
+Proof.
+  unfold exit_checker.
+  inversion_SOME path.
+  inversion_ASSERT. try_simplify_someHyps.
+  repeat (econstructor; eauto).
+  intros; eapply eqlive_reg_monotonic; eauto.
+  intros; exploit Regset.subset_2; eauto.
+Qed.
+
+Lemma iinst_checker_eqlive_stopped ge sp pm alive i res rs1 rs2 m st1: 
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  istep ge i sp rs1 m = Some st1 ->
+  iinst_checker pm alive i = Some res -> 
+  icontinue st1 = false ->
+  exists path st2, pm!(ipc st1) = Some path /\ istep ge i sp rs2 m = Some st2 /\ eqlive_istate (ext path.(input_regs)) st1 st2.
+Proof.
+  intros EQLIVE.
+  set (tmp := istep ge i sp rs2).
+  destruct i; simpl; try_simplify_someHyps; repeat (inversion_ASSERT || inversion_SOME b);  try_simplify_someHyps; try congruence.
+  1-3: explore_destruct; simpl; try_simplify_someHyps; repeat (inversion_ASSERT || inversion_SOME b);  try_simplify_someHyps; try congruence.
+  (* Icond *)
+  unfold tmp; clear tmp; simpl.
+  intros EVAL; erewrite <- eqlive_reg_listmem; eauto.
+  try_simplify_someHyps.
+  destruct b eqn:EQb; simpl in * |-; try congruence.
+  intros; exploit exit_checker_eqlive; eauto.
+  intros (path & PATH & EQLIVE2).
+  repeat (econstructor; simpl; eauto).
+Qed.
+
+Lemma ipath_checker_eqlive_normal ge ps (f:function) sp pm: forall alive pc res rs1 rs2 m st1, 
+  eqlive_reg (ext alive) rs1 rs2 ->
+  ipath_checker ps f pm alive pc = Some res ->
+  isteps ge ps f sp rs1 m pc = Some st1 ->
+  icontinue st1 = true ->
+  exists st2, isteps ge ps f sp rs2 m pc = Some st2 /\ eqlive_istate (ext (fst res)) st1 st2.
+Proof.
+  induction ps as [|ps]; simpl; try_simplify_someHyps.
+  - repeat (econstructor; simpl; eauto).
+  - inversion_SOME i; try_simplify_someHyps.
+    inversion_SOME res0.
+    inversion_SOME st0.
+    intros.
+    exploit iinst_checker_eqlive; eauto.
+    destruct 1 as (st2 & ISTEP & [CONT PC RS MEM]).
+    try_simplify_someHyps.
+    rewrite <- CONT, <- MEM, <- PC.
+    destruct (icontinue st0) eqn:CONT'.
+    * intros; exploit iinst_checker_istep_continue; eauto.
+      rewrite <- PC; intros X; rewrite X in * |-. eauto.
+    * try_simplify_someHyps.
+      congruence.
+Qed.
+
+Lemma ipath_checker_isteps_continue ge ps (f:function) sp pm: forall alive pc res rs m st, 
+  ipath_checker ps f pm alive pc = Some res ->
+  isteps ge ps f sp rs m pc = Some st ->
+  icontinue st = true ->
+  (snd res)=(ipc st).
+Proof.
+  induction ps as [|ps]; simpl; try_simplify_someHyps.
+  inversion_SOME i; try_simplify_someHyps.
+  inversion_SOME res0.
+  inversion_SOME st0.
+  destruct (icontinue st0) eqn:CONT'.
+  - intros; exploit iinst_checker_istep_continue; eauto.
+    intros EQ; rewrite EQ in * |-; clear EQ; eauto.
+  - try_simplify_someHyps; congruence.
+Qed.
+
+Lemma ipath_checker_eqlive_stopped ge ps (f:function) sp pm: forall alive pc res rs1 rs2 m st1, 
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  ipath_checker ps f pm alive pc = Some res -> 
+  isteps ge ps f sp rs1 m pc = Some st1 -> 
+  icontinue st1 = false ->
+  exists path st2, pm!(ipc st1) = Some path /\ isteps ge ps f sp rs2 m pc = Some st2 /\ eqlive_istate (ext path.(input_regs)) st1 st2.
+Proof.
+  induction ps as [|ps]; simpl; try_simplify_someHyps; try congruence.
+  inversion_SOME i; try_simplify_someHyps.
+  inversion_SOME res0.
+  inversion_SOME st0.
+  intros.
+  destruct (icontinue st0) eqn:CONT'; try_simplify_someHyps; intros.
+  * intros; exploit iinst_checker_eqlive; eauto.
+    destruct 1 as (st2 & ISTEP & [CONT PC RS MEM]).
+    exploit iinst_checker_istep_continue; eauto.
+    intros PC'.
+    try_simplify_someHyps.
+    rewrite PC', <- CONT, <- MEM, <- PC, CONT'.
+    eauto.
+  * intros; exploit iinst_checker_eqlive_stopped; eauto.
+    intros EQLIVE; generalize EQLIVE; destruct 1 as (path & st2 & PATH & ISTEP & [CONT PC RS MEM]).
+    try_simplify_someHyps.
+    rewrite <- CONT, <- MEM, <- PC, CONT'.
+    try_simplify_someHyps.
+Qed.
+
+Inductive eqlive_stackframes: stackframe -> stackframe -> Prop :=
+  | eqlive_stackframes_intro path res f sp pc rs1 rs2
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: forall v, eqlive_reg (ext path.(input_regs)) (rs1 # res <- v) (rs2 # res <- v)):
+       eqlive_stackframes (Stackframe res f sp pc rs1) (Stackframe res f sp pc rs2). 
+
+Inductive eqlive_states: state -> state -> Prop :=
+  | eqlive_states_intro 
+      path st1 st2 f sp pc rs1 rs2 m
+      (STACKS: list_forall2 eqlive_stackframes st1 st2)
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: eqlive_reg (ext path.(input_regs)) rs1 rs2):
+      eqlive_states (State st1 f sp pc rs1 m) (State st2 f sp pc rs2 m)
+  | eqlive_states_call st1 st2 f args m
+      (LIVE: liveness_ok_fundef f)
+      (STACKS: list_forall2 eqlive_stackframes st1 st2):
+      eqlive_states (Callstate st1 f args m) (Callstate st2 f args m)
+  | eqlive_states_return st1 st2 v m
+      (STACKS: list_forall2 eqlive_stackframes st1 st2):
+      eqlive_states (Returnstate st1 v m) (Returnstate st2 v m).
+
+
+Section LivenessProperties.
+
+Variable prog: program.
+
+Let pge := Genv.globalenv prog.
+Let ge := Genv.globalenv (RTLpath.transf_program prog).
+
+Hypothesis all_fundef_liveness_ok: forall b f,
+  Genv.find_funct_ptr pge b = Some f -> 
+  liveness_ok_fundef f.
+
+Lemma find_funct_liveness_ok v fd:
+  Genv.find_funct pge v = Some fd -> liveness_ok_fundef fd.
+Proof.
+  unfold Genv.find_funct.
+  destruct v; try congruence.
+  destruct (Integers.Ptrofs.eq_dec _ _); try congruence.
+  eapply all_fundef_liveness_ok; eauto.
+Qed.
+
+Lemma find_function_liveness_ok ros rs f:
+  find_function pge ros rs = Some f -> liveness_ok_fundef f.
+Proof.
+  destruct ros as [r|i]; simpl.
+  - intros; eapply find_funct_liveness_ok; eauto.
+  - destruct (Genv.find_symbol pge i); try congruence.
+    eapply all_fundef_liveness_ok; eauto.
+Qed.
+
+Lemma find_function_eqlive alive ros rs1 rs2:
+  eqlive_reg (ext alive) rs1 rs2 ->
+  reg_sum_mem ros alive = true ->
+  find_function pge ros rs1 = find_function pge ros rs2.
+Proof.
+  intros EQLIVE.
+  destruct ros; simpl; auto.
+  intros H; erewrite (EQLIVE r); eauto.
+Qed.
+
+Lemma inst_checker_from_iinst_checker i sp rs m st pm alive:
+  istep ge i sp rs m = Some st -> 
+  inst_checker pm alive i = (SOME res <- iinst_checker pm alive i IN exit_checker pm (fst res) (snd res) tt).
+Proof.
+  destruct i; simpl; try congruence. 
+Qed.
+
+Lemma exit_checker_eqlive_ext1 (pm: path_map) (alive: Regset.t) (pc: node) r rs1 rs2:
+  exit_checker pm (Regset.add r alive) pc tt = Some tt ->  
+  eqlive_reg (ext alive) rs1 rs2 ->
+  exists path, pm!pc = Some path /\ (forall v, eqlive_reg (ext path.(input_regs)) (rs1 # r <- v) (rs2 # r <- v)).
+Proof.
+  unfold exit_checker.
+  inversion_SOME path.
+  inversion_ASSERT. try_simplify_someHyps.
+  repeat (econstructor; eauto).
+  intros; eapply eqlive_reg_update; eauto.
+  eapply eqlive_reg_monotonic; eauto.
+  intros r0 [X1 X2]; exploit Regset.subset_2; eauto.
+  rewrite regset_add_spec. intuition subst.
+Qed.
+
+Local Hint Resolve in_or_app: local.
+Lemma eqlive_eval_builtin_args alive rs1 rs2 sp m args vargs:
+  eqlive_reg alive rs1 rs2 ->
+  Events.eval_builtin_args ge (fun r => rs1 # r) sp m args vargs ->
+  (forall r, List.In r (params_of_builtin_args args) -> alive r) ->
+  Events.eval_builtin_args ge (fun r => rs2 # r) sp m args vargs.
+Proof.
+  unfold Events.eval_builtin_args.
+  intros EQLIVE; induction 1 as [|a1 al b1 bl EVAL1 EVALL]; simpl.
+  { econstructor; eauto. }
+  intro X. 
+  assert (X1: eqlive_reg (fun r => In r (params_of_builtin_arg a1)) rs1 rs2).
+  { eapply eqlive_reg_monotonic; eauto with local. }
+  lapply IHEVALL; eauto with local.
+  clear X IHEVALL; intro X. econstructor; eauto.
+  generalize X1; clear EVALL X1 X.
+  induction EVAL1; simpl; try (econstructor; eauto; fail).
+  - intros X1; erewrite X1; [ econstructor; eauto | eauto ].
+  - intros; econstructor.
+    + eapply IHEVAL1_1; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+    + eapply IHEVAL1_2; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+  - intros; econstructor.
+    + eapply IHEVAL1_1; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+    + eapply IHEVAL1_2; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+Qed.
+
+Lemma exit_checker_eqlive_builtin_res (pm: path_map) (alive: Regset.t) (pc: node) rs1 rs2 (res:builtin_res reg):
+  exit_checker pm (reg_builtin_res res alive) pc tt = Some tt ->
+  eqlive_reg (ext alive) rs1 rs2 ->
+  exists path, pm!pc = Some path /\ (forall vres, eqlive_reg (ext path.(input_regs)) (regmap_setres res vres rs1) (regmap_setres res vres rs2)).
+Proof.
+  destruct res; simpl.
+  - intros; exploit exit_checker_eqlive_ext1; eauto.
+  - intros; exploit exit_checker_eqlive; eauto.
+    intros (path & PATH & EQLIVE).
+    eexists; intuition eauto.
+  - intros; exploit exit_checker_eqlive; eauto.
+    intros (path & PATH & EQLIVE).
+    eexists; intuition eauto.
+Qed.
+
+Lemma exit_list_checker_eqlive (pm: path_map) (alive: Regset.t) (tbl: list node) rs1 rs2 pc: forall n,
+  exit_list_checker pm alive tbl = true ->  
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  list_nth_z tbl n = Some pc ->
+  exists path, pm!pc = Some path /\ eqlive_reg (ext path.(input_regs)) rs1 rs2.
+Proof.
+  induction tbl; simpl.
+  - intros; try congruence.
+  - intros n; rewrite lazy_and_Some_tt_true; destruct (zeq n 0) eqn: Hn.
+    * try_simplify_someHyps; intuition.
+      exploit exit_checker_eqlive; eauto.
+    * intuition. eapply IHtbl; eauto.
+Qed.
+
+Lemma inst_checker_eqlive (f: function) sp alive pc i rs1 rs2 m stk1 stk2 t s1:
+  list_forall2 eqlive_stackframes stk1 stk2 ->
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  liveness_ok_function f ->
+  (fn_code f) ! pc = Some i ->
+  path_last_step ge pge stk1 f sp pc rs1 m t s1 ->
+  inst_checker (fn_path f) alive i = Some tt -> 
+  exists s2, path_last_step ge pge stk2 f sp pc rs2 m t s2 /\ eqlive_states s1 s2.
+Proof.
+  intros STACKS EQLIVE LIVENESS PC; 
+  destruct 1 as [i' sp pc rs1 m st1|
+                 sp pc rs1 m sig ros args res pc' fd|
+                 st1 pc rs1 m sig ros args fd m'|
+                 sp pc rs1 m ef args res pc' vargs t vres m'|
+                 sp pc rs1 m arg tbl n pc' |
+                 st1 pc rs1 m optr m']; 
+  try_simplify_someHyps.
+  + (* istate *)
+    intros PC ISTEP. erewrite inst_checker_from_iinst_checker; eauto.
+    inversion_SOME res.
+    intros.
+    destruct (icontinue st1) eqn: CONT.
+    - (* CONT => true *)
+      exploit iinst_checker_eqlive; eauto.
+      destruct 1 as (st2 & ISTEP2 & [CONT' PC2 RS MEM]).
+      repeat (econstructor; simpl; eauto).
+      rewrite <- MEM, <- PC2.
+      exploit exit_checker_eqlive; eauto.
+      intros (path & PATH & EQLIVE2).
+      eapply eqlive_states_intro; eauto.
+      erewrite <- iinst_checker_istep_continue; eauto.
+    - (* CONT => false *)
+      intros; exploit iinst_checker_eqlive_stopped; eauto.
+      destruct 1 as (path & st2 & PATH & ISTEP2 & [CONT2 PC2 RS MEM]).
+      repeat (econstructor; simpl; eauto).
+      rewrite <- MEM, <- PC2.
+      eapply eqlive_states_intro; eauto.
+  + (* Icall *)
+    repeat inversion_ASSERT. intros.
+    exploit exit_checker_eqlive_ext1; eauto.
+    intros (path & PATH & EQLIVE2).
+    eexists; split.
+    - eapply exec_Icall; eauto.
+      erewrite <- find_function_eqlive; eauto.
+    - erewrite eqlive_reg_listmem; eauto.
+      eapply eqlive_states_call; eauto.
+      eapply find_function_liveness_ok; eauto.
+      repeat (econstructor; eauto).
+  + (* Itailcall *)
+    repeat inversion_ASSERT. intros.
+    eexists; split.
+    - eapply exec_Itailcall; eauto.
+      erewrite <- find_function_eqlive; eauto.
+    - erewrite eqlive_reg_listmem; eauto.
+      eapply eqlive_states_call; eauto.
+      eapply find_function_liveness_ok; eauto.
+  + (* Ibuiltin *)
+    repeat inversion_ASSERT. intros.
+    exploit exit_checker_eqlive_builtin_res; eauto.
+    intros (path & PATH & EQLIVE2).
+    eexists; split.
+    - eapply exec_Ibuiltin; eauto.
+      eapply eqlive_eval_builtin_args; eauto.
+      intros; eapply list_mem_correct; eauto.
+    - repeat (econstructor; simpl; eauto).
+  + (* Ijumptable *)
+    repeat inversion_ASSERT. intros.
+    exploit exit_list_checker_eqlive; eauto.
+    intros (path & PATH & EQLIVE2).
+    eexists; split.
+    - eapply exec_Ijumptable; eauto.
+      erewrite <- EQLIVE; eauto.
+    - repeat (econstructor; simpl; eauto).
+  + (* Ireturn *)
+    repeat inversion_ASSERT. intros.
+    eexists; split.
+    - eapply exec_Ireturn; eauto.
+    - destruct optr; simpl in * |- *.
+      * erewrite (EQLIVE r); eauto.
+        eapply eqlive_states_return; eauto.
+      * eapply eqlive_states_return; eauto.
+Qed.
+
+Lemma path_step_eqlive path stk1 f sp rs1 m pc t s1 stk2 rs2:
+  path_step ge pge (psize path) stk1 f sp rs1 m pc t s1 ->
+  list_forall2 eqlive_stackframes stk1 stk2 ->
+  eqlive_reg (ext (input_regs path)) rs1 rs2 ->
+  liveness_ok_function f ->
+  (fn_path f) ! pc = Some path ->
+   exists s2, path_step ge pge (psize path) stk2 f sp rs2 m pc t s2 /\ eqlive_states s1 s2.
+Proof.
+  intros STEP STACKS EQLIVE LIVE PC.
+  unfold liveness_ok_function in LIVE.
+  exploit LIVE; eauto.
+  unfold path_checker.
+  inversion_SOME res; (* destruct res as [alive pc']. *) intros ICHECK. (* simpl. *)
+  inversion_SOME i; intros PC'.
+  destruct STEP as [st ISTEPS CONT|].
+  - (* early_exit *)
+    intros; exploit ipath_checker_eqlive_stopped; eauto.
+    destruct 1 as (path2 & st2 & PATH & ISTEP2 & [CONT2 PC2 RS MEM]).
+    repeat (econstructor; simpl; eauto).
+    rewrite <- MEM, <- PC2.
+    eapply eqlive_states_intro; eauto.
+  - (* normal_exit *)
+    intros; exploit ipath_checker_eqlive_normal; eauto.
+    destruct 1 as (st2 & ISTEP2 & [CONT' PC2 RS MEM]).
+    exploit ipath_checker_isteps_continue; eauto.
+    intros PC3; rewrite <- PC3, <- PC2 in * |-.
+    exploit inst_checker_eqlive; eauto.
+    intros (s2 & LAST_STEP & EQLIVE2).
+     eexists; split; eauto.
+     eapply exec_normal_exit; eauto.
+     rewrite <- PC3, <- MEM; auto.
+Qed.
+
+Theorem step_eqlive t s1 s1' s2: 
+  step ge pge s1 t s1' ->
+  eqlive_states s1 s2 ->
+  exists s2', step ge pge s2 t s2' /\ eqlive_states s1' s2'.
+Proof.
+  destruct 1 as [path stack f sp rs m pc t s PATH STEP | | | ].
+  - intros EQLIVE; inv EQLIVE; simplify_someHyps. 
+    intro PATH.
+    exploit path_step_eqlive; eauto.
+    intros (s2 & STEP2 & EQUIV2). 
+    eexists; split; eauto.
+    eapply exec_path; eauto.
+  - intros EQLIVE; inv EQLIVE; inv LIVE.
+    exploit initialize_path. { eapply fn_entry_point_wf. }
+    intros (path & Hpath).
+    eexists; split.
+    * eapply exec_function_internal; eauto.
+    * eapply eqlive_states_intro; eauto.
+      eapply eqlive_reg_refl.
+  - intros EQLIVE; inv EQLIVE.
+    eexists; split.
+    * eapply exec_function_external; eauto.
+    * eapply eqlive_states_return; eauto.
+  - intros EQLIVE; inv EQLIVE.
+    inversion STACKS as [|s1 st1 s' s2 STACK STACKS']; subst; clear STACKS.
+    inv STACK.
+    exists (State s2 f sp pc (rs2 # res <- vres) m); split.
+    * apply exec_return.
+    * eapply eqlive_states_intro; eauto.
+Qed.
+
+End LivenessProperties.
diff --git a/scheduling/RTLpathSE_impl.v b/scheduling/RTLpathSE_impl.v
new file mode 100644
index 00000000..38930a75
--- /dev/null
+++ b/scheduling/RTLpathSE_impl.v
@@ -0,0 +1,1509 @@
+(** Implementation and refinement of the symbolic execution *)
+
+Require Import Coqlib Maps Floats.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL RTLpath.
+Require Import Errors.
+Require Import RTLpathSE_theory RTLpathLivegenproof.
+Require Import Axioms RTLpathSE_simu_specs.
+
+Local Open Scope error_monad_scope.
+Local Open Scope option_monad_scope.
+
+Require Import Impure.ImpHCons.
+Import Notations.
+Import HConsing.
+
+Local Open Scope impure.
+Local Open Scope hse.
+
+Import ListNotations.
+Local Open Scope list_scope.
+
+Definition XDEBUG {A} (x:A) (k: A -> ?? pstring): ?? unit := RET tt. (* TO REMOVE DEBUG INFO *)
+(* Definition XDEBUG {A} (x:A) (k: A -> ?? pstring): ?? unit := DO s <~ k x;; println ("DEBUG simu_check:" +; s). (* TO INSERT DEBUG INFO *) *)
+
+Definition DEBUG (s: pstring): ?? unit := XDEBUG tt (fun _ => RET s).
+
+(** * Implementation of Data-structure use in Hash-consing *)
+
+Definition hsval_get_hid (hsv: hsval): hashcode :=
+  match hsv with
+  | HSinput _ hid => hid
+  | HSop _ _ hid => hid
+  | HSload _ _ _ _ _ hid => hid
+  end.
+
+Definition list_hsval_get_hid (lhsv: list_hsval): hashcode :=
+  match lhsv with
+  | HSnil hid => hid
+  | HScons _ _ hid => hid
+  end.
+
+Definition hsmem_get_hid (hsm: hsmem): hashcode :=
+  match hsm with
+  | HSinit hid => hid
+  | HSstore _ _ _ _ _ hid => hid
+  end.
+
+Definition hsval_set_hid (hsv: hsval) (hid: hashcode): hsval :=
+  match hsv with
+  | HSinput r _ => HSinput r hid
+  | HSop o lhsv _ => HSop o lhsv hid
+  | HSload hsm trap chunk addr lhsv _ => HSload hsm trap chunk addr lhsv hid
+  end.
+
+Definition list_hsval_set_hid (lhsv: list_hsval) (hid: hashcode): list_hsval :=
+  match lhsv with
+  | HSnil _ => HSnil hid
+  | HScons hsv lhsv _ => HScons hsv lhsv hid
+  end.
+
+Definition hsmem_set_hid (hsm: hsmem) (hid: hashcode): hsmem :=
+  match hsm with
+  | HSinit _ => HSinit hid
+  | HSstore hsm chunk addr lhsv srce _ => HSstore hsm chunk addr lhsv srce hid
+  end.
+
+
+Lemma hsval_set_hid_correct x y ge sp rs0 m0:
+  hsval_set_hid x unknown_hid = hsval_set_hid y unknown_hid ->
+  seval_hsval ge sp x rs0 m0 = seval_hsval ge sp y rs0 m0.
+Proof.
+  destruct x, y; intro H; inversion H; subst; simpl; auto.
+Qed.
+Local Hint Resolve hsval_set_hid_correct: core.
+
+Lemma list_hsval_set_hid_correct x y ge sp rs0 m0:
+  list_hsval_set_hid x unknown_hid = list_hsval_set_hid y unknown_hid ->
+  seval_list_hsval ge sp x rs0 m0 = seval_list_hsval ge sp y rs0 m0.
+Proof.
+  destruct x, y; intro H; inversion H; subst; simpl; auto.
+Qed.
+Local Hint Resolve list_hsval_set_hid_correct: core.
+
+Lemma hsmem_set_hid_correct x y ge sp rs0 m0:
+  hsmem_set_hid x unknown_hid = hsmem_set_hid y unknown_hid ->
+  seval_hsmem ge sp x rs0 m0 = seval_hsmem ge sp y rs0 m0.
+Proof.
+  destruct x, y; intro H; inversion H; subst; simpl; auto.
+Qed.
+Local Hint Resolve hsmem_set_hid_correct: core.
+
+(** Now, we build the hash-Cons value from a "hash_eq".
+
+  Informal specification: 
+    [hash_eq] must be consistent with the "hashed" constructors defined above.
+
+  We expect that hashinfo values in the code of these "hashed" constructors verify:
+    (hash_eq (hdata x) (hdata y) ~> true) <-> (hcodes x)=(hcodes y)
+*)
+
+
+Definition hsval_hash_eq (sv1 sv2: hsval): ?? bool :=
+  match sv1, sv2 with
+  | HSinput r1 _, HSinput r2 _ => struct_eq r1 r2 (* NB: really need a struct_eq here ? *)
+  | HSop op1 lsv1 _, HSop op2 lsv2 _  =>
+     DO b1 <~ phys_eq lsv1 lsv2;;
+     if b1
+     then struct_eq op1 op2 (* NB: really need a struct_eq here ? *)
+     else RET false
+  | HSload sm1 trap1 chk1 addr1 lsv1 _, HSload sm2 trap2 chk2 addr2 lsv2 _ =>
+     DO b1 <~ phys_eq lsv1 lsv2;;
+     DO b2 <~ phys_eq sm1 sm2;;
+     DO b3 <~ struct_eq trap1 trap2;;
+     DO b4 <~ struct_eq chk1 chk2;;
+     if b1 && b2 && b3 && b4
+     then struct_eq addr1 addr2
+     else RET false
+  | _,_ => RET false
+  end.
+
+
+Lemma and_true_split a b: a && b = true <-> a = true /\ b = true.
+Proof.
+  destruct a; simpl; intuition.
+Qed.
+
+Lemma hsval_hash_eq_correct x y:
+  WHEN hsval_hash_eq x y ~> b THEN 
+   b = true -> hsval_set_hid x unknown_hid = hsval_set_hid y unknown_hid.
+Proof.
+  destruct x, y; wlp_simplify; try (rewrite !and_true_split in *); intuition; subst; try congruence.
+Qed.
+Global Opaque hsval_hash_eq.
+Local Hint Resolve hsval_hash_eq_correct: wlp.
+
+Definition list_hsval_hash_eq (lsv1 lsv2: list_hsval): ?? bool :=
+  match lsv1, lsv2 with
+  | HSnil _, HSnil _ => RET true
+  | HScons sv1 lsv1' _, HScons sv2 lsv2' _  =>
+     DO b <~ phys_eq lsv1' lsv2';;
+     if b 
+     then phys_eq sv1 sv2
+     else RET false
+  | _,_ => RET false
+  end.
+
+Lemma list_hsval_hash_eq_correct x y:
+  WHEN list_hsval_hash_eq x y ~> b THEN 
+   b = true -> list_hsval_set_hid x unknown_hid = list_hsval_set_hid y unknown_hid.
+Proof.
+  destruct x, y; wlp_simplify; try (rewrite !and_true_split in *); intuition; subst; try congruence.
+Qed.
+Global Opaque list_hsval_hash_eq.
+Local Hint Resolve list_hsval_hash_eq_correct: wlp.
+
+Definition hsmem_hash_eq (sm1 sm2: hsmem): ?? bool :=
+  match sm1, sm2 with
+  | HSinit _, HSinit _ => RET true
+  | HSstore sm1 chk1 addr1 lsv1 sv1 _, HSstore sm2 chk2 addr2 lsv2 sv2 _ =>
+     DO b1 <~ phys_eq lsv1 lsv2;;
+     DO b2 <~ phys_eq sm1 sm2;;
+     DO b3 <~ phys_eq sv1 sv2;;
+     DO b4 <~ struct_eq chk1 chk2;;
+     if b1 && b2 && b3 && b4
+     then struct_eq addr1 addr2
+     else RET false
+  | _,_ => RET false
+  end.
+
+Lemma hsmem_hash_eq_correct x y:
+  WHEN hsmem_hash_eq x y ~> b THEN 
+   b = true -> hsmem_set_hid x unknown_hid = hsmem_set_hid y unknown_hid.
+Proof.
+  destruct x, y; wlp_simplify; try (rewrite !and_true_split in *); intuition; subst; try congruence.
+Qed.
+Global Opaque hsmem_hash_eq.
+Local Hint Resolve hsmem_hash_eq_correct: wlp.
+
+
+Definition hSVAL: hashP hsval := {| hash_eq := hsval_hash_eq; get_hid:=hsval_get_hid; set_hid:=hsval_set_hid |}. 
+Definition hLSVAL: hashP list_hsval := {| hash_eq := list_hsval_hash_eq; get_hid:= list_hsval_get_hid; set_hid:= list_hsval_set_hid |}.
+Definition hSMEM: hashP hsmem := {| hash_eq := hsmem_hash_eq; get_hid:= hsmem_get_hid; set_hid:= hsmem_set_hid |}.
+
+Program Definition mk_hash_params: Dict.hash_params hsval :=
+ {|
+    Dict.test_eq := phys_eq;
+    Dict.hashing := fun (ht: hsval) => RET (hsval_get_hid ht);
+    Dict.log := fun hv =>
+         DO hv_name <~ string_of_hashcode (hsval_get_hid hv);;
+         println ("unexpected undef behavior of hashcode:" +; (CamlStr hv_name)) |}.
+Obligation 1.
+  wlp_simplify.
+Qed.
+
+(** ** various auxiliary (trivial lemmas) *)
+Lemma hsilocal_refines_sreg ge sp rs0 m0 hst st:
+  hsilocal_refines ge sp rs0 m0 hst st -> hsok_local ge sp rs0 m0 hst -> forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (si_sreg st r) rs0 m0.
+Proof.
+  unfold hsilocal_refines; intuition.
+Qed.
+Local Hint Resolve hsilocal_refines_sreg: core.
+
+Lemma hsilocal_refines_valid_pointer ge sp rs0 m0 hst st:
+  hsilocal_refines ge sp rs0 m0 hst st -> forall m b ofs, seval_smem ge sp st.(si_smem) rs0 m0 = Some m -> Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs.
+Proof.
+  unfold hsilocal_refines; intuition.
+Qed.
+Local Hint Resolve hsilocal_refines_valid_pointer: core.
+
+Lemma hsilocal_refines_smem_refines ge sp rs0 m0 hst st:
+  hsilocal_refines ge sp rs0 m0 hst st -> hsok_local ge sp rs0 m0 hst -> smem_refines ge sp rs0 m0 (hsi_smem hst) (st.(si_smem)).
+Proof.
+  unfold hsilocal_refines; intuition.
+Qed.
+Local Hint Resolve hsilocal_refines_smem_refines: core.
+
+Lemma hsistate_refines_dyn_exits ge sp rs0 m0 hst st:
+  hsistate_refines_dyn ge sp rs0 m0 hst st -> hsiexits_refines_dyn ge sp rs0 m0 (hsi_exits hst) (si_exits st).
+Proof.
+  unfold hsistate_refines_dyn; intuition.
+Qed.
+Local Hint Resolve hsistate_refines_dyn_exits: core.
+
+Lemma hsistate_refines_dyn_local ge sp rs0 m0 hst st:
+  hsistate_refines_dyn ge sp rs0 m0 hst st -> hsilocal_refines ge sp rs0 m0 (hsi_local hst) (si_local st).
+Proof.
+  unfold hsistate_refines_dyn; intuition.
+Qed.
+Local Hint Resolve hsistate_refines_dyn_local: core.
+
+Lemma hsistate_refines_dyn_nested ge sp rs0 m0 hst st:
+  hsistate_refines_dyn ge sp rs0 m0 hst st -> nested_sok ge sp rs0 m0 (si_local st) (si_exits st).
+Proof.
+  unfold hsistate_refines_dyn; intuition.
+Qed.
+Local Hint Resolve hsistate_refines_dyn_nested: core.
+
+(** * Implementation of symbolic execution *)
+Section CanonBuilding.
+
+Variable hC_hsval: hashinfo hsval -> ?? hsval.
+
+Hypothesis hC_hsval_correct: forall hs,
+  WHEN hC_hsval hs ~> hs' THEN forall ge sp rs0 m0,
+    seval_hsval ge sp (hdata hs) rs0 m0 = seval_hsval ge sp hs' rs0 m0.
+
+Variable hC_list_hsval: hashinfo list_hsval -> ?? list_hsval.
+Hypothesis hC_list_hsval_correct: forall lh,
+  WHEN hC_list_hsval lh ~> lh' THEN forall ge sp rs0 m0,
+    seval_list_hsval ge sp (hdata lh) rs0 m0 = seval_list_hsval ge sp lh' rs0 m0.
+
+Variable hC_hsmem: hashinfo hsmem -> ?? hsmem.
+Hypothesis hC_hsmem_correct: forall hm,
+  WHEN hC_hsmem hm ~> hm' THEN forall ge sp rs0 m0,
+    seval_hsmem ge sp (hdata hm) rs0 m0 = seval_hsmem ge sp hm' rs0 m0.
+
+(* First, we wrap constructors for hashed values !*)
+
+Definition reg_hcode := 1.
+Definition op_hcode := 2.
+Definition load_hcode := 3.
+
+Definition hSinput_hcodes (r: reg) :=
+   DO hc <~ hash reg_hcode;;
+   DO hv <~ hash r;;
+   RET [hc;hv].
+Extraction Inline hSinput_hcodes.
+
+Definition hSinput (r:reg): ?? hsval :=
+   DO hv <~ hSinput_hcodes r;;
+   hC_hsval {| hdata:=HSinput r unknown_hid; hcodes :=hv; |}.
+
+Lemma hSinput_correct r:
+  WHEN hSinput r ~> hv THEN forall ge sp rs0 m0,
+    sval_refines ge sp rs0 m0 hv (Sinput r).
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque hSinput.
+Local Hint Resolve hSinput_correct: wlp.
+
+Definition hSop_hcodes (op:operation) (lhsv: list_hsval) :=
+   DO hc <~ hash op_hcode;;
+   DO hv <~ hash op;;
+   RET [hc;hv;list_hsval_get_hid lhsv].
+Extraction Inline hSop_hcodes.
+
+Definition hSop (op:operation) (lhsv: list_hsval): ?? hsval :=
+   DO hv <~ hSop_hcodes op lhsv;;
+   hC_hsval {| hdata:=HSop op lhsv unknown_hid; hcodes :=hv |}.
+
+Lemma hSop_correct op lhsv:
+  WHEN hSop op lhsv ~> hv THEN forall ge sp rs0 m0 lsv sm m
+   (MEM: seval_smem ge sp sm rs0 m0 = Some m)
+   (MVALID: forall b ofs, Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+   (LR: list_sval_refines ge sp rs0 m0 lhsv lsv),
+   sval_refines ge sp rs0 m0 hv (Sop op lsv sm).
+Proof.
+  wlp_simplify.
+  rewrite <- H, MEM, LR.
+  destruct (seval_list_sval _ _ lsv _); try congruence.
+  eapply op_valid_pointer_eq; eauto.
+Qed.
+Global Opaque hSop.
+Local Hint Resolve hSop_correct: wlp.
+
+Definition hSload_hcodes (hsm: hsmem) (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval):=
+   DO hc <~ hash load_hcode;;
+   DO hv1 <~ hash trap;;
+   DO hv2 <~ hash chunk;;
+   DO hv3 <~ hash addr;;
+   RET [hc; hsmem_get_hid hsm; hv1; hv2; hv3; list_hsval_get_hid lhsv].
+Extraction Inline hSload_hcodes.
+
+Definition hSload (hsm: hsmem) (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval): ?? hsval :=
+   DO hv <~ hSload_hcodes hsm trap chunk addr lhsv;;
+   hC_hsval {| hdata := HSload hsm trap chunk addr lhsv unknown_hid; hcodes := hv |}.
+
+Lemma hSload_correct hsm trap chunk addr lhsv:
+  WHEN hSload hsm trap chunk addr lhsv ~> hv THEN forall ge sp rs0 m0 lsv sm
+    (LR: list_sval_refines ge sp rs0 m0 lhsv lsv)
+    (MR: smem_refines ge sp rs0 m0 hsm sm),
+    sval_refines ge sp rs0 m0 hv (Sload sm trap chunk addr lsv).
+Proof.
+  wlp_simplify.
+  rewrite <- LR, <- MR.
+  auto.
+Qed.
+Global Opaque hSload.
+Local Hint Resolve hSload_correct: wlp.
+
+Definition hSnil (_: unit): ?? list_hsval :=
+   hC_list_hsval {| hdata := HSnil unknown_hid; hcodes := nil |}.
+
+Lemma hSnil_correct:
+  WHEN hSnil() ~> hv THEN forall ge sp rs0 m0,
+    list_sval_refines ge sp rs0 m0 hv Snil.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque hSnil.
+Local Hint Resolve hSnil_correct: wlp.
+
+Definition hScons (hsv: hsval) (lhsv: list_hsval): ?? list_hsval :=
+   hC_list_hsval {| hdata := HScons hsv lhsv unknown_hid; hcodes := [hsval_get_hid hsv; list_hsval_get_hid lhsv] |}.
+
+Lemma hScons_correct hsv lhsv:
+  WHEN hScons hsv lhsv ~> lhsv' THEN forall ge sp rs0 m0 sv lsv
+    (VR: sval_refines ge sp rs0 m0 hsv sv)
+    (LR: list_sval_refines ge sp rs0 m0 lhsv lsv),
+    list_sval_refines ge sp rs0 m0 lhsv' (Scons sv lsv).
+Proof.
+  wlp_simplify.
+  rewrite <- VR, <- LR.
+  auto.
+Qed.
+Global Opaque hScons.
+Local Hint Resolve hScons_correct: wlp.
+
+Definition hSinit (_: unit): ?? hsmem :=
+   hC_hsmem {| hdata := HSinit unknown_hid; hcodes := nil |}.
+
+Lemma hSinit_correct:
+  WHEN hSinit() ~> hm THEN forall ge sp rs0 m0,
+    smem_refines ge sp rs0 m0 hm Sinit.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque hSinit.
+Local Hint Resolve hSinit_correct: wlp.
+
+Definition hSstore_hcodes (hsm: hsmem) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (srce: hsval):=
+   DO hv1 <~ hash chunk;;
+   DO hv2 <~ hash addr;;
+   RET [hsmem_get_hid hsm; hv1; hv2; list_hsval_get_hid lhsv; hsval_get_hid srce].
+Extraction Inline hSstore_hcodes.
+
+Definition hSstore (hsm: hsmem) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (srce: hsval): ?? hsmem :=
+   DO hv <~ hSstore_hcodes hsm chunk addr lhsv srce;;
+   hC_hsmem {| hdata := HSstore hsm chunk addr lhsv srce unknown_hid; hcodes := hv |}.
+
+Lemma hSstore_correct hsm chunk addr lhsv hsv:
+  WHEN hSstore hsm chunk addr lhsv hsv ~> hsm' THEN forall ge sp rs0 m0 lsv sm sv
+    (LR: list_sval_refines ge sp rs0 m0 lhsv lsv)
+    (MR: smem_refines ge sp rs0 m0 hsm sm)
+    (VR: sval_refines ge sp rs0 m0 hsv sv),
+    smem_refines ge sp rs0 m0 hsm' (Sstore sm chunk addr lsv sv).
+Proof.
+  wlp_simplify.
+  rewrite <- LR, <- MR, <- VR.
+  auto.
+Qed.
+Global Opaque hSstore.
+Local Hint Resolve hSstore_correct: wlp.
+
+Definition hsi_sreg_get (hst: PTree.t hsval) r: ?? hsval :=
+   match PTree.get r hst with 
+   | None => hSinput r
+   | Some sv => RET sv
+   end.
+
+Lemma hsi_sreg_get_correct hst r:
+  WHEN hsi_sreg_get hst r ~> hsv THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    sval_refines ge sp rs0 m0 hsv (f r).
+Proof.
+  unfold hsi_sreg_eval, hsi_sreg_proj; wlp_simplify; rewrite <- RR; try_simplify_someHyps.
+Qed.
+Global Opaque hsi_sreg_get.
+Local Hint Resolve hsi_sreg_get_correct: wlp.
+
+Fixpoint hlist_args (hst: PTree.t hsval) (l: list reg): ?? list_hsval :=
+  match l with
+  | nil => hSnil()
+  | r::l =>
+    DO v <~ hsi_sreg_get hst r;;
+    DO lhsv <~ hlist_args hst l;;
+    hScons v lhsv
+  end.
+
+Lemma hlist_args_correct hst l:
+  WHEN hlist_args hst l ~> lhsv THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    list_sval_refines ge sp rs0 m0 lhsv (list_sval_inj (List.map f l)).
+Proof.
+  induction l; wlp_simplify.
+Qed.
+Global Opaque hlist_args.
+Local Hint Resolve hlist_args_correct: wlp.
+
+(** ** Assignment of memory *)
+Definition hslocal_set_smem (hst:hsistate_local) hm :=
+  {| hsi_smem := hm;
+     hsi_ok_lsval := hsi_ok_lsval hst;
+     hsi_sreg:= hsi_sreg hst
+  |}.
+
+Lemma sok_local_set_mem ge sp rs0 m0 st sm:
+  sok_local ge sp rs0 m0 (slocal_set_smem st sm)
+  <-> (sok_local ge sp rs0 m0 st /\ seval_smem ge sp sm rs0 m0 <> None).
+Proof.
+  unfold slocal_set_smem, sok_local; simpl; intuition (subst; eauto).
+Qed.
+
+Lemma hsok_local_set_mem ge sp rs0 m0 hst hsm:
+  (seval_hsmem ge sp (hsi_smem hst) rs0 m0 = None -> seval_hsmem ge sp hsm rs0 m0 = None) ->
+  hsok_local ge sp rs0 m0 (hslocal_set_smem hst hsm)
+  <-> (hsok_local ge sp rs0 m0 hst /\ seval_hsmem ge sp hsm rs0 m0 <> None).
+Proof.
+  unfold hslocal_set_smem, hsok_local; simpl; intuition.
+Qed.
+
+Lemma hslocal_set_mem_correct ge sp rs0 m0 hst st hsm sm:
+  (seval_hsmem ge sp (hsi_smem hst) rs0 m0 = None -> seval_hsmem ge sp hsm rs0 m0 = None) ->
+  (forall m b ofs, seval_smem ge sp sm rs0 m0 = Some m -> Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs) ->
+  hsilocal_refines ge sp rs0 m0 hst st ->
+  (hsok_local ge sp rs0 m0 hst -> smem_refines ge sp rs0 m0 hsm sm) ->
+  hsilocal_refines ge sp rs0 m0 (hslocal_set_smem hst hsm) (slocal_set_smem st sm).
+Proof.
+  intros PRESERV SMVALID (OKEQ & SMEMEQ' & REGEQ & MVALID) SMEMEQ.
+  split; rewrite! hsok_local_set_mem; simpl; eauto; try tauto.
+  rewrite sok_local_set_mem.
+  intuition congruence.
+Qed.
+
+Definition hslocal_store (hst: hsistate_local) chunk addr args src: ?? hsistate_local :=
+   let pt := hst.(hsi_sreg) in
+   DO hargs <~ hlist_args pt args;;
+   DO hsrc <~ hsi_sreg_get pt src;;
+   DO hm <~ hSstore hst chunk addr hargs hsrc;;
+   RET (hslocal_set_smem hst hm).
+
+Lemma hslocal_store_correct hst chunk addr args src:
+  WHEN hslocal_store hst chunk addr args src ~> hst' THEN forall ge sp rs0 m0 st
+    (REF: hsilocal_refines ge sp rs0 m0 hst st),
+    hsilocal_refines ge sp rs0 m0 hst' (slocal_store st chunk addr args src).
+Proof.
+  wlp_simplify.
+  eapply hslocal_set_mem_correct; simpl; eauto.
+  + intros X; erewrite H1; eauto.
+    rewrite X. simplify_SOME z.
+  + unfold hsilocal_refines in *; 
+    simplify_SOME z; intuition. 
+    erewrite <- Mem.storev_preserv_valid; [| eauto].
+    eauto.
+  + unfold hsilocal_refines in *; intuition eauto.
+Qed.
+Global Opaque hslocal_store.
+Local Hint Resolve hslocal_store_correct: wlp.
+
+(** ** Assignment of local state *)
+
+Definition hsist_set_local (hst: hsistate) (pc: node) (hnxt: hsistate_local): hsistate :=
+   {| hsi_pc := pc; hsi_exits := hst.(hsi_exits); hsi_local:= hnxt |}.
+
+Lemma hsist_set_local_correct_stat hst st pc hnxt nxt:
+  hsistate_refines_stat hst st ->
+  hsistate_refines_stat (hsist_set_local hst pc hnxt) (sist_set_local st pc nxt).
+Proof.
+  unfold hsistate_refines_stat; simpl; intuition.
+Qed.
+
+Lemma hsist_set_local_correct_dyn ge sp rs0 m0 hst st pc hnxt nxt:
+  hsistate_refines_dyn ge sp rs0 m0 hst st ->
+  hsilocal_refines ge sp rs0 m0 hnxt nxt ->
+  (sok_local ge sp rs0 m0 nxt -> sok_local ge sp rs0 m0 (si_local st)) ->
+  hsistate_refines_dyn ge sp rs0 m0 (hsist_set_local hst pc hnxt) (sist_set_local st pc nxt).
+Proof.
+  unfold hsistate_refines_dyn; simpl.
+  intros (EREF & LREF & NESTED) LREFN SOK; intuition.
+  destruct NESTED as [|st0 se lse TOP NEST]; econstructor; simpl; auto.
+Qed.
+
+(** ** Assignment of registers *)
+
+(** locally new symbolic values during symbolic execution *)
+Inductive root_sval: Type :=
+| Rop (op: operation)
+| Rload (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing)
+.
+
+Definition root_apply (rsv: root_sval) (lr: list reg) (st: sistate_local): sval :=
+  let lsv := list_sval_inj (List.map (si_sreg st) lr) in
+  let sm := si_smem st in
+  match rsv with
+  | Rop op => Sop op lsv sm
+  | Rload trap chunk addr => Sload sm trap chunk addr lsv
+  end.
+Coercion root_apply: root_sval >-> Funclass.
+
+Definition root_happly (rsv: root_sval) (lr: list reg) (hst: hsistate_local) : ?? hsval :=
+  DO lhsv <~ hlist_args hst lr;;
+  match rsv with
+  | Rop op => hSop op lhsv
+  | Rload trap chunk addr => hSload hst trap chunk addr lhsv
+  end.
+
+Lemma root_happly_correct (rsv: root_sval) lr hst:
+  WHEN root_happly rsv lr hst ~> hv' THEN forall ge sp rs0 m0 st
+    (REF:hsilocal_refines ge sp rs0 m0 hst st)
+    (OK:hsok_local ge sp rs0 m0 hst),
+    sval_refines ge sp rs0 m0 hv' (rsv lr st).
+Proof.
+   unfold hsilocal_refines, root_apply, root_happly; destruct rsv; wlp_simplify.
+   unfold sok_local in *.
+   generalize (H0 ge sp rs0 m0 (list_sval_inj (map (si_sreg st) lr)) (si_smem st)); clear H0.
+   destruct (seval_smem ge sp (si_smem st) rs0 m0) as [m|] eqn:X; eauto.
+   intuition congruence.
+Qed.
+Global Opaque root_happly.
+Hint Resolve root_happly_correct: wlp.
+
+Local Open Scope lazy_bool_scope.
+
+(* NB: return [false] if the rsv cannot fail *)
+Definition may_trap (rsv: root_sval) (lr: list reg): bool :=
+  match rsv with 
+  | Rop op => is_trapping_op op ||| negb (Nat.eqb (length lr) (args_of_operation op))  (* cf. lemma is_trapping_op_sound *)
+  | Rload TRAP _ _  => true
+  | _ => false
+  end.
+
+Lemma lazy_orb_negb_false (b1 b2:bool):
+  (b1 ||| negb b2) = false <-> (b1 = false /\ b2 = true).
+Proof.
+  unfold negb; explore; simpl; intuition (try congruence).
+Qed.
+
+Lemma seval_list_sval_length ge sp rs0 m0 (f: reg -> sval) (l:list reg):
+  forall l', seval_list_sval ge sp (list_sval_inj (List.map f l)) rs0 m0 = Some l' ->
+  Datatypes.length l = Datatypes.length l'.
+Proof.
+  induction l.
+  - simpl. intros. inv H. reflexivity.
+  - simpl. intros. destruct (seval_sval _ _ _ _ _); [|discriminate].
+    destruct (seval_list_sval _ _ _ _ _) eqn:SLS; [|discriminate]. inv H. simpl.
+    erewrite IHl; eauto.
+Qed.
+
+Lemma may_trap_correct (ge: RTL.genv) (sp:val) (rsv: root_sval) (rs0: regset) (m0: mem) (lr: list reg) st:
+  may_trap rsv lr = false -> 
+  seval_list_sval ge sp (list_sval_inj (List.map (si_sreg st) lr)) rs0 m0 <> None ->
+  seval_smem ge sp (si_smem st) rs0 m0 <> None ->
+  seval_sval ge sp (rsv lr st) rs0 m0 <> None.
+Proof.
+  destruct rsv; simpl; try congruence.
+  - rewrite lazy_orb_negb_false. intros (TRAP1 & TRAP2) OK1 OK2.
+    explore; try congruence.
+    eapply is_trapping_op_sound; eauto.
+    erewrite <- seval_list_sval_length; eauto.
+    apply Nat.eqb_eq in TRAP2.
+    assumption.
+  - intros X OK1 OK2.
+    explore; try congruence.
+Qed.
+
+(** simplify a symbolic value before assignment to a register *)
+Definition simplify (rsv: root_sval) (lr: list reg) (hst: hsistate_local): ?? hsval :=
+  match rsv with
+  | Rop op =>
+     match is_move_operation op lr with
+     | Some arg => hsi_sreg_get hst arg (** optimization of Omove *)
+     | None =>
+       DO lhsv <~ hlist_args hst lr;;
+       hSop op lhsv
+     end
+  | Rload _ chunk addr => 
+       DO lhsv <~ hlist_args hst lr;;
+       hSload hst NOTRAP chunk addr lhsv
+  end.
+
+Lemma simplify_correct rsv lr hst:
+  WHEN simplify rsv lr hst ~> hv THEN forall ge sp rs0 m0 st
+    (REF: hsilocal_refines ge sp rs0 m0 hst st)
+    (OK0: hsok_local ge sp rs0 m0 hst)
+    (OK1: seval_sval ge sp (rsv lr st) rs0 m0 <> None),
+    sval_refines ge sp rs0 m0 hv (rsv lr st).
+Proof.
+  destruct rsv; simpl; auto.
+  - (* Rop *)
+    destruct (is_move_operation _ _) eqn: Hmove; wlp_simplify.
+    + exploit is_move_operation_correct; eauto.
+      intros (Hop & Hlsv); subst; simpl in *.
+      simplify_SOME z.
+      * erewrite H; eauto.
+      * try_simplify_someHyps; congruence.
+      * congruence.
+    + clear Hmove.
+      generalize (H0 ge sp rs0 m0 (list_sval_inj (map (si_sreg st) lr)) (si_smem st)); clear H0.
+      destruct (seval_smem ge sp (si_smem st) rs0 m0) as [m|] eqn:X; eauto.
+      intro H0; clear H0; simplify_SOME z; congruence. (* absurd case *)
+  - (* Rload *)
+    destruct trap; wlp_simplify.
+    erewrite H0; eauto.
+    erewrite H; eauto.
+    erewrite hsilocal_refines_smem_refines; eauto.
+    destruct (seval_list_sval _ _ _ _) as [args|] eqn: Hargs; try congruence.
+    destruct (eval_addressing _ _ _ _) as [a|] eqn: Ha; try congruence.
+    destruct (seval_smem _ _ _ _) as [m|] eqn: Hm; try congruence.
+    destruct (Mem.loadv _ _ _); try congruence.
+Qed.
+Global Opaque simplify.
+Local Hint Resolve simplify_correct: wlp.
+
+Definition red_PTree_set (r: reg) (hsv: hsval) (hst: PTree.t hsval): PTree.t hsval :=
+  match hsv with
+  | HSinput r' _ =>
+     if Pos.eq_dec r r' 
+     then PTree.remove r' hst
+     else PTree.set r hsv hst
+  | _ => PTree.set r hsv hst
+  end.
+
+Lemma red_PTree_set_correct (r r0:reg) hsv hst ge sp rs0 m0:
+  hsi_sreg_eval ge sp (red_PTree_set r hsv hst) r0 rs0 m0 = hsi_sreg_eval ge sp (PTree.set r hsv hst) r0 rs0 m0.
+Proof.
+  destruct hsv; simpl; auto.
+  destruct (Pos.eq_dec r r1); auto.
+  subst; unfold hsi_sreg_eval, hsi_sreg_proj.
+  destruct (Pos.eq_dec r0 r1); auto.
+  - subst; rewrite PTree.grs, PTree.gss; simpl; auto.
+  - rewrite PTree.gro, PTree.gso; simpl; auto.
+Qed.
+
+Lemma red_PTree_set_refines (r r0:reg) hsv hst sv st ge sp rs0 m0:
+ hsilocal_refines ge sp rs0 m0 hst st ->
+ sval_refines ge sp rs0 m0 hsv sv ->
+ hsok_local ge sp rs0 m0 hst ->
+ hsi_sreg_eval ge sp (red_PTree_set r hsv hst) r0 rs0 m0 = seval_sval ge sp (if Pos.eq_dec r r0 then sv else si_sreg st r0) rs0 m0.
+Proof.
+  intros; rewrite red_PTree_set_correct.
+  exploit hsilocal_refines_sreg; eauto.
+  unfold hsi_sreg_eval, hsi_sreg_proj.
+  destruct (Pos.eq_dec r r0); auto.
+  - subst. rewrite PTree.gss; simpl; auto.
+  - rewrite PTree.gso; simpl; eauto.
+Qed.
+
+Lemma sok_local_set_sreg (rsv:root_sval) ge sp rs0 m0 st r lr:
+  sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st))
+  <-> (sok_local ge sp rs0 m0 st /\ seval_sval ge sp (rsv lr st) rs0 m0 <> None).
+Proof.
+  unfold slocal_set_sreg, sok_local; simpl; split.
+  + intros ((SVAL0 & PRE) & SMEM & SVAL).
+    repeat (split; try tauto).
+    - intros r0; generalize (SVAL r0); clear SVAL; destruct (Pos.eq_dec r r0); try congruence.
+    - generalize (SVAL r); clear SVAL; destruct (Pos.eq_dec r r); try congruence.
+  + intros ((PRE & SMEM & SVAL0) & SVAL).
+    repeat (split; try tauto; eauto).
+    intros r0;  destruct (Pos.eq_dec r r0); try congruence.
+Qed.
+
+Definition hslocal_set_sreg (hst: hsistate_local) (r: reg) (rsv: root_sval) (lr: list reg): ?? hsistate_local :=
+  DO ok_lhsv <~
+   (if may_trap rsv lr
+    then DO hv <~ root_happly rsv lr hst;;
+         XDEBUG hv (fun hv => DO hv_name <~ string_of_hashcode (hsval_get_hid hv);; RET ("-- insert undef behavior of hashcode:" +; (CamlStr hv_name))%string);;
+         RET (hv::(hsi_ok_lsval hst))
+    else RET (hsi_ok_lsval hst));;
+  DO simp <~ simplify rsv lr hst;;
+  RET {| hsi_smem := hst;
+         hsi_ok_lsval := ok_lhsv;
+         hsi_sreg := red_PTree_set r simp (hsi_sreg hst) |}.
+
+Lemma hslocal_set_sreg_correct hst r rsv lr:
+  WHEN hslocal_set_sreg hst r rsv lr ~> hst' THEN forall ge sp rs0 m0 st
+    (REF: hsilocal_refines ge sp rs0 m0 hst st),
+    hsilocal_refines ge sp rs0 m0 hst' (slocal_set_sreg st r (rsv lr st)).
+Proof.
+  wlp_simplify.
+  + (* may_trap ~> true *)
+    assert (X: sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st)) <->
+               hsok_local ge sp rs0 m0 {| hsi_smem := hst; hsi_ok_lsval := exta :: hsi_ok_lsval hst; hsi_sreg := red_PTree_set r exta0 hst |}).
+    { rewrite sok_local_set_sreg; generalize REF.
+      intros (OKeq & MEM & REG & MVALID); rewrite OKeq; clear OKeq.
+      unfold hsok_local; simpl; intuition (subst; eauto);
+      erewrite <- H0 in *; eauto; unfold hsok_local; simpl; intuition eauto.
+    }
+    unfold hsilocal_refines; simpl; split; auto.
+    rewrite <- X, sok_local_set_sreg. intuition eauto.
+    - destruct REF; intuition eauto.
+    - generalize REF; intros (OKEQ & _). rewrite OKEQ in * |-; erewrite red_PTree_set_refines; eauto.
+  + (* may_trap ~> false *)
+    assert (X: sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st)) <->
+               hsok_local ge sp rs0 m0 {| hsi_smem := hst; hsi_ok_lsval := hsi_ok_lsval hst; hsi_sreg := red_PTree_set r exta hst |}).
+    { 
+      rewrite sok_local_set_sreg; generalize REF.
+      intros (OKeq & MEM & REG & MVALID); rewrite OKeq.
+      unfold hsok_local; simpl; intuition (subst; eauto).
+      assert (X0:hsok_local ge sp rs0 m0 hst). { unfold hsok_local; intuition. }
+      exploit may_trap_correct; eauto.
+      * intro X1; eapply seval_list_sval_inj_not_none; eauto.
+        assert (X2: sok_local ge sp rs0 m0 st). { intuition. }
+        unfold sok_local in X2; intuition eauto.
+      * rewrite <- MEM; eauto.
+    }
+    unfold hsilocal_refines; simpl; split; auto.
+    rewrite <- X, sok_local_set_sreg. intuition eauto.
+    - destruct REF; intuition eauto.
+    - generalize REF; intros (OKEQ & _). rewrite OKEQ in * |-; erewrite red_PTree_set_refines; eauto.
+Qed.
+Global Opaque hslocal_set_sreg.
+Local Hint Resolve hslocal_set_sreg_correct: wlp.
+
+(** ** Execution of one instruction *)
+
+Definition hsiexec_inst (i: instruction) (hst: hsistate): ?? (option hsistate) := 
+  match i with
+  | Inop pc' => 
+      RET (Some (hsist_set_local hst pc' hst.(hsi_local)))
+  | Iop op args dst pc' =>
+      DO next <~ hslocal_set_sreg hst.(hsi_local) dst (Rop op) args;;
+      RET (Some (hsist_set_local hst pc' next))
+  | Iload trap chunk addr args dst pc' =>
+      DO next <~ hslocal_set_sreg hst.(hsi_local) dst (Rload trap chunk addr) args;;
+      RET (Some (hsist_set_local hst pc' next))
+  | Istore chunk addr args src pc' =>
+      DO next <~ hslocal_store hst.(hsi_local) chunk addr args src;;
+      RET (Some (hsist_set_local hst pc' next))
+  | Icond cond args ifso ifnot _ =>
+      let prev := hst.(hsi_local) in
+      DO vargs <~ hlist_args prev args ;;
+      let ex := {| hsi_cond:=cond; hsi_scondargs:=vargs; hsi_elocal := prev; hsi_ifso := ifso |} in
+      RET (Some {| hsi_pc := ifnot; hsi_exits := ex::hst.(hsi_exits); hsi_local := prev |})
+  | _ => RET None (* TODO jumptable ? *)
+  end.
+
+
+Remark hsiexec_inst_None_correct i hst:
+  WHEN hsiexec_inst i hst ~> o THEN forall st, o = None -> siexec_inst i st = None.
+Proof.
+  destruct i; wlp_simplify; congruence.
+Qed.
+
+Lemma seval_condition_refines hst st ge sp cond hargs args rs m:
+  hsok_local ge sp rs m hst -> 
+  hsilocal_refines ge sp rs m hst st ->
+  list_sval_refines ge sp rs m hargs args ->
+  hseval_condition ge sp cond hargs (hsi_smem hst) rs m
+  = seval_condition ge sp cond args (si_smem st) rs m.
+ Proof.
+  intros HOK (_ & MEMEQ & _) LR. unfold hseval_condition, seval_condition.
+  rewrite LR, <- MEMEQ; auto.
+Qed.
+
+Lemma sok_local_set_sreg_simp (rsv:root_sval) ge sp rs0 m0 st r lr:
+  sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st))
+  -> sok_local ge sp rs0 m0 st.
+Proof.
+  rewrite sok_local_set_sreg; intuition.
+Qed.
+
+Local Hint Resolve hsist_set_local_correct_stat: core.
+
+Lemma hsiexec_inst_correct i hst:
+  WHEN hsiexec_inst i hst ~> o THEN forall hst' st,
+   o = Some hst' ->
+   exists st', siexec_inst i st = Some st'
+    /\ (forall (REF:hsistate_refines_stat hst st), hsistate_refines_stat hst' st')
+    /\ (forall ge sp rs0 m0 (REF:hsistate_refines_dyn ge sp rs0 m0 hst st), hsistate_refines_dyn ge sp rs0 m0 hst' st').
+Proof.
+  destruct i; simpl; wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
+  - (* refines_dyn Iop *)
+    eapply hsist_set_local_correct_dyn; eauto.
+    generalize (sok_local_set_sreg_simp (Rop o)); simpl; eauto.
+  - (* refines_dyn Iload *)
+    eapply hsist_set_local_correct_dyn; eauto.
+    generalize (sok_local_set_sreg_simp (Rload t0 m a)); simpl; eauto.
+  - (* refines_dyn Istore *)
+    eapply hsist_set_local_correct_dyn; eauto.
+    unfold sok_local; simpl; intuition.
+  - (* refines_stat Icond *)
+    unfold hsistate_refines_stat, hsiexits_refines_stat in *; simpl; intuition.
+    constructor; simpl; eauto.
+    constructor.
+  - (* refines_dyn Icond *)
+    destruct REF as (EXREF & LREF & NEST).
+    split.
+    + constructor; simpl; auto.
+      constructor; simpl; auto.
+      intros; erewrite seval_condition_refines; eauto.
+    + split; simpl; auto.
+      destruct NEST as [|st0 se lse TOP NEST];
+      econstructor; simpl; auto; constructor; auto.
+Qed.
+Global Opaque hsiexec_inst.
+Local Hint Resolve hsiexec_inst_correct: wlp.
+
+
+Definition some_or_fail {A} (o: option A) (msg: pstring): ?? A :=
+  match o with
+  | Some x => RET x
+  | None => FAILWITH msg
+  end.
+
+Fixpoint hsiexec_path (path:nat) (f: function) (hst: hsistate): ?? hsistate :=
+  match path with
+  | O => RET hst
+  | S p =>
+    let pc := hst.(hsi_pc) in
+    XDEBUG pc (fun pc => DO name_pc <~ string_of_Z (Zpos pc);; RET ("- sym exec node: " +; name_pc)%string);;
+    DO i <~ some_or_fail ((fn_code f)!pc) "hsiexec_path.internal_error.1";;
+    DO ohst1 <~ hsiexec_inst i hst;;
+    DO hst1 <~ some_or_fail ohst1 "hsiexec_path.internal_error.2";;
+    hsiexec_path p f hst1
+  end.
+
+Lemma hsiexec_path_correct path f: forall hst,
+  WHEN hsiexec_path path f hst ~> hst' THEN forall st
+  (RSTAT:hsistate_refines_stat hst st),
+  exists st', siexec_path path f st = Some st'
+    /\ hsistate_refines_stat hst' st'
+    /\ (forall ge sp rs0 m0 (REF:hsistate_refines_dyn ge sp rs0 m0 hst st), hsistate_refines_dyn ge sp rs0 m0 hst' st').
+Proof.
+  induction path; wlp_simplify; try_simplify_someHyps. clear IHpath.
+  generalize RSTAT; intros (PCEQ & _) INSTEQ.
+  rewrite <- PCEQ, INSTEQ; simpl.
+  exploit H0; eauto. clear H0.
+  intros (st0 & SINST & ISTAT & IDYN); erewrite SINST.
+  exploit H1; eauto. clear H1.
+  intros (st' & SPATH & PSTAT & PDYN).
+  eexists; intuition eauto.
+Qed.
+Global Opaque hsiexec_path.
+Local Hint Resolve hsiexec_path_correct: wlp.
+
+Fixpoint hbuiltin_arg (hst: PTree.t hsval) (arg : builtin_arg reg): ?? builtin_arg hsval := 
+  match arg with
+  | BA r => 
+         DO v <~ hsi_sreg_get hst r;;
+         RET (BA v)
+  | BA_int n => RET (BA_int n)
+  | BA_long n => RET (BA_long n)
+  | BA_float f0 => RET (BA_float f0)
+  | BA_single s => RET (BA_single s)
+  | BA_loadstack chunk ptr => RET (BA_loadstack chunk ptr)
+  | BA_addrstack ptr => RET (BA_addrstack ptr)
+  | BA_loadglobal chunk id ptr => RET (BA_loadglobal chunk id ptr)
+  | BA_addrglobal id ptr => RET (BA_addrglobal id ptr)
+  | BA_splitlong ba1 ba2 => 
+    DO v1 <~ hbuiltin_arg hst ba1;;
+    DO v2 <~ hbuiltin_arg hst ba2;;
+    RET (BA_splitlong v1 v2)
+  | BA_addptr ba1 ba2 => 
+    DO v1 <~ hbuiltin_arg hst ba1;;
+    DO v2 <~ hbuiltin_arg hst ba2;;
+    RET (BA_addptr v1 v2)
+  end.
+
+Lemma hbuiltin_arg_correct hst arg:
+  WHEN hbuiltin_arg hst arg ~> hargs THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    seval_builtin_sval ge sp (builtin_arg_map hsval_proj hargs) rs0 m0 = seval_builtin_sval ge sp (builtin_arg_map f arg) rs0 m0.
+Proof.
+  induction arg; wlp_simplify.
+  + erewrite H; eauto.
+  + erewrite H; eauto.
+    erewrite H0; eauto.
+  + erewrite H; eauto.
+    erewrite H0; eauto.
+Qed.
+Global Opaque hbuiltin_arg.
+Local Hint Resolve hbuiltin_arg_correct: wlp.
+
+Fixpoint hbuiltin_args (hst: PTree.t hsval) (args: list (builtin_arg reg)): ?? list (builtin_arg hsval) :=
+  match args with
+  | nil => RET nil
+  | a::l =>
+    DO ha <~ hbuiltin_arg hst a;;
+    DO hl <~ hbuiltin_args hst l;;
+    RET (ha::hl)
+    end.
+
+Lemma hbuiltin_args_correct hst args:
+  WHEN hbuiltin_args hst args ~> hargs THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    bargs_refines ge sp rs0 m0 hargs (List.map (builtin_arg_map f) args).
+Proof.
+  unfold bargs_refines, seval_builtin_args; induction args; wlp_simplify.
+  erewrite H; eauto.
+  erewrite H0; eauto.
+Qed.
+Global Opaque hbuiltin_args.
+Local Hint Resolve hbuiltin_args_correct: wlp.
+
+Definition hsum_left (hst: PTree.t hsval) (ros: reg + ident): ?? (hsval + ident) :=
+  match ros with
+  | inl r => DO hr <~ hsi_sreg_get hst r;; RET (inl hr) 
+  | inr s => RET (inr s)
+  end.
+
+Lemma hsum_left_correct hst ros:
+  WHEN hsum_left hst ros ~> hsi THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    sum_refines ge sp rs0 m0 hsi (sum_left_map f ros).
+Proof.
+  unfold sum_refines; destruct ros; wlp_simplify.
+Qed.
+Global Opaque hsum_left.
+Local Hint Resolve hsum_left_correct: wlp.
+
+Definition hsexec_final (i: instruction) (hst: PTree.t hsval): ?? hsfval :=
+  match i with
+  | Icall sig ros args res pc =>
+    DO svos <~ hsum_left hst ros;;
+    DO sargs <~ hlist_args hst args;;
+    RET (HScall sig svos sargs res pc)
+  | Itailcall sig ros args =>
+    DO svos <~ hsum_left hst ros;;
+    DO sargs <~ hlist_args hst args;;
+    RET (HStailcall sig svos sargs)
+  | Ibuiltin ef args res pc =>
+    DO sargs <~ hbuiltin_args hst args;;
+    RET (HSbuiltin ef sargs res pc)
+  | Ijumptable reg tbl =>
+    DO sv <~ hsi_sreg_get hst reg;;
+    RET (HSjumptable sv tbl)
+  | Ireturn or =>
+    match or with
+    | Some r => DO hr <~ hsi_sreg_get hst r;; RET (HSreturn (Some hr))
+    | None => RET (HSreturn None)
+    end
+  | _ => RET (HSnone)
+  end.
+
+Lemma hsexec_final_correct (hsl: hsistate_local) i:
+  WHEN hsexec_final i hsl ~> hsf THEN forall ge sp rs0 m0 sl
+   (OK:  hsok_local ge sp rs0 m0 hsl)
+   (REF: hsilocal_refines ge sp rs0 m0 hsl sl),
+   hfinal_refines ge sp rs0 m0 hsf (sexec_final i sl).
+Proof.
+  destruct i; wlp_simplify; try econstructor; simpl; eauto.
+Qed.
+Global Opaque hsexec_final.
+Local Hint Resolve hsexec_final_correct: wlp.
+
+Definition init_hsistate_local (_:unit): ?? hsistate_local
+  := DO hm <~ hSinit ();;
+     RET {| hsi_smem := hm; hsi_ok_lsval := nil; hsi_sreg := PTree.empty hsval |}.
+
+Lemma init_hsistate_local_correct:
+  WHEN init_hsistate_local () ~> hsl THEN forall ge sp rs0 m0,
+  hsilocal_refines ge sp rs0 m0 hsl init_sistate_local.
+Proof.
+  unfold hsilocal_refines; wlp_simplify.
+  - unfold hsok_local; simpl; intuition. erewrite H in *; congruence.
+  - unfold hsok_local, sok_local; simpl in *; intuition; try congruence.
+  - unfold hsi_sreg_eval, hsi_sreg_proj. rewrite PTree.gempty. reflexivity.
+  - try_simplify_someHyps.
+Qed.
+Global Opaque init_hsistate_local.
+Local Hint Resolve init_hsistate_local_correct: wlp.
+
+Definition init_hsistate pc: ?? hsistate
+  := DO hst <~ init_hsistate_local ();;
+     RET {| hsi_pc := pc; hsi_exits := nil; hsi_local := hst |}.
+
+Lemma init_hsistate_correct pc:
+  WHEN init_hsistate pc ~> hst THEN
+      hsistate_refines_stat hst (init_sistate pc)
+   /\ forall ge sp rs0 m0, hsistate_refines_dyn ge sp rs0 m0 hst (init_sistate pc).
+Proof.
+  unfold hsistate_refines_stat, hsistate_refines_dyn, hsiexits_refines_dyn; wlp_simplify; constructor.
+Qed.
+Global Opaque init_hsistate.
+Local Hint Resolve init_hsistate_correct: wlp.
+
+Definition hsexec (f: function) (pc:node): ?? hsstate :=
+  DO path <~ some_or_fail ((fn_path f)!pc) "hsexec.internal_error.1";;
+  DO hinit <~ init_hsistate pc;;
+  DO hst <~ hsiexec_path path.(psize) f hinit;;
+  DO i <~ some_or_fail ((fn_code f)!(hst.(hsi_pc))) "hsexec.internal_error.2";;
+  DO ohst <~ hsiexec_inst i hst;;
+  match ohst with
+  | Some hst' => RET {| hinternal := hst'; hfinal := HSnone |}
+  | None => DO hsvf <~ hsexec_final i hst.(hsi_local);;
+            RET {| hinternal := hst; hfinal := hsvf |}
+  end.
+
+Lemma hsexec_correct_aux f pc:
+  WHEN hsexec f pc ~> hst THEN
+  exists st, sexec f pc = Some st /\ hsstate_refines hst st.
+Proof.
+  unfold hsstate_refines, sexec; wlp_simplify.
+  - (* Some *)
+   rewrite H; clear H.
+   exploit H0; clear H0; eauto.
+   intros (st0 & EXECPATH & SREF & DREF).
+   rewrite EXECPATH; clear EXECPATH.
+   generalize SREF. intros (EQPC & _).
+   rewrite <- EQPC, H3; clear H3.
+   exploit H4; clear H4; eauto.
+   intros (st' & EXECL & SREF' & DREF').
+   try_simplify_someHyps.
+   eexists; intuition (simpl; eauto).
+   constructor.
+  - (* None *)
+   rewrite H; clear H H4.
+   exploit H0; clear H0; eauto.
+   intros (st0 & EXECPATH & SREF & DREF).
+   rewrite EXECPATH; clear EXECPATH.
+   generalize SREF. intros (EQPC & _).
+   rewrite <- EQPC, H3; clear H3.
+   erewrite hsiexec_inst_None_correct; eauto.
+   eexists; intuition (simpl; eauto).
+Qed.
+
+Global Opaque hsexec.
+
+End CanonBuilding.
+
+(** Correction of concrete symbolic execution wrt abstract symbolic execution *)
+Theorem hsexec_correct
+  (hC_hsval : hashinfo hsval -> ?? hsval)
+  (hC_list_hsval : hashinfo list_hsval -> ?? list_hsval)
+  (hC_hsmem : hashinfo hsmem -> ?? hsmem)
+  (f : function) 
+  (pc : node):
+       WHEN hsexec hC_hsval hC_list_hsval hC_hsmem f pc ~> hst THEN forall
+        (hC_hsval_correct: forall hs,
+            WHEN hC_hsval hs ~> hs' THEN forall ge sp rs0 m0,
+                seval_sval ge sp (hsval_proj (hdata hs)) rs0 m0 =
+                seval_sval ge sp (hsval_proj hs') rs0 m0)
+        (hC_list_hsval_correct: forall lh,
+            WHEN hC_list_hsval lh ~> lh' THEN forall ge sp rs0 m0,
+              seval_list_sval ge sp (hsval_list_proj (hdata lh)) rs0 m0 =
+              seval_list_sval ge sp (hsval_list_proj lh') rs0 m0)
+         (hC_hsmem_correct: forall hm,
+            WHEN hC_hsmem hm ~> hm' THEN forall ge sp rs0 m0,
+              seval_smem ge sp (hsmem_proj (hdata hm)) rs0 m0 =
+              seval_smem ge sp (hsmem_proj hm') rs0 m0),
+         exists st : sstate, sexec f pc = Some st /\ hsstate_refines hst st.
+Proof.
+  wlp_simplify.
+  eapply hsexec_correct_aux; eauto.
+Qed.
+Local Hint Resolve hsexec_correct: wlp.
+
+(** * Implementing the simulation test with concrete hash-consed symbolic execution *)
+
+Definition phys_check {A} (x y:A) (msg: pstring): ?? unit :=
+  DO b <~ phys_eq x y;;
+  assert_b b msg;;
+  RET tt.
+
+Definition struct_check {A} (x y: A) (msg: pstring): ?? unit :=
+  DO b <~ struct_eq x y;;
+  assert_b b msg;;
+  RET tt.
+
+Lemma struct_check_correct {A} (a b: A) msg:
+  WHEN struct_check a b msg ~> _ THEN
+  a = b.
+Proof. wlp_simplify. Qed.
+Global Opaque struct_check.
+Hint Resolve struct_check_correct: wlp.
+
+Definition option_eq_check {A} (o1 o2: option A): ?? unit :=
+  match o1, o2 with
+  | Some x1, Some x2 => phys_check x1 x2 "option_eq_check: data physically differ"
+  | None, None => RET tt
+  | _, _ => FAILWITH "option_eq_check: structure differs"
+  end.
+
+Lemma option_eq_check_correct A (o1 o2: option A): WHEN option_eq_check o1 o2 ~> _ THEN o1=o2.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque option_eq_check.
+Hint Resolve option_eq_check_correct:wlp.
+
+Import PTree.
+
+Fixpoint PTree_eq_check {A} (d1 d2: PTree.t A): ?? unit :=
+  match d1, d2 with
+  | Leaf, Leaf => RET tt
+  | Node l1 o1 r1, Node l2 o2 r2 =>
+      option_eq_check o1 o2;;
+      PTree_eq_check l1 l2;;
+      PTree_eq_check r1 r2
+  | _, _ => FAILWITH "PTree_eq_check: some key is absent"
+  end.
+
+Lemma PTree_eq_check_correct A d1: forall (d2: t A),
+ WHEN PTree_eq_check d1 d2 ~> _ THEN forall x, PTree.get x d1 = PTree.get x d2.
+Proof.
+  induction d1 as [|l1 Hl1 o1 r1 Hr1]; destruct d2 as [|l2 o2 r2]; simpl; 
+  wlp_simplify. destruct x; simpl; auto.
+Qed.
+Global Opaque PTree_eq_check.
+Local Hint Resolve PTree_eq_check_correct: wlp.
+
+Fixpoint PTree_frame_eq_check {A} (frame: list positive) (d1 d2: PTree.t A): ?? unit :=
+  match frame with
+  | nil => RET tt
+  | k::l => 
+    option_eq_check (PTree.get k d1) (PTree.get k d2);;
+    PTree_frame_eq_check l d1 d2
+  end.
+
+Lemma PTree_frame_eq_check_correct A l (d1 d2: t A):
+ WHEN PTree_frame_eq_check l d1 d2 ~> _ THEN forall x, List.In x l -> PTree.get x d1 = PTree.get x d2.
+Proof.
+  induction l as [|k l]; simpl; wlp_simplify.
+  subst; auto.
+Qed.
+Global Opaque PTree_frame_eq_check.
+Local Hint Resolve PTree_frame_eq_check_correct: wlp.
+
+Definition hsilocal_simu_check hst1 hst2 : ?? unit :=
+  DEBUG("? last check");;
+  phys_check (hsi_smem hst2) (hsi_smem hst1) "hsilocal_simu_check: hsi_smem sets aren't equiv";;
+  PTree_eq_check (hsi_sreg hst1) (hsi_sreg hst2);;
+  Sets.assert_list_incl mk_hash_params (hsi_ok_lsval hst2) (hsi_ok_lsval hst1);;
+  DEBUG("=> last check: OK").
+
+Lemma hsilocal_simu_check_correct hst1 hst2:
+  WHEN hsilocal_simu_check hst1 hst2 ~> _ THEN
+  hsilocal_simu_spec None hst1 hst2.
+Proof.
+  unfold hsilocal_simu_spec; wlp_simplify. 
+Qed.
+Hint Resolve hsilocal_simu_check_correct: wlp.
+Global Opaque hsilocal_simu_check.
+
+Definition hsilocal_frame_simu_check frame hst1 hst2 : ?? unit :=
+  DEBUG("? frame check");;
+  phys_check (hsi_smem hst2) (hsi_smem hst1) "hsilocal_frame_simu_check: hsi_smem sets aren't equiv";;
+  PTree_frame_eq_check frame (hsi_sreg hst1) (hsi_sreg hst2);;
+  Sets.assert_list_incl mk_hash_params (hsi_ok_lsval hst2) (hsi_ok_lsval hst1);;
+  DEBUG("=> frame check: OK").
+
+Lemma setoid_in {A: Type} (a: A): forall l,
+  SetoidList.InA (fun x y => x = y) a l ->
+  In a l.
+Proof.
+  induction l; intros; inv H.
+  - constructor. reflexivity.
+  - right. auto.
+Qed.
+
+Lemma regset_elements_in r rs:
+  Regset.In r rs ->
+  In r (Regset.elements rs).
+Proof.
+  intros. exploit Regset.elements_1; eauto. intro SIN.
+  apply setoid_in. assumption.
+Qed.
+Local Hint Resolve regset_elements_in: core.
+
+Lemma hsilocal_frame_simu_check_correct hst1 hst2 alive:
+  WHEN hsilocal_frame_simu_check (Regset.elements alive) hst1 hst2 ~> _ THEN
+  hsilocal_simu_spec (Some alive) hst1 hst2.
+Proof.
+  unfold hsilocal_simu_spec; wlp_simplify. symmetry; eauto.
+Qed.
+Hint Resolve hsilocal_frame_simu_check_correct: wlp.
+Global Opaque hsilocal_frame_simu_check.
+
+Definition revmap_check_single (dm: PTree.t node) (n tn: node) : ?? unit :=
+  DO res <~ some_or_fail (dm ! tn) "revmap_check_single: no mapping for tn";;
+  struct_check n res "revmap_check_single: n and res are physically different".
+
+Lemma revmap_check_single_correct dm pc1 pc2:
+  WHEN revmap_check_single dm pc1 pc2 ~> _ THEN
+  dm ! pc2 = Some pc1.
+Proof.
+  wlp_simplify. congruence.
+Qed.
+Hint Resolve revmap_check_single_correct: wlp.
+Global Opaque revmap_check_single.
+
+Definition hsiexit_simu_check (dm: PTree.t node) (f: RTLpath.function) (hse1 hse2: hsistate_exit): ?? unit :=
+  struct_check (hsi_cond hse1) (hsi_cond hse2) "hsiexit_simu_check: conditions do not match";;
+  phys_check (hsi_scondargs hse1) (hsi_scondargs hse2) "hsiexit_simu_check: args do not match";;
+  revmap_check_single dm (hsi_ifso hse1) (hsi_ifso hse2);;
+  DO path <~ some_or_fail ((fn_path f) ! (hsi_ifso hse1)) "hsiexit_simu_check: internal error";;
+  hsilocal_frame_simu_check (Regset.elements path.(input_regs)) (hsi_elocal hse1) (hsi_elocal hse2).
+
+Lemma hsiexit_simu_check_correct dm f hse1 hse2:
+  WHEN hsiexit_simu_check dm f hse1 hse2 ~> _ THEN
+  hsiexit_simu_spec dm f hse1 hse2.
+Proof.
+  unfold hsiexit_simu_spec; wlp_simplify.
+Qed.
+Hint Resolve hsiexit_simu_check_correct: wlp.
+Global Opaque hsiexit_simu_check.
+
+Fixpoint hsiexits_simu_check (dm: PTree.t node) (f: RTLpath.function) (lhse1 lhse2: list hsistate_exit) :=
+  match lhse1,lhse2 with
+  | nil, nil => RET tt
+  | hse1 :: lhse1, hse2 :: lhse2 =>
+    hsiexit_simu_check dm f hse1 hse2;;
+    hsiexits_simu_check dm f lhse1 lhse2
+  | _, _ => FAILWITH "siexists_simu_check:  lengths do not match"
+  end.
+
+Lemma hsiexits_simu_check_correct dm f: forall le1 le2,
+  WHEN hsiexits_simu_check dm f le1 le2 ~> _ THEN
+  hsiexits_simu_spec dm f le1 le2.
+Proof.
+  unfold hsiexits_simu_spec; induction le1; simpl; destruct le2; wlp_simplify; constructor; eauto.
+Qed.
+Hint Resolve hsiexits_simu_check_correct: wlp.
+Global Opaque hsiexits_simu_check.
+
+Definition hsistate_simu_check (dm: PTree.t node) (f: RTLpath.function) (hst1 hst2: hsistate) :=
+  hsiexits_simu_check dm f (hsi_exits hst1) (hsi_exits hst2);;
+  hsilocal_simu_check (hsi_local hst1) (hsi_local hst2).
+
+Lemma hsistate_simu_check_correct dm f hst1 hst2:
+  WHEN hsistate_simu_check dm f hst1 hst2 ~> _ THEN
+  hsistate_simu_spec dm f hst1 hst2.
+Proof.
+  unfold hsistate_simu_spec; wlp_simplify.
+Qed.
+Hint Resolve hsistate_simu_check_correct: wlp.
+Global Opaque hsistate_simu_check.
+
+
+Fixpoint revmap_check_list (dm: PTree.t node) (ln ln': list node): ?? unit :=
+  match ln, ln' with
+  | nil, nil => RET tt
+  | n::ln, n'::ln' => 
+      revmap_check_single dm n n';;
+      revmap_check_list dm ln ln'
+  | _, _ => FAILWITH "revmap_check_list: lists have different lengths"
+  end.
+
+Lemma revmap_check_list_correct dm: forall lpc lpc',
+  WHEN revmap_check_list dm lpc lpc' ~> _ THEN
+  ptree_get_list dm lpc' = Some lpc.
+Proof.
+  induction lpc.
+  - destruct lpc'; wlp_simplify.
+  - destruct lpc'; wlp_simplify. try_simplify_someHyps.
+Qed.
+Global Opaque revmap_check_list.
+Hint Resolve revmap_check_list_correct: wlp.
+
+
+Definition svos_simu_check (svos1 svos2: hsval + ident) :=
+  match svos1, svos2 with
+  | inl sv1, inl sv2 => phys_check sv1 sv2 "svos_simu_check: sval mismatch"
+  | inr id1, inr id2 => phys_check id1 id2 "svos_simu_check: symbol mismatch"
+  | _, _ => FAILWITH "svos_simu_check: type mismatch"
+  end.
+
+Lemma svos_simu_check_correct svos1 svos2:
+  WHEN svos_simu_check svos1 svos2 ~> _ THEN
+  svos1 = svos2.
+Proof.
+  destruct svos1; destruct svos2; wlp_simplify.
+Qed.
+Global Opaque svos_simu_check.
+Hint Resolve svos_simu_check_correct: wlp.
+
+
+Fixpoint builtin_arg_simu_check (bs bs': builtin_arg hsval) :=
+  match bs with
+  | BA sv =>
+    match bs' with
+    | BA sv' => phys_check sv sv' "builtin_arg_simu_check: sval mismatch"
+    | _ => FAILWITH "builtin_arg_simu_check: BA mismatch"
+    end
+  | BA_splitlong lo hi =>
+    match bs' with
+    | BA_splitlong lo' hi' =>
+        builtin_arg_simu_check lo lo';;
+        builtin_arg_simu_check hi hi'
+    | _ => FAILWITH "builtin_arg_simu_check: BA_splitlong mismatch"
+    end
+  | BA_addptr b1 b2 =>
+    match bs' with
+    | BA_addptr b1' b2' =>
+        builtin_arg_simu_check b1 b1';;
+        builtin_arg_simu_check b2 b2'
+    | _ => FAILWITH "builtin_arg_simu_check: BA_addptr mismatch"
+    end
+  | bs => struct_check bs bs' "builtin_arg_simu_check: basic mismatch"
+  end.
+
+Lemma builtin_arg_simu_check_correct: forall bs1 bs2,
+  WHEN builtin_arg_simu_check bs1 bs2 ~> _ THEN
+  builtin_arg_map hsval_proj bs1 = builtin_arg_map hsval_proj bs2.
+Proof.
+  induction bs1.
+  all: try (wlp_simplify; subst; reflexivity).
+  all: destruct bs2; wlp_simplify; congruence.
+Qed.
+Global Opaque builtin_arg_simu_check.
+Hint Resolve builtin_arg_simu_check_correct: wlp.
+
+Fixpoint list_builtin_arg_simu_check lbs1 lbs2 :=
+  match lbs1, lbs2 with
+  | nil, nil => RET tt
+  | bs1::lbs1, bs2::lbs2 =>
+    builtin_arg_simu_check bs1 bs2;;
+    list_builtin_arg_simu_check lbs1 lbs2
+  | _, _ => FAILWITH "list_builtin_arg_simu_check: length mismatch"
+  end.
+
+Lemma list_builtin_arg_simu_check_correct: forall lbs1 lbs2,
+  WHEN list_builtin_arg_simu_check lbs1 lbs2 ~> _ THEN
+  List.map (builtin_arg_map hsval_proj) lbs1 = List.map (builtin_arg_map hsval_proj) lbs2.
+Proof.
+  induction lbs1; destruct lbs2; wlp_simplify. congruence.
+Qed.
+Global Opaque list_builtin_arg_simu_check.
+Hint Resolve list_builtin_arg_simu_check_correct: wlp.
+
+Definition sfval_simu_check (dm: PTree.t node) (f: RTLpath.function) (pc1 pc2: node) (fv1 fv2: hsfval) :=
+  match fv1, fv2 with
+  | HSnone, HSnone => revmap_check_single dm pc1 pc2
+  | HScall sig1 svos1 lsv1 res1 pc1, HScall sig2 svos2 lsv2 res2 pc2 =>
+      revmap_check_single dm pc1 pc2;;
+      phys_check sig1 sig2 "sfval_simu_check: Scall different signatures";;
+      phys_check res1 res2 "sfval_simu_check: Scall res do not match";;
+      svos_simu_check svos1 svos2;;
+      phys_check lsv1 lsv2 "sfval_simu_check: Scall args do not match"
+  | HStailcall sig1 svos1 lsv1, HStailcall sig2 svos2 lsv2 =>
+      phys_check sig1 sig2 "sfval_simu_check: Stailcall different signatures";;
+      svos_simu_check svos1 svos2;;
+      phys_check lsv1 lsv2 "sfval_simu_check: Stailcall args do not match"
+  | HSbuiltin ef1 lbs1 br1 pc1, HSbuiltin ef2 lbs2 br2 pc2 =>
+      revmap_check_single dm pc1 pc2;;
+      phys_check ef1 ef2 "sfval_simu_check: builtin ef do not match";;
+      phys_check br1 br2 "sfval_simu_check: builtin br do not match";;
+      list_builtin_arg_simu_check lbs1 lbs2
+  | HSjumptable sv ln, HSjumptable sv' ln' =>
+      revmap_check_list dm ln ln';;
+      phys_check sv sv' "sfval_simu_check: Sjumptable sval do not match"
+  | HSreturn osv1, HSreturn osv2 =>
+      option_eq_check osv1 osv2
+  | _, _ => FAILWITH "sfval_simu_check: structure mismatch"
+  end.
+
+Lemma sfval_simu_check_correct dm f opc1 opc2 fv1 fv2:
+  WHEN sfval_simu_check dm f opc1 opc2 fv1 fv2 ~> _ THEN
+  hfinal_simu_spec dm f opc1 opc2 fv1 fv2.
+Proof.
+  unfold hfinal_simu_spec; destruct fv1; destruct fv2; wlp_simplify; try congruence.
+Qed.
+Hint Resolve sfval_simu_check_correct: wlp.
+Global Opaque sfval_simu_check.
+
+Definition hsstate_simu_check (dm: PTree.t node) (f: RTLpath.function) (hst1 hst2: hsstate) :=
+  hsistate_simu_check dm f (hinternal hst1) (hinternal hst2);;
+  sfval_simu_check dm f (hsi_pc hst1) (hsi_pc hst2) (hfinal hst1) (hfinal hst2).
+
+Lemma hsstate_simu_check_correct dm f hst1 hst2:
+  WHEN hsstate_simu_check dm f hst1 hst2 ~> _ THEN
+  hsstate_simu_spec dm f hst1 hst2.
+Proof.
+  unfold hsstate_simu_spec; wlp_simplify.
+Qed.
+Hint Resolve hsstate_simu_check_correct: wlp.
+Global Opaque hsstate_simu_check. 
+
+Definition simu_check_single (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) (m: node * node): ?? unit :=
+  let (pc2, pc1) := m in
+  (* creating the hash-consing tables *)
+  DO hC_sval <~ hCons hSVAL;;
+  DO hC_list_hsval <~ hCons hLSVAL;;
+  DO hC_hsmem <~ hCons hSMEM;;
+  let hsexec := hsexec hC_sval.(hC) hC_list_hsval.(hC) hC_hsmem.(hC) in
+  (* performing the hash-consed executions *)
+  XDEBUG pc1 (fun pc => DO name_pc <~ string_of_Z (Zpos pc);; RET ("entry-point of input superblock: " +; name_pc)%string);;
+  DO hst1 <~ hsexec f pc1;;
+  XDEBUG pc2 (fun pc => DO name_pc <~ string_of_Z (Zpos pc);; RET ("entry-point of output superblock: " +; name_pc)%string);;
+  DO hst2 <~ hsexec tf pc2;;
+  (* comparing the executions *)
+  hsstate_simu_check dm f hst1 hst2.
+
+Lemma simu_check_single_correct dm tf f pc1 pc2:
+  WHEN simu_check_single dm f tf (pc2, pc1) ~> _ THEN
+  sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold sexec_simu; wlp_simplify.
+  exploit H2; clear H2. 1-3: wlp_simplify.
+  intros (st2 & SEXEC2 & REF2). try_simplify_someHyps.
+  exploit H3; clear H3. 1-3: wlp_simplify.
+  intros (st3 & SEXEC3 & REF3). try_simplify_someHyps.
+  eexists. split; eauto.
+  intros ctx.
+  eapply hsstate_simu_spec_correct; eauto.
+Qed.
+Global Opaque simu_check_single.
+Global Hint Resolve simu_check_single_correct: wlp.
+
+Fixpoint simu_check_rec (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) lm : ?? unit :=
+  match lm with
+  | nil => RET tt
+  | m :: lm => 
+    simu_check_single dm f tf m;;
+    simu_check_rec dm f tf lm
+  end.
+
+Lemma simu_check_rec_correct dm f tf lm:
+  WHEN simu_check_rec dm f tf lm ~> _ THEN
+  forall pc1 pc2, In (pc2, pc1) lm -> sexec_simu dm f tf pc1 pc2.
+Proof.
+  induction lm; wlp_simplify.
+  match goal with
+  | X: (_,_) = (_,_) |- _ => inversion X; subst
+  end.
+  subst; eauto.
+Qed.
+Global Opaque simu_check_rec.
+Global Hint Resolve simu_check_rec_correct: wlp.
+
+Definition imp_simu_check (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function): ?? unit :=
+   simu_check_rec dm f tf (PTree.elements dm);;
+   DEBUG("simu_check OK!").
+
+Local Hint Resolve PTree.elements_correct: core.
+Lemma imp_simu_check_correct dm f tf:
+  WHEN imp_simu_check dm f tf ~> _ THEN
+  forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque imp_simu_check.
+Global Hint Resolve imp_simu_check_correct: wlp.
+
+Program Definition aux_simu_check (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function): ?? bool :=
+   DO r <~ 
+     (TRY 
+       imp_simu_check dm f tf;; 
+       RET true
+      CATCH_FAIL s, _ =>
+       println ("simu_check_failure:" +; s);;
+       RET false
+      ENSURE (fun b => b=true -> forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2));;
+   RET (`r).
+Obligation 1.
+  split; wlp_simplify. discriminate.
+Qed.
+
+Lemma aux_simu_check_correct dm f tf:
+  WHEN aux_simu_check dm f tf ~> b THEN
+  b=true -> forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold aux_simu_check; wlp_simplify.
+  destruct exta; simpl; auto.
+Qed.
+
+(* Coerce aux_simu_check into a pure function (this is a little unsafe like all oracles in CompCert). *)
+
+Import UnsafeImpure.
+
+Definition simu_check (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) : res unit := 
+  match unsafe_coerce (aux_simu_check dm f tf) with
+  | Some true => OK tt
+  | _ => Error (msg "simu_check has failed")
+  end.
+
+Lemma simu_check_correct dm f tf:
+  simu_check dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold simu_check.
+  destruct (unsafe_coerce (aux_simu_check dm f tf)) as [[|]|] eqn:Hres; simpl; try discriminate.
+  intros; eapply aux_simu_check_correct; eauto.
+  eapply unsafe_coerce_not_really_correct; eauto.
+Qed.
+\ No newline at end of file
diff --git a/scheduling/RTLpathSE_simu_specs.v b/scheduling/RTLpathSE_simu_specs.v
new file mode 100644
index 00000000..c9e272c0
--- /dev/null
+++ b/scheduling/RTLpathSE_simu_specs.v
@@ -0,0 +1,889 @@
+(** Low-level specifications of the simulation tests by symbolic execution with hash-consing *)
+
+Require Import Coqlib Maps Floats.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL RTLpath.
+Require Import Errors.
+Require Import RTLpathSE_theory RTLpathLivegenproof.
+Require Import Axioms.
+
+Local Open Scope error_monad_scope.
+Local Open Scope option_monad_scope.
+
+Require Export Impure.ImpHCons.
+
+Import ListNotations.
+Local Open Scope list_scope.
+
+(** * Auxilary notions on simulation tests *)
+
+Definition silocal_simu (dm: PTree.t node) (f: RTLpath.function) (sl1 sl2: sistate_local) (ctx: simu_proof_context f): Prop :=
+    forall is1, ssem_local (the_ge1 ctx) (the_sp ctx) sl1 (the_rs0 ctx) (the_m0 ctx) (irs is1) (imem is1) ->
+    exists is2, ssem_local (the_ge2 ctx) (the_sp ctx) sl2 (the_rs0 ctx) (the_m0 ctx) (irs is2) (imem is2)
+                /\ istate_simu f dm is1 is2.
+
+(* a kind of negation of sabort_local *)
+Definition sok_local (ge: RTL.genv) (sp:val) (rs0: regset) (m0: mem) (st: sistate_local): Prop :=
+  (st.(si_pre) ge sp rs0 m0)
+  /\ seval_smem ge sp st.(si_smem) rs0 m0 <> None
+  /\ forall (r: reg), seval_sval ge sp (si_sreg st r) rs0 m0 <> None.
+
+Lemma ssem_local_sok ge sp rs0 m0 st rs m:
+  ssem_local ge sp st rs0 m0 rs m -> sok_local ge sp rs0 m0 st.
+Proof.
+  unfold sok_local, ssem_local. 
+  intuition congruence.
+Qed.
+
+Definition siexit_simu (dm: PTree.t node) (f: RTLpath.function) (ctx: simu_proof_context f) (se1 se2: sistate_exit) :=
+  (sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal se1) ->
+          (seval_condition (the_ge1 ctx) (the_sp ctx) (si_cond se1) (si_scondargs se1) 
+                             (si_smem (si_elocal se1)) (the_rs0 ctx) (the_m0 ctx)) =
+          (seval_condition (the_ge2 ctx) (the_sp ctx) (si_cond se2) (si_scondargs se2)
+                             (si_smem (si_elocal se2)) (the_rs0 ctx) (the_m0 ctx)))
+  /\ forall is1,
+      icontinue is1 = false ->
+      ssem_exit (the_ge1 ctx) (the_sp ctx) se1 (the_rs0 ctx) (the_m0 ctx) (irs is1) (imem is1) (ipc is1) ->
+      exists is2,
+          ssem_exit (the_ge2 ctx) (the_sp ctx) se2 (the_rs0 ctx) (the_m0 ctx) (irs is2) (imem is2) (ipc is2)
+      /\  istate_simu f dm is1 is2.
+
+Definition siexits_simu (dm: PTree.t node) (f: RTLpath.function) (lse1 lse2: list sistate_exit) (ctx: simu_proof_context f) :=
+  list_forall2 (siexit_simu dm f ctx) lse1 lse2.
+
+
+(** * Implementation of Data-structure use in Hash-consing *)
+
+(** ** Implementation of symbolic values/symbolic memories with hash-consing data *)
+
+Inductive hsval :=
+  | HSinput (r: reg) (hid: hashcode)
+  | HSop (op: operation) (lhsv: list_hsval) (hid: hashcode) (** NB: does not depend on the memory ! *)
+  | HSload (hsm: hsmem) (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (hid: hashcode)
+with list_hsval :=
+  | HSnil (hid: hashcode)
+  | HScons (hsv: hsval) (lhsv: list_hsval) (hid: hashcode)
+with hsmem :=
+  | HSinit (hid: hashcode)
+  | HSstore (hsm: hsmem) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (srce: hsval) (hid:hashcode).
+
+Scheme hsval_mut := Induction for hsval Sort Prop
+with list_hsval_mut := Induction for list_hsval Sort Prop
+with hsmem_mut := Induction for hsmem Sort Prop.
+
+
+
+(** Symbolic final value -- from hash-consed values
+  It does not seem useful to hash-consed these final values (because they are final).
+*)
+Inductive hsfval :=
+  | HSnone
+  | HScall (sig: signature) (svos: hsval + ident) (lsv: list_hsval) (res: reg) (pc: node)
+  | HStailcall (sig: signature) (svos: hsval + ident) (lsv: list_hsval)
+  | HSbuiltin (ef: external_function) (sargs: list (builtin_arg hsval)) (res: builtin_res reg) (pc: node)
+  | HSjumptable (sv: hsval) (tbl: list node)
+  | HSreturn (res: option hsval)
+.
+
+(** * gives the semantics of hash-consed symbolic values *)
+Fixpoint hsval_proj hsv :=
+  match hsv with
+  | HSinput r _ => Sinput r
+  | HSop op hl _ => Sop op (hsval_list_proj hl) Sinit (** NB: use the initial memory of the path ! *)
+  | HSload hm t chk addr hl _ => Sload (hsmem_proj hm) t chk addr (hsval_list_proj hl)
+  end
+with hsval_list_proj hl :=
+  match hl with
+  | HSnil _ => Snil
+  | HScons hv hl _ => Scons (hsval_proj hv) (hsval_list_proj hl)
+  end
+with hsmem_proj hm :=
+  match hm with
+  | HSinit _ => Sinit
+  | HSstore hm chk addr hl hv _ => Sstore (hsmem_proj hm) chk addr (hsval_list_proj hl) (hsval_proj hv)
+  end.
+
+Declare Scope hse.
+Local Open Scope hse.
+
+
+(** We use a Notation instead a Definition, in order to get more automation "for free" *)
+Notation "'seval_hsval' ge sp hsv" := (seval_sval ge sp (hsval_proj hsv))
+  (only parsing, at level 0, ge at next level, sp at next level, hsv at next level): hse.
+Notation "'seval_list_hsval' ge sp lhv" := (seval_list_sval ge sp (hsval_list_proj lhv))
+  (only parsing, at level 0, ge at next level, sp at next level, lhv at next level): hse.
+Notation "'seval_hsmem' ge sp hsm" := (seval_smem ge sp (hsmem_proj hsm))
+  (only parsing, at level 0, ge at next level, sp at next level, hsm at next level): hse.
+
+Notation "'sval_refines' ge sp rs0 m0 hv sv" := (seval_hsval ge sp hv rs0 m0 = seval_sval ge sp sv rs0 m0)
+  (only parsing, at level 0, ge at next level, sp at next level, rs0 at next level, m0 at next level, hv at next level, sv at next level): hse.
+Notation "'list_sval_refines' ge sp rs0 m0 lhv lsv" := (seval_list_hsval ge sp lhv rs0 m0 = seval_list_sval ge sp lsv rs0 m0)
+  (only parsing, at level 0, ge at next level, sp at next level, rs0 at next level, m0 at next level, lhv at next level, lsv at next level): hse.
+Notation "'smem_refines' ge sp rs0 m0 hm sm" := (seval_hsmem ge sp hm rs0 m0 = seval_smem ge sp sm rs0 m0)
+  (only parsing, at level 0, ge at next level, sp at next level, rs0 at next level, m0 at next level, hm at next level, sm at next level): hse.
+
+
+(** ** Implementation of symbolic states (with hash-consing) *)
+
+(** *** Syntax and semantics of symbolic internal local states 
+
+The semantics is given by the refinement relation [hsilocal_refines] wrt to (abstract) symbolic internal local states
+
+*)
+
+(* NB: "h" stands for hash-consing *)
+Record hsistate_local := 
+  { 
+    (** [hsi_smem] represents the current smem symbolic evaluations.
+        (we also recover the history of smem in hsi_smem)  *)
+    hsi_smem:> hsmem;
+    (** For the values in registers:
+        1) we store a list of sval evaluations
+        2) we encode the symbolic regset by a PTree *)
+    hsi_ok_lsval: list hsval;
+    hsi_sreg:> PTree.t hsval
+  }.
+
+Definition hsi_sreg_proj (hst: PTree.t hsval) r: sval :=
+   match PTree.get r hst with
+   | None => Sinput r
+   | Some hsv => hsval_proj hsv
+   end.
+
+Definition hsi_sreg_eval ge sp hst r := seval_sval ge sp (hsi_sreg_proj hst r).
+
+Definition hsok_local ge sp rs0 m0 (hst: hsistate_local) : Prop :=
+     (forall hsv, List.In hsv (hsi_ok_lsval hst) -> seval_hsval ge sp hsv rs0 m0 <> None)
+  /\ (seval_hsmem ge sp (hst.(hsi_smem)) rs0 m0 <> None).
+
+(* refinement link between a (st: sistate_local) and (hst: hsistate_local) *)
+Definition hsilocal_refines ge sp rs0 m0 (hst: hsistate_local) (st: sistate_local) :=
+      (sok_local ge sp rs0 m0 st <-> hsok_local ge sp rs0 m0 hst)
+  /\  (hsok_local ge sp rs0 m0 hst -> smem_refines ge sp rs0 m0 (hsi_smem hst) (st.(si_smem)))
+  /\  (hsok_local ge sp rs0 m0 hst -> forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (si_sreg st r) rs0 m0)
+  /\  (* the below invariant allows to evaluate operations in the initial memory of the path instead of the current memory *)
+      (forall m b ofs, seval_smem ge sp st.(si_smem) rs0 m0 = Some m -> Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  .
+
+(** *** Syntax and semantics of symbolic exit states *)
+Record hsistate_exit := mk_hsistate_exit
+  { hsi_cond: condition; hsi_scondargs: list_hsval; hsi_elocal: hsistate_local; hsi_ifso: node }.
+
+(** NB: we split the refinement relation between a "static" part -- independendent of the initial context
+   and a "dynamic" part -- that depends on it
+*)
+Definition hsiexit_refines_stat (hext: hsistate_exit) (ext: sistate_exit): Prop :=
+  hsi_ifso hext = si_ifso ext.
+
+Definition hseval_condition ge sp cond hcondargs hmem rs0 m0 :=
+  seval_condition ge sp cond (hsval_list_proj hcondargs) (hsmem_proj hmem) rs0 m0.
+
+Lemma hseval_condition_preserved ge ge' sp cond args mem rs0 m0:
+  (forall s : ident, Genv.find_symbol ge' s = Genv.find_symbol ge s) ->
+  hseval_condition ge sp cond args mem rs0 m0 = hseval_condition ge' sp cond args mem rs0 m0.
+Proof.
+  intros. unfold hseval_condition. erewrite seval_condition_preserved; [|eapply H].
+  reflexivity.
+Qed.
+
+Definition hsiexit_refines_dyn ge sp rs0 m0 (hext: hsistate_exit) (ext: sistate_exit): Prop :=
+   hsilocal_refines ge sp rs0 m0 (hsi_elocal hext) (si_elocal ext)
+   /\ (hsok_local ge sp rs0 m0 (hsi_elocal hext) ->
+        hseval_condition ge sp (hsi_cond hext) (hsi_scondargs hext) (hsi_smem (hsi_elocal hext)) rs0 m0
+         = seval_condition ge sp (si_cond ext) (si_scondargs ext) (si_smem (si_elocal ext)) rs0 m0).
+
+Definition hsiexits_refines_stat lhse lse :=
+  list_forall2 hsiexit_refines_stat lhse lse.
+
+Definition hsiexits_refines_dyn ge sp rs0 m0 lhse se :=
+  list_forall2 (hsiexit_refines_dyn ge sp rs0 m0) lhse se.
+
+
+(** *** Syntax and Semantics of symbolic internal state *)
+
+Record hsistate := { hsi_pc: node; hsi_exits: list hsistate_exit; hsi_local: hsistate_local }.
+
+(* expresses the "monotony" of sok_local along sequences *)
+Inductive nested_sok ge sp rs0 m0: sistate_local -> list sistate_exit -> Prop :=
+    nsok_nil st: nested_sok ge sp rs0 m0 st nil
+  | nsok_cons st se lse:
+     (sok_local ge sp rs0 m0 st -> sok_local ge sp rs0 m0 (si_elocal se)) ->
+     nested_sok ge sp rs0 m0 (si_elocal se) lse ->
+     nested_sok ge sp rs0 m0 st (se::lse).
+
+Lemma nested_sok_prop ge sp st sle rs0 m0:
+  nested_sok ge sp rs0 m0 st sle ->
+  sok_local ge sp rs0 m0 st ->
+  forall se, In se sle -> sok_local ge sp rs0 m0 (si_elocal se).
+Proof.
+  induction 1; simpl; intuition (subst; eauto).
+Qed.
+
+Lemma nested_sok_elocal ge sp rs0 m0 st2 exits:
+  nested_sok ge sp rs0 m0 st2 exits ->
+  forall st1, (sok_local ge sp rs0 m0 st1 -> sok_local ge sp rs0 m0 st2) ->
+  nested_sok ge sp rs0 m0 st1 exits.
+Proof.
+  induction 1; [intros; constructor|].
+  intros. constructor; auto.
+Qed.
+
+Lemma nested_sok_tail ge sp rs0 m0 st lx exits:
+  is_tail lx exits ->
+  nested_sok ge sp rs0 m0 st exits ->
+  nested_sok ge sp rs0 m0 st lx.
+Proof.
+  induction 1; [auto|].
+  intros. inv H0. eapply IHis_tail. eapply nested_sok_elocal; eauto.
+Qed.
+
+Definition hsistate_refines_stat (hst: hsistate) (st:sistate): Prop :=
+  hsi_pc hst = si_pc st
+  /\ hsiexits_refines_stat (hsi_exits hst) (si_exits st).
+
+Definition hsistate_refines_dyn ge sp rs0 m0 (hst: hsistate) (st:sistate): Prop :=
+     hsiexits_refines_dyn ge sp rs0 m0 (hsi_exits hst) (si_exits st)
+  /\ hsilocal_refines ge sp rs0 m0 (hsi_local hst) (si_local st)
+  /\ nested_sok ge sp rs0 m0 (si_local st) (si_exits st) (* invariant necessary to prove "monotony" of sok_local along execution *)
+  .
+
+(** *** Syntax and Semantics of symbolic state *)
+
+Definition hfinal_proj (hfv: hsfval) : sfval := 
+  match hfv with
+  | HSnone => Snone
+  | HScall s hvi hlv r pc => Scall s (sum_left_map hsval_proj hvi) (hsval_list_proj hlv) r pc
+  | HStailcall s hvi hlv => Stailcall s (sum_left_map hsval_proj hvi) (hsval_list_proj hlv)
+  | HSbuiltin ef lbh br pc => Sbuiltin ef (List.map (builtin_arg_map hsval_proj) lbh) br pc
+  | HSjumptable hv ln => Sjumptable (hsval_proj hv) ln
+  | HSreturn oh => Sreturn (option_map hsval_proj oh)
+  end.
+
+Section HFINAL_REFINES.
+
+Variable ge: RTL.genv.
+Variable sp: val.
+Variable rs0: regset.
+Variable m0: mem.
+
+Definition option_refines (ohsv: option hsval) (osv: option sval) :=
+  match ohsv, osv with
+  | Some hsv, Some sv => sval_refines ge sp rs0 m0 hsv sv
+  | None, None => True
+  | _, _ => False
+  end.
+
+Definition sum_refines (hsi: hsval + ident) (si: sval + ident) :=
+  match hsi, si with
+  | inl hv, inl sv => sval_refines ge sp rs0 m0 hv sv
+  | inr id, inr id' => id = id'
+  | _, _ => False
+  end.
+
+Definition bargs_refines (hargs: list (builtin_arg hsval)) (args: list (builtin_arg sval)): Prop :=
+  seval_list_builtin_sval ge sp (List.map (builtin_arg_map hsval_proj) hargs) rs0 m0 = seval_list_builtin_sval ge sp args rs0 m0.
+
+Inductive hfinal_refines: hsfval -> sfval -> Prop :=
+  | hsnone_ref: hfinal_refines HSnone Snone
+  | hscall_ref: forall hros ros hargs args s r pc,
+      sum_refines hros ros ->
+      list_sval_refines ge sp rs0 m0 hargs args ->
+      hfinal_refines (HScall s hros hargs r pc) (Scall s ros args r pc)
+  | hstailcall_ref: forall hros ros hargs args s,
+      sum_refines hros ros ->
+      list_sval_refines ge sp rs0 m0 hargs args ->
+      hfinal_refines (HStailcall s hros hargs) (Stailcall s ros args)
+  | hsbuiltin_ref: forall ef lbha lba br pc,
+      bargs_refines lbha lba ->
+      hfinal_refines (HSbuiltin ef lbha br pc) (Sbuiltin ef lba br pc)
+  | hsjumptable_ref: forall hsv sv lpc,
+      sval_refines ge sp rs0 m0 hsv sv -> hfinal_refines (HSjumptable hsv lpc) (Sjumptable sv lpc)
+  | hsreturn_ref: forall ohsv osv,
+      option_refines ohsv osv -> hfinal_refines (HSreturn ohsv) (Sreturn osv).
+
+End HFINAL_REFINES.
+
+
+Record hsstate := { hinternal:> hsistate; hfinal: hsfval }.
+
+Definition hsstate_refines (hst: hsstate) (st:sstate): Prop :=
+   hsistate_refines_stat (hinternal hst) (internal st)
+  /\ (forall ge sp rs0 m0, hsistate_refines_dyn ge sp rs0 m0 (hinternal hst) (internal st))
+  /\ (forall ge sp rs0 m0, hsok_local ge sp rs0 m0 (hsi_local (hinternal hst)) -> hfinal_refines ge sp rs0 m0 (hfinal hst) (final st))
+  .
+
+(** * Intermediate specifications of the simulation tests *)
+
+(** ** Specification of the simulation test on [hsistate_local].
+       It is motivated by [hsilocal_simu_spec_correct theorem] below
+*)
+Definition hsilocal_simu_spec (oalive: option Regset.t) (hst1 hst2: hsistate_local) :=
+     List.incl (hsi_ok_lsval hst2) (hsi_ok_lsval hst1)
+  /\ (forall r, (match oalive with Some alive => Regset.In r alive | _ => True end) -> PTree.get r hst2 = PTree.get r hst1)
+  /\ hsi_smem hst1 = hsi_smem hst2.
+
+Definition seval_sval_partial ge sp rs0 m0 hsv :=
+  match seval_hsval ge sp hsv rs0 m0 with
+  | Some v => v
+  | None => Vundef
+  end.
+
+Definition select_first (ox oy: option val) :=
+  match ox with
+  | Some v => Some v
+  | None => oy
+  end.
+
+(** If the register was computed by hrs, evaluate the symbolic value from hrs.
+    Else, take the value directly from rs0 *)
+Definition seval_partial_regset ge sp rs0 m0 hrs :=
+  let hrs_eval := PTree.map1 (seval_sval_partial ge sp rs0 m0) hrs in
+  (fst rs0, PTree.combine select_first hrs_eval (snd rs0)).
+
+Lemma seval_partial_regset_get ge sp rs0 m0 hrs r:
+  (seval_partial_regset ge sp rs0 m0 hrs) # r =
+  match (hrs ! r) with Some sv => seval_sval_partial ge sp rs0 m0 sv | None => (rs0 # r) end.
+Proof.
+  unfold seval_partial_regset. unfold Regmap.get. simpl.
+  rewrite PTree.gcombine; [| simpl; reflexivity]. rewrite PTree.gmap1.
+  destruct (hrs ! r); simpl; [reflexivity|].
+  destruct ((snd rs0) ! r); reflexivity.
+Qed.
+
+Lemma ssem_local_refines_hok ge sp rs0 m0 hst st rs m:
+  ssem_local ge sp st rs0 m0 rs m -> hsilocal_refines ge sp rs0 m0 hst st -> hsok_local ge sp rs0 m0 hst.
+Proof.
+  intros H0 (H1 & _ & _). apply H1. eapply ssem_local_sok. eauto.
+Qed.
+
+Lemma hsilocal_simu_spec_nofail ge1 ge2 of sp rs0 m0 hst1 hst2:
+  hsilocal_simu_spec of hst1 hst2 ->
+  (forall s, Genv.find_symbol ge1 s = Genv.find_symbol ge2 s) ->
+  hsok_local ge1 sp rs0 m0 hst1 ->
+  hsok_local ge2 sp rs0 m0 hst2.
+Proof.
+  intros (RSOK & _ & MEMOK) GFS (OKV & OKM). constructor.
+  - intros sv INS. apply RSOK in INS. apply OKV in INS. erewrite seval_preserved; eauto.
+  - erewrite MEMOK in OKM. erewrite smem_eval_preserved; eauto.
+Qed.
+
+Theorem hsilocal_simu_spec_correct hst1 hst2 of ge1 ge2 sp rs0 m0 rs m st1 st2:
+  hsilocal_simu_spec of hst1 hst2 ->
+  hsilocal_refines ge1 sp rs0 m0 hst1 st1 ->
+  hsilocal_refines ge2 sp rs0 m0 hst2 st2 ->
+  (forall s, Genv.find_symbol ge1 s = Genv.find_symbol ge2 s) ->
+  ssem_local ge1 sp st1 rs0 m0 rs m ->
+  match of with
+  | None => ssem_local ge2 sp st2 rs0 m0 rs m
+  | Some alive => 
+      let rs' := seval_partial_regset ge2 sp rs0 m0 (hsi_sreg hst2)
+      in ssem_local ge2 sp st2 rs0 m0 rs' m /\ eqlive_reg (fun r => Regset.In r alive) rs rs'
+  end.
+Proof.
+  intros CORE HREF1 HREF2 GFS SEML.
+  refine (modusponens _ _ (ssem_local_refines_hok _ _ _ _ _ _ _ _ _ _) _); eauto.
+  intro HOK1.
+  refine (modusponens _ _ (hsilocal_simu_spec_nofail _ _ _ _ _ _ _ _ _ _ _) _); eauto.
+  intro HOK2.
+  destruct SEML as (PRE & MEMEQ & RSEQ).
+  assert (SIPRE: si_pre st2 ge2 sp rs0 m0). { destruct HREF2 as (OKEQ & _ & _). rewrite <- OKEQ in HOK2. apply HOK2. }
+  assert (SMEMEVAL: seval_smem ge2 sp (si_smem st2) rs0 m0 = Some m). {
+    destruct HREF2 as (_ & MEMEQ2 & _). destruct HREF1 as (_ & MEMEQ1 & _).
+    destruct CORE as (_ & _ & MEMEQ3).
+    rewrite <- MEMEQ2; auto. rewrite <- MEMEQ3.
+    erewrite smem_eval_preserved; [| eapply GFS].
+    rewrite MEMEQ1; auto. }
+  destruct of as [alive |].
+  - constructor.
+    + constructor; [assumption | constructor; [assumption|]].
+      destruct HREF2 as (B & _ & A & _).
+      (** B is used for the auto below. *)
+      assert (forall r : positive, hsi_sreg_eval ge2 sp hst2 r rs0 m0 = seval_sval ge2 sp (si_sreg st2 r) rs0 m0) by auto.
+      intro r. rewrite <- H. clear H. 
+      generalize (A HOK2 r). unfold hsi_sreg_eval.
+      rewrite seval_partial_regset_get.
+      unfold hsi_sreg_proj.
+      destruct (hst2 ! r) eqn:HST2; [| simpl; reflexivity].
+      unfold seval_sval_partial. generalize HOK2; rewrite <- B; intros (_ & _ & C) D.
+      assert (seval_sval ge2 sp (hsval_proj h) rs0 m0 <> None) by congruence.
+      destruct (seval_sval ge2 sp _ rs0 m0); [reflexivity | contradiction].
+    + intros r ALIVE. destruct HREF2 as (_ & _ & A & _). destruct HREF1 as (_ & _ & B & _).
+      destruct CORE as (_ & C & _). rewrite seval_partial_regset_get.
+      assert (OPT: forall (x y: val), Some x = Some y -> x = y) by congruence.
+      destruct (hst2 ! r) eqn:HST2; apply OPT; clear OPT.
+      ++ unfold seval_sval_partial.
+         assert (seval_sval ge2 sp (hsval_proj h) rs0 m0 = hsi_sreg_eval ge2 sp hst2 r rs0 m0). {
+           unfold hsi_sreg_eval, hsi_sreg_proj. rewrite HST2. reflexivity. }
+         rewrite H. clear H. unfold hsi_sreg_eval, hsi_sreg_proj. rewrite C; [|assumption].
+         erewrite seval_preserved; [| eapply GFS].
+         unfold hsi_sreg_eval, hsi_sreg_proj in B; rewrite B; [|assumption]. rewrite RSEQ. reflexivity.
+      ++ rewrite <- RSEQ. rewrite <- B; [|assumption]. unfold hsi_sreg_eval, hsi_sreg_proj.
+         rewrite <- C; [|assumption]. rewrite HST2. reflexivity.
+  - constructor; [|constructor].
+    + destruct HREF2 as (OKEQ & _ & _). rewrite <- OKEQ in HOK2. apply HOK2.
+    + destruct HREF2 as (_ & MEMEQ2 & _). destruct HREF1 as (_ & MEMEQ1 & _).
+      destruct CORE as (_ & _ & MEMEQ3).
+      rewrite <- MEMEQ2; auto. rewrite <- MEMEQ3.
+      erewrite smem_eval_preserved; [| eapply GFS].
+      rewrite MEMEQ1; auto.
+    + intro r. destruct HREF2 as (_ & _ & A & _). destruct HREF1 as (_ & _ & B & _).
+      destruct CORE as (_ & C & _). rewrite <- A; auto. unfold hsi_sreg_eval, hsi_sreg_proj.
+      rewrite C; [|auto]. erewrite seval_preserved; [| eapply GFS].
+      unfold hsi_sreg_eval, hsi_sreg_proj in B; rewrite B; auto.
+Qed.
+
+(** ** Specification of the simulation test on [hsistate_exit].
+       It is motivated by [hsiexit_simu_spec_correct theorem] below
+*)
+Definition hsiexit_simu_spec dm f (hse1 hse2: hsistate_exit) :=
+  (exists path, (fn_path f) ! (hsi_ifso hse1) = Some path
+    /\ hsilocal_simu_spec (Some path.(input_regs)) (hsi_elocal hse1) (hsi_elocal hse2))
+  /\ dm ! (hsi_ifso hse2) = Some (hsi_ifso hse1)
+  /\ hsi_cond hse1 = hsi_cond hse2
+  /\ hsi_scondargs hse1 = hsi_scondargs hse2.
+
+Definition hsiexit_simu dm f (ctx: simu_proof_context f) hse1 hse2: Prop := forall se1 se2,
+  hsiexit_refines_stat hse1 se1 ->
+  hsiexit_refines_stat hse2 se2 ->
+  hsiexit_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse1 se1 ->
+  hsiexit_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse2 se2 ->
+  siexit_simu dm f ctx se1 se2.
+
+Lemma hsiexit_simu_spec_nofail dm f hse1 hse2 ge1 ge2 sp rs m:
+  hsiexit_simu_spec dm f hse1 hse2 ->
+  (forall s, Genv.find_symbol ge1 s = Genv.find_symbol ge2 s) ->
+  hsok_local ge1 sp rs m (hsi_elocal hse1) ->
+  hsok_local ge2 sp rs m (hsi_elocal hse2).
+Proof.
+  intros CORE GFS HOK1.
+  destruct CORE as ((p & _ & CORE') & _ & _ & _).
+  eapply hsilocal_simu_spec_nofail; eauto.
+Qed.
+
+Theorem hsiexit_simu_spec_correct dm f hse1 hse2 ctx:
+  hsiexit_simu_spec dm f hse1 hse2 ->
+  hsiexit_simu dm f ctx hse1 hse2.
+Proof.
+  intros SIMUC st1 st2 HREF1 HREF2 HDYN1 HDYN2.
+  assert (SEVALC:
+   sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal st1) ->
+    (seval_condition (the_ge1 ctx) (the_sp ctx) (si_cond st1) (si_scondargs st1) (si_smem (si_elocal st1)) 
+      (the_rs0 ctx) (the_m0 ctx)) =
+    (seval_condition (the_ge2 ctx) (the_sp ctx) (si_cond st2) (si_scondargs st2) (si_smem (si_elocal st2)) 
+      (the_rs0 ctx) (the_m0 ctx))).
+  { destruct HDYN1 as ((OKEQ1 & _) & SCOND1).
+    rewrite OKEQ1; intro OK1. rewrite <- SCOND1 by assumption. clear SCOND1.
+    generalize (genv_match ctx).
+    intro GFS; exploit hsiexit_simu_spec_nofail; eauto.
+    destruct HDYN2 as (_ & SCOND2). intro OK2. rewrite <- SCOND2 by assumption. clear OK1 OK2 SCOND2.
+    destruct SIMUC as ((path & _ & LSIMU) & _ & CONDEQ & ARGSEQ). destruct LSIMU as (_ & _ & MEMEQ).
+    rewrite CONDEQ. rewrite ARGSEQ. rewrite MEMEQ. erewrite <- hseval_condition_preserved; eauto.
+  }
+  constructor; [assumption|]. intros is1 ICONT SSEME.
+  assert (OK1: sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal st1)). {
+    destruct SSEME as (_ & SSEML & _). eapply ssem_local_sok; eauto. }
+  assert (HOK1: hsok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (hsi_elocal hse1)). {
+    destruct HDYN1 as (LREF & _). destruct LREF as (OKEQ & _ & _). rewrite <- OKEQ. assumption. }
+  exploit hsiexit_simu_spec_nofail. 2: eapply ctx. all: eauto. intro HOK2.
+  destruct SSEME as (SCOND & SLOC & PCEQ). destruct SIMUC as ((path & PATH & LSIMU) & REVEQ & _ & _); eauto.
+  destruct HDYN1 as (LREF1 & _). destruct HDYN2 as (LREF2 & _).
+  exploit hsilocal_simu_spec_correct; eauto; [apply ctx|]. simpl.
+  intros (SSEML & EQREG).
+  eexists (mk_istate (icontinue is1) (si_ifso st2) _ (imem is1)). simpl. constructor.
+  - constructor; intuition congruence || eauto.
+  - unfold istate_simu. rewrite ICONT.
+    simpl. assert (PCEQ': hsi_ifso hse1 = ipc is1) by congruence.
+    exists path. constructor; [|constructor]; [congruence| |congruence].
+    constructor; [|constructor]; simpl; auto.
+Qed.
+
+Remark hsiexit_simu_siexit dm f ctx hse1 hse2 se1 se2:
+  hsiexit_simu dm f ctx hse1 hse2 ->
+  hsiexit_refines_stat hse1 se1 ->
+  hsiexit_refines_stat hse2 se2 ->
+  hsiexit_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse1 se1 ->
+  hsiexit_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse2 se2 ->
+  siexit_simu dm f ctx se1 se2.
+Proof.
+  auto.
+Qed.
+
+(** ** Specification of the simulation test on [list hsistate_exit].
+       It is motivated by [hsiexit_simu_spec_correct theorem] below
+*)
+
+Definition hsiexits_simu dm f (ctx: simu_proof_context f) (lhse1 lhse2: list hsistate_exit): Prop :=
+  list_forall2 (hsiexit_simu dm f ctx) lhse1 lhse2.
+
+Definition hsiexits_simu_spec dm f lhse1 lhse2: Prop :=
+  list_forall2 (hsiexit_simu_spec dm f) lhse1 lhse2.
+
+Theorem hsiexits_simu_spec_correct dm f lhse1 lhse2 ctx:
+  hsiexits_simu_spec dm f lhse1 lhse2 ->
+  hsiexits_simu dm f ctx lhse1 lhse2.
+Proof.
+  induction 1; [constructor|].
+  constructor; [|apply IHlist_forall2; assumption].
+  apply hsiexit_simu_spec_correct; assumption.
+Qed.
+
+
+Lemma siexits_simu_all_fallthrough dm f ctx: forall lse1 lse2,
+  siexits_simu dm f lse1 lse2 ctx ->
+  all_fallthrough (the_ge1 ctx) (the_sp ctx) lse1 (the_rs0 ctx) (the_m0 ctx) ->
+  (forall se1, In se1 lse1 -> sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal se1)) ->
+  all_fallthrough (the_ge2 ctx) (the_sp ctx) lse2 (the_rs0 ctx) (the_m0 ctx).
+Proof.
+  induction 1; [unfold all_fallthrough; contradiction|]; simpl.
+  intros X OK ext INEXT. eapply all_fallthrough_revcons in X. destruct X as (SEVAL & ALLFU).
+  apply IHlist_forall2 in ALLFU.
+  - destruct H as (CONDSIMU & _).
+    inv INEXT; [|eauto].
+    erewrite <- CONDSIMU; eauto.
+  - intros; intuition.
+Qed.
+
+
+Lemma siexits_simu_all_fallthrough_upto dm f ctx lse1 lse2:
+  siexits_simu dm f lse1 lse2 ctx ->
+  forall ext1 lx1,
+  (forall se1, In se1 lx1 -> sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal se1)) ->
+  all_fallthrough_upto_exit (the_ge1 ctx) (the_sp ctx) ext1 lx1 lse1 (the_rs0 ctx) (the_m0 ctx) ->
+  exists ext2 lx2,
+    all_fallthrough_upto_exit (the_ge2 ctx) (the_sp ctx) ext2 lx2 lse2 (the_rs0 ctx) (the_m0 ctx)
+  /\ length lx1 = length lx2.
+Proof.
+  induction 1.
+  - intros ext lx1. intros OK H. destruct H as (ITAIL & ALLFU). eapply is_tail_false in ITAIL. contradiction.
+  - simpl; intros ext lx1 OK ALLFUE.
+    destruct ALLFUE as (ITAIL & ALLFU). inv ITAIL.
+    + eexists; eexists.
+      constructor; [| eapply list_forall2_length; eauto].
+      constructor; [econstructor | eapply siexits_simu_all_fallthrough; eauto].
+    + exploit IHlist_forall2.
+      * intuition. apply OK. eassumption.
+      * constructor; eauto.
+      * intros (ext2 & lx2 & ALLFUE2 & LENEQ).
+        eexists; eexists. constructor; eauto.
+        eapply all_fallthrough_upto_exit_cons; eauto.
+Qed.
+
+
+Lemma hsiexits_simu_siexits dm f ctx lhse1 lhse2:
+  hsiexits_simu dm f ctx lhse1 lhse2 ->
+  forall lse1 lse2,
+  hsiexits_refines_stat lhse1 lse1 ->
+  hsiexits_refines_stat lhse2 lse2 ->
+  hsiexits_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) lhse1 lse1 ->
+  hsiexits_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) lhse2 lse2 ->
+  siexits_simu dm f lse1 lse2 ctx.
+Proof.
+  induction 1.
+  - intros. inv H. inv H0. constructor.
+  - intros lse1 lse2 SREF1 SREF2 DREF1 DREF2. inv SREF1. inv SREF2. inv DREF1. inv DREF2.
+    constructor; [| eapply IHlist_forall2; eauto].
+    eapply hsiexit_simu_siexit; eauto.
+Qed.
+
+
+(** ** Specification of the simulation test on [hsistate].
+       It is motivated by [hsistate_simu_spec_correct theorem] below
+*)
+
+Definition hsistate_simu_spec dm f (hse1 hse2: hsistate) :=
+     list_forall2 (hsiexit_simu_spec dm f) (hsi_exits hse1) (hsi_exits hse2)
+  /\ hsilocal_simu_spec None (hsi_local hse1) (hsi_local hse2).
+
+Definition hsistate_simu dm f (hst1 hst2: hsistate) (ctx: simu_proof_context f): Prop := forall st1 st2,
+  hsistate_refines_stat hst1 st1 ->
+  hsistate_refines_stat hst2 st2 ->
+  hsistate_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hst1 st1 ->
+  hsistate_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hst2 st2 ->
+  sistate_simu dm f st1 st2 ctx.
+
+Lemma list_forall2_nth_error {A} (l1 l2: list A) P:
+  list_forall2 P l1 l2 ->
+  forall x1 x2 n,
+  nth_error l1 n = Some x1 ->
+  nth_error l2 n = Some x2 ->
+  P x1 x2.
+Proof.
+  induction 1.
+  - intros. rewrite nth_error_nil in H. discriminate.
+  - intros x1 x2 n. destruct n as [|n]; simpl.
+    + intros. inv H1. inv H2. assumption.
+    + apply IHlist_forall2.
+Qed.
+
+Lemma is_tail_length {A} (l1 l2: list A):
+  is_tail l1 l2 ->
+  (length l1 <= length l2)%nat.
+Proof.
+  induction l2.
+  - intro. destruct l1; auto. apply is_tail_false in H. contradiction.
+  - intros ITAIL. inv ITAIL; auto.
+    apply IHl2 in H1. clear IHl2. simpl. omega.
+Qed.
+
+Lemma is_tail_nth_error {A} (l1 l2: list A) x:
+  is_tail (x::l1) l2 ->
+  nth_error l2 ((length l2) - length l1 - 1) = Some x.
+Proof.
+  induction l2.
+  - intro ITAIL. apply is_tail_false in ITAIL. contradiction.
+  - intros ITAIL. assert (length (a::l2) = S (length l2)) by auto. rewrite H. clear H.
+    assert (forall n n', ((S n) - n' - 1)%nat = (n - n')%nat) by (intros; omega). rewrite H. clear H.
+    inv ITAIL.
+    + assert (forall n, (n - n)%nat = 0%nat) by (intro; omega). rewrite H.
+      simpl. reflexivity.
+    + exploit IHl2; eauto. intros. clear IHl2.
+      assert (forall n n', (n > n')%nat -> (n - n')%nat = S (n - n' - 1)%nat) by (intros; omega).
+      exploit (is_tail_length (x::l1)); eauto. intro. simpl in H2.
+      assert ((length l2 > length l1)%nat) by omega. clear H2.
+      rewrite H0; auto.
+Qed.
+
+Theorem hsistate_simu_spec_correct dm f hst1 hst2 ctx:
+  hsistate_simu_spec dm f hst1 hst2 ->
+  hsistate_simu dm f hst1 hst2 ctx.
+Proof.
+  intros (ESIMU & LSIMU) st1 st2 (PCREF1 & EREF1) (PCREF2 & EREF2) DREF1 DREF2 is1 SEMI.
+  destruct DREF1 as (DEREF1 & LREF1 & NESTED). destruct DREF2 as (DEREF2 & LREF2 & _).
+  exploit hsiexits_simu_spec_correct; eauto. intro HESIMU.
+  unfold ssem_internal in SEMI. destruct (icontinue _) eqn:ICONT.
+  - destruct SEMI as (SSEML & PCEQ & ALLFU).
+    exploit hsilocal_simu_spec_correct; eauto; [apply ctx|]. simpl. intro SSEML2.
+    exists (mk_istate (icontinue is1) (si_pc st2) (irs is1) (imem is1)). constructor.
+    + unfold ssem_internal. simpl. rewrite ICONT. constructor; [assumption | constructor; [reflexivity |]].
+      eapply siexits_simu_all_fallthrough; eauto.
+      * eapply hsiexits_simu_siexits; eauto.
+      * eapply nested_sok_prop; eauto.
+        eapply ssem_local_sok; eauto.
+    + unfold istate_simu. rewrite ICONT. constructor; [simpl; assumption | constructor; [| reflexivity]].
+      constructor.
+  - destruct SEMI as (ext & lx & SSEME & ALLFU).
+    assert (SESIMU: siexits_simu dm f (si_exits st1) (si_exits st2) ctx) by (eapply hsiexits_simu_siexits; eauto).
+    exploit siexits_simu_all_fallthrough_upto; eauto.
+    * destruct ALLFU as (ITAIL & ALLF).
+      exploit nested_sok_tail; eauto. intros NESTED2.
+      inv NESTED2. destruct SSEME as (_ & SSEML & _). eapply ssem_local_sok in SSEML.
+      eapply nested_sok_prop; eauto.
+    * intros (ext2 & lx2 & ALLFU2 & LENEQ).
+      assert (EXTSIMU: siexit_simu dm f ctx ext ext2). {
+        eapply list_forall2_nth_error; eauto.
+        - destruct ALLFU as (ITAIL & _). eapply is_tail_nth_error; eauto.
+        - destruct ALLFU2 as (ITAIL & _). eapply is_tail_nth_error in ITAIL.
+          assert (LENEQ': length (si_exits st1) = length (si_exits st2)) by (eapply list_forall2_length; eauto).
+          congruence. }
+      destruct EXTSIMU as (CONDEVAL & EXTSIMU).
+      apply EXTSIMU in SSEME; [|assumption]. clear EXTSIMU. destruct SSEME as (is2 & SSEME2 & ISIMU).
+      exists (mk_istate (icontinue is1) (ipc is2) (irs is2) (imem is2)). constructor.
+      + unfold ssem_internal. simpl. rewrite ICONT. exists ext2, lx2. constructor; assumption.
+      + unfold istate_simu in *. rewrite ICONT in *. destruct ISIMU as (path & PATHEQ & ISIMULIVE & DMEQ).
+        destruct ISIMULIVE as (CONTEQ & REGEQ & MEMEQ).
+        exists path. repeat (constructor; auto).
+Qed.
+
+
+(** ** Specification of the simulation test on [sfval].
+       It is motivated by [hfinal_simu_spec_correct theorem] below
+*)
+
+
+Definition final_simu_spec (dm: PTree.t node) (f: RTLpath.function) (pc1 pc2: node) (f1 f2: sfval): Prop :=
+  match f1 with
+  | Scall sig1 svos1 lsv1 res1 pc1 =>
+      match f2 with
+      | Scall sig2 svos2 lsv2 res2 pc2 =>
+          dm ! pc2 = Some pc1 /\ sig1 = sig2 /\ svos1 = svos2 /\ lsv1 = lsv2 /\ res1 = res2
+      | _ => False
+      end
+  | Sbuiltin ef1 lbs1 br1 pc1 =>
+      match f2 with
+      | Sbuiltin ef2 lbs2 br2 pc2 =>
+          dm ! pc2 = Some pc1 /\ ef1 = ef2 /\ lbs1 = lbs2 /\ br1 = br2
+      | _ => False
+      end
+  | Sjumptable sv1 lpc1 =>
+      match f2 with
+      | Sjumptable sv2 lpc2 =>
+          ptree_get_list dm lpc2 = Some lpc1 /\ sv1 = sv2
+      | _ => False
+      end
+  | Snone =>
+      match f2 with
+      | Snone => dm ! pc2 = Some pc1
+      | _ => False
+      end
+  (* Stailcall, Sreturn *)
+  | _ => f1 = f2
+  end.
+
+Definition hfinal_simu_spec (dm: PTree.t node) (f: RTLpath.function) (pc1 pc2: node) (hf1 hf2: hsfval): Prop :=
+  final_simu_spec dm f pc1 pc2 (hfinal_proj hf1) (hfinal_proj hf2).
+
+Lemma svident_simu_refl f ctx s:
+  svident_simu f ctx s s.
+Proof.
+  destruct s; constructor; [| reflexivity].
+  erewrite <- seval_preserved; [| eapply ctx]. constructor.
+Qed.
+
+Lemma list_proj_refines_eq ge ge' sp rs0 m0 lsv lhsv:
+  (forall s, Genv.find_symbol ge s = Genv.find_symbol ge' s) ->
+  list_sval_refines ge sp rs0 m0 lhsv lsv ->
+  forall lhsv' lsv',
+  list_sval_refines ge' sp rs0 m0 lhsv' lsv' ->
+  hsval_list_proj lhsv = hsval_list_proj lhsv' ->
+  seval_list_sval ge sp lsv rs0 m0 = seval_list_sval ge' sp lsv' rs0 m0.
+Proof.
+  intros GFS H lhsv' lsv' H' H0.
+  erewrite <- H, H0.
+  erewrite list_sval_eval_preserved; eauto.
+Qed.
+
+Lemma seval_builtin_sval_preserved ge ge' sp sv rs0 m0:
+   (forall s : ident, Genv.find_symbol ge' s = Genv.find_symbol ge s) ->
+   seval_builtin_sval ge sp sv rs0 m0 =
+   seval_builtin_sval ge' sp sv rs0 m0.
+Proof.
+  induction sv; intro FIND; cbn.
+  all: try (erewrite seval_preserved by eauto); trivial.
+  all: erewrite IHsv1 by eauto; erewrite IHsv2 by eauto; reflexivity.
+Qed.
+
+Lemma seval_list_builtin_sval_preserved ge ge' sp lsv rs0 m0:
+   (forall s : ident, Genv.find_symbol ge' s = Genv.find_symbol ge s) ->
+   seval_list_builtin_sval ge sp lsv rs0 m0 =
+   seval_list_builtin_sval ge' sp lsv rs0 m0.
+Proof.
+  induction lsv; intro FIND; cbn. { trivial. }
+  erewrite seval_builtin_sval_preserved by eauto.
+  erewrite IHlsv by eauto.
+  reflexivity.
+Qed.                              
+
+Lemma barg_proj_refines_eq ge ge' sp rs0 m0:
+  (forall s, Genv.find_symbol ge s = Genv.find_symbol ge' s) ->
+  forall lhsv lsv, bargs_refines ge sp rs0 m0 lhsv lsv ->
+  forall lhsv' lsv', bargs_refines ge' sp rs0 m0 lhsv' lsv' ->
+  List.map (builtin_arg_map hsval_proj) lhsv = List.map (builtin_arg_map hsval_proj) lhsv' ->
+  seval_list_builtin_sval ge sp lsv rs0 m0 = seval_list_builtin_sval ge' sp lsv' rs0 m0.
+Proof.
+  unfold bargs_refines; intros GFS lhsv lsv H lhsv' lsv' H' H0.
+  erewrite <- H, H0.
+  erewrite seval_list_builtin_sval_preserved; eauto.
+Qed.
+
+Lemma sval_refines_proj ge ge' sp rs m hsv sv hsv' sv':
+  (forall s, Genv.find_symbol ge s = Genv.find_symbol ge' s) ->
+  sval_refines ge sp rs m hsv sv ->
+  sval_refines ge' sp rs m hsv' sv' ->
+  hsval_proj hsv = hsval_proj hsv' ->
+  seval_sval ge sp sv rs m = seval_sval ge' sp sv' rs m.
+Proof.
+  intros GFS REF REF' PROJ.
+  rewrite <- REF, PROJ.
+  erewrite <- seval_preserved; eauto.
+Qed.
+
+Theorem hfinal_simu_spec_correct dm f ctx opc1 opc2 hf1 hf2 f1 f2:
+  hfinal_simu_spec dm f opc1 opc2 hf1 hf2 ->
+  hfinal_refines (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hf1 f1 ->
+  hfinal_refines (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hf2 f2 ->
+  sfval_simu dm f opc1 opc2 ctx f1 f2.
+Proof.
+  assert (GFS: forall s : ident, Genv.find_symbol (the_ge1 ctx) s = Genv.find_symbol (the_ge2 ctx) s) by apply ctx.
+  intros CORE FREF1 FREF2.
+  destruct hf1; inv FREF1.
+  (* Snone *)
+  - destruct hf2; try contradiction. inv FREF2.
+    inv CORE. constructor. assumption.
+  (* Scall *)
+  - rename H5 into SREF1. rename H6 into LREF1.
+    destruct hf2; try contradiction. inv FREF2.
+    rename H5 into SREF2. rename H6 into LREF2.
+    destruct CORE as (PCEQ & ? & ? & ? & ?). subst.
+    rename H0 into SVOSEQ. rename H1 into LSVEQ.
+    constructor; [assumption | |].
+    + destruct svos.
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ.
+        simpl in SREF1. simpl in SREF2.
+        rewrite <- SREF1. rewrite <- SREF2.
+        erewrite <- seval_preserved; [| eapply GFS]. congruence.
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ. congruence.
+    + erewrite list_proj_refines_eq; eauto.
+  (* Stailcall *)
+  - rename H3 into SREF1. rename H4 into LREF1.
+    destruct hf2; try (inv CORE; fail). inv FREF2.
+    rename H4 into LREF2. rename H3 into SREF2.
+    inv CORE. rename H1 into SVOSEQ. rename H2 into LSVEQ.
+    constructor.
+    + destruct svos. (** Copy-paste from Scall *)
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ.
+        simpl in SREF1. simpl in SREF2.
+        rewrite <- SREF1. rewrite <- SREF2.
+        erewrite <- seval_preserved; [| eapply GFS]. congruence.
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ. congruence.
+    + erewrite list_proj_refines_eq; eauto.
+  (* Sbuiltin *)
+  - rename H4 into BREF1. destruct hf2; try (inv CORE; fail). inv FREF2.
+    rename H4 into BREF2. inv CORE. destruct H0 as (? & ? & ?). subst.
+    rename H into PCEQ. rename H1 into ARGSEQ. constructor; [assumption|].
+    erewrite barg_proj_refines_eq; eauto. constructor.
+  (* Sjumptable *)
+  - rename H2 into SREF1. destruct hf2; try contradiction. inv FREF2.
+    rename H2 into SREF2. destruct CORE as (A & B). constructor; [assumption|].
+    erewrite sval_refines_proj; eauto.
+  (* Sreturn *)
+  - rename H0 into SREF1.
+    destruct hf2; try discriminate. inv CORE.
+    inv FREF2. destruct osv; destruct res; inv SREF1.
+    + destruct res0; try discriminate. destruct osv0; inv H1.
+      constructor. simpl in H0. inv H0. erewrite sval_refines_proj; eauto.
+    + destruct res0; try discriminate. destruct osv0; inv H1. constructor.
+Qed.
+
+
+(** ** Specification of the simulation test on [hsstate].
+       It is motivated by [hsstate_simu_spec_correct theorem] below
+*)
+
+Definition hsstate_simu_spec (dm: PTree.t node) (f: RTLpath.function) (hst1 hst2: hsstate) :=
+     hsistate_simu_spec dm f (hinternal hst1) (hinternal hst2)
+  /\ hfinal_simu_spec dm f (hsi_pc (hinternal hst1)) (hsi_pc (hinternal hst2)) (hfinal hst1) (hfinal hst2).
+
+Definition hsstate_simu dm f (hst1 hst2: hsstate) ctx: Prop :=
+  forall st1 st2,
+  hsstate_refines hst1 st1 ->
+  hsstate_refines hst2 st2 -> sstate_simu dm f st1 st2 ctx.
+
+Theorem hsstate_simu_spec_correct dm f ctx hst1 hst2:
+  hsstate_simu_spec dm f hst1 hst2 ->
+  hsstate_simu dm f hst1 hst2 ctx.
+Proof.
+  intros (SCORE & FSIMU) st1 st2 (SREF1 & DREF1 & FREF1) (SREF2 & DREF2 & FREF2).
+  generalize SCORE. intro SIMU; eapply hsistate_simu_spec_correct in SIMU; eauto.
+  constructor; auto.
+  intros is1 SEM1 CONT1.
+  unfold hsistate_simu in SIMU. exploit SIMU; clear SIMU; eauto.
+  unfold istate_simu, ssem_internal in *; intros (is2 & SEM2 & SIMU).
+  rewrite! CONT1 in *. destruct SIMU as (CONT2 & _).
+  rewrite! CONT1, <- CONT2 in *.
+  destruct SEM1 as (SEM1 & _ & _).
+  destruct SEM2 as (SEM2 & _ & _).
+  eapply hfinal_simu_spec_correct in FSIMU; eauto.
+  - destruct SREF1 as (PC1 & _). destruct SREF2 as (PC2 & _). rewrite <- PC1. rewrite <- PC2.
+    eapply FSIMU.
+  - eapply FREF1. exploit DREF1. intros (_ & (OK & _) & _). rewrite <- OK. eapply ssem_local_sok; eauto.
+  - eapply FREF2. exploit DREF2. intros (_ & (OK & _) & _). rewrite <- OK. eapply ssem_local_sok; eauto.
+Qed.
diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v
new file mode 100644
index 00000000..4c492ecd
--- /dev/null
+++ b/scheduling/RTLpathSE_theory.v
@@ -0,0 +1,1897 @@
+(* A theory of symbolic execution on RTLpath
+
+NB: an efficient implementation with hash-consing will be defined in RTLpathSE_impl.v
+
+*)
+
+Require Import Coqlib Maps Floats.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL RTLpath.
+Require Import Errors Duplicate.
+
+Local Open Scope error_monad_scope.
+
+(* Enhanced from kvx/Asmblockgenproof.v *)
+Ltac explore_hyp :=
+  repeat match goal with
+  | [ H : match ?var with | _ => _ end = _ |- _ ] => (let EQ1 := fresh "EQ" in (destruct var eqn:EQ1; try discriminate))
+  | [ H : OK _ = OK _ |- _ ] => monadInv H
+  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H : Error _ = OK _ |- _ ] => inversion H
+  | [ H : Some _ = Some _ |- _ ] => inv H
+  | [ x : unit |- _ ] => destruct x
+  end.
+
+Ltac explore := explore_hyp;
+  repeat match goal with
+  | [ |- context[if ?b then _ else _] ] => (let EQ1 := fresh "IEQ" in destruct b eqn:EQ1)
+  | [ |- context[match ?m with | _ => _ end] ] => (let DEQ1 := fresh "DEQ" in destruct m eqn:DEQ1)
+  | [ |- context[match ?m as _ return _ with | _ => _ end]] => (let DREQ1 := fresh "DREQ" in destruct m eqn:DREQ1)
+  end.
+
+(* Ltac explore :=
+  repeat match goal with
+  | [ H : match ?var with | _ => _ end = _ |- _ ] => (let EQ1 := fresh "EQ" in (destruct var eqn:EQ1; try discriminate))
+  | [ H : OK _ = OK _ |- _ ] => monadInv H
+  | [ |- context[if ?b then _ else _] ] => (let EQ1 := fresh "IEQ" in destruct b eqn:EQ1)
+  | [ |- context[match ?m with | _ => _ end] ] => (let DEQ1 := fresh "DEQ" in destruct m eqn:DEQ1)
+  | [ |- context[match ?m as _ return _ with | _ => _ end]] => (let DREQ1 := fresh "DREQ" in destruct m eqn:DREQ1)
+  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H : Error _ = OK _ |- _ ] => inversion H
+  | [ H : Some _ = Some _ |- _ ] => inv H
+  | [ x : unit |- _ ] => destruct x
+  end. *)
+
+(** * Syntax and semantics of symbolic values *)
+
+(* symbolic value *)
+Inductive sval :=
+  | Sinput (r: reg)
+  | Sop (op:operation) (lsv: list_sval)  (sm: smem)
+  | Sload (sm: smem) (trap: trapping_mode) (chunk:memory_chunk) (addr:addressing) (lsv:list_sval) 
+with list_sval := 
+  | Snil
+  | Scons (sv: sval) (lsv: list_sval)
+(* symbolic memory *)
+with smem :=
+  | Sinit 
+  | Sstore (sm: smem) (chunk:memory_chunk) (addr:addressing) (lsv:list_sval) (srce: sval).
+
+Scheme sval_mut := Induction for sval Sort Prop
+with list_sval_mut := Induction for list_sval Sort Prop
+with smem_mut := Induction for smem Sort Prop.
+
+Fixpoint list_sval_inj (l: list sval): list_sval :=
+  match l with
+  | nil => Snil
+  | v::l => Scons v (list_sval_inj l)
+  end.
+
+Local Open Scope option_monad_scope.
+
+Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem): option val :=
+  match sv with
+  | Sinput r => Some (rs0#r)
+  | Sop op l sm =>
+     SOME args <- seval_list_sval ge sp l rs0 m0 IN
+     SOME m <- seval_smem ge sp sm rs0 m0 IN
+     eval_operation ge sp op args m
+  | Sload sm trap chunk addr lsv =>
+      match trap with
+      | TRAP =>
+          SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+          SOME a <- eval_addressing ge sp addr args IN
+          SOME m <- seval_smem ge sp sm rs0 m0 IN
+          Mem.loadv chunk m a
+      | NOTRAP =>
+          SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+          match (eval_addressing ge sp addr args) with
+          | None => Some (default_notrap_load_value chunk)
+          | Some a =>
+              SOME m <- seval_smem ge sp sm rs0 m0 IN
+              match (Mem.loadv chunk m a) with
+              | None => Some (default_notrap_load_value chunk)
+              | Some val => Some val
+              end
+          end
+      end
+  end
+with seval_list_sval (ge: RTL.genv) (sp:val) (lsv: list_sval) (rs0: regset) (m0: mem): option (list val) :=
+  match lsv with
+  | Snil => Some nil
+  | Scons sv lsv' => 
+    SOME v <- seval_sval ge sp sv rs0 m0 IN
+    SOME lv <- seval_list_sval ge sp lsv' rs0 m0 IN
+    Some (v::lv)
+  end
+with seval_smem (ge: RTL.genv) (sp:val) (sm: smem) (rs0: regset) (m0: mem): option mem :=
+  match sm with
+  | Sinit => Some m0
+  | Sstore sm chunk addr lsv srce =>
+     SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+     SOME a <- eval_addressing ge sp addr args IN
+     SOME m <- seval_smem ge sp sm rs0 m0 IN
+     SOME sv <- seval_sval ge sp srce rs0 m0 IN
+     Mem.storev chunk m a sv
+  end.
+
+(* Syntax and Semantics of local symbolic internal states *)
+(* [si_pre] is a precondition on initial ge, sp, rs0, m0 *)
+Record sistate_local := { si_pre: RTL.genv -> val -> regset -> mem -> Prop; si_sreg: reg -> sval; si_smem: smem }.
+
+(* Predicate on which (rs, m) is a possible final state after evaluating [st] on (rs0, m0) *)
+Definition ssem_local (ge: RTL.genv) (sp:val) (st: sistate_local) (rs0: regset) (m0: mem) (rs: regset) (m: mem): Prop :=
+  st.(si_pre) ge sp rs0 m0
+  /\ seval_smem ge sp st.(si_smem) rs0 m0 = Some m
+  /\ forall (r:reg), seval_sval ge sp (st.(si_sreg) r) rs0 m0 = Some (rs#r).
+
+Definition sabort_local (ge: RTL.genv) (sp:val) (st: sistate_local) (rs0: regset) (m0: mem): Prop :=
+  ~(st.(si_pre) ge sp rs0 m0)
+  \/ seval_smem ge sp st.(si_smem) rs0 m0 = None
+  \/ exists (r: reg), seval_sval ge sp (st.(si_sreg) r) rs0 m0 = None.
+
+(* Syntax and semantics of symbolic exit states *)
+Record sistate_exit := mk_sistate_exit
+  { si_cond: condition; si_scondargs: list_sval; si_elocal: sistate_local; si_ifso: node }.
+
+Definition seval_condition ge sp (cond: condition) (lsv: list_sval) (sm: smem) rs0 m0 : option bool :=
+  SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+  SOME m <- seval_smem ge sp sm rs0 m0 IN
+  eval_condition cond args m.
+
+Definition all_fallthrough ge sp (lx: list sistate_exit) rs0 m0: Prop :=
+  forall ext, List.In ext lx ->
+  seval_condition ge sp ext.(si_cond) ext.(si_scondargs) ext.(si_elocal).(si_smem) rs0 m0 = Some false.
+
+Lemma all_fallthrough_revcons ge sp ext rs m lx:
+  all_fallthrough ge sp (ext::lx) rs m ->
+  seval_condition ge sp (si_cond ext) (si_scondargs ext) (si_smem (si_elocal ext)) rs m = Some false
+  /\ all_fallthrough ge sp lx rs m.
+Proof.
+  intros ALLFU. constructor.
+  - assert (In ext (ext::lx)) by (constructor; auto). apply ALLFU in H. assumption.
+  - intros ext' INEXT. assert (In ext' (ext::lx)) by (apply in_cons; auto).
+    apply ALLFU in H. assumption.
+Qed.
+
+(** Semantic of an exit in pseudo code:
+     if si_cond (si_condargs)
+       si_elocal; goto if_so
+     else ()
+*)
+
+Definition ssem_exit (ge: RTL.genv) (sp: val) (ext: sistate_exit) (rs: regset) (m: mem) rs' m' (pc': node) : Prop :=
+    seval_condition ge sp (si_cond ext) (si_scondargs ext) ext.(si_elocal).(si_smem) rs m = Some true
+ /\ ssem_local ge sp (si_elocal ext) rs m rs' m'
+ /\ (si_ifso ext) = pc'.
+
+(* Either an abort on the condition evaluation OR an abort on the sistate_local IF the condition was true *)
+Definition sabort_exit (ge: RTL.genv) (sp: val) (ext: sistate_exit) (rs: regset) (m: mem) : Prop :=
+  let sev_cond := seval_condition ge sp (si_cond ext) (si_scondargs ext) ext.(si_elocal).(si_smem) rs m in
+  sev_cond = None
+  \/ (sev_cond = Some true /\ sabort_local ge sp ext.(si_elocal) rs m).
+
+(** * Syntax and Semantics of symbolic internal state *)
+Record sistate := { si_pc: node; si_exits: list sistate_exit; si_local: sistate_local }. 
+
+Definition all_fallthrough_upto_exit ge sp ext lx' lx rs m : Prop :=
+  is_tail (ext::lx') lx /\ all_fallthrough ge sp lx' rs m.
+
+(** Semantic of a sistate in pseudo code:
+     si_exit1; si_exit2; ...; si_exitn;
+     si_local; goto si_pc *)
+
+(* Note: in RTLpath, is.(icontinue) = false iff we took an early exit *)
+
+Definition ssem_internal (ge: RTL.genv) (sp:val) (st: sistate) (rs: regset) (m: mem) (is: istate): Prop :=
+  if (is.(icontinue)) 
+  then 
+    ssem_local ge sp st.(si_local) rs m is.(irs) is.(imem) 
+    /\ st.(si_pc) = is.(ipc)
+    /\ all_fallthrough ge sp st.(si_exits) rs m
+  else exists ext lx,
+    ssem_exit ge sp ext rs m is.(irs) is.(imem) is.(ipc)
+    /\ all_fallthrough_upto_exit ge sp ext lx st.(si_exits) rs m.
+
+Definition sabort (ge: RTL.genv) (sp: val) (st: sistate) (rs: regset) (m: mem): Prop :=
+  (* No early exit was met but we aborted on the si_local *)
+  (all_fallthrough ge sp st.(si_exits) rs m /\ sabort_local ge sp st.(si_local) rs m)
+  (* OR we aborted on an evaluation of one of the early exits *)
+  \/ (exists ext lx, all_fallthrough_upto_exit ge sp ext lx st.(si_exits) rs m /\ sabort_exit ge sp ext rs m).
+
+Definition ssem_internal_opt ge sp (st: sistate) rs0 m0 (ois: option istate): Prop :=
+  match ois with
+  | Some is => ssem_internal ge sp st rs0 m0 is
+  | None => sabort ge sp st rs0 m0
+  end.
+
+Definition ssem_internal_opt2 ge sp (ost: option sistate) rs0 m0 (ois: option istate) : Prop :=
+  match ost with
+  | Some st => ssem_internal_opt ge sp st rs0 m0 ois
+  | None => ois=None
+  end.
+
+(** * An internal state represents a parallel program !
+
+     We prove below that the semantics [ssem_internal_opt] is deterministic.
+
+ *)
+
+Definition istate_eq ist1 ist2 :=
+  ist1.(icontinue) = ist2.(icontinue) /\
+  ist1.(ipc) = ist2.(ipc) /\
+  (forall r, (ist1.(irs)#r) = ist2.(irs)#r) /\ 
+  ist1.(imem) = ist2.(imem).
+
+Lemma all_fallthrough_noexit ge sp ext lx rs0 m0 rs m pc:
+  ssem_exit ge sp ext rs0 m0 rs m pc ->
+  In ext lx ->
+  all_fallthrough ge sp lx rs0 m0 ->
+  False.
+Proof.
+  Local Hint Resolve is_tail_in: core.
+  intros SSEM INE ALLF.
+  destruct SSEM as (SSEM & SSEM').
+  unfold all_fallthrough in ALLF. rewrite ALLF in SSEM; eauto.
+  discriminate.
+Qed.
+
+Lemma ssem_internal_exclude_incompatible_continue ge sp st rs m is1 is2:
+  is1.(icontinue) = true ->
+  is2.(icontinue) = false ->
+  ssem_internal ge sp st rs m is1 ->
+  ssem_internal ge sp st rs m is2 ->
+  False.
+Proof.
+  Local Hint Resolve all_fallthrough_noexit: core.
+  unfold ssem_internal.
+  intros CONT1 CONT2.
+  rewrite CONT1, CONT2; simpl.
+  intuition eauto.
+  destruct H0 as (ext & lx & SSEME & ALLFU).
+  destruct ALLFU as (ALLFU & ALLFU').
+  eapply all_fallthrough_noexit; eauto.
+Qed.
+
+Lemma ssem_internal_determ_continue ge sp st rs m is1 is2:
+   ssem_internal ge sp st rs m is1 ->
+   ssem_internal ge sp st rs m is2 ->
+   is1.(icontinue) = is2.(icontinue).
+Proof.
+   Local Hint Resolve ssem_internal_exclude_incompatible_continue: core.
+   destruct (Bool.bool_dec is1.(icontinue) is2.(icontinue)) as [|H]; auto.
+   intros H1 H2. assert (absurd: False); intuition.
+   destruct (icontinue is1) eqn: His1, (icontinue is2) eqn: His2; eauto.
+Qed.
+
+Lemma ssem_local_determ ge sp st rs0 m0 rs1 m1 rs2 m2:
+  ssem_local ge sp st rs0 m0 rs1 m1 ->
+  ssem_local ge sp st rs0 m0 rs2 m2 ->
+  (forall r, rs1#r = rs2#r) /\ m1 = m2.
+Proof.
+  unfold ssem_local. intuition try congruence.
+  generalize (H5 r); rewrite H4; congruence.
+Qed.
+
+(* TODO: lemma to move in Coqlib *)
+Lemma is_tail_bounded_total {A} (l1 l2 l3: list A): is_tail l1 l3 -> is_tail l2 l3
+  -> is_tail l1 l2 \/ is_tail l2 l1.
+Proof.
+  Local Hint Resolve is_tail_cons: core.
+  induction 1 as [|i l1 l3 T1 IND]; simpl; auto.
+  intros T2; inversion T2; subst; auto.
+Qed.
+
+Lemma exit_cond_determ ge sp rs0 m0 l1 l2: 
+  is_tail l1 l2 -> forall ext1 lx1 ext2 lx2, 
+  l1=(ext1 :: lx1) -> 
+  l2=(ext2 :: lx2) ->
+  all_fallthrough ge sp lx1 rs0 m0 ->
+  seval_condition ge sp (si_cond ext1) (si_scondargs ext1) (si_smem (si_elocal ext1)) rs0 m0 = Some true ->
+  all_fallthrough ge sp lx2 rs0 m0 ->
+  ext1=ext2.
+Proof.
+  destruct 1 as [l1|i l1 l3 T1]; intros ext1 lx1 ext2 lx2 EQ1 EQ2; subst; 
+  inversion EQ2; subst; auto.
+  intros D1 EVAL NYE.
+  Local Hint Resolve is_tail_in: core.
+  unfold all_fallthrough in NYE.
+  rewrite NYE in EVAL; eauto.
+  try congruence.
+Qed.
+
+Lemma ssem_exit_determ ge sp ext rs0 m0 rs1 m1 pc1 rs2 m2 pc2:
+  ssem_exit ge sp ext rs0 m0 rs1 m1 pc1 ->
+  ssem_exit ge sp ext rs0 m0 rs2 m2 pc2 ->
+  pc1 = pc2 /\ (forall r, rs1#r = rs2#r) /\ m1 = m2.
+Proof.
+  Local Hint Resolve exit_cond_determ eq_sym: core.
+  intros SSEM1 SSEM2. destruct SSEM1 as (SEVAL1 & SLOC1 & PCEQ1). destruct SSEM2 as (SEVAL2 & SLOC2 & PCEQ2). subst.
+  destruct (ssem_local_determ ge sp (si_elocal ext) rs0 m0 rs1 m1 rs2 m2); auto.
+Qed.
+
+Remark is_tail_inv_left {A: Type} (a a': A) l l':
+  is_tail (a::l) (a'::l') ->
+  (a = a' /\ l = l') \/ (In a l' /\ is_tail l (a'::l')).
+Proof.
+  intros. inv H.
+  - left. eauto.
+  - right. econstructor.
+    + eapply is_tail_in; eauto.
+    + eapply is_tail_cons_left; eauto.
+Qed.
+
+Lemma ssem_internal_determ ge sp st rs m is1 is2:
+  ssem_internal ge sp st rs m is1 ->
+  ssem_internal ge sp st rs m is2 ->
+  istate_eq is1 is2.
+Proof.
+  unfold istate_eq.
+  intros SEM1 SEM2. 
+  exploit (ssem_internal_determ_continue ge sp st rs m is1 is2); eauto.
+  intros CONTEQ. unfold ssem_internal in * |-. rewrite CONTEQ in * |- *.
+  destruct (icontinue is2).
+  - destruct (ssem_local_determ ge sp (si_local st) rs m (irs is1) (imem is1) (irs is2) (imem is2)); 
+    intuition (try congruence).
+  - destruct SEM1 as (ext1 & lx1 & SSEME1 & ALLFU1). destruct SEM2 as (ext2 & lx2 & SSEME2 & ALLFU2).
+    destruct ALLFU1 as (ALLFU1 & ALLFU1'). destruct ALLFU2 as (ALLFU2 & ALLFU2').
+    destruct SSEME1 as (SSEME1 & SSEME1' & SSEME1''). destruct SSEME2 as (SSEME2 & SSEME2' & SSEME2'').
+    assert (X:ext1=ext2).
+    { destruct (is_tail_bounded_total (ext1 :: lx1) (ext2 :: lx2) (si_exits st)) as [TAIL|TAIL]; eauto. }
+    subst. destruct (ssem_local_determ ge sp (si_elocal ext2) rs m (irs is1) (imem is1) (irs is2) (imem is2)); auto.
+    intuition. congruence.
+Qed.
+
+Lemma ssem_local_exclude_sabort_local ge sp loc rs m rs' m':
+  ssem_local ge sp loc rs m rs' m' ->
+(*   all_fallthrough ge sp (si_exits st) rs m -> *)
+  sabort_local ge sp loc rs m ->
+  False.
+Proof.
+  intros SIML (* ALLF *) ABORT. inv SIML. destruct H0 as (H0 & H0').
+  inversion ABORT as [ABORT1 | [ABORT2 | ABORT3]]; [ | | inv ABORT3]; congruence.
+Qed.
+
+(* TODO: remove this JUNK ?
+Lemma ssem_local_exclude_sabort_exit ge sp st ext lx rs m rs' m':
+  ssem_local ge sp (si_local st) rs m rs' m' ->
+  all_fallthrough ge sp (si_exits st) rs m ->
+  is_tail (ext :: lx) (si_exits st) ->
+  sabort_exit ge sp ext rs m ->
+  False.
+Proof.
+  intros SSEML ALLF TAIL ABORT.
+  inv ABORT.
+  - exploit ALLF; eauto. congruence.
+  - (* FIXME Problem : if we have a ssem_local, this means we ONLY evaluated the conditions,
+    but we NEVER actually evaluated the si_elocal from the sistate_exit ! So we cannot prove
+    a lack of abort on the si_elocal.. We must change the definitions *)
+Abort.
+*)
+
+Lemma ssem_local_exclude_sabort ge sp st rs m rs' m':
+  ssem_local ge sp (si_local st) rs m rs' m' ->
+  all_fallthrough ge sp (si_exits st) rs m ->
+  sabort ge sp st rs m ->
+  False.
+Proof.
+  intros SIML ALLF ABORT.
+  inv ABORT.
+  - intuition; eapply ssem_local_exclude_sabort_local; eauto.
+  - destruct H as (ext & lx & ALLFU & SABORT).
+    destruct ALLFU as (TAIL & _). eapply is_tail_in in TAIL.
+    eapply ALLF in TAIL.
+    destruct SABORT as [CONDFAIL | (CONDTRUE & ABORTL)]; congruence.
+Qed.
+
+Lemma ssem_exit_fallthrough_upto_exit ge sp ext ext' lx lx' exits rs m rs' m' pc':
+  ssem_exit ge sp ext rs m rs' m' pc' ->
+  all_fallthrough_upto_exit ge sp ext lx exits rs m ->
+  all_fallthrough_upto_exit ge sp ext' lx' exits rs m ->
+  is_tail (ext'::lx') (ext::lx).
+Proof.
+  intros SSEME ALLFU ALLFU'.
+  destruct ALLFU as (ISTAIL & ALLFU). destruct ALLFU' as (ISTAIL' & ALLFU').
+  destruct (is_tail_bounded_total (ext::lx) (ext'::lx') exits); eauto.
+  inv H.
+  - econstructor; eauto.
+  - eapply is_tail_in in H2. eapply ALLFU' in H2.
+    destruct SSEME as (SEVAL & _). congruence.
+Qed.
+
+Lemma ssem_exit_exclude_sabort_exit ge sp ext rs m rs' m' pc':
+  ssem_exit ge sp ext rs m rs' m' pc' ->
+  sabort_exit ge sp ext rs m ->
+  False.
+Proof.
+  intros A B. destruct A as (A & A' & A''). inv B.
+  - congruence.
+  - destruct H as (_ & H). eapply ssem_local_exclude_sabort_local; eauto.
+Qed.
+
+Lemma ssem_exit_exclude_sabort ge sp ext st lx rs m rs' m' pc':
+  ssem_exit ge sp ext rs m rs' m' pc' ->
+  all_fallthrough_upto_exit ge sp ext lx (si_exits st) rs m ->
+  sabort ge sp st rs m ->
+  False.
+Proof.
+  intros SSEM ALLFU ABORT.
+  inv ABORT.
+  - destruct H as (ALLF & _). destruct ALLFU as (TAIL & _).
+    eapply is_tail_in in TAIL.
+    destruct SSEM as (SEVAL & _ & _).
+    eapply ALLF in TAIL. congruence.
+  - destruct H as (ext' & lx' & ALLFU' & ABORT).
+    exploit ssem_exit_fallthrough_upto_exit; eauto. intros ITAIL.
+    destruct ALLFU as (ALLFU1 & ALLFU2). destruct ALLFU' as (ALLFU1' & ALLFU2').
+    exploit (is_tail_inv_left ext' ext lx' lx); eauto. intro. inv H.
+    + inv H0. eapply ssem_exit_exclude_sabort_exit; eauto.
+    + destruct H0 as (INE & TAIL). eapply ALLFU2 in INE. destruct ABORT as [ABORT | (ABORT & ABORT')]; congruence.
+Qed.
+
+Lemma ssem_internal_exclude_sabort ge sp st rs m is:
+  sabort ge sp st rs m ->
+  ssem_internal ge sp st rs m is -> False.
+Proof.
+  intros ABORT SEM.
+  unfold ssem_internal in SEM. destruct icontinue.
+  - destruct SEM as (SEM1 & SEM2 & SEM3).
+    eapply ssem_local_exclude_sabort; eauto.
+  - destruct SEM as (ext & lx & SEM1 & SEM2). eapply ssem_exit_exclude_sabort; eauto.
+Qed.
+
+Definition istate_eq_opt ist1 oist :=
+  exists ist2, oist = Some ist2 /\ istate_eq ist1 ist2.
+
+Lemma ssem_internal_opt_determ ge sp st rs m ois is:
+  ssem_internal_opt ge sp st rs m ois ->
+  ssem_internal ge sp st rs m is ->
+  istate_eq_opt is ois.
+Proof.
+  destruct ois as [is1|]; simpl; eauto.
+  - intros; eexists; intuition; eapply ssem_internal_determ; eauto.
+  - intros; exploit ssem_internal_exclude_sabort; eauto. destruct 1.
+Qed.
+
+(** * Symbolic execution of one internal step *)
+
+Definition slocal_set_sreg (st:sistate_local) (r:reg) (sv:sval) :=
+  {| si_pre:=(fun ge sp rs m => seval_sval ge sp (st.(si_sreg) r) rs m <> None /\ (st.(si_pre) ge sp rs m));
+     si_sreg:=fun y => if Pos.eq_dec r y then sv else st.(si_sreg) y;
+     si_smem:= st.(si_smem)|}.
+
+Definition slocal_set_smem (st:sistate_local) (sm:smem) :=
+  {| si_pre:=(fun ge sp rs m => seval_smem ge sp st.(si_smem) rs m <> None /\ (st.(si_pre) ge sp rs m));
+     si_sreg:= st.(si_sreg);
+     si_smem:= sm |}.
+
+Definition sist_set_local (st: sistate) (pc: node) (nxt: sistate_local): sistate :=
+   {| si_pc := pc; si_exits := st.(si_exits); si_local:= nxt |}.
+
+Definition slocal_store st chunk addr args src : sistate_local :=
+   let args := list_sval_inj (List.map (si_sreg st) args) in
+   let src := si_sreg st src in
+   let sm := Sstore (si_smem st) chunk addr args src
+   in slocal_set_smem st sm.
+
+Definition siexec_inst (i: instruction) (st: sistate): option sistate := 
+  match i with
+  | Inop pc' => 
+      Some (sist_set_local st pc' st.(si_local))
+  | Iop op args dst pc' =>
+      let prev := st.(si_local) in
+      let vargs := list_sval_inj (List.map prev.(si_sreg) args) in
+      let next := slocal_set_sreg prev dst (Sop op vargs prev.(si_smem)) in
+      Some (sist_set_local st pc' next)
+  | Iload trap chunk addr args dst pc' =>
+      let prev := st.(si_local) in
+      let vargs := list_sval_inj (List.map prev.(si_sreg) args) in
+      let next := slocal_set_sreg prev dst (Sload prev.(si_smem) trap chunk addr vargs) in
+      Some (sist_set_local st pc' next)
+  | Istore chunk addr args src pc' =>
+      let next := slocal_store st.(si_local) chunk addr args src in
+      Some (sist_set_local st pc' next)
+   | Icond cond args ifso ifnot _ =>
+      let prev := st.(si_local) in
+      let vargs := list_sval_inj (List.map prev.(si_sreg) args) in
+      let ex := {| si_cond:=cond; si_scondargs:=vargs; si_elocal := prev; si_ifso := ifso |} in
+      Some {| si_pc := ifnot; si_exits := ex::st.(si_exits); si_local := prev |}
+  | _ => None (* TODO jumptable ? *)
+  end.
+
+Lemma seval_list_sval_inj ge sp l rs0 m0 (sreg: reg -> sval) rs: 
+   (forall r : reg, seval_sval ge sp (sreg r) rs0 m0 = Some (rs # r)) ->
+   seval_list_sval ge sp (list_sval_inj (map sreg l)) rs0 m0 = Some (rs ## l).
+Proof.
+  intros H; induction l as [|r l]; simpl; auto.
+  inversion_SOME v.
+  inversion_SOME lv.
+  generalize (H r).
+  try_simplify_someHyps.
+Qed.
+
+Lemma slocal_set_sreg_preserves_sabort_local ge sp st rs0 m0 r sv:
+  sabort_local ge sp st rs0 m0 ->
+  sabort_local ge sp (slocal_set_sreg st r sv) rs0 m0.
+Proof.
+  unfold sabort_local. simpl; intuition.
+  destruct H as [r1 H]. destruct (Pos.eq_dec r r1) as [TEST|TEST] eqn: HTEST.
+  - subst; rewrite H; intuition.
+  - right. right. exists r1. rewrite HTEST. auto.
+Qed.
+
+Lemma slocal_set_smem_preserves_sabort_local ge sp st rs0 m0 m:
+  sabort_local ge sp st rs0 m0 ->
+  sabort_local ge sp (slocal_set_smem st m) rs0 m0.
+Proof.
+  unfold sabort_local. simpl; intuition.
+Qed.
+
+Lemma all_fallthrough_upto_exit_cons ge sp ext lx ext' exits rs m:
+  all_fallthrough_upto_exit ge sp ext lx exits rs m ->
+  all_fallthrough_upto_exit ge sp ext lx (ext'::exits) rs m.
+Proof.
+  intros. inv H. econstructor; eauto.
+Qed.
+
+Lemma all_fallthrough_cons ge sp exits rs m ext:
+  all_fallthrough ge sp exits rs m ->
+  seval_condition ge sp (si_cond ext) (si_scondargs ext) (si_smem (si_elocal ext)) rs m = Some false ->
+  all_fallthrough ge sp (ext::exits) rs m.
+Proof.
+  intros. unfold all_fallthrough in *. intros.
+  inv H1; eauto.
+Qed.
+
+Lemma siexec_inst_preserves_sabort i ge sp rs m st st': 
+  siexec_inst i st = Some st' ->
+  sabort ge sp st rs m -> sabort ge sp st' rs m.
+Proof.
+  intros SISTEP ABORT.
+  destruct i; simpl in SISTEP; try discriminate; inv SISTEP; unfold sabort; simpl.
+  (* NOP *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* OP *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto. eapply slocal_set_sreg_preserves_sabort_local; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* LOAD *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto. eapply slocal_set_sreg_preserves_sabort_local; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* STORE *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto. eapply slocal_set_smem_preserves_sabort_local; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* COND *)
+  * remember ({| si_cond := _; si_scondargs := _; si_elocal := _; si_ifso := _ |}) as ext.
+    destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - destruct (seval_condition ge sp (si_cond ext) (si_scondargs ext)
+        (si_smem (si_elocal ext)) rs m) eqn:SEVAL; [destruct b|].
+      (* case true *)
+      + right. exists ext, (si_exits st).
+        constructor.
+        ++ constructor. econstructor; eauto. eauto.
+        ++ unfold sabort_exit. right. constructor; eauto.
+           subst. simpl. eauto.
+      (* case false *)
+      + left. constructor; eauto. eapply all_fallthrough_cons; eauto.
+      (* case None *)
+      + right. exists ext, (si_exits st). constructor.
+        ++ constructor. econstructor; eauto. eauto.
+        ++ unfold sabort_exit. left. eauto.
+    - right. exists ext0, lx0. constructor; eauto. eapply all_fallthrough_upto_exit_cons; eauto.
+Qed.
+
+Lemma siexec_inst_WF i st:
+  siexec_inst i st = None -> default_succ i = None.
+Proof.
+  destruct i; simpl; unfold sist_set_local; simpl; congruence.
+Qed.
+
+Lemma siexec_inst_default_succ i st st':
+  siexec_inst i st = Some st' -> default_succ i = Some (st'.(si_pc)).
+Proof.
+  destruct i; simpl; unfold sist_set_local; simpl; try congruence;
+  intro H; inversion_clear H; simpl; auto.
+Qed.
+
+
+Lemma seval_list_sval_inj_not_none ge sp st rs0 m0: forall l,
+  (forall r, List.In r l -> seval_sval ge sp (si_sreg st r) rs0 m0 = None -> False) ->
+  seval_list_sval ge sp (list_sval_inj (map (si_sreg st) l)) rs0 m0 = None -> False.
+Proof.
+  induction l.
+  - intuition discriminate.
+  - intros ALLR. simpl.
+    inversion_SOME v.
+    + intro SVAL. inversion_SOME lv; [discriminate|].
+      assert (forall r : reg, In r l -> seval_sval ge sp (si_sreg st r) rs0 m0 = None -> False).
+      { intros r INR. eapply ALLR. right. assumption. }
+      intro SVALLIST. intro. eapply IHl; eauto.
+    + intros. exploit (ALLR a); simpl; eauto.
+Qed.
+
+Lemma siexec_inst_correct ge sp i st rs0 m0 rs m:
+  ssem_local ge sp st.(si_local) rs0 m0 rs m ->
+  all_fallthrough ge sp st.(si_exits) rs0 m0 ->
+  ssem_internal_opt2 ge sp (siexec_inst i st) rs0 m0 (istep ge i sp rs m).
+Proof.
+  intros (PRE & MEM & REG) NYE.
+  destruct i; simpl; auto.
+  + (* Nop *)
+    constructor; [|constructor]; simpl; auto.
+    constructor; auto.
+  + (* Op *)
+    inversion_SOME v; intros OP; simpl.
+    - constructor; [|constructor]; simpl; auto.
+      constructor; simpl; auto.
+      * constructor; auto. congruence.
+      * constructor; auto.
+        intro r0. destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+        subst. rewrite Regmap.gss; simpl; auto.
+        erewrite seval_list_sval_inj; simpl; auto.
+        try_simplify_someHyps.
+    - left. constructor; simpl; auto.
+      unfold sabort_local. right. right.
+      simpl. exists r. destruct (Pos.eq_dec r r); try congruence.
+      simpl. erewrite seval_list_sval_inj; simpl; auto.
+      try_simplify_someHyps.
+  + (* LOAD *) 
+    inversion_SOME a0; intro ADD.
+    { inversion_SOME v; intros LOAD; simpl. 
+      - explore_destruct; unfold ssem_internal, ssem_local; simpl; intuition.
+        * unfold ssem_internal. simpl. constructor; [|constructor]; auto.
+          constructor; constructor; simpl; auto. congruence. intro r0.
+          destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+          subst; rewrite Regmap.gss; simpl.
+          erewrite seval_list_sval_inj; simpl; auto.
+          try_simplify_someHyps.
+        * unfold ssem_internal. simpl. constructor; [|constructor]; auto.
+          constructor; constructor; simpl; auto. congruence. intro r0.
+          destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+          subst; rewrite Regmap.gss; simpl.
+          inversion_SOME args; intros ARGS.
+          2: { exploit seval_list_sval_inj_not_none; eauto; intuition congruence. }
+          exploit seval_list_sval_inj; eauto. intro ARGS'. erewrite ARGS in ARGS'. inv ARGS'. rewrite ADD.
+          inversion_SOME m2. intro SMEM.
+          assert (m = m2) by congruence. subst. rewrite LOAD. reflexivity.
+      - explore_destruct; unfold sabort, sabort_local; simpl.
+        * unfold sabort. simpl. left. constructor; auto.
+          right. right. exists r. simpl. destruct (Pos.eq_dec r r); try congruence.
+          simpl. erewrite seval_list_sval_inj; simpl; auto.
+          rewrite ADD; simpl; auto. try_simplify_someHyps.
+        * unfold ssem_internal. simpl. constructor; [|constructor]; auto.
+          constructor; constructor; simpl; auto. congruence. intro r0.
+          destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+          subst; rewrite Regmap.gss; simpl.
+          erewrite seval_list_sval_inj; simpl; auto.
+          try_simplify_someHyps.
+     } { rewrite ADD. destruct t.
+          - simpl. left; eauto. simpl. econstructor; eauto.
+            right. right. simpl. exists r. destruct (Pos.eq_dec r r); [|contradiction].
+            simpl. inversion_SOME args. intro SLS.
+            eapply seval_list_sval_inj in REG. rewrite REG in SLS. inv SLS.
+            rewrite ADD. reflexivity.
+          - simpl. constructor; [|constructor]; simpl; auto.
+            constructor; simpl; constructor; auto; [congruence|].
+            intro r0. destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+            subst. simpl. rewrite Regmap.gss.
+            erewrite seval_list_sval_inj; simpl; auto.
+            try_simplify_someHyps.
+     }
+  + (* STORE *)
+    inversion_SOME a0; intros ADD.
+    { inversion_SOME m'; intros STORE; simpl.
+      - unfold ssem_internal, ssem_local; simpl; intuition.
+        * congruence.
+        * erewrite seval_list_sval_inj; simpl; auto.
+          erewrite REG.
+          try_simplify_someHyps.
+      - unfold sabort, sabort_local; simpl.
+        left. constructor; auto. right. left.
+        erewrite seval_list_sval_inj; simpl; auto.
+        erewrite REG.
+        try_simplify_someHyps. }
+    { unfold sabort, sabort_local; simpl.
+      left. constructor; auto. right. left.
+      erewrite seval_list_sval_inj; simpl; auto.
+      erewrite ADD; simpl; auto. }
+  + (* COND *)
+    Local Hint Resolve is_tail_refl: core.
+    Local Hint Unfold ssem_local: core.
+    inversion_SOME b; intros COND.
+    { destruct b; simpl; unfold ssem_internal, ssem_local; simpl.
+      - remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
+        constructor; constructor; subst; simpl; auto.
+        unfold seval_condition. subst; simpl.
+        erewrite seval_list_sval_inj; simpl; auto.
+        try_simplify_someHyps.
+      - intuition. unfold all_fallthrough in * |- *. simpl.
+        intuition. subst. simpl.
+        unfold seval_condition.
+        erewrite seval_list_sval_inj; simpl; auto.
+        try_simplify_someHyps. }
+    { unfold sabort. simpl. right.
+      remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
+      constructor; [constructor; subst; simpl; auto|].
+      left. subst; simpl; auto.
+      unfold seval_condition.
+      erewrite seval_list_sval_inj; simpl; auto.
+      try_simplify_someHyps. }
+Qed.
+
+
+Lemma siexec_inst_correct_None ge sp i st rs0 m0 rs m:
+  ssem_local ge sp (st.(si_local)) rs0 m0 rs m ->
+  siexec_inst i st = None -> 
+  istep ge i sp rs m = None.
+Proof.
+  intros (PRE & MEM & REG).
+  destruct i; simpl; unfold sist_set_local, ssem_internal, ssem_local; simpl; try_simplify_someHyps.
+Qed.
+
+(** * Symbolic execution of the internal steps of a path *)
+Fixpoint siexec_path (path:nat) (f: function) (st: sistate): option sistate :=
+  match path with
+  | O => Some st
+  | S p =>
+    SOME i <- (fn_code f)!(st.(si_pc)) IN
+    SOME st1 <- siexec_inst i st IN
+    siexec_path p f st1
+  end.
+
+Lemma siexec_inst_add_exits i st st':
+  siexec_inst i st = Some st' ->
+  ( si_exits st' = si_exits st \/ exists ext, si_exits st' = ext :: si_exits st ).
+Proof.
+  destruct i; simpl; intro SISTEP; inversion_clear SISTEP; unfold siexec_inst; simpl; (discriminate || eauto).
+Qed.
+
+Lemma siexec_inst_preserves_allfu ge sp ext lx rs0 m0 st st' i:
+  all_fallthrough_upto_exit ge sp ext lx (si_exits st) rs0 m0 ->
+  siexec_inst i st = Some st' ->
+  all_fallthrough_upto_exit ge sp ext lx (si_exits st') rs0 m0.
+Proof.
+  intros ALLFU SISTEP. destruct ALLFU as (ISTAIL & ALLF).
+  constructor; eauto.
+  destruct i; simpl in SISTEP; inversion_clear SISTEP; simpl; (discriminate || eauto).
+Qed.
+
+Lemma siexec_path_correct_false ge sp f rs0 m0 st' is:
+  forall path,
+  is.(icontinue)=false ->
+  forall st, ssem_internal ge sp st rs0 m0 is ->
+  siexec_path path f st = Some st' ->
+  ssem_internal ge sp st' rs0 m0 is.
+Proof.
+  induction path; simpl.
+  - intros. congruence.
+  - intros ICF st SSEM STEQ'.
+    destruct ((fn_code f) ! (si_pc st)) eqn:FIC; [|discriminate].
+    destruct (siexec_inst _ _) eqn:SISTEP; [|discriminate].
+    eapply IHpath. 3: eapply STEQ'. eauto.
+    unfold ssem_internal in SSEM. rewrite ICF in SSEM.
+    destruct SSEM as (ext & lx & SEXIT & ALLFU).
+    unfold ssem_internal. rewrite ICF. exists ext, lx.
+    constructor; auto. eapply siexec_inst_preserves_allfu; eauto.
+Qed.
+
+Lemma siexec_path_preserves_sabort ge sp path f rs0 m0 st': forall st, 
+  siexec_path path f st = Some st' ->
+  sabort ge sp st rs0 m0 -> sabort ge sp st' rs0 m0.
+Proof.
+  Local Hint Resolve siexec_inst_preserves_sabort: core.
+  induction path; simpl.
+  + unfold sist_set_local; try_simplify_someHyps.
+  + intros st; inversion_SOME i.
+    inversion_SOME st1; eauto.
+Qed.
+
+Lemma siexec_path_WF path f: forall st,
+  siexec_path path f st = None -> nth_default_succ (fn_code f) path st.(si_pc) = None.
+Proof.
+  induction path; simpl.
+  + unfold sist_set_local. intuition congruence.
+  + intros st; destruct ((fn_code f) ! (si_pc st)); simpl; try tauto.
+    destruct (siexec_inst i st) as [st1|] eqn: Hst1; simpl.
+    - intros; erewrite siexec_inst_default_succ; eauto.
+    - intros; erewrite siexec_inst_WF; eauto.
+Qed.
+
+Lemma siexec_path_default_succ path f st': forall st,
+  siexec_path path f st = Some st' -> nth_default_succ (fn_code f) path st.(si_pc) = Some st'.(si_pc).
+Proof.
+  induction path; simpl.
+  + unfold sist_set_local. intros st H. inversion_clear H; simpl; try congruence.
+  + intros st; destruct ((fn_code f) ! (si_pc st)); simpl; try congruence.
+    destruct (siexec_inst i st) as [st1|] eqn: Hst1; simpl; try congruence.
+    intros; erewrite siexec_inst_default_succ; eauto.
+Qed.
+
+Lemma siexec_path_correct_true ge sp path (f:function) rs0 m0: forall st is,
+  is.(icontinue)=true ->
+  ssem_internal ge sp st rs0 m0 is -> 
+  nth_default_succ (fn_code f) path st.(si_pc) <> None ->
+  ssem_internal_opt2 ge sp (siexec_path path f st) rs0 m0
+                         (isteps ge path f sp is.(irs) is.(imem) is.(ipc))
+  .
+Proof.
+  Local Hint Resolve siexec_path_correct_false siexec_path_preserves_sabort siexec_path_WF: core.
+  induction path; simpl.
+  + intros st is CONT INV WF;
+    unfold ssem_internal, sist_set_local in * |- *;
+    try_simplify_someHyps. simpl.
+    destruct is; simpl in * |- *; subst; intuition auto.
+  + intros st is CONT; unfold ssem_internal at 1; rewrite CONT.
+    intros (LOCAL & PC & NYE) WF.
+    rewrite <- PC.
+    inversion_SOME i; intro Hi; rewrite Hi in WF |- *; simpl; auto.
+    exploit siexec_inst_correct; eauto. 
+    inversion_SOME st1; intros Hst1; erewrite Hst1; simpl.
+    - inversion_SOME is1; intros His1;rewrite His1; simpl. 
+      * destruct (icontinue is1) eqn:CONT1.
+        (* icontinue is0 = true *)
+        intros; eapply IHpath; eauto.
+        destruct i; simpl in * |- *; unfold sist_set_local in * |- *; try_simplify_someHyps.
+        (* icontinue is0 = false -> EARLY EXIT *)
+        destruct (siexec_path path f st1) as [st2|] eqn: Hst2; simpl; eauto.
+        destruct WF. erewrite siexec_inst_default_succ; eauto.
+        (* try_simplify_someHyps; eauto. *)
+      * destruct (siexec_path path f st1) as [st2|] eqn: Hst2; simpl; eauto.
+    - intros His1;rewrite His1; simpl; auto.
+Qed.
+
+(** REM: in the following two unused lemmas *)
+
+Lemma siexec_path_right_assoc_decompose f path: forall st st',
+  siexec_path (S path) f st = Some st' ->
+  exists st0, siexec_path path f st = Some st0 /\ siexec_path 1%nat f st0 = Some st'.
+Proof.
+  induction path; simpl; eauto.
+  intros st st'.
+  inversion_SOME i1.
+  inversion_SOME st1.
+  try_simplify_someHyps; eauto.
+Qed.
+
+Lemma siexec_path_right_assoc_compose f path: forall st st0 st',
+  siexec_path path f st = Some st0 ->
+  siexec_path 1%nat f st0 = Some st' ->
+  siexec_path (S path) f st = Some st'.
+Proof.
+  induction path.
+  + intros st st0 st' H. simpl in H.
+    try_simplify_someHyps; auto.
+  + intros st st0 st'.
+    assert (X:exists x, x=(S path)); eauto.
+    destruct X as [x X]. 
+    intros H1 H2. rewrite <- X.
+    generalize H1; clear H1. simpl.
+    inversion_SOME i1. intros Hi1; rewrite Hi1.
+    inversion_SOME st1. intros Hst1; rewrite Hst1.
+    subst; eauto.
+Qed.
+
+(** * Symbolic (final) value of a path *)
+Inductive sfval :=
+  | Snone
+  | Scall (sig:signature) (svos: sval + ident) (lsv:list_sval) (res:reg) (pc:node)
+    (* NB: [res] the return register is hard-wired ! Is it restrictive ? *)
+  | Stailcall: signature -> sval + ident -> list_sval -> sfval
+  | Sbuiltin (ef:external_function) (sargs: list (builtin_arg sval)) (res: builtin_res reg) (pc:node)
+  | Sjumptable (sv: sval) (tbl: list node)
+  | Sreturn: option sval -> sfval
+.
+
+Definition sfind_function (pge: RTLpath.genv) (ge: RTL.genv) (sp: val) (svos : sval + ident) (rs0: regset) (m0: mem): option fundef :=
+  match svos with
+  | inl sv => SOME v <- seval_sval ge sp sv rs0 m0 IN Genv.find_funct pge v
+  | inr symb => SOME b <- Genv.find_symbol pge symb IN Genv.find_funct_ptr pge b
+  end.
+
+Section SEVAL_BUILTIN_ARG. (* adapted from Events.v *)
+
+Variable ge: RTL.genv.
+Variable sp: val.
+Variable m: mem.
+Variable rs0: regset.
+Variable m0: mem.
+
+Inductive seval_builtin_arg: builtin_arg sval -> val -> Prop :=
+  | seval_BA: forall x v,
+      seval_sval ge sp x rs0 m0 = Some v ->
+      seval_builtin_arg (BA x) v
+  | seval_BA_int: forall n,
+      seval_builtin_arg (BA_int n) (Vint n)
+  | seval_BA_long: forall n,
+      seval_builtin_arg (BA_long n) (Vlong n)
+  | seval_BA_float: forall n,
+      seval_builtin_arg (BA_float n) (Vfloat n)
+  | seval_BA_single: forall n,
+      seval_builtin_arg (BA_single n) (Vsingle n)
+  | seval_BA_loadstack: forall chunk ofs v,
+      Mem.loadv chunk m (Val.offset_ptr sp ofs) = Some v ->
+      seval_builtin_arg (BA_loadstack chunk ofs) v
+  | seval_BA_addrstack: forall ofs,
+      seval_builtin_arg (BA_addrstack ofs) (Val.offset_ptr sp ofs)
+  | seval_BA_loadglobal: forall chunk id ofs v,
+      Mem.loadv chunk m (Senv.symbol_address ge id ofs) = Some v ->
+      seval_builtin_arg (BA_loadglobal chunk id ofs) v
+  | seval_BA_addrglobal: forall id ofs,
+      seval_builtin_arg (BA_addrglobal id ofs) (Senv.symbol_address ge id ofs)
+  | seval_BA_splitlong: forall hi lo vhi vlo,
+      seval_builtin_arg hi vhi -> seval_builtin_arg lo vlo ->
+      seval_builtin_arg (BA_splitlong hi lo) (Val.longofwords vhi vlo)
+  | seval_BA_addptr: forall a1 a2 v1 v2,
+      seval_builtin_arg a1 v1 -> seval_builtin_arg a2 v2 ->
+      seval_builtin_arg (BA_addptr a1 a2)
+                       (if Archi.ptr64 then Val.addl v1 v2 else Val.add v1 v2).
+
+Definition seval_builtin_args (al: list (builtin_arg sval)) (vl: list val) : Prop :=
+  list_forall2 seval_builtin_arg al vl.
+
+Lemma seval_builtin_arg_determ:
+  forall a v, seval_builtin_arg a v -> forall v', seval_builtin_arg a v' -> v' = v.
+Proof.
+  induction 1; intros v' EV; inv EV; try congruence.
+  f_equal; eauto.
+  apply IHseval_builtin_arg1 in H3. apply IHseval_builtin_arg2 in H5. subst; auto. 
+Qed.
+
+Lemma eval_builtin_args_determ:
+  forall al vl, seval_builtin_args al vl -> forall vl', seval_builtin_args al vl' -> vl' = vl.
+Proof.
+  induction 1; intros v' EV; inv EV; f_equal; eauto using seval_builtin_arg_determ.
+Qed.
+
+End SEVAL_BUILTIN_ARG.
+
+Inductive ssem_final (pge: RTLpath.genv) (ge: RTL.genv) (sp:val) (npc: node) stack (f: function) (rs0: regset) (m0: mem): sfval -> regset -> mem -> trace -> state -> Prop :=
+  | exec_Snone rs m:
+      ssem_final pge ge sp npc stack f rs0 m0 Snone rs m E0 (State stack f sp npc rs m)
+  | exec_Scall rs m sig svos lsv args res pc fd:
+      sfind_function pge ge sp svos rs0 m0 = Some fd ->
+      funsig fd = sig ->
+      seval_list_sval ge sp lsv rs0 m0 = Some args ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Scall sig svos lsv res pc) rs m
+        E0 (Callstate (Stackframe res f sp pc rs :: stack) fd args m)
+  | exec_Stailcall stk rs m sig svos args fd m' lsv:
+      sfind_function pge ge sp svos rs0 m0 = Some fd ->
+      funsig fd = sig ->
+      sp = Vptr stk Ptrofs.zero ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      seval_list_sval ge sp lsv rs0 m0 = Some args ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Stailcall sig svos lsv) rs m
+        E0 (Callstate stack fd args m')
+  | exec_Sbuiltin m' rs m vres res pc t sargs ef vargs:
+      seval_builtin_args ge sp m rs0 m0 sargs vargs ->
+      external_call ef ge vargs m t vres m' ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Sbuiltin ef sargs res pc) rs m
+        t (State stack f sp pc (regmap_setres res vres rs) m')
+  | exec_Sjumptable sv tbl pc' n rs m:
+      seval_sval ge sp sv rs0 m0 = Some (Vint n) ->
+      list_nth_z tbl (Int.unsigned n) = Some pc' ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Sjumptable sv tbl) rs m
+        E0 (State stack f sp pc' rs m)
+  | exec_Sreturn stk osv rs m m' v:
+      sp = (Vptr stk Ptrofs.zero) ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      match osv with Some sv => seval_sval ge sp sv rs0 m0 | None => Some Vundef end = Some v ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Sreturn osv) rs m 
+         E0 (Returnstate stack v m')
+.
+
+Record sstate := { internal:> sistate; final: sfval }.
+
+Inductive ssem pge (ge: RTL.genv) (sp:val) (st: sstate) stack f (rs0: regset) (m0: mem): trace -> state -> Prop :=
+  | ssem_early is:
+     is.(icontinue) = false ->
+     ssem_internal ge sp st rs0 m0 is -> 
+     ssem pge ge sp st stack f rs0 m0 E0 (State stack f sp is.(ipc) is.(irs) is.(imem))
+  | ssem_normal is t s:
+     is.(icontinue) = true ->
+     ssem_internal ge sp st rs0 m0 is ->  
+     ssem_final pge ge sp st.(si_pc) stack f rs0 m0 st.(final) is.(irs) is.(imem) t s ->
+     ssem pge ge sp st stack f rs0 m0 t s 
+  .
+
+(* NB: generic function that could be put into [AST] file *)
+Fixpoint builtin_arg_map {A B} (f: A -> B) (arg: builtin_arg A) : builtin_arg B :=
+  match arg with
+  | BA x => BA (f x)
+  | BA_int n => BA_int n
+  | BA_long n => BA_long n
+  | BA_float f => BA_float f
+  | BA_single s => BA_single s
+  | BA_loadstack chunk ptr => BA_loadstack chunk ptr
+  | BA_addrstack ptr => BA_addrstack ptr
+  | BA_loadglobal chunk id ptr => BA_loadglobal chunk id ptr
+  | BA_addrglobal id ptr => BA_addrglobal id ptr
+  | BA_splitlong ba1 ba2 => BA_splitlong (builtin_arg_map f ba1) (builtin_arg_map f ba2)
+  | BA_addptr ba1 ba2 => BA_addptr (builtin_arg_map f ba1) (builtin_arg_map f ba2)
+  end.
+
+Lemma seval_builtin_arg_correct ge sp rs m rs0 m0 sreg: forall arg varg,
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  eval_builtin_arg ge (fun r => rs # r) sp m arg varg ->
+  seval_builtin_arg ge sp m rs0 m0 (builtin_arg_map sreg arg) varg.
+Proof.
+  induction arg.
+  all: try (intros varg SEVAL BARG; inv BARG; constructor; congruence).
+  - intros varg SEVAL BARG. inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+  - intros varg SEVAL BARG. inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+Qed.
+
+Lemma seval_builtin_args_correct ge sp rs m rs0 m0 sreg args vargs:
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  eval_builtin_args ge (fun r => rs # r) sp m args vargs ->
+  seval_builtin_args ge sp m rs0 m0 (map (builtin_arg_map sreg) args) vargs.
+Proof.
+  induction 2.
+  - constructor.
+  - simpl. constructor; [| assumption].
+    eapply seval_builtin_arg_correct; eauto.
+Qed.
+
+Lemma seval_builtin_arg_complete ge sp rs m rs0 m0 sreg: forall arg varg,
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  seval_builtin_arg ge sp m rs0 m0 (builtin_arg_map sreg arg) varg ->
+  eval_builtin_arg ge (fun r => rs # r) sp m arg varg.
+Proof.
+  induction arg.
+  all: intros varg SEVAL BARG; try (inv BARG; constructor; congruence).
+  - inv BARG. rewrite SEVAL in H0. inv H0. constructor.
+  - inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+  - inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+Qed.
+
+Lemma seval_builtin_args_complete ge sp rs m rs0 m0 sreg: forall args vargs,
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  seval_builtin_args ge sp m rs0 m0 (map (builtin_arg_map sreg) args) vargs ->
+  eval_builtin_args ge (fun r => rs # r) sp m args vargs.
+Proof.
+  induction args.
+  - simpl. intros. inv H0. constructor.
+  - intros vargs SEVAL BARG. simpl in BARG. inv BARG.
+    constructor; [| eapply IHargs; eauto].
+    eapply seval_builtin_arg_complete; eauto.
+Qed.
+
+(** * Symbolic execution of final step *)
+Definition sexec_final (i: instruction) (prev: sistate_local): sfval := 
+  match i with
+  | Icall sig ros args res pc => 
+    let svos := sum_left_map prev.(si_sreg) ros in
+    let sargs := list_sval_inj (List.map prev.(si_sreg) args) in
+    Scall sig svos sargs res pc
+  | Itailcall sig ros args =>
+    let svos := sum_left_map prev.(si_sreg) ros in
+    let sargs := list_sval_inj (List.map prev.(si_sreg) args) in
+    Stailcall sig svos sargs
+  | Ibuiltin ef args res pc =>
+    let sargs := List.map (builtin_arg_map prev.(si_sreg)) args in
+    Sbuiltin ef sargs res pc
+  | Ireturn or => 
+    let sor := SOME r <- or IN Some (prev.(si_sreg) r) in
+    Sreturn sor
+  | Ijumptable reg tbl =>
+    let sv := prev.(si_sreg) reg in
+    Sjumptable sv tbl
+  | _ => Snone
+  end.
+
+Lemma sexec_final_correct pge ge sp i (f:function) pc st stack rs0 m0 t rs m s:
+  (fn_code f) ! pc = Some i ->
+  pc = st.(si_pc) ->
+  ssem_local ge sp (si_local st) rs0 m0 rs m ->
+  path_last_step ge pge stack f sp pc rs m t s ->
+  siexec_inst i st = None -> 
+  ssem_final pge ge sp pc stack f rs0 m0 (sexec_final i (si_local st)) rs m t s.
+Proof.
+  intros PC1 PC2 (PRE&MEM&REG) LAST. destruct LAST; subst; try_simplify_someHyps; simpl.
+  + (* Snone *) intro Hi; destruct i; simpl in Hi |- *; unfold sist_set_local in Hi; try congruence.
+  + (* Icall *) intros; eapply exec_Scall; auto.
+    - destruct ros; simpl in * |- *; auto.
+      rewrite REG; auto.
+    - erewrite seval_list_sval_inj; simpl; auto.
+  + (* Itailcall *) intros. eapply exec_Stailcall; auto.
+    - destruct ros; simpl in * |- *; auto.
+      rewrite REG; auto.
+    - erewrite seval_list_sval_inj; simpl; auto.
+  + (* Ibuiltin *) intros. eapply exec_Sbuiltin; eauto.
+    eapply seval_builtin_args_correct; eauto.
+  + (* Ijumptable *) intros. eapply exec_Sjumptable; eauto. congruence.
+  + (* Ireturn *) intros; eapply exec_Sreturn; simpl; eauto.
+    destruct or; simpl; auto.
+Qed.
+
+Lemma sexec_final_complete i (f:function) pc st ge pge sp stack rs0 m0 t rs m s:
+  (fn_code f) ! pc = Some i ->
+  pc = st.(si_pc) ->
+  ssem_local ge sp (si_local st) rs0 m0 rs m ->
+  ssem_final pge ge sp pc stack f rs0 m0 (sexec_final i (si_local st)) rs m t s ->
+  siexec_inst i st = None -> 
+  path_last_step ge pge stack f sp pc rs m t s.
+Proof.
+  intros PC1 PC2 (PRE&MEM&REG) LAST HSIS.
+  destruct i as [ (* Inop *) | (* Iop *) | (* Iload *) | (* Istore *)
+    | (* Icall *) sig ros args res pc'
+    | (* Itailcall *) sig ros args
+    | (* Ibuiltin *) ef bargs br pc'
+    | (* Icond *)
+    | (* Ijumptable *) jr tbl
+    | (*Ireturn*) or]; 
+    subst; try_simplify_someHyps; try (unfold sist_set_local in HSIS; try congruence);
+    inversion LAST; subst; clear LAST; simpl in * |- *.
+  + (* Icall *)
+    erewrite seval_list_sval_inj in * |- ; simpl; try_simplify_someHyps; auto.
+    intros; eapply exec_Icall; eauto.
+    destruct ros; simpl in * |- *; auto.
+    rewrite REG in * |- ; auto.
+  + (* Itailcall *)
+    intros HPC SMEM. erewrite seval_list_sval_inj in H10; auto. inv H10. 
+    eapply exec_Itailcall; eauto.
+    destruct ros; simpl in * |- *; auto.
+    rewrite REG in * |- ; auto.
+  + (* Ibuiltin *) intros HPC SMEM.
+    eapply exec_Ibuiltin; eauto.
+    eapply seval_builtin_args_complete; eauto.
+  + (* Ijumptable *) intros HPC SMEM.
+    eapply exec_Ijumptable; eauto.
+    congruence.
+  + (* Ireturn *)
+    intros; subst. enough (v=regmap_optget or Vundef rs) as ->.
+    * eapply exec_Ireturn; eauto.
+    * intros; destruct or; simpl; congruence.
+Qed.
+
+(** * Main function of the symbolic execution *)
+
+Definition init_sistate_local := {| si_pre:= fun _ _ _ _ => True; si_sreg:= fun r => Sinput r; si_smem:= Sinit |}.
+
+Definition init_sistate pc := {| si_pc:= pc; si_exits:=nil; si_local:= init_sistate_local |}.
+
+Lemma init_ssem_internal ge sp pc rs m: ssem_internal ge sp (init_sistate pc) rs m (mk_istate true pc rs m).
+Proof.
+  unfold ssem_internal, ssem_local, all_fallthrough; simpl. intuition.
+Qed.
+
+Definition sexec (f: function) (pc:node): option sstate :=
+  SOME path <- (fn_path f)!pc IN
+  SOME st <- siexec_path path.(psize) f (init_sistate pc) IN
+  SOME i <- (fn_code f)!(st.(si_pc)) IN
+  Some (match siexec_inst i st with
+       | Some st' => {| internal := st'; final := Snone |}
+       | None => {| internal := st; final := sexec_final i st.(si_local) |}
+       end).
+
+Lemma final_node_path_simpl f path pc:
+   (fn_path f)!pc = Some path -> nth_default_succ_inst (fn_code f) path.(psize) pc <> None. 
+Proof.
+  intros; exploit final_node_path; eauto.
+  intros (i & NTH & DUM).
+  congruence.
+Qed.
+
+Lemma symb_path_last_step i st st' ge pge stack (f:function) sp pc rs m t s: 
+  (fn_code f) ! pc = Some i ->
+  pc = st.(si_pc) ->
+  siexec_inst i st = Some st' ->
+  path_last_step ge pge stack f sp pc rs m t s ->
+  exists mk_istate, 
+     istep ge i sp rs m = Some mk_istate 
+  /\ t = E0 
+  /\ s = (State stack f sp mk_istate.(ipc) mk_istate.(RTLpath.irs) mk_istate.(imem)).
+Proof.
+  intros PC1 PC2 Hst' LAST; destruct LAST; subst; try_simplify_someHyps; simpl.
+Qed.
+
+(* NB: each concrete execution can be executed on the symbolic state (produced from [sexec]) 
+(sexec is a correct over-approximation)
+*)
+Theorem sexec_correct f pc pge ge sp path stack rs m t s: 
+  (fn_path f)!pc = Some path ->
+  path_step ge pge path.(psize) stack f sp rs m pc t s ->
+  exists st, sexec f pc = Some st /\ ssem pge ge sp st stack f rs m t s.
+Proof.
+  Local Hint Resolve init_ssem_internal: core.
+  intros PATH STEP; unfold sexec; rewrite PATH; simpl.
+  lapply (final_node_path_simpl f path pc); eauto. intro WF.
+  exploit (siexec_path_correct_true ge sp path.(psize) f rs m (init_sistate pc) (mk_istate true pc rs m)); simpl; eauto.
+  { intros ABS. apply WF; unfold nth_default_succ_inst. rewrite ABS; auto. }
+  (destruct (nth_default_succ_inst (fn_code f) path.(psize) pc) as [i|] eqn: Hi; [clear WF|congruence]).
+  destruct STEP as [sti STEPS CONT|sti t s STEPS CONT LAST];
+  (* intro Hst *)
+  (rewrite STEPS; unfold ssem_internal_opt2; destruct (siexec_path _ _ _) as [st|] eqn: Hst; try congruence);
+  (* intro SEM *)
+  (simpl; unfold ssem_internal; simpl; rewrite CONT; intro SEM);
+  (* intro Hi' *)
+  ( assert (Hi': (fn_code f) ! (si_pc st) = Some i); 
+    [ unfold nth_default_succ_inst in Hi; 
+      exploit siexec_path_default_succ; eauto; simpl;
+      intros DEF; rewrite DEF in Hi; auto 
+      | clear Hi; rewrite Hi' ]);
+  (* eexists *)
+  (eexists; constructor; eauto).
+  - (* early *)
+    eapply ssem_early; eauto.
+    unfold ssem_internal; simpl; rewrite CONT.
+    destruct (siexec_inst i st) as [st'|] eqn: Hst'; simpl; eauto.
+    destruct SEM as (ext & lx & SEM & ALLFU). exists ext, lx.
+    constructor; auto. eapply siexec_inst_preserves_allfu; eauto.
+  - destruct SEM as (SEM & PC & HNYE).
+    destruct (siexec_inst i st) as [st'|] eqn: Hst'; simpl.
+    + (* normal on Snone *)
+      rewrite <- PC in LAST.
+      exploit symb_path_last_step; eauto; simpl.
+      intros (mk_istate & ISTEP & Ht & Hs); subst.
+      exploit siexec_inst_correct; eauto. simpl.
+      erewrite Hst', ISTEP; simpl.
+      clear LAST CONT STEPS PC SEM HNYE Hst Hi' Hst' ISTEP st sti i.
+      intro SEM; destruct (mk_istate.(icontinue)) eqn: CONT.
+      { (* icontinue mk_istate = true *)
+        eapply ssem_normal; simpl; eauto.
+        unfold ssem_internal in SEM.
+        rewrite CONT in SEM.
+        destruct SEM as (SEM & PC & HNYE).
+        rewrite <- PC.
+        eapply exec_Snone. }
+      { eapply ssem_early; eauto. }
+    + (* normal non-Snone instruction *) 
+      eapply ssem_normal; eauto.
+      * unfold ssem_internal; simpl; rewrite CONT; intuition.
+      * simpl. eapply sexec_final_correct; eauto.
+        rewrite PC; auto.
+Qed.
+
+(* TODO: déplacer les trucs sur equiv_stackframe dans RTLpath ? *)
+Inductive equiv_stackframe: stackframe -> stackframe -> Prop :=
+  | equiv_stackframe_intro res f sp pc rs1 rs2
+      (EQUIV: forall r : positive, rs1 !! r = rs2 !! r):
+      equiv_stackframe (Stackframe res f sp pc rs1) (Stackframe res f sp pc rs2).
+
+Inductive equiv_state: state -> state -> Prop :=
+  | State_equiv stack f sp pc rs1 m rs2
+     (EQUIV: forall r, rs1#r = rs2#r): 
+     equiv_state (State stack f sp pc rs1 m) (State stack f sp pc rs2 m)
+  | Call_equiv stk stk' f args m
+      (STACKS: list_forall2 equiv_stackframe stk stk'):
+      equiv_state (Callstate stk f args m) (Callstate stk' f args m)
+  | Return_equiv stk stk' v m
+      (STACKS: list_forall2 equiv_stackframe stk stk'):
+      equiv_state (Returnstate stk v m) (Returnstate stk' v m).
+
+Lemma equiv_stackframe_refl stf: equiv_stackframe stf stf.
+Proof.
+  destruct stf. constructor; auto.
+Qed.
+
+Lemma equiv_stack_refl stk: list_forall2 equiv_stackframe stk stk.
+Proof.
+  Local Hint Resolve equiv_stackframe_refl: core.
+  induction stk; simpl; constructor; auto.
+Qed.
+
+Lemma equiv_state_refl s: equiv_state s s.
+Proof.
+  Local Hint Resolve equiv_stack_refl: core.
+  induction s; simpl; constructor; auto.
+Qed.
+
+(*
+Lemma equiv_stackframe_trans stf1 stf2 stf3:
+  equiv_stackframe stf1 stf2 -> equiv_stackframe stf2 stf3 -> equiv_stackframe stf1 stf3.
+Proof.
+  destruct 1; intros EQ; inv EQ; try econstructor; eauto.
+  intros; eapply eq_trans; eauto.
+Qed.
+
+Lemma equiv_stack_trans stk1 stk2:
+  list_forall2 equiv_stackframe stk1 stk2 -> 
+  forall stk3, list_forall2 equiv_stackframe stk2 stk3 -> 
+  list_forall2 equiv_stackframe stk1 stk3.
+Proof.
+  Local Hint Resolve equiv_stackframe_trans.
+  induction 1; intros stk3 EQ; inv EQ; econstructor; eauto.
+Qed.
+
+Lemma equiv_state_trans s1 s2 s3: equiv_state s1 s2 -> equiv_state s2 s3 -> equiv_state s1 s3.
+Proof.
+  Local Hint Resolve equiv_stack_trans.
+  destruct 1; intros EQ; inv EQ; econstructor; eauto.
+  intros; eapply eq_trans; eauto.
+Qed.
+*)
+
+Lemma regmap_setres_eq (rs rs': regset) res vres:
+  (forall r, rs # r = rs' # r) ->
+  forall r, (regmap_setres res vres rs) # r = (regmap_setres res vres rs') # r.
+Proof.
+  intros RSEQ r. destruct res; simpl; try congruence.
+  destruct (peq x r).
+  - subst. repeat (rewrite Regmap.gss). reflexivity.
+  - repeat (rewrite Regmap.gso); auto.
+Qed.
+
+Lemma ssem_final_equiv pge ge sp (f:function) st sv stack rs0 m0 t rs1 rs2 m s:
+  ssem_final pge ge sp st stack f rs0 m0 sv rs1 m t s ->
+  (forall r, rs1#r = rs2#r) -> 
+  exists s', equiv_state s s' /\ ssem_final pge ge sp st stack f rs0 m0 sv rs2 m t s'.
+Proof. 
+  Local Hint Resolve equiv_stack_refl: core.
+  destruct 1.
+  - (* Snone *) intros; eexists; econstructor.
+    + eapply State_equiv; eauto.
+    + eapply exec_Snone.
+  - (* Scall *)
+    intros; eexists; econstructor.
+    2: { eapply exec_Scall; eauto. }
+    apply Call_equiv; auto.
+    repeat (constructor; auto).
+  - (* Stailcall *)
+    intros; eexists; econstructor; [| eapply exec_Stailcall; eauto].
+    apply Call_equiv; auto.
+  - (* Sbuiltin *)
+    intros; eexists; econstructor; [| eapply exec_Sbuiltin; eauto].
+    constructor. eapply regmap_setres_eq; eauto.
+  - (* Sjumptable *)
+    intros; eexists; econstructor; [| eapply exec_Sjumptable; eauto].
+    constructor. assumption.
+  - (* Sreturn *)
+    intros; eexists; econstructor; [| eapply exec_Sreturn; eauto].
+    eapply equiv_state_refl; eauto.
+Qed.
+
+Lemma siexec_inst_early_exit_absurd i st st' ge sp rs m rs' m' pc':
+  siexec_inst i st = Some st' ->
+  (exists ext lx, ssem_exit ge sp ext rs m rs' m' pc' /\
+     all_fallthrough_upto_exit ge sp ext lx (si_exits st) rs m) ->
+  all_fallthrough ge sp (si_exits st') rs m ->
+  False.
+Proof.
+  intros SIEXEC (ext & lx & SSEME & ALLFU) ALLF. destruct ALLFU as (TAIL & _).
+  exploit siexec_inst_add_exits; eauto. destruct 1 as [SIEQ | (ext0 & SIEQ)].
+  - rewrite SIEQ in *. eapply all_fallthrough_noexit. eauto. 2: eapply ALLF. eapply is_tail_in. eassumption.
+  - rewrite SIEQ in *. eapply all_fallthrough_noexit. eauto. 2: eapply ALLF. eapply is_tail_in.
+    constructor. eassumption.
+Qed.
+
+Lemma is_tail_false {A: Type}: forall (l: list A) a, is_tail (a::l) nil -> False.
+Proof.
+  intros. eapply is_tail_incl in H. unfold incl in H. pose (H a).
+  assert (In a (a::l)) by (constructor; auto). assert (In a nil) by auto. apply in_nil in H1.
+  contradiction.
+Qed.
+
+Lemma cons_eq_false {A: Type}: forall (l: list A) a,
+  a :: l = l -> False.
+Proof.
+  induction l; intros.
+  - discriminate.
+  - inv H. apply IHl in H2. contradiction.
+Qed.
+
+Lemma app_cons_nil_eq {A: Type}: forall l' l (a:A),
+  (l' ++ a :: nil) ++ l = l' ++ a::l.
+Proof.
+  induction l'; intros.
+  - simpl. reflexivity.
+  - simpl. rewrite IHl'. reflexivity. 
+Qed.
+
+Lemma app_eq_false {A: Type}: forall l (l': list A) a,
+  l' ++ a :: l = l -> False.
+Proof.
+  induction l; intros.
+  - apply app_eq_nil in H. destruct H as (_ & H). apply cons_eq_false in H. contradiction.
+  - destruct l' as [|a' l'].
+    + simpl in H. apply cons_eq_false in H. contradiction.
+    + rewrite <- app_comm_cons in H. inv H.
+      apply (IHl (l' ++ (a0 :: nil)) a). rewrite app_cons_nil_eq. assumption.
+Qed.
+
+Lemma is_tail_false_gen {A: Type}: forall (l: list A) l' a, is_tail (l'++(a::l)) l -> False.
+Proof.
+  induction l.
+  - intros. destruct l' as [|a' l'].
+    + simpl in H. apply is_tail_false in H. contradiction.
+    + rewrite <- app_comm_cons in H. apply is_tail_false in H. contradiction.
+  - intros. inv H.
+    + apply app_eq_false in H2. contradiction.
+    + apply (IHl (l' ++ (a0 :: nil)) a). rewrite app_cons_nil_eq. assumption.
+Qed.
+
+Lemma is_tail_eq {A: Type}: forall (l l': list A),
+  is_tail l' l ->
+  is_tail l l' ->
+  l = l'.
+Proof.
+  destruct l as [|a l]; intros l' ITAIL ITAIL'.
+  - destruct l' as [|i' l']; auto. apply is_tail_false in ITAIL. contradiction.
+  - inv ITAIL; auto.
+    destruct l' as [|i' l']. { apply is_tail_false in ITAIL'. contradiction. }
+    exploit is_tail_trans. eapply ITAIL'. eauto. intro ABSURD.
+    apply (is_tail_false_gen l nil a) in ABSURD. contradiction.
+Qed.
+
+(* NB: each execution of a symbolic state (produced from [sexec]) represents a concrete execution
+  (sexec is exact).
+*)
+Theorem sexec_exact f pc pge ge sp path stack st rs m t s1: 
+  (fn_path f)!pc = Some path ->
+  sexec f pc = Some st -> 
+  ssem pge ge sp st stack f rs m t s1 ->
+  exists s2, path_step ge pge path.(psize) stack f sp rs m pc t s2 /\ 
+             equiv_state s1 s2.
+Proof.
+  Local Hint Resolve init_ssem_internal: core.
+  unfold sexec; intros PATH SSTEP SEM; rewrite PATH in SSTEP.
+  lapply (final_node_path_simpl f path pc); eauto. intro WF.
+  exploit (siexec_path_correct_true ge sp path.(psize) f rs m (init_sistate pc) (mk_istate true pc rs m)); simpl; eauto.
+  { intros ABS. apply WF; unfold nth_default_succ_inst. rewrite ABS; auto. }
+  (destruct (nth_default_succ_inst (fn_code f) path.(psize) pc) as [i|] eqn: Hi; [clear WF|congruence]).
+  unfold nth_default_succ_inst in Hi.
+  destruct (siexec_path path.(psize) f (init_sistate pc)) as [st0|] eqn: Hst0; simpl.
+  2:{ (* absurd case *)
+      exploit siexec_path_WF; eauto.
+      simpl; intros NDS; rewrite NDS in Hi; congruence. }
+  exploit siexec_path_default_succ; eauto; simpl.
+  intros NDS; rewrite NDS in Hi.
+  rewrite Hi in SSTEP.
+  intros ISTEPS. try_simplify_someHyps.
+  destruct (siexec_inst i st0) as [st'|] eqn:Hst'; simpl.
+  + (* exit on Snone instruction *)
+    assert (SEM': t = E0 /\ exists is, ssem_internal ge sp st' rs m is
+           /\ s1 = (State stack f sp (if (icontinue is) then (si_pc st') else (ipc is)) (irs is) (imem is))).
+    {  destruct SEM as [is CONT SEM|is t s CONT SEM1 SEM2]; simpl in * |- *.
+       - repeat (econstructor; eauto).
+         rewrite CONT; eauto.
+       - inversion SEM2. repeat (econstructor; eauto).
+         rewrite CONT; eauto. }
+    clear SEM; subst. destruct SEM' as [X (is & SEM & X')]; subst.
+    intros.
+    destruct (isteps ge (psize path) f sp rs m pc) as [is0|] eqn:RISTEPS; simpl in *.
+    * unfold ssem_internal in ISTEPS. destruct (icontinue is0) eqn: ICONT0. 
+      ** (* icontinue is0=true: path_step by normal_exit *)
+         destruct ISTEPS as (SEMis0&H1&H2).
+         rewrite H1 in * |-.
+         exploit siexec_inst_correct; eauto.
+         rewrite Hst'; simpl.
+         intros; exploit ssem_internal_opt_determ; eauto.
+         destruct 1 as (st & Hst & EQ1 & EQ2 & EQ3 & EQ4).
+         eexists. econstructor 1.
+         *** eapply exec_normal_exit; eauto.
+             eapply exec_istate; eauto.
+         *** rewrite EQ1.
+             enough ((ipc st) = (if icontinue st then si_pc st' else ipc is)) as ->.
+             { rewrite EQ2, EQ4. eapply State_equiv; auto. }
+             destruct (icontinue st) eqn:ICONT; auto.
+             exploit siexec_inst_default_succ; eauto.
+             erewrite istep_normal_exit; eauto.
+             try_simplify_someHyps.
+      ** (* The concrete execution has not reached "i" => early exit *) 
+         unfold ssem_internal in SEM.
+         destruct (icontinue is) eqn:ICONT.
+         { destruct SEM as (SEML & SIPC & ALLF).
+           exploit siexec_inst_early_exit_absurd; eauto. contradiction. }
+         
+         eexists. econstructor 1.
+         *** eapply exec_early_exit; eauto.
+         *** destruct ISTEPS as (ext & lx & SSEME & ALLFU). destruct SEM as (ext' & lx' & SSEME' & ALLFU').
+             eapply siexec_inst_preserves_allfu in ALLFU; eauto.
+             exploit ssem_exit_fallthrough_upto_exit; eauto.
+             exploit ssem_exit_fallthrough_upto_exit. eapply SSEME. eapply ALLFU. eapply ALLFU'.
+             intros ITAIL ITAIL'. apply is_tail_eq in ITAIL; auto. clear ITAIL'.
+             inv ITAIL. exploit ssem_exit_determ. eapply SSEME. eapply SSEME'. intros (IPCEQ & IRSEQ & IMEMEQ).
+             rewrite <- IPCEQ. rewrite <- IMEMEQ. constructor. congruence. 
+    * (* The concrete execution has not reached "i" => abort case *)
+      eapply siexec_inst_preserves_sabort in ISTEPS; eauto.
+      exploit ssem_internal_exclude_sabort; eauto. contradiction.
+  + destruct SEM as [is CONT SEM|is t s CONT SEM1 SEM2]; simpl in * |- *.
+    - (* early exit *)
+      intros.
+      exploit ssem_internal_opt_determ; eauto.
+      destruct 1 as (st & Hst & EQ1 & EQ2 & EQ3 & EQ4).
+      eexists. econstructor 1.
+      * eapply exec_early_exit; eauto.
+      * rewrite EQ2, EQ4; eapply State_equiv. auto.
+    - (* normal exit non-Snone instruction *)
+      intros.
+      exploit ssem_internal_opt_determ; eauto.
+      destruct 1 as (st & Hst & EQ1 & EQ2 & EQ3 & EQ4).
+      unfold ssem_internal in SEM1.
+      rewrite CONT in SEM1. destruct SEM1 as (SEM1 & PC0 & NYE0).
+      exploit ssem_final_equiv; eauto.
+      clear SEM2; destruct 1 as (s' & Ms' & SEM2).
+      rewrite ! EQ4 in * |-; clear EQ4.
+      rewrite ! EQ2 in * |-; clear EQ2.
+      exists s'; intuition.
+      eapply exec_normal_exit; eauto.
+      eapply sexec_final_complete; eauto.
+      * congruence.
+      * unfold ssem_local in * |- *. 
+        destruct SEM1 as (A & B & C). constructor; [|constructor]; eauto.
+        intro r. congruence.
+      * congruence.
+Qed.
+
+(** * Simulation of RTLpath code w.r.t symbolic execution *)
+
+Section SymbValPreserved.
+
+Variable ge ge': RTL.genv.
+
+Hypothesis symbols_preserved_RTL: forall s, Genv.find_symbol ge' s = Genv.find_symbol ge s.
+
+Hypothesis senv_preserved_RTL: Senv.equiv ge ge'.
+
+Lemma senv_find_symbol_preserved id:
+  Senv.find_symbol ge id = Senv.find_symbol ge' id.
+Proof.
+  destruct senv_preserved_RTL as (A & B & C). congruence.
+Qed.
+
+Lemma senv_symbol_address_preserved id ofs:
+  Senv.symbol_address ge id ofs = Senv.symbol_address ge' id ofs.
+Proof.
+  unfold Senv.symbol_address. rewrite senv_find_symbol_preserved.
+  reflexivity.
+Qed.
+
+Lemma seval_preserved sp sv rs0 m0:
+  seval_sval ge sp sv rs0 m0 = seval_sval ge' sp sv rs0 m0.
+Proof.
+  Local Hint Resolve symbols_preserved_RTL: core.
+  induction sv using sval_mut with (P0 := fun lsv => seval_list_sval ge sp lsv rs0 m0 = seval_list_sval ge' sp lsv rs0 m0)
+                                   (P1 := fun sm => seval_smem ge sp sm rs0 m0 = seval_smem ge' sp sm rs0 m0); simpl; auto.
+  + rewrite IHsv; clear IHsv. destruct (seval_list_sval _ _ _ _); auto.
+    rewrite IHsv0; clear IHsv0. destruct (seval_smem _ _ _ _); auto.
+    erewrite eval_operation_preserved; eauto.
+  + rewrite IHsv0; clear IHsv0. destruct (seval_list_sval _ _ _ _); auto.
+    erewrite <- eval_addressing_preserved; eauto.
+    destruct (eval_addressing _ sp _ _); auto.
+    rewrite IHsv; auto.
+  + rewrite IHsv; clear IHsv. destruct (seval_sval _ _ _ _); auto.
+    rewrite IHsv0; auto.
+  + rewrite IHsv0; clear IHsv0. destruct (seval_list_sval _ _ _ _); auto.
+    erewrite <- eval_addressing_preserved; eauto.
+    destruct (eval_addressing _ sp _ _); auto.
+    rewrite IHsv; clear IHsv. destruct (seval_smem _ _ _ _); auto.
+    rewrite IHsv1; auto.
+Qed.
+
+Lemma seval_builtin_arg_preserved sp m rs0 m0:
+  forall bs varg,
+  seval_builtin_arg ge sp m rs0 m0 bs varg ->
+  seval_builtin_arg ge' sp m rs0 m0 bs varg.
+Proof.
+  induction 1.
+  all: try (constructor; auto).
+  - rewrite <- seval_preserved. assumption.
+  - rewrite <- senv_symbol_address_preserved. assumption.
+  - rewrite senv_symbol_address_preserved. eapply seval_BA_addrglobal.
+Qed.
+
+Lemma seval_builtin_args_preserved sp m rs0 m0 lbs vargs:
+  seval_builtin_args ge sp m rs0 m0 lbs vargs ->
+  seval_builtin_args ge' sp m rs0 m0 lbs vargs.
+Proof.
+  induction 1; constructor; eauto.
+  eapply seval_builtin_arg_preserved; auto.
+Qed.
+
+Lemma list_sval_eval_preserved sp lsv rs0 m0: 
+  seval_list_sval ge sp lsv rs0 m0 = seval_list_sval ge' sp lsv rs0 m0.
+Proof.
+  induction lsv; simpl; auto.
+  rewrite seval_preserved. destruct (seval_sval _ _ _ _); auto.
+  rewrite IHlsv; auto.
+Qed.
+
+Lemma smem_eval_preserved sp sm rs0 m0: 
+  seval_smem ge sp sm rs0 m0 = seval_smem ge' sp sm rs0 m0.
+Proof.
+  induction sm; simpl; auto.
+  rewrite list_sval_eval_preserved. destruct (seval_list_sval _ _ _ _); auto.
+  erewrite <- eval_addressing_preserved; eauto.
+  destruct (eval_addressing _ sp _ _); auto.
+  rewrite IHsm; clear IHsm. destruct (seval_smem _ _ _ _); auto.
+  rewrite seval_preserved; auto.
+Qed.
+
+Lemma seval_condition_preserved sp cond lsv sm rs0 m0:
+ seval_condition ge sp cond lsv sm rs0 m0 = seval_condition ge' sp cond lsv sm rs0 m0.
+Proof.
+  unfold seval_condition.
+  rewrite list_sval_eval_preserved. destruct (seval_list_sval _ _ _ _); auto.
+  rewrite smem_eval_preserved; auto.
+Qed.
+
+End SymbValPreserved.
+
+Require Import RTLpathLivegen RTLpathLivegenproof.
+
+(** * DEFINITION OF SIMULATION BETWEEN (ABSTRACT) SYMBOLIC EXECUTIONS
+*)
+
+Definition istate_simulive alive (srce: PTree.t node) (is1 is2: istate): Prop :=
+     is1.(icontinue) = is2.(icontinue)
+     /\ eqlive_reg alive is1.(irs) is2.(irs)
+     /\ is1.(imem) = is2.(imem).
+
+Definition istate_simu f (srce: PTree.t node) is1 is2: Prop :=
+  if is1.(icontinue) then
+     (* TODO: il faudra raffiner le (fun _ => True) si on veut autoriser l'oracle à
+        ajouter du "code mort" sur des registres non utilisés (loop invariant code motion à la David)
+        Typiquement, pour connaître la frame des registres vivants, il faudra faire une propagation en arrière
+        sur la dernière instruction du superblock. *)
+     istate_simulive (fun _ => True) srce is1 is2
+  else
+     exists path, f.(fn_path)!(is1.(ipc)) = Some path 
+     /\ istate_simulive (fun r => Regset.In r path.(input_regs)) srce is1 is2
+     /\ srce!(is2.(ipc)) = Some is1.(ipc).
+
+Record simu_proof_context {f1: RTLpath.function} := {
+   liveness_hyps: liveness_ok_function f1;
+   the_ge1: RTL.genv;
+   the_ge2: RTL.genv;
+   genv_match: forall s, Genv.find_symbol the_ge1 s = Genv.find_symbol the_ge2 s;
+   the_sp: val;
+   the_rs0: regset; 
+   the_m0: mem
+}.
+Arguments simu_proof_context: clear implicits.
+
+(* NOTE: a pure semantic definition on [sistate], for a total freedom in refinements *)
+Definition sistate_simu (dm: PTree.t node) (f: RTLpath.function) (st1 st2: sistate) (ctx: simu_proof_context f): Prop :=
+  forall is1, ssem_internal (the_ge1 ctx) (the_sp ctx) st1 (the_rs0 ctx) (the_m0 ctx) is1 ->
+  exists is2, ssem_internal (the_ge2 ctx) (the_sp ctx) st2 (the_rs0 ctx) (the_m0 ctx) is2
+              /\ istate_simu f dm is1 is2.
+
+Inductive svident_simu (f: RTLpath.function) (ctx: simu_proof_context f): (sval + ident) -> (sval + ident) -> Prop :=
+  | Sleft_simu sv1 sv2:
+     (seval_sval (the_ge1 ctx) (the_sp ctx) sv1 (the_rs0 ctx) (the_m0 ctx)) = (seval_sval (the_ge2 ctx) (the_sp ctx) sv2 (the_rs0 ctx) (the_m0 ctx))
+     -> svident_simu f ctx (inl sv1) (inl sv2)
+  | Sright_simu id1 id2:
+     id1 = id2
+     -> svident_simu f ctx (inr id1) (inr id2)
+  .
+
+
+Fixpoint ptree_get_list (pt: PTree.t node) (lp: list positive) : option (list positive) :=
+  match lp with
+  | nil => Some nil
+  | p1::lp => SOME p2 <- pt!p1 IN
+              SOME lp2 <- (ptree_get_list pt lp) IN
+              Some (p2 :: lp2)
+  end.
+
+Lemma ptree_get_list_nth dm p2: forall lp2 lp1,
+  ptree_get_list dm lp2 = Some lp1 ->
+  forall n, list_nth_z lp2 n = Some p2 ->
+  exists p1,
+    list_nth_z lp1 n = Some p1 /\ dm ! p2 = Some p1.
+Proof.
+  induction lp2.
+  - simpl. intros. inv H. simpl in *. discriminate.
+  - intros lp1 PGL n LNZ. simpl in PGL. explore.
+    inv LNZ. destruct (zeq n 0) eqn:ZEQ.
+    + subst. inv H0. exists n0. simpl; constructor; auto.
+    + exploit IHlp2; eauto. intros (p1 & LNZ & DMEQ).
+      eexists. simpl. rewrite ZEQ.
+      constructor; eauto.
+Qed.
+
+Lemma ptree_get_list_nth_rev dm p1: forall lp2 lp1,
+  ptree_get_list dm lp2 = Some lp1 ->
+  forall n, list_nth_z lp1 n = Some p1 ->
+  exists p2,
+    list_nth_z lp2 n = Some p2 /\ dm ! p2 = Some p1.
+Proof.
+  induction lp2.
+  - simpl. intros. inv H. simpl in *. discriminate.
+  - intros lp1 PGL n LNZ. simpl in PGL. explore.
+    inv LNZ. destruct (zeq n 0) eqn:ZEQ.
+    + subst. inv H0. exists a. simpl; constructor; auto.
+    + exploit IHlp2; eauto. intros (p2 & LNZ & DMEQ).
+      eexists. simpl. rewrite ZEQ.
+      constructor; eauto. congruence.
+Qed.
+
+Fixpoint seval_builtin_sval ge sp bsv rs0 m0 :=
+  match bsv with
+  | BA sv => SOME v <- seval_sval ge sp sv rs0 m0 IN Some (BA v)
+  | BA_splitlong sv1 sv2 =>
+      SOME v1 <- seval_builtin_sval ge sp sv1 rs0 m0 IN
+      SOME v2 <- seval_builtin_sval ge sp sv2 rs0 m0 IN
+      Some (BA_splitlong v1 v2)
+  | BA_addptr sv1 sv2 =>
+      SOME v1 <- seval_builtin_sval ge sp sv1 rs0 m0 IN
+      SOME v2 <- seval_builtin_sval ge sp sv2 rs0 m0 IN
+      Some (BA_addptr v1 v2)
+  | BA_int i => Some (BA_int i)
+  | BA_long l => Some (BA_long l)
+  | BA_float f => Some (BA_float f)
+  | BA_single s => Some (BA_single s)
+  | BA_loadstack chk ptr => Some (BA_loadstack chk ptr)
+  | BA_addrstack ptr => Some (BA_addrstack ptr)
+  | BA_loadglobal chk id ptr => Some (BA_loadglobal chk id ptr)
+  | BA_addrglobal id ptr => Some (BA_addrglobal id ptr)
+  end.
+
+
+Fixpoint seval_list_builtin_sval ge sp lbsv rs0 m0 :=
+  match lbsv with
+  | nil => Some nil
+  | bsv::lbsv => SOME v <- seval_builtin_sval ge sp bsv rs0 m0 IN
+                 SOME lv <- seval_list_builtin_sval ge sp lbsv rs0 m0 IN
+                 Some (v::lv)
+  end.
+
+Lemma seval_list_builtin_sval_nil ge sp rs0 m0 lbs2:
+  seval_list_builtin_sval ge sp lbs2 rs0 m0 = Some nil ->
+  lbs2 = nil.
+Proof.
+  destruct lbs2; simpl; auto.
+  intros. destruct (seval_builtin_sval _ _ _ _ _);
+    try destruct (seval_list_builtin_sval _ _ _ _ _); discriminate.
+Qed.
+
+Lemma seval_builtin_sval_arg (ge:RTL.genv) sp rs0 m0 bs:
+   forall ba m v, 
+   seval_builtin_sval ge sp bs rs0 m0 = Some ba ->
+   eval_builtin_arg ge (fun id => id) sp m ba v ->
+   seval_builtin_arg ge sp m rs0 m0 bs v.
+Proof.
+   induction bs; simpl; 
+   try (intros ba m v H; inversion H; subst; clear H;
+        intros H; inversion H; subst;
+        econstructor; auto; fail).
+   - intros ba m v; destruct (seval_sval _ _ _ _ _) eqn: SV;
+     intros H; inversion H; subst; clear H.
+     intros H; inversion H; subst.
+     econstructor; auto.
+   - intros ba m v. 
+     destruct (seval_builtin_sval _ _ bs1 _ _) eqn: SV1; try congruence.
+     destruct (seval_builtin_sval _ _ bs2 _ _) eqn: SV2; try congruence.
+     intros H; inversion H; subst; clear H.
+     intros H; inversion H; subst.
+     econstructor; eauto.
+   - intros ba m v. 
+     destruct (seval_builtin_sval _ _ bs1 _ _) eqn: SV1; try congruence.
+     destruct (seval_builtin_sval _ _ bs2 _ _) eqn: SV2; try congruence.
+     intros H; inversion H; subst; clear H.
+     intros H; inversion H; subst.
+     econstructor; eauto.
+Qed.
+
+Lemma seval_builtin_arg_sval ge sp m rs0 m0 v: forall bs,
+  seval_builtin_arg ge sp m rs0 m0 bs v ->
+  exists ba,
+    seval_builtin_sval ge sp bs rs0 m0 = Some ba
+    /\ eval_builtin_arg ge (fun id => id) sp m ba v.
+Proof.
+  induction 1.
+  all: try (eexists; constructor; [simpl; reflexivity | constructor]).
+  2-3: try assumption.
+  - eexists. constructor.
+    + simpl. rewrite H. reflexivity.
+    + constructor.
+  - destruct IHseval_builtin_arg1 as (ba1 & A1 & B1).
+    destruct IHseval_builtin_arg2 as (ba2 & A2 & B2).
+    eexists. constructor.
+    + simpl. rewrite A1. rewrite A2. reflexivity.
+    + constructor; assumption. 
+  - destruct IHseval_builtin_arg1 as (ba1 & A1 & B1).
+    destruct IHseval_builtin_arg2 as (ba2 & A2 & B2).
+    eexists. constructor.
+    + simpl. rewrite A1. rewrite A2. reflexivity.
+    + constructor; assumption.
+Qed.
+
+Lemma seval_builtin_sval_args (ge:RTL.genv) sp rs0 m0 lbs:
+   forall lba m v, 
+   seval_list_builtin_sval ge sp lbs rs0 m0 = Some lba ->
+   list_forall2 (eval_builtin_arg ge (fun id => id) sp m) lba v ->
+   seval_builtin_args ge sp m rs0 m0 lbs v.
+Proof.
+  unfold seval_builtin_args; induction lbs; simpl; intros lba m v.
+  - intros H; inversion H; subst; clear H.
+    intros H; inversion H. econstructor.
+  - destruct (seval_builtin_sval _ _ _ _ _) eqn:SV; try congruence.
+    destruct (seval_list_builtin_sval _ _ _ _ _) eqn: SVL; try congruence.
+    intros H; inversion H; subst; clear H.
+    intros H; inversion H; subst; clear H. 
+    econstructor; eauto.
+    eapply seval_builtin_sval_arg; eauto.
+Qed.
+
+Lemma seval_builtin_args_sval ge sp m rs0 m0 lv: forall lbs,
+  seval_builtin_args ge sp m rs0 m0 lbs lv ->
+  exists lba,
+    seval_list_builtin_sval ge sp lbs rs0 m0 = Some lba
+    /\ list_forall2 (eval_builtin_arg ge (fun id => id) sp m) lba lv.
+Proof.
+  induction 1.
+  - eexists. constructor.
+    + simpl. reflexivity.
+    + constructor.
+  - destruct IHlist_forall2 as (lba & A & B).
+    apply seval_builtin_arg_sval in H. destruct H as (ba & A' & B').
+    eexists. constructor.
+    + simpl. rewrite A'. rewrite A. reflexivity.
+    + constructor; assumption.
+Qed.
+
+Lemma seval_builtin_sval_correct ge sp m rs0 m0: forall bs1 v bs2,
+  seval_builtin_arg ge sp m rs0 m0 bs1 v ->
+  (seval_builtin_sval ge sp bs1 rs0 m0) = (seval_builtin_sval ge sp bs2 rs0 m0) ->
+  seval_builtin_arg ge sp m rs0 m0 bs2 v.
+Proof.
+  intros. exploit seval_builtin_arg_sval; eauto.
+  intros (ba & X1 & X2).
+  eapply seval_builtin_sval_arg; eauto.
+  congruence.
+Qed.
+
+Lemma seval_list_builtin_sval_correct ge sp m rs0 m0 vargs: forall lbs1,
+  seval_builtin_args ge sp m rs0 m0 lbs1 vargs ->
+  forall lbs2, (seval_list_builtin_sval ge sp lbs1 rs0 m0) = (seval_list_builtin_sval ge sp lbs2 rs0 m0) ->
+  seval_builtin_args ge sp m rs0 m0 lbs2 vargs.
+Proof.
+  intros. exploit seval_builtin_args_sval; eauto.
+  intros (ba & X1 & X2).
+  eapply seval_builtin_sval_args; eauto.
+  congruence.
+Qed.
+
+(* NOTE: we need to mix semantical simulation and syntactic definition on [sfval] in order to abstract the [match_states] *)
+Inductive sfval_simu (dm: PTree.t node) (f: RTLpath.function) (opc1 opc2: node) (ctx: simu_proof_context f): sfval -> sfval -> Prop :=
+  | Snone_simu: 
+      dm!opc2 = Some opc1 -> 
+      sfval_simu dm f opc1 opc2 ctx Snone Snone
+  | Scall_simu sig svos1 svos2 lsv1 lsv2 res pc1 pc2:
+      dm!pc2 = Some pc1 ->
+      svident_simu f ctx svos1 svos2 ->
+      (seval_list_sval (the_ge1 ctx) (the_sp ctx) lsv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_list_sval (the_ge2 ctx) (the_sp ctx) lsv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Scall sig svos1 lsv1 res pc1) (Scall sig svos2 lsv2 res pc2)
+  | Stailcall_simu sig svos1 svos2 lsv1 lsv2:
+      svident_simu f ctx svos1 svos2 ->
+      (seval_list_sval (the_ge1 ctx) (the_sp ctx) lsv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_list_sval (the_ge2 ctx) (the_sp ctx) lsv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Stailcall sig svos1 lsv1) (Stailcall sig svos2 lsv2)
+  | Sbuiltin_simu ef lbs1 lbs2 br pc1 pc2:
+      dm!pc2 = Some pc1 ->
+      (seval_list_builtin_sval (the_ge1 ctx) (the_sp ctx) lbs1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_list_builtin_sval (the_ge2 ctx) (the_sp ctx) lbs2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Sbuiltin ef lbs1 br pc1) (Sbuiltin ef lbs2 br pc2)
+  | Sjumptable_simu sv1 sv2 lpc1 lpc2:
+      ptree_get_list dm lpc2 = Some lpc1 ->
+      (seval_sval (the_ge1 ctx) (the_sp ctx) sv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_sval (the_ge2 ctx) (the_sp ctx) sv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Sjumptable sv1 lpc1) (Sjumptable sv2 lpc2)
+  | Sreturn_simu_none: sfval_simu dm f opc1 opc2 ctx (Sreturn None) (Sreturn None)
+  | Sreturn_simu_some sv1 sv2:
+      (seval_sval (the_ge1 ctx) (the_sp ctx) sv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_sval (the_ge2 ctx) (the_sp ctx) sv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Sreturn (Some sv1)) (Sreturn (Some sv2)).
+
+Definition sstate_simu dm f (s1 s2: sstate) (ctx: simu_proof_context f): Prop :=
+       sistate_simu dm f s1.(internal) s2.(internal) ctx
+    /\ forall is1, 
+           ssem_internal (the_ge1 ctx) (the_sp ctx) s1 (the_rs0 ctx) (the_m0 ctx) is1 -> 
+           is1.(icontinue) = true ->
+           sfval_simu dm f s1.(si_pc) s2.(si_pc) ctx s1.(final) s2.(final).
+
+Definition sexec_simu dm (f1 f2: RTLpath.function) pc1 pc2: Prop :=
+    forall st1, sexec f1 pc1 = Some st1 -> 
+    exists st2, sexec f2 pc2 = Some st2 /\ forall ctx, sstate_simu dm f1 st1 st2 ctx.
diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v
new file mode 100644
index 00000000..beab405f
--- /dev/null
+++ b/scheduling/RTLpathScheduler.v
@@ -0,0 +1,330 @@
+(** RTLpath Scheduling from an external oracle. 
+
+This module is inspired from [Duplicate] and [Duplicateproof]
+
+*)
+
+Require Import AST Linking Values Maps Globalenvs Smallstep Registers.
+Require Import Coqlib Maps Events Errors Op.
+Require Import RTL RTLpath RTLpathLivegen RTLpathLivegenproof RTLpathSE_theory RTLpathSE_impl.
+
+
+Notation "'ASSERT' A 'WITH' MSG 'IN' B" := (if A then B else Error (msg MSG))
+         (at level 200, A at level 100, B at level 200)
+         : error_monad_scope.
+
+Local Open Scope error_monad_scope.
+Local Open Scope positive_scope.
+
+(** External oracle returning the new RTLpath function and a mapping of new path_entries to old path_entries 
+
+NB: the new RTLpath function is generated from the fn_code, the fn_entrypoint and the fn_path
+It requires to check that the path structure is wf !
+
+*)
+
+(* Returns: new code, new entrypoint, new pathmap, revmap
+ * Indeed, the entrypoint might not be the same if the entrypoint node is moved further down
+ * a path ; same reasoning for the pathmap *)
+Axiom untrusted_scheduler: RTLpath.function -> code * node * path_map * (PTree.t node).
+
+Extract Constant untrusted_scheduler => "RTLpathScheduleraux.scheduler".
+
+Program Definition function_builder (tfr: RTL.function) (tpm: path_map) : 
+  { r : res RTLpath.function | forall f', r = OK f' -> fn_RTL f' = tfr} :=
+  match RTLpathLivegen.function_checker tfr tpm with
+  | false => Error (msg "In function_builder: (tfr, tpm) is not wellformed")
+  | true => OK {| fn_RTL := tfr; fn_path := tpm |}
+  end.
+Next Obligation.
+  apply function_checker_path_entry. auto.
+Defined. Next Obligation.
+  apply function_checker_wellformed_path_map. auto.
+Defined.
+
+Definition entrypoint_check (dm: PTree.t node) (fr tfr: RTL.function) : res unit :=
+  match dm ! (fn_entrypoint tfr) with
+  | None => Error (msg "No mapping for (entrypoint tfr)")
+  | Some etp => if (Pos.eq_dec (fn_entrypoint fr) etp) then OK tt
+                else Error (msg "Entrypoints do not match")
+  end.
+
+Lemma entrypoint_check_correct fr tfr dm:
+  entrypoint_check dm fr tfr = OK tt ->
+  dm ! (fn_entrypoint tfr) = Some (fn_entrypoint fr).
+Proof.
+  unfold entrypoint_check. explore; try discriminate. congruence.
+Qed.
+
+Definition path_entry_check_single (pm tpm: path_map) (m: node * node) :=
+  let (pc2, pc1) := m in
+  match (tpm ! pc2) with
+  | None => Error (msg "pc2 isn't an entry of tpm")
+  | Some _ =>
+      match (pm ! pc1) with
+      | None => Error (msg "pc1 isn't an entry of pm")
+      | Some _ => OK tt
+      end
+  end.
+
+Lemma path_entry_check_single_correct pm tpm pc1 pc2:
+  path_entry_check_single pm tpm (pc2, pc1) = OK tt ->
+  path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+  unfold path_entry_check_single. intro. explore.
+  constructor; congruence.
+Qed.
+
+(* Inspired from Duplicate.verify_mapping_rec *)
+Fixpoint path_entry_check_rec (pm tpm: path_map) lm :=
+  match lm with
+  | nil => OK tt
+  | m :: lm => do u1 <- path_entry_check_single pm tpm m;
+               do u2 <- path_entry_check_rec pm tpm lm;
+               OK tt
+  end.
+
+Lemma path_entry_check_rec_correct pm tpm pc1 pc2: forall lm,
+  path_entry_check_rec pm tpm lm = OK tt ->
+  In (pc2, pc1) lm ->
+  path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+  induction lm.
+  - simpl. intuition.
+  - simpl. intros. explore. destruct H0.
+    + subst. eapply path_entry_check_single_correct; eauto.
+    + eapply IHlm; assumption.
+Qed.
+
+Definition path_entry_check (dm: PTree.t node) (pm tpm: path_map) := path_entry_check_rec pm tpm (PTree.elements dm).
+
+Lemma path_entry_check_correct dm pm tpm:
+  path_entry_check dm pm tpm = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+  unfold path_entry_check. intros. eapply PTree.elements_correct in H0.
+  eapply path_entry_check_rec_correct; eassumption.
+Qed.
+
+Definition function_equiv_checker (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) : res unit :=
+  let pm := fn_path f in
+  let fr := fn_RTL f in
+  let tpm := fn_path tf in
+  let tfr := fn_RTL tf in
+  do _ <- entrypoint_check dm fr tfr;
+  do _ <- path_entry_check dm pm tpm;
+  do _ <- simu_check dm f tf;
+  OK tt.
+
+Lemma function_equiv_checker_entrypoint f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  dm ! (fn_entrypoint tf) = Some (fn_entrypoint f).
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  eapply entrypoint_check_correct; eauto.
+Qed.
+
+Lemma function_equiv_checker_pathentry1 f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  path_entry (fn_path tf) pc2.
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  exploit path_entry_check_correct. eassumption. all: eauto. intuition.
+Qed.
+
+Lemma function_equiv_checker_pathentry2 f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  path_entry (fn_path f) pc1.
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  exploit path_entry_check_correct. eassumption. all: eauto. intuition.
+Qed.
+
+Lemma function_equiv_checker_correct f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  eapply simu_check_correct; eauto.
+Qed.
+
+Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (PTree.t node)) :=
+  let (tctetpm, dm) := untrusted_scheduler f in
+  let (tcte, tpm) := tctetpm in
+  let (tc, te) := tcte in
+  let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
+  do tf <- proj1_sig (function_builder tfr tpm);
+  do tt <- function_equiv_checker dm f tf;
+  OK (tf, dm).
+
+Theorem verified_scheduler_correct f tf dm:
+  verified_scheduler f = OK (tf, dm) ->
+  fn_sig f = fn_sig tf
+  /\ fn_params f = fn_params tf
+  /\ fn_stacksize f = fn_stacksize tf
+  /\ dm ! (fn_entrypoint tf) = Some (fn_entrypoint f)
+  /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> path_entry (fn_path f) pc1)
+  /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> path_entry (fn_path tf) pc2)
+  /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2)
+.
+Proof.
+  intros VERIF. unfold verified_scheduler in VERIF. explore.
+  Local Hint Resolve function_equiv_checker_entrypoint
+    function_equiv_checker_pathentry1 function_equiv_checker_pathentry2
+    function_equiv_checker_correct: core.
+  destruct (function_builder _ _) as [res H]; simpl in * |- *; auto.
+    apply H in EQ2. rewrite EQ2. simpl.
+  repeat (constructor; eauto).
+  - exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition.
+Qed.
+
+Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := {
+  preserv_fnsig: fn_sig f1 = fn_sig f2;
+  preserv_fnparams: fn_params f1 = fn_params f2;
+  preserv_fnstacksize: fn_stacksize f1 = fn_stacksize f2;
+  preserv_entrypoint: dupmap!(f2.(fn_entrypoint)) = Some f1.(fn_entrypoint);
+  dupmap_path_entry1: forall pc1 pc2, dupmap!pc2 = Some pc1 -> path_entry (fn_path f1) pc1;
+  dupmap_path_entry2: forall pc1 pc2, dupmap!pc2 = Some pc1 -> path_entry (fn_path f2) pc2;
+  dupmap_correct: forall pc1 pc2, dupmap!pc2 = Some pc1 -> sexec_simu dupmap f1 f2 pc1 pc2;
+}.
+
+Program Definition transf_function (f: RTLpath.function):
+  { r : res RTLpath.function | forall f', r = OK f' -> exists dm, match_function dm f f'} :=
+  match (verified_scheduler f) with
+  | Error e => Error e
+  | OK (tf, dm) => OK tf
+  end.
+Next Obligation.
+  exploit verified_scheduler_correct; eauto.
+  intros (A & B & C & D & E & F & G (* & H *)).
+  exists dm. econstructor; eauto.
+Defined.
+
+Theorem match_function_preserves f f' dm:
+  match_function dm f f' ->
+  fn_sig f = fn_sig f' /\ fn_params f = fn_params f' /\ fn_stacksize f = fn_stacksize f'.
+Proof.
+  intros.
+  destruct H as [SIG PARAM SIZE ENTRY CORRECT].
+  intuition.
+Qed.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef (fun f => proj1_sig (transf_function f)) f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
+
+(** * Preservation proof *)
+
+Local Notation ext alive := (fun r => Regset.In r alive).
+
+Inductive match_fundef: RTLpath.fundef -> RTLpath.fundef -> Prop :=
+  | match_Internal dupmap f f': match_function dupmap f f' -> match_fundef (Internal f) (Internal f')
+  | match_External ef: match_fundef (External ef) (External ef).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframe_intro dupmap res f sp pc rs1 rs2 f' pc' path
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc)
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: forall v, eqlive_reg (ext path.(input_regs)) (rs1 # res <- v) (rs2 # res <- v)):
+      match_stackframes (Stackframe res f sp pc rs1) (Stackframe res f' sp pc' rs2).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro dupmap st f sp pc rs1 rs2 m st' f' pc' path
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc)
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: eqlive_reg (ext path.(input_regs)) rs1 rs2):
+      match_states (State st f sp pc rs1 m) (State st' f' sp pc' rs2 m)
+  | match_states_call st st' f f' args m
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_fundef f f')
+      (LIVE: liveness_ok_fundef f):
+      match_states (Callstate st f args m) (Callstate st' f' args m)
+  | match_states_return st st' v m
+      (STACKS: list_forall2 match_stackframes st st'):
+      match_states (Returnstate st v m) (Returnstate st' v m).
+
+Lemma match_stackframes_equiv stf1 stf2 stf3:
+  match_stackframes stf1 stf2 -> equiv_stackframe stf2 stf3 -> match_stackframes stf1 stf3.
+Proof.
+  destruct 1; intros EQ; inv EQ; try econstructor; eauto.
+  intros; eapply eqlive_reg_trans; eauto.
+  rewrite eqlive_reg_triv in * |-.
+  eapply eqlive_reg_update.
+  eapply eqlive_reg_monotonic; eauto.
+  simpl; auto.
+Qed.
+
+Lemma match_stack_equiv stk1 stk2:
+  list_forall2 match_stackframes stk1 stk2 -> 
+  forall stk3, list_forall2 equiv_stackframe stk2 stk3 -> 
+  list_forall2 match_stackframes stk1 stk3.
+Proof.
+  Local Hint Resolve match_stackframes_equiv: core.
+  induction 1; intros stk3 EQ; inv EQ; econstructor; eauto.
+Qed.
+
+Lemma match_states_equiv s1 s2 s3: match_states s1 s2 -> equiv_state s2 s3 -> match_states s1 s3.
+Proof.
+  Local Hint Resolve match_stack_equiv: core.
+  destruct 1; intros EQ; inv EQ; econstructor; eauto.
+  intros; eapply eqlive_reg_triv_trans; eauto.
+Qed.
+
+Lemma eqlive_match_stackframes stf1 stf2 stf3:
+  eqlive_stackframes stf1 stf2 -> match_stackframes stf2 stf3 -> match_stackframes stf1 stf3.
+Proof.
+  destruct 1; intros MS; inv MS; try econstructor; eauto.
+  try_simplify_someHyps. intros; eapply eqlive_reg_trans; eauto.
+Qed.
+
+Lemma eqlive_match_stack stk1 stk2:
+  list_forall2 eqlive_stackframes stk1 stk2 -> 
+  forall stk3, list_forall2 match_stackframes stk2 stk3 -> 
+  list_forall2 match_stackframes stk1 stk3.
+Proof.
+  induction 1; intros stk3 MS; inv MS; econstructor; eauto.
+  eapply eqlive_match_stackframes; eauto.
+Qed.
+
+Lemma eqlive_match_states s1 s2 s3: eqlive_states s1 s2 -> match_states s2 s3 -> match_states s1 s3.
+Proof.
+  Local Hint Resolve eqlive_match_stack: core.
+  destruct 1; intros MS; inv MS; try_simplify_someHyps; econstructor; eauto.
+  eapply eqlive_reg_trans; eauto.
+Qed.
+
+Lemma eqlive_stackframes_refl stf1 stf2: match_stackframes stf1 stf2 -> eqlive_stackframes stf1 stf1.
+Proof.
+  destruct 1; econstructor; eauto.
+  intros; eapply eqlive_reg_refl; eauto.
+Qed.
+
+Lemma eqlive_stacks_refl stk1 stk2:
+  list_forall2 match_stackframes stk1 stk2 -> list_forall2 eqlive_stackframes stk1 stk1.
+Proof.
+  induction 1; simpl; econstructor; eauto.
+  eapply eqlive_stackframes_refl; eauto.
+Qed.
+
+Lemma transf_fundef_correct f f':
+  transf_fundef f = OK f' -> match_fundef f f'.
+Proof.
+  intros TRANSF; destruct f; simpl; monadInv TRANSF.
+  + destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+    destruct (H _ EQ).
+    intuition subst; auto.
+    eapply match_Internal; eauto.
+  + eapply match_External.
+Qed.
+
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
new file mode 100644
index 00000000..66910bdf
--- /dev/null
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -0,0 +1,368 @@
+open RTLpath
+open RTL
+open Maps
+open RTLpathLivegenaux
+open Registers
+open Camlcoq
+
+type superblock = {
+  instructions: P.t array; (* pointers to code instructions *)
+  (* each predicted Pcb has its attached liveins *)
+  (* This is indexed by the pc value *)
+  liveins: Regset.t PTree.t;
+  (* Union of the input_regs of the last successors *)
+  output_regs: Regset.t;
+  typing: RTLtyping.regenv
+}
+
+let print_instructions insts code =
+  if (!debug_flag) then begin
+    dprintf "[ ";
+    List.iter (
+      fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code))
+    ) insts; dprintf "]"
+  end
+
+let print_superblock sb code =
+  let insts = sb.instructions in
+  let li = sb.liveins in
+  let outs = sb.output_regs in
+  begin
+    dprintf "{ instructions = "; print_instructions (Array.to_list insts) code; dprintf "\n";
+    dprintf "  liveins = "; print_ptree_regset li; dprintf "\n";
+    dprintf "  output_regs = "; print_regset outs; dprintf "}"
+  end
+
+let print_superblocks lsb code =
+  let rec f = function
+    | [] -> ()
+    | sb :: lsb -> (print_superblock sb code; dprintf ",\n"; f lsb)
+  in begin
+    dprintf "[\n";
+    f lsb;
+    dprintf "]"
+  end
+
+(* Adapted from backend/PrintRTL.ml: print_function *)
+let print_code code = let open PrintRTL in let open Printf in
+  if (!debug_flag) then begin
+    fprintf stdout "{\n";
+    let instrs =
+      List.sort
+        (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
+        (List.rev_map
+          (fun (pc, i) -> (P.to_int pc, i))
+          (PTree.elements code)) in
+    List.iter (print_instruction stdout) instrs;
+    fprintf stdout "}"
+  end
+
+let print_arrayp arr = begin
+  dprintf "[| ";
+  Array.iter (fun n -> dprintf "%d, " (P.to_int n)) arr;
+  dprintf "|]"
+end
+
+let get_superblocks code entry pm typing =
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let rec get_superblocks_rec pc =
+    let liveins = ref (PTree.empty) in
+    let rec follow pc n =
+      let inst = get_some @@ PTree.get pc code in
+      if (n == 0) then begin
+        (match (non_predicted_successors inst) with
+          | [pcout] ->
+              let live = (get_some @@ PTree.get pcout pm).input_regs in
+                liveins := PTree.set pc live !liveins
+          | _ -> ());
+        ([pc], successors_inst inst)
+      end else
+        let nexts_from_exit = match (non_predicted_successors inst) with
+          | [pcout] -> 
+              let live = (get_some @@ PTree.get pcout pm).input_regs in begin
+                liveins := PTree.set pc live !liveins;
+                [pcout]
+              end
+          | [] -> []
+          | _ -> failwith "Having more than one non_predicted_successor is not handled"
+        in match (predicted_successor inst) with
+          | None -> failwith "Incorrect path"
+          | Some succ ->
+              let (insts, nexts) = follow succ (n-1) in (pc :: insts, nexts_from_exit @ nexts)
+    in if (get_some @@ PTree.get pc !visited) then []
+    else begin
+      visited := PTree.set pc true !visited;
+      let pi = get_some @@ PTree.get pc pm in
+      let (insts, nexts) = follow pc (Camlcoq.Nat.to_int pi.psize) in
+      let superblock = { instructions = Array.of_list insts; liveins = !liveins;
+        output_regs = pi.output_regs; typing = typing } in
+      superblock :: (List.concat @@ List.map get_superblocks_rec nexts)
+    end
+  in let lsb = get_superblocks_rec entry in begin
+    (* debug_flag := true; *)
+    dprintf "Superblocks identified:"; print_superblocks lsb code; dprintf "\n";
+    (* debug_flag := false; *)
+    lsb
+end
+
+(* TODO David *)
+let schedule_superblock sb code =
+  if not !Clflags.option_fprepass
+  then sb.instructions
+  else
+    (* let old_flag = !debug_flag in
+    debug_flag := true;
+    print_endline "ORIGINAL SUPERBLOCK";
+    print_superblock sb code;
+    debug_flag := old_flag; *)
+    let nr_instr = Array.length sb.instructions in
+    let trailer_length =
+      match PTree.get (sb.instructions.(nr_instr-1)) code with
+      | None -> 0
+      | Some ii ->
+         match predicted_successor ii with
+         | Some _ -> 0
+         | None -> 1 in
+    match PrepassSchedulingOracle.schedule_sequence
+            (Array.map (fun i ->
+                 (match PTree.get i code with
+                 | Some ii -> ii
+                 | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
+                 (match PTree.get i sb.liveins with
+                 | Some s -> s
+                 | None -> Regset.empty))
+               (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with
+    | None -> sb.instructions
+    | Some order ->
+       let ins' =
+         Array.append 
+           (Array.map (fun i -> sb.instructions.(i)) order)
+           (Array.sub sb.instructions (nr_instr-trailer_length) trailer_length) in
+       (* Printf.printf "REORDERED SUPERBLOCK %d\n" (Array.length ins');
+       debug_flag := true;
+       print_instructions (Array.to_list ins') code;
+       debug_flag := old_flag;
+       flush stdout; *)
+       assert ((Array.length sb.instructions) = (Array.length ins'));
+       (*sb.instructions; *)
+       ins';;
+
+  (* stub2: reverse function *)
+  (*
+  let reversed = Array.of_list @@ List.rev @@ Array.to_list (sb.instructions) in
+  let tmp = reversed.(0) in
+  let last_index = Array.length reversed - 1 in
+  begin
+    reversed.(0) <- reversed.(last_index);
+    reversed.(last_index) <- tmp;
+    reversed
+  end *)
+  (* stub: identity function *)
+
+(**
+ * Perform basic checks on the new order :
+ * - must have the same length as the old order
+ * - non basic instructions (call, tailcall, return, jumptable, non predicted CB) must not move
+ *)
+let check_order code old_order new_order = begin
+  assert ((Array.length old_order) == (Array.length new_order));
+  let length = Array.length new_order in
+  if length > 0 then
+    let last_inst = Array.get old_order (length - 1) in
+    let instr = get_some @@ PTree.get last_inst code in
+    match predicted_successor instr with
+    | None ->
+        if (last_inst != Array.get new_order (length - 1)) then
+          failwith "The last instruction of the superblock is not basic, but was moved"
+    | _ -> ()
+end
+
+type sinst =
+  (* Each middle instruction has a direct successor *)
+  (* A Smid can be the last instruction of a superblock, but a Send cannot be moved *)
+  | Smid of RTL.instruction * node
+  | Send of RTL.instruction
+
+let rinst_to_sinst inst =
+  match inst with
+  | Inop n -> Smid(inst, n)
+  | Iop (_,_,_,n) -> Smid(inst, n)
+  | Iload (_,_,_,_,_,n) -> Smid(inst, n)
+  | Istore (_,_,_,_,n) -> Smid(inst, n)
+  | Icond (_,_,n1,n2,p) -> (
+      match p with
+      | Some true -> Smid(inst, n1)
+      | Some false -> Smid(inst, n2)
+      | None -> Send(inst)
+    )
+  | Icall _ | Ibuiltin _ | Ijumptable _ | Itailcall _ | Ireturn _ -> Send(inst)
+
+let change_predicted_successor s = function
+  | Smid(i, n) -> Smid(i, s)
+  | Send _ -> failwith "Called change_predicted_successor on Send. Are you trying to move a non-basic instruction in the middle of the block?"
+
+(* Forwards the successor changes into an RTL instruction *)
+let sinst_to_rinst = function
+  | Smid(inst, s) -> (
+      match inst with
+      | Inop n -> Inop s
+      | Iop (a,b,c,n) -> Iop (a,b,c,s)
+      | Iload (a,b,c,d,e,n) -> Iload (a,b,c,d,e,s)
+      | Istore (a,b,c,d,n) -> Istore (a,b,c,d,s)
+      | Icond (a,b,n1,n2,p) -> (
+        match p with
+        | Some true -> Icond(a, b, s, n2, p)
+        | Some false -> Icond(a, b, n1, s, p)
+        | None -> failwith "Non predicted Icond as a middle instruction!"
+        )
+      | _ -> failwith "That instruction shouldn't be a middle instruction"
+      )
+  | Send i -> i
+
+let apply_schedule code sb new_order =
+  let tc = ref code in
+  let old_order = sb.instructions in
+  begin
+    check_order code old_order new_order;
+    Array.iteri (fun i n' ->
+      let inst' = get_some @@ PTree.get n' code in
+      let iend = Array.length old_order - 1 in
+      let new_inst =
+        if (i == iend) then
+          let final_inst_node = Array.get old_order iend in
+          let sinst' = rinst_to_sinst inst' in
+          match sinst' with
+          (* The below assert fails if a Send is in the middle of the original superblock *)
+          | Send i -> (assert (final_inst_node == n'); i)
+          | Smid _ ->
+              let final_inst = get_some @@ PTree.get final_inst_node code in
+              match rinst_to_sinst final_inst with
+              | Smid (_, s') -> sinst_to_rinst @@ change_predicted_successor s' sinst'
+              | Send _ -> assert(false) (* should have failed earlier *)
+        else
+          sinst_to_rinst
+          (* this will fail if the moved instruction is a Send *)
+          @@ change_predicted_successor (Array.get old_order (i+1))
+          @@ rinst_to_sinst inst'
+      in tc := PTree.set (Array.get old_order i) new_inst !tc
+    ) new_order;
+    !tc
+  end
+
+  (*
+let main_successors = function
+| Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (
+    match p with
+    | Some true -> [n1; n2]
+    | Some false ->  [n2; n1]
+    | None -> [n1; n2] )
+| Ijumptable _ | Itailcall _ | Ireturn _ -> []
+
+let change_predicted_successor i s = match i with
+  | Itailcall _ | Ireturn _ -> failwith "Wrong instruction (5) - Tailcalls and returns should not be moved in the middle of a superblock"
+  | Ijumptable _ -> failwith "Wrong instruction (6) - Jumptables should not be moved in the middle of a superblock"
+  | Inop n -> Inop s
+  | Iop (a,b,c,n) -> Iop (a,b,c,s)
+  | Iload (a,b,c,d,e,n) -> Iload (a,b,c,d,e,s)
+  | Istore (a,b,c,d,n) -> Istore (a,b,c,d,s)
+  | Icall (a,b,c,d,n) -> Icall (a,b,c,d,s)
+  | Ibuiltin (a,b,c,n) -> Ibuiltin (a,b,c,s)
+  | Icond (a,b,n1,n2,p) -> (
+      match p with
+      | Some true -> Icond (a,b,s,n2,p)
+      | Some false -> Icond (a,b,n1,s,p)
+      | None -> failwith "Predicted a successor for an Icond with p=None - unpredicted CB should not be moved in the middle of the superblock"
+      )
+
+let rec change_successors i = function
+  | [] -> (
+      match i with
+      | Itailcall _ | Ireturn _ -> i
+      | _ -> failwith "Wrong instruction (1)")
+  | [s] -> (
+      match i with
+      | Inop n -> Inop s
+      | Iop (a,b,c,n) -> Iop (a,b,c,s)
+      | Iload (a,b,c,d,e,n) -> Iload (a,b,c,d,e,s)
+      | Istore (a,b,c,d,n) -> Istore (a,b,c,d,s)
+      | Icall (a,b,c,d,n) -> Icall (a,b,c,d,s)
+      | Ibuiltin (a,b,c,n) -> Ibuiltin (a,b,c,s)
+      | Ijumptable (a,[n]) -> Ijumptable (a,[s])
+      | Icond (a,b,n1,n2,p) -> (
+          match p with
+          | Some true -> Icond (a,b,s,n2,p)
+          | Some false -> Icond (a,b,n1,s,p)
+          | None -> failwith "Icond Wrong instruction (2) ; should not happen?"
+        )
+      | _ -> failwith "Wrong instruction (2)")
+  | [s1; s2] -> (
+      match i with
+      | Icond (a,b,n1,n2,p) -> Icond (a,b,s1,s2,p)
+      | Ijumptable (a, [n1; n2]) -> Ijumptable (a, [s1; s2])
+      | _ -> change_successors i [s1])
+  | ls -> (
+      match i with
+      | Ijumptable (a, ln) -> begin
+          assert ((List.length ln) == (List.length ls));
+          Ijumptable (a, ls)
+        end
+      | _ -> failwith "Wrong instruction (4)")
+
+
+let apply_schedule code sb new_order =
+  let tc = ref code in
+  let old_order = sb.instructions in
+  let last_node = Array.get old_order (Array.length old_order - 1) in
+  let last_successors = main_successors
+    @@ get_some @@ PTree.get last_node code in
+  begin
+    check_order code old_order new_order;
+    Array.iteri (fun i n' ->
+      let inst' = get_some @@ PTree.get n' code in
+      let new_inst =
+        if (i == (Array.length old_order - 1)) then
+          change_successors inst' last_successors
+        else
+          change_predicted_successor inst' (Array.get old_order (i+1))
+      in tc := PTree.set (Array.get old_order i) new_inst !tc
+    ) new_order;
+    !tc
+  end
+*)
+
+let rec do_schedule code = function
+  | [] -> code
+  | sb :: lsb ->
+      let schedule = schedule_superblock sb code in
+      let new_code = apply_schedule code sb schedule in
+      begin
+        (* debug_flag := true; *)
+        dprintf "Old Code: "; print_code code;
+        dprintf "\nSchedule to apply: "; print_arrayp schedule;
+        dprintf "\nNew Code: "; print_code new_code;
+        dprintf "\n";
+        (* debug_flag := false; *)
+        do_schedule new_code lsb
+      end
+
+let get_ok r = match r with Errors.OK x -> x | _ -> failwith "Did not get OK"
+
+let scheduler f =
+  let code = f.fn_RTL.fn_code in
+  let id_ptree = PTree.map (fun n i -> n) (f.fn_path) in
+  let entry = f.fn_RTL.fn_entrypoint in
+  let pm = f.fn_path in
+  let typing = get_ok @@ RTLtyping.type_function f.fn_RTL in
+  let lsb = get_superblocks code entry pm typing in
+  begin
+    (* debug_flag := true; *)
+    dprintf "Pathmap:\n"; dprintf "\n";
+    print_path_map pm;
+    dprintf "Superblocks:\n";
+    print_superblocks lsb code; dprintf "\n";
+    (* debug_flag := false; *)
+    let tc = do_schedule code lsb in
+    (((tc, entry), pm), id_ptree)
+  end
diff --git a/scheduling/RTLpathSchedulerproof.v b/scheduling/RTLpathSchedulerproof.v
new file mode 100644
index 00000000..4ba197b0
--- /dev/null
+++ b/scheduling/RTLpathSchedulerproof.v
@@ -0,0 +1,363 @@
+Require Import AST Linking Values Maps Globalenvs Smallstep Registers.
+Require Import Coqlib Maps Events Errors Op.
+Require Import RTL RTLpath RTLpathLivegen RTLpathLivegenproof RTLpathSE_theory.
+Require Import RTLpathScheduler.
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: program.
+Variable tprog: program.
+
+Hypothesis TRANSL: match_prog prog tprog.
+
+Let pge := Genv.globalenv prog.
+Let tpge := Genv.globalenv tprog.
+
+Hypothesis all_fundef_liveness_ok: forall b fd, Genv.find_funct_ptr pge b = Some fd -> liveness_ok_fundef fd.
+
+Lemma symbols_preserved s: Genv.find_symbol tpge s = Genv.find_symbol pge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv pge tpge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma functions_preserved:
+  forall (v: val) (f: fundef),
+  Genv.find_funct pge v = Some f ->
+  exists tf cunit, transf_fundef f = OK tf /\ Genv.find_funct tpge v = Some tf /\ linkorder cunit prog.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+  + unfold incl; auto.
+  + eapply linkorder_refl.
+Qed.
+
+Lemma function_ptr_preserved:
+  forall v f,
+  Genv.find_funct_ptr pge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tpge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma function_sig_preserved:
+  forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H.
+    destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+    destruct (H _ EQ).
+    intuition subst; auto.
+    symmetry.
+    eapply match_function_preserves.
+    eassumption.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Theorem transf_initial_states:
+  forall s1, initial_state prog s1 ->
+  exists s2, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inv H.
+  exploit function_ptr_preserved; eauto. intros (tf & FIND &  TRANSF).
+  exists (Callstate nil tf nil m0).
+  split.
+  - econstructor; eauto.
+    + intros; apply (Genv.init_mem_match TRANSL); assumption.
+    + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto.
+      symmetry. eapply match_program_main. eauto.
+    + destruct f.
+      * monadInv TRANSF. rewrite <- H3.
+        destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+        destruct (H _ EQ).
+        intuition subst; auto.
+        symmetry; eapply match_function_preserves. eassumption.
+      * monadInv TRANSF. assumption.
+  - constructor; eauto.
+    + constructor.
+    + apply transf_fundef_correct; auto.
+(*     + eapply all_fundef_liveness_ok; eauto. *)
+Qed.
+
+Theorem transf_final_states s1 s2 r:
+  final_state s1 r -> match_states s1 s2 -> final_state s2 r.
+Proof.
+  unfold final_state.
+  intros H; inv H.
+  intros H; inv H; simpl in * |- *; try congruence.
+  inv H1.
+  destruct st; simpl in * |- *; try congruence.
+  inv STACKS. constructor.
+Qed.
+
+
+Let ge := Genv.globalenv (RTLpath.transf_program prog).
+Let tge := Genv.globalenv (RTLpath.transf_program tprog).
+
+Lemma senv_sym x y: Senv.equiv x y -> Senv.equiv y x.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_preserved_RTL:
+  Senv.equiv ge tge.
+Proof.
+  eapply senv_transitivity. { eapply senv_sym; eapply RTLpath.senv_preserved. }
+  eapply senv_transitivity. { eapply senv_preserved. }
+  eapply RTLpath.senv_preserved.
+Qed.
+
+Lemma symbols_preserved_RTL s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  unfold tge, ge. erewrite RTLpath.symbols_preserved; eauto.
+  rewrite symbols_preserved.
+  erewrite RTLpath.symbols_preserved; eauto.
+Qed.
+
+Program Definition mkctx sp rs0 m0 {f1: RTLpath.function} (hyp: liveness_ok_function f1)
+   :  simu_proof_context f1
+   := {| the_ge1:= ge; the_ge2 := tge; the_sp:=sp; the_rs0:=rs0; the_m0:=m0 |}.
+Obligation 2.
+  erewrite symbols_preserved_RTL. eauto.
+Qed.
+
+Lemma s_find_function_preserved f sp svos1 svos2 rs0 m0 fd
+  (LIVE: liveness_ok_function f):
+  (svident_simu f (mkctx sp rs0 m0 LIVE) svos1 svos2) ->
+  sfind_function pge ge sp svos1 rs0 m0 = Some fd ->
+  exists fd', sfind_function tpge tge sp svos2 rs0 m0 = Some fd'
+              /\ transf_fundef fd = OK fd'
+              /\ liveness_ok_fundef fd.
+Proof.
+  Local Hint Resolve symbols_preserved_RTL: core.
+  unfold sfind_function. intros [sv1 sv2 SIMU|]; simpl in *.
+  + rewrite !(seval_preserved ge tge) in *; eauto.
+    destruct (seval_sval _ _ _ _); try congruence.
+    erewrite <- SIMU; try congruence. clear SIMU.
+    intros; exploit functions_preserved; eauto.
+    intros (fd' & cunit & (X1 & X2 & X3)). eexists.
+    repeat split; eauto.
+    eapply find_funct_liveness_ok; eauto.
+(*     intros. eapply all_fundef_liveness_ok; eauto. *)
+  + subst. rewrite symbols_preserved. destruct (Genv.find_symbol _ _); try congruence.
+    intros; exploit function_ptr_preserved; eauto.
+    intros (fd' & X). eexists. intuition eauto.
+(*     intros. eapply all_fundef_liveness_ok; eauto. *)
+Qed.
+
+Lemma sistate_simu f dupmap sp st st' rs m is 
+  (LIVE: liveness_ok_function f):
+  ssem_internal ge sp st rs m is ->
+  sistate_simu dupmap f st st' (mkctx sp rs m LIVE)->
+  exists is',
+    ssem_internal tge sp st' rs m is' /\ istate_simu f dupmap is is'.
+Proof.
+  intros SEM X; eapply X; eauto.
+Qed.
+
+Lemma seval_builtin_sval_preserved sp rs m:
+  forall bs, seval_builtin_sval ge sp bs rs m = seval_builtin_sval tge sp bs rs m.
+Proof.
+  induction bs.
+  all: try (simpl; try reflexivity; erewrite seval_preserved by eapply symbols_preserved_RTL; reflexivity).
+  all: simpl; rewrite IHbs1; rewrite IHbs2; reflexivity.
+Qed.
+
+Lemma seval_list_builtin_sval_preserved sp rs m:
+  forall lbs,
+  seval_list_builtin_sval ge sp lbs rs m = seval_list_builtin_sval tge sp lbs rs m.
+Proof.
+  induction lbs; [simpl; reflexivity|].
+  simpl. rewrite seval_builtin_sval_preserved. rewrite IHlbs.
+  reflexivity.
+Qed.
+
+Lemma ssem_final_simu dm f f' stk stk' sp st st' rs0 m0 sv sv' rs m t s
+  (LIVE: liveness_ok_function f):
+  match_function dm f f' ->
+  list_forall2 match_stackframes stk stk' ->
+  (* s_istate_simu f dupmap st st' -> *)
+  sfval_simu dm f st.(si_pc) st'.(si_pc) (mkctx sp rs0 m0 LIVE) sv sv' ->
+  ssem_final pge ge sp st.(si_pc) stk f rs0 m0 sv rs m t s ->
+  exists s', ssem_final tpge tge sp st'.(si_pc) stk' f' rs0 m0 sv' rs m t s' /\ match_states s s'.
+Proof.
+  Local Hint Resolve transf_fundef_correct: core.
+  intros FUN STK (* SIS *) SFV. destruct SFV; intros SEM; inv SEM; simpl in *.
+  - (* Snone *)
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    eapply eqlive_reg_refl.
+  - (* Scall *)
+    exploit s_find_function_preserved; eauto.
+    intros (fd' & FIND & TRANSF & LIVE').
+    erewrite <- function_sig_preserved; eauto.
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    + eapply eq_trans; try eassumption; auto.
+    + simpl. repeat (econstructor; eauto).
+  - (* Stailcall *)
+    exploit s_find_function_preserved; eauto.
+    intros (fd' & FIND & TRANSF & LIVE').
+    erewrite <- function_sig_preserved; eauto.
+    eexists; split; econstructor; eauto.
+    + erewrite <- preserv_fnstacksize; eauto.
+    + eapply eq_trans; try eassumption; auto.
+  - (* Sbuiltin *)
+    pose senv_preserved_RTL as SRTL.
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    + eapply seval_builtin_args_preserved; eauto.
+      eapply seval_list_builtin_sval_correct; eauto.
+      rewrite H0.
+      erewrite seval_list_builtin_sval_preserved; eauto.
+    + eapply external_call_symbols_preserved; eauto.
+    + eapply eqlive_reg_refl.
+  - (* Sjumptable *)
+    exploit ptree_get_list_nth_rev; eauto. intros (p2 & LNZ & DM).
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    + eapply eq_trans; try eassumption; auto.
+    + eapply eqlive_reg_refl.
+  - (* Sreturn *)
+    eexists; split; econstructor; eauto.
+    erewrite <- preserv_fnstacksize; eauto.
+  - (* Sreturn bis *)
+    eexists; split; econstructor; eauto.
+    + erewrite <- preserv_fnstacksize; eauto.
+    + rewrite <- H. erewrite <- seval_preserved; eauto.
+Qed.
+
+(* The main theorem on simulation of symbolic states ! *)
+Theorem ssem_sstate_simu dm f f' stk stk' sp st st' rs m t s:
+  match_function dm f f' ->
+  liveness_ok_function f ->
+  list_forall2 match_stackframes stk stk' ->
+  ssem pge ge sp st stk f rs m t s ->
+  (forall ctx: simu_proof_context f, sstate_simu dm f st st' ctx) ->
+  exists s', ssem tpge tge sp st' stk' f' rs m t s' /\ match_states s s'.
+Proof.
+  intros MFUNC LIVE STACKS SEM SIMU.
+  destruct (SIMU (mkctx sp rs m LIVE)) as (SIMU1 & SIMU2); clear SIMU.
+  destruct SEM as [is CONT SEM|is t s' CONT SEM1 SEM2]; simpl.
+  - (* sem_early *)
+    exploit sistate_simu; eauto.
+    unfold istate_simu; rewrite CONT.
+    intros (is' & SEM' & (path & PATH & (CONT' & RS' & M') & PC')).
+    exists (State stk' f' sp (ipc is') (irs is') (imem is')).
+    split.
+    + eapply ssem_early; auto. congruence.
+    + rewrite M'. econstructor; eauto.
+  - (* sem_normal *)
+    exploit sistate_simu; eauto.
+    unfold istate_simu; rewrite CONT.
+    intros (is' & SEM' & (CONT' & RS' & M')(*  & DMEQ *)).
+    rewrite <- eqlive_reg_triv in RS'.
+    exploit ssem_final_simu; eauto.
+    clear SEM2; intros (s0 & SEM2 & MATCH0).
+    exploit ssem_final_equiv; eauto.
+    clear SEM2; rewrite M'; rewrite CONT' in CONT; intros (s1 & EQ & SEM2).
+    exists s1; split.
+    + eapply ssem_normal; eauto.
+    + eapply match_states_equiv; eauto.
+Qed.
+
+Lemma exec_path_simulation dupmap path stk stk' f f' sp rs m pc pc' t s:
+  (fn_path f)!pc = Some path ->
+  path_step ge pge path.(psize) stk f sp rs m pc t s ->
+  list_forall2 match_stackframes stk stk' ->
+  dupmap ! pc' = Some pc ->
+  match_function dupmap f f' ->
+  liveness_ok_function f ->
+  exists path' s', (fn_path f')!pc' = Some path' /\ path_step tge tpge path'.(psize) stk' f' sp rs m pc' t s' /\ match_states s s'.
+Proof.
+  intros PATH STEP STACKS DUPPC MATCHF LIVE.
+  exploit initialize_path. { eapply dupmap_path_entry2; eauto. }
+  intros (path' & PATH').
+  exists path'.
+  exploit (sexec_correct f pc pge ge sp path stk rs m t s); eauto.
+  intros (st & SYMB & SEM); clear STEP.
+  exploit dupmap_correct; eauto.
+  clear SYMB; intros (st' & SYMB & SIMU).
+  exploit ssem_sstate_simu; eauto.
+  intros (s0 & SEM0 & MATCH). 
+  exploit sexec_exact; eauto.
+  intros (s' & STEP' & EQ).
+  exists s'; intuition.
+  eapply match_states_equiv; eauto.
+Qed.
+
+Lemma step_simulation s1 t s1' s2:
+  step ge pge s1 t s1' ->
+  match_states s1 s2 ->
+  exists s2',
+     step tge tpge s2 t s2'
+  /\ match_states s1' s2'.
+Proof.
+  Local Hint Resolve eqlive_stacks_refl transf_fundef_correct: core.
+  destruct 1 as [path stack f sp rs m pc t s PATH STEP | | | ]; intros MS; inv MS.
+(* exec_path *)
+  - try_simplify_someHyps. intros.
+    exploit path_step_eqlive; eauto. (* { intros. eapply all_fundef_liveness_ok; eauto. } *)
+    clear STEP EQUIV rs; intros (s2 & STEP & EQLIVE).
+    exploit exec_path_simulation; eauto.
+    clear STEP; intros (path' & s' & PATH' & STEP' & MATCH').
+    exists s'; split.
+    + eapply exec_path; eauto.
+    + eapply eqlive_match_states; eauto.
+(* exec_function_internal *)
+  - inv LIVE.
+    exploit initialize_path. { eapply (fn_entry_point_wf f). }
+    destruct 1 as (path & PATH).
+    inversion TRANSF as [f0 xf tf MATCHF|]; subst. eexists. split.
+    + eapply exec_function_internal. erewrite <- preserv_fnstacksize; eauto.
+    + erewrite preserv_fnparams; eauto.
+      econstructor; eauto. 
+      { apply preserv_entrypoint; auto. }
+      { apply eqlive_reg_refl. }
+(* exec_function_external *)
+  - inversion TRANSF as [|]; subst. eexists. split.
+    + econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved_RTL.
+    + constructor. assumption.
+(* exec_return *)
+  - inv STACKS. destruct b1 as [res' f' sp' pc' rs']. eexists. split.
+    + constructor.
+    + inv H1. econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (semantics prog) (semantics tprog).
+Proof.
+  eapply forward_simulation_step with match_states.
+  - eapply senv_preserved.
+  - eapply transf_initial_states.
+  - intros; eapply transf_final_states; eauto.
+  - intros; eapply step_simulation; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/scheduling/RTLpathproof.v b/scheduling/RTLpathproof.v
new file mode 100644
index 00000000..20eded97
--- /dev/null
+++ b/scheduling/RTLpathproof.v
@@ -0,0 +1,50 @@
+Require Import Coqlib Maps.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL Linking.
+Require Import RTLpath.
+
+Definition match_prog (p: RTLpath.program) (tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = fundef_RTL f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Lemma match_program_transf:
+  forall p tp, match_prog p tp -> transf_program p = tp.
+Proof.
+  intros p tp H. inversion_clear H. inv H1.
+  destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. simpl in *.
+  subst. unfold transf_program. unfold transform_program. simpl.
+  apply program_equals; simpl; auto.
+  induction H0; simpl; auto.
+  rewrite IHlist_forall2. apply cons_extract.
+  destruct a1 as [ida gda]. destruct b1 as [idb gdb].
+  simpl in *.
+  inv H. inv H2.
+  - simpl in *. subst. auto.
+  - simpl in *. subst. inv H. auto.
+Qed.
+
+
+Section PRESERVATION.
+
+Variable prog: RTLpath.program.
+Variable tprog: RTL.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct:
+  forward_simulation (RTLpath.semantics prog) (RTL.semantics tprog).
+Proof.
+  pose proof (match_program_transf prog tprog TRANSF) as TR. subst.
+  eapply RTLpath_correct.
+Qed.
+
+End PRESERVATION.
+
+
diff --git a/test/c/Makefile b/test/c/Makefile
index 726631d2..a728d182 100644
--- a/test/c/Makefile
+++ b/test/c/Makefile
@@ -1,6 +1,8 @@
 include ../../Makefile.config
 
 CCOMP=../../ccomp
+# TODO - temporary
+# CCOMPOPTS:=$(CCOMPOPTS) -fall-loads-nontrap -fduplicate 2 -fprepass
 CCOMPFLAGS=$(CCOMPOPTS) -stdlib ../../runtime -dc -dclight -dasm
 
 CFLAGS+=-O2 -Wall
diff --git a/test/monniaux/picosat-965/onefile/picosat.c b/test/monniaux/picosat-965/onefile/picosat.c
new file mode 100644
index 00000000..e1c18438
--- /dev/null
+++ b/test/monniaux/picosat-965/onefile/picosat.c
@@ -0,0 +1,25 @@
+typedef struct b b;
+b *a;
+struct b {
+  int c;
+  int d, **clshead;
+  int **ahead;
+  unsigned h;
+} i;
+b *j();
+int k();
+int main() {
+  a = j();
+  k(a);
+}
+#define e(f) f - g->c
+static void m(b *g, int *l) {
+  if (g)
+    *g->ahead = l;
+}
+b *j() { return &i; }
+int k(b *g) {
+  if (g->d)
+    m(g, e(g->clshead[-1]));
+  return g->h;
+}
diff --git a/test/monniaux/picosat-965/onefile/testcmp.sh b/test/monniaux/picosat-965/onefile/testcmp.sh
new file mode 100755
index 00000000..2228c675
--- /dev/null
+++ b/test/monniaux/picosat-965/onefile/testcmp.sh
@@ -0,0 +1,146 @@
+DEFINES="-DNALARM -DNZIP -DNGETRUSAGE -DNDEBUG"
+COMPCERT=/local/monniaux/Kalray/mppa-RTLpathSE-verif-hash-junk
+DATA=$COMPCERT/test/monniaux/picosat-965/tiny.dat
+CCOMP="$COMPCERT/ccomp -fbitfields -fduplicate 2 -fall-loads-nontrap $DEFINES"
+GCC="kvx-cos-gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC0="gcc -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC1="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC2="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+HOSTCC3="gcc -O3 -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC4="clang -Wimplicit -Wuninitialized -Werror $DEFINES"
+HOSTCC5="clang -Wimplicit -Wuninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+CFILES="picosat.c"
+SIMU="kvx-cluster --timeout=100000 -- "
+
+if ! $HOSTCC0 $CFILES -o picosat.cc0.host ;
+then exit 30 ;
+fi
+
+if ! $HOSTCC1 $CFILES -o picosat.cc1.host ;
+then exit 31 ;
+fi
+
+if ! $HOSTCC2 $CFILES -o picosat.cc2.host ;
+then exit 32 ;
+fi
+
+if ! $HOSTCC3 $CFILES -o picosat.cc3.host ;
+then exit 33 ;
+fi
+
+if ! $HOSTCC4 $CFILES -o picosat.cc4.host ;
+then exit 34 ;
+fi
+
+if ! $HOSTCC5 $CFILES -o picosat.cc5.host ;
+then exit 35 ;
+fi
+
+timeout 1 ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.out
+if [ $? -ge 100 ];
+then exit 40 ;
+fi
+
+timeout 1 ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.out
+if [ $? -ge 100 ];
+then exit 41 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc0.valgrind.log ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.valgrind.out
+if [ $? -ge 100 ];
+then exit 50 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc1.valgrind.log ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.valgrind.out
+if [ $? -ge 100 ];
+then exit 51 ;
+fi
+
+timeout 1 ./picosat.cc2.host $DATA 2>&1 > picosat.cc2.out
+if [ $? -ge 100 ];
+then exit 42 ;
+fi
+
+timeout 1 ./picosat.cc3.host $DATA 2>&1 > picosat.cc3.out
+if [ $? -ge 100 ];
+then exit 43 ;
+fi
+
+timeout 1 ./picosat.cc4.host $DATA 2>&1 > picosat.cc4.out
+if [ $? -ge 100 ];
+then exit 44 ;
+fi
+
+timeout 1 ./picosat.cc5.host $DATA 2>&1 > picosat.cc5.out
+if [ $? -ge 100 ];
+then exit 45 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc1.out ;
+then exit 60 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc0.valgrind.out ;
+then exit 70 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc1.valgrind.out ;
+then exit 61 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc2.out ;
+then exit 62 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc3.out ;
+then exit 63 ;
+fi
+
+if ! $GCC $CFILES -o picosat.gcc.target ;
+then exit 1 ;
+fi
+
+if ! $CCOMP $CFILES -o picosat.ccomp.target ;
+then exit 2 ;
+fi
+
+if ! $CCOMP -fprepass -fprepass= list $CFILES -o picosat.prepass.target ;
+then exit 3 ;
+fi
+
+$SIMU ./picosat.gcc.target $DATA 2>&1 > picosat.gcc.out
+if [ $? -ge 100 ];
+then exit 4 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.cc1.out ;
+then exit 13 ;
+fi
+
+if grep timeout picosat.gcc.out ;
+then exit 8 ;
+fi
+
+$SIMU ./picosat.ccomp.target $DATA 2>&1 > picosat.ccomp.out
+if [ $? -ge 100 ];
+then exit 5 ;
+fi
+
+if grep timeout picosat.ccomp.out ;
+then exit 9 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.ccomp.out ;
+then exit 6 ;
+fi
+
+$SIMU ./picosat.prepass.target $DATA 2>&1 > picosat.prepass.out
+if [ $? -ge 100 ];
+then exit 0 ;
+fi
+
+if cmp picosat.gcc.out picosat.prepass.out ;
+then exit 7 ;
+fi
+
+exit 0
diff --git a/test/monniaux/picosat-965/small.dat b/test/monniaux/picosat-965/small.dat
new file mode 100644
index 00000000..accb9054
--- /dev/null
+++ b/test/monniaux/picosat-965/small.dat
@@ -0,0 +1,2 @@
+p cnf 1 1
+1 0
diff --git a/test/monniaux/picosat-965/tiny.dat b/test/monniaux/picosat-965/tiny.dat
new file mode 100644
index 00000000..1d89b303
--- /dev/null
+++ b/test/monniaux/picosat-965/tiny.dat
@@ -0,0 +1,2 @@
+p cnf 0 1
+0
diff --git a/test/monniaux/reduced_picosat/reduced_picosat.c b/test/monniaux/reduced_picosat/reduced_picosat.c
new file mode 100644
index 00000000..eb9fdaf8
--- /dev/null
+++ b/test/monniaux/reduced_picosat/reduced_picosat.c
@@ -0,0 +1,23 @@
+typedef struct b b;
+b *a;
+struct b {
+  int c;
+  int d, **clshead;
+  int **ahead;
+  unsigned h;
+} glob;
+int k();
+int main() {
+  a = &glob;
+  k(a);
+}
+#define e(f) f - g->c
+static void m(b *g, int *l) {
+  if (g)
+    *g->ahead = l;
+}
+int k(b *g) {
+  if (g->d)
+    m(g, e(g->clshead[-1]));
+  return g->h;
+}
diff --git a/test/monniaux/reduced_picosat/test_a.s b/test/monniaux/reduced_picosat/test_a.s
new file mode 100644
index 00000000..c14cc8f9
--- /dev/null
+++ b/test/monniaux/reduced_picosat/test_a.s
@@ -0,0 +1,10 @@
+	.text
+	.global	dummyload
+	.type	dummyload, @function
+dummyload:
+	make	$r0 = 0
+	;;
+	ld.s	$r0 = -8[$r0]
+	ret
+	;;
+	.size	dummyload, .-dummyload
diff --git a/test/monniaux/reduced_picosat/test_b.c b/test/monniaux/reduced_picosat/test_b.c
new file mode 100644
index 00000000..a0fe625b
--- /dev/null
+++ b/test/monniaux/reduced_picosat/test_b.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+extern uint64_t dummyload(void);
+
+int main() {
+  printf("%" PRIu64 "\n", dummyload());
+}
diff --git a/test/monniaux/reduced_picosat/testcmp.sh b/test/monniaux/reduced_picosat/testcmp.sh
new file mode 100755
index 00000000..8dc93de9
--- /dev/null
+++ b/test/monniaux/reduced_picosat/testcmp.sh
@@ -0,0 +1,146 @@
+DEFINES="-DNALARM -DNZIP -DNGETRUSAGE -DNDEBUG"
+COMPCERT=/home/monniaux/work/Kalray/mppa-RTLpathSE-verif-hash-junk
+DATA=$COMPCERT/test/monniaux/picosat-965/tiny.dat
+CCOMP="$COMPCERT/ccomp -fbitfields -fduplicate 2 -fall-loads-nontrap $DEFINES"
+GCC="kvx-cos-gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC0="gcc -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC1="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC2="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+HOSTCC3="gcc -O3 -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC4="/usr/bin/clang -Wimplicit -Wuninitialized -Werror $DEFINES"
+HOSTCC5="/usr/bin/clang -Wimplicit -Wuninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+CFILES="reduced_picosat.c"
+SIMU="kvx-cluster --timeout=10000000 -- "
+
+if ! $HOSTCC0 $CFILES -o picosat.cc0.host ;
+then exit 30 ;
+fi
+
+if ! $HOSTCC1 $CFILES -o picosat.cc1.host ;
+then exit 31 ;
+fi
+
+if ! $HOSTCC2 $CFILES -o picosat.cc2.host ;
+then exit 32 ;
+fi
+
+if ! $HOSTCC3 $CFILES -o picosat.cc3.host ;
+then exit 33 ;
+fi
+
+if ! $HOSTCC4 $CFILES -o picosat.cc4.host ;
+then exit 34 ;
+fi
+
+if ! $HOSTCC5 $CFILES -o picosat.cc5.host ;
+then exit 35 ;
+fi
+
+timeout 1 ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.out
+if [ $? -ge 100 ];
+then exit 40 ;
+fi
+
+timeout 1 ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.out
+if [ $? -ge 100 ];
+then exit 41 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc0.valgrind.log ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.valgrind.out
+if [ $? -ge 100 ];
+then exit 50 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc1.valgrind.log ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.valgrind.out
+if [ $? -ge 100 ];
+then exit 51 ;
+fi
+
+timeout 1 ./picosat.cc2.host $DATA 2>&1 > picosat.cc2.out
+if [ $? -ge 100 ];
+then exit 42 ;
+fi
+
+timeout 1 ./picosat.cc3.host $DATA 2>&1 > picosat.cc3.out
+if [ $? -ge 100 ];
+then exit 43 ;
+fi
+
+timeout 1 ./picosat.cc4.host $DATA 2>&1 > picosat.cc4.out
+if [ $? -ge 100 ];
+then exit 44 ;
+fi
+
+timeout 1 ./picosat.cc5.host $DATA 2>&1 > picosat.cc5.out
+if [ $? -ge 100 ];
+then exit 45 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc1.out ;
+then exit 60 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc0.valgrind.out ;
+then exit 70 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc1.valgrind.out ;
+then exit 61 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc2.out ;
+then exit 62 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc3.out ;
+then exit 63 ;
+fi
+
+if ! $GCC $CFILES -o picosat.gcc.target ;
+then exit 1 ;
+fi
+
+if ! $CCOMP $CFILES -o picosat.ccomp.target ;
+then exit 2 ;
+fi
+
+if ! $CCOMP -fprepass -fprepass= list $CFILES -o picosat.prepass.target ;
+then exit 3 ;
+fi
+
+$SIMU ./picosat.gcc.target $DATA 2>&1 > picosat.gcc.out
+if [ $? -ge 100 ];
+then exit 4 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.cc1.out ;
+then exit 13 ;
+fi
+
+if grep timeout picosat.gcc.out ;
+then exit 8 ;
+fi
+
+$SIMU ./picosat.ccomp.target $DATA 2>&1 > picosat.ccomp.out
+if [ $? -ge 100 ];
+then exit 5 ;
+fi
+
+if grep timeout picosat.ccomp.out ;
+then exit 9 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.ccomp.out ;
+then exit 6 ;
+fi
+
+$SIMU ./picosat.prepass.target $DATA 2>&1 > picosat.prepass.out
+if [ $? -ge 100 ];
+then exit 0 ;
+fi
+
+if cmp picosat.gcc.out picosat.prepass.out ;
+then exit 7 ;
+fi
+
+exit 0
diff --git a/test/monniaux/rules.mk b/test/monniaux/rules.mk
index c0594ef9..cab957c0 100644
--- a/test/monniaux/rules.mk
+++ b/test/monniaux/rules.mk
@@ -21,15 +21,15 @@ MEASURES?=time
 ALL_CFLAGS+=-Wall -D__KVX_COS__ -DMAX_MEASURES=$(MAX_MEASURES)
 #ALL_CFLAGS+=-g
 ALL_GCCFLAGS+=$(ALL_CFLAGS) -std=c99 -Wextra -Werror=implicit
-ALL_CCOMPFLAGS+=$(ALL_CFLAGS)
+ALL_CCOMPFLAGS+=$(ALL_CFLAGS) # -fprofile-use= ../compcert_profiling.dat
 
 # The compilers
-KVX_CC?=kvx-cos-gcc
+KVX_CC?=kvx-elf-gcc
 KVX_CCOMP?=ccomp
 
 # Command to execute
 #EXECUTE_CYCLES?=timeout --signal=SIGTERM 3m kvx-cluster --syscall=libstd_scalls.so --cycle-based --
-EXECUTE_CYCLES?=kvx-cluster --syscall=libstd_scalls.so --cycle-based --
+EXECUTE_CYCLES?=kvx-cluster --enable-cache --syscall=libstd_scalls.so --cycle-based --
 
 # You can define up to GCC4FLAGS and CCOMP4FLAGS
 GCC0FLAGS?=$(ALL_GCCFLAGS) -O0
@@ -37,11 +37,11 @@ GCC1FLAGS?=$(ALL_GCCFLAGS) -O1
 GCC2FLAGS?=$(ALL_GCCFLAGS) -O2
 GCC3FLAGS?=$(ALL_GCCFLAGS) -O3
 GCC4FLAGS?=
-CCOMP0FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fno-postpass
-CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fpostpass= greedy
-CCOMP2FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fno-if-conversion
-CCOMP3FLAGS?=$(ALL_CCOMPFLAGS) -O2
-CCOMP4FLAGS?=
+CCOMP0FLAGS?=$(ALL_CCOMPFLAGS) -O2 
+CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fprepass= list
+CCOMP2FLAGS?=$(ALL_CCOMPFLAGS) -O2 -funrollsingle 30
+CCOMP3FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fprepass= list -funrollsingle 30
+CCOMP4FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fprepass= zigzag
 
 # Prefix names
 GCC0PREFIX?=.gcc.o0
@@ -49,11 +49,11 @@ GCC1PREFIX?=.gcc.o1
 GCC2PREFIX?=.gcc.o2
 GCC3PREFIX?=.gcc.o3
 GCC4PREFIX?=
-CCOMP0PREFIX?=.ccomp.nobundle
-CCOMP1PREFIX?=.ccomp.greedy
-CCOMP2PREFIX?=.ccomp.noif
-CCOMP3PREFIX?=.ccomp
-CCOMP4PREFIX?=
+CCOMP0PREFIX?=.ccomp
+CCOMP1PREFIX?=.ccomp.prepass_list
+CCOMP2PREFIX?=.ccomp.unrollsingle_30
+CCOMP3PREFIX?=.ccomp.prepass_list-unrollsingle_30
+CCOMP4PREFIX?=.ccomp.prepass_zigzag
 
 # List of outfiles, updated by gen_rules
 OUTFILES:=
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 4dfbe565..56d90469 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -1,6 +1,8 @@
 include ../../Makefile.config
 
 CCOMP=../../ccomp
+# TODO - temporary
+# CCOMPOPTS:=$(CCOMPOPTS) -fall-loads-nontrap -fduplicate 2 -fprepass
 CCOMPFLAGS=$(CCOMPOPTS) -stdlib ../../runtime \
  -dparse -dc -dclight -dasm -fall \
  -DARCH_$(ARCH) -DMODEL_$(MODEL)
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 54946366..8d4f4f0b 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -25,10 +25,10 @@ TOTAL, Always, Require, (Some "Renumbering"), "Renumber";
 PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE";
 PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict";
 PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle";
-TOTAL, Always, NoRequire, (Some "Renumbering pre unroll"), "Renumber";
-PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody";
 TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber";
 PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate";
+TOTAL, Always, NoRequire, (Some "Renumbering pre unrolling"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody";
 TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber";
 TOTAL, (Option "optim_constprop"), Require, (Some "Constant propagation"), "Constprop";
 TOTAL, Always, NoRequire, (Some "Renumbering pre CSE"), "Renumber";
@@ -50,8 +50,11 @@ PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob"
 
 let post_rtl_passes =
 [|
-  PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL");
-  TOTAL, Always, Require, (Some "Branch tunneling"), "Tunneling", Noprint;
+  PARTIAL, Always, Require, (Some "RTLpath generation"), "RTLpathLivegen", Noprint;
+  PARTIAL, Always, Require, (Some "Prepass scheduling"), "RTLpathScheduler", Noprint;
+  TOTAL, Always, Require,  (Some "Projection to RTL"), "RTLpath", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1)));
+  PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1");
+  PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2");
   PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint;
   TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint;
   PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint;
diff --git a/x86/CSE2deps.v b/x86/CSE2deps.v
index a4b47a5c..757966b8 100644
--- a/x86/CSE2deps.v
+++ b/x86/CSE2deps.v
@@ -32,5 +32,7 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     if peq symb symb'
     then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
     else false
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
   | _, _, _, _ => true
   end.
diff --git a/x86/CSE2depsproof.v b/x86/CSE2depsproof.v
index fd088962..e181b8f4 100644
--- a/x86/CSE2depsproof.v
+++ b/x86/CSE2depsproof.v
@@ -20,11 +20,79 @@ Require Import Registers Op RTL.
 Require Import CSE2 CSE2deps.
 Require Import Lia.
 
+Lemma ptrofs_modulus :
+  Ptrofs.modulus = if Archi.ptr64
+                   then 18446744073709551616
+                   else 4294967296.
+Proof.
+  reflexivity.
+Qed.
+
 Section SOUNDNESS.
   Variable F V : Type.
   Variable genv: Genv.t F V.
   Variable sp : val.
 
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: try rewrite ptrofs_modulus in *.
+    all: destruct Archi.ptr64.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
+
 Section MEMORY_WRITE.
   Variable m m2 : mem.
   Variable chunkw chunkr : memory_chunk.
@@ -237,7 +305,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -247,8 +315,7 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away; eassumption.
-  }
-  { (* Aglobal / Aglobal *)
+- (* Aglobal / Aglobal *)
     destruct args. 2: discriminate.
     destruct args'. 2: discriminate.
     simpl in *.
@@ -259,7 +326,14 @@ Proof.
       eapply load_store_glob_away; eassumption.
     }
     eapply load_store_diff_globals; eassumption.
-  }
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/x86/Op.v b/x86/Op.v
index 28e6dbd8..776f9495 100644
--- a/x86/Op.v
+++ b/x86/Op.v
@@ -1037,6 +1037,20 @@ Proof.
   auto.
 Qed.
 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct cond; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct c; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..7b6a1b14
--- /dev/null
+++ b/x86/PrepassSchedulingOracle.ml
@@ -0,0 +1,5 @@
+open RTL
+open Registers
+
+(* Do not do anything *)
+let schedule_sequence (seqa : (instruction*Regset.t) array) = None