26 files changed, 1905 insertions, 68 deletions
diff --git a/Makefile b/Makefile
index 6f2a786e..d2c81266 100644
--- a/Makefile
+++ b/Makefile
@@ -89,6 +89,7 @@ BACKEND=\
   NeedDomain.v NeedOp.v Deadcode.v Deadcodeproof.v \
   Unusedglob.v Unusedglobproof.v \
   Machregs.v Locations.v Conventions1.v Conventions.v LTL.v \
+  ForwardMoves.v ForwardMovesproof.v \
   Allnontrap.v Allnontrapproof.v \
   Allocation.v Allocproof.v \
   Tunneling.v Tunnelingproof.v \
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 55922e9e..471ad501 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -435,7 +435,7 @@ let preg_to_dwarf = function
 let expand_function id fn =
   try
     set_current_function fn;
-    expand id (* sp= *) 2 preg_to_dwarf expand_instruction fn.fn_code;
+    expand id (* sp= *) 31 preg_to_dwarf expand_instruction fn.fn_code; (* stack-pointer register number handed to [expand]; presumably 31 is SP in the AArch64 DWARF numbering -- confirm against the ABI *)
     Errors.OK (get_current_function ())
   with Error s ->
     Errors.Error (Errors.msg (coqstring_of_camlstring s))
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index 0c72c7cc..46dd875d 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -268,18 +268,24 @@ Definition arith_extended
 Definition shrx32 (rd r1: ireg) (n: int) (k: code) : code :=
   if Int.eq n Int.zero then
     Pmov rd r1 :: k
-  else
-    Porr W X16 XZR r1 (SOasr (Int.repr 31)) ::
-    Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) ::
-    Porr W rd XZR X16 (SOasr n) :: k.
+  else if Int.eq n Int.one then (* n = 1: a two-instruction sequence suffices *)
+         Padd W X16 r1 r1 (SOlsr (Int.repr 31)) :: (* X16 := r1 + (r1 logically shifted right by 31) *)
+         Porr W rd XZR X16 (SOasr n) :: k (* rd := X16 arithmetically shifted right by n *)
+       else (* general case: the original three-instruction sequence *)
+         Porr W X16 XZR r1 (SOasr (Int.repr 31)) ::
+         Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) ::
+         Porr W rd XZR X16 (SOasr n) :: k.
 
 Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code :=
   if Int.eq n Int.zero then
     Pmov rd r1 :: k
-  else
-    Porr X X16 XZR r1 (SOasr (Int.repr 63)) ::
-    Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) ::
-    Porr X rd XZR X16 (SOasr n) :: k.
+  else if Int.eq n Int.one then (* n = 1: a two-instruction sequence suffices *)
+         Padd X X16 r1 r1 (SOlsr (Int.repr 63)) :: (* X16 := r1 + (r1 logically shifted right by 63) *)
+         Porr X rd XZR X16 (SOasr n) :: k (* rd := X16 arithmetically shifted right by n *)
+       else (* general case: the original three-instruction sequence *)
+         Porr X X16 XZR r1 (SOasr (Int.repr 63)) ::
+         Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) ::
+         Porr X rd XZR X16 (SOasr n) :: k.
 
 (** Load the address [id + ofs] in [rd] *)
 
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index c860b961..88258cd6 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -259,13 +259,13 @@ Proof.
 - apply logicalimm32_label; unfold nolabel; auto.
 - apply logicalimm32_label; unfold nolabel; auto.
 - apply logicalimm32_label; unfold nolabel; auto.
-- unfold shrx32. destruct Int.eq; TailNoLabel.
+- unfold shrx32. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel.
 - apply arith_extended_label; unfold nolabel; auto.
 - apply arith_extended_label; unfold nolabel; auto.
 - apply logicalimm64_label; unfold nolabel; auto.
 - apply logicalimm64_label; unfold nolabel; auto.
 - apply logicalimm64_label; unfold nolabel; auto.
-- unfold shrx64. destruct Int.eq; TailNoLabel.
+- unfold shrx64. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel.
 - eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
 - destruct (preg_of r); try discriminate; TailNoLabel;
   (eapply tail_nolabel_trans; [eapply transl_cond_label; eauto | TailNoLabel]).
diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
index b622a0bb..6f296f56 100644
--- a/aarch64/Asmgenproof1.v
+++ b/aarch64/Asmgenproof1.v
@@ -754,16 +754,28 @@ Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
   /\ rs'#rd = v
   /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
 Proof.
-  unfold shrx32; intros. apply Val.shrx_shr_2 in H.
+  unfold shrx32; intros. apply Val.shrx_shr_3 in H.
   destruct (Int.eq n Int.zero) eqn:E.
 - econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
   split. Simpl. subst v; auto. intros; Simpl.
-- econstructor; split. eapply exec_straight_three.
-  unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
-  simpl; eauto.
-  unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
-  auto. auto. auto.
-  split. subst v; Simpl. intros; Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: simpl; auto.
+    split.
+    ** subst v; Simpl.
+       destruct (Val.add _ _); simpl; trivial.
+       change (Int.ltu Int.one Int.iwordsize) with true; simpl.
+       rewrite Int.or_zero_l.
+       reflexivity.
+    ** intros; Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    simpl; eauto.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    auto. auto. auto.
+    split. subst v; Simpl. intros; Simpl.
 Qed.
  
 Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
@@ -774,16 +786,28 @@ Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
   /\ rs'#rd = v
   /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
 Proof.
-  unfold shrx64; intros. apply Val.shrxl_shrl_2 in H.
+  unfold shrx64; intros. apply Val.shrxl_shrl_3 in H.
   destruct (Int.eq n Int.zero) eqn:E.
 - econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
   split. Simpl. subst v; auto. intros; Simpl.
-- econstructor; split. eapply exec_straight_three.
-  unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
-  simpl; eauto.
-  unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
-  auto. auto. auto.
-  split. subst v; Simpl. intros; Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: simpl; auto.
+    split.
+    ** subst v; Simpl.
+       destruct (Val.addl _ _); simpl; trivial.
+       change (Int.ltu Int.one Int64.iwordsize') with true; simpl.
+       rewrite Int64.or_zero_l.
+       reflexivity.
+    ** intros; Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    simpl; eauto.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    auto. auto. auto.
+    split. subst v; Simpl. intros; Simpl.
 Qed.
 
 (** Condition bits *)
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 016a1c5a..f428feea 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -481,6 +481,9 @@ Definition transl_op
       do r <- ireg_of res; do r1 <- ireg_of a1;
       if Int.eq n Int.zero then
         OK (Pmov r (SOreg r1) :: k)
+      else if Int.eq n Int.one then
+        OK (Padd IR14 r1 (SOlsr r1 (Int.repr 31)) ::
+            Pmov r (SOasr IR14 n) :: k)
       else
         OK (Pmov IR14 (SOasr r1 (Int.repr 31)) ::
             Padd IR14 r1 (SOlsr IR14 (Int.sub Int.iwordsize n)) ::
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index 7ef7b776..cdac697e 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -1264,15 +1264,32 @@ Local Transparent destroyed_by_op.
   destruct (rs x0) eqn: X0; simpl in H0; try discriminate.
   destruct (Int.ltu i (Int.repr 31)) eqn: LTU; inv H0.
   revert EQ2. predSpec Int.eq Int.eq_spec i Int.zero; intros EQ2.
+  {
   (* i = 0 *)
   inv EQ2. econstructor.
   split. apply exec_straight_one. simpl. reflexivity. auto.
   split. Simpl. unfold Int.shrx. rewrite Int.shl_zero. unfold Int.divs.
   change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed. auto.
   intros. Simpl.
-  (* i <> 0 *)
-  inv EQ2.
-  assert (LTU': Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize = true).
+  }
+  { (* i <> 0 *)
+    revert EQ2. predSpec Int.eq Int.eq_spec i Int.one; intros EQ2.
+    {
+      inv EQ2.
+      econstructor; split.
+      eapply exec_straight_two; simpl; reflexivity.
+      split.
+      { rewrite X0.
+        rewrite Int.shrx1_shr by reflexivity.
+        Simpl.
+      }
+      { intros.
+        Simpl.
+      }
+    }
+    clear H0.
+    inv EQ2.
+    assert (LTU': Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize = true).
   {
     generalize (Int.ltu_inv _ _ LTU). intros.
     unfold Int.sub, Int.ltu. rewrite Int.unsigned_repr_wordsize.
@@ -1306,6 +1323,7 @@ Local Transparent destroyed_by_op.
   rewrite LTU'; simpl. rewrite LTU''; simpl.
   f_equal. symmetry. apply Int.shrx_shr_2. assumption.
   intros. unfold rs3; Simpl. unfold rs2; Simpl. unfold rs1; Simpl.
+  }
   (* intoffloat *)
   econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
 Transparent destroyed_by_op.
diff --git a/backend/ForwardMoves.v b/backend/ForwardMoves.v
new file mode 100644
index 00000000..c73b0213
--- /dev/null
+++ b/backend/ForwardMoves.v
@@ -0,0 +1,333 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
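+(* Static analysis: a forward dataflow analysis that records, at each program point, which registers are known copies of other registers. *)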
+
+Module RELATION.
+  
+Definition t := (PTree.t reg). (* finite map from registers to registers; [get_r] reads a binding x |-> y as: use y in place of x *)
+Definition eq (r1 r2 : t) := (* extensional equality: same binding, or same absence of binding, at every key *)
+  forall x, (PTree.get x r1) = (PTree.get x r2).
+
+Definition top : t := PTree.empty reg. (* the empty relation: no register is recorded as a copy of another *)
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  (* Introducing the key exposes the pointwise statement; both sides are the same lookup. *)
+  intros rel key; reflexivity.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  (* Flip the pointwise equality supplied by the hypothesis. *)
+  intros a b AB key; symmetry; exact (AB key).
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  (* Chain the two pointwise equalities through the lookup in the middle relation. *)
+  intros a b c AB BC key; transitivity (PTree.get key b); [exact (AB key) | exact (BC key)].
+Qed.
+
+Definition reg_beq (x y : reg) := (* boolean equality test on registers, obtained from the decision procedure [Pos.eq_dec] *)
+  if Pos.eq_dec x y then true else false.
+
+Definition beq (r1 r2 : t) := PTree.beq reg_beq r1 r2. (* boolean comparison of two relations using [reg_beq] on the bound targets; soundness is [beq_correct] *)
+
+Lemma beq_correct: forall r1 r2, beq r1 r2 = true -> eq r1 r2. (* soundness of [beq]: a [true] answer implies extensional equality *)
+Proof.
+  unfold beq, eq. intros r1 r2 EQ x.
+  pose proof (PTree.beq_correct reg_beq r1 r2) as CORRECT. (* library characterisation of [PTree.beq] *)
+  destruct CORRECT as [CORRECTF CORRECTB].
+  pose proof (CORRECTF EQ x) as EQx. (* only the forward direction, specialised to key [x], is used *)
+  clear CORRECTF CORRECTB EQ.
+  unfold reg_beq in *.
+  destruct (r1 ! x) as [R1x | ] in *; (* case split on whether each side binds [x] *)
+    destruct (r2 ! x) as [R2x | ] in *;
+    trivial; try contradiction.
+  destruct (Pos.eq_dec R1x R2x) in *; congruence. (* both bound: decide equality of the two targets *)
+Qed.
+
+Definition ge (r1 r2 : t) := (* ordering: every binding present in [r1] is also present, with the same target, in [r2] *)
+  forall x,
+    match PTree.get x r1 with
+    | None => True (* [r1] says nothing about [x]: no constraint on [r2] *)
+    | Some v => (PTree.get x r2) = Some v
+    end.
+
+Lemma ge_refl: forall r1 r2, eq r1 r2 -> ge r1 r2. (* [ge] is reflexive up to [eq] *)
+Proof.
+  unfold eq, ge.
+  intros r1 r2 EQ x.
+  pose proof (EQ x) as EQx. (* specialise the pointwise equality to the key under consideration *)
+  clear EQ.
+  destruct (r1 ! x).
+  - congruence. (* bound in [r1]: [EQx] gives the same binding in [r2] *)
+  - trivial.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z. (* [ge] is transitive *)
+Proof.
+  unfold ge.
+  intros r1 r2 r3 GE12 GE23 x.
+  pose proof (GE12 x) as GE12x; clear GE12. (* specialise both hypotheses to key [x] *)
+  pose proof (GE23 x) as GE23x; clear GE23.
+  destruct (r1 ! x); trivial.
+  destruct (r2 ! x); congruence. (* a binding of [r1] propagates to [r2], hence to [r3] *)
+Qed.
+
+Definition lub (r1 r2 : t) := (* upper bound for [ge]: keeps exactly the bindings on which [r1] and [r2] agree *)
+  PTree.combine
+    (fun ov1 ov2 =>
+       match ov1, ov2 with
+       | (Some v1), (Some v2) =>
+         if Pos.eq_dec v1 v2
+         then ov1
+         else None (* conflicting targets: drop the binding *)
+       | None, _
+       | _, None => None (* unbound on at least one side: drop the binding *)
+       end)
+    r1 r2.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x. (* [lub x y] is an upper bound of its left argument *)
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity. (* lookup in a combined tree is the combining function applied to the two lookups *)
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial. (* a binding survives only when both sides agree, and then it is the left one *)
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y. (* [lub x y] is an upper bound of its right argument *)
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity. (* lookup in a combined tree is the combining function applied to the two lookups *)
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial.
+  congruence. (* the surviving binding is the left one, which equals the right one in this branch *)
+Qed.
+
+End RELATION.
+
+Module Type SEMILATTICE_WITHOUT_BOTTOM. (* signature of a semilattice that is not required to provide a bottom element *)
+
+  Parameter t: Type. (* carrier *)
+  Parameter eq: t -> t -> Prop. (* equivalence on the carrier *)
+  Axiom eq_refl: forall x, eq x x.
+  Axiom eq_sym: forall x y, eq x y -> eq y x.
+  Axiom eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Parameter beq: t -> t -> bool. (* executable test; only required to be sound with respect to [eq] *)
+  Axiom beq_correct: forall x y, beq x y = true -> eq x y.
+  Parameter ge: t -> t -> Prop. (* ordering, reflexive up to [eq] and transitive *)
+  Axiom ge_refl: forall x y, eq x y -> ge x y.
+  Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Parameter lub: t -> t -> t. (* an upper bound of both arguments with respect to [ge] *)
+  Axiom ge_lub_left: forall x y, ge (lub x y) x.
+  Axiom ge_lub_right: forall x y, ge (lub x y) y.
+
+End SEMILATTICE_WITHOUT_BOTTOM.
+
+Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM). (* functor: extends [L] with an extra element [None] that serves as bottom *)
+  Definition t := option L.t. (* [None] is the added bottom; [Some x] embeds an element of [L] *)
+  Definition eq (a b : t) := (* [L.eq] on embedded elements; bottom is equal only to itself *)
+    match a, b with
+    | None, None => True
+    | Some x, Some y => L.eq x y
+    | Some _, None | None, Some _ => False
+    end.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq; destruct x; trivial.
+    apply L.eq_refl.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; destruct x; destruct y; trivial.
+    apply L.eq_sym.
+  Qed.
+  
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; destruct x; destruct y; destruct z; trivial.
+    - apply L.eq_trans.
+    - contradiction.
+  Qed.
+  
+  Definition beq (x y : t) := (* executable counterpart of [eq], delegating to [L.beq] on embedded elements *)
+    match x, y with
+    | None, None => true
+    | Some x, Some y => L.beq x y
+    | Some _, None | None, Some _ => false
+    end.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq.
+    destruct x; destruct y; trivial; try congruence.
+    apply L.beq_correct.
+  Qed.
+  
+  Definition ge (x y : t) := (* ordering: everything is above [None]; [None] is above nothing but [None] *)
+    match x, y with
+    | None, Some _ => False
+    | _, None => True
+    | Some a, Some b => L.ge a b
+    end.
+  
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    destruct x; destruct y; trivial.
+    apply L.ge_refl.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    destruct x; destruct y; destruct z; trivial; try contradiction.
+    apply L.ge_trans.
+  Qed.
+  
+  Definition bot: t := None. (* the added least element *)
+  Lemma ge_bot: forall x, ge x bot. (* every element is above [bot] *)
+  Proof.
+    unfold ge, bot.
+    destruct x; trivial.
+  Qed.
+  
+  Definition lub (a b : t) := (* [None] is neutral; two embedded elements are joined with [L.lub] *)
+    match a, b with
+    | None, _ => b
+    | _, None => a
+    | (Some x), (Some y) => Some (L.lub x y)
+    end.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_left.
+    - apply L.ge_refl. (* right argument is bottom: the join is the left argument itself *)
+      apply L.eq_refl.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_right.
+    - apply L.ge_refl. (* left argument is bottom: the join is the right argument itself *)
+      apply L.eq_refl.
+  Qed.
+End ADD_BOTTOM.
+
+Module RB := ADD_BOTTOM(RELATION). (* relations extended with a bottom element [None] *)
+Module DS := Dataflow_Solver(RB)(NodeSetForward). (* dataflow solver instantiated on [RB]; used by [forward_map] *)
+
+Definition kill (dst : reg) (rel : RELATION.t) := (* forget everything about [dst]: its own binding and every binding whose target is [dst] *)
+  PTree.filter1 (fun x => if Pos.eq_dec dst x then false else true) (* keep only entries whose target differs from [dst] *)
+                (PTree.remove dst rel). (* the binding of [dst] itself is removed first *)
+
+Definition move (src dst : reg) (rel : RELATION.t) := (* effect of the copy [dst := src] on the relation *)
+  PTree.set dst (match PTree.get src rel with
+                 | Some src' => src' (* [src] is itself bound: bind [dst] to the same target *)
+                 | None => src (* otherwise bind [dst] directly to [src] *)
+                 end) (kill dst rel). (* previous facts involving [dst] are discarded before the new binding is added *)
+
+Fixpoint kill_builtin_res (res : builtin_res reg) (rel : RELATION.t) := (* apply [kill] to every register occurring in a builtin result; not yet called by [apply_instr], which still carries a TODO for it *)
+  match res with
+  | BR z => kill z rel
+  | BR_none => rel
+  | BR_splitlong hi lo => kill_builtin_res hi (kill_builtin_res lo rel) (* both halves are processed, low half first *)
+  end.
+
+Definition apply_instr instr x := (* transfer function: effect of one instruction on relation [x], returned as an [RB.t] *)
+  match instr with
+  | Inop _
+  | Icond _ _ _ _
+  | Ijumptable _ _
+  | Istore _ _ _ _ _ => Some x (* relation passed through unchanged *)
+  | Iop Omove (src :: nil) dst _ => Some (move src dst x) (* a register copy: record it *)
+  | Iop _ _ dst _
+  | Iload _ _ _ _ dst _
+  | Icall _ _ _ dst _ => Some (kill dst x) (* [dst] is overwritten: invalidate the facts that mention it *)
+  | Ibuiltin _ _ res _ => Some (RELATION.top) (* conservative: all facts are dropped. TODO: use (kill_builtin_res res x) instead *)
+  | Itailcall _ _ _ | Ireturn _ => RB.bot (* these two instructions yield bottom *)
+  end.
+
+Definition apply_instr' code (pc : node) (ro : RB.t) : RB.t :=
+  match ro with (* transfer function of the instruction at [pc], lifted to [RB.t] *)
+  | Some rel =>
+    match code ! pc with
+    | Some i => apply_instr i rel
+    | None => RB.bot (* no instruction at [pc] *)
+    end
+  | None => None (* bottom is mapped to bottom *)
+  end.
+
+Definition forward_map (f : RTL.function) := DS.fixpoint (* runs the solver on [f]; the result is an [option], consumed as such by [subst_arg] *)
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top). (* the entry point starts from the empty relation *)
+
+Definition get_r (rel : RELATION.t) (x : reg) :=
+  match rel ! x with (* register to use in place of [x] according to [rel] *)
+  | Some tgt => tgt
+  | None => x (* an unbound register stands for itself *)
+  end.
+
+Definition get_rb (rb : RB.t) (x : reg) :=
+  match rb with (* [get_r] lifted to [RB.t] *)
+  | Some r => get_r r x
+  | None => x (* bottom: leave [x] as it is *)
+  end.
+
+Definition subst_arg (fmap : option (PMap.t RB.t)) (pc : node) (x : reg) : reg :=
+  match fmap with (* replacement for argument [x] at node [pc] *)
+  | Some m => get_rb (PMap.get pc m) x
+  | None => x (* no analysis result available: keep the argument *)
+  end.
+
+Definition subst_args fmap pc := List.map (subst_arg fmap pc). (* [subst_arg] applied to every element of an argument list *)
+
+(* Transformation: rewrite instruction arguments using the result of the analysis. *)
+Definition transf_instr (fmap : option (PMap.t RB.t)) (* rewrites the register arguments of [instr] with [subst_arg]/[subst_args] at node [pc] *)
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    Iop op (subst_args fmap pc args) dst s (* destinations and successors are never touched *)
+  | Iload trap chunk addr args dst s =>
+    Iload trap chunk addr (subst_args fmap pc args) dst s
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) src s (* only the address arguments are rewritten; [src] is kept *)
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s (* [ros] is left as it is *)
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 =>
+    Icond cond (subst_args fmap pc args) s1 s2
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr (* every remaining instruction form is returned unchanged *)
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := fn_sig f; (* every field is copied from [f] except the code *)
+     fn_params := fn_params f;
+     fn_stacksize := fn_stacksize f;
+     fn_code := PTree.map (transf_instr (forward_map f)) (fn_code f); (* each instruction is rewritten with the analysis result for [f] *)
+     fn_entrypoint := fn_entrypoint f |}.
+
+
+Definition transf_fundef (fd: fundef) : fundef := (* lifts [transf_function] to function definitions via [AST.transf_fundef] *)
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program := (* applies [transf_fundef] across the program via [transform_program] *)
+  transform_program transf_fundef p.
diff --git a/backend/ForwardMovesproof.v b/backend/ForwardMovesproof.v
new file mode 100644
index 00000000..826d4250
--- /dev/null
+++ b/backend/ForwardMovesproof.v
@@ -0,0 +1,801 @@
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import ForwardMoves.
+
+
+Definition match_prog (p tp: RTL.program) := (* relation between a source program and its transformed version *)
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp. (* each function definition of [tp] is the image under [transf_fundef] of the matching one in [p] *)
+
+Lemma transf_program_match: (* the output of [transf_program] is related to its input by [match_prog] *)
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated: (* a function found by value in [ge] is found, transformed, in [tge] *)
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated: (* same statement for lookup by function pointer *)
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved: (* symbol lookup gives the same result in both global environments *)
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved: (* the two global environments are equivalent in the sense of [Senv.equiv] *)
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved: (* the transformation does not change function signatures *)
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; trivial.
+Qed.
+
+Lemma find_function_translated: (* [find_function] commutes with the transformation, for the same register file [rs] *)
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id]. (* callee given by a register or by a symbol *)
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at: (* the transformed code holds, at the same node, the image of the original instruction under [transf_instr] *)
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc =
+    Some(transf_instr (forward_map f) pc i).
+Proof.
+  intros until i. intro CODE.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap. (* lookup in a mapped tree *)
+  unfold option_map.
+  rewrite CODE.
+  reflexivity.
+Qed.
+
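+(* Earlier formulation of [fmap_sem], kept for reference only; the definition actually used appears further down.
+Definition fmap_sem (fmap : option (PMap.t RB.t)) (pc : node) (rs : regset) :=
+  forall x : reg,
+    (rs # (subst_arg fmap pc x)) = (rs # x).
+ *)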
+
+Lemma apply_instr'_bot : (* the transfer function maps bottom to bottom; used as a side condition of [DS.fixpoint_solution] *)
+  forall code,
+  forall pc,
+    RB.eq (apply_instr' code pc RB.bot) RB.bot.
+Proof.
+  reflexivity.
+Qed.
+
+Definition get_rb_sem (rb : RB.t) (rs : regset) := (* meaning of an abstract value with respect to a concrete register file *)
+  match rb with
+  | None => False (* bottom is satisfied by no register file *)
+  | Some rel =>
+    forall x : reg,
+      (rs # (get_r rel x)) = (rs # x) (* the register chosen by [get_r] holds the same value as [x] *)
+  end.
+
+Lemma get_rb_sem_ge: (* [get_rb_sem] is preserved when moving upwards in the [RB.ge] ordering *)
+  forall rb1 rb2 : RB.t,
+    (RB.ge rb1 rb2) ->
+    forall rs : regset,
+      (get_rb_sem rb2 rs) -> (get_rb_sem rb1 rs).
+Proof.
+  destruct rb1 as [r1 | ];
+    destruct rb2 as [r2 | ];
+    unfold get_rb_sem;
+    simpl;
+    intros GE rs RB2RS;
+    try contradiction. (* the cases involving bottom carry a [False] hypothesis *)
+  unfold RELATION.ge in GE.
+  unfold get_r in *.
+  intro x.
+  pose proof (GE x) as GEx.
+  pose proof (RB2RS x) as RB2RSx.
+  destruct (r1 ! x) as [r1x | ] in *; (* a binding of [r1] is also a binding of [r2], for which the property holds *)
+    destruct (r2 ! x) as [r2x | ] in *;
+    congruence.
+Qed.
+
+Definition fmap_sem (fmap : option (PMap.t RB.t)) (* invariant tying the analysis result at [pc] to the register file [rs] *)
+  (pc : node) (rs : regset) :=
+  match fmap with
+  | None => True (* no analysis result: nothing to maintain *)
+  | Some m => get_rb_sem (PMap.get pc m) rs
+  end.
+
+Lemma subst_arg_ok: (* under the invariant, a substituted argument holds the same value as the original one *)
+  forall f,
+  forall pc,
+  forall rs,
+  forall arg,
+    fmap_sem (forward_map f) pc rs ->
+    rs # (subst_arg (forward_map f) pc arg) = rs # arg.
+Proof.
+  intros until arg.
+  intro SEM.
+  unfold fmap_sem in SEM.
+  destruct (forward_map f) as [map |]in *; trivial. (* without an analysis result [subst_arg] is the identity *)
+  simpl.
+  unfold get_rb_sem in *.
+  destruct (map # pc).
+  2: contradiction. (* bottom at [pc] contradicts the invariant *)
+  apply SEM.
+Qed.
+
+Lemma subst_args_ok: (* list version of [subst_arg_ok] *)
+  forall f,
+  forall pc,
+  forall rs,
+  fmap_sem (forward_map f) pc rs ->
+  forall args,
+    rs ## (subst_args (forward_map f) pc args) = rs ## args.
+Proof.
+  induction args; trivial.
+  simpl.
+  f_equal. (* head handled by [subst_arg_ok], tail by the induction hypothesis *)
+  apply subst_arg_ok; assumption.
+  assumption.
+Qed.
+
+Lemma kill_ok: (* after killing [dst] in the relation, the invariant survives any assignment to [dst] *)
+  forall dst,
+  forall mpc,
+  forall rs,
+  forall v,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs # dst <- v.
+Proof.
+  unfold get_rb_sem.
+  intros until v.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  { (* x = dst: its binding was removed, so it is read as itself *)
+    subst dst.
+    rewrite Regmap.gss.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    apply Regmap.gss.
+  }
+  rewrite (Regmap.gso v rs NEQ).
+  unfold kill, get_r in *.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    { (* the target of x is dst: the binding was filtered out *)
+      subst dst.
+      rewrite Regmap.gso by assumption.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence. (* binding kept, and its target is not the assigned register *)
+    assumption.
+  }
+  rewrite Regmap.gso by assumption.
+  reflexivity.
+Qed.
+
+Lemma kill_weaken: (* killing a register in the relation keeps the invariant valid for the same register file *)
+  forall dst,
+  forall mpc,
+  forall rs,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs.
+Proof.
+  unfold get_rb_sem.
+  intros until rs.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  { (* x = dst: binding removed, so x is read as itself *)
+    subst dst.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  unfold kill, get_r in *.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    { (* binding filtered out: x is read as itself *)
+      reflexivity.
+    }
+    assumption. (* binding kept: the original fact applies *)
+  }
+  reflexivity.
+Qed.
+
+Lemma top_ok : (* the empty relation is satisfied by every register file *)
+  forall rs, get_rb_sem (Some RELATION.top) rs.
+Proof.
+  unfold get_rb_sem, RELATION.top.
+  intros.
+  unfold get_r.
+  rewrite PTree.gempty. (* every lookup in the empty tree fails, so [get_r] returns its argument *)
+  reflexivity.
+Qed.
+
+Lemma move_ok: (* soundness of [move]: the invariant holds after executing the copy [res := src] *)
+  forall mpc : RELATION.t,
+  forall src res : reg,
+  forall rs : regset,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (move src res mpc)) (rs # res <- (rs # src)).
+Proof.
+  unfold get_rb_sem, move.
+  intros until rs.
+  intros SEM x.
+  unfold get_r in *.
+  destruct (Pos.eq_dec res x).
+  { (* x is the destination of the copy: it reads the freshly set binding *)
+    subst res.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    pose proof (SEM src) as SEMsrc.
+    destruct (mpc ! src) as [mpcsrc | ] in *.
+    { (* src was bound: the new target is the target of src *)
+      destruct (Pos.eq_dec x mpcsrc).
+      {
+        subst mpcsrc.
+        rewrite Regmap.gss.
+        reflexivity.
+      }
+      rewrite Regmap.gso by congruence.
+      assumption.
+    }
+    destruct (Pos.eq_dec x src). (* src was unbound: the new target is src itself *)
+    {
+      subst src.
+      rewrite Regmap.gss.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    reflexivity.
+  }
+  rewrite PTree.gso by congruence. (* x is another register: same reasoning as in [kill_ok] *)
+  rewrite Regmap.gso with (i := x) by congruence.
+  unfold kill.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x) as [ r |].
+  {
+    destruct (Pos.eq_dec res r).
+    {
+      subst r.
+      rewrite Regmap.gso by congruence.
+      trivial.
+    }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  rewrite Regmap.gso by congruence.
+  reflexivity.
+Qed.
+  
+Ltac TR_AT := (* looks for a hypothesis giving the instruction at some node and adds the matching fact from [transf_function_at] *)
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Definition is_killed_in_map (map : PMap.t RB.t) pc res := (* the abstract value at [pc] is above some relation in which [res] has been killed *)
+  match PMap.get pc map with
+  | None => True (* bottom at [pc]: no requirement *)
+  | Some rel => exists rel', RELATION.ge rel (kill res rel')
+  end.
+
+Definition is_killed_in_fmap fmap pc res := (* [is_killed_in_map] lifted to an optional analysis result *)
+  match fmap with
+  | None => True
+  | Some map => is_killed_in_map map pc res
+  end.
+
+Lemma killed_twice: (* [kill] is idempotent up to [RELATION.eq]; stated as a [Lemma] like its siblings since it is a proposition closed with [Qed] *)
+  forall rel : RELATION.t,
+  forall res,
+    RELATION.eq (kill res rel) (kill res (kill res rel)).
+Proof.
+  unfold kill, RELATION.eq.
+  intros.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gfilter1.
+  destruct (Pos.eq_dec res x).
+  { (* key is the killed register: both sides have no binding *)
+    subst res.
+    rewrite PTree.grs.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  destruct (rel ! x) as [relx | ]; trivial.
+  destruct (Pos.eq_dec res relx); trivial. (* binding already filtered by the first [kill] *)
+  destruct (Pos.eq_dec res relx); congruence. (* binding kept by the first [kill], hence kept by the second *)
+Qed.
+
+Lemma get_rb_killed: (* if [mpc] is above a relation in which [res] was killed, the invariant survives any assignment to [res] *)
+  forall mpc,
+  forall rs,
+  forall rel,
+  forall res,
+  forall vres,
+    (get_rb_sem (Some mpc) rs) ->
+    (RELATION.ge mpc (kill res rel)) ->
+    (get_rb_sem (Some mpc) rs # res <- vres).
+Proof.
+  simpl.
+  intros until vres.
+  intros SEM GE x.
+  pose proof (GE x) as GEx.
+  pose proof (SEM x) as SEMx.
+  unfold get_r in *.
+  destruct (mpc ! x) as [mpcx | ] in *; trivial.
+  unfold kill in GEx.
+  rewrite PTree.gfilter1 in GEx.
+  destruct (Pos.eq_dec res x) as [ | res_NE_x].
+  { (* [mpc] cannot bind [res]: the killed relation has no binding for it *)
+    subst res.
+    rewrite PTree.grs in GEx.
+    discriminate.
+  }
+  rewrite PTree.gro in GEx by congruence.
+  rewrite Regmap.gso with (i := x) by congruence.
+  destruct (rel ! x) as [relx | ]; try discriminate.
+  destruct (Pos.eq_dec res relx) as [ res_EQ_relx | res_NE_relx] in *; try discriminate. (* nor can a binding of [mpc] have [res] as target *)
+  rewrite Regmap.gso by congruence.
+  congruence.
+Qed.
+  
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop := (* relation between a source stack frame and its transformed counterpart *)
+| match_frames_intro: forall res f sp pc rs,
+    (fmap_sem (forward_map f) pc rs) -> (* the invariant holds for the saved register file at the saved node *)
+    (is_killed_in_fmap (forward_map f) pc res) -> (* the analysis knows nothing about the result register at that node *)
+      match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs). (* the two frames differ only in the function *)
+
+Inductive match_states: RTL.state -> RTL.state -> Prop := (* simulation relation between source and transformed execution states *)
+  | match_regular_states: forall stk f sp pc rs m stk'
+                                 (STACKS: list_forall2 match_frames stk stk'),
+      (fmap_sem (forward_map f) pc rs) -> (* the analysis invariant holds at the current node *)
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m) (* same node, registers and memory on both sides *)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma op_cases: (* case analysis on [apply_instr] for [Iop]: either a single-argument [Omove] handled by [move], or a [kill] of the destination *)
+  forall op,
+  forall args,
+  forall dst,
+  forall s,
+  forall x,
+    (exists src, op=Omove /\ args = src :: nil /\
+                 (apply_instr (Iop op args dst s) x) = Some (move src dst x))
+    \/
+    (apply_instr (Iop op args dst s) x) = Some (kill dst x).
+Proof.
+  destruct op; try (right; simpl; reflexivity). (* every operation other than [Omove] kills the destination *)
+  destruct args as [| arg0 args0t]; try (right; simpl; reflexivity). (* [Omove] with no argument *)
+  destruct args0t as [| arg1 args1t]; try (right; simpl; reflexivity). (* [Omove] with two or more arguments *)
+  left.
+  eauto.
+Qed.
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+  
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0.
+  rewrite subst_args_ok by assumption.
+  apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  rewrite MPC in GE.
+  rewrite H in GE.
+  
+  destruct (op_cases op args res pc' mpc) as [[src [OP [ARGS MOVE]]] | KILL].
+  {
+    subst op.
+    subst args.
+    rewrite MOVE in GE.
+    simpl in H0.
+    simpl in GE.
+    apply get_rb_sem_ge with (rb2 := Some (move src res mpc)).
+    assumption.
+    replace v with (rs # src) by congruence.
+    apply move_ok.
+    assumption.
+  }
+  rewrite KILL in GE.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  assumption.
+  apply kill_ok.
+  assumption.
+  
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. constructor; auto. constructor.
+
+  {
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  {
+    replace (Some (kill res mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_weaken.
+  assumption.
+  }
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  unfold fmap_sem in *.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  rewrite H in GE.
+  simpl in GE.
+  unfold is_killed_in_fmap, is_killed_in_map.
+  unfold RB.ge in GE.
+  destruct (map # pc') as [mpc'|] eqn:MPC' in *; trivial.
+  eauto.
+  
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. auto.
+  
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    replace (Some RELATION.top) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply top_ok.
+  
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite subst_args_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    destruct b; tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite subst_arg_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    apply list_nth_z_in with (n := Int.unsigned n).
+    assumption.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* return *)
+- destruct or as [arg | ].
+  {
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    unfold regmap_optget.
+    rewrite subst_arg_ok by eassumption.
+    constructor; auto.
+  }
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    constructor; auto.
+  
+  
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    eapply DS.fixpoint_entry with (code := fn_code f) (successors := successors_instr); try eassumption.
+  }
+  apply top_ok.
+  
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    constructor; auto.
+
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  unfold is_killed_in_fmap in H8.
+  unfold is_killed_in_map in H8.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  destruct H8 as [rel' RGE].
+  eapply get_rb_killed; eauto.
+Qed.
+
+(** Initial states correspond: each initial state of [prog] has a matching initial state of [tprog]. *)
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor. (* [RTL.initial_state] for [tprog]; the indented lines below discharge its premises *)
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor. (* remaining goal: [match_states] between the two initial states *)
+Qed.
+(** Final states are preserved: if [S1] is final with result [r] and matches [S2], then [S2] is final with [r]. *)
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor. (* invert finality, then the state match, then the stack match *)
+Qed.
+(** Semantic preservation for this pass: a forward simulation from the RTL semantics of [prog] to that of [tprog]. *)
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step. (* its premises are supplied, in order, by the four lines below *)
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/common/Memory.v b/common/Memory.v
index cfd13601..50e339e1 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -38,6 +38,7 @@ Require Import Floats.
 Require Import Values.
 Require Export Memdata.
 Require Export Memtype.
+Require Import Lia.
 
 Definition default_notrap_load_value (chunk : memory_chunk) := Vundef.
 
@@ -541,6 +542,48 @@ Proof.
   induction vl; simpl; intros. auto. rewrite IHvl. auto.
 Qed.
 
+Remark set_setN_swap_disjoint:
+  forall vl: list memval,
+  forall v: memval,
+  forall m : ZMap.t memval,
+  forall p pl: Z,
+    ~ (Intv.In p (pl, pl + Z.of_nat (length vl))) -> (* [p] is not in the interval [(pl, pl + length vl)] *)
+    (setN vl pl (ZMap.set p v m)) = (ZMap.set p v (setN vl pl m)). (* then the single write at [p] commutes with [setN vl pl] *)
+Proof.
+  induction vl; simpl; trivial. (* induction on [vl]; the empty-list case is closed by [trivial] *)
+  intros.
+  unfold Intv.In in *; simpl in *.
+  rewrite ZMap.set_disjoint by lia. (* commute two [ZMap.set] writes; [lia] discharges the side condition *)
+  apply IHvl.
+  lia. (* premise of the induction hypothesis, from the interval hypothesis *)
+Qed.
+(** Two [setN] block writes commute when the intervals [(p1, p1 + length vl1)] and [(p2, p2 + length vl2)] are disjoint. *)
+Lemma setN_swap_disjoint:
+  forall vl1 vl2: list memval,
+  forall m : ZMap.t memval,
+  forall p1 p2: Z,
+    Intv.disjoint (p1, p1 + Z.of_nat (length vl1))
+                  (p2, p2 + Z.of_nat (length vl2)) ->
+    (setN vl1 p1 (setN vl2 p2 m)) = (setN vl2 p2 (setN vl1 p1 m)).
+Proof.
+  induction vl1; simpl; trivial. (* induction on the first list; the empty case is closed by [trivial] *)
+  intros until p2. intro DISJOINT.
+  rewrite <- set_setN_swap_disjoint. (* uses the single-write commutation remark; its premise is the last goal below *)
+  { rewrite IHvl1.
+    reflexivity.
+    unfold Intv.disjoint, Intv.In in *. (* premise of [IHvl1]: disjointness for the tail of [vl1] *)
+    simpl in *.
+    intro.
+    intro BOUNDS.
+    apply DISJOINT.
+    lia.
+  }
+  unfold Intv.disjoint, Intv.In in *. (* premise of [set_setN_swap_disjoint], derived from [DISJOINT] *)
+  simpl in *.
+  apply DISJOINT.
+  lia.
+Qed.
+    
 (** [store chunk m b ofs v] perform a write in memory state [m].
   Value [v] is stored at address [b] and offset [ofs].
   Return the updated memory store, or [None] if the accessed bytes
@@ -1172,6 +1215,89 @@ Local Hint Resolve store_valid_block_1 store_valid_block_2: mem.
 Local Hint Resolve store_valid_access_1 store_valid_access_2
              store_valid_access_3: mem.
 
+Remark mem_same_proof_irr :
+  forall m1 m2 : mem,
+    (mem_contents m1) = (mem_contents m2) ->
+    (mem_access m1) = (mem_access m2) ->
+    (nextblock m1) = (nextblock m2) ->
+    m1 = m2. (* memories with equal contents, access map and next block are equal; other fields go by [proof_irr] *)
+Proof.
+  destruct m1 as [contents1 access1 nextblock1 access_max1 nextblock_noaccess1 default1].
+  destruct m2 as [contents2 access2 nextblock2 access_max2 nextblock_noaccess2 default2].
+  simpl.
+  intros.
+  subst contents2. (* identify the three data fields using the hypotheses *)
+  subst access2.
+  subst nextblock2.
+  f_equal; apply proof_irr. (* the remaining field equalities are closed by proof irrelevance *)
+Qed.
+(** Two stores started from the same memory [m0] commute when they target different blocks or non-overlapping offset ranges. *)
+Theorem store_store_other:
+  forall chunk b ofs v chunk' b' ofs' v' m0 m1 m1',
+     b' <> b
+  \/ ofs' + size_chunk chunk' <= ofs
+  \/ ofs  + size_chunk chunk  <= ofs' ->
+     store chunk m0 b ofs v = Some m1 ->
+     store chunk' m0 b' ofs' v' = Some m1' ->
+     store chunk' m1 b' ofs' v' =
+     store chunk m1' b ofs v.
+Proof.
+  intros until m1'.
+  intro DISJOINT.
+  intros W0 W0'.
+  assert (valid_access m1' chunk b ofs Writable) as WRITEABLE1' by eauto with mem.
+  (* Explicit script for the assertion above, kept as a fallback for [eauto with mem]:
+    eapply store_valid_access_1.
+    apply W0'.
+    eapply store_valid_access_3.
+    apply W0.
+  *)
+  assert (valid_access m1 chunk' b' ofs' Writable) as WRITABLE1 by eauto with mem.
+  (* Explicit script for the assertion above, kept as a fallback for [eauto with mem]:
+    eapply store_valid_access_1.
+    apply W0.
+    eapply store_valid_access_3.
+    apply W0'.
+  *)
+  unfold store in *. (* expose the [valid_access_dec] tests of all four stores *)
+  destruct (valid_access_dec m0 chunk b ofs Writable).
+  2: congruence. (* [W0] says this store succeeded *)
+  destruct (valid_access_dec m1 chunk' b' ofs' Writable).
+  2: contradiction. (* contradicts the asserted access in [m1] *)
+  destruct (valid_access_dec m0 chunk' b' ofs' Writable).
+  2: congruence. (* [W0'] says this store succeeded *)
+  destruct (valid_access_dec m1' chunk b ofs Writable).
+  2: contradiction. (* contradicts the asserted access in [m1'] *)
+  f_equal.
+  inv W0; simpl in *.
+  inv W0'; simpl in *.
+  apply mem_same_proof_irr; simpl; trivial. (* the goal surviving [trivial] is the one rewritten with [PMap]/[setN] below *)
+  destruct (eq_block b b'). (* same block vs. different blocks *)
+  { subst b'.
+    rewrite PMap.gss.
+    rewrite PMap.gss.
+    rewrite PMap.set2.
+    rewrite PMap.set2.
+    f_equal.
+    apply setN_swap_disjoint. (* same block: the two byte-range writes commute if their intervals are disjoint *)
+    unfold Intv.disjoint.
+    rewrite encode_val_length.
+    rewrite <- size_chunk_conv.
+    rewrite encode_val_length.
+    rewrite <- size_chunk_conv.
+    unfold Intv.In; simpl.
+    intros.
+    destruct DISJOINT. contradiction. (* the [b' <> b] disjunct is absurd here *)
+    lia. (* the offset bounds exclude a common position *)
+  }
+  {
+    rewrite PMap.set_disjoint by congruence. (* different blocks: the two block-level updates commute *)
+    rewrite PMap.gso by congruence.
+    rewrite PMap.gso by congruence.
+    reflexivity.
+  }
+Qed.
+    
 Lemma load_store_overlap:
   forall chunk m1 b ofs v m2 chunk' ofs' v',
   store chunk m1 b ofs v = Some m2 ->
diff --git a/common/Values.v b/common/Values.v
index de317734..84030123 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -1439,6 +1439,60 @@ Proof.
   assert (32 < Int.max_unsigned) by reflexivity. omega.
 Qed.
 
+Theorem shrx1_shr:
+  forall x z,
+  shrx x (Vint (Int.repr 1)) = Some z ->
+  z = shr (add x (shru x (Vint (Int.repr 31)))) (Vint (Int.repr 1)). (* a defined [shrx] by 1 equals: add [shru x 31] to [x], then [shr] by 1 *)
+Proof.
+  intros. destruct x; simpl in H; try discriminate. (* cases where [shrx] is not [Some _] are discarded *)
+  change (Int.ltu (Int.repr 1) (Int.repr 31)) with true in H; simpl in H.
+  inversion_clear H.
+  simpl.
+  change (Int.ltu (Int.repr 31) Int.iwordsize) with true; simpl. (* both shift amounts pass the range tests *)
+  change (Int.ltu (Int.repr 1) Int.iwordsize) with true; simpl.
+  f_equal.
+  rewrite Int.shrx1_shr by reflexivity. (* integer-level version of the same identity *)
+  reflexivity.
+Qed.
+(** Three-way expansion of a defined [shrx x n]: identity when [n = 0], the short form when [n = 1], the general shift sequence otherwise. *)
+Theorem shrx_shr_3:
+  forall n x z,
+  shrx x (Vint n) = Some z ->
+  z = (if Int.eq n Int.zero then x else
+         if Int.eq n Int.one
+         then shr (add x (shru x (Vint (Int.repr 31)))) (Vint Int.one)
+         else shr (add x (shru (shr x (Vint (Int.repr 31)))
+                    (Vint (Int.sub (Int.repr 32) n))))
+             (Vint n)).
+Proof.
+  intros. destruct x; simpl in H; try discriminate.
+  destruct (Int.ltu n (Int.repr 31)) eqn:LT; inv H.
+  exploit Int.ltu_inv; eauto. change (Int.unsigned (Int.repr 31)) with 31; intros LT'.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. unfold Int.shrx. rewrite Int.shl_zero. unfold Int.divs. change (Int.signed Int.one) with 1. (* case n = 0 *)
+  rewrite Z.quot_1_r. rewrite Int.repr_signed; auto.
+- predSpec Int.eq Int.eq_spec n Int.one.
+  * subst n. simpl. (* case n = 1 *)
+    change (Int.ltu (Int.repr 31) Int.iwordsize) with true. simpl.
+    change (Int.ltu Int.one Int.iwordsize) with true. simpl.
+    f_equal.
+    apply Int.shrx1_shr.
+    reflexivity.
+  * clear H0. (* remaining case: n is neither 0 nor 1 *)
+    simpl. change (Int.ltu (Int.repr 31) Int.iwordsize) with true. simpl.
+    replace (Int.ltu (Int.sub (Int.repr 32) n) Int.iwordsize) with true. simpl.
+    replace (Int.ltu n Int.iwordsize) with true.
+    f_equal; apply Int.shrx_shr_2; assumption.
+    symmetry; apply zlt_true. change (Int.unsigned n < 32); omega. (* justifies the second [replace] *)
+    symmetry; apply zlt_true. unfold Int.sub. change (Int.unsigned (Int.repr 32)) with 32. (* justifies the first [replace] *)
+    assert (Int.unsigned n <> 0).
+    { red; intros; elim H.
+      rewrite <- (Int.repr_unsigned n), H0. auto. }
+    rewrite Int.unsigned_repr.
+    change (Int.unsigned Int.iwordsize) with 32; omega.
+    assert (32 < Int.max_unsigned) by reflexivity. omega.
+Qed.
+
 Theorem or_rolm:
   forall x n m1 m2,
   or (rolm x n m1) (rolm x n m2) = rolm x n (Int.or m1 m2).
@@ -1698,6 +1752,58 @@ Proof.
   assert (64 < Int.max_unsigned) by reflexivity. omega.
 Qed.
 
+Theorem shrxl1_shrl:
+  forall x z,
+  shrxl x (Vint (Int.repr 1)) = Some z ->
+  z = shrl (addl x (shrlu x (Vint (Int.repr 63)))) (Vint (Int.repr 1)). (* a defined [shrxl] by 1 equals: add [shrlu x 63] to [x], then [shrl] by 1 *)
+Proof.
+  intros. destruct x; simpl in H; try discriminate. (* cases where [shrxl] is not [Some _] are discarded *)
+  change (Int.ltu (Int.repr 1) (Int.repr 63)) with true in H; simpl in H.
+  inversion_clear H.
+  simpl.
+  change (Int.ltu (Int.repr 63) Int64.iwordsize') with true; simpl. (* both shift amounts pass the range tests *)
+  change (Int.ltu (Int.repr 1) Int64.iwordsize') with true; simpl.
+  f_equal.
+  rewrite Int64.shrx'1_shr' by reflexivity. (* integer-level version of the same identity *)
+  reflexivity.
+Qed.
+(** Three-way expansion of a defined [shrxl x n]: identity when [n = 0], the short form when [n = 1], the general shift sequence otherwise (same shape as [shrx_shr_3]). *)
+Theorem shrxl_shrl_3:
+  forall n x z,
+  shrxl x (Vint n) = Some z ->
+  z = (if Int.eq n Int.zero then x else
+         if Int.eq n Int.one
+         then shrl (addl x (shrlu x (Vint (Int.repr 63)))) (Vint Int.one)
+         else shrl (addl x (shrlu (shrl x (Vint (Int.repr 63)))
+                    (Vint (Int.sub (Int.repr 64) n))))
+             (Vint n)).
+Proof.
+  intros. destruct x; simpl in H; try discriminate.
+  destruct (Int.ltu n (Int.repr 63)) eqn:LT; inv H.
+  exploit Int.ltu_inv; eauto. change (Int.unsigned (Int.repr 63)) with 63; intros LT'.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. unfold Int64.shrx'. rewrite Int64.shl'_zero. unfold Int64.divs. change (Int64.signed Int64.one) with 1. (* case n = 0 *)
+  rewrite Z.quot_1_r. rewrite Int64.repr_signed; auto.
+- predSpec Int.eq Int.eq_spec n Int.one.
+  * subst n. simpl. (* case n = 1 *)
+    change (Int.ltu (Int.repr 63) Int64.iwordsize') with true. simpl.
+    change (Int.ltu Int.one Int64.iwordsize') with true. simpl.
+    f_equal.
+    apply Int64.shrx'1_shr'.
+    reflexivity.
+  * clear H0. (* remaining case: n is neither 0 nor 1 *)
+    simpl. change (Int.ltu (Int.repr 63) Int64.iwordsize') with true. simpl.
+    replace (Int.ltu (Int.sub (Int.repr 64) n) Int64.iwordsize') with true. simpl.
+    replace (Int.ltu n Int64.iwordsize') with true.
+    f_equal; apply Int64.shrx'_shr_2; assumption.
+    symmetry; apply zlt_true. change (Int.unsigned n < 64); omega. (* justifies the second [replace] *)
+    symmetry; apply zlt_true. unfold Int.sub. change (Int.unsigned (Int.repr 64)) with 64. (* justifies the first [replace] *)
+    assert (Int.unsigned n <> 0). { red; intros; elim H. rewrite <- (Int.repr_unsigned n), H0. auto. }
+    rewrite Int.unsigned_repr.
+    change (Int.unsigned Int64.iwordsize') with 64; omega.
+    assert (64 < Int.max_unsigned) by reflexivity. omega.
+Qed.
+
 Theorem negate_cmp_bool:
   forall c x y, cmp_bool (negate_comparison c) x y = option_map negb (cmp_bool c x y).
 Proof.
diff --git a/cparser/Elab.ml b/cparser/Elab.ml
index 2b04340e..3dbb9d45 100644
--- a/cparser/Elab.ml
+++ b/cparser/Elab.ml
@@ -1853,7 +1853,12 @@ let elab_expr ctx loc env a =
            having declared it *)
         match a1 with
         | VARIABLE n when not (Env.ident_is_bound env n) ->
-            warning Implicit_function_declaration "implicit declaration of function '%s' is invalid in C99" n;
+            let is_builtin = String.length n > 10
+                           && String.sub n 0 10 = "__builtin_" in
+            if is_builtin then
+              error "use of unknown builtin '%s'" n
+            else
+              warning Implicit_function_declaration "implicit declaration of function '%s' is invalid in C99" n;
             let ty = TFun(TInt(IInt, []), None, false, []) in
             (* Check against other definitions and enter in env *)
             let (id, sto, env, ty, linkage) =
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index fd8227c9..9aa4a2bf 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -74,5 +74,6 @@ let option_fglobaladdrtmp = ref false
 let option_fglobaladdroffset = ref false
 let option_fxsaddr = ref true  
 let option_faddx = ref false  
-let option_fcoalesce_mem = ref true  
+let option_fcoalesce_mem = ref true
+let option_fforward_moves = ref true
 let option_all_loads_nontrap = ref false
diff --git a/driver/Compiler.v b/driver/Compiler.v
index 72db86e9..24964237 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -41,6 +41,7 @@ Require Renumber.
 Require Duplicate.
 Require Constprop.
 Require CSE.
+Require ForwardMoves.
 Require Deadcode.
 Require Unusedglob.
 Require Allnontrap.
@@ -64,6 +65,7 @@ Require Renumberproof.
 Require Duplicateproof.
 Require Constpropproof.
 Require CSEproof.
+Require ForwardMovesproof.
 Require Deadcodeproof.
 Require Unusedglobproof.
 Require Allnontrapproof.
@@ -138,12 +140,14 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
    @@ print (print_RTL 6)
   @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program)
    @@ print (print_RTL 7)
-  @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program)
+   @@ total_if Compopts.optim_forward_moves ForwardMoves.transf_program
    @@ print (print_RTL 8)
-   @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program
+  @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program)
    @@ print (print_RTL 9)
-  @@@ time "Unused globals" Unusedglob.transform_program
+   @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program
    @@ print (print_RTL 10)
+  @@@ time "Unused globals" Unusedglob.transform_program
+   @@ print (print_RTL 11)
   @@@ time "Register allocation" Allocation.transf_program
    @@ print print_LTL
    @@ time "Branch tunneling" Tunneling.tunnel_program
@@ -250,6 +254,7 @@ Definition CompCert's_passes :=
   ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog)
   ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog)
   ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog)
+  ::: mkpass (match_if Compopts.optim_forward_moves ForwardMovesproof.match_prog)
   ::: mkpass (match_if Compopts.optim_redundancy Deadcodeproof.match_prog)
   ::: mkpass (match_if Compopts.all_loads_nontrap Allnontrapproof.match_prog)
   ::: mkpass Unusedglobproof.match_prog
@@ -295,7 +300,8 @@ Proof.
   set (p11 := total_if optim_constprop Constprop.transf_program p10) in *.
   set (p12 := total_if optim_constprop Renumber.transf_program p11) in *.
   destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate.
-  destruct (partial_if optim_redundancy Deadcode.transf_program p13) as [p14|e] eqn:P14; simpl in T; try discriminate.
+  set (p13bis := total_if optim_forward_moves ForwardMoves.transf_program p13) in *.
+  destruct (partial_if optim_redundancy Deadcode.transf_program p13bis) as [p14|e] eqn:P14; simpl in T; try discriminate.
   set (p14bis := total_if all_loads_nontrap Allnontrap.transf_program p14) in *.
   destruct (Unusedglob.transform_program p14bis) as [p15|e] eqn:P15; simpl in T; try discriminate.
   destruct (Allocation.transf_program p15) as [p16|e] eqn:P16; simpl in T; try discriminate.
@@ -318,6 +324,7 @@ Proof.
   exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match.
   exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match.
   exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match.
+  exists p13bis; split. eapply total_if_match; eauto. apply ForwardMovesproof.transf_program_match.
   exists p14; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match.
   exists p14bis; split. eapply total_if_match; eauto. apply Allnontrapproof.transf_program_match.
   exists p15; split. apply Unusedglobproof.transf_program_match; auto.
@@ -378,7 +385,7 @@ Ltac DestructM :=
       destruct H as (p & M & MM); clear H
   end.
   repeat DestructM. subst tp.
-  assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p23)).
+  assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p24)).
   {
   eapply compose_forward_simulations.
     eapply SimplExprproof.transl_program_correct; eassumption.
@@ -405,6 +412,8 @@ Ltac DestructM :=
   eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact CSEproof.transf_program_correct.
   eapply compose_forward_simulations.
+    eapply match_if_simulation. eassumption. exact ForwardMovesproof.transf_program_correct; eassumption.
+  eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact Deadcodeproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact Allnontrapproof.transf_program_correct.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 6e3b0d62..fdd2b1d6 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -66,6 +66,9 @@ Parameter debug: unit -> bool.
 (** Flag -fall-loads-nontrap. Turn user loads into non trapping. *)
 Parameter all_loads_nontrap: unit -> bool.
 
+(** Flag -fforward-moves. Forward moves after CSE. *)
+Parameter optim_forward_moves: unit -> bool.
+
 (* TODO is there a more appropriate place? *)
 Require Import Coqlib.
 Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 59b7b222..992cf8c4 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -199,6 +199,7 @@ Processing options:
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on]
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+  -fforward-moves   Forward moves after CSE
   -finline       Perform inlining of functions [on]
   -finline-functions-called-once Integrate functions only required by their
                  single caller [on]
@@ -392,6 +393,7 @@ let cmdline_actions =
   @ f_opt "addx" option_faddx
   @ f_opt "coalesce-mem" option_fcoalesce_mem
   @ f_opt "all-loads-nontrap" option_all_loads_nontrap
+  @ f_opt "forward-moves" option_fforward_moves
 (* Code generation options *)
   @ f_opt "fpu" option_ffpu
   @ f_opt "sse" option_ffpu (* backward compatibility *)
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 828d0dac..0c19ea70 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -127,6 +127,8 @@ Extract Constant Compopts.optim_addx =>
   "fun _ -> !Clflags.option_faddx".
 Extract Constant Compopts.optim_coalesce_mem =>
   "fun _ -> !Clflags.option_fcoalesce_mem".
+Extract Constant Compopts.optim_forward_moves =>
+  "fun _ -> !Clflags.option_fforward_moves".
 Extract Constant Compopts.va_strict =>
   "fun _ -> false".
 Extract Constant Compopts.all_loads_nontrap =>
diff --git a/lib/Integers.v b/lib/Integers.v
index bc05a4da..246c708c 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -4,7 +4,7 @@
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
 (*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the GNU General Public License as published by  *)
 (*  the Free Software Foundation, either version 2 of the License, or  *)
@@ -1194,6 +1194,34 @@ Proof.
   rewrite <- half_modulus_modulus. apply unsigned_range.
 Qed.
 
+Local Transparent repr. (* presumably so that [simpl] can compute through [repr] in the proof below — confirm *)
+Lemma sign_bit_of_signed: forall x,
+    (testbit x (zwordsize - 1)) = lt x zero. (* the top bit of [x] equals the signed test [x < 0] *)
+Proof.
+  intro.
+  rewrite sign_bit_of_unsigned.
+  unfold lt.
+  unfold signed, unsigned.
+  simpl.
+  pose proof half_modulus_pos as HMOD.
+  destruct (zlt 0 half_modulus) as [HMOD' | HMOD'].
+  2: omega. (* [half_modulus] is positive, so the other branch is impossible *)
+  clear HMOD'.
+  destruct (zlt (intval x) half_modulus) as [ LOW | HIGH].
+  { (* representative below [half_modulus] *)
+    destruct x as [ix RANGE].
+    simpl in *.
+    destruct (zlt ix 0). omega.
+    reflexivity.
+  }
+  destruct (zlt _ _) as [LOW' | HIGH']; trivial. (* representative at or above [half_modulus] *)
+  destruct x as [ix RANGE].
+  simpl in *.
+  rewrite half_modulus_modulus in *.
+  omega.
+Qed.
+Local Opaque repr. (* make [repr] opaque again after the proof *)
+
 Lemma bits_signed:
   forall x i, 0 <= i ->
   Z.testbit (signed x) i = testbit x (if zlt i zwordsize then i else zwordsize - 1).
@@ -2427,6 +2455,57 @@ Proof.
   bit_solve. destruct (zlt (i + unsigned (sub iwordsize y)) zwordsize); auto.
 Qed.
 
+Theorem shrx1_shr:
+  forall x,
+  ltu one (repr (zwordsize - 1)) = true ->
+  shrx x (repr 1) = shr (add x (shru x (repr (zwordsize - 1)))) (repr 1). (* [shrx] by 1 as: add [shru x (zwordsize - 1)] to [x], then [shr] by 1 *)
+Proof.
+  intros.
+  rewrite shrx_shr by assumption. (* start from the existing [shrx_shr] characterisation, then specialise it to a shift of 1 *)
+  rewrite shl_mul_two_p.
+  rewrite mul_commut. rewrite mul_one.
+  change (repr 1) with one.
+  rewrite unsigned_one.
+  change (two_p 1) with 2.
+  unfold sub.
+  rewrite unsigned_one.
+  assert (0 <= 2 <= max_unsigned). (* needed by [unsigned_repr] below *)
+  {
+    unfold max_unsigned, modulus.
+    unfold zwordsize in *.
+    unfold ltu in *.
+    rewrite unsigned_one in H.
+    rewrite unsigned_repr in H.
+    {
+      destruct (zlt 1 (Z.of_nat wordsize - 1)) as [ LT | NONE].
+      2: discriminate. (* the hypothesis rules out this branch *)
+      clear H.
+      rewrite two_power_nat_two_p.
+      split.
+      omega.
+      set (w := (Z.of_nat wordsize)) in *.
+      assert ((two_p 2) <= (two_p w)) as MONO. (* from [LT], the word size exceeds 2 *)
+      {
+        apply two_p_monotone.
+        omega.
+      }
+      change (two_p 2) with 4 in MONO.
+      omega.
+    }
+    generalize wordsize_max_unsigned. (* side condition of [unsigned_repr in H] *)
+    fold zwordsize.
+    generalize wordsize_pos.
+    omega.
+  }
+  rewrite unsigned_repr by assumption.
+  simpl.
+  rewrite shru_lt_zero.
+  destruct (lt x zero). (* case split on the sign test of [x] *)
+  reflexivity.
+  rewrite add_zero.
+  reflexivity.
+Qed.
+
 Theorem shrx_carry:
   forall x y,
   ltu y (repr (zwordsize - 1)) = true ->
@@ -3593,6 +3672,104 @@ Proof.
   unfold ltu. apply zlt_true. change (unsigned z < 63). rewrite A; omega.
 Qed.
 
+Lemma shr'63:
+  forall x, (shr' x (Int.repr 63)) = if lt x zero then mone else zero. (* [shr'] by 63 yields [mone] when [lt x zero] holds, [zero] otherwise *)
+Proof.
+  intro.
+  unfold shr', mone, zero.
+  rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; omega).
+  apply same_bits_eq. (* bit-by-bit proof over index [i] *)
+  intros i BIT.
+  rewrite testbit_repr by assumption.
+  rewrite Z.shiftr_spec by omega.
+  rewrite bits_signed by omega.
+  simpl.
+  change zwordsize with 64 in *.
+  destruct (zlt _ _) as [LT | GE].
+  { (* the shifted index is still below the word size, which forces [i = 0] *)
+    replace i with 0 in * by omega.
+    change (0 + 63) with (zwordsize - 1).
+    rewrite  sign_bit_of_signed.
+    destruct (lt x _).
+    all: rewrite testbit_repr by (change zwordsize with 64 in *; omega).
+    all: simpl; reflexivity.
+  }
+  change (64 - 1) with (zwordsize - 1). (* otherwise [bits_signed] reads bit [zwordsize - 1] *)
+  rewrite  sign_bit_of_signed.
+  destruct (lt x _).
+  all: rewrite testbit_repr by (change zwordsize with 64 in *; omega).
+  { symmetry.
+    apply Ztestbit_m1.
+    tauto.
+  }
+  symmetry.
+  apply Ztestbit_0.
+Qed.
+(** [shru'] by 63 yields [one] when [lt x zero] holds and [zero] otherwise; proved bit by bit. *)
+Lemma shru'63:
+  forall x, (shru' x (Int.repr 63)) = if lt x zero then one else zero.
+Proof.
+  intro.
+  unfold shru'.
+  rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; omega).
+  apply same_bits_eq.
+  intros i BIT.
+  rewrite testbit_repr by assumption.
+  rewrite Z.shiftr_spec by omega.
+  unfold lt.
+  rewrite signed_zero.
+  unfold one, zero.
+  destruct (zlt _ 0) as [LT | GE].
+  { (* branch where the signed value is negative: expected result is [one] *)
+    rewrite testbit_repr by assumption.
+    destruct (zeq i 0) as [IZERO | INONZERO].
+    { subst i.
+      change (Z.testbit (unsigned x) (0 + 63)) with (testbit x (zwordsize - 1)).
+      rewrite sign_bit_of_signed.
+      unfold lt.
+      rewrite signed_zero.
+      destruct (zlt _ _); try omega.
+      reflexivity.
+    }
+    change (Z.testbit (unsigned x) (i + 63)) with (testbit x (i+63)).
+    rewrite bits_above by (change zwordsize with 64; omega). (* for [i <> 0] the index [i + 63] is past the word size *)
+    rewrite Ztestbit_1.
+    destruct (zeq i 0); trivial.
+    subst i.
+    omega.
+  }
+  destruct (zeq i 0) as [IZERO | INONZERO]. (* branch where the signed value is not negative: expected result is [zero] *)
+  { subst i.
+    change (Z.testbit (unsigned x) (0 + 63)) with (testbit x (zwordsize - 1)).
+    rewrite sign_bit_of_signed.
+    unfold lt.
+    rewrite signed_zero.
+    rewrite bits_zero.
+    destruct (zlt _ _); try omega.
+    reflexivity.
+  }
+  change (Z.testbit (unsigned x) (i + 63)) with (testbit x (i + 63)).
+  rewrite bits_zero.
+  apply bits_above.
+  change zwordsize with 64.
+  omega.
+Qed.
+(** [shrx'] by 1 as: add [shru' x (zwordsize - 1)] to [x], then [shr'] by 1. NOTE(review): the script below never uses the hypothesis; it looks kept only to mirror [Int.shrx1_shr] — confirm. *)
+Theorem shrx'1_shr':
+  forall x,
+  Int.ltu Int.one (Int.repr (zwordsize - 1)) = true ->
+  shrx' x (Int.repr 1) = shr' (add x (shru' x (Int.repr (Int64.zwordsize - 1)))) (Int.repr 1).
+Proof.
+  intros.
+  rewrite shrx'_shr_2 by reflexivity. (* side condition closed by computation, not by the hypothesis *)
+  change (Int.sub (Int.repr 64) (Int.repr 1)) with (Int.repr 63).
+  f_equal. f_equal.
+  rewrite shr'63. (* replace the shifts by 63 with their closed forms *)
+  rewrite shru'63.
+  rewrite shru'63.
+  destruct (lt x zero); reflexivity. (* both sign cases compute to equal values *)
+Qed.  
+
 Remark int_ltu_2_inv:
   forall y z,
   Int.ltu y iwordsize' = true ->
diff --git a/lib/Maps.v b/lib/Maps.v
index 9e44a7fe..1dec59a2 100644
--- a/lib/Maps.v
+++ b/lib/Maps.v
@@ -958,6 +958,36 @@ Module PTree <: TREE.
     intros. apply fold1_xelements with (l := @nil (positive * A)).
   Qed.
 
+  Local Open Scope positive. (* lets [i + d] in the statement below denote addition on [positive] *)
+  Lemma set_disjoint1: (* [set] at key [i] commutes with [set] at key [i + d] *)
+    forall (A: Type)(i d : elt) (m: t A)  (x y: A),
+      set (i + d) y (set i x m) = set i x (set (i + d) y m).
+  Proof.
+    induction i; destruct d; destruct m; intro; simpl; trivial; (* case split on key, offset and tree shape *)
+      intro; congruence. (* goals left after [trivial] are closed by [congruence], presumably via the induction hypotheses *)
+  Qed.
+  
+  (** [set] at two distinct keys commutes. The keys are ordered with [Pos.compare_spec], the larger one is written as the smaller plus an offset, and [set_disjoint1] concludes. No scope directive is needed: the statement uses no numeric notation. *)
+  Lemma set_disjoint:
+    forall (A: Type)(i j : elt) (m: t A)  (x y: A),
+      i <> j ->
+      set j y (set i x m) = set i x (set j y m).
+  Proof.
+    intros.
+    destruct (Pos.compare_spec i j) as [Heq | Hlt | Hlt].
+    { (* i = j: contradicts the hypothesis [i <> j] *) congruence. }
+    { (* i < j: write [j] as [i + d] *)
+      rewrite (Pos.lt_iff_add i j) in Hlt.
+      destruct Hlt as [d Hd].
+      subst j.
+      apply set_disjoint1.
+    }
+    { (* j < i: symmetric case, write [i] as [j + d] *)
+      rewrite (Pos.lt_iff_add j i) in Hlt.
+      destruct Hlt as [d Hd].
+      subst i. symmetry. apply set_disjoint1.
+    }
+  Qed.
 End PTree.
 
 (** * An implementation of maps over type [positive] *)
@@ -1035,6 +1065,15 @@ Module PMap <: MAP.
     intros. unfold set. simpl. decEq. apply PTree.set2.
   Qed.
 
+  (** [set] at two distinct keys commutes; after unfolding [set] this reduces to [PTree.set_disjoint]. The statement uses no numeric notation, so no scope needs to be opened. *)
+  Lemma set_disjoint:
+    forall (A: Type) (i j : elt) (x y: A) (m: t A),
+      i <> j ->
+      set j y (set i x m) = set i x (set j y m).
+  Proof.
+    intros. unfold set. decEq. apply PTree.set_disjoint. assumption.
+  Qed.
+
 End PMap.
 
 (** * An implementation of maps over any type that injects into type [positive] *)
@@ -1102,6 +1141,16 @@ Module IMap(X: INDEXED_TYPE).
     intros. unfold set. apply PMap.set2.
   Qed.
 
+  (** [set] at two distinct keys commutes; reduces to [PMap.set_disjoint] on the indices produced by [X.index]. *)
+  Lemma set_disjoint:
+    forall (A: Type) (i j : elt) (x y: A) (m: t A),
+      i <> j ->
+      set j y (set i x m) = set i x (set j y m).
+  Proof.
+    intros A i j x y m NEQ. unfold set. apply PMap.set_disjoint.
+    (* Equal indices would give [i = j] through [X.index_inj], contradicting [NEQ]. *)
+    intro INEQ. apply NEQ. apply X.index_inj. exact INEQ.
+  Qed.
 End IMap.
 
 Module ZIndexed.
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index c7cfe43c..584f2339 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -22,6 +22,8 @@ Require Import Parallelizability.
 Require Import Asmvliw Permutation.
 Require Import Chunks.
 
+Require Import Lia.
+
 Open Scope impure.
 
 (** Definition of L *)
@@ -208,6 +210,136 @@ Definition store_eval (so: store_op) (l: list value) :=
   | _, _ => None
   end.
 
+Local Open Scope Z.
+(** The size of any memory chunk is strictly positive; checked constructor by constructor. *)
+Remark size_chunk_positive: forall chunk,
+    size_chunk chunk > 0.
+Proof.
+  intros []; simpl; lia.
+Qed.
+(** The size of any memory chunk is at most 8; checked constructor by constructor. *)
+Remark size_chunk_small: forall chunk,
+    size_chunk chunk <= 8.
+Proof.
+  intros []; simpl; lia.
+Qed.
+(** Disjointness, in the sense of [Intv.disjoint], of the two integer intervals that start at the
+    unsigned value of each offset and extend by the size of the corresponding chunk. *)
+Definition disjoint_chunks
+           (ofs1 : offset) (chunk1 : memory_chunk)
+           (ofs2 : offset) (chunk2 : memory_chunk) :=
+  Intv.disjoint
+    (Ptrofs.unsigned ofs1, Ptrofs.unsigned ofs1 + size_chunk chunk1)
+    (Ptrofs.unsigned ofs2, Ptrofs.unsigned ofs2 + size_chunk chunk2).
+(** Strict upper bound required on offsets by the lemmas below; equals 18446744073709551616 - 8. NOTE(review): presumably the [Ptrofs.modulus] value substituted in those proofs minus the bound of [size_chunk_small] -- confirm. *)
+Definition small_offset_threshold := 18446744073709551608.
+(** Two [OStoreRRO] stores through the same base [va], at offsets whose chunk ranges are disjoint and below [small_offset_threshold], give the same final memory state in either order. *)
+Lemma store_store_disjoint_offsets :
+  forall n1 n2 ofs1 ofs2 vs1 vs2 va m0 m1 m2 m1' m2',
+    (disjoint_chunks ofs1 (store_chunk n1) ofs2 (store_chunk n2)) ->
+    (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+    (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+    store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m0] = Some (Memstate m1) -> (* first order: store 1 on [m0] ... *)
+    store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m1] = Some (Memstate m2) -> (* ... then store 2, giving [m2] *)
+    store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m0] = Some (Memstate m1') -> (* second order: store 2 on [m0] ... *)
+    store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m1'] = Some (Memstate m2') -> (* ... then store 1, giving [m2'] *)
+    m2 = m2'.
+Proof.
+  intros until m2'.
+  intros DISJOINT SMALL1 SMALL2 STORE0 STORE1 STORE0' STORE1'.
+  unfold disjoint_chunks in DISJOINT.
+  destruct vs1 as [v1 | ]; simpl in STORE0, STORE1'; try congruence. (* second constructor case is dismissed by [congruence] *)
+  destruct vs2 as [v2 | ]; simpl in STORE1, STORE0'; try congruence.
+  destruct va as [base | ]; try congruence.
+  unfold exec_store_deps_offset in *.
+  destruct Ge.
+  unfold eval_offset in *; simpl in *.
+  unfold Mem.storev in *.
+  unfold Val.offset_ptr in *.
+  destruct base as [ | | | | | wblock wpofs] in * ; try congruence. (* only the last constructor, carrying [wblock] and [wpofs], survives *)
+  destruct (Mem.store _ _ _ _ _) eqn:E0; try congruence. (* name each of the four [Mem.store] results; failing cases are contradictory *)
+  inv STORE0.
+  destruct (Mem.store (store_chunk n2) _ _ _ _) eqn:E1; try congruence.
+  inv STORE1.
+  destruct (Mem.store (store_chunk n2) m0 _ _ _) eqn:E0'; try congruence.
+  inv STORE0'.
+  destruct (Mem.store _ m1' _ _ _) eqn:E1'; try congruence.
+  inv STORE1'.
+  assert (Some m2 = Some m2'). (* prove the equality under [Some] so both sides can be rewritten to [Mem.store] calls *)
+  2: congruence.
+  rewrite <- E1.
+  rewrite <- E1'.
+  eapply Mem.store_store_other. (* NOTE(review): presumably the commutation lemma for non-overlapping stores -- confirm its statement *)
+  2, 3: eassumption.
+  (* Remaining side condition: choose its right-hand disjunct and settle it by linear arithmetic. *)
+  right.
+  pose proof (size_chunk_positive (store_chunk n1)).
+  pose proof (size_chunk_positive (store_chunk n2)).
+  pose proof (size_chunk_small (store_chunk n1)).
+  pose proof (size_chunk_small (store_chunk n2)).
+  destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]]; (* three cases; in the first two [DIS] is an [Intv.empty] fact *)
+    unfold Intv.empty in DIS; simpl in DIS.
+  1, 2: lia. (* the two [Intv.empty] cases are closed by [lia] using the chunk-size facts above *)
+  pose proof (Ptrofs.unsigned_range ofs1).
+  pose proof (Ptrofs.unsigned_range ofs2).
+  unfold small_offset_threshold in *.
+  destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1; (* two possible values for each of the two address sums: four combinations *)
+    destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+      change Ptrofs.modulus with 18446744073709551616 in *; 
+      lia.
+Qed.
+(** An [OLoadRRO] load through base [va] returns the same result before and after an [OStoreRRO] store through the same base, when the two chunk ranges are disjoint and both offsets are below [small_offset_threshold]. *)
+Lemma load_store_disjoint_offsets :
+  forall n1 n2 tm ofs1 ofs2 vs va m0 m1,
+    (disjoint_chunks ofs1 (store_chunk n1) ofs2 (load_chunk n2)) ->
+    (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+    (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+    store_eval (OStoreRRO n1 ofs1) [vs; va; Memstate m0] = Some (Memstate m1) -> (* the store turns [m0] into [m1] *)
+    load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m1] =
+    load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m0].
+Proof.
+  intros until m1.
+  intros DISJOINT SMALL1 SMALL2 STORE0.
+  destruct vs as [v | ]; simpl in STORE0; try congruence. (* second constructor case is dismissed by [congruence] *)
+  destruct va as [base | ]; try congruence.
+  unfold exec_store_deps_offset in *.
+  unfold eval_offset in *; simpl in *.
+  unfold exec_load_deps_offset.
+  unfold Mem.storev, Mem.loadv in *.
+  destruct Ge in *.
+  unfold eval_offset in *.
+  unfold Val.offset_ptr in *.
+  destruct base as [ | | | | | wblock wpofs] in * ; try congruence. (* only the last constructor, carrying [wblock] and [wpofs], survives *)
+  destruct (Mem.store _ _ _ _) eqn:E0; try congruence. (* a failing store contradicts [STORE0] *)
+  inv STORE0.
+  assert ( (* key fact: the underlying [Mem.load] agrees on [m1] and [m0] *)
+    (Mem.load (load_chunk n2) m1 wblock
+      (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) =
+    (Mem.load (load_chunk n2) m0 wblock
+              (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) ) as LOADS.
+  {
+    eapply Mem.load_store_other. (* NOTE(review): presumably "a load is unaffected by a non-overlapping store" -- confirm its statement *)
+    eassumption.
+    right. (* choose the right-hand disjunct of the side condition; settled arithmetically below *)
+    pose proof (size_chunk_positive (store_chunk n1)).
+    pose proof (size_chunk_positive (load_chunk n2)).
+    pose proof (size_chunk_small (store_chunk n1)).
+    pose proof (size_chunk_small (load_chunk n2)).
+    destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]]; (* three cases; in the first two [DIS] is an [Intv.empty] fact *)
+      unfold Intv.empty in DIS; simpl in DIS.
+    1,2: lia.
+    (* Last case: bound both address sums, considering each possible value given by [Ptrofs.unsigned_add_either]. *)
+    pose proof (Ptrofs.unsigned_range ofs1).
+    pose proof (Ptrofs.unsigned_range ofs2).
+    unfold small_offset_threshold in *.
+    destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1;
+      destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+        change Ptrofs.modulus with 18446744073709551616 in *; 
+        lia.
+  }
+  destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence. (* case split on both load results; [LOADS] settles every combination *)
+Qed.
+             
 Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
   match label_pos lbl 0 (fn_blocks f) with
   | None => None
diff --git a/riscV/Asmgen.v b/riscV/Asmgen.v
index 0fa47fca..b431d63d 100644
--- a/riscV/Asmgen.v
+++ b/riscV/Asmgen.v
@@ -505,11 +505,16 @@ Definition transl_op
       OK (Psrliw rd rs n :: k)
   | Oshrximm n, a1 :: nil =>
       do rd <- ireg_of res; do rs <- ireg_of a1;
-      OK (if Int.eq n Int.zero then Pmv rd rs :: k else
-          Psraiw X31 rs (Int.repr 31) ::
-          Psrliw X31 X31 (Int.sub Int.iwordsize n) ::
-          Paddw X31 rs X31 ::
-          Psraiw rd X31 n :: k)  
+        OK (if Int.eq n Int.zero
+            then Pmv rd rs :: k
+            else if Int.eq n Int.one
+                 then Psrliw X31 rs (Int.repr 31) ::
+                      Paddw X31 rs X31 ::
+                      Psraiw rd X31 Int.one :: k
+                 else Psraiw X31 rs (Int.repr 31) ::
+                      Psrliw X31 X31 (Int.sub Int.iwordsize n) ::
+                      Paddw X31 rs X31 ::
+                      Psraiw rd X31 n :: k)  
 
   (* [Omakelong], [Ohighlong]  should not occur *)
   | Olowlong, a1 :: nil =>
@@ -594,11 +599,16 @@ Definition transl_op
       OK (Psrlil rd rs n :: k)
   | Oshrxlimm n, a1 :: nil =>
       do rd <- ireg_of res; do rs <- ireg_of a1;
-      OK (if Int.eq n Int.zero then Pmv rd rs :: k else
-          Psrail X31 rs (Int.repr 63) ::
-          Psrlil X31 X31 (Int.sub Int64.iwordsize' n) ::
-          Paddl X31 rs X31 ::
-          Psrail rd X31 n :: k)  
+        OK (if Int.eq n Int.zero
+            then Pmv rd rs :: k
+            else if Int.eq n Int.one
+                 then Psrlil X31 rs (Int.repr 63) ::
+                      Paddl X31 rs X31 ::
+                      Psrail rd X31 Int.one :: k
+                 else Psrail X31 rs (Int.repr 63) ::
+                      Psrlil X31 X31 (Int.sub Int64.iwordsize' n) ::
+                      Paddl X31 rs X31 ::
+                      Psrail rd X31 n :: k)  
 
   | Onegf, a1 :: nil =>
       do rd <- freg_of res; do rs <- freg_of a1;
diff --git a/riscV/Asmgenproof.v b/riscV/Asmgenproof.v
index e2fafb16..8e9f022c 100644
--- a/riscV/Asmgenproof.v
+++ b/riscV/Asmgenproof.v
@@ -285,12 +285,12 @@ Opaque Int.eq.
 - apply opimm32_label; intros; exact I.
 - apply opimm32_label; intros; exact I.
 - apply opimm32_label; intros; exact I.
-- destruct (Int.eq n Int.zero); TailNoLabel.
+- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
 - apply opimm64_label; intros; exact I.
 - apply opimm64_label; intros; exact I.
 - apply opimm64_label; intros; exact I.
 - apply opimm64_label; intros; exact I.
-- destruct (Int.eq n Int.zero); TailNoLabel.
+- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
 - eapply transl_cond_op_label; eauto.
 Qed.
 
diff --git a/riscV/Asmgenproof1.v b/riscV/Asmgenproof1.v
index 54a86ae7..8678a5dc 100644
--- a/riscV/Asmgenproof1.v
+++ b/riscV/Asmgenproof1.v
@@ -1035,17 +1035,23 @@ Opaque Int.eq.
   intros (rs' & A & B & C).
   exists rs'; split; eauto. rewrite B; auto with asmgen.
 - (* shrximm *)
-  clear H. exploit Val.shrx_shr_2; eauto. intros E; subst v; clear EV.
+  clear H. exploit Val.shrx_shr_3; eauto. intros E; subst v; clear EV.
   destruct (Int.eq n Int.zero).
 + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
   split; intros; Simpl. 
-+ change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
-  econstructor; split.
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  apply exec_straight_one. simpl; reflexivity. auto. 
-  split; intros; Simpl.
++ destruct (Int.eq n Int.one).
+  * econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    apply exec_straight_one. simpl; reflexivity. auto.
+    split; intros; Simpl.
+  * change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
+    econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    apply exec_straight_one. simpl; reflexivity. auto. 
+    split; intros; Simpl.
 - (* longofintu *)
   econstructor; split.
   eapply exec_straight_three. simpl; eauto. simpl; eauto. simpl; eauto. auto. auto. auto.
@@ -1070,17 +1076,24 @@ Opaque Int.eq.
   intros (rs' & A & B & C).
   exists rs'; split; eauto. rewrite B; auto with asmgen.
 - (* shrxlimm *)
-  clear H. exploit Val.shrxl_shrl_2; eauto. intros E; subst v; clear EV.
+  clear H. exploit Val.shrxl_shrl_3; eauto. intros E; subst v; clear EV.
   destruct (Int.eq n Int.zero).
 + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
   split; intros; Simpl. 
-+ change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
-  econstructor; split.
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  apply exec_straight_one. simpl; reflexivity. auto. 
-  split; intros; Simpl.
++ destruct (Int.eq n Int.one).
+  * econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    apply exec_straight_one. simpl; reflexivity. auto.
+    split; intros; Simpl.
+
+  * change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
+    econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    apply exec_straight_one. simpl; reflexivity. auto. 
+    split; intros; Simpl.
 - (* cond *)
   exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
   exists rs'; split. eexact A. eauto with asmgen.
diff --git a/runtime/include/math.h b/runtime/include/math.h
index 060968c8..d6475df1 100644
--- a/runtime/include/math.h
+++ b/runtime/include/math.h
@@ -3,6 +3,8 @@
 
 #define isfinite(__y) (fpclassify((__y)) >= FP_ZERO)
 
+#include_next <math.h>
+
 #ifndef COMPCERT_NO_FP_MACROS
 #define fmin(x, y) __builtin_fmin((x),(y))
 #define fmax(x, y) __builtin_fmax((x),(y))
@@ -14,5 +16,4 @@
 #define fmaf(x, y, z) __builtin_fmaf((x),(y),(z))
 #endif
 
-#include_next <math.h>
 #endif
diff --git a/test/monniaux/moves/array.c b/test/monniaux/moves/array.c
new file mode 100644
index 00000000..faa1d96b
--- /dev/null
+++ b/test/monniaux/moves/array.c
@@ -0,0 +1,18 @@
+void incr_double_array(double *t) {  /* Adds 1.0 in place to each of t[0], t[1], t[2] and t[3]. */
+  double x0 = 1.0;  /* each increment constant is declared right before the load it pairs with */
+  double t0 = t[0];
+  double x1 = 1.0;
+  double t1 = t[1];
+  double x2 = 1.0;
+  double t2 = t[2];
+  double x3 = 1.0;
+  double t3 = t[3];
+  t0 = t0 + x0;  /* all four additions come after all four loads */
+  t1 = t1 + x1;
+  t2 = t2 + x2;
+  t3 = t3 + x3;
+  t[0] = t0;  /* results are written back only after every addition */
+  t[1] = t1;
+  t[2] = t2;
+  t[3] = t3;
+}  /* NOTE(review): presumably kept as straight-line code on purpose (the file sits in a "moves" test directory) -- confirm before restructuring */
diff --git a/x86/CBuiltins.ml b/x86/CBuiltins.ml
index f4f40a31..e7f714c7 100644
--- a/x86/CBuiltins.ml
+++ b/x86/CBuiltins.ml
@@ -73,9 +73,6 @@ let builtins = {
       (TVoid [], [TPtr(TInt(IUShort, []), []); TInt(IUShort, [])], false);
     "__builtin_write32_reversed",
       (TVoid [], [TPtr(TInt(IUInt, []), []); TInt(IUInt, [])], false);
-    (* no operation *)
-    "__builtin_nop",
-      (TVoid [], [], false);
   ]
 }