107 files changed, 3975 insertions, 1436 deletions
diff --git a/arm/Asm.v b/arm/Asm.v
index bc5ca1a5..08234975 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -900,7 +900,7 @@ Proof.
   { intros. inv H; inv H0; congruence. }
   assert (B: forall p v1 v2,
              extcall_arg_pair rs m p v1 -> extcall_arg_pair rs m p v2 -> v1 = v2).
-  { intros. inv H; inv H0. 
+  { intros. inv H; inv H0.
     eapply A; eauto.
     f_equal; eapply A; eauto. }
   assert (C: forall ll vl1, list_forall2 (extcall_arg_pair rs m) ll vl1 ->
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index 252a294a..eec531dc 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -30,7 +30,7 @@ Require Import Asmgen.
 Require Import Conventions.
 Require Import Asmgenproof0.
 
-Local Transparent Archi.ptr64. 
+Local Transparent Archi.ptr64.
 
 (** Useful properties of the R14 registers. *)
 
@@ -530,7 +530,7 @@ Lemma loadind_int_correct:
 Proof.
   intros; unfold loadind_int.
   assert (Val.offset_ptr (rs base) ofs = Val.add (rs base) (Vint (Ptrofs.to_int ofs))).
-  { destruct (rs base); try discriminate. simpl. f_equal; f_equal. symmetry; auto with ptrofs. }   
+  { destruct (rs base); try discriminate. simpl. f_equal; f_equal. symmetry; auto with ptrofs. }
   apply indexed_memory_access_correct; intros.
   econstructor; split.
   apply exec_straight_one. simpl. unfold exec_load. rewrite H1, <- H0, H. eauto. auto.
@@ -546,9 +546,9 @@ Lemma loadind_correct:
   /\ rs'#(preg_of dst) = v
   /\ forall r, if_preg r = true -> r <> IR14 -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
-  unfold loadind; intros. 
+  unfold loadind; intros.
   assert (Val.offset_ptr (rs base) ofs = Val.add (rs base) (Vint (Ptrofs.to_int ofs))).
-  { destruct (rs base); try discriminate. simpl. f_equal; f_equal. symmetry; auto with ptrofs. }   
+  { destruct (rs base); try discriminate. simpl. f_equal; f_equal. symmetry; auto with ptrofs. }
   destruct ty; destruct (preg_of dst); inv H; simpl in H0.
 - (* int *)
   apply loadind_int_correct; auto.
@@ -587,32 +587,32 @@ Proof.
   unfold storeind; intros.
   assert (DATA: data_preg (preg_of src) = true) by eauto with asmgen.
   assert (Val.offset_ptr (rs base) ofs = Val.add (rs base) (Vint (Ptrofs.to_int ofs))).
-  { destruct (rs base); try discriminate. simpl. f_equal; f_equal. symmetry; auto with ptrofs. }   
+  { destruct (rs base); try discriminate. simpl. f_equal; f_equal. symmetry; auto with ptrofs. }
   destruct ty; destruct (preg_of src); inv H; simpl in H0.
 - (* int *)
   apply indexed_memory_access_correct; intros.
   econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto. 
+  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto.
   intros; Simpl.
 - (* float *)
   apply indexed_memory_access_correct; intros.
   econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto. 
+  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto.
   intros; Simpl.
 - (* single *)
   apply indexed_memory_access_correct; intros.
   econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto. 
+  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto.
   intros; Simpl.
 - (* any32 *)
   apply indexed_memory_access_correct; intros.
   econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto. 
+  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto.
   intros; Simpl.
 - (* any64 *)
   apply indexed_memory_access_correct; intros.
   econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto. 
+  apply exec_straight_one. simpl. unfold exec_store. rewrite H, <- H1, H2, H0 by auto with asmgen; eauto. auto.
   intros; Simpl.
 Qed.
 
@@ -1306,7 +1306,7 @@ Proof.
        exists rs', exec_straight ge fn c rs m k rs' m
            /\ Val.lessdef v rs'#(preg_of res)
            /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs'#r = rs#r).
-  { intros (rs' & A & B & C). subst v; exists rs'; auto. } 
+  { intros (rs' & A & B & C). subst v; exists rs'; auto. }
   destruct op; try (apply SAME; eapply transl_op_correct_same; eauto; fail).
 - (* Oaddrstack *)
   clear SAME; simpl in *; ArgsInv.
@@ -1372,7 +1372,7 @@ Proof.
   erewrite ! ireg_of_eq; eauto. rewrite transl_shift_correct. auto.
   (* Ainstack *)
   inv TR. apply indexed_memory_access_correct. intros. eapply MK1; eauto.
-  rewrite H. destruct (rs IR13); try contradiction. simpl. f_equal; f_equal. auto with ptrofs.     
+  rewrite H. destruct (rs IR13); try contradiction. simpl. f_equal; f_equal. auto with ptrofs.
 Qed.
 
 Lemma transl_load_int_correct:
diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp
index e0f0889f..cb7a73eb 100644
--- a/arm/ConstpropOp.vp
+++ b/arm/ConstpropOp.vp
@@ -10,7 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Static analysis and strength reduction for operators 
+(** Static analysis and strength reduction for operators
   and conditions.  This is the machine-dependent part of [Constprop]. *)
 
 Require Import Coqlib.
@@ -51,7 +51,7 @@ Definition eval_static_shift (s: shift) (n: int) : int :=
   | Sror x => Int.ror n x
   end.
 
-Nondetfunction cond_strength_reduction 
+Nondetfunction cond_strength_reduction
               (cond: condition) (args: list reg) (vl: list aval) :=
   match cond, args, vl with
   | Ccomp c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
@@ -98,7 +98,7 @@ Nondetfunction cond_strength_reduction
       if Float32.eq_dec n2 Float32.zero
       then (Cnotcompfszero c, r1 :: nil)
       else (cond, args)
-  | _, _, _ => 
+  | _, _, _ =>
       (cond, args)
   end.
 
@@ -206,7 +206,7 @@ Definition make_cast8signed (r: reg) (a: aval) :=
 Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn Ptop 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
 
-Nondetfunction op_strength_reduction 
+Nondetfunction op_strength_reduction
               (op: operation) (args: list reg) (vl: list aval) :=
   match op, args, vl with
   | Ocast8signed, r1 :: nil, v1 :: nil => make_cast8signed r1 v1
diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v
index e1ae80a2..c9f97aa8 100644
--- a/arm/ConstpropOpproof.v
+++ b/arm/ConstpropOpproof.v
@@ -116,7 +116,7 @@ Proof.
   + (* global *)
     inv H2. exists (Genv.symbol_address ge id ofs); auto.
   + (* stack *)
-    inv H2. exists (Vptr sp ofs); split; auto. simpl. rewrite Ptrofs.add_zero_l; auto.  
+    inv H2. exists (Vptr sp ofs); split; auto. simpl. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 Lemma eval_static_shift_correct:
diff --git a/arm/Conventions1.v b/arm/Conventions1.v
index ecf03e1d..86be8c95 100644
--- a/arm/Conventions1.v
+++ b/arm/Conventions1.v
@@ -60,6 +60,8 @@ Definition destroyed_at_call :=
 Definition dummy_int_reg := R0.     (**r Used in [Coloring]. *)
 Definition dummy_float_reg := F0.   (**r Used in [Coloring]. *)
 
+Definition callee_save_type := mreg_type.
+  
 Definition is_float_reg (r: mreg): bool :=
   match r with
   | R0  | R1  | R2  | R3
@@ -136,7 +138,10 @@ Lemma loc_result_pair:
   forall sg,
   match loc_result sg with
   | One _ => True
-  | Twolong r1 r2 => r1 <> r2 /\ sg.(sig_res) = Some Tlong /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true /\ Archi.splitlong = true
+  | Twolong r1 r2 =>
+        r1 <> r2 /\ sg.(sig_res) = Some Tlong
+     /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
+     /\ Archi.ptr64 = false
   end.
 Proof.
   intros; unfold loc_result; destruct (sig_res sg) as [[]|]; destruct Archi.big_endian; auto.
diff --git a/arm/Op.v b/arm/Op.v
index 0d31c2ac..2e919758 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -1102,7 +1102,7 @@ Proof.
   rewrite eval_shift_stack_addressing.
   eapply eval_addressing_inj with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
   intros. apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 Lemma eval_operation_inject:
@@ -1122,7 +1122,7 @@ Proof.
   intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
   intros; eapply Mem.different_pointers_inject; eauto.
   intros; apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 End EVAL_INJECT.
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index 4ea1e1a1..fc2fbe6b 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -199,7 +199,7 @@ Definition mulhu (e1: expr) (e2: expr) := Eop Omulhu (e1 ::: e2 ::: Enil).
 
 (** ** Bitwise and, or, xor *)
 
-Nondetfunction andimm (n1: int) (e2: expr) := 
+Nondetfunction andimm (n1: int) (e2: expr) :=
   if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else
   if Int.eq n1 Int.mone then e2 else
   match e2 with
@@ -343,14 +343,14 @@ Nondetfunction compimm (default: comparison -> int -> condition)
         Eop (Ocmp (negate_condition c)) el
       else if Int.eq_dec n2 Int.one then
         Eop (Ocmp c) el
-      else 
+      else
         Eop (Ointconst Int.zero) Enil
   | Cne, Eop (Ocmp c) el =>
       if Int.eq_dec n2 Int.zero then
         Eop (Ocmp c) el
       else if Int.eq_dec n2 Int.one then
         Eop (Ocmp (negate_condition c)) el
-      else 
+      else
         Eop (Ointconst Int.one) Enil
   | _, _ =>
        Eop (Ocmp (default c n2)) (e1 ::: Enil)
diff --git a/arm/Stacklayout.v b/arm/Stacklayout.v
index f5c07fff..c867ba59 100644
--- a/arm/Stacklayout.v
+++ b/arm/Stacklayout.v
@@ -86,16 +86,16 @@ Local Opaque Z.add Z.mul sepconj range.
      retaddr
      back link *)
   rewrite sep_swap12.
-  rewrite sep_swap45. 
+  rewrite sep_swap45.
   rewrite sep_swap34.
   rewrite sep_swap45.
 (* Apply range_split and range_split2 repeatedly *)
   unfold fe_ofs_arg.
   apply range_split_2. fold ol; omega. omega.
-  apply range_split. omega. 
+  apply range_split. omega.
   apply range_split_2. fold ora; omega. omega.
   apply range_split. omega.
-  apply range_drop_right with ostkdata. omega. 
+  apply range_drop_right with ostkdata. omega.
   eapply sep_drop2. eexact H.
 Qed.
 
diff --git a/backend/Allocation.v b/backend/Allocation.v
index f561ef4e..3dd4cb09 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -39,7 +39,12 @@ Require Import Op Registers RTL Locations Conventions RTLtyping LTL.
   maching between an RTL instruction and an LTL basic block.
 *)
 
-Definition move := (loc * loc)%type.
+Inductive move: Type :=
+  | MV (src dst: loc)
+  | MVmakelong (src1 src2 dst: mreg)
+  | MVlowlong (src dst: mreg)
+  | MVhighlong (src dst: mreg).
+
 Definition moves := list move.
 
 Inductive block_shape: Type :=
@@ -110,18 +115,22 @@ Definition classify_operation {A: Type} (op: operation) (args: list A) : operati
   end.
 
 (** Extract the move instructions at the beginning of block [b].
-  Return the list of moves and the suffix of [b] after the moves. *)
+  Return the list of moves and the suffix of [b] after the moves.
+  Two versions are provided: [extract_moves], which extracts only
+  "true" moves, and [extract_moves_ext], which also extracts
+  the [makelong], [lowlong] and [highlong] operations over 64-bit integers.
+*)
 
 Fixpoint extract_moves (accu: moves) (b: bblock) {struct b} : moves * bblock :=
   match b with
   | Lgetstack sl ofs ty dst :: b' =>
-      extract_moves ((S sl ofs ty, R dst) :: accu) b'
+      extract_moves (MV (S sl ofs ty) (R dst) :: accu) b'
   | Lsetstack src sl ofs ty :: b' =>
-      extract_moves ((R src, S sl ofs ty) :: accu) b'
+      extract_moves (MV (R src) (S sl ofs ty) :: accu) b'
   | Lop op args res :: b' =>
       match is_move_operation op args with
       | Some arg =>
-          extract_moves ((R arg, R res) :: accu) b'
+          extract_moves (MV (R arg) (R res) :: accu) b'
       | None =>
           (List.rev accu, b)
       end
@@ -129,6 +138,29 @@ Fixpoint extract_moves (accu: moves) (b: bblock) {struct b} : moves * bblock :=
       (List.rev accu, b)
   end.
 
+Fixpoint extract_moves_ext (accu: moves) (b: bblock) {struct b} : moves * bblock :=
+  match b with
+  | Lgetstack sl ofs ty dst :: b' =>
+      extract_moves_ext (MV (S sl ofs ty) (R dst) :: accu) b'
+  | Lsetstack src sl ofs ty :: b' =>
+      extract_moves_ext (MV (R src) (S sl ofs ty) :: accu) b'
+  | Lop op args res :: b' =>
+      match classify_operation op args with
+      | operation_Omove arg =>
+          extract_moves_ext (MV (R arg) (R res) :: accu) b'
+      | operation_Omakelong arg1 arg2 =>
+          extract_moves_ext (MVmakelong arg1 arg2 res :: accu) b'
+      | operation_Olowlong arg =>
+          extract_moves_ext (MVlowlong arg res :: accu) b'
+      | operation_Ohighlong arg =>
+          extract_moves_ext (MVhighlong arg res :: accu) b'
+      | operation_other _ _ =>
+          (List.rev accu, b)
+      end
+  | _ =>
+      (List.rev accu, b)
+  end.
+
 Definition check_succ (s: node) (b: LTL.bblock) : bool :=
   match b with
   | Lbranch s' :: _ => peq s s'
@@ -251,17 +283,17 @@ Definition pair_instr_block
       | _ => None
       end
   | Icall sg ros args res s =>
-      let (mv1, b1) := extract_moves nil b in
+      let (mv1, b1) := extract_moves_ext nil b in
       match b1 with
       | Lcall sg' ros' :: b2 =>
-          let (mv2, b3) := extract_moves nil b2 in
+          let (mv2, b3) := extract_moves_ext nil b2 in
           assertion (signature_eq sg sg');
           assertion (check_succ s b3);
           Some(BScall sg ros args res mv1 ros' mv2 s)
       | _ => None
       end
   | Itailcall sg ros args =>
-      let (mv1, b1) := extract_moves nil b in
+      let (mv1, b1) := extract_moves_ext nil b in
       match b1 with
       | Ltailcall sg' ros' :: b2 =>
           assertion (signature_eq sg sg');
@@ -297,7 +329,7 @@ Definition pair_instr_block
       | _ => None
       end
   | Ireturn arg =>
-      let (mv1, b1) := extract_moves nil b in
+      let (mv1, b1) := extract_moves_ext nil b in
       match b1 with
       | Lreturn :: b2 => Some(BSreturn arg mv1)
       | _ => None
@@ -319,7 +351,7 @@ Definition pair_codes (f1: RTL.function) (f2: LTL.function) : PTree.t block_shap
 
 Definition pair_entrypoints (f1: RTL.function) (f2: LTL.function) : option moves :=
   do b <- (LTL.fn_code f2)!(LTL.fn_entrypoint f2);
-  let (mv, b1) := extract_moves nil b in
+  let (mv, b1) := extract_moves_ext nil b in
   assertion (check_succ (RTL.fn_entrypoint f1) b1);
   Some mv.
 
@@ -602,6 +634,55 @@ Definition subst_loc (l1 l2: loc) (e: eqs) : option eqs :=
      (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
      (Some e).
 
+(** [subst_loc_part l1 l2 k e] simulates the effect of assigning
+  [l2] to the [k] part of [l1] on [e].
+  All equations of the form [r = l1 [k]] are replaced by [r = l2 [Full]].
+  Return [None] if [e] contains an equation of the form [r = l] with [l]
+  partially overlapping [l1], or an equation of the form [r = l1] with
+  a kind different from [k1].
+*)
+
+Definition subst_loc_part (l1: loc) (l2: loc) (k: equation_kind) (e: eqs) : option eqs :=
+  EqSet2.fold
+    (fun q opte =>
+      match opte with
+      | None => None
+      | Some e =>
+          if Loc.eq l1 (eloc q) then
+            if IndexedEqKind.eq (ekind q) k
+            then Some (add_equation (Eq Full (ereg q) l2) (remove_equation q e))
+            else None
+          else
+            None
+      end)
+     (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
+     (Some e).
+
+(** [subst_loc_pair l1 l2 l2'] simulates the effect of assigning
+  [makelong l2 l2'] to [l1].  All equations of the form [r = l1 [Full]]
+  are replaced by the two equations [r = l2 [High], r = l2' [Low]].
+  Return [None] if [e] contains an equation of the form [r = l] with [l]
+  partially overlapping [l1], or an equation of the form [r = l1] with
+  a kind different from [Full]. *)
+
+Definition subst_loc_pair (l1 l2 l2': loc) (e: eqs) : option eqs :=
+  EqSet2.fold
+    (fun q opte =>
+      match opte with
+      | None => None
+      | Some e =>
+          if Loc.eq l1 (eloc q) then
+            if IndexedEqKind.eq (ekind q) Full
+            then Some (add_equation (Eq High (ereg q) l2)
+                        (add_equation (Eq Low (ereg q) l2')
+                           (remove_equation q e)))
+            else None
+          else
+            None
+      end)
+     (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
+     (Some e).
+
 (** [loc_type_compat env l e] checks that for all equations [r = l] in [e],
   the type [env r] of [r] is compatible with the type of [l]. *)
 
@@ -616,6 +697,14 @@ Definition loc_type_compat (env: regenv) (l: loc) (e: eqs) : bool :=
     (fun q => subtype (sel_type (ekind q) (env (ereg q))) (Loc.type l))
     (select_loc_l l) (select_loc_h l) (eqs2 e).
 
+(** [long_type_compat env l e] checks that for all equations [r = l] in [e].
+  then type [env r] of [r] is compatible with the type [Tlong]. *)
+
+Definition long_type_compat (env: regenv) (l: loc) (e: eqs) : bool :=
+  EqSet2.for_all_between
+    (fun q => subtype (env (ereg q)) Tlong)
+    (select_loc_l l) (select_loc_h l) (eqs2 e).
+
 (** [add_equations [r1...rN] [m1...mN] e] adds to [e] the [N] equations
     [ri = R mi [Full]].  Return [None] if the two lists have different lengths.
 *)
@@ -637,9 +726,8 @@ Function add_equations_args (rl: list reg) (tyl: list typ) (ll: list (rpair loc)
   | r1 :: rl, ty :: tyl, One l1 :: ll =>
       add_equations_args rl tyl ll (add_equation (Eq Full r1 l1) e)
   | r1 :: rl, Tlong :: tyl, Twolong l1 l2 :: ll =>
-      if Archi.splitlong then
-        add_equations_args rl tyl ll (add_equation (Eq Low r1 l2) (add_equation (Eq High r1 l1) e))
-      else None
+      if Archi.ptr64 then None else
+      add_equations_args rl tyl ll (add_equation (Eq Low r1 l2) (add_equation (Eq High r1 l1) e))
   | _, _, _ => None
   end.
 
@@ -651,9 +739,8 @@ Function add_equations_res (r: reg) (oty: option typ) (p: rpair mreg) (e: eqs) :
   | One mr, _ =>
       Some (add_equation (Eq Full r (R mr)) e)
   | Twolong mr1 mr2, Some Tlong =>
-      if Archi.splitlong then
-        Some (add_equation (Eq Low r (R mr2)) (add_equation (Eq High r (R mr1)) e))
-      else None
+      if Archi.ptr64 then None else
+      Some (add_equation (Eq Low r (R mr2)) (add_equation (Eq High r (R mr1)) e))
   | _, _ =>
       None
   end.
@@ -857,11 +944,24 @@ Definition well_typed_move (env: regenv) (dst: loc) (e: eqs) : bool :=
 Fixpoint track_moves (env: regenv) (mv: moves) (e: eqs) : option eqs :=
   match mv with
   | nil => Some e
-  | (src, dst) :: mv =>
+  | MV src dst :: mv =>
       do e1 <- track_moves env mv e;
       assertion (can_undef_except dst (destroyed_by_move src dst)) e1;
       assertion (well_typed_move env dst e1);
       subst_loc dst src e1
+  | MVmakelong src1 src2 dst :: mv =>
+      assertion (negb Archi.ptr64);
+      do e1 <- track_moves env mv e;
+      assertion (long_type_compat env (R dst) e1);
+      subst_loc_pair (R dst) (R src1) (R src2) e1
+  | MVlowlong src dst :: mv =>
+      assertion (negb Archi.ptr64);
+      do e1 <- track_moves env mv e;
+      subst_loc_part (R dst) (R src) Low e1
+  | MVhighlong src dst :: mv =>
+      assertion (negb Archi.ptr64);
+      do e1 <- track_moves env mv e;
+      subst_loc_part (R dst) (R src) High e1
   end.
 
 (** [transfer_use_def args res args' res' undefs e] returns the set
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index 888945ec..3b2ecd35 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -34,10 +34,13 @@ Qed.
 
 Definition expand_move (m: move) : instruction :=
   match m with
-  | (R src, R dst) => Lop Omove (src::nil) dst
-  | (S sl ofs ty, R dst) => Lgetstack sl ofs ty dst
-  | (R src, S sl ofs ty) => Lsetstack src sl ofs ty
-  | (S _ _ _, S _ _ _) => Lreturn    (**r should never happen *)
+  | MV (R src) (R dst) => Lop Omove (src::nil) dst
+  | MV (S sl ofs ty) (R dst) => Lgetstack sl ofs ty dst
+  | MV (R src) (S sl ofs ty) => Lsetstack src sl ofs ty
+  | MV (S _ _ _) (S _ _ _) => Lreturn    (**r should never happen *)
+  | MVmakelong src1 src2 dst => Lop Omakelong (src1::src2::nil) dst
+  | MVlowlong src dst => Lop Olowlong (src::nil) dst
+  | MVhighlong src dst => Lop Ohighlong (src::nil) dst
   end.
 
 Definition expand_moves (mv: moves) (k: bblock) : bblock :=
@@ -45,7 +48,7 @@ Definition expand_moves (mv: moves) (k: bblock) : bblock :=
 
 Definition wf_move (m: move) : Prop :=
   match m with
-  | (S _ _ _, S _ _ _) => False
+  | MV (S _ _ _) (S _ _ _) => False
   | _ => True
   end.
 
@@ -64,17 +67,20 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                          (Iop Omove (src :: nil) dst s)
                          (expand_moves mv (Lbranch s :: k))
   | ebs_makelong: forall src1 src2 dst mv s k,
-      wf_moves mv -> Archi.splitlong = true ->
+      wf_moves mv ->
+      Archi.splitlong = true ->
       expand_block_shape (BSmakelong src1 src2 dst mv s)
                          (Iop Omakelong (src1 :: src2 :: nil) dst s)
                          (expand_moves mv (Lbranch s :: k))
   | ebs_lowlong: forall src dst mv s k,
-      wf_moves mv -> Archi.splitlong = true ->
+      wf_moves mv ->
+      Archi.splitlong = true ->
       expand_block_shape (BSlowlong src dst mv s)
                          (Iop Olowlong (src :: nil) dst s)
                          (expand_moves mv (Lbranch s :: k))
   | ebs_highlong: forall src dst mv s k,
-      wf_moves mv -> Archi.splitlong = true ->
+      wf_moves mv ->
+      Archi.splitlong = true ->
       expand_block_shape (BShighlong src dst mv s)
                          (Iop Ohighlong (src :: nil) dst s)
                          (expand_moves mv (Lbranch s :: k))
@@ -97,7 +103,8 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                            (Lload chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2: forall addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s k,
       wf_moves mv1 -> wf_moves mv2 -> wf_moves mv3 ->
-      Archi.splitlong = true -> offset_addressing addr 4 = Some addr2 ->
+      Archi.splitlong = true ->
+      offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2 addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s)
                          (Iload Mint64 addr args dst s)
                          (expand_moves mv1
@@ -107,7 +114,7 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                               expand_moves mv3 (Lbranch s :: k))))
   | ebs_load2_1: forall addr args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
-      Archi.splitlong = true -> 
+      Archi.splitlong = true ->
       expand_block_shape (BSload2_1 addr args dst mv1 args' dst' mv2 s)
                          (Iload Mint64 addr args dst s)
                          (expand_moves mv1
@@ -115,7 +122,8 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                             expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2_2: forall addr addr2 args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
-      Archi.splitlong = true -> offset_addressing addr 4 = Some addr2 ->
+      Archi.splitlong = true ->
+      offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2_2 addr addr2 args dst mv1 args' dst' mv2 s)
                          (Iload Mint64 addr args dst s)
                          (expand_moves mv1
@@ -134,7 +142,8 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                            (Lstore chunk addr args' src' :: Lbranch s :: k))
   | ebs_store2: forall addr addr2 args src mv1 args1' src1' mv2 args2' src2' s k,
       wf_moves mv1 -> wf_moves mv2 ->
-      Archi.splitlong = true -> offset_addressing addr 4 = Some addr2 ->
+      Archi.splitlong = true ->
+      offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSstore2 addr addr2 args src mv1 args1' src1' mv2 args2' src2' s)
                          (Istore Mint64 addr args src s)
                          (expand_moves mv1
@@ -184,7 +193,7 @@ Ltac MonadInv :=
   | [ H: match negb (proj_sumbool ?x) with true => _ | false => None end = Some _ |- _ ] =>
         destruct x; [discriminate|simpl in H; MonadInv]
   | [ H: match negb ?x with true => _ | false => None end = Some _ |- _ ] =>
-        destruct x; [discriminate|simpl in H; MonadInv]
+        destruct x as [] eqn:? ; [discriminate|simpl in H; MonadInv]
   | [ H: match ?x with true => _ | false => None end = Some _ |- _ ] =>
         destruct x as [] eqn:? ; [MonadInv|discriminate]
   | [ H: match ?x with (_, _) => _ end = Some _ |- _ ] =>
@@ -233,7 +242,45 @@ Proof.
   + (* reg-stack move *)
     exploit IHb; eauto. constructor; auto. exact I. rewrite expand_moves_cons; auto.
   }
-  intros. exploit IND; eauto. constructor. 
+  intros. exploit IND; eauto. constructor.
+Qed.
+
+Lemma extract_moves_ext_sound:
+  forall b mv b',
+  extract_moves_ext nil b = (mv, b') ->
+  wf_moves mv /\ b = expand_moves mv b'.
+Proof.
+  assert (BASE:
+      forall accu b,
+      wf_moves accu ->
+      wf_moves (List.rev accu) /\ expand_moves (List.rev accu) b = expand_moves (List.rev accu) b).
+   { intros; split; auto. unfold wf_moves in *; rewrite Forall_forall in *.
+     intros. apply H. rewrite <- in_rev in H0; auto. }
+
+  assert (IND: forall b accu mv b',
+          extract_moves_ext accu b = (mv, b') ->
+          wf_moves accu ->
+          wf_moves mv /\ expand_moves (List.rev accu) b = expand_moves mv b').
+  { induction b; simpl; intros.
+  - inv H. auto.
+  - destruct a; try (inv H; apply BASE; auto; fail).
+  + destruct (classify_operation op args).
+  * (* reg-reg move *)
+    exploit IHb; eauto. constructor; auto. exact I. rewrite expand_moves_cons; auto.
+  * (* makelong *)
+    exploit IHb; eauto. constructor; auto. exact I. rewrite expand_moves_cons; auto.
+  * (* lowlong *)
+    exploit IHb; eauto. constructor; auto. exact I. rewrite expand_moves_cons; auto.
+  * (* highlong *)
+    exploit IHb; eauto. constructor; auto. exact I. rewrite expand_moves_cons; auto.
+  * (* default *)
+    inv H; apply BASE; auto.
+  + (* stack-reg move *)
+    exploit IHb; eauto. constructor; auto. exact I. rewrite expand_moves_cons; auto.
+  + (* reg-stack move *)
+    exploit IHb; eauto. constructor; auto. exact I. rewrite expand_moves_cons; auto.
+  }
+  intros. exploit IND; eauto. constructor.
 Qed.
 
 Lemma check_succ_sound:
@@ -248,6 +295,8 @@ Ltac UseParsingLemmas :=
   match goal with
   | [ H: extract_moves nil _ = (_, _) |- _ ] =>
       destruct (extract_moves_sound _ _ _ H); clear H; subst; UseParsingLemmas
+  | [ H: extract_moves_ext nil _ = (_, _) |- _ ] =>
+      destruct (extract_moves_ext_sound _ _ _ H); clear H; subst; UseParsingLemmas
   | [ H: check_succ _ _ = true |- _ ] =>
       try (discriminate H);
       destruct (check_succ_sound _ _ H); clear H; subst; UseParsingLemmas
@@ -261,7 +310,7 @@ Proof.
   assert (OP: forall op args res s b bsh,
     pair_Iop_block op args res s b = Some bsh -> expand_block_shape bsh (Iop op args res s) b).
   {
-    unfold pair_Iop_block; intros. MonadInv. destruct b0. 
+    unfold pair_Iop_block; intros. MonadInv. destruct b0.
     MonadInv; UseParsingLemmas.
     destruct i; MonadInv; UseParsingLemmas.
     eapply ebs_op; eauto.
@@ -290,8 +339,8 @@ Proof.
   destruct (chunk_eq m Mint64 && Archi.splitlong) eqn:A; MonadInv; UseParsingLemmas.
   destruct b as [ | [] b]; MonadInv; UseParsingLemmas.
   InvBooleans. subst m. eapply ebs_load2; eauto.
-  InvBooleans. subst m. 
-  destruct (eq_addressing a addr). 
+  InvBooleans. subst m.
+  destruct (eq_addressing a addr).
   inv H; inv H2. eapply ebs_load2_1; eauto.
   destruct (option_eq eq_addressing (offset_addressing a 4) (Some addr)).
   inv H; inv H2. eapply ebs_load2_2; eauto.
@@ -418,20 +467,28 @@ Proof.
   intros until e'. functional induction (add_equations_args rl tyl ll e); intros.
 - inv H; auto.
 - eapply add_equation_satisf; eauto.
+- discriminate.
 - eapply add_equation_satisf. eapply add_equation_satisf. eauto.
 - congruence.
-- congruence.
 Qed.
 
-Lemma val_longofwords_eq:
+Lemma val_longofwords_eq_1:
   forall v,
-  Val.has_type v Tlong -> Archi.splitlong = true ->
+  Val.has_type v Tlong -> Archi.ptr64 = false ->
   Val.longofwords (Val.hiword v) (Val.loword v) = v.
 Proof.
   intros. red in H. destruct v; try contradiction.
 - reflexivity.
 - simpl. rewrite Int64.ofwords_recompose. auto.
-- rewrite Archi.splitlong_ptr32 in H by auto. congruence.
+- congruence.
+Qed.
+
+Lemma val_longofwords_eq_2:
+  forall v,
+  Val.has_type v Tlong -> Archi.splitlong = true ->
+  Val.longofwords (Val.hiword v) (Val.loword v) = v.
+Proof.
+  intros. apply Archi.splitlong_ptr32 in H0. apply val_longofwords_eq_1; assumption.
 Qed.
 
 Lemma add_equations_args_lessdef:
@@ -445,14 +502,14 @@ Proof.
 - inv H; auto.
 - destruct H1. constructor; auto.
   eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto.
+- discriminate.
 - destruct H1. constructor; auto.
-  rewrite <- (val_longofwords_eq (rs#r1)) by auto. apply Val.longofwords_lessdef.
+  rewrite <- (val_longofwords_eq_1 (rs#r1)) by auto. apply Val.longofwords_lessdef.
   eapply add_equation_lessdef with (q := Eq High r1 l1).
   eapply add_equation_satisf. eapply add_equations_args_satisf; eauto.
   eapply add_equation_lessdef with (q := Eq Low r1 l2).
   eapply add_equations_args_satisf; eauto.
 - discriminate.
-- discriminate.
 Qed.
 
 Lemma add_equation_ros_satisf:
@@ -676,7 +733,7 @@ Lemma parallel_assignment_satisf_2:
 Proof.
   intros. functional inversion H.
 - (* One location *)
-  subst. simpl in H2. InvBooleans. simpl. 
+  subst. simpl in H2. InvBooleans. simpl.
   apply parallel_assignment_satisf with Full; auto.
   unfold reg_loc_unconstrained. rewrite H1, H4. auto.
 - (* Two 32-bit halves *)
@@ -686,10 +743,10 @@ Proof.
   simpl in H2. InvBooleans. simpl.
   red; intros.
   destruct (OrderedEquation.eq_dec q (Eq Low res (R mr2))).
-  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss. 
+  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss.
   apply Val.loword_lessdef; auto.
   destruct (OrderedEquation.eq_dec q (Eq High res (R mr1))).
-  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gso by auto. rewrite Locmap.gss. 
+  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gso by auto. rewrite Locmap.gss.
   apply Val.hiword_lessdef; auto.
   assert (EqSet.In q e'). unfold e', remove_equation; simpl; ESD.fsetdec.
   rewrite Regmap.gso. rewrite ! Locmap.gso. auto.
@@ -737,7 +794,7 @@ Proof.
   {
     apply ESP.fold_rec; unfold Q; intros.
     - auto.
-    - simpl. red in H2. rewrite H2 in H4. ESD.fsetdec. 
+    - simpl. red in H2. rewrite H2 in H4. ESD.fsetdec.
   }
   destruct (ESP.In_dec q elt).
   left. split. apply IN_ELT. auto. apply H. auto.
@@ -878,7 +935,7 @@ Lemma partial_fold_ind:
       f x a' = Some a'' -> P s' a' -> P s'' a'') ->
   P s final.
 Proof.
-  intros. 
+  intros.
   set (g := fun q opte => match opte with Some e => f q e | None => None end) in *.
   set (Q := fun s1 opte => match opte with None => True | Some e => P s1 e end).
   change (Q s (Some final)).
@@ -909,7 +966,7 @@ Proof.
     simpl. rewrite ESF.add_iff, ESF.remove_iff.
     apply H1 in H4; destruct H4.
     subst x; rewrite e; auto.
-    apply H3 in H2; destruct H2. split. congruence. 
+    apply H3 in H2; destruct H2. split. congruence.
     destruct (OrderedEquation.eq_dec x {| ekind := ekind q; ereg := ereg q; eloc := l2 |}); auto.
     subst x; auto.
   }
@@ -999,6 +1056,171 @@ Proof.
   rewrite Locmap.gso; auto.
 Qed.
 
+Lemma in_subst_loc_part:
+  forall l1 l2 k q (e e': eqs),
+  EqSet.In q e ->
+  subst_loc_part l1 l2 k e = Some e' ->
+  (eloc q = l1 /\ ekind q = k /\ EqSet.In (Eq Full (ereg q) l2) e') \/ (Loc.diff l1 (eloc q) /\ EqSet.In q e').
+Proof.
+  unfold subst_loc_part; intros l1 l2 k q e0 e0' IN SUBST.
+  set (elt := EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e0)) in *.
+  set (f := fun q0 e =>
+             if Loc.eq l1 (eloc q0) then
+             if IndexedEqKind.eq (ekind q0) k then
+                Some (add_equation
+                     {| ekind := Full; ereg := ereg q0; eloc := l2 |}
+                     (remove_equation q0 e))
+             else None else None).
+  set (P := fun e1 e2 => EqSet2.In q e1 -> eloc q = l1 /\ ekind q = k /\ EqSet.In (Eq Full (ereg q) l2) e2).
+  assert (A: P elt e0').
+  { eapply partial_fold_ind with (f := f) (s := elt) (final := e0'). eexact SUBST.
+  - unfold P; intros. ESD2.fsetdec.
+  - unfold P, f; intros. destruct (Loc.eq l1 (eloc x)); try discriminate.
+    destruct (IndexedEqKind.eq (ekind x) k); inversion H2; subst a''; clear H2.
+    simpl. rewrite ESF.add_iff, ESF.remove_iff.
+    apply H1 in H4; destruct H4.
+    subst x; rewrite e, <- e1; auto.
+    apply H3 in H2; destruct H2 as (X & Y & Z). split; auto. split; auto.
+    destruct (OrderedEquation.eq_dec x {| ekind := Full; ereg := ereg q; eloc := l2 |}); auto.
+    subst x; auto.
+  }
+  set (Q := fun e1 e2 => ~EqSet2.In q e1 -> EqSet.In q e2).
+  assert (B: Q elt e0').
+  { eapply partial_fold_ind with (f := f) (s := elt) (final := e0'). eexact SUBST.
+  - unfold Q; intros. auto.
+  - unfold Q, f; intros. destruct (Loc.eq l1 (eloc x)); try congruence.
+    destruct (IndexedEqKind.eq (ekind x) k); inversion H2; subst a''; clear H2.
+    simpl. rewrite ESF.add_iff, ESF.remove_iff.
+    red in H1. rewrite H1 in H4. intuition auto. }
+  destruct (ESP2.In_dec q elt).
+  left. apply A; auto.
+  right. split; auto.
+  rewrite <- select_loc_charact.
+  destruct (select_loc_l l1 q) eqn: LL; auto.
+  destruct (select_loc_h l1 q) eqn: LH; auto.
+  elim n. eapply EqSet2.elements_between_iff.
+  exact (select_loc_l_monotone l1).
+  exact (select_loc_h_monotone l1).
+  split. apply eqs_same; auto. auto.
+Qed.
+
+Lemma subst_loc_part_satisf_lowlong:
+  forall src dst rs ls e e',
+  subst_loc_part (R dst) (R src) Low e = Some e' ->
+  satisf rs ls e' ->
+  satisf rs (Locmap.set (R dst) (Val.loword (ls (R src))) ls) e.
+Proof.
+  intros; red; intros.
+  exploit in_subst_loc_part; eauto. intros [[A [B C]] | [A B]].
+  rewrite A, B. apply H0 in C. rewrite Locmap.gss. apply Val.loword_lessdef. exact C.
+  rewrite Locmap.gso; auto.
+Qed.
+
+Lemma subst_loc_part_satisf_highlong:
+  forall src dst rs ls e e',
+  subst_loc_part (R dst) (R src) High e = Some e' ->
+  satisf rs ls e' ->
+  satisf rs (Locmap.set (R dst) (Val.hiword (ls (R src))) ls) e.
+Proof.
+  intros; red; intros.
+  exploit in_subst_loc_part; eauto. intros [[A [B C]] | [A B]].
+  rewrite A, B. apply H0 in C. rewrite Locmap.gss. apply Val.hiword_lessdef. exact C.
+  rewrite Locmap.gso; auto.
+Qed.
+
+Lemma in_subst_loc_pair:
+  forall l1 l2 l2' q (e e': eqs),
+  EqSet.In q e ->
+  subst_loc_pair l1 l2  l2' e = Some e' ->
+  (eloc q = l1 /\ ekind q = Full /\ EqSet.In (Eq High (ereg q) l2) e' /\ EqSet.In (Eq Low (ereg q) l2') e')
+  \/ (Loc.diff l1 (eloc q) /\ EqSet.In q e').
+Proof.
+  unfold subst_loc_pair; intros l1 l2 l2' q e0 e0' IN SUBST.
+  set (elt := EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e0)) in *.
+  set (f := fun q0 e =>
+             if Loc.eq l1 (eloc q0) then
+             if IndexedEqKind.eq (ekind q0) Full then
+                Some (add_equation {| ekind := High; ereg := ereg q0; eloc := l2 |}
+                     (add_equation {| ekind := Low; ereg := ereg q0; eloc := l2' |}
+                     (remove_equation q0 e)))
+             else None else None).
+  set (P := fun e1 e2 => EqSet2.In q e1 -> eloc q = l1 /\ ekind q = Full
+                                        /\ EqSet.In (Eq High (ereg q) l2) e2
+                                        /\ EqSet.In (Eq Low (ereg q) l2') e2).
+  assert (A: P elt e0').
+  { eapply partial_fold_ind with (f := f) (s := elt) (final := e0'). eexact SUBST.
+  - unfold P; intros. ESD2.fsetdec.
+  - unfold P, f; intros. destruct (Loc.eq l1 (eloc x)); try discriminate.
+    destruct (IndexedEqKind.eq (ekind x) Full); inversion H2; subst a''; clear H2.
+    simpl. rewrite ! ESF.add_iff, ! ESF.remove_iff.
+    apply H1 in H4; destruct H4.
+    subst x. rewrite e, e1. intuition auto.
+    apply H3 in H2; destruct H2 as (U & V & W & X).
+    destruct (OrderedEquation.eq_dec x {| ekind := High; ereg := ereg q; eloc := l2 |}).
+    subst x. intuition auto.
+    destruct (OrderedEquation.eq_dec x {| ekind := Low; ereg := ereg q; eloc := l2' |}).
+    subst x. intuition auto.
+    intuition auto. }
+  set (Q := fun e1 e2 => ~EqSet2.In q e1 -> EqSet.In q e2).
+  assert (B: Q elt e0').
+  { eapply partial_fold_ind with (f := f) (s := elt) (final := e0'). eexact SUBST.
+  - unfold Q; intros. auto.
+  - unfold Q, f; intros. destruct (Loc.eq l1 (eloc x)); try congruence.
+    destruct (IndexedEqKind.eq (ekind x) Full); inversion H2; subst a''; clear H2.
+    simpl. rewrite ! ESF.add_iff, ! ESF.remove_iff.
+    red in H1. rewrite H1 in H4. intuition auto. }
+  destruct (ESP2.In_dec q elt).
+  left. apply A; auto.
+  right. split; auto.
+  rewrite <- select_loc_charact.
+  destruct (select_loc_l l1 q) eqn: LL; auto.
+  destruct (select_loc_h l1 q) eqn: LH; auto.
+  elim n. eapply EqSet2.elements_between_iff.
+  exact (select_loc_l_monotone l1).
+  exact (select_loc_h_monotone l1).
+  split. apply eqs_same; auto. auto.
+Qed.
+
+Lemma long_type_compat_charact:
+  forall env l e q,
+  long_type_compat env l e = true ->
+  EqSet.In q e ->
+  subtype (env (ereg q)) Tlong = true \/ Loc.diff l (eloc q).
+Proof.
+  unfold long_type_compat; intros.
+  rewrite EqSet2.for_all_between_iff in H.
+  destruct (select_loc_l l q) eqn: LL.
+  destruct (select_loc_h l q) eqn: LH.
+  left; apply H; auto. apply eqs_same; auto.
+  right. apply select_loc_charact. auto.
+  right. apply select_loc_charact. auto.
+  intros; subst; auto.
+  exact (select_loc_l_monotone l).
+  exact (select_loc_h_monotone l).
+Qed.
+
+Lemma subst_loc_pair_satisf_makelong:
+  forall env src1 src2 dst rs ls e e',
+  subst_loc_pair (R dst) (R src1) (R src2) e = Some e' ->
+  long_type_compat env (R dst) e = true ->
+  wt_regset env rs ->
+  satisf rs ls e' ->
+  Archi.ptr64 = false ->
+  satisf rs (Locmap.set (R dst) (Val.longofwords (ls (R src1)) (ls (R src2))) ls) e.
+Proof.
+  intros; red; intros.
+  exploit in_subst_loc_pair; eauto. intros [[A [B [C D]]] | [A B]].
+- rewrite A, B. apply H2 in C. apply H2 in D.
+  assert (subtype (env (ereg q)) Tlong = true).
+  { exploit long_type_compat_charact; eauto. intros [P|P]; auto.
+    eelim Loc.diff_not_eq; eauto. }
+  rewrite Locmap.gss. simpl. rewrite <- (val_longofwords_eq_1 rs#(ereg q)).
+  apply Val.longofwords_lessdef. exact C. exact D.
+  eapply Val.has_subtype; eauto.
+  assumption.
+- rewrite Locmap.gso; auto.
+Qed.
+
 Lemma can_undef_sound:
   forall e ml q,
   can_undef ml e = true -> EqSet.In q e -> Loc.notin (eloc q) (map R ml).
@@ -1086,7 +1308,7 @@ Lemma add_equations_res_lessdef:
 Proof.
   intros. functional inversion H; simpl.
 - subst. eapply add_equation_lessdef with (q := Eq Full r (R mr)); eauto.
-- subst. rewrite <- (val_longofwords_eq rs#r) by auto. 
+- subst. rewrite <- (val_longofwords_eq_1 rs#r) by auto.
   apply Val.longofwords_lessdef.
   eapply add_equation_lessdef with (q := Eq High r (R mr1)).
   eapply add_equation_satisf. eauto.
@@ -1109,7 +1331,7 @@ Lemma return_regs_agree_callee_save:
 Proof.
   intros; red; intros. unfold return_regs. red in H.
   destruct l.
-  rewrite H; auto. 
+  rewrite H; auto.
   destruct sl; auto || congruence.
 Qed.
 
@@ -1163,7 +1385,7 @@ Proof.
   exploit no_caller_saves_sound; eauto. intros.
   red in H5. rewrite <- H5; auto.
 - (* Two 32-bit halves *)
-  subst. rewrite <- H9 in *. simpl in *. 
+  subst. rewrite <- H9 in *. simpl in *.
   set (e' := remove_equation {| ekind := Low; ereg := res; eloc := R mr2 |}
           (remove_equation {| ekind := High; ereg := res; eloc := R mr1 |} e)) in *.
   InvBooleans.
@@ -1260,7 +1482,7 @@ Qed.
 Lemma return_regs_arg_values:
   forall sg ls1 ls2,
   tailcall_is_possible sg = true ->
-  map (fun p => Locmap.getpair p (return_regs ls1 ls2)) (loc_arguments sg) 
+  map (fun p => Locmap.getpair p (return_regs ls1 ls2)) (loc_arguments sg)
   = map (fun p => Locmap.getpair p ls2) (loc_arguments sg).
 Proof.
   intros.
@@ -1268,7 +1490,7 @@ Proof.
   apply list_map_exten; intros.
   apply Locmap.getpair_exten; intros.
   assert (In l (regs_of_rpairs (loc_arguments sg))) by (eapply in_regs_of_rpairs; eauto).
-  exploit loc_arguments_acceptable_2; eauto. exploit H; eauto. 
+  exploit loc_arguments_acceptable_2; eauto. exploit H; eauto.
   destruct l; simpl; intros; try contradiction. rewrite H4; auto.
 Qed.
 
@@ -1291,7 +1513,7 @@ Lemma loadv_int64_split:
   /\ Val.lessdef (Val.hiword v) v1
   /\ Val.lessdef (Val.loword v) v2.
 Proof.
-  intros. apply Archi.splitlong_ptr32 in H0. 
+  intros. apply Archi.splitlong_ptr32 in H0.
   exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & A & B & C).
   exists v1, v2. split; auto. split; auto.
   inv C; auto. destruct v1, v2; simpl; auto.
@@ -1324,9 +1546,8 @@ Proof.
   exploit add_equation_lessdef. eauto. simpl; intros LD1.
   exploit add_equation_lessdef. eapply add_equation_satisf. eauto. simpl; intros LD2.
   exists (Val.longofwords (ls x0) (ls x1)); split; auto with barg.
-  rewrite <- (val_longofwords_eq rs#x). apply Val.longofwords_lessdef; auto.
+  rewrite <- (val_longofwords_eq_2 rs#x); auto. apply Val.longofwords_lessdef; auto.
   rewrite <- e0; apply WT.
-  assumption.
 - econstructor; eauto with barg.
 - econstructor; eauto with barg.
 - econstructor; eauto with barg.
@@ -1534,7 +1755,7 @@ Proof.
   monadInv Heqr.
   destruct (check_entrypoints_aux f tf env x) as [y|] eqn:?; try discriminate.
   unfold check_entrypoints_aux, pair_entrypoints in Heqo0. MonadInv.
-  exploit extract_moves_sound; eauto. intros [A B]. subst b.
+  exploit extract_moves_ext_sound; eauto. intros [A B]. subst b.
   exploit check_succ_sound; eauto. intros [k EQ1]. subst b0.
   econstructor; eauto. eapply type_function_correct; eauto. congruence.
 Qed.
@@ -1639,7 +1860,8 @@ Opaque destroyed_by_op.
 - unfold expand_moves; simpl. inv H. exists ls; split. apply star_refl. auto.
   (* step *)
 - assert (wf_moves mv) by (inv H0; auto).
-  destruct a as (src, dst); unfold expand_moves; simpl; MonadInv.
+  destruct a; unfold expand_moves; simpl; MonadInv.
++ (* loc-loc move *)
   destruct src as [rsrc | ssrc]; destruct dst as [rdst | sdst].
 * (* reg-reg *)
   exploit IHmv; eauto. eapply subst_loc_undef_satisf; eauto.
@@ -1655,6 +1877,18 @@ Opaque destroyed_by_op.
   econstructor. auto. auto.
 * (* stack->stack *)
   inv H0. simpl in H6. contradiction.
++ (* makelong *)
+  exploit IHmv; eauto. eapply subst_loc_pair_satisf_makelong; eauto.
+  intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto.
+  econstructor. simpl; eauto. reflexivity. traceEq.
++ (* lowlong *)
+  exploit IHmv; eauto. eapply subst_loc_part_satisf_lowlong; eauto.
+  intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto.
+  econstructor. simpl; eauto. reflexivity. traceEq.
++ (* highlong *)
+  exploit IHmv; eauto. eapply subst_loc_part_satisf_highlong; eauto.
+  intros [ls' [A B]]. exists ls'; split; auto. eapply star_left; eauto.
+  econstructor. simpl; eauto. reflexivity. traceEq.
 Qed.
 
 (** The simulation relation *)
@@ -1749,7 +1983,7 @@ Proof.
   assert (B: In (env r) (type_of_addressing addr)).
   { rewrite <- H5. apply in_map; auto. }
   assert (C: env r = Tint).
-  { apply A in B. rewrite B. unfold Tptr. rewrite Archi.splitlong_ptr32 by auto. auto. }    
+  { apply A in B. rewrite B. unfold Tptr. rewrite Archi.splitlong_ptr32 by auto. auto. }
   red; intros; subst r. rewrite C in H8; discriminate.
 Qed.
 
@@ -2195,7 +2429,7 @@ Proof.
   with (Locmap.getpair (map_rpair R (loc_result (RTL.fn_sig f))) ls1).
   eapply add_equations_res_lessdef; eauto.
   rewrite H13. apply WTRS.
-  generalize (loc_result_caller_save (RTL.fn_sig f)). 
+  generalize (loc_result_caller_save (RTL.fn_sig f)).
   destruct (loc_result (RTL.fn_sig f)); simpl.
   intros A; rewrite A; auto.
   intros [A B]; rewrite A, B; auto.
@@ -2228,15 +2462,15 @@ Proof.
   econstructor; split.
   apply plus_one. econstructor; eauto.
   eapply external_call_symbols_preserved with (ge1 := ge); eauto. apply senv_preserved.
-  econstructor; eauto. 
+  econstructor; eauto.
   simpl. destruct (loc_result (ef_sig ef)) eqn:RES; simpl.
   rewrite Locmap.gss; auto.
-  generalize (loc_result_pair (ef_sig ef)); rewrite RES; intros (A & B & C & D & E). 
+  generalize (loc_result_pair (ef_sig ef)); rewrite RES; intros (A & B & C & D & E).
   exploit external_call_well_typed; eauto. unfold proj_sig_res; rewrite B. intros WTRES'.
-  rewrite Locmap.gss. rewrite Locmap.gso by (red; auto). rewrite Locmap.gss. 
-  rewrite val_longofwords_eq by auto. auto.
+  rewrite Locmap.gss. rewrite Locmap.gso by (red; auto). rewrite Locmap.gss.
+  rewrite val_longofwords_eq_1 by auto. auto.
   red; intros. rewrite (AG l H0).
-  symmetry; apply Locmap.gpo. 
+  symmetry; apply Locmap.gpo.
   assert (X: forall r, is_callee_save r = false -> Loc.diff l (R r)).
   { intros. destruct l; simpl in *. congruence. auto. }
   generalize (loc_result_caller_save (ef_sig ef)). destruct (loc_result (ef_sig ef)); simpl; intuition auto.
@@ -2276,18 +2510,18 @@ Lemma final_states_simulation:
   forall st1 st2 r,
   match_states st1 st2 -> RTL.final_state st1 r -> LTL.final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv STACKS. 
+  intros. inv H0. inv H. inv STACKS.
   econstructor. rewrite <- (loc_result_exten sg). inv RES; auto.
-  rewrite H; auto.  
+  rewrite H; auto.
 Qed.
- 
+
 Lemma wt_prog: wt_program prog.
 Proof.
-  red; intros. 
-  exploit list_forall2_in_left. eexact (proj1 TRANSF). eauto. 
+  red; intros.
+  exploit list_forall2_in_left. eexact (proj1 TRANSF). eauto.
   intros ([i' gd] & A & B & C). simpl in *; subst i'.
   inv C. destruct f; simpl in *.
-- monadInv H2.  
+- monadInv H2.
   unfold transf_function in EQ.
   destruct (type_function f) as [env|] eqn:TF; try discriminate.
   econstructor. eapply type_function_correct; eauto.
diff --git a/backend/Asmgenproof0.v b/backend/Asmgenproof0.v
index 2c7994e9..53ecf73a 100644
--- a/backend/Asmgenproof0.v
+++ b/backend/Asmgenproof0.v
@@ -338,7 +338,7 @@ Proof.
 - exploit extcall_arg_match; eauto. intros (v' & A & B). exists v'; split; auto. constructor; auto.
 - exploit extcall_arg_match. eauto. eauto. eexact H2. intros (v1 & A1 & B1).
   exploit extcall_arg_match. eauto. eauto. eexact H3. intros (v2 & A2 & B2).
-  exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto. 
+  exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto.
 Qed.
 
 Lemma extcall_args_match:
@@ -871,13 +871,13 @@ Inductive match_stack: list Mach.stackframe -> Prop :=
 
 Lemma parent_sp_def: forall s, match_stack s -> parent_sp s <> Vundef.
 Proof.
-  induction 1; simpl. 
+  induction 1; simpl.
   unfold Vnullptr; destruct Archi.ptr64; congruence.
   auto.
 Qed.
 
 Lemma parent_ra_def: forall s, match_stack s -> parent_ra s <> Vundef.
-Proof. 
+Proof.
   induction 1; simpl.
   unfold Vnullptr; destruct Archi.ptr64; congruence.
   inv H0. congruence.
diff --git a/backend/Bounds.v b/backend/Bounds.v
index 8a383380..93a4b504 100644
--- a/backend/Bounds.v
+++ b/backend/Bounds.v
@@ -190,7 +190,7 @@ Remark fold_left_ensures:
   (forall a, P (f a b0)) ->
   forall l a, In b0 l -> P (fold_left f l a).
 Proof.
-  induction l; simpl; intros. contradiction. 
+  induction l; simpl; intros. contradiction.
   destruct H1. subst a. apply fold_left_preserves; auto. apply IHl; auto.
 Qed.
 
@@ -199,7 +199,7 @@ Definition only_callee_saves (u: RegSet.t) : Prop :=
 
 Lemma record_reg_only: forall u r, only_callee_saves u -> only_callee_saves (record_reg u r).
 Proof.
-  unfold only_callee_saves, record_reg; intros. 
+  unfold only_callee_saves, record_reg; intros.
   destruct (is_callee_save r) eqn:CS; auto.
   destruct (mreg_eq r r0). congruence. apply H; eapply RegSet.add_3; eauto.
 Qed.
@@ -214,11 +214,11 @@ Proof.
   intros. destruct i; simpl; auto using record_reg_only, record_regs_only.
 Qed.
 
-Lemma record_regs_of_function_only: 
+Lemma record_regs_of_function_only:
   only_callee_saves record_regs_of_function.
 Proof.
   intros. unfold record_regs_of_function.
-  apply fold_left_preserves. apply record_regs_of_instr_only. 
+  apply fold_left_preserves. apply record_regs_of_instr_only.
   red; intros. eelim RegSet.empty_1; eauto.
 Qed.
 
@@ -248,7 +248,7 @@ Next Obligation.
   apply record_regs_of_function_only. apply RegSet.elements_2.
   apply InA_alt. exists r; auto.
 Qed.
-  
+
 (** We now show the correctness of the inferred bounds. *)
 
 Lemma record_reg_incr: forall u r r', RegSet.In r' u -> RegSet.In r' (record_reg u r).
@@ -268,7 +268,7 @@ Qed.
 
 Lemma record_regs_ok: forall r rl u, In r rl -> is_callee_save r = true -> RegSet.In r (record_regs u rl).
 Proof.
-  intros. unfold record_regs. eapply fold_left_ensures; eauto using record_reg_incr, record_reg_ok. 
+  intros. unfold record_regs. eapply fold_left_ensures; eauto using record_reg_incr, record_reg_ok.
 Qed.
 
 Lemma record_regs_of_instr_incr: forall r' u i, RegSet.In r' u -> RegSet.In r' (record_regs_of_instr u i).
@@ -291,7 +291,7 @@ Proof.
   destruct H; auto using record_regs_incr, record_regs_ok.
 Qed.
 
-Lemma record_regs_of_function_ok: 
+Lemma record_regs_of_function_ok:
   forall r i, In i f.(fn_code) -> defined_by_instr r i -> is_callee_save r = true -> RegSet.In r record_regs_of_function.
 Proof.
   intros. unfold record_regs_of_function.
@@ -373,9 +373,9 @@ Lemma mreg_is_within_bounds:
   forall r, defined_by_instr r i ->
   mreg_within_bounds function_bounds r.
 Proof.
-  intros. unfold mreg_within_bounds. intros. 
+  intros. unfold mreg_within_bounds. intros.
   exploit record_regs_of_function_ok; eauto. intros.
-  apply RegSet.elements_1 in H2. rewrite InA_alt in H2. destruct H2 as (r' & A & B). 
+  apply RegSet.elements_1 in H2. rewrite InA_alt in H2. destruct H2 as (r' & A & B).
   subst r'; auto.
 Qed.
 
@@ -447,9 +447,9 @@ Proof.
 Local Opaque mreg_type.
   induction l as [ | r l]; intros; simpl.
 - omega.
-- eapply Zle_trans. 2: apply IHl. 
+- eapply Zle_trans. 2: apply IHl.
   generalize (AST.typesize_pos (mreg_type r)); intros.
-  apply Zle_trans with (align ofs (AST.typesize (mreg_type r))). 
+  apply Zle_trans with (align ofs (AST.typesize (mreg_type r))).
   apply align_le; auto.
   omega.
 Qed.
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index bf152e82..8516e384 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -661,12 +661,12 @@ Proof with (try discriminate).
   }
   inv H2.
 + inv H3. exploit eval_addressing_Ainstack_inv; eauto. intros [E1 E2].
-  simpl in E2; rewrite Ptrofs.add_zero_l in E2. subst a.    
+  simpl in E2; rewrite Ptrofs.add_zero_l in E2. subst a.
   apply eq_holds_strict. econstructor. rewrite eval_addressing_Ainstack.
   simpl. rewrite Ptrofs.add_zero_l. eauto.
   apply LD; auto.
 + inv H4. exploit eval_addressing_Ainstack_inv; eauto. intros [E1 E2].
-  simpl in E2; rewrite Ptrofs.add_zero_l in E2. subst a.    
+  simpl in E2; rewrite Ptrofs.add_zero_l in E2. subst a.
   apply eq_holds_lessdef with v; auto.
   econstructor. rewrite eval_addressing_Ainstack. simpl. rewrite Ptrofs.add_zero_l. eauto.
   apply LD; auto.
diff --git a/backend/Cminor.v b/backend/Cminor.v
index e238140b..11941da3 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -61,7 +61,7 @@ Inductive unary_operation : Type :=
   | Ointofsingle: unary_operation          (**r signed integer to float32 *)
   | Ointuofsingle: unary_operation         (**r unsigned integer to float32 *)
   | Osingleofint: unary_operation          (**r float32 to signed integer *)
-  | Osingleofintu: unary_operation        (**r float32 to unsigned integer *)
+  | Osingleofintu: unary_operation         (**r float32 to unsigned integer *)
   | Onegl: unary_operation                 (**r long integer opposite *)
   | Onotl: unary_operation                 (**r long bitwise complement *)
   | Ointoflong: unary_operation            (**r long to int *)
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index fd9cfaa5..b14c4be0 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -55,7 +55,7 @@ Lemma functions_translated:
   Genv.find_funct ge v = Some f ->
   exists cunit, Genv.find_funct tge v = Some (transf_fundef (romem_for cunit) f) /\ linkorder cunit prog.
 Proof.
-  intros. exploit (Genv.find_funct_match TRANSL); eauto. 
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
   intros (cu & tf & A & B & C). subst tf. exists cu; auto.
 Qed.
 
@@ -64,7 +64,7 @@ Lemma function_ptr_translated:
   Genv.find_funct_ptr ge b = Some f ->
   exists cunit, Genv.find_funct_ptr tge b = Some (transf_fundef (romem_for cunit) f) /\ linkorder cunit prog.
 Proof.
-  intros. exploit (Genv.find_funct_ptr_match TRANSL); eauto. 
+  intros. exploit (Genv.find_funct_ptr_match TRANSL); eauto.
   intros (cu & tf & A & B & C). subst tf. exists cu; auto.
 Qed.
 
@@ -92,7 +92,7 @@ Lemma transf_ros_correct:
   ematch bc rs ae ->
   find_function ge ros rs = Some f ->
   regs_lessdef rs rs' ->
-  exists cunit, 
+  exists cunit,
      find_function tge (transf_ros ae ros) rs' = Some (transf_fundef (romem_for cunit) f)
   /\ linkorder cunit prog.
 Proof.
@@ -100,7 +100,7 @@ Proof.
 - (* function pointer *)
   generalize (EM r); fold (areg ae r); intro VM. generalize (RLD r); intro LD.
   assert (DEFAULT:
-    exists cunit, 
+    exists cunit,
        find_function tge (inl _ r) rs' = Some (transf_fundef (romem_for cunit) f)
     /\ linkorder cunit prog).
   {
@@ -131,7 +131,7 @@ Lemma const_for_result_correct:
 Proof.
   intros. exploit ConstpropOpproof.const_for_result_correct; eauto. intros (v' & A & B).
   exists v'; split.
-  rewrite <- A; apply eval_operation_preserved. exact symbols_preserved. 
+  rewrite <- A; apply eval_operation_preserved. exact symbols_preserved.
   auto.
 Qed.
 
@@ -163,10 +163,10 @@ Proof.
   try apply match_pc_base.
   eapply match_pc_cond; eauto. intros b' DYNAMIC.
   assert (b = b').
-  { eapply resolve_branch_sound; eauto. 
-    rewrite <- DYNAMIC. apply eval_static_condition_sound with bc. 
+  { eapply resolve_branch_sound; eauto.
+    rewrite <- DYNAMIC. apply eval_static_condition_sound with bc.
     apply aregs_sound; auto. }
-  subst b'. apply IHn. 
+  subst b'. apply IHn.
 Qed.
 
 Lemma match_successor:
@@ -326,7 +326,7 @@ Lemma match_states_succ:
   match_states O (State s f sp pc rs m)
                  (State s' (transf_function (romem_for cu) f) sp pc rs' m').
 Proof.
-  intros. apply match_states_intro; auto. constructor. 
+  intros. apply match_states_intro; auto. constructor.
 Qed.
 
 Lemma transf_instr_at:
@@ -506,7 +506,7 @@ Opaque builtin_strength_reduction.
 
 - (* Icond, skipped over *)
   rewrite H1 in H; inv H.
-  right; exists n; split. omega. split. auto. 
+  right; exists n; split. omega. split. auto.
   econstructor; eauto.
 
 - (* Ijumptable *)
diff --git a/backend/Conventions.v b/backend/Conventions.v
index 64a83a58..bdc4c8b6 100644
--- a/backend/Conventions.v
+++ b/backend/Conventions.v
@@ -33,7 +33,7 @@ Proof.
   exploit H; eauto. destruct p; simpl in *; intuition congruence.
   apply IHpl; auto.
 Qed.
-  
+
 (** ** Location of function parameters *)
 
 (** A function finds the values of its parameter in the same locations
@@ -65,7 +65,7 @@ Proof.
   inv A. auto.
   unfold loc_parameters. generalize (loc_arguments sg). induction l as [ | p l]; simpl; intros.
   auto.
-  rewrite map_app. f_equal; auto. destruct p; auto. 
+  rewrite map_app. f_equal; auto. destruct p; auto.
 Qed.
 
 (** * Tail calls *)
@@ -90,8 +90,8 @@ Definition tailcall_is_possible (sg: signature) : bool :=
 Lemma tailcall_is_possible_correct:
   forall s, tailcall_is_possible s = true -> tailcall_possible s.
 Proof.
-  unfold tailcall_is_possible; intros. rewrite forallb_forall in H. 
-  red; intros. apply H in H0. destruct l; [auto|discriminate]. 
+  unfold tailcall_is_possible; intros. rewrite forallb_forall in H.
+  red; intros. apply H in H0. destruct l; [auto|discriminate].
 Qed.
 
 Lemma zero_size_arguments_tailcall_possible:
diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v
index fa402e9f..3f0c5a4c 100644
--- a/backend/Deadcodeproof.v
+++ b/backend/Deadcodeproof.v
@@ -1101,7 +1101,7 @@ Proof.
   exists (Callstate nil tf nil m0); split.
   econstructor; eauto.
   eapply (Genv.init_mem_match TRANSF); eauto.
-  replace (prog_main tprog) with (prog_main prog). 
+  replace (prog_main tprog) with (prog_main prog).
   rewrite symbols_preserved. eauto.
   symmetry; eapply match_program_main; eauto.
   rewrite <- H3. eapply sig_function_translated; eauto.
diff --git a/backend/Inliningproof.v b/backend/Inliningproof.v
index d5d7e033..bc991f0f 100644
--- a/backend/Inliningproof.v
+++ b/backend/Inliningproof.v
@@ -400,7 +400,7 @@ Proof.
   simpl in H0. unfold ge, fundef in H0. rewrite A in H0.
   rewrite <- Genv.find_funct_ptr_iff in B.
   congruence.
-Qed. 
+Qed.
 
 (** Translation of builtin arguments. *)
 
diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v
index 331f8b06..dfd96333 100644
--- a/backend/Inliningspec.v
+++ b/backend/Inliningspec.v
@@ -52,9 +52,9 @@ Proof.
             P dm fenv ->
             P (fold_left (fun x idg => PTree.set (fst idg) (snd idg) x) l dm)
               (fold_left add_globdef l fenv)).
-  { induction l; simpl; intros. 
+  { induction l; simpl; intros.
   - auto.
-  - apply IHl. apply ADD; auto. 
+  - apply IHl. apply ADD; auto.
   }
   intros. apply REC. red; intros.  rewrite PTree.gempty in H; discriminate.
 Qed.
@@ -63,8 +63,8 @@ Lemma fenv_compat_linkorder:
   forall cunit prog fenv,
   linkorder cunit prog -> fenv_compat cunit fenv -> fenv_compat prog fenv.
 Proof.
-  intros; red; intros. apply H0 in H1. 
-  destruct (prog_defmap_linkorder _ _ _ _ H H1) as (gd' & P & Q). 
+  intros; red; intros. apply H0 in H1.
+  destruct (prog_defmap_linkorder _ _ _ _ H H1) as (gd' & P & Q).
   inv Q. inv H3. auto.
 Qed.
 
@@ -702,7 +702,7 @@ Lemma tr_function_linkorder:
   tr_function cunit f f' ->
   tr_function prog f f'.
 Proof.
-  intros. inv H0. econstructor; eauto. eapply fenv_compat_linkorder; eauto. 
+  intros. inv H0. econstructor; eauto. eapply fenv_compat_linkorder; eauto.
 Qed.
 
 Lemma transf_function_spec:
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index e13ffb40..30cc0d91 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -164,7 +164,7 @@ Proof.
   intros. generalize (loc_result_pair sg) (loc_result_type sg).
   destruct (loc_result sg); simpl Locmap.setpair.
 - intros. apply wt_setreg; auto. eapply Val.has_subtype; eauto.
-- intros (A & B & C & D & E) F. 
+- intros A B. decompose [and] A.
   apply wt_setreg. eapply Val.has_subtype; eauto. destruct v; exact I.
   apply wt_setreg. eapply Val.has_subtype; eauto. destruct v; exact I.
   auto.
diff --git a/backend/Locations.v b/backend/Locations.v
index 52abfc46..ca148761 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -403,7 +403,7 @@ Module Locmap.
     (forall l, In l (regs_of_rpair p) -> ls2 l = ls1 l) ->
     getpair p ls2 = getpair p ls1.
   Proof.
-    intros. destruct p; simpl. 
+    intros. destruct p; simpl.
     apply H; simpl; auto.
     f_equal; apply H; simpl; auto.
   Qed.
@@ -412,7 +412,7 @@ Module Locmap.
     forall p v m l,
     forall_rpair (fun r => Loc.diff l (R r)) p -> setpair p v m l = m l.
   Proof.
-    intros; destruct p; simpl in *. 
+    intros; destruct p; simpl in *.
   - apply gso. apply Loc.diff_sym; auto.
   - destruct H. rewrite ! gso by (apply Loc.diff_sym; auto). auto.
   Qed.
diff --git a/backend/PrintAsmaux.ml b/backend/PrintAsmaux.ml
index 09630e29..c8f8ea82 100644
--- a/backend/PrintAsmaux.ml
+++ b/backend/PrintAsmaux.ml
@@ -72,6 +72,7 @@ let elf_label oc lbl =
 
 let float64_literals : (int * int64) list ref = ref []
 let float32_literals : (int * int32) list ref = ref []
+let int64_literals : (int * int64) list ref = ref []
 let jumptables : (int * label list) list ref = ref []
 
 let reset_constants () =
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index f9f01d49..9992ab79 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -693,7 +693,7 @@ Proof.
   rewrite A; simpl; rewrite C; simpl.
   rewrite H2; rewrite dec_eq_true.
   replace (tailcall_is_possible sig) with true; auto.
-  symmetry. unfold tailcall_is_possible. apply forallb_forall. 
+  symmetry. unfold tailcall_is_possible. apply forallb_forall.
   intros. apply H3 in H4. destruct x; intuition auto.
 - (* builtin *)
   exploit type_builtin_args_complete; eauto. instantiate (1 := args). intros [e1 [A B]].
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index cfaf422d..c14852f4 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -605,6 +605,17 @@ let add_interfs_destroyed g live mregs =
     (fun mr -> VSet.iter (IRC.add_interf g (L (R mr))) live)
     mregs
 
+let add_interfs_caller_save g live =
+  VSet.iter
+    (fun v ->
+       let tv = typeof v in
+       List.iter
+         (fun mr ->
+            if not (is_callee_save mr && subtype tv (callee_save_type mr))
+            then IRC.add_interf g (L (R mr)) v)
+         all_mregs)
+    live
+
 let add_interfs_live g live v =
   VSet.iter (fun v' -> IRC.add_interf g v v') live
 
@@ -622,7 +633,14 @@ let add_interfs_instr g instr live =
   match instr with
   | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) ->
       IRC.add_pref g src dst;
-      add_interfs_move g src dst live
+      add_interfs_move g src dst live;
+      (* Reloads from incoming slots can occur when some 64-bit
+         parameters are split and passed as two 32-bit stack locations. *)
+      begin match src with
+      | L(Locations.S(Incoming, _, _)) -> 
+          add_interfs_def g (vmreg temp_for_parent_frame) live
+      | _ -> ()
+      end
   | Xparmove(srcs, dsts, itmp, ftmp) ->
       List.iter2 (IRC.add_pref g) srcs dsts;
       (* Interferences with live across *)
@@ -636,20 +654,10 @@ let add_interfs_instr g instr live =
       add_interfs_list g ftmp srcs; add_interfs_list g ftmp dsts;
       (* Take into account destroyed reg when accessing Incoming param *)
       if List.exists (function (L(Locations.S(Incoming, _, _))) -> true | _ -> false) srcs
-      then add_interfs_list g (vmreg temp_for_parent_frame) dsts;
-      (* Take into account destroyed reg when initializing Outgoing
-         arguments of type Tsingle *)
-      if List.exists
-           (function (L(Locations.S(Outgoing, _, Tsingle))) -> true | _ -> false) dsts
-      then
-        List.iter
-          (fun mr ->
-            add_interfs_list g (vmreg mr) srcs;
-            IRC.add_interf g (vmreg mr) ftmp)
-          (destroyed_by_setstack Tsingle)
-  | Xop(Ofloatofsingle, arg1::_, res) when Configuration.arch = "powerpc" ->
-      add_interfs_def g res live;
-      IRC.add_pref g arg1 res
+      then begin
+         add_interfs_list g (vmreg temp_for_parent_frame) dsts;
+         add_interfs_live g across (vmreg temp_for_parent_frame)
+      end
   | Xop(op, args, res) ->
       begin match is_two_address op args with
       | None ->
@@ -672,7 +680,7 @@ let add_interfs_instr g instr live =
       begin match vos with
       | Coq_inl v ->  List.iter (fun r -> IRC.add_interf g (vmreg r) v) destroyed_at_indirect_call
       | _ -> () end;
-      add_interfs_destroyed g (vset_removelist res live) destroyed_at_call
+      add_interfs_caller_save g (vset_removelist res live)
   | Xtailcall(sg, Coq_inl v, args) ->
       List.iter (fun r -> IRC.add_interf g (vmreg r) v) (int_callee_save_regs @ destroyed_at_indirect_call)
   | Xtailcall(sg, Coq_inr id, args) ->
diff --git a/backend/SelectDiv.vp b/backend/SelectDiv.vp
index 96b07e28..d91797c5 100644
--- a/backend/SelectDiv.vp
+++ b/backend/SelectDiv.vp
@@ -123,7 +123,7 @@ Definition divuimm (e1: expr) (n2: int) :=
         end
   end.
 
-Definition divu (e1: expr) (e2: expr) := 
+Definition divu (e1: expr) (e2: expr) :=
   match is_intconst e2, is_intconst e1 with
   | Some n2, Some n1 =>
       if Int.eq n2 Int.zero
@@ -149,7 +149,7 @@ Definition moduimm (e1: expr) (n2: int) :=
         end
   end.
 
-Definition modu (e1: expr) (e2: expr) := 
+Definition modu (e1: expr) (e2: expr) :=
   match is_intconst e2, is_intconst e1 with
   | Some n2, Some n1 =>
       if Int.eq n2 Int.zero
@@ -169,7 +169,7 @@ Definition divs_mul (p: Z) (m: Z) :=
 
 Definition divsimm (e1: expr) (n2: int) :=
   match Int.is_power2 n2 with
-  | Some l => 
+  | Some l =>
       if Int.ltu l (Int.repr 31)
       then shrximm e1 l
       else divs_base e1 (Eop (Ointconst n2) Enil)
@@ -183,7 +183,7 @@ Definition divsimm (e1: expr) (n2: int) :=
         end
   end.
 
-Definition divs (e1: expr) (e2: expr) := 
+Definition divs (e1: expr) (e2: expr) :=
   match is_intconst e2, is_intconst e1 with
   | Some n2, Some n1 =>
       if Int.eq n2 Int.zero
@@ -209,7 +209,7 @@ Definition modsimm (e1: expr) (n2: int) :=
         end
   end.
 
-Definition mods (e1: expr) (e2: expr) := 
+Definition mods (e1: expr) (e2: expr) :=
   match is_intconst e2, is_intconst e1 with
   | Some n2, Some n1 =>
       if Int.eq n2 Int.zero
@@ -266,7 +266,7 @@ Definition modlu (e1 e2: expr) :=
   end.
 
 Definition divls_mull (p: Z) (m: Z) :=
-  let e2 := 
+  let e2 :=
     mullhs (Eletvar O) (Int64.repr m) in
   let e3 :=
     if zlt m Int64.half_modulus then e2 else addl e2 (Eletvar O) in
diff --git a/backend/SelectDivproof.v b/backend/SelectDivproof.v
index 5704b32b..fe5bfe28 100644
--- a/backend/SelectDivproof.v
+++ b/backend/SelectDivproof.v
@@ -184,7 +184,7 @@ Proof with (try discriminate).
   destruct (find_div_mul_params Int.wordsize
                (Int.half_modulus - Int.half_modulus mod d - 1) d 32)
   as [[p m] | ]...
-  generalize (p - 32). intro p1.  
+  generalize (p - 32). intro p1.
   destruct (zlt 0 d)...
   destruct (zlt (two_p (32 + p1)) (m * d))...
   destruct (zle (m * d) (two_p (32 + p1) + two_p (p1 + 1)))...
@@ -192,7 +192,7 @@ Proof with (try discriminate).
   destruct (zlt m Int.modulus)...
   destruct (zle 0 p1)...
   destruct (zlt p1 32)...
-  intros EQ; inv EQ. 
+  intros EQ; inv EQ.
   split. auto. split. auto. intros.
   replace (32 + p') with (31 + (p' + 1)) by omega.
   apply Zquot_mul; try omega.
@@ -331,7 +331,7 @@ Proof with (try discriminate).
   destruct (find_div_mul_params Int64.wordsize
                (Int64.half_modulus - Int64.half_modulus mod d - 1) d 64)
   as [[p m] | ]...
-  generalize (p - 64). intro p1.  
+  generalize (p - 64). intro p1.
   destruct (zlt 0 d)...
   destruct (zlt (two_p (64 + p1)) (m * d))...
   destruct (zle (m * d) (two_p (64 + p1) + two_p (p1 + 1)))...
@@ -339,7 +339,7 @@ Proof with (try discriminate).
   destruct (zlt m Int64.modulus)...
   destruct (zle 0 p1)...
   destruct (zlt p1 64)...
-  intros EQ; inv EQ. 
+  intros EQ; inv EQ.
   split. auto. split. auto. intros.
   replace (64 + p') with (63 + (p' + 1)) by omega.
   apply Zquot_mul; try omega.
@@ -746,7 +746,7 @@ Proof.
   unfold modl_from_divl; intros.
   exploit eval_mullimm; eauto. instantiate (1 := n). intros (v1 & A1 & B1).
   assert (A0: eval_expr ge sp e m le (Eletvar O) (Vlong x)) by (constructor; auto).
-  exploit eval_subl; auto. eexact A0. eexact A1.
+  exploit eval_subl ; auto ; try apply HELPERS. exact A0. exact A1.
   intros (v2 & A2 & B2).
   simpl in B1; inv B1. simpl in B2; inv B2. exact A2.
 Qed.
@@ -784,11 +784,11 @@ Proof.
 + destruct (Int64.is_power2' n2) as [l|] eqn:POW.
 * exploit Val.divlu_pow2; eauto. intros EQ; subst z. apply eval_shrluimm; auto.
 * destruct (Compopts.optim_for_size tt). eapply eval_divlu_base; eauto.
-  destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS. 
+  destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero); inv H1.
-   econstructor; split; eauto. econstructor. eauto. eapply eval_divlu_mull; eauto.  
-** eapply eval_divlu_base; eauto.     
+   econstructor; split; eauto. econstructor. eauto. eapply eval_divlu_mull; eauto.
+** eapply eval_divlu_base; eauto.
 - eapply eval_divlu_base; eauto.
 Qed.
 
@@ -809,15 +809,15 @@ Proof.
 + destruct (Int64.is_power2 n2) as [l|] eqn:POW.
 * exploit Val.modlu_pow2; eauto. intros EQ; subst z. eapply eval_andl; eauto. apply eval_longconst.
 * destruct (Compopts.optim_for_size tt). eapply eval_modlu_base; eauto.
-  destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS. 
+  destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero) eqn:Z; inv H1.
-   rewrite Int64.modu_divu. 
+   rewrite Int64.modu_divu.
     econstructor; split; eauto. econstructor. eauto.
-    eapply eval_modl_from_divl; eauto.  
+    eapply eval_modl_from_divl; eauto.
     eapply eval_divlu_mull; eauto.
-    red; intros; subst n2; discriminate Z.  
-** eapply eval_modlu_base; eauto.     
+    red; intros; subst n2; discriminate Z.
+** eapply eval_modlu_base; eauto.
 - eapply eval_modlu_base; eauto.
 Qed.
 
@@ -831,16 +831,16 @@ Proof.
   assert (A0: eval_expr ge sp e m le (Eletvar O) (Vlong x)).
   { constructor; auto. }
   exploit eval_mullhs. eauto. eexact A0. instantiate (1 := Int64.repr M).  intros (v1 & A1 & B1).
-  exploit eval_addl; auto. eexact A1. eexact A0. intros (v2 & A2 & B2).
+  exploit eval_addl; auto; try apply HELPERS. eexact A1. eexact A0. intros (v2 & A2 & B2).
   exploit eval_shrluimm. eauto. eexact A0. instantiate (1 := Int.repr 63). intros (v3 & A3 & B3).
   set (a4 := if zlt M Int64.half_modulus
              then mullhs (Eletvar 0) (Int64.repr M)
              else addl (mullhs (Eletvar 0) (Int64.repr M)) (Eletvar 0)).
   set (v4 := if zlt M Int64.half_modulus then v1 else v2).
-  assert (A4: eval_expr ge sp e m le a4 v4). 
+  assert (A4: eval_expr ge sp e m le a4 v4).
   { unfold a4, v4; destruct (zlt M Int64.half_modulus); auto. }
   exploit eval_shrlimm. eauto. eexact A4. instantiate (1 := Int.repr p). intros (v5 & A5 & B5).
-  exploit eval_addl; auto. eexact A5. eexact A3. intros (v6 & A6 & B6).
+  exploit eval_addl; auto; try apply HELPERS. eexact A5. eexact A3. intros (v6 & A6 & B6).
   assert (RANGE: forall x, 0 <= x < 64 -> Int.ltu (Int.repr x) Int64.iwordsize' = true).
   { intros. unfold Int.ltu. rewrite Int.unsigned_repr. rewrite zlt_true by tauto. auto.
     assert (64 < Int.max_unsigned) by (compute; auto). omega. }
@@ -850,11 +850,11 @@ Proof.
   destruct (zlt M Int64.half_modulus).
 - exploit (divls_mul_shift_1 x); eauto. intros [A B].
   simpl in B5; rewrite RANGE in B5 by auto; inv B5.
-  simpl in B6; inv B6. 
+  simpl in B6; inv B6.
   rewrite B; exact A6.
 - exploit (divls_mul_shift_2 x); eauto. intros [A B].
   simpl in B5; rewrite RANGE in B5 by auto; inv B5.
-  simpl in B6; inv B6. 
+  simpl in B6; inv B6.
   rewrite B; exact A6.
 Qed.
 
@@ -870,7 +870,7 @@ Proof.
 - assert (y = Vlong n2) by (eapply is_longconst_sound; eauto). subst y.
   destruct (is_longconst a) as [n1|] eqn:N1.
 + assert (x = Vlong n1) by (eapply is_longconst_sound; eauto). subst x.
-  simpl in H1. 
+  simpl in H1.
   destruct (Int64.eq n2 Int64.zero
          || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); inv H1.
   econstructor; split. apply eval_longconst. constructor.
@@ -879,7 +879,7 @@ Proof.
 ** exploit Val.divls_pow2; eauto. intros EQ. eapply eval_shrxlimm; eauto.
 ** eapply eval_divls_base; eauto.
 * destruct (Compopts.optim_for_size tt). eapply eval_divls_base; eauto.
-  destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS. 
+  destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero
              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); inv H1.
@@ -901,7 +901,7 @@ Proof.
 - assert (y = Vlong n2) by (eapply is_longconst_sound; eauto). subst y.
   destruct (is_longconst a) as [n1|] eqn:N1.
 + assert (x = Vlong n1) by (eapply is_longconst_sound; eauto). subst x.
-  simpl in H1. 
+  simpl in H1.
   destruct (Int64.eq n2 Int64.zero
          || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); inv H1.
   econstructor; split. apply eval_longconst. constructor.
@@ -917,19 +917,19 @@ Proof.
   assert (A: eval_expr ge sp e m le' (Eletvar O) (Vlong i)) by (constructor; auto).
   exploit eval_shrxlimm; eauto. intros (v1 & A1 & B1). inv B1.
   econstructor; split.
-  econstructor. eauto. eapply eval_modl_from_divl. eexact A1. reflexivity.      
+  econstructor. eauto. eapply eval_modl_from_divl. eexact A1. reflexivity.
   rewrite Int64.mods_divs. auto.
 **eapply eval_modls_base; eauto.
 * destruct (Compopts.optim_for_size tt). eapply eval_modls_base; eauto.
-  destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS. 
+  destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero
              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); inv H1.
    econstructor; split; eauto. econstructor. eauto.
-   rewrite Int64.mods_divs. 
+   rewrite Int64.mods_divs.
    eapply eval_modl_from_divl; auto.
    eapply eval_divls_mull; eauto.
-** eapply eval_modls_base; eauto.     
+** eapply eval_modls_base; eauto.
 - eapply eval_modls_base; eauto.
 Qed.
 
diff --git a/backend/Selection.v b/backend/Selection.v
index abda1d95..f278ed0b 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -339,7 +339,7 @@ Definition sel_fundef (dm: PTree.t globdef) (hf: helper_functions) (f: Cminor.fu
 (** We build a partial mapping from global identifiers to their definitions,
   restricting ourselves to the globals we are interested in, namely
   the external function declarations that are marked as runtime library
-  helpers. 
+  helpers.
   This ensures that the mapping remains small and that [lookup_helper]
   below is efficient. *)
 
@@ -350,7 +350,7 @@ Definition globdef_of_interest (gd: globdef) : bool :=
   end.
 
 Definition record_globdefs (defmap: PTree.t globdef) : PTree.t globdef :=
-  PTree.fold 
+  PTree.fold
     (fun m id gd => if globdef_of_interest gd then PTree.set id gd m else m)
     defmap (PTree.empty globdef).
 
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 90e50338..ebc64c6f 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -34,14 +34,14 @@ Definition match_prog (p: Cminor.program) (tp: CminorSel.program) :=
 Lemma record_globdefs_sound:
   forall dm id gd, (record_globdefs dm)!id = Some gd -> dm!id = Some gd.
 Proof.
-  intros. 
+  intros.
   set (f := fun m id gd => if globdef_of_interest gd then PTree.set id gd m else m) in *.
   set (P := fun m m' => m'!id = Some gd -> m!id = Some gd).
   assert (X: P dm (PTree.fold f dm (PTree.empty _))).
   { apply PTree_Properties.fold_rec.
   - unfold P; intros. rewrite <- H0; auto.
   - red. rewrite ! PTree.gempty. auto.
-  - unfold P; intros. rewrite PTree.gsspec. unfold f in H3. 
+  - unfold P; intros. rewrite PTree.gsspec. unfold f in H3.
     destruct (globdef_of_interest v).
     + rewrite PTree.gsspec in H3. destruct (peq id k); auto.
     + apply H2 in H3. destruct (peq id k). congruence. auto. }
@@ -91,7 +91,7 @@ Qed.
 Theorem transf_program_match:
   forall p tp, sel_program p = OK tp -> match_prog p tp.
 Proof.
-  intros. monadInv H. 
+  intros. monadInv H.
   eapply match_transform_partial_program_contextual. eexact EQ0.
   intros. exists x; split; auto. apply get_helpers_correct; auto.
 Qed.
@@ -100,10 +100,10 @@ Lemma helper_functions_declared_linkorder:
   forall (p p': Cminor.program) hf,
   helper_functions_declared p hf -> linkorder p p' -> helper_functions_declared p' hf.
 Proof.
-  intros. 
+  intros.
   assert (X: forall id name sg, helper_declared p id name sg -> helper_declared p' id name sg).
   { unfold helper_declared; intros.
-    destruct (prog_defmap_linkorder _ _ _ _ H0 H1) as (gd & P & Q). 
+    destruct (prog_defmap_linkorder _ _ _ _ H0 H1) as (gd & P & Q).
     inv Q. inv H3. auto. }
   red in H. decompose [Logic.and] H; clear H. red; auto 20.
 Qed.
@@ -139,7 +139,7 @@ Lemma functions_translated:
   exists cu tf, Genv.find_funct tge v' = Some tf /\ match_fundef cu f tf /\ linkorder cu prog.
 Proof.
   intros. inv H0.
-  eapply Genv.find_funct_match; eauto. 
+  eapply Genv.find_funct_match; eauto.
   discriminate.
 Qed.
 
@@ -159,11 +159,11 @@ Lemma helper_functions_preserved:
   forall hf, helper_functions_declared prog hf -> helper_functions_declared tprog hf.
 Proof.
   assert (X: forall id name sg, helper_declared prog id name sg -> helper_declared tprog id name sg).
-  { unfold helper_declared; intros. 
+  { unfold helper_declared; intros.
     generalize (match_program_defmap _ _ _ _ _ TRANSF id).
     unfold Cminor.fundef; rewrite H; intros R; inv R. inv H2.
     destruct H4 as (cu & A & B). monadInv B. auto. }
-  unfold helper_functions_declared; intros. decompose [Logic.and] H; clear H. auto 20. 
+  unfold helper_functions_declared; intros. decompose [Logic.and] H; clear H. auto 20.
 Qed.
 
 Section CMCONSTR.
@@ -354,13 +354,13 @@ Proof.
   exploit expr_is_addrof_ident_correct; eauto. intros EQ; subst a.
   inv H0. inv H3. unfold Genv.symbol_address in *.
   destruct (Genv.find_symbol ge id) as [b|] eqn:FS; try discriminate.
-  rewrite Genv.find_funct_find_funct_ptr in H1. 
+  rewrite Genv.find_funct_find_funct_ptr in H1.
   assert (DFL: exists b1, Genv.find_symbol ge id = Some b1 /\ Vptr b Ptrofs.zero = Vptr b1 Ptrofs.zero) by (exists b; auto).
   unfold globdef; destruct (prog_defmap unit)!id as [[[f|ef] |gv] |] eqn:G; auto.
   destruct (ef_inline ef) eqn:INLINE; auto.
   destruct (prog_defmap_linkorder _ _ _ _ H G) as (gd & P & Q).
-  inv Q. inv H2. 
-- apply Genv.find_def_symbol in P. destruct P as (b' & X & Y). fold ge in X, Y. 
+  inv Q. inv H2.
+- apply Genv.find_def_symbol in P. destruct P as (b' & X & Y). fold ge in X, Y.
   rewrite <- Genv.find_funct_ptr_iff in Y. congruence.
 - simpl in INLINE. discriminate.
 Qed.
@@ -459,6 +459,17 @@ Qed.
 
 End SEL_SWITCH.
 
+Section SEL_SWITH_INT.
+
+Variable cunit: Cminor.program.
+Variable hf: helper_functions.
+Hypothesis LINK: linkorder cunit prog.
+Hypothesis HF: helper_functions_declared cunit hf.
+
+Let HF': helper_functions_declared tprog hf.
+Proof.
+  apply helper_functions_preserved. eapply helper_functions_declared_linkorder; eauto.
+Qed.
 Lemma sel_switch_int_correct:
   forall dfl cases arg sp e m i t le,
   validate_switch Int.modulus dfl cases t = true ->
@@ -517,7 +528,7 @@ Proof.
   rewrite Int64.unsigned_repr. destruct (zlt (Int64.unsigned n0) n); auto.
   unfold Int64.max_unsigned; omega.
 - intros until n; intros EVAL R RANGE.
-  exploit eval_subl; auto. eexact EVAL. apply eval_longconst with (n := Int64.repr n).
+  exploit eval_subl; auto; try apply HF'. eexact EVAL. apply eval_longconst with (n := Int64.repr n).
   intros (vb & A & B).
   inv R. simpl in B. inv B. econstructor; split; eauto.
   replace ((Int64.unsigned n0 - n) mod Int64.modulus)
@@ -535,6 +546,8 @@ Proof.
 - apply Int64.unsigned_range.
 Qed.
 
+End SEL_SWITH_INT.
+
 (** Compatibility of evaluation functions with the "less defined than" relation. *)
 
 Ltac TrivialExists :=
@@ -561,7 +574,7 @@ Lemma eval_binop_lessdef:
 Proof.
   intros until m'; intros EV LD1 LD2 ME.
   assert (exists v', eval_binop op v1' v2' m = Some v' /\ Val.lessdef v v').
-  { inv LD1. inv LD2. exists v; auto. 
+  { inv LD1. inv LD2. exists v; auto.
     destruct op; destruct v1'; simpl in *; inv EV; TrivialExists.
     destruct op; simpl in *; inv EV; TrivialExists. }
   assert (CMPU: forall c,
@@ -648,7 +661,7 @@ Proof.
   exists (Vint i); split; auto. econstructor. constructor. auto.
   exists (Vfloat f); split; auto. econstructor. constructor. auto.
   exists (Vsingle f); split; auto. econstructor. constructor. auto.
-  exists (Vlong i); split; auto. apply eval_longconst. 
+  exists (Vlong i); split; auto. apply eval_longconst.
   unfold Genv.symbol_address; rewrite <- symbols_preserved; fold (Genv.symbol_address tge i i0). apply eval_addrsymbol.
   apply eval_addrstack.
   (* Eunop *)
@@ -808,7 +821,7 @@ Remark call_cont_commut:
 Proof.
   induction 1; simpl; auto; red; intros.
 - constructor.
-- eapply match_cont_call with (hf := hf); eauto. 
+- eapply match_cont_call with (hf := hf); eauto.
 Qed.
 
 Remark match_is_call_cont:
@@ -816,7 +829,7 @@ Remark match_is_call_cont:
 Proof.
   destruct 1; intros; try contradiction; red; intros.
 - constructor.
-- eapply match_cont_call with (hf := hf); eauto. 
+- eapply match_cont_call with (hf := hf); eauto.
 Qed.
 
 Remark match_call_cont_cont:
@@ -920,7 +933,7 @@ Proof.
   econstructor; eauto. econstructor; eauto.
   eapply sig_function_translated; eauto.
   eapply match_callstate with (cunit := cunit'); eauto.
-  red; intros. eapply match_cont_call with (cunit := cunit) (hf := hf); eauto. 
+  red; intros. eapply match_cont_call with (cunit := cunit) (hf := hf); eauto.
 + (* direct *)
   intros [b [U V]].
   exploit sel_exprlist_correct; eauto. intros [vargs' [C D]].
@@ -930,7 +943,7 @@ Proof.
   subst vf. econstructor; eauto. rewrite symbols_preserved; eauto.
   eapply sig_function_translated; eauto.
   eapply match_callstate with (cunit := cunit'); eauto.
-  red; intros; eapply match_cont_call with (cunit := cunit) (hf := hf); eauto. 
+  red; intros; eapply match_cont_call with (cunit := cunit) (hf := hf); eauto.
 + (* turned into Sbuiltin *)
   intros EQ. subst fd.
   exploit sel_builtin_args_correct; eauto. intros [vargs' [C D]].
@@ -943,7 +956,7 @@ Proof.
   exploit sel_exprlist_correct; eauto. intros [vargs' [C D]].
   exploit functions_translated; eauto. intros (cunit' & fd' & E & F & G).
   left; econstructor; split.
-  exploit classify_call_correct. eexact LINK. eauto. eauto. 
+  exploit classify_call_correct. eexact LINK. eauto. eauto.
   destruct (classify_call (prog_defmap cunit)) as [ | id | ef]; intros.
   econstructor; eauto. econstructor; eauto. eapply sig_function_translated; eauto.
   destruct H2 as [b [U V]]. subst vf. inv B.
@@ -1021,7 +1034,7 @@ Proof.
   econstructor; eauto.
   econstructor; eauto.
 - (* internal function *)
-  destruct TF as (hf & HF & TF). specialize (MC cunit hf). 
+  destruct TF as (hf & HF & TF). specialize (MC cunit hf).
   monadInv TF. generalize EQ; intros TF; monadInv TF.
   exploit Mem.alloc_extends. eauto. eauto. apply Zle_refl. apply Zle_refl.
   intros [m2' [A B]].
@@ -1029,7 +1042,7 @@ Proof.
   econstructor; simpl; eauto.
   econstructor; simpl; eauto. apply set_locals_lessdef. apply set_params_lessdef; auto.
 - (* external call *)
-  destruct TF as (hf & HF & TF). 
+  destruct TF as (hf & HF & TF).
   monadInv TF.
   exploit external_call_mem_extends; eauto.
   intros [vres' [m2 [A [B [C D]]]]].
@@ -1043,7 +1056,7 @@ Proof.
   econstructor. eauto. eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 - (* return *)
-  apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC). 
+  apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC).
   inv MC.
   left; econstructor; split.
   econstructor.
@@ -1073,7 +1086,7 @@ Lemma sel_final_states:
   match_states S R -> Cminor.final_state S r -> final_state R r.
 Proof.
   intros. inv H0. inv H.
-  apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC). 
+  apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC).
   inv MC. inv LD. constructor.
 Qed.
 
@@ -1081,7 +1094,7 @@ Theorem transf_program_correct:
   forward_simulation (Cminor.semantics prog) (CminorSel.semantics tprog).
 Proof.
   apply forward_simulation_opt with (match_states := match_states) (measure := measure).
-  apply senv_preserved. 
+  apply senv_preserved.
   apply sel_initial_states; auto.
   apply sel_final_states; auto.
   apply sel_step_correct; auto.
@@ -1101,5 +1114,5 @@ Local Transparent Linker_fundef.
 - discriminate.
 - destruct e; inv H2. econstructor; eauto.
 - destruct e; inv H2. econstructor; eauto.
-- destruct (external_function_eq e e0); inv H2. econstructor; eauto. 
+- destruct (external_function_eq e e0); inv H2. econstructor; eauto.
 Qed.
diff --git a/backend/SplitLong.vp b/backend/SplitLong.vp
index cbf7fa30..de954482 100644
--- a/backend/SplitLong.vp
+++ b/backend/SplitLong.vp
@@ -130,7 +130,7 @@ Definition negl (e: expr) :=
 Definition notl (e: expr) :=
   splitlong e (fun h l => makelong (notint h) (notint l)).
 
-Definition longoffloat (arg: expr) := 
+Definition longoffloat (arg: expr) :=
   Eexternal i64_dtos sig_f_l (arg ::: Enil).
 Definition longuoffloat (arg: expr) :=
   Eexternal i64_dtou sig_f_l (arg ::: Enil).
@@ -138,7 +138,7 @@ Definition floatoflong (arg: expr) :=
   Eexternal i64_stod sig_l_f (arg ::: Enil).
 Definition floatoflongu (arg: expr) :=
   Eexternal i64_utod sig_l_f (arg ::: Enil).
-Definition longofsingle (arg: expr) := 
+Definition longofsingle (arg: expr) :=
   longoffloat (floatofsingle arg).
 Definition longuofsingle (arg: expr) :=
   longuoffloat (floatofsingle arg).
diff --git a/backend/SplitLongproof.v b/backend/SplitLongproof.v
index 3b1eaa6b..fd1fdebd 100644
--- a/backend/SplitLongproof.v
+++ b/backend/SplitLongproof.v
@@ -96,7 +96,7 @@ Lemma eval_helper:
 Proof.
   intros.
   red in H0. apply Genv.find_def_symbol in H0. destruct H0 as (b & P & Q).
-  rewrite <- Genv.find_funct_ptr_iff in Q. 
+  rewrite <- Genv.find_funct_ptr_iff in Q.
   econstructor; eauto.
 Qed.
 
@@ -363,7 +363,7 @@ Qed.
 Theorem eval_longofint: unary_constructor_sound longofint Val.longofint.
 Proof.
   red; intros. unfold longofint. destruct (longofint_match a).
-- InvEval. econstructor; split. apply eval_longconst. auto.   
+- InvEval. econstructor; split. apply eval_longconst. auto.
 - exploit (eval_shrimm ge sp e m (Int.repr 31) (x :: le) (Eletvar 0)). EvalOp.
   intros [v1 [A B]].
   econstructor; split. EvalOp.
@@ -725,7 +725,7 @@ Qed.
 
 Theorem eval_addl: Archi.ptr64 = false -> binary_constructor_sound addl Val.addl.
 Proof.
-  unfold addl; red; intros. 
+  unfold addl; red; intros.
   set (default := Ebuiltin (EF_builtin "__builtin_addl" sig_ll_l) (a ::: b ::: Enil)).
   assert (DEFAULT:
     exists v, eval_expr ge sp e m le default v /\ Val.lessdef (Val.addl x y) v).
@@ -806,7 +806,7 @@ Proof.
   exists v; split; auto.
   destruct x; simpl; auto.
   erewrite Int64.mul_pow2' by eauto.
-  simpl in B. erewrite Int64.is_power2'_range in B by eauto. 
+  simpl in B. erewrite Int64.is_power2'_range in B by eauto.
   exact B.
   apply eval_mull_base; auto. apply eval_longconst.
 Qed.
@@ -828,18 +828,18 @@ Proof.
 - apply eval_mull_base; auto.
 Qed.
 
-Theorem eval_mullhu: 
+Theorem eval_mullhu:
   forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)).
 Proof.
-  unfold mullhu; intros; red; intros. econstructor; split; eauto. 
-  eapply eval_helper_2; eauto. apply eval_longconst. DeclHelper; eauto. UseHelper. 
+  unfold mullhu; intros; red; intros. econstructor; split; eauto.
+  eapply eval_helper_2; eauto. apply eval_longconst. DeclHelper; eauto. UseHelper.
 Qed.
 
-Theorem eval_mullhs: 
+Theorem eval_mullhs:
   forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)).
 Proof.
-  unfold mullhs; intros; red; intros. econstructor; split; eauto. 
-  eapply eval_helper_2; eauto. apply eval_longconst. DeclHelper; eauto. UseHelper. 
+  unfold mullhs; intros; red; intros. econstructor; split; eauto.
+  eapply eval_helper_2; eauto. apply eval_longconst. DeclHelper; eauto. UseHelper.
 Qed.
 
 Theorem eval_shrxlimm:
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index d8d916de..b885d22c 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -147,7 +147,7 @@ Lemma contains_get_stack:
   m |= contains (chunk_of_type ty) sp ofs spec ->
   exists v, load_stack m (Vptr sp Ptrofs.zero) ty (Ptrofs.repr ofs) = Some v /\ spec v.
 Proof.
-  intros. unfold load_stack. 
+  intros. unfold load_stack.
   replace (Val.offset_ptr (Vptr sp Ptrofs.zero) (Ptrofs.repr ofs)) with (Vptr sp (Ptrofs.repr ofs)).
   eapply loadv_rule; eauto.
   simpl. rewrite Ptrofs.add_zero_l; auto.
@@ -169,7 +169,7 @@ Lemma contains_set_stack:
       store_stack m (Vptr sp Ptrofs.zero) ty (Ptrofs.repr ofs) v = Some m'
   /\ m' |= contains (chunk_of_type ty) sp ofs spec ** P.
 Proof.
-  intros. unfold store_stack. 
+  intros. unfold store_stack.
   replace (Val.offset_ptr (Vptr sp Ptrofs.zero) (Ptrofs.repr ofs)) with (Vptr sp (Ptrofs.repr ofs)).
   eapply storev_rule; eauto.
   simpl. rewrite Ptrofs.add_zero_l; auto.
@@ -195,11 +195,11 @@ Program Definition contains_locations (j: meminj) (sp: block) (pos bound: Z) (sl
     b = sp /\ pos <= ofs < pos + 4 * bound
 |}.
 Next Obligation.
-  intuition auto. 
+  intuition auto.
 - red; intros. eapply Mem.perm_unchanged_on; eauto. simpl; auto.
 - exploit H4; eauto. intros (v & A & B). exists v; split; auto.
   eapply Mem.load_unchanged_on; eauto.
-  simpl; intros. rewrite size_type_chunk, typesize_typesize in H8. 
+  simpl; intros. rewrite size_type_chunk, typesize_typesize in H8.
   split; auto. omega.
 Qed.
 Next Obligation.
@@ -214,9 +214,9 @@ Remark valid_access_location:
   Mem.valid_access m (chunk_of_type ty) sp (pos + 4 * ofs) p.
 Proof.
   intros; split.
-- red; intros. apply Mem.perm_implies with Freeable; auto with mem. 
+- red; intros. apply Mem.perm_implies with Freeable; auto with mem.
   apply H0. rewrite size_type_chunk, typesize_typesize in H4. omega.
-- rewrite align_type_chunk. apply Z.divide_add_r. 
+- rewrite align_type_chunk. apply Z.divide_add_r.
   apply Zdivide_trans with 8; auto.
   exists (8 / (4 * typealign ty)); destruct ty; reflexivity.
   apply Z.mul_divide_mono_l. auto.
@@ -246,20 +246,20 @@ Lemma set_location:
   /\ m' |= contains_locations j sp pos bound sl (Locmap.set (S sl ofs ty) v ls) ** P.
 Proof.
   intros. destruct H as (A & B & C). destruct A as (D & E & F & G & H).
-  edestruct Mem.valid_access_store as [m' STORE]. 
-  eapply valid_access_location; eauto. 
+  edestruct Mem.valid_access_store as [m' STORE].
+  eapply valid_access_location; eauto.
   assert (PERM: Mem.range_perm m' sp pos (pos + 4 * bound) Cur Freeable).
   { red; intros; eauto with mem. }
   exists m'; split.
 - unfold store_stack; simpl. rewrite Ptrofs.add_zero_l, Ptrofs.unsigned_repr; eauto.
   unfold Ptrofs.max_unsigned. generalize (typesize_pos ty). omega.
 - simpl. intuition auto.
-+ unfold Locmap.set. 
++ unfold Locmap.set.
   destruct (Loc.eq (S sl ofs ty) (S sl ofs0 ty0)); [|destruct (Loc.diff_dec (S sl ofs ty) (S sl ofs0 ty0))].
 * (* same location *)
   inv e. rename ofs0 into ofs. rename ty0 into ty.
   exists (Val.load_result (chunk_of_type ty) v'); split.
-  eapply Mem.load_store_similar_2; eauto. omega. 
+  eapply Mem.load_store_similar_2; eauto. omega.
   apply Val.load_result_inject; auto.
 * (* different locations *)
   exploit H; eauto. intros (v0 & X & Y). exists v0; split; auto.
@@ -267,11 +267,11 @@ Proof.
   destruct d. congruence. right. rewrite ! size_type_chunk, ! typesize_typesize. omega.
 * (* overlapping locations *)
   destruct (Mem.valid_access_load m' (chunk_of_type ty0) sp (pos + 4 * ofs0)) as [v'' LOAD].
-  apply Mem.valid_access_implies with Writable; auto with mem. 
+  apply Mem.valid_access_implies with Writable; auto with mem.
   eapply valid_access_location; eauto.
   exists v''; auto.
-+ apply (m_invar P) with m; auto. 
-  eapply Mem.store_unchanged_on; eauto. 
++ apply (m_invar P) with m; auto.
+  eapply Mem.store_unchanged_on; eauto.
   intros i; rewrite size_type_chunk, typesize_typesize. intros; red; intros.
   eelim C; eauto. simpl. split; auto. omega.
 Qed.
@@ -284,7 +284,7 @@ Lemma initial_locations:
   m |= contains_locations j sp pos bound sl ls ** P.
 Proof.
   intros. destruct H as (A & B & C). destruct A as (D & E & F). split.
-- simpl; intuition auto. red; intros; eauto with mem. 
+- simpl; intuition auto. red; intros; eauto with mem.
   destruct (Mem.valid_access_load m (chunk_of_type ty) sp (pos + 4 * ofs)) as [v LOAD].
   eapply valid_access_location; eauto.
   red; intros; eauto with mem.
@@ -389,7 +389,7 @@ Lemma frame_get_local:
 Proof.
   unfold frame_contents, frame_contents_1; intros. unfold slot_valid in H1; InvBooleans.
   apply mconj_proj1 in H. apply sep_proj1 in H. apply sep_proj1 in H.
-  eapply get_location; eauto. 
+  eapply get_location; eauto.
 Qed.
 
 Lemma frame_get_outgoing:
@@ -402,7 +402,7 @@ Lemma frame_get_outgoing:
 Proof.
   unfold frame_contents, frame_contents_1; intros. unfold slot_valid in H1; InvBooleans.
   apply mconj_proj1 in H. apply sep_proj1 in H. apply sep_pick2 in H.
-  eapply get_location; eauto. 
+  eapply get_location; eauto.
 Qed.
 
 Lemma frame_get_parent:
@@ -437,9 +437,9 @@ Lemma frame_set_local:
   /\ m' |= frame_contents j sp (Locmap.set (S Local ofs ty) v ls) ls0 parent retaddr ** P.
 Proof.
   intros. unfold frame_contents in H.
-  exploit mconj_proj1; eauto. unfold frame_contents_1. 
+  exploit mconj_proj1; eauto. unfold frame_contents_1.
   rewrite ! sep_assoc; intros SEP.
-  unfold slot_valid in H1; InvBooleans. simpl in H0. 
+  unfold slot_valid in H1; InvBooleans. simpl in H0.
   exploit set_location; eauto. intros (m' & A & B).
   exists m'; split; auto.
   assert (forall i k p, Mem.perm m sp i k p -> Mem.perm m' sp i k p).
@@ -463,8 +463,8 @@ Lemma frame_set_outgoing:
 Proof.
   intros. unfold frame_contents in H.
   exploit mconj_proj1; eauto. unfold frame_contents_1.
-  rewrite ! sep_assoc, sep_swap. intros SEP. 
-  unfold slot_valid in H1; InvBooleans. simpl in H0. 
+  rewrite ! sep_assoc, sep_swap. intros SEP.
+  unfold slot_valid in H1; InvBooleans. simpl in H0.
   exploit set_location; eauto. intros (m' & A & B).
   exists m'; split; auto.
   assert (forall i k p, Mem.perm m sp i k p -> Mem.perm m' sp i k p).
@@ -510,7 +510,7 @@ Proof.
 Local Opaque sepconj.
   induction rl; simpl; intros.
 - auto.
-- apply frame_set_reg; auto. 
+- apply frame_set_reg; auto.
 Qed.
 
 Corollary frame_set_regpair:
@@ -626,7 +626,7 @@ Lemma agree_regs_set_pair:
 Proof.
   intros. destruct p; simpl.
 - apply agree_regs_set_reg; auto.
-- apply agree_regs_set_reg. apply agree_regs_set_reg; auto. 
+- apply agree_regs_set_reg. apply agree_regs_set_reg; auto.
   apply Val.hiword_inject; auto. apply Val.loword_inject; auto.
 Qed.
 
@@ -728,7 +728,7 @@ Proof.
   apply agree_locs_set_reg; auto. apply caller_save_reg_within_bounds; auto.
   destruct H0.
   apply agree_locs_set_reg; auto. apply agree_locs_set_reg; auto.
-  apply caller_save_reg_within_bounds; auto. apply caller_save_reg_within_bounds; auto. 
+  apply caller_save_reg_within_bounds; auto. apply caller_save_reg_within_bounds; auto.
 Qed.
 
 Lemma agree_locs_set_res:
@@ -770,8 +770,8 @@ Lemma agree_locs_undef_locs:
   existsb is_callee_save regs = false ->
   agree_locs (LTL.undef_regs regs ls) ls0.
 Proof.
-  intros. eapply agree_locs_undef_locs_1; eauto. 
-  intros. destruct (is_callee_save r) eqn:CS; auto. 
+  intros. eapply agree_locs_undef_locs_1; eauto.
+  intros. destruct (is_callee_save r) eqn:CS; auto.
   assert (existsb is_callee_save regs = true).
   { apply existsb_exists. exists r; auto. }
   congruence.
@@ -831,7 +831,7 @@ Lemma agree_callee_save_set_result:
   agree_callee_save ls1 ls2 ->
   agree_callee_save (Locmap.setpair (loc_result sg) v ls1) ls2.
 Proof.
-  intros; red; intros. rewrite Locmap.gpo. apply H; auto. 
+  intros; red; intros. rewrite Locmap.gpo. apply H; auto.
   assert (X: forall r, is_callee_save r = false -> Loc.diff l (R r)).
   { intros. destruct l; auto. simpl; congruence. }
   generalize (loc_result_caller_save sg). destruct (loc_result sg); simpl; intuition auto.
@@ -845,7 +845,7 @@ Definition no_callee_saves (l: list mreg) : Prop :=
 Remark destroyed_by_op_caller_save:
   forall op, no_callee_saves (destroyed_by_op op).
 Proof.
-  unfold no_callee_saves; destruct op; reflexivity.
+  unfold no_callee_saves; destruct op; (reflexivity || destruct c; reflexivity).
 Qed.
 
 Remark destroyed_by_load_caller_save:
@@ -950,10 +950,10 @@ Lemma save_callee_save_rec_correct:
 Proof.
 Local Opaque mreg_type.
   induction l as [ | r l]; simpl; intros until P; intros CS SEP AG.
-- exists rs, m. 
+- exists rs, m.
   split. apply star_refl.
   split. rewrite sep_pure; split; auto. eapply sep_drop; eauto.
-  split. auto. 
+  split. auto.
   auto.
 - set (ty := mreg_type r) in *.
   set (sz := AST.typesize ty) in *.
@@ -971,17 +971,17 @@ Local Opaque mreg_type.
   apply range_contains in SEP; auto.
   exploit (contains_set_stack (fun v' => Val.inject j (ls (R r)) v') (rs r)).
   eexact SEP.
-  apply load_result_inject; auto. apply wt_ls. 
+  apply load_result_inject; auto. apply wt_ls.
   clear SEP; intros (m1 & STORE & SEP).
   set (rs1 := undef_regs (destroyed_by_setstack ty) rs).
   assert (AG1: agree_regs j ls rs1).
   { red; intros. unfold rs1. destruct (In_dec mreg_eq r0 (destroyed_by_setstack ty)).
     erewrite ls_temp_undef by eauto. auto.
     rewrite undef_regs_other by auto. apply AG. }
-  rewrite sep_swap in SEP. 
+  rewrite sep_swap in SEP.
   exploit (IHl (pos1 + sz) rs1 m1); eauto.
   intros (rs2 & m2 & A & B & C & D).
-  exists rs2, m2. 
+  exists rs2, m2.
   split. eapply star_left; eauto. constructor. exact STORE. auto. traceEq.
   split. rewrite sep_assoc, sep_swap. exact B.
   split. intros. apply C. unfold store_stack in STORE; simpl in STORE. eapply Mem.perm_store_1; eauto.
@@ -1042,16 +1042,16 @@ Proof.
   intros until P; intros SEP TY AGCS AG; intros ls1 rs1.
   exploit (save_callee_save_rec_correct j cs fb sp ls1).
 - intros. unfold ls1. apply LTL_undef_regs_same. eapply destroyed_by_setstack_function_entry; eauto.
-- intros. unfold ls1. apply undef_regs_type. apply TY. 
+- intros. unfold ls1. apply undef_regs_type. apply TY.
 - exact b.(used_callee_save_prop).
 - eexact SEP.
 - instantiate (1 := rs1). apply agree_regs_undef_regs. apply agree_regs_call_regs. auto.
 - clear SEP. intros (rs' & m' & EXEC & SEP & PERMS & AG').
-  exists rs', m'. 
+  exists rs', m'.
   split. eexact EXEC.
   split. rewrite (contains_callee_saves_exten j sp ls0 ls1). exact SEP.
   intros. apply b.(used_callee_save_prop) in H.
-    unfold ls1. rewrite LTL_undef_regs_others. unfold call_regs. 
+    unfold ls1. rewrite LTL_undef_regs_others. unfold call_regs.
     apply AGCS; auto.
     red; intros.
     assert (existsb is_callee_save destroyed_at_function_entry = false)
@@ -1095,14 +1095,14 @@ Proof.
   unfold fn_stacksize, fn_link_ofs, fn_retaddr_ofs.
   (* Stack layout info *)
 Local Opaque b fe.
-  generalize (frame_env_range b) (frame_env_aligned b). replace (make_env b) with fe by auto. simpl. 
+  generalize (frame_env_range b) (frame_env_aligned b). replace (make_env b) with fe by auto. simpl.
   intros LAYOUT1 LAYOUT2.
   (* Allocation step *)
   destruct (Mem.alloc m1' 0 (fe_size fe)) as [m2' sp'] eqn:ALLOC'.
   exploit alloc_parallel_rule_2.
-  eexact SEP. eexact ALLOC. eexact ALLOC'. 
+  eexact SEP. eexact ALLOC. eexact ALLOC'.
   instantiate (1 := fe_stack_data fe). tauto.
-  reflexivity. 
+  reflexivity.
   instantiate (1 := fe_stack_data fe + bound_stack_data b). rewrite Z.max_comm. reflexivity.
   generalize (bound_stack_data_pos b) size_no_overflow; omega.
   tauto.
@@ -1139,23 +1139,23 @@ Local Opaque b fe.
   clear SEP; intros (rs2 & m5' & SAVE_CS & SEP & PERMS & AGREGS').
   rewrite sep_swap5 in SEP.
   (* Materializing the Local and Outgoing locations *)
-  exploit (initial_locations j'). eexact SEP. tauto. 
-  instantiate (1 := Local). instantiate (1 := ls1). 
+  exploit (initial_locations j'). eexact SEP. tauto.
+  instantiate (1 := Local). instantiate (1 := ls1).
   intros; rewrite LS1. rewrite LTL_undef_regs_slot. reflexivity.
   clear SEP; intros SEP.
   rewrite sep_swap in SEP.
-  exploit (initial_locations j'). eexact SEP. tauto. 
-  instantiate (1 := Outgoing). instantiate (1 := ls1). 
+  exploit (initial_locations j'). eexact SEP. tauto.
+  instantiate (1 := Outgoing). instantiate (1 := ls1).
   intros; rewrite LS1. rewrite LTL_undef_regs_slot. reflexivity.
   clear SEP; intros SEP.
   rewrite sep_swap in SEP.
   (* Now we frame this *)
   assert (SEPFINAL: m5' |= frame_contents j' sp' ls1 ls0 parent ra ** minjection j' m2 ** globalenv_inject ge j' ** P).
   { eapply frame_mconj. eexact SEPCONJ.
-    rewrite chunk_of_Tptr in SEP.  
+    rewrite chunk_of_Tptr in SEP.
     unfold frame_contents_1; rewrite ! sep_assoc. exact SEP.
     assert (forall ofs k p, Mem.perm m2' sp' ofs k p -> Mem.perm m5' sp' ofs k p).
-    { intros. apply PERMS. 
+    { intros. apply PERMS.
       unfold store_stack in STORE_PARENT, STORE_RETADDR.
       simpl in STORE_PARENT, STORE_RETADDR.
       eauto using Mem.perm_store_1. }
@@ -1172,7 +1172,7 @@ Local Opaque b fe.
   split. eexact SAVE_CS.
   split. exact AGREGS'.
   split. rewrite LS1. apply agree_locs_undef_locs; [|reflexivity].
-    constructor; intros. unfold call_regs. apply AGCS. 
+    constructor; intros. unfold call_regs. apply AGCS.
     unfold mreg_within_bounds in H; tauto.
     unfold call_regs. apply AGCS. auto.
   split. exact SEPFINAL.
@@ -1229,13 +1229,13 @@ Local Opaque mreg_type.
     eauto.
   intros (rs' & A & B & C & D).
   exists rs'.
-  split. eapply star_step; eauto. 
+  split. eapply star_step; eauto.
     econstructor. exact LOAD. traceEq.
   split. intros.
-    destruct (In_dec mreg_eq r0 l). auto. 
+    destruct (In_dec mreg_eq r0 l). auto.
     assert (r = r0) by tauto. subst r0.
     rewrite C by auto. rewrite Regmap.gss. exact SPEC.
-  split. intros. 
+  split. intros.
     rewrite C by tauto. apply Regmap.gso. intuition auto.
   exact D.
 Qed.
@@ -1256,8 +1256,8 @@ Lemma restore_callee_save_correct:
         is_callee_save r = false -> rs' r = rs r).
 Proof.
   intros.
-  unfold frame_contents, frame_contents_1 in H. 
-  apply mconj_proj1 in H. rewrite ! sep_assoc in H. apply sep_pick5 in H. 
+  unfold frame_contents, frame_contents_1 in H.
+  apply mconj_proj1 in H. rewrite ! sep_assoc in H. apply sep_pick5 in H.
   exploit restore_callee_save_rec_correct; eauto.
   intros; unfold mreg_within_bounds; auto.
   intros (rs' & A & B & C & D).
@@ -1304,7 +1304,7 @@ Proof.
   (* Reloading the callee-save registers *)
   exploit restore_callee_save_correct.
     eexact SEP.
-    instantiate (1 := rs). 
+    instantiate (1 := rs).
     red; intros. destruct AGL. rewrite <- agree_unused_reg0 by auto. apply AGR.
   intros (rs' & LOAD_CS & CS & NCS).
   (* Reloading the back link and return address *)
@@ -1320,7 +1320,7 @@ Proof.
   split. assumption.
   split. assumption.
   split. eassumption.
-  split. red; unfold return_regs; intros. 
+  split. red; unfold return_regs; intros.
     destruct (is_callee_save r) eqn:C.
     apply CS; auto.
     rewrite NCS by auto. apply AGR.
@@ -1418,7 +1418,7 @@ Lemma match_stacks_type_sp:
   Val.has_type (parent_sp cs') Tptr.
 Proof.
   induction 1; unfold parent_sp. apply Val.Vnullptr_has_type. apply Val.Vptr_has_type.
-Qed. 
+Qed.
 
 Lemma match_stacks_type_retaddr:
   forall j cs cs' sg,
@@ -1504,7 +1504,7 @@ Lemma is_tail_save_callee_save:
   is_tail k (save_callee_save_rec l ofs k).
 Proof.
   induction l; intros; simpl. auto with coqlib.
-  constructor; auto. 
+  constructor; auto.
 Qed.
 
 Lemma is_tail_restore_callee_save:
@@ -1512,7 +1512,7 @@ Lemma is_tail_restore_callee_save:
   is_tail k (restore_callee_save_rec l ofs k).
 Proof.
   induction l; intros; simpl. auto with coqlib.
-  constructor; auto. 
+  constructor; auto.
 Qed.
 
 Lemma is_tail_transl_instr:
@@ -1541,7 +1541,7 @@ Lemma is_tail_transf_function:
   is_tail (transl_code (make_env (function_bounds f)) c) (fn_code tf).
 Proof.
   intros. rewrite (unfold_transf_function _ _ H). simpl.
-  unfold transl_body, save_callee_save. 
+  unfold transl_body, save_callee_save.
   eapply is_tail_trans. 2: apply is_tail_save_callee_save.
   apply is_tail_transl_code; auto.
 Qed.
@@ -1636,7 +1636,7 @@ Proof.
 + elim (H1 _ H).
 + simpl in SEP. unfold parent_sp.
   assert (slot_valid f Outgoing pos ty = true).
-  { destruct H0. unfold slot_valid, proj_sumbool. 
+  { destruct H0. unfold slot_valid, proj_sumbool.
     rewrite zle_true by omega. rewrite pred_dec_true by auto. reflexivity. }
   assert (slot_within_bounds (function_bounds f) Outgoing pos ty) by eauto.
   exploit frame_get_outgoing; eauto. intros (v & A & B).
@@ -1651,10 +1651,10 @@ Lemma transl_external_argument_2:
 Proof.
   intros. destruct p as [l | l1 l2].
 - destruct (transl_external_argument l) as (v & A & B). eapply in_regs_of_rpairs; eauto; simpl; auto.
-  exists v; split; auto. constructor; auto. 
+  exists v; split; auto. constructor; auto.
 - destruct (transl_external_argument l1) as (v1 & A1 & B1). eapply in_regs_of_rpairs; eauto; simpl; auto.
   destruct (transl_external_argument l2) as (v2 & A2 & B2). eapply in_regs_of_rpairs; eauto; simpl; auto.
-  exists (Val.longofwords v1 v2); split. 
+  exists (Val.longofwords v1 v2); split.
   constructor; auto.
   apply Val.longofwords_inject; auto.
 Qed.
@@ -1724,7 +1724,7 @@ Local Opaque fe.
 - assert (loc_valid f x = true) by auto.
   destruct x as [r | [] ofs ty]; try discriminate.
   + exists (rs r); auto with barg.
-  + exploit frame_get_local; eauto. intros (v & A & B). 
+  + exploit frame_get_local; eauto. intros (v & A & B).
     exists v; split; auto. constructor; auto.
 - econstructor; eauto with barg.
 - econstructor; eauto with barg.
@@ -1734,12 +1734,12 @@ Local Opaque fe.
   apply sep_proj2 in SEP. apply sep_proj1 in SEP. exploit loadv_parallel_rule; eauto.
   instantiate (1 := Val.offset_ptr (Vptr sp' Ptrofs.zero) ofs').
   simpl. rewrite ! Ptrofs.add_zero_l. econstructor; eauto.
-  intros (v' & A & B). exists v'; split; auto. constructor; auto. 
+  intros (v' & A & B). exists v'; split; auto. constructor; auto.
 - econstructor; split; eauto with barg.
   unfold Val.offset_ptr. rewrite ! Ptrofs.add_zero_l. econstructor; eauto.
 - apply sep_proj2 in SEP. apply sep_proj1 in SEP. exploit loadv_parallel_rule; eauto.
   intros (v' & A & B). exists v'; auto with barg.
-- econstructor; split; eauto with barg. 
+- econstructor; split; eauto with barg.
 - destruct IHeval_builtin_arg1 as (v1 & A1 & B1); auto using in_or_app.
   destruct IHeval_builtin_arg2 as (v2 & A2 & B2); auto using in_or_app.
   exists (Val.longofwords v1 v2); split; auto with barg.
@@ -1776,7 +1776,7 @@ End BUILTIN_ARGUMENTS.
 >>
   Matching between source and target states is defined by [match_states]
   below.  It implies:
-- Satisfaction of the separation logic assertions that describe the contents 
+- Satisfaction of the separation logic assertions that describe the contents
   of memory.  This is a separating conjunction of facts about:
 -- the current stack frame
 -- the frames in the call stack
@@ -1864,7 +1864,7 @@ Proof.
   eapply slot_outgoing_argument_valid; eauto.
   intros (v & A & B).
   econstructor; split.
-  apply plus_one. eapply exec_Mgetparam; eauto. 
+  apply plus_one. eapply exec_Mgetparam; eauto.
   rewrite (unfold_transf_function _ _ TRANSL). unfold fn_link_ofs.
   eapply frame_get_parent. eexact SEP.
   econstructor; eauto with coqlib. econstructor; eauto.
@@ -1901,7 +1901,7 @@ Proof.
   apply plus_one. destruct sl; try discriminate.
     econstructor. eexact STORE. eauto.
     econstructor. eexact STORE. eauto.
-  econstructor. eauto. eauto. eauto. 
+  econstructor. eauto. eauto. eauto.
   apply agree_regs_set_slot. apply agree_regs_undef_regs. auto.
   apply agree_locs_set_slot. apply agree_locs_undef_locs. auto. apply destroyed_by_setstack_caller_save. auto.
   eauto. eauto with coqlib. eauto.
@@ -1923,7 +1923,7 @@ Proof.
   apply agree_regs_set_reg; auto.
   rewrite transl_destroyed_by_op.  apply agree_regs_undef_regs; auto.
   apply agree_locs_set_reg; auto. apply agree_locs_undef_locs. auto. apply destroyed_by_op_caller_save.
-  apply frame_set_reg. apply frame_undef_regs. exact SEP. 
+  apply frame_set_reg. apply frame_undef_regs. exact SEP.
 
 - (* Lload *)
   assert (exists a',
@@ -1935,7 +1935,7 @@ Proof.
   destruct H1 as [a' [A B]].
   exploit loadv_parallel_rule.
   apply sep_proj2 in SEP. apply sep_proj2 in SEP. apply sep_proj1 in SEP. eexact SEP.
-  eauto. eauto. 
+  eauto. eauto.
   intros [v' [C D]].
   econstructor; split.
   apply plus_one. econstructor.
@@ -1943,7 +1943,7 @@ Proof.
   eexact C. eauto.
   econstructor; eauto with coqlib.
   apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
-  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto. 
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
 
 - (* Lstore *)
   assert (exists a',
@@ -1954,14 +1954,14 @@ Proof.
   eapply agree_reglist; eauto.
   destruct H1 as [a' [A B]].
   rewrite sep_swap3 in SEP.
-  exploit storev_parallel_rule. eexact SEP. eauto. eauto. apply AGREGS. 
+  exploit storev_parallel_rule. eexact SEP. eauto. eauto. apply AGREGS.
   clear SEP; intros (m1' & C & SEP).
   rewrite sep_swap3 in SEP.
   econstructor; split.
   apply plus_one. econstructor.
   instantiate (1 := a'). rewrite <- A. apply eval_addressing_preserved. exact symbols_preserved.
   eexact C. eauto.
-  econstructor. eauto. eauto. eauto. 
+  econstructor. eauto. eauto. eauto.
   rewrite transl_destroyed_by_store. apply agree_regs_undef_regs; auto.
   apply agree_locs_undef_locs. auto. apply destroyed_by_store_caller_save.
   auto. eauto with coqlib.
@@ -2018,7 +2018,7 @@ Proof.
   eapply match_stacks_change_meminj; eauto.
   apply agree_regs_set_res; auto. apply agree_regs_undef_regs; auto. eapply agree_regs_inject_incr; eauto.
   apply agree_locs_set_res; auto. apply agree_locs_undef_regs; auto.
-  apply frame_set_res. apply frame_undef_regs. apply frame_contents_incr with j; auto. 
+  apply frame_set_res. apply frame_undef_regs. apply frame_contents_incr with j; auto.
   rewrite sep_swap2. apply stack_contents_change_meminj with j; auto. rewrite sep_swap2.
   exact SEP.
 
@@ -2042,7 +2042,7 @@ Proof.
   econstructor. eauto. eauto. eauto.
   apply agree_regs_undef_regs; auto.
   apply agree_locs_undef_locs. auto. apply destroyed_by_cond_caller_save.
-  auto. 
+  auto.
   eapply find_label_tail; eauto.
   apply frame_undef_regs; auto.
 
@@ -2081,7 +2081,7 @@ Proof.
   revert TRANSL. unfold transf_fundef, transf_partial_fundef.
   destruct (transf_function f) as [tfn|] eqn:TRANSL; simpl; try congruence.
   intros EQ; inversion EQ; clear EQ; subst tf.
-  rewrite sep_comm, sep_assoc in SEP. 
+  rewrite sep_comm, sep_assoc in SEP.
   exploit function_prologue_correct; eauto.
   red; intros; eapply wt_callstate_wt_regs; eauto.
   eapply match_stacks_type_sp; eauto.
@@ -2111,16 +2111,16 @@ Proof.
   eapply match_stacks_change_meminj; eauto.
   apply agree_regs_set_pair. apply agree_regs_inject_incr with j; auto. auto.
   apply agree_callee_save_set_result; auto.
-  apply stack_contents_change_meminj with j; auto. 
+  apply stack_contents_change_meminj with j; auto.
   rewrite sep_comm, sep_assoc; auto.
 
 - (* return *)
-  inv STACKS. simpl in AGLOCS. simpl in SEP. rewrite sep_assoc in SEP. 
+  inv STACKS. simpl in AGLOCS. simpl in SEP. rewrite sep_assoc in SEP.
   econstructor; split.
   apply plus_one. apply exec_return.
   econstructor; eauto.
   apply agree_locs_return with rs0; auto.
-  apply frame_contents_exten with rs0 (parent_locset s); auto. 
+  apply frame_contents_exten with rs0 (parent_locset s); auto.
 Qed.
 
 Lemma transf_initial_states:
diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v
index 1dcdfb64..06e314f3 100644
--- a/backend/Tailcallproof.v
+++ b/backend/Tailcallproof.v
@@ -577,7 +577,7 @@ Proof.
   econstructor; eauto. apply (Genv.init_mem_transf TRANSL). auto.
   replace (prog_main tprog) with (prog_main prog).
   rewrite symbols_preserved. eauto.
-  symmetry; eapply match_program_main; eauto. 
+  symmetry; eapply match_program_main; eauto.
   rewrite <- H3. apply sig_preserved.
   constructor. constructor. constructor. apply Mem.extends_refl.
 Qed.
@@ -597,7 +597,7 @@ Theorem transf_program_correct:
   forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
 Proof.
   eapply forward_simulation_opt with (measure := measure); eauto.
-  apply senv_preserved. 
+  apply senv_preserved.
   eexact transf_initial_states.
   eexact transf_final_states.
   exact transf_step_correct.
diff --git a/backend/Unusedglobproof.v b/backend/Unusedglobproof.v
index c79ae4fd..db03d0b3 100644
--- a/backend/Unusedglobproof.v
+++ b/backend/Unusedglobproof.v
@@ -315,11 +315,11 @@ Corollary used_globals_valid:
   valid_used_set p u.
 Proof.
   intros. constructor.
-- intros. eapply used_globals_sound; eauto. 
+- intros. eapply used_globals_sound; eauto.
 - eapply used_globals_incl; eauto. apply seen_main_initial_workset.
 - intros. eapply used_globals_incl; eauto. apply seen_public_initial_workset; auto.
 - intros. apply ISF.for_all_iff in H0.
-+ red in H0. apply H0 in H1. unfold global_defined in H1. 
++ red in H0. apply H0 in H1. unfold global_defined in H1.
   destruct pm!id as [g|] eqn:E.
 * left. change id with (fst (id,g)). apply in_map. apply in_prog_defmap; auto.
 * InvBooleans; auto.
@@ -394,7 +394,7 @@ Lemma filter_globdefs_map:
 Proof.
   intros. unfold PTree_Properties.of_list. fold prog_map. unfold PTree.elt. fold add_def.
   destruct (IS.mem id u) eqn:MEM.
-- erewrite filter_globdefs_map_2. rewrite List.rev_involutive. reflexivity. 
+- erewrite filter_globdefs_map_2. rewrite List.rev_involutive. reflexivity.
   auto. auto.
 - apply filter_globdefs_map_1. auto. apply PTree.gempty.
 Qed.
@@ -419,7 +419,7 @@ Proof.
 - constructor.
 - destruct (IS.mem id1 u) eqn:MEM; auto.
   rewrite filter_globdefs_nil, map_app. simpl.
-  apply list_norepet_append; auto. 
+  apply list_norepet_append; auto.
   constructor. simpl; tauto. constructor.
   red; simpl; intros. destruct H0; try tauto. subst y.
   apply filter_globdefs_domain in H. rewrite ISF.remove_iff in H. intuition.
@@ -433,11 +433,11 @@ Proof.
   unfold transform_program; intros p tp TR. set (pm := prog_defmap p) in *.
   destruct (used_globals p pm) as [u|] eqn:U; try discriminate.
   destruct (IS.for_all (global_defined p pm) u) eqn:DEF; inv TR.
-  exists u; split. 
+  exists u; split.
   apply used_globals_valid; auto.
   constructor; simpl; auto.
   intros. unfold prog_defmap; simpl. apply filter_globdefs_map.
-  apply filter_globdefs_unique_names. 
+  apply filter_globdefs_unique_names.
 Qed.
 
 (** * Semantic preservation *)
@@ -480,7 +480,7 @@ Lemma transform_find_symbol_1:
   forall id b,
   Genv.find_symbol ge id = Some b -> kept id -> exists b', Genv.find_symbol tge id = Some b'.
 Proof.
-  intros. 
+  intros.
   assert (A: exists g, (prog_defmap p)!id = Some g).
   { apply prog_defmap_dom. eapply Genv.find_symbol_inversion; eauto. }
   destruct A as (g & P).
@@ -493,13 +493,13 @@ Lemma transform_find_symbol_2:
   forall id b,
   Genv.find_symbol tge id = Some b -> kept id /\ exists b', Genv.find_symbol ge id = Some b'.
 Proof.
-  intros. 
+  intros.
   assert (A: exists g, (prog_defmap tp)!id = Some g).
   { apply prog_defmap_dom. eapply Genv.find_symbol_inversion; eauto. }
   destruct A as (g & P).
-  erewrite match_prog_def in P by eauto. 
+  erewrite match_prog_def in P by eauto.
   destruct (IS.mem id used) eqn:U; try discriminate.
-  split. apply IS.mem_2; auto. 
+  split. apply IS.mem_2; auto.
   apply Genv.find_symbol_exists with g.
   apply in_prog_defmap. auto.
 Qed.
@@ -564,7 +564,7 @@ Proof.
   auto.
 - exploit transform_find_symbol_1; eauto. intros (b' & F). exists b'; split; auto.
   eapply init_meminj_eq; eauto.
-- exploit transform_find_symbol_2; eauto. intros (K & b & F). 
+- exploit transform_find_symbol_2; eauto. intros (K & b & F).
   exists b; split; auto. eapply init_meminj_eq; eauto.
 - exploit init_meminj_invert; eauto. intros (A & id & B & C).
   assert (kept id) by (eapply transform_find_symbol_2; eauto).
@@ -573,7 +573,7 @@ Proof.
   assert ((prog_defmap tp)!id = Some gd).
   { erewrite match_prog_def by eauto. rewrite IS.mem_1 by auto. auto. }
   rewrite Genv.find_def_symbol in H3. destruct H3 as (b1 & P & Q).
-  fold tge in P. replace b' with b1 by congruence. split; auto. split; auto. 
+  fold tge in P. replace b' with b1 by congruence. split; auto. split; auto.
   intros. eapply kept_closed; eauto.
 - exploit init_meminj_invert; eauto. intros (A & id & B & C).
   assert ((prog_defmap tp)!id = Some gd).
@@ -616,7 +616,7 @@ Proof.
     rewrite <- Genv.find_var_info_iff in A. rewrite A; auto.
     destruct (Genv.find_var_info tge b2) as [gv|] eqn:V2; auto.
     rewrite Genv.find_var_info_iff in V2.
-    exploit defs_rev_inject; eauto. intros (A & B). 
+    exploit defs_rev_inject; eauto. intros (A & B).
     rewrite <- Genv.find_var_info_iff in A. congruence.
 Qed.
 
@@ -805,15 +805,15 @@ Proof.
 - exploit Genv.find_funct_inv; eauto. intros (b & R). rewrite R in H0.
   rewrite Genv.find_funct_find_funct_ptr in H0.
   specialize (H1 r). rewrite R in H1. inv H1.
-  rewrite Genv.find_funct_ptr_iff in H0.  
+  rewrite Genv.find_funct_ptr_iff in H0.
   exploit defs_inject; eauto. intros (A & B & C).
-  rewrite <- Genv.find_funct_ptr_iff in A. 
+  rewrite <- Genv.find_funct_ptr_iff in A.
   rewrite B; auto.
 - destruct (Genv.find_symbol ge id) as [b|] eqn:FS; try discriminate.
   exploit symbols_inject_2; eauto. intros (tb & P & Q). rewrite P.
-  rewrite Genv.find_funct_ptr_iff in H0.  
+  rewrite Genv.find_funct_ptr_iff in H0.
   exploit defs_inject; eauto. intros (A & B & C).
-  rewrite <- Genv.find_funct_ptr_iff in A. 
+  rewrite <- Genv.find_funct_ptr_iff in A.
   auto.
 Qed.
 
@@ -1057,7 +1057,7 @@ Proof.
   { induction l as [ | [id1 g1] l]; simpl; intros.
   - auto.
   - apply IHl. unfold Genv.add_global, P; simpl. intros LT. apply Plt_succ_inv in LT. destruct LT.
-  + rewrite PTree.gso. apply H; auto. apply Plt_ne; auto. 
+  + rewrite PTree.gso. apply H; auto. apply Plt_ne; auto.
   + rewrite H0. rewrite PTree.gss. exists g1; auto. }
   apply H. red; simpl; intros. exfalso; xomega.
 Qed.
@@ -1074,14 +1074,14 @@ Lemma init_meminj_invert_strong:
   /\ Genv.find_def tge b' = Some gd
   /\ (forall i, ref_def gd i -> kept i).
 Proof.
-  intros. exploit init_meminj_invert; eauto. intros (A & id & B & C). 
+  intros. exploit init_meminj_invert; eauto. intros (A & id & B & C).
   assert (exists gd, (prog_defmap p)!id = Some gd).
   { apply prog_defmap_dom. eapply Genv.find_symbol_inversion; eauto. }
   destruct H0 as [gd DM]. rewrite Genv.find_def_symbol in DM.
   destruct DM as (b'' & P & Q). fold ge in P. rewrite P in B; inv B.
   fold ge in Q. exploit defs_inject. apply init_meminj_preserves_globals.
-  eauto. eauto. intros (X & _ & Y). 
-  split. auto. exists id, gd; auto. 
+  eauto. eauto. intros (X & _ & Y).
+  split. auto. exists id, gd; auto.
 Qed.
 
 Section INIT_MEM.
@@ -1098,11 +1098,11 @@ Proof.
   induction il as [ | i1 il]; simpl; intros.
 - constructor.
 - apply list_forall2_app.
-+ destruct i1; simpl; try (apply inj_bytes_inject). 
++ destruct i1; simpl; try (apply inj_bytes_inject).
   induction (Z.to_nat z); simpl; constructor. constructor. auto.
   destruct (Genv.find_symbol ge i) as [b|] eqn:FS.
   assert (kept i). { apply H. red. exists i0; auto with coqlib. }
-  exploit symbols_inject_2. apply init_meminj_preserves_globals. eauto. eauto. 
+  exploit symbols_inject_2. apply init_meminj_preserves_globals. eauto. eauto.
   intros (b' & A & B). rewrite A. apply inj_value_inject.
   econstructor; eauto. symmetry; apply Ptrofs.add_zero.
   destruct (Genv.find_symbol tge i) as [b'|] eqn:FS'.
@@ -1123,7 +1123,7 @@ Proof.
 - inv H. rewrite inj_S in H1. destruct (zeq i p0).
 + congruence.
 + apply IHn with (p0 + 1); auto. omega. omega.
-Qed. 
+Qed.
 
 Lemma init_mem_inj_1:
   Mem.mem_inj init_meminj m tm.
@@ -1138,9 +1138,9 @@ Proof.
   apply Mem.perm_cur. auto.
 + intros (P2 & Q2 & R2 & S2) (P1 & Q1 & R1 & S1).
   apply Q1 in H0. destruct H0. subst.
-  apply Mem.perm_cur. eapply Mem.perm_implies; eauto. 
+  apply Mem.perm_cur. eapply Mem.perm_implies; eauto.
   apply P2. omega.
-- exploit init_meminj_invert; eauto. intros (A & id & B & C). 
+- exploit init_meminj_invert; eauto. intros (A & id & B & C).
   subst delta. apply Zdivide_0.
 - exploit init_meminj_invert_strong; eauto. intros (A & id & gd & B & C & D & E & F).
   exploit (Genv.init_mem_characterization_gen p); eauto.
@@ -1157,9 +1157,9 @@ Local Transparent Mem.loadbytes.
   generalize (S2 NO). unfold Mem.loadbytes. destruct Mem.range_perm_dec; intros E2; inv E2.
   rewrite Zplus_0_r.
   apply Mem_getN_forall2 with (p := 0) (n := nat_of_Z (init_data_list_size (gvar_init v))).
-  rewrite H3, H4. apply bytes_of_init_inject. auto. 
-  omega. 
-  rewrite nat_of_Z_eq by (apply init_data_list_size_pos). omega. 
+  rewrite H3, H4. apply bytes_of_init_inject. auto.
+  omega.
+  rewrite nat_of_Z_eq by (apply init_data_list_size_pos). omega.
 Qed.
 
 Lemma init_mem_inj_2:
@@ -1168,9 +1168,9 @@ Proof.
   constructor; intros.
 - apply init_mem_inj_1.
 - destruct (init_meminj b) as [[b' delta]|] eqn:INJ; auto.
-  elim H. exploit init_meminj_invert; eauto. intros (A & id & B & C). 
+  elim H. exploit init_meminj_invert; eauto. intros (A & id & B & C).
   eapply Genv.find_symbol_not_fresh; eauto.
-- exploit init_meminj_invert; eauto. intros (A & id & B & C). 
+- exploit init_meminj_invert; eauto. intros (A & id & B & C).
   eapply Genv.find_symbol_not_fresh; eauto.
 - red; intros.
   exploit init_meminj_invert. eexact H0. intros (A1 & id1 & B1 & C1).
@@ -1187,7 +1187,7 @@ Proof.
   left; apply Mem.perm_cur; auto.
 + intros (P2 & Q2 & R2 & S2) (P1 & Q1 & R1 & S1).
   apply Q2 in H0. destruct H0. subst.
-  left. apply Mem.perm_cur. eapply Mem.perm_implies; eauto. 
+  left. apply Mem.perm_cur. eapply Mem.perm_implies; eauto.
   apply P1. omega.
 Qed.
 
@@ -1198,7 +1198,7 @@ Lemma init_mem_exists:
   exists tm, Genv.init_mem tp = Some tm.
 Proof.
   intros. apply Genv.init_mem_exists.
-  intros. 
+  intros.
   assert (P: (prog_defmap tp)!id = Some (Gvar v)).
   { eapply prog_defmap_norepet; eauto. eapply match_prog_unique; eauto. }
   rewrite (match_prog_def _ _ _ TRANSF) in P. destruct (IS.mem id used) eqn:U; try discriminate.
@@ -1206,7 +1206,7 @@ Proof.
   split. auto.
   intros. exploit FV; eauto. intros (b & FS).
   apply transform_find_symbol_1 with b; auto.
-  apply kept_closed with id (Gvar v). 
+  apply kept_closed with id (Gvar v).
   apply IS.mem_2; auto. auto. red. red. exists o; auto.
 Qed.
 
@@ -1218,9 +1218,9 @@ Proof.
   intros.
   exploit init_mem_exists; eauto. intros [tm INIT].
   exists init_meminj, tm.
-  split. auto. 
-  split. eapply init_mem_inj_2; eauto. 
-  apply init_meminj_preserves_globals. 
+  split. auto.
+  split. eapply init_mem_inj_2; eauto.
+  apply init_meminj_preserves_globals.
 Qed.
 
 Lemma transf_initial_states:
@@ -1228,7 +1228,7 @@ Lemma transf_initial_states:
 Proof.
   intros. inv H. exploit init_mem_inject; eauto. intros (j & tm & A & B & C).
   exploit symbols_inject_2. eauto. eapply kept_main. eexact H1. intros (tb & P & Q).
-  rewrite Genv.find_funct_ptr_iff in H2. 
+  rewrite Genv.find_funct_ptr_iff in H2.
   exploit defs_inject. eauto. eexact Q. exact H2.
   intros (R & S & T).
   rewrite <- Genv.find_funct_ptr_iff in R.
@@ -1286,15 +1286,15 @@ Local Transparent Linker_def Linker_fundef Linker_varinit Linker_vardef Linker_u
   destruct (link_varinit init1 init2) as [init|] eqn:LI...
   destruct (eqb ro1 ro2) eqn:RO...
   destruct (eqb vo1 vo2) eqn:VO...
-  simpl. 
+  simpl.
   destruct info1, info2.
   assert (EITHER: init = init1 \/ init = init2).
-  { revert LI. unfold link_varinit. 
+  { revert LI. unfold link_varinit.
     destruct (classify_init init1), (classify_init init2); intro EQ; inv EQ; auto.
     destruct (zeq sz (Z.max sz0 0 + 0)); inv H0; auto.
     destruct (zeq sz (init_data_list_size il)); inv H0; auto.
     destruct (zeq sz (init_data_list_size il)); inv H0; auto. }
-  apply eqb_prop in RO. apply eqb_prop in VO. 
+  apply eqb_prop in RO. apply eqb_prop in VO.
   intro EQ; inv EQ. destruct EITHER; subst init; auto.
 Qed.
 
@@ -1339,7 +1339,7 @@ Proof.
 + (* common definition *)
   exploit Y; eauto. intros (PUB1 & PUB2 & _).
   exploit link_def_either; eauto. intros [EQ|EQ]; subst gd.
-* left. eapply used_closed. eexact V1. eapply used_public. eexact V1. eauto. eauto. auto. 
+* left. eapply used_closed. eexact V1. eapply used_public. eexact V1. eauto. eauto. auto.
 * right. eapply used_closed. eexact V2. eapply used_public. eexact V2. eauto. eauto. auto.
 + (* left definition *)
   inv H0. destruct (ISP.In_dec id used1).
@@ -1358,7 +1358,7 @@ Proof.
 + (* no definition *)
   auto.
 - simpl. rewrite ISF.union_iff; left; eapply used_main; eauto.
-- simpl. intros id. rewrite in_app_iff, ISF.union_iff. 
+- simpl. intros id. rewrite in_app_iff, ISF.union_iff.
   intros [A|A]; [left|right]; eapply used_public; eauto.
 - intros. rewrite ISF.union_iff in H.
   destruct (ident_eq id (prog_main p1)).
@@ -1387,16 +1387,16 @@ Theorem link_match_program:
 Proof.
   intros. destruct H0 as (used1 & A1 & B1). destruct H1 as (used2 & A2 & B2).
   destruct (link_prog_inv _ _ _ H) as (U & V & W).
-  econstructor; split. 
+  econstructor; split.
 - apply link_prog_succeeds.
 + rewrite (match_prog_main _ _ _ B1), (match_prog_main _ _ _ B2). auto.
-+ intros. 
++ intros.
   rewrite (match_prog_def _ _ _ B1) in H0.
   rewrite (match_prog_def _ _ _ B2) in H1.
   destruct (IS.mem id used1) eqn:U1; try discriminate.
   destruct (IS.mem id used2) eqn:U2; try discriminate.
   edestruct V as (X & Y & gd & Z); eauto.
-  split. rewrite (match_prog_public _ _ _ B1); auto. 
+  split. rewrite (match_prog_public _ _ _ B1); auto.
   split. rewrite (match_prog_public _ _ _ B2); auto.
   congruence.
 - exists (IS.union used1 used2); split.
@@ -1411,7 +1411,7 @@ Proof.
   destruct (prog_defmap p1)!id as [gd1|] eqn:GD1;
   destruct (prog_defmap p2)!id as [gd2|] eqn:GD2.
 - (* both defined *)
-  exploit V; eauto. intros (PUB1 & PUB2 & _). 
+  exploit V; eauto. intros (PUB1 & PUB2 & _).
   assert (EQ1: IS.mem id used1 = true) by (apply IS.mem_1; eapply used_public; eauto).
   assert (EQ2: IS.mem id used2 = true) by (apply IS.mem_1; eapply used_public; eauto).
   rewrite EQ1, EQ2; auto.
@@ -1428,7 +1428,7 @@ Proof.
 - (* none defined *)
   destruct (IS.mem id used1), (IS.mem id used2); auto.
 }
-* intros. apply PTree.elements_keys_norepet. 
+* intros. apply PTree.elements_keys_norepet.
 Qed.
 
 Instance TransfSelectionLink : TransfLink match_prog := link_match_program.
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index c89f8435..17a518cd 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -1468,7 +1468,7 @@ End SOUNDNESS.
 
 (** ** Extension to separate compilation *)
 
-(** Following Kang et al, POPL 2016, we now extend the results above 
+(** Following Kang et al, POPL 2016, we now extend the results above
   to the case where only one compilation unit is analyzed, and not the
   whole program.  *)
 
@@ -1485,14 +1485,14 @@ Inductive sound_state: state -> Prop :=
 Theorem sound_step:
   forall st t st', RTL.step ge st t st' -> sound_state st -> sound_state st'.
 Proof.
-  intros. inv H0. constructor; intros. eapply sound_step_base; eauto. 
+  intros. inv H0. constructor; intros. eapply sound_step_base; eauto.
 Qed.
 
 Remark sound_state_inv:
   forall st cunit,
   sound_state st -> linkorder cunit prog -> sound_state_base cunit ge st.
 Proof.
-  intros. inv H. eauto. 
+  intros. inv H. eauto.
 Qed.
 
 End LINKING.
@@ -1700,7 +1700,7 @@ Proof.
   rewrite PTree.gsspec in H2. destruct (peq id id1).
   inv H2. rewrite PTree.gss in H3. discriminate.
   assert (Plt b (Genv.genv_next g)) by (eapply Genv.genv_symb_range; eauto).
-  rewrite PTree.gso in H3 by (apply Plt_ne; auto). 
+  rewrite PTree.gso in H3 by (apply Plt_ne; auto).
   assert (Mem.valid_block m b) by (red; rewrite <- H; auto).
   assert (b <> b1) by (apply Mem.valid_not_valid_diff with m; eauto with mem).
   apply bmatch_inv with m.
@@ -1729,7 +1729,7 @@ Proof.
   intros. eapply Mem.loadbytes_drop; eauto.
   right; right; right. unfold Genv.perm_globvar. rewrite H4, H5. constructor.
 + assert (Plt b (Genv.genv_next g)) by (eapply Genv.genv_symb_range; eauto).
-  rewrite PTree.gso in H3 by (apply Plt_ne; auto). 
+  rewrite PTree.gso in H3 by (apply Plt_ne; auto).
   assert (Mem.valid_block m b) by (red; rewrite <- H; auto).
   assert (b <> b1) by (apply Mem.valid_not_valid_diff with m; eauto with mem).
   apply bmatch_inv with m3.
@@ -1773,14 +1773,14 @@ Lemma alloc_global_consistent:
 Proof.
   intros; red; intros. destruct idg as [id1 [f1 | v1]]; simpl in *.
 - rewrite PTree.grspec in H0. destruct (PTree.elt_eq id id1); try discriminate.
-  rewrite PTree.gso by auto. apply H; auto. 
+  rewrite PTree.gso by auto. apply H; auto.
 - destruct (gvar_readonly v1 && negb (gvar_volatile v1) && definitive_initializer (gvar_init v1)) eqn:RO.
 + InvBooleans. rewrite negb_true_iff in H4.
   rewrite PTree.gsspec in *. destruct (peq id id1).
 * inv H0. exists v1; auto.
 * apply H; auto.
 + rewrite PTree.grspec in H0. destruct (PTree.elt_eq id id1); try discriminate.
-  rewrite PTree.gso by auto. apply H; auto. 
+  rewrite PTree.gso by auto. apply H; auto.
 Qed.
 
 Lemma romem_for_consistent:
@@ -1802,7 +1802,7 @@ Proof.
   exploit (romem_for_consistent cunit); eauto. intros (v & DM & RO & VO & DEFN & AB).
   destruct (prog_defmap_linkorder _ _ _ _ H DM) as (gd & P & Q).
   assert (gd = Gvar v).
-  { inv Q. inv H2. simpl in *. f_equal. f_equal. 
+  { inv Q. inv H2. simpl in *. f_equal. f_equal.
     destruct info1, info2; auto.
     inv H3; auto; discriminate. }
   subst gd. exists v; auto.
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index 4b782286..f905ffa2 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -425,7 +425,7 @@ Proof.
             cmatch (Val.cmpu_bool valid c (Vptr b1 ofs1) (Vptr b2 ofs2))
                    (Maybe (Ptrofs.cmpu c ofs1 ofs2))).
   {
-    intros. subst b2. simpl. destruct Archi.ptr64. 
+    intros. subst b2. simpl. destruct Archi.ptr64.
     constructor.
     rewrite dec_eq_true.
     destruct ((valid b1 (Ptrofs.unsigned ofs1) || valid b1 (Ptrofs.unsigned ofs1 - 1)) &&
@@ -1492,7 +1492,7 @@ Proof.
 - apply vmatch_uns. red; intros. rewrite Int.bits_rol by auto.
   generalize (Int.unsigned_range n); intros.
   rewrite Zmod_small by omega.
-  apply H1. omega. omega. 
+  apply H1. omega. omega.
 - destruct (zlt n0 Int.zwordsize); auto with va.
   apply vmatch_sgn. red; intros. rewrite ! Int.bits_rol by omega.
   generalize (Int.unsigned_range n); intros.
@@ -1732,7 +1732,7 @@ Proof.
     destruct (Int.ltu i0 Int64.iwordsize'); constructor. }
   unfold shift_long. destruct y; auto.
   destruct (Int.ltu n Int64.iwordsize') eqn:LT; auto.
-  destruct x; auto. 
+  destruct x; auto.
   inv H; inv H0. rewrite LT. constructor.
 Qed.
 
@@ -1966,6 +1966,19 @@ Proof.
   rewrite LTU; auto with va.
 Qed.
 
+Definition rolml (x: aval) (amount: int) (mask: int64) :=
+  andl (roll x (I amount)) (L mask).
+
+Lemma rolml_sound:
+  forall v x amount mask,
+  vmatch v x -> vmatch (Val.rolml v amount mask) (rolml x amount mask).
+Proof.
+  intros.
+  replace (Val.rolml v amount mask) with (Val.andl (Val.roll v (Vint amount)) (Vlong mask)).
+  apply andl_sound. apply roll_sound. auto. constructor. constructor.
+  destruct v; auto.
+Qed.
+
 (** Pointer operations *)
 
 Definition offset_ptr (v: aval) (n: ptrofs) :=
@@ -2101,7 +2114,7 @@ Proof.
   apply Z.min_case; auto with va.
 Qed.
 
-Definition longofint (v: aval) := 
+Definition longofint (v: aval) :=
   match v with
   | I i => L (Int64.repr (Int.signed i))
   | _ => ntop1 v
@@ -2113,7 +2126,7 @@ Proof.
   unfold Val.longofint, longofint; intros; inv H; auto with va.
 Qed.
 
-Definition longofintu (v: aval) := 
+Definition longofintu (v: aval) :=
   match v with
   | I i => L (Int64.repr (Int.unsigned i))
   | _ => ntop1 v
@@ -2637,7 +2650,7 @@ Proof.
   assert (IP: forall i b ofs,
     cmatch (Val.cmpu_bool valid c (Vint i) (Vptr b ofs)) (cmp_different_blocks c)).
   {
-    intros. simpl. destruct Archi.ptr64. 
+    intros. simpl. destruct Archi.ptr64.
     apply cmp_different_blocks_none.
     destruct (Int.eq i Int.zero && (valid b (Ptrofs.unsigned ofs) || valid b (Ptrofs.unsigned ofs - 1))).
     apply cmp_different_blocks_sound. apply cmp_different_blocks_none.
@@ -2645,7 +2658,7 @@ Proof.
   assert (PI: forall i b ofs,
     cmatch (Val.cmpu_bool valid c (Vptr b ofs) (Vint i)) (cmp_different_blocks c)).
   {
-    intros. simpl. destruct Archi.ptr64. 
+    intros. simpl. destruct Archi.ptr64.
     apply cmp_different_blocks_none.
     destruct (Int.eq i Int.zero && (valid b (Ptrofs.unsigned ofs) || valid b (Ptrofs.unsigned ofs - 1))).
     apply cmp_different_blocks_sound. apply cmp_different_blocks_none.
@@ -2942,7 +2955,7 @@ Proof with (auto using provenance_monotone with va).
 - destruct (zlt n2 16); constructor...
 - destruct ptr64...
 - destruct ptr64...
-- destruct ptr64... 
+- destruct ptr64...
 - destruct ptr64...
 - destruct ptr64...
 - destruct ptr64...
@@ -3511,7 +3524,7 @@ Proof.
 - unfold ablock_load_anywhere; intros; congruence.
 - assert (A: forall i, ZTree.get i (ab_contents ab1) = ZTree.get i (ab_contents ab2)).
   {
-    intros. exploit ZTree.beq_sound; eauto. instantiate (1 := i). 
+    intros. exploit ZTree.beq_sound; eauto. instantiate (1 := i).
     destruct (ab_contents ab1)##i, (ab_contents ab2)##i; intros; try contradiction.
     InvBooleans; subst; auto.
     auto. }
@@ -3569,7 +3582,7 @@ Proof.
   { exploit smatch_lub_l; eauto. instantiate (1 := ab_summary y).
     intros [SUMM _]. eapply vnormalize_cast; eauto. }
   exploit BM2; eauto.
-  unfold ablock_load; simpl. rewrite ZTree.gcombine by auto. 
+  unfold ablock_load; simpl. rewrite ZTree.gcombine by auto.
   unfold combine_acontents;
   destruct (ab_contents x)##ofs as [[chunkx avx]|], (ab_contents y)##ofs as [[chunky avy]|]; auto.
   destruct (chunk_eq chunkx chunky); auto. subst chunky.
@@ -3588,7 +3601,7 @@ Proof.
   { exploit smatch_lub_r; eauto. instantiate (1 := ab_summary x).
     intros [SUMM _]. eapply vnormalize_cast; eauto. }
   exploit BM2; eauto.
-  unfold ablock_load; simpl. rewrite ZTree.gcombine by auto. 
+  unfold ablock_load; simpl. rewrite ZTree.gcombine by auto.
   unfold combine_acontents;
   destruct (ab_contents x)##ofs as [[chunkx avx]|], (ab_contents y)##ofs as [[chunky avy]|]; auto.
   destruct (chunk_eq chunkx chunky); auto. subst chunky.
diff --git a/common/AST.v b/common/AST.v
index 34f29bb3..8a46a153 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -111,7 +111,7 @@ Definition cc_default :=
 
 Definition calling_convention_eq (x y: calling_convention) : {x=y} + {x<>y}.
 Proof.
-  decide equality; apply bool_dec. 
+  decide equality; apply bool_dec.
 Defined.
 Global Opaque calling_convention_eq.
 
@@ -301,7 +301,7 @@ Lemma prog_defmap_unique:
   ~In id (map fst defs2) ->
   (prog_defmap p)!id = Some g.
 Proof.
-  unfold prog_defmap; intros. rewrite H. apply PTree_Properties.of_list_unique; auto. 
+  unfold prog_defmap; intros. rewrite H. apply PTree_Properties.of_list_unique; auto.
 Qed.
 
 Lemma prog_defmap_norepet:
@@ -408,7 +408,7 @@ Proof.
               OK (List.map (transform_program_globdef transf_fun) l)).
   { induction l as [ | [id g] l]; simpl.
   - auto.
-  - destruct g; simpl; rewrite IHl; simpl. auto. destruct v; auto. 
+  - destruct g; simpl; rewrite IHl; simpl. auto. destruct v; auto.
   }
   rewrite EQ; simpl. auto.
 Qed.
@@ -563,7 +563,7 @@ End TRANSF_PARTIAL_FUNDEF.
 Set Contextual Implicit.
 
 (** In some intermediate languages (LTL, Mach), 64-bit integers can be
-  split into two 32-bit halves and held in a pair of registers.  
+  split into two 32-bit halves and held in a pair of registers.
   Syntactically, this is captured by the type [rpair] below. *)
 
 Inductive rpair (A: Type) : Type :=
@@ -589,7 +589,7 @@ Definition regs_of_rpair (A: Type) (p: rpair A): list A :=
   end.
 
 Fixpoint regs_of_rpairs (A: Type) (l: list (rpair A)): list A :=
-  match l with 
+  match l with
   | nil => nil
   | p :: l => regs_of_rpair p ++ regs_of_rpairs l
   end.
@@ -603,8 +603,8 @@ Qed.
 Lemma in_regs_of_rpairs_inv:
   forall (A: Type) (x: A) l, In x (regs_of_rpairs l) -> exists p, In p l /\ In x (regs_of_rpair p).
 Proof.
-  induction l; simpl; intros. contradiction. 
-  rewrite in_app_iff in H; destruct H. 
+  induction l; simpl; intros. contradiction.
+  rewrite in_app_iff in H; destruct H.
   exists a; auto.
   apply IHl in H. firstorder auto.
 Qed.
diff --git a/common/Events.v b/common/Events.v
index 97d4f072..14cd27c5 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -976,7 +976,7 @@ Proof.
   {
     intros.
     apply Mem.unchanged_on_implies with (fun b1 ofs1 => b1 <> b).
-    apply Mem.unchanged_on_trans with m'. 
+    apply Mem.unchanged_on_trans with m'.
     eapply Mem.alloc_unchanged_on; eauto.
     eapply Mem.store_unchanged_on; eauto.
     intros. eapply Mem.valid_not_valid_diff; eauto with mem.
@@ -997,7 +997,7 @@ Proof.
 (* mem extends *)
 - inv H. inv H1. inv H7.
   assert (SZ: v2 = Vptrofs sz).
-  { unfold Vptrofs in *. destruct Archi.ptr64; inv H5; auto. } 
+  { unfold Vptrofs in *. destruct Archi.ptr64; inv H5; auto. }
   subst v2.
   exploit Mem.alloc_extends; eauto. apply Zle_refl. apply Zle_refl.
   intros [m3' [A B]].
@@ -1009,7 +1009,7 @@ Proof.
 (* mem injects *)
 - inv H0. inv H2. inv H8.
   assert (SZ: v' = Vptrofs sz).
-  { unfold Vptrofs in *. destruct Archi.ptr64; inv H6; auto. } 
+  { unfold Vptrofs in *. destruct Archi.ptr64; inv H6; auto. }
   subst v'.
   exploit Mem.alloc_parallel_inject; eauto. apply Zle_refl. apply Zle_refl.
   intros [f' [m3' [b' [ALLOC [A [B [C D]]]]]]].
@@ -1036,7 +1036,7 @@ Proof.
     rewrite <- (Ptrofs.of_int64_to_int64 SF sz0), <- (Ptrofs.of_int64_to_int64 SF sz). congruence.
     rewrite <- (Ptrofs.of_int_to_int SF sz0), <- (Ptrofs.of_int_to_int SF sz). congruence.
   }
-  subst. 
+  subst.
   split. constructor. intuition congruence.
 Qed.
 
@@ -1093,7 +1093,7 @@ Proof.
     eapply Mem.free_range_perm; eauto.
   exploit Mem.address_inject; eauto.
     apply Mem.perm_implies with Freeable; auto with mem.
-    apply P. instantiate (1 := lo). 
+    apply P. instantiate (1 := lo).
     generalize (size_chunk_pos Mptr); omega.
   intro EQ.
   exploit Mem.free_parallel_inject; eauto. intros (m2' & C & D).
diff --git a/common/Globalenvs.v b/common/Globalenvs.v
index 9affd634..dd8a1eb9 100644
--- a/common/Globalenvs.v
+++ b/common/Globalenvs.v
@@ -259,7 +259,7 @@ Lemma add_globals_app:
   forall gl2 gl1 ge,
   add_globals ge (gl1 ++ gl2) = add_globals (add_globals ge gl1) gl2.
 Proof.
-  intros. apply fold_left_app. 
+  intros. apply fold_left_app.
 Qed.
 
 Program Definition empty_genv (pub: list ident): t :=
@@ -429,17 +429,17 @@ Proof.
   { induction l as [ | [id1 g1] l]; intros; simpl.
   - auto.
   - apply IHl. unfold P, add_global, find_symbol, find_def; simpl.
-    rewrite ! PTree.gsspec. destruct (peq id id1). 
+    rewrite ! PTree.gsspec. destruct (peq id id1).
     + subst id1. split; intros.
       inv H0. exists (genv_next ge); split; auto. apply PTree.gss.
       destruct H0 as (b & A & B). inv A. rewrite PTree.gss in B. auto.
     + red in H; rewrite H. split.
-      intros (b & A & B). exists b; split; auto. rewrite PTree.gso; auto. 
-      apply Plt_ne. eapply genv_symb_range; eauto. 
-      intros (b & A & B). rewrite PTree.gso in B. exists b; auto. 
-      apply Plt_ne. eapply genv_symb_range; eauto. 
+      intros (b & A & B). exists b; split; auto. rewrite PTree.gso; auto.
+      apply Plt_ne. eapply genv_symb_range; eauto.
+      intros (b & A & B). rewrite PTree.gso in B. exists b; auto.
+      apply Plt_ne. eapply genv_symb_range; eauto.
   }
-  apply REC. unfold P, find_symbol, find_def; simpl. 
+  apply REC. unfold P, find_symbol, find_def; simpl.
   rewrite ! PTree.gempty. split.
   congruence.
   intros (b & A & B); congruence.
@@ -770,12 +770,12 @@ Remark store_init_data_perm:
   store_init_data m b p i = Some m' ->
   (Mem.perm m b' q k prm <-> Mem.perm m' b' q k prm).
 Proof.
-  intros. 
+  intros.
   assert (forall chunk v,
           Mem.store chunk m b p v = Some m' ->
           (Mem.perm m b' q k prm <-> Mem.perm m' b' q k prm)).
     intros; split; eauto with mem.
-  destruct i; simpl in H; eauto. 
+  destruct i; simpl in H; eauto.
   inv H; tauto.
   destruct (find_symbol ge i); try discriminate. eauto.
 Qed.
@@ -788,7 +788,7 @@ Proof.
   induction idl as [ | i1 idl]; simpl; intros.
 - inv H; tauto.
 - destruct (store_init_data m b p i1) as [m1|] eqn:S1; try discriminate.
-  transitivity (Mem.perm m1 b' q k prm). 
+  transitivity (Mem.perm m1 b' q k prm).
   eapply store_init_data_perm; eauto.
   eapply IHidl; eauto.
 Qed.
@@ -849,8 +849,8 @@ Proof.
   intros until n. functional induction (store_zeros m b p n); intros.
 - inv H; apply Mem.unchanged_on_refl.
 - apply Mem.unchanged_on_trans with m'.
-+ eapply Mem.store_unchanged_on; eauto. simpl. intros. apply H0. omega. 
-+ apply IHo; auto. intros; apply H0; omega. 
++ eapply Mem.store_unchanged_on; eauto. simpl. intros. apply H0. omega.
++ apply IHo; auto. intros; apply H0; omega.
 - discriminate.
 Qed.
 
@@ -878,7 +878,7 @@ Proof.
 - inv H. apply Mem.unchanged_on_refl.
 - destruct (store_init_data m b p a) as [m1|] eqn:?; try congruence.
   apply Mem.unchanged_on_trans with m1.
-  eapply store_init_data_unchanged; eauto. intros; apply H0; tauto. 
+  eapply store_init_data_unchanged; eauto. intros; apply H0; tauto.
   eapply IHil; eauto. intros; apply H0. generalize (init_data_size_pos a); omega.
 Qed.
 
@@ -947,8 +947,8 @@ Proof.
   intros; destruct i; simpl in H; try apply (Mem.loadbytes_store_same _ _ _ _ _ _ H).
 - inv H. simpl.
   assert (EQ: Z.of_nat (Z.to_nat z) = Z.max z 0).
-  { destruct (zle 0 z). rewrite Z2Nat.id; xomega. destruct z; try discriminate. simpl. xomega. } 
-  rewrite <- EQ. apply H0. omega. simpl. omega. 
+  { destruct (zle 0 z). rewrite Z2Nat.id; xomega. destruct z; try discriminate. simpl. xomega. }
+  rewrite <- EQ. apply H0. omega. simpl. omega.
 - rewrite init_data_size_addrof. simpl.
   destruct (find_symbol ge i) as [b'|]; try discriminate.
   rewrite (Mem.loadbytes_store_same _ _ _ _ _ _ H).
@@ -976,14 +976,14 @@ Proof.
   eapply store_init_data_list_unchanged; eauto.
   intros; omega.
   intros; omega.
-  eapply store_init_data_loadbytes; eauto. 
+  eapply store_init_data_loadbytes; eauto.
   red; intros; apply H0. omega. omega.
   apply IHil with m1; auto.
-  red; intros. 
+  red; intros.
   eapply Mem.loadbytes_unchanged_on with (P := fun b1 ofs1 => p + init_data_size i1 <= ofs1).
-  eapply store_init_data_unchanged; eauto. 
+  eapply store_init_data_unchanged; eauto.
+  intros; omega.
   intros; omega.
-  intros; omega. 
   apply H0. omega. omega.
   auto. auto.
 Qed.
@@ -1010,9 +1010,9 @@ Remark read_as_zero_unchanged:
   (forall i, ofs <= i < ofs + len -> P b i) ->
   read_as_zero m' b ofs len.
 Proof.
-  intros; red; intros. eapply Mem.load_unchanged_on; eauto. 
-  intros; apply H1. omega. 
-Qed. 
+  intros; red; intros. eapply Mem.load_unchanged_on; eauto.
+  intros; apply H1. omega.
+Qed.
 
 Lemma store_zeros_read_as_zero:
   forall m b p n m',
@@ -1078,7 +1078,7 @@ Proof.
   exploit IHil; eauto.
   set (P := fun (b': block) ofs' => p + init_data_size a <= ofs').
   apply read_as_zero_unchanged with (m := m) (P := P).
-  red; intros; apply H0; auto. generalize (init_data_size_pos a); omega. omega. 
+  red; intros; apply H0; auto. generalize (init_data_size_pos a); omega. omega.
   eapply store_init_data_unchanged with (P := P); eauto.
   intros; unfold P. omega.
   intros; unfold P. omega.
@@ -1094,7 +1094,7 @@ Proof.
   set (P := fun (b': block) ofs' => ofs' < p + init_data_size (Init_space z)).
   inv Heqo. apply read_as_zero_unchanged with (m := m1) (P := P).
   red; intros. apply H0; auto. simpl. generalize (init_data_list_size_pos il); xomega.
-  eapply store_init_data_list_unchanged; eauto. 
+  eapply store_init_data_list_unchanged; eauto.
   intros; unfold P. omega.
   intros; unfold P. simpl; xomega.
 + rewrite init_data_size_addrof in *.
@@ -1118,7 +1118,7 @@ Proof.
   apply Mem.unchanged_on_implies with Q.
   apply Mem.unchanged_on_trans with m1.
   eapply Mem.alloc_unchanged_on; eauto.
-  eapply Mem.drop_perm_unchanged_on; eauto. 
+  eapply Mem.drop_perm_unchanged_on; eauto.
   intros; red. apply Mem.valid_not_valid_diff with m; eauto with mem.
 - (* variable *)
   set (init := gvar_init v) in *.
@@ -1133,8 +1133,8 @@ Proof.
   apply Mem.unchanged_on_trans with m2.
   eapply store_zeros_unchanged; eauto.
   apply Mem.unchanged_on_trans with m3.
-  eapply store_init_data_list_unchanged; eauto. 
-  eapply Mem.drop_perm_unchanged_on; eauto. 
+  eapply store_init_data_list_unchanged; eauto.
+  eapply Mem.drop_perm_unchanged_on; eauto.
   intros; red. apply Mem.valid_not_valid_diff with m; eauto with mem.
 Qed.
 
@@ -1147,7 +1147,7 @@ Proof.
 - inv H. apply Mem.unchanged_on_refl.
 - destruct (alloc_global m a) as [m''|] eqn:?; try discriminate.
   destruct a as [id g].
-  apply Mem.unchanged_on_trans with m''. 
+  apply Mem.unchanged_on_trans with m''.
   eapply alloc_global_unchanged; eauto.
   apply IHgl; auto.
 Qed.
@@ -1196,7 +1196,7 @@ Proof.
   exploit Mem.alloc_result; eauto. intros RES.
   rewrite H, <- RES. split.
   eapply Mem.perm_drop_1; eauto. omega.
-  intros. 
+  intros.
   assert (0 <= ofs < 1). { eapply Mem.perm_alloc_3; eauto. eapply Mem.perm_drop_4; eauto. }
   exploit Mem.perm_drop_2; eauto. intros ORD.
   split. omega. inv ORD; auto.
@@ -1210,35 +1210,35 @@ Proof.
   split. red; intros. eapply Mem.perm_drop_1; eauto.
   split. intros.
   assert (0 <= ofs < sz).
-  { eapply Mem.perm_alloc_3; eauto. 
+  { eapply Mem.perm_alloc_3; eauto.
     erewrite store_zeros_perm by eauto.
-    erewrite store_init_data_list_perm by eauto. 
+    erewrite store_init_data_list_perm by eauto.
     eapply Mem.perm_drop_4; eauto. }
   split; auto.
   eapply Mem.perm_drop_2; eauto.
   split. intros NOTVOL. apply load_store_init_data_invariant with m3.
-  intros. eapply Mem.load_drop; eauto. right; right; right. 
+  intros. eapply Mem.load_drop; eauto. right; right; right.
   unfold perm_globvar. rewrite NOTVOL. destruct (gvar_readonly v); auto with mem.
-  eapply store_init_data_list_charact; eauto. 
+  eapply store_init_data_list_charact; eauto.
   eapply store_zeros_read_as_zero; eauto.
-  intros NOTVOL. 
-  transitivity (Mem.loadbytes m3 b 0 sz). 
+  intros NOTVOL.
+  transitivity (Mem.loadbytes m3 b 0 sz).
   eapply Mem.loadbytes_drop; eauto. right; right; right.
   unfold perm_globvar. rewrite NOTVOL. destruct (gvar_readonly v); auto with mem.
   eapply store_init_data_list_loadbytes; eauto.
   eapply store_zeros_loadbytes; eauto.
 + assert (U: Mem.unchanged_on (fun _ _ => True) m m') by (eapply alloc_global_unchanged; eauto).
   assert (VALID: Mem.valid_block m b).
-  { red. rewrite <- H. eapply genv_defs_range; eauto. } 
+  { red. rewrite <- H. eapply genv_defs_range; eauto. }
   exploit H1; eauto.
   destruct gd0 as [f|v].
 * intros [A B]; split; intros.
   eapply Mem.perm_unchanged_on; eauto. exact I.
   eapply B. eapply Mem.perm_unchanged_on_2; eauto. exact I.
-* intros (A & B & C & D). split; [| split; [| split]]. 
+* intros (A & B & C & D). split; [| split; [| split]].
   red; intros. eapply Mem.perm_unchanged_on; eauto. exact I.
   intros. eapply B. eapply Mem.perm_unchanged_on_2; eauto. exact I.
-  intros. apply load_store_init_data_invariant with m; auto. 
+  intros. apply load_store_init_data_invariant with m; auto.
   intros. eapply Mem.load_unchanged_on_1; eauto. intros; exact I.
   intros. eapply Mem.loadbytes_unchanged_on; eauto. intros; exact I.
 - simpl. congruence.
@@ -1312,7 +1312,7 @@ Lemma init_mem_characterization_gen:
   init_mem p = Some m ->
   globals_initialized (globalenv p) (globalenv p) m.
 Proof.
-  intros. apply alloc_globals_initialized with Mem.empty. 
+  intros. apply alloc_globals_initialized with Mem.empty.
   auto.
   rewrite Mem.nextblock_empty. auto.
   red; intros. unfold find_def in H0; simpl in H0; rewrite PTree.gempty in H0; discriminate.
@@ -1499,7 +1499,7 @@ Proof.
   { intros. apply Mem.store_valid_access_3 in H0. destruct H0. congruence. }
   destruct i; simpl in H; eauto.
   simpl. apply Z.divide_1_l.
-  destruct (find_symbol ge i); try discriminate. eapply DFL. eassumption. 
+  destruct (find_symbol ge i); try discriminate. eapply DFL. eassumption.
   unfold Mptr, init_data_alignment; destruct Archi.ptr64; auto.
 Qed.
 
@@ -1537,14 +1537,14 @@ Theorem init_mem_inversion:
   init_data_list_aligned 0 v.(gvar_init)
   /\ forall i o, In (Init_addrof i o) v.(gvar_init) -> exists b, find_symbol (globalenv p) i = Some b.
 Proof.
-  intros until v. unfold init_mem. set (ge := globalenv p). 
-  revert m. generalize Mem.empty. generalize (prog_defs p). 
+  intros until v. unfold init_mem. set (ge := globalenv p).
+  revert m. generalize Mem.empty. generalize (prog_defs p).
   induction l as [ | idg1 defs ]; simpl; intros m m'; intros.
 - contradiction.
 - destruct (alloc_global ge m idg1) as [m''|] eqn:A; try discriminate.
   destruct H0.
-+ subst idg1; simpl in A. 
-  set (il := gvar_init v) in *. set (sz := init_data_list_size il) in *. 
++ subst idg1; simpl in A.
+  set (il := gvar_init v) in *. set (sz := init_data_list_size il) in *.
   destruct (Mem.alloc m 0 sz) as [m1 b].
   destruct (store_zeros m1 b 0 sz) as [m2|]; try discriminate.
   destruct (store_init_data_list ge m2 b 0 il) as [m3|] eqn:B; try discriminate.
@@ -1565,7 +1565,7 @@ Proof.
 - exists m; auto.
 - apply IHo. red; intros. eapply Mem.perm_store_1; eauto. apply PERM. omega.
 - destruct (Mem.valid_access_store m Mint8unsigned b p Vzero) as (m' & STORE).
-  split. red; intros. apply Mem.perm_cur. apply PERM. simpl in H. omega. 
+  split. red; intros. apply Mem.perm_cur. apply PERM. simpl in H. omega.
   simpl. apply Z.divide_1_l.
   congruence.
 Qed.
@@ -1577,17 +1577,17 @@ Lemma store_init_data_exists:
   (forall id ofs, i = Init_addrof id ofs -> exists b, find_symbol ge id = Some b) ->
   exists m', store_init_data ge m b p i = Some m'.
 Proof.
-  intros. 
+  intros.
   assert (DFL: forall chunk v,
           init_data_size i = size_chunk chunk ->
           init_data_alignment i = align_chunk chunk ->
           exists m', Mem.store chunk m b p v = Some m').
   { intros. destruct (Mem.valid_access_store m chunk b p v) as (m' & STORE).
-    split. rewrite <- H2; auto. rewrite <- H3; auto. 
+    split. rewrite <- H2; auto. rewrite <- H3; auto.
     exists m'; auto. }
   destruct i; eauto.
   simpl. exists m; auto.
-  simpl. exploit H1; eauto. intros (b1 & FS). rewrite FS. eapply DFL. 
+  simpl. exploit H1; eauto. intros (b1 & FS). rewrite FS. eapply DFL.
   unfold init_data_size, Mptr. destruct Archi.ptr64; auto.
   unfold init_data_alignment, Mptr. destruct Archi.ptr64; auto.
 Qed.
@@ -1601,11 +1601,11 @@ Lemma store_init_data_list_exists:
 Proof.
   induction il as [ | i1 il ]; simpl; intros.
 - exists m; auto.
-- destruct H0. 
+- destruct H0.
   destruct (@store_init_data_exists m b p i1) as (m1 & S1); eauto.
   red; intros. apply H. generalize (init_data_list_size_pos il); omega.
-  rewrite S1. 
-  apply IHil; eauto. 
+  rewrite S1.
+  apply IHil; eauto.
   red; intros. erewrite <- store_init_data_perm by eauto. apply H. generalize (init_data_size_pos i1); omega.
 Qed.
 
@@ -1622,7 +1622,7 @@ Proof.
   intros m [id [f|v]]; intros; simpl.
 - destruct (Mem.alloc m 0 1) as [m1 b] eqn:ALLOC.
   destruct (Mem.range_perm_drop_2 m1 b 0 1 Nonempty) as [m2 DROP].
-  red; intros; eapply Mem.perm_alloc_2; eauto. 
+  red; intros; eapply Mem.perm_alloc_2; eauto.
   exists m2; auto.
 - destruct H as [P Q].
   set (sz := init_data_list_size (gvar_init v)).
@@ -1651,14 +1651,14 @@ Theorem init_mem_exists:
      /\ forall i o, In (Init_addrof i o) v.(gvar_init) -> exists b, find_symbol (globalenv p) i = Some b) ->
   exists m, init_mem p = Some m.
 Proof.
-  intros. set (ge := globalenv p) in *. 
+  intros. set (ge := globalenv p) in *.
   unfold init_mem. revert H. generalize (prog_defs p) Mem.empty.
   induction l as [ | idg l]; simpl; intros.
 - exists m; auto.
 - destruct (@alloc_global_exists ge m idg) as [m1 A1].
   destruct idg as [id [f|v]]; eauto.
-  fold ge. rewrite A1. eapply IHl; eauto. 
-Qed. 
+  fold ge. rewrite A1. eapply IHl; eauto.
+Qed.
 
 End GENV.
 
@@ -1685,8 +1685,8 @@ Lemma add_global_match:
 Proof.
   intros. destruct H. constructor; simpl; intros.
 - congruence.
-- rewrite mge_next0, ! PTree.gsspec. destruct (peq id0 id); auto. 
-- rewrite mge_next0, ! PTree.gsspec. destruct (peq b (genv_next ge1)). 
+- rewrite mge_next0, ! PTree.gsspec. destruct (peq id0 id); auto.
+- rewrite mge_next0, ! PTree.gsspec. destruct (peq b (genv_next ge1)).
   constructor; auto.
   auto.
 Qed.
@@ -1718,7 +1718,7 @@ Hypothesis progmatch: match_program_gen match_fundef match_varinfo ctx p tp.
 Lemma globalenvs_match:
   match_genvs (match_globdef match_fundef match_varinfo ctx) (globalenv p) (globalenv tp).
 Proof.
-  intros. apply add_globals_match. apply progmatch. 
+  intros. apply add_globals_match. apply progmatch.
   constructor; simpl; intros; auto. rewrite ! PTree.gempty. constructor.
 Qed.
 
@@ -1734,7 +1734,7 @@ Theorem find_def_match:
   find_def (globalenv tp) b = Some tg /\ match_globdef match_fundef match_varinfo ctx g tg.
 Proof.
   intros. generalize (find_def_match_2 b). rewrite H; intros R; inv R.
-  exists y; auto. 
+  exists y; auto.
 Qed.
 
 Theorem find_funct_ptr_match:
@@ -1743,8 +1743,8 @@ Theorem find_funct_ptr_match:
   exists cunit tf,
   find_funct_ptr (globalenv tp) b = Some tf /\ match_fundef cunit f tf /\ linkorder cunit ctx.
 Proof.
-  intros. rewrite find_funct_ptr_iff in *. apply find_def_match in H. 
-  destruct H as (tg & P & Q). inv Q. 
+  intros. rewrite find_funct_ptr_iff in *. apply find_def_match in H.
+  destruct H as (tg & P & Q). inv Q.
   exists ctx', f2; intuition auto. apply find_funct_ptr_iff; auto.
 Qed.
 
@@ -1766,8 +1766,8 @@ Theorem find_var_info_match:
   exists tv,
   find_var_info (globalenv tp) b = Some tv /\ match_globvar match_varinfo v tv.
 Proof.
-  intros. rewrite find_var_info_iff in *. apply find_def_match in H. 
-  destruct H as (tg & P & Q). inv Q. 
+  intros. rewrite find_var_info_iff in *. apply find_def_match in H.
+  destruct H as (tg & P & Q). inv Q.
   exists v2; split; auto. apply find_var_info_iff; auto.
 Qed.
 
@@ -1783,10 +1783,10 @@ Theorem senv_match:
 Proof.
   red; simpl. repeat split.
 - apply find_symbol_match.
-- intros. unfold public_symbol. rewrite find_symbol_match. 
-  rewrite ! globalenv_public. 
+- intros. unfold public_symbol. rewrite find_symbol_match.
+  rewrite ! globalenv_public.
   destruct progmatch as (P & Q & R). rewrite R. auto.
-- intros. unfold block_is_volatile. 
+- intros. unfold block_is_volatile.
   destruct globalenvs_match as [P Q R]. specialize (R b).
   unfold find_var_info, find_def.
   inv R; auto.
@@ -1820,7 +1820,7 @@ Proof.
   { destruct a1 as [id1 g1]; destruct b1 as [id2 g2]; destruct H; simpl in *.
     subst id2. inv H2.
   - auto.
-  - inv H; simpl in *. 
+  - inv H; simpl in *.
     set (sz := init_data_list_size init) in *.
     destruct (Mem.alloc m 0 sz) as [m2 b] eqn:?.
     destruct (store_zeros m2 b 0 sz) as [m3|] eqn:?; try discriminate.
@@ -1853,7 +1853,7 @@ Theorem find_funct_ptr_transf_partial:
   exists tf,
   find_funct_ptr (globalenv tp) b = Some tf /\ transf f = OK tf.
 Proof.
-  intros. exploit (find_funct_ptr_match progmatch); eauto. 
+  intros. exploit (find_funct_ptr_match progmatch); eauto.
   intros (cu & tf & P & Q & R); exists tf; auto.
 Qed.
 
@@ -1863,7 +1863,7 @@ Theorem find_funct_transf_partial:
   exists tf,
   find_funct (globalenv tp) v = Some tf /\ transf f = OK tf.
 Proof.
-  intros. exploit (find_funct_match progmatch); eauto. 
+  intros. exploit (find_funct_match progmatch); eauto.
   intros (cu & tf & P & Q & R); exists tf; auto.
 Qed.
 
@@ -1871,7 +1871,7 @@ Theorem find_symbol_transf_partial:
   forall (s : ident),
   find_symbol (globalenv tp) s = find_symbol (globalenv p) s.
 Proof.
-  intros. eapply (find_symbol_match progmatch). 
+  intros. eapply (find_symbol_match progmatch).
 Qed.
 
 Theorem senv_transf_partial:
@@ -1901,7 +1901,7 @@ Theorem find_funct_ptr_transf:
   find_funct_ptr (globalenv p) b = Some f ->
   find_funct_ptr (globalenv tp) b = Some (transf f).
 Proof.
-  intros. exploit (find_funct_ptr_match progmatch); eauto. 
+  intros. exploit (find_funct_ptr_match progmatch); eauto.
   intros (cu & tf & P & Q & R). congruence.
 Qed.
 
@@ -1910,7 +1910,7 @@ Theorem find_funct_transf:
   find_funct (globalenv p) v = Some f ->
   find_funct (globalenv tp) v = Some (transf f).
 Proof.
-  intros. exploit (find_funct_match progmatch); eauto. 
+  intros. exploit (find_funct_match progmatch); eauto.
   intros (cu & tf & P & Q & R). congruence.
 Qed.
 
diff --git a/common/Linking.v b/common/Linking.v
index 52e774db..eaa95462 100644
--- a/common/Linking.v
+++ b/common/Linking.v
@@ -111,13 +111,13 @@ Inductive linkorder_varinit: list init_data -> list init_data -> Prop :=
 
 Instance Linker_varinit : Linker (list init_data) := {
   link := link_varinit;
-  linkorder := linkorder_varinit    
+  linkorder := linkorder_varinit
 }.
 Proof.
 - intros. constructor.
 - intros. inv H; inv H0; constructor; auto.
   congruence.
-  simpl. generalize (init_data_list_size_pos z). xomega. 
+  simpl. generalize (init_data_list_size_pos z). xomega.
 - unfold link_varinit; intros until z.
   destruct (classify_init x) eqn:Cx, (classify_init y) eqn:Cy; intros E; inv E; try (split; constructor; fail).
 + destruct (zeq sz (Z.max sz0 0 + 0)); inv H0.
@@ -159,7 +159,7 @@ Instance Linker_vardef (V: Type) {LV: Linker V}: Linker (globvar V) := {
   linkorder := linkorder_vardef
 }.
 Proof.
-- intros. destruct x; constructor; apply linkorder_refl. 
+- intros. destruct x; constructor; apply linkorder_refl.
 - intros. inv H; inv H0. constructor; eapply linkorder_trans; eauto.
 - unfold link_vardef; intros until z.
   destruct x as [f1 i1 r1 v1], y as [f2 i2 r2 v2]; simpl.
@@ -214,7 +214,7 @@ Instance Linker_def (F V: Type) {LF: Linker F} {LV: Linker V}: Linker (globdef F
 Proof.
 - intros. destruct x; constructor; apply linkorder_refl.
 - intros. inv H; inv H0; constructor; eapply linkorder_trans; eauto.
-- unfold link_def; intros.  
+- unfold link_def; intros.
   destruct x as [f1|v1], y as [f2|v2]; try discriminate.
 + destruct (link f1 f2) as [f|] eqn:L; inv H. apply link_linkorder in L.
   split; constructor; tauto.
@@ -229,7 +229,7 @@ Global Opaque Linker_def.
   a global definition in one unit but not in the other, this definition
   is left unchanged in the result of the link.  If a name has
   global definitions in both units, and is public (not static) in both,
-  the two definitions are linked as per [Linker_def] above.  
+  the two definitions are linked as per [Linker_def] above.
 
   If one or both definitions are static (not public), we should ideally
   rename it so that it can be kept unchanged in the result of the link.
@@ -284,8 +284,8 @@ Proof.
   unfold link_prog; intros p E.
   destruct (ident_eq (prog_main p1) (prog_main p2)); try discriminate.
   destruct (PTree_Properties.for_all dm1 link_prog_check) eqn:C; inv E.
-  rewrite PTree_Properties.for_all_correct in C. 
-  split; auto. split; auto. 
+  rewrite PTree_Properties.for_all_correct in C.
+  split; auto. split; auto.
   intros. exploit C; eauto. unfold link_prog_check. rewrite H0. intros.
   destruct (in_dec peq id (prog_public p1)); try discriminate.
   destruct (in_dec peq id (prog_public p2)); try discriminate.
@@ -303,7 +303,7 @@ Lemma link_prog_succeeds:
             prog_public := p1.(prog_public) ++ p2.(prog_public);
             prog_defs := PTree.elements (PTree.combine link_prog_merge dm1 dm2) |}.
 Proof.
-  intros. unfold link_prog. unfold proj_sumbool. rewrite H, dec_eq_true. simpl. 
+  intros. unfold link_prog. unfold proj_sumbool. rewrite H, dec_eq_true. simpl.
   replace (PTree_Properties.for_all dm1 link_prog_check) with true; auto.
   symmetry. apply PTree_Properties.for_all_correct; intros. rename a into gd1.
   unfold link_prog_check. destruct dm2!x as [gd2|] eqn:G2; auto.
@@ -334,29 +334,29 @@ Instance Linker_prog (F V: Type) {LF: Linker F} {LV: Linker V} : Linker (program
 }.
 Proof.
 - intros; split; auto. split. apply incl_refl. intros.
-  exists gd1; split; auto. split; auto. apply linkorder_refl. 
+  exists gd1; split; auto. split; auto. apply linkorder_refl.
 
-- intros x y z (A1 & B1 & C1) (A2 & B2 & C2). 
-  split. congruence. split. red; eauto. 
-  intros. exploit C1; eauto. intros (gd2 & P & Q & R). 
-  exploit C2; eauto. intros (gd3 & U & X & Y). 
-  exists gd3. split; auto. split. eapply linkorder_trans; eauto. 
-  intros. transitivity gd2. apply Y. auto. apply R. red; intros; elim H0; auto. 
+- intros x y z (A1 & B1 & C1) (A2 & B2 & C2).
+  split. congruence. split. red; eauto.
+  intros. exploit C1; eauto. intros (gd2 & P & Q & R).
+  exploit C2; eauto. intros (gd3 & U & X & Y).
+  exists gd3. split; auto. split. eapply linkorder_trans; eauto.
+  intros. transitivity gd2. apply Y. auto. apply R. red; intros; elim H0; auto.
 
 - intros. apply link_prog_inv in H. destruct H as (L1 & L2 & L3).
   subst z; simpl. intuition auto.
 + red; intros; apply in_app_iff; auto.
-+ rewrite prog_defmap_elements, PTree.gcombine, H by auto. 
++ rewrite prog_defmap_elements, PTree.gcombine, H by auto.
   destruct (prog_defmap y)!id as [gd2|] eqn:GD2; simpl.
 * exploit L2; eauto. intros (P & Q & gd & R).
-  exists gd; split. auto. split. apply link_linkorder in R; tauto. 
+  exists gd; split. auto. split. apply link_linkorder in R; tauto.
   rewrite in_app_iff; tauto.
 * exists gd1; split; auto. split. apply linkorder_refl. auto.
 + red; intros; apply in_app_iff; auto.
-+ rewrite prog_defmap_elements, PTree.gcombine, H by auto. 
++ rewrite prog_defmap_elements, PTree.gcombine, H by auto.
   destruct (prog_defmap x)!id as [gd2|] eqn:GD2; simpl.
 * exploit L2; eauto. intros (P & Q & gd & R).
-  exists gd; split. auto. split. apply link_linkorder in R; tauto. 
+  exists gd; split. auto. split. apply link_linkorder in R; tauto.
   rewrite in_app_iff; tauto.
 * exists gd1; split; auto. split. apply linkorder_refl. auto.
 Defined.
@@ -417,24 +417,24 @@ Theorem match_program_defmap:
   forall ctx p1 p2, match_program_gen ctx p1 p2 ->
   forall id, option_rel (match_globdef ctx) (prog_defmap p1)!id (prog_defmap p2)!id.
 Proof.
-  intros. apply PTree_Properties.of_list_related. apply H. 
+  intros. apply PTree_Properties.of_list_related. apply H.
 Qed.
 
 Lemma match_program_gen_main:
   forall ctx p1 p2, match_program_gen ctx p1 p2 -> p2.(prog_main) = p1.(prog_main).
 Proof.
-  intros. apply H. 
+  intros. apply H.
 Qed.
 
 Lemma match_program_public:
   forall ctx p1 p2, match_program_gen ctx p1 p2 -> p2.(prog_public) = p1.(prog_public).
 Proof.
-  intros. apply H. 
+  intros. apply H.
 Qed.
 
 End MATCH_PROGRAM_GENERIC.
 
-(** In many cases, the context for [match_program_gen] is the source program or 
+(** In many cases, the context for [match_program_gen] is the source program or
   source compilation unit itself.  We provide a specialized definition for this case. *)
 
 Definition match_program {F1 V1 F2 V2: Type} {LF: Linker F1} {LV: Linker V1}
@@ -464,7 +464,7 @@ Lemma match_program_implies:
   (forall v w, match_varinfo1 v w -> match_varinfo2 v w) ->
   match_program match_fundef2 match_varinfo2 p p'.
 Proof.
-  intros. destruct H as [P Q]. split; auto. 
+  intros. destruct H as [P Q]. split; auto.
   eapply list_forall2_imply; eauto.
   intros. inv H3. split; auto. inv H5.
   econstructor; eauto.
@@ -488,12 +488,12 @@ Theorem match_transform_partial_program2:
   match_program_gen match_fundef match_varinfo ctx p tp.
 Proof.
   unfold transform_partial_program2; intros. monadInv H.
-  red; simpl; split; auto. 
-  revert x EQ. generalize (prog_defs p). 
+  red; simpl; split; auto.
+  revert x EQ. generalize (prog_defs p).
   induction l as [ | [i g] l]; simpl; intros.
 - monadInv EQ. constructor.
 - destruct g as [f|v].
-+ destruct (transf_fun i f) as [tf|?] eqn:TF; monadInv EQ. 
++ destruct (transf_fun i f) as [tf|?] eqn:TF; monadInv EQ.
   constructor; auto. split; simpl; auto. econstructor. apply linkorder_refl. eauto.
 + destruct (transf_globvar transf_var i v) as [tv|?] eqn:TV; monadInv EQ.
   constructor; auto. split; simpl; auto. constructor.
@@ -509,7 +509,7 @@ Theorem match_transform_partial_program_contextual:
   (forall f tf, transf_fun f = OK tf -> match_fundef p f tf) ->
   match_program match_fundef eq p tp.
 Proof.
-  intros. 
+  intros.
   eapply match_transform_partial_program2. eexact H.
   auto.
   simpl; intros. congruence.
@@ -523,8 +523,8 @@ Theorem match_transform_program_contextual:
   (forall f, match_fundef p f (transf_fun f)) ->
   match_program match_fundef eq p (transform_program transf_fun p).
 Proof.
-  intros. 
-  eapply match_transform_partial_program_contextual. 
+  intros.
+  eapply match_transform_partial_program_contextual.
   apply transform_program_partial_program with (transf_fun := transf_fun).
   simpl; intros. inv H0. auto.
 Qed.
@@ -582,11 +582,11 @@ Lemma link_match_globvar:
   match_globvar match_varinfo v1 tv1 -> match_globvar match_varinfo v2 tv2 ->
   exists tv, link tv1 tv2 = Some tv /\ match_globvar match_varinfo v tv.
 Proof.
-  simpl; intros. unfold link_vardef in *. inv H0; inv H1; simpl in *. 
+  simpl; intros. unfold link_vardef in *. inv H0; inv H1; simpl in *.
   destruct (link i1 i0) as [info'|] eqn:LINFO; try discriminate.
   destruct (link init init0) as [init'|] eqn:LINIT; try discriminate.
   destruct (eqb ro ro0 && eqb vo vo0); inv H.
-  exploit link_match_varinfo; eauto. intros (tinfo & P & Q). rewrite P. 
+  exploit link_match_varinfo; eauto. intros (tinfo & P & Q). rewrite P.
   econstructor; split. eauto. constructor. auto.
 Qed.
 
@@ -603,7 +603,7 @@ Proof.
   exploit link_match_fundef; eauto. intros (tf & P & Q).
   assert (X: exists ctx', linkorder ctx' ctx /\ match_fundef ctx' f tf).
   { destruct Q as [Q|Q]; econstructor; (split; [|eassumption]).
-    apply linkorder_trans with ctx1; auto. 
+    apply linkorder_trans with ctx1; auto.
     apply linkorder_trans with ctx2; auto. }
   destruct X as (cu & X & Y).
   exists (Gfun tf); split. rewrite P; auto. econstructor; eauto.
@@ -618,7 +618,7 @@ Lemma match_globdef_linkorder:
   linkorder ctx ctx' ->
   match_globdef match_fundef match_varinfo ctx' g tg.
 Proof.
-  intros. inv H. 
+  intros. inv H.
 - econstructor. eapply linkorder_trans; eauto. auto.
 - constructor; auto.
 Qed.
@@ -635,25 +635,25 @@ Proof.
   generalize H0; intros (A1 & B1 & C1).
   generalize H1; intros (A2 & B2 & C2).
   econstructor; split.
-- apply link_prog_succeeds. 
-+ congruence. 
-+ intros. 
+- apply link_prog_succeeds.
++ congruence.
++ intros.
   generalize (match_program_defmap _ _ _ _ _ H0 id) (match_program_defmap _ _ _ _ _ H1 id).
   rewrite H4, H5. intros R1 R2; inv R1; inv R2.
   exploit Q; eauto. intros (X & Y & gd & Z).
-  exploit link_match_globdef. eexact H2. eexact H3. eauto. eauto. eauto. 
+  exploit link_match_globdef. eexact H2. eexact H3. eauto. eauto. eauto.
   intros (tg & TL & _). intuition congruence.
 - split; [|split].
-+ rewrite R. apply PTree.elements_canonical_order'. intros id. 
++ rewrite R. apply PTree.elements_canonical_order'. intros id.
   rewrite ! PTree.gcombine by auto.
   generalize (match_program_defmap _ _ _ _ _ H0 id) (match_program_defmap _ _ _ _ _ H1 id).
   clear R. intros R1 R2; inv R1; inv R2; unfold link_prog_merge.
 * constructor.
-* constructor. apply match_globdef_linkorder with ctx2; auto. 
+* constructor. apply match_globdef_linkorder with ctx2; auto.
 * constructor. apply match_globdef_linkorder with ctx1; auto.
 * exploit Q; eauto. intros (X & Y & gd & Z).
-  exploit link_match_globdef. eexact H2. eexact H3. eauto. eauto. eauto. 
-  intros (tg & TL & MG). rewrite Z, TL. constructor; auto. 
+  exploit link_match_globdef. eexact H2. eexact H3. eauto. eauto. eauto.
+  intros (tg & TL & MG). rewrite Z, TL. constructor; auto.
 + rewrite R; simpl; auto.
 + rewrite R; simpl. congruence.
 Qed.
@@ -674,7 +674,7 @@ Remark link_transf_partial_fundef:
   link f1 f2 = Some f ->
   transf_partial_fundef tr1 f1 = OK tf1 ->
   transf_partial_fundef tr2 f2 = OK tf2 ->
-  exists tf, 
+  exists tf,
       link tf1 tf2 = Some tf
   /\ (transf_partial_fundef tr1 f = OK tf \/ transf_partial_fundef tr2 f = OK tf).
 Proof.
@@ -683,7 +683,7 @@ Local Transparent Linker_fundef.
 - discriminate.
 - destruct ef2; inv H. exists (Internal x); split; auto. left; simpl; rewrite EQ; auto.
 - destruct ef1; inv H. exists (Internal x); split; auto. right; simpl; rewrite EQ; auto.
-- destruct (external_function_eq ef1 ef2); inv H. exists (External ef2); split; auto. simpl. rewrite dec_eq_true; auto. 
+- destruct (external_function_eq ef1 ef2); inv H. exists (External ef2); split; auto. simpl. rewrite dec_eq_true; auto.
 Qed.
 
 Instance TransfPartialContextualLink
@@ -697,8 +697,8 @@ Instance TransfPartialContextualLink
 Proof.
   red. intros. destruct (link_linkorder _ _ _ H) as [LO1 LO2].
   eapply link_match_program; eauto.
-- intros. eapply link_transf_partial_fundef; eauto. 
-- intros; subst. exists v; auto. 
+- intros. eapply link_transf_partial_fundef; eauto.
+- intros; subst. exists v; auto.
 Qed.
 
 Instance TransfPartialLink
@@ -711,8 +711,8 @@ Instance TransfPartialLink
 Proof.
   red. intros. destruct (link_linkorder _ _ _ H) as [LO1 LO2].
   eapply link_match_program; eauto.
-- intros. eapply link_transf_partial_fundef; eauto. 
-- intros; subst. exists v; auto. 
+- intros. eapply link_transf_partial_fundef; eauto.
+- intros; subst. exists v; auto.
 Qed.
 
 Instance TransfTotallContextualLink
@@ -726,12 +726,12 @@ Instance TransfTotallContextualLink
 Proof.
   red. intros. destruct (link_linkorder _ _ _ H) as [LO1 LO2].
   eapply link_match_program; eauto.
-- intros. subst. destruct f1, f2; simpl in *. 
+- intros. subst. destruct f1, f2; simpl in *.
 + discriminate.
-+ destruct e; inv H2. econstructor; eauto. 
-+ destruct e; inv H2. econstructor; eauto. 
-+ destruct (external_function_eq e e0); inv H2. econstructor; eauto. 
-- intros; subst. exists v; auto. 
++ destruct e; inv H2. econstructor; eauto.
++ destruct e; inv H2. econstructor; eauto.
++ destruct (external_function_eq e e0); inv H2. econstructor; eauto.
+- intros; subst. exists v; auto.
 Qed.
 
 Instance TransfTotalLink
@@ -744,12 +744,12 @@ Instance TransfTotalLink
 Proof.
   red. intros. destruct (link_linkorder _ _ _ H) as [LO1 LO2].
   eapply link_match_program; eauto.
-- intros. subst. destruct f1, f2; simpl in *. 
+- intros. subst. destruct f1, f2; simpl in *.
 + discriminate.
-+ destruct e; inv H2. econstructor; eauto. 
-+ destruct e; inv H2. econstructor; eauto. 
-+ destruct (external_function_eq e e0); inv H2. econstructor; eauto. 
-- intros; subst. exists v; auto. 
++ destruct e; inv H2. econstructor; eauto.
++ destruct e; inv H2. econstructor; eauto.
++ destruct (external_function_eq e e0); inv H2. econstructor; eauto.
+- intros; subst. exists v; auto.
 Qed.
 
 (** * Linking a set of compilation units *)
@@ -794,7 +794,7 @@ Theorem link_list_match:
   exists b, link_list bl = Some b /\ prog_match a b.
 Proof.
   induction 1; simpl; intros a' L.
-- inv L. exists b; auto. 
+- inv L. exists b; auto.
 - destruct (link_list l) as [a1|] eqn:LL; try discriminate.
   exploit IHnlist_forall2; eauto. intros (b' & P & Q).
   red in TL. exploit TL; eauto. intros (b'' & U & V).
@@ -829,7 +829,7 @@ Program Definition pass_identity (l: Language): Pass l l :=
   {| pass_match := fun p1 p2 => p1 = p2;
      pass_match_link := _ |}.
 Next Obligation.
-  red; intros. subst. exists p; auto. 
+  red; intros. subst. exists p; auto.
 Defined.
 
 Program Definition pass_compose {l1 l2 l3: Language} (pass: Pass l1 l2) (pass': Pass l2 l3) : Pass l1 l3 :=
@@ -875,7 +875,7 @@ Lemma nlist_forall2_compose_inv:
   exists lb: nlist B, nlist_forall2 R1 la lb /\ nlist_forall2 R2 lb lc.
 Proof.
   induction 1.
-- rename b into c. destruct H as (b & P & Q). 
+- rename b into c. destruct H as (b & P & Q).
   exists (nbase b); split; constructor; auto.
 - rename b into c. destruct H as (b & P & Q). destruct IHnlist_forall2 as (lb & U & V).
   exists (ncons b lb); split; constructor; auto.
@@ -898,8 +898,8 @@ Proof.
 - apply nlist_forall2_compose_inv in F2. destruct F2 as (interm_units & P & Q).
   edestruct (@link_list_match _ _ (lang_link l1) (lang_link l2) (pass_match p))
   as (interm_prog & U & V).
-  apply pass_match_link. eauto. eauto.  
+  apply pass_match_link. eauto. eauto.
   exploit IHpasses; eauto. intros (tgt_prog & X & Y).
-  exists tgt_prog; split; auto. exists interm_prog; auto. 
+  exists tgt_prog; split; auto. exists interm_prog; auto.
 Qed.
 
diff --git a/common/Memdata.v b/common/Memdata.v
index 87547e1e..0aed4644 100644
--- a/common/Memdata.v
+++ b/common/Memdata.v
@@ -1062,7 +1062,7 @@ Lemma encode_val_int64:
      encode_val Mint32 (if Archi.big_endian then Val.hiword v else Val.loword v)
   ++ encode_val Mint32 (if Archi.big_endian then Val.loword v else Val.hiword v).
 Proof.
-  intros. unfold encode_val. rewrite H. 
+  intros. unfold encode_val. rewrite H.
   destruct v; destruct Archi.big_endian eqn:BI; try reflexivity;
   unfold Val.loword, Val.hiword, encode_val.
   unfold inj_bytes. rewrite <- map_app. f_equal.
diff --git a/common/Memory.v b/common/Memory.v
index 764fdc26..8bb69c02 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -912,7 +912,7 @@ Proof.
   rewrite Ptrofs.add_unsigned.
   replace (Ptrofs.unsigned (Ptrofs.of_int (Int.repr 4))) with (Int.unsigned (Int.repr 4))
     by (symmetry; apply Ptrofs.agree32_of_int; auto).
-  change (Int.unsigned (Int.repr 4)) with 4. 
+  change (Int.unsigned (Int.repr 4)) with 4.
   apply Ptrofs.unsigned_repr.
   exploit (Zdivide_interval (Ptrofs.unsigned i) Ptrofs.modulus 8).
   omega. apply Ptrofs.unsigned_range. auto.
@@ -934,7 +934,7 @@ Proof.
   exploit load_int64_split; eauto. intros (v1 & v2 & L1 & L2 & EQ).
   unfold Val.add; rewrite H0.
   assert (NV: Ptrofs.unsigned (Ptrofs.add i (Ptrofs.of_int (Int.repr 4))) = Ptrofs.unsigned i + 4).
-  { apply addressing_int64_split; auto. 
+  { apply addressing_int64_split; auto.
     exploit load_valid_access. eexact H2. intros [P Q]. auto. }
   exists v1, v2.
 Opaque Ptrofs.repr.
@@ -1519,7 +1519,7 @@ Qed.
 Theorem loadbytes_storebytes_same:
   loadbytes m2 b ofs (Z_of_nat (length bytes)) = Some bytes.
 Proof.
-  intros. assert (STORE2:=STORE). unfold storebytes in STORE2. unfold loadbytes. 
+  intros. assert (STORE2:=STORE). unfold storebytes in STORE2. unfold loadbytes.
   destruct (range_perm_dec m1 b ofs (ofs + Z_of_nat (length bytes)) Cur Writable);
   try discriminate.
   rewrite pred_dec_true.
@@ -1829,7 +1829,7 @@ Proof.
   intros. exploit load_result; eauto. intro. rewrite H0.
   injection ALLOC; intros. rewrite <- H2; simpl. rewrite <- H1.
   rewrite PMap.gss. destruct (size_chunk_nat_pos chunk) as [n E]. rewrite E. simpl.
-  rewrite ZMap.gi. apply decode_val_undef. 
+  rewrite ZMap.gi. apply decode_val_undef.
 Qed.
 
 Theorem load_alloc_same':
@@ -2930,7 +2930,7 @@ Proof.
   rewrite (nextblock_store _ _ _ _ _ _ H0). auto.
   eapply store_outside_inj; eauto.
   unfold inject_id; intros. inv H2. eapply H1; eauto. omega.
-  intros. eauto using perm_store_2. 
+  intros. eauto using perm_store_2.
 Qed.
 
 Theorem storev_extends:
@@ -2982,7 +2982,7 @@ Proof.
   rewrite (nextblock_storebytes _ _ _ _ _ H0). auto.
   eapply storebytes_outside_inj; eauto.
   unfold inject_id; intros. inv H2. eapply H1; eauto. omega.
-  intros. eauto using perm_storebytes_2. 
+  intros. eauto using perm_storebytes_2.
 Qed.
 
 Theorem alloc_extends:
@@ -3017,7 +3017,7 @@ Proof.
   intros. eapply perm_alloc_inv in H; eauto.
   generalize (perm_alloc_inv _ _ _ _ _ H0 b0 ofs Max Nonempty); intros PERM.
   destruct (eq_block b0 b).
-  subst b0. 
+  subst b0.
   assert (EITHER: lo1 <= ofs < hi1 \/ ~(lo1 <= ofs < hi1)) by omega.
   destruct EITHER.
   left. apply perm_implies with Freeable; auto with mem. eapply perm_alloc_2; eauto.
@@ -3034,7 +3034,7 @@ Proof.
   intros. inv H. constructor.
   rewrite (nextblock_free _ _ _ _ _ H0). auto.
   eapply free_left_inj; eauto.
-  intros. exploit mext_perm_inv0; eauto. intros [A|A]. 
+  intros. exploit mext_perm_inv0; eauto. intros [A|A].
   eapply perm_free_inv in A; eauto. destruct A as [[A B]|A]; auto.
   subst b0. right; eapply perm_free_2; eauto.
   intuition eauto using perm_free_3.
@@ -3051,7 +3051,7 @@ Proof.
   rewrite (nextblock_free _ _ _ _ _ H0). auto.
   eapply free_right_inj; eauto.
   unfold inject_id; intros. inv H. eapply H1; eauto. omega.
-  intros. eauto using perm_free_3. 
+  intros. eauto using perm_free_3.
 Qed.
 
 Theorem free_parallel_extends:
@@ -3498,7 +3498,7 @@ Proof.
   intros. eapply mi_representable; try eassumption.
   destruct H4; eauto with mem.
 (* perm inv *)
-  intros. exploit mi_perm_inv0; eauto using perm_store_2. 
+  intros. exploit mi_perm_inv0; eauto using perm_store_2.
   intuition eauto using perm_store_1, perm_store_2.
 Qed.
 
@@ -3523,7 +3523,7 @@ Proof.
   intros. eapply mi_representable; try eassumption.
   destruct H3; eauto with mem.
 (* perm inv *)
-  intros. exploit mi_perm_inv0; eauto using perm_store_2. 
+  intros. exploit mi_perm_inv0; eauto using perm_store_2.
   intuition eauto using perm_store_1, perm_store_2.
 Qed.
 
@@ -3594,7 +3594,7 @@ Proof.
   intros. eapply mi_representable0; eauto.
   destruct H4; eauto using perm_storebytes_2.
 (* perm inv *)
-  intros. exploit mi_perm_inv0; eauto using perm_storebytes_2. 
+  intros. exploit mi_perm_inv0; eauto using perm_storebytes_2.
   intuition eauto using perm_storebytes_1, perm_storebytes_2.
 Qed.
 
@@ -3668,7 +3668,7 @@ Proof.
   intros. eapply mi_representable0; eauto.
   destruct H3; eauto using perm_storebytes_2.
 (* perm inv *)
-  intros. exploit mi_perm_inv0; eauto using perm_storebytes_2. 
+  intros. exploit mi_perm_inv0; eauto using perm_storebytes_2.
   intuition eauto using perm_storebytes_1, perm_storebytes_2.
 Qed.
 
@@ -3694,7 +3694,7 @@ Proof.
   auto.
 (* perm inv *)
   intros. eapply perm_alloc_inv in H2; eauto. destruct (eq_block b0 b2).
-  subst b0. eelim fresh_block_alloc; eauto. 
+  subst b0. eelim fresh_block_alloc; eauto.
   eapply mi_perm_inv0; eauto.
 Qed.
 
@@ -3741,7 +3741,7 @@ Proof.
   destruct H4; eauto using perm_alloc_4.
 (* perm inv *)
   intros. unfold f' in H3; destruct (eq_block b0 b1); try discriminate.
-  exploit mi_perm_inv0; eauto. 
+  exploit mi_perm_inv0; eauto.
   intuition eauto using perm_alloc_1, perm_alloc_4.
 (* incr *)
   split. auto.
@@ -3892,7 +3892,7 @@ Proof.
 (* perm inv *)
   intros. exploit mi_perm_inv0; eauto. intuition eauto using perm_free_3.
   eapply perm_free_inv in H4; eauto. destruct H4 as [[A B] | A]; auto.
-  subst b1. right; eapply perm_free_2; eauto. 
+  subst b1. right; eapply perm_free_2; eauto.
 Qed.
 
 Lemma free_list_left_inject:
@@ -4080,7 +4080,7 @@ Proof.
   destruct H0; eauto using perm_inj.
   rewrite H. omega.
 (* perm inv *)
-  intros. 
+  intros.
   destruct (f b1) as [[b' delta'] |] eqn:?; try discriminate.
   destruct (f' b') as [[b'' delta''] |] eqn:?; try discriminate.
   inversion H; clear H; subst b'' delta.
@@ -4163,7 +4163,7 @@ Proof.
   eapply mem_inj_compose; eauto.
   apply extensionality; intros. unfold compose_meminj, inject_id. auto.
   (* perm inv *)
-  exploit mext_perm_inv1; eauto. intros [A|A]. 
+  exploit mext_perm_inv1; eauto. intros [A|A].
   eapply mext_perm_inv0; eauto.
   right; red; intros; elim A. eapply perm_extends; eauto.
 Qed.
@@ -4305,7 +4305,7 @@ Lemma perm_unchanged_on:
   unchanged_on m m' -> P b ofs ->
   perm m b ofs k p -> perm m' b ofs k p.
 Proof.
-  intros. destruct H. apply unchanged_on_perm0; auto. eapply perm_valid_block; eauto. 
+  intros. destruct H. apply unchanged_on_perm0; auto. eapply perm_valid_block; eauto.
 Qed.
 
 Lemma perm_unchanged_on_2:
@@ -4324,7 +4324,7 @@ Proof.
 - intros. transitivity (perm m2 b ofs k p); apply unchanged_on_perm; auto.
   eapply valid_block_unchanged_on; eauto.
 - intros. transitivity (ZMap.get ofs (mem_contents m2)#b); apply unchanged_on_contents; auto.
-  eapply perm_unchanged_on; eauto. 
+  eapply perm_unchanged_on; eauto.
 Qed.
 
 Lemma loadbytes_unchanged_on_1:
@@ -4462,13 +4462,13 @@ Proof.
 - rewrite (nextblock_drop _ _ _ _ _ _ H). apply Ple_refl.
 - split; intros. eapply perm_drop_3; eauto.
   destruct (eq_block b0 b); auto.
-  subst b0. 
+  subst b0.
   assert (~ (lo <= ofs < hi)). { red; intros; eelim H0; eauto. }
   right; omega.
-  eapply perm_drop_4; eauto. 
-- unfold drop_perm in H. 
+  eapply perm_drop_4; eauto.
+- unfold drop_perm in H.
   destruct (range_perm_dec m b lo hi Cur Freeable); inv H; simpl. auto.
-Qed. 
+Qed.
 
 End UNCHANGED_ON.
 
@@ -4480,9 +4480,9 @@ Lemma unchanged_on_implies:
 Proof.
   intros. destruct H. constructor; intros.
 - auto.
-- apply unchanged_on_perm0; auto. 
-- apply unchanged_on_contents0; auto. 
-  apply H0; auto. eapply perm_valid_block; eauto. 
+- apply unchanged_on_perm0; auto.
+- apply unchanged_on_contents0; auto.
+  apply H0; auto. eapply perm_valid_block; eauto.
 Qed.
 
 End Mem.
diff --git a/common/Separation.v b/common/Separation.v
index c0a3c9cf..c27148aa 100644
--- a/common/Separation.v
+++ b/common/Separation.v
@@ -18,7 +18,7 @@
 (** This library defines a number of useful logical assertions about
   CompCert memory states, such as "block [b] at offset [ofs] contains
   value [v]".  Assertions can be grouped using a separating conjunction
-  operator in the style of separation logic.  
+  operator in the style of separation logic.
 
   Currently, this library is used only in module [Stackingproof]
   to reason about the shapes of stack frames generated during the
@@ -84,7 +84,7 @@ Qed.
 
 Remark massert_eqv_trans: forall p q r, massert_eqv p q -> massert_eqv q r -> massert_eqv p r.
 Proof.
-  unfold massert_eqv, massert_imp; intros. firstorder auto. 
+  unfold massert_eqv, massert_imp; intros. firstorder auto.
 Qed.
 
 (** Record [massert_eqv] and [massert_imp] as relations so that they can be used by rewriting tactics. *)
@@ -139,7 +139,7 @@ Add Morphism sepconj
 Proof.
   intros P1 P2 [A B] Q1 Q2 [C D].
   red; simpl; split; intros.
-- intuition auto. red; intros. apply (H2 b ofs); auto. 
+- intuition auto. red; intros. apply (H2 b ofs); auto.
 - intuition auto.
 Qed.
 
@@ -147,7 +147,7 @@ Add Morphism sepconj
   with signature massert_eqv ==> massert_eqv ==> massert_eqv
   as sepconj_morph_2.
 Proof.
-  intros. destruct H, H0. split; apply sepconj_morph_1; auto. 
+  intros. destruct H, H0. split; apply sepconj_morph_1; auto.
 Qed.
 
 Infix "**" := sepconj (at level 60, right associativity) : sep_scope.
@@ -155,7 +155,7 @@ Infix "**" := sepconj (at level 60, right associativity) : sep_scope.
 Local Open Scope sep_scope.
 
 Lemma sep_imp:
-  forall P P' Q Q' m, 
+  forall P P' Q Q' m,
   m |= P ** Q -> massert_imp P P' -> massert_imp Q Q' -> m |= P' ** Q'.
 Proof.
   intros. rewrite <- H0, <- H1; auto.
@@ -249,7 +249,7 @@ Lemma sep_drop2:
   forall P Q R m, m |= P ** Q ** R -> m |= P ** R.
 Proof.
   intros. rewrite sep_swap in H. eapply sep_drop; eauto.
-Qed. 
+Qed.
 
 Lemma sep_proj1:
   forall Q P m, m |= P ** Q -> m |= P.
@@ -263,25 +263,25 @@ Proof sep_drop.
 
 Definition sep_pick1 := sep_proj1.
 
-Lemma sep_pick2: 
+Lemma sep_pick2:
   forall P Q R m, m |= P ** Q ** R -> m |= Q.
 Proof.
   intros. eapply sep_proj1; eapply sep_proj2; eauto.
 Qed.
 
-Lemma sep_pick3: 
+Lemma sep_pick3:
   forall P Q R S m, m |= P ** Q ** R ** S -> m |= R.
 Proof.
   intros. eapply sep_pick2; eapply sep_proj2; eauto.
 Qed.
 
-Lemma sep_pick4: 
+Lemma sep_pick4:
   forall P Q R S T m, m |= P ** Q ** R ** S ** T -> m |= S.
 Proof.
   intros. eapply sep_pick3; eapply sep_proj2; eauto.
 Qed.
 
-Lemma sep_pick5: 
+Lemma sep_pick5:
   forall P Q R S T U m, m |= P ** Q ** R ** S ** T ** U -> m |= T.
 Proof.
   intros. eapply sep_pick4; eapply sep_proj2; eauto.
@@ -337,7 +337,7 @@ Lemma alloc_rule:
   m |= P ->
   m' |= range b lo hi ** P.
 Proof.
-  intros; simpl. split; [|split]. 
+  intros; simpl. split; [|split].
 - split; auto. split; auto. intros.
   apply Mem.perm_implies with Freeable; auto with mem.
   eapply Mem.perm_alloc_2; eauto.
@@ -352,7 +352,7 @@ Lemma range_split:
   m |= range b lo hi ** P ->
   m |= range b lo mid ** range b mid hi ** P.
 Proof.
-  intros. rewrite <- sep_assoc. eapply sep_imp; eauto. 
+  intros. rewrite <- sep_assoc. eapply sep_imp; eauto.
   split; simpl; intros.
 - intuition auto.
 + omega.
@@ -425,8 +425,8 @@ Next Obligation.
 - exists v. split; auto. eapply Mem.load_unchanged_on; eauto. simpl; auto.
 Qed.
 Next Obligation.
-  eauto with mem. 
-Qed. 
+  eauto with mem.
+Qed.
 
 Lemma contains_no_overflow:
   forall spec m chunk b ofs,
@@ -466,10 +466,10 @@ Proof.
   destruct (Mem.valid_access_store _ _ _ _ v H) as [m' STORE].
   exists m'; split; auto. simpl. intuition auto.
 - eapply Mem.store_valid_access_1; eauto.
-- exists (Val.load_result chunk v); split; auto. eapply Mem.load_store_same; eauto. 
-- apply (m_invar P) with m; auto. 
-  eapply Mem.store_unchanged_on; eauto. 
-  intros; red; intros. apply (C b i); simpl; auto. 
+- exists (Val.load_result chunk v); split; auto. eapply Mem.load_store_same; eauto.
+- apply (m_invar P) with m; auto.
+  eapply Mem.store_unchanged_on; eauto.
+  intros; red; intros. apply (C b i); simpl; auto.
 Qed.
 
 Lemma storev_rule:
@@ -523,7 +523,7 @@ Lemma store_rule':
   exists m',
   Mem.store chunk m b ofs v = Some m' /\ m' |= hasvalue chunk b ofs (Val.load_result chunk v) ** P.
 Proof.
-  intros. eapply store_rule; eauto. 
+  intros. eapply store_rule; eauto.
 Qed.
 
 Lemma storev_rule':
@@ -542,7 +542,7 @@ Program Definition mconj (P Q: massert) : massert := {|
   m_footprint := fun b ofs => m_footprint P b ofs \/ m_footprint Q b ofs
 |}.
 Next Obligation.
-  split. 
+  split.
   apply (m_invar P) with m; auto. eapply Mem.unchanged_on_implies; eauto. simpl; auto.
   apply (m_invar Q) with m; auto. eapply Mem.unchanged_on_implies; eauto. simpl; auto.
 Qed.
@@ -586,7 +586,7 @@ Lemma frame_mconj:
 Proof.
   intros. destruct H as (A & B & C); simpl in A.
   destruct H0 as (D & E & F).
-  simpl. intuition auto. 
+  simpl. intuition auto.
   red; simpl; intros. destruct H2. eapply F; eauto. eapply C; simpl; eauto.
 Qed.
 
@@ -602,7 +602,7 @@ Add Morphism mconj
   with signature massert_eqv ==> massert_eqv ==> massert_eqv
   as mconj_morph_2.
 Proof.
-  intros. destruct H, H0. split; apply mconj_morph_1; auto. 
+  intros. destruct H, H0. split; apply mconj_morph_1; auto.
 Qed.
 
 (** The image of a memory injection *)
@@ -615,8 +615,8 @@ Next Obligation.
   set (img := fun b' ofs => exists b delta, j b = Some(b', delta) /\ Mem.perm m0 b (ofs - delta) Max Nonempty) in *.
   assert (IMG: forall b1 b2 delta ofs k p,
            j b1 = Some (b2, delta) -> Mem.perm m0 b1 ofs k p -> img b2 (ofs + delta)).
-  { intros. red. exists b1, delta; split; auto. 
-    replace (ofs + delta - delta) with ofs by omega. 
+  { intros. red. exists b1, delta; split; auto.
+    replace (ofs + delta - delta) with ofs by omega.
     eauto with mem. }
   destruct H. constructor.
 - destruct mi_inj. constructor; intros.
@@ -624,11 +624,11 @@ Next Obligation.
 + eauto.
 + rewrite (Mem.unchanged_on_contents _ _ _ H0); eauto.
 - assumption.
-- intros. eapply Mem.valid_block_unchanged_on; eauto. 
+- intros. eapply Mem.valid_block_unchanged_on; eauto.
 - assumption.
 - assumption.
 - intros. destruct (Mem.perm_dec m0 b1 ofs Max Nonempty); auto.
-  eapply mi_perm_inv; eauto. 
+  eapply mi_perm_inv; eauto.
   eapply Mem.perm_unchanged_on_2; eauto.
 Qed.
 Next Obligation.
@@ -666,8 +666,8 @@ Proof.
 - apply (m_invar P) with m2; auto.
   eapply Mem.store_unchanged_on; eauto.
   intros; red; intros. eelim C; eauto. simpl.
-  exists b1, delta; split; auto. destruct VALID as [V1 V2]. 
-  apply Mem.perm_cur_max. apply Mem.perm_implies with Writable; auto with mem. 
+  exists b1, delta; split; auto. destruct VALID as [V1 V2].
+  apply Mem.perm_cur_max. apply Mem.perm_implies with Writable; auto with mem.
   apply V1. omega.
 - red; simpl; intros. destruct H1 as (b0 & delta0 & U & V).
   eelim C; eauto. simpl. exists b0, delta0; eauto with mem.
@@ -695,41 +695,41 @@ Proof.
   assert (FRESH2: ~Mem.valid_block m2 b2) by (eapply Mem.fresh_block_alloc; eauto).
   destruct SEP as (INJ & SP & DISJ). simpl in INJ.
   exploit Mem.alloc_left_mapped_inject.
-- eapply Mem.alloc_right_inject; eauto. 
+- eapply Mem.alloc_right_inject; eauto.
 - eexact ALLOC1.
 - instantiate (1 := b2). eauto with mem.
 - instantiate (1 := delta). xomega.
 - intros. assert (0 <= ofs < sz2) by (eapply Mem.perm_alloc_3; eauto). omega.
-- intros. apply Mem.perm_implies with Freeable; auto with mem. 
+- intros. apply Mem.perm_implies with Freeable; auto with mem.
   eapply Mem.perm_alloc_2; eauto. xomega.
-- red; intros. apply Zdivides_trans with 8; auto. 
+- red; intros. apply Zdivides_trans with 8; auto.
   exists (8 / align_chunk chunk). destruct chunk; reflexivity.
-- intros. elim FRESH2. eapply Mem.valid_block_inject_2; eauto. 
+- intros. elim FRESH2. eapply Mem.valid_block_inject_2; eauto.
 - intros (j' & INJ' & J1 & J2 & J3).
   exists j'; split; auto.
   rewrite <- ! sep_assoc.
   split; [|split].
 + simpl. intuition auto; try (unfold Ptrofs.max_unsigned in *; omega).
 * apply Mem.perm_implies with Freeable; auto with mem.
-  eapply Mem.perm_alloc_2; eauto. omega. 
+  eapply Mem.perm_alloc_2; eauto. omega.
 * apply Mem.perm_implies with Freeable; auto with mem.
-  eapply Mem.perm_alloc_2; eauto. omega. 
+  eapply Mem.perm_alloc_2; eauto. omega.
 * red; simpl; intros. destruct H1, H2. omega.
 * red; simpl; intros.
   assert (b = b2) by tauto. subst b.
   assert (0 <= ofs < lo \/ hi <= ofs < sz2) by tauto. clear H1.
   destruct H2 as (b0 & delta0 & D & E).
-  eapply Mem.perm_alloc_inv in E; eauto. 
+  eapply Mem.perm_alloc_inv in E; eauto.
   destruct (eq_block b0 b1).
   subst b0. rewrite J2 in D. inversion D; clear D; subst delta0. xomega.
-  rewrite J3 in D by auto. elim FRESH2. eapply Mem.valid_block_inject_2; eauto. 
-+ apply (m_invar P) with m2; auto. eapply Mem.alloc_unchanged_on; eauto. 
+  rewrite J3 in D by auto. elim FRESH2. eapply Mem.valid_block_inject_2; eauto.
++ apply (m_invar P) with m2; auto. eapply Mem.alloc_unchanged_on; eauto.
 + red; simpl; intros.
   assert (VALID: Mem.valid_block m2 b) by (eapply (m_valid P); eauto).
   destruct H as [A | (b0 & delta0 & A & B)].
 * assert (b = b2) by tauto. subst b. contradiction.
-* eelim DISJ; eauto. simpl. 
-  eapply Mem.perm_alloc_inv in B; eauto. 
+* eelim DISJ; eauto. simpl.
+  eapply Mem.perm_alloc_inv in B; eauto.
   destruct (eq_block b0 b1).
   subst b0. rewrite J2 in A. inversion A; clear A; subst b delta0. contradiction.
   rewrite J3 in A by auto. exists b0, delta0; auto.
@@ -745,19 +745,19 @@ Lemma free_parallel_rule:
      Mem.free m2 b2 0 sz2 = Some m2'
   /\ m2' |= minjection j m1' ** P.
 Proof.
-  intros. rewrite <- ! sep_assoc in H. 
+  intros. rewrite <- ! sep_assoc in H.
   destruct H as (A & B & C).
   destruct A as (D & E & F).
   destruct D as (J & K & L).
   destruct J as (_ & _ & J). destruct K as (_ & _ & K).
   simpl in E.
   assert (PERM: Mem.range_perm m2 b2 0 sz2 Cur Freeable).
-  { red; intros. 
+  { red; intros.
     destruct (zlt ofs lo). apply J; omega.
     destruct (zle hi ofs). apply K; omega.
     replace ofs with ((ofs - delta) + delta) by omega.
-    eapply Mem.perm_inject; eauto. 
-    eapply Mem.free_range_perm; eauto. xomega. 
+    eapply Mem.perm_inject; eauto.
+    eapply Mem.free_range_perm; eauto. xomega.
   }
   destruct (Mem.range_perm_free _ _ _ _ PERM) as [m2' FREE].
   exists m2'; split; auto. split; [|split].
@@ -765,33 +765,33 @@ Proof.
   intros. apply (F b2 (ofs + delta0)).
 + simpl.
   destruct (zlt (ofs + delta0) lo). intuition auto.
-  destruct (zle hi (ofs + delta0)). intuition auto. 
+  destruct (zle hi (ofs + delta0)). intuition auto.
   destruct (eq_block b0 b1).
 * subst b0. rewrite H1 in H; inversion H; clear H; subst delta0.
   eelim (Mem.perm_free_2 m1); eauto. xomega.
-* exploit Mem.mi_no_overlap; eauto. 
-  apply Mem.perm_max with k. apply Mem.perm_implies with p; auto with mem. 
+* exploit Mem.mi_no_overlap; eauto.
+  apply Mem.perm_max with k. apply Mem.perm_implies with p; auto with mem.
   eapply Mem.perm_free_3; eauto.
-  apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem. 
-  eapply (Mem.free_range_perm m1); eauto. 
-  instantiate (1 := ofs + delta0 - delta). xomega. 
-  intros [X|X]. congruence. omega. 
-+ simpl. exists b0, delta0; split; auto. 
+  apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem.
+  eapply (Mem.free_range_perm m1); eauto.
+  instantiate (1 := ofs + delta0 - delta). xomega.
+  intros [X|X]. congruence. omega.
++ simpl. exists b0, delta0; split; auto.
   replace (ofs + delta0 - delta0) with ofs by omega.
-  apply Mem.perm_max with k. apply Mem.perm_implies with p; auto with mem. 
+  apply Mem.perm_max with k. apply Mem.perm_implies with p; auto with mem.
   eapply Mem.perm_free_3; eauto.
-- apply (m_invar P) with m2; auto. 
-  eapply Mem.free_unchanged_on; eauto. 
-  intros; red; intros. eelim C; eauto. simpl. 
+- apply (m_invar P) with m2; auto.
+  eapply Mem.free_unchanged_on; eauto.
+  intros; red; intros. eelim C; eauto. simpl.
   destruct (zlt i lo). intuition auto.
   destruct (zle hi i). intuition auto.
-  right; exists b1, delta; split; auto. 
+  right; exists b1, delta; split; auto.
   apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem.
-  eapply Mem.free_range_perm; eauto. xomega. 
-- red; simpl; intros. eelim C; eauto. 
-  simpl. right. destruct H as (b0 & delta0 & U & V). 
-  exists b0, delta0; split; auto. 
-  eapply Mem.perm_free_3; eauto. 
+  eapply Mem.free_range_perm; eauto. xomega.
+- red; simpl; intros. eelim C; eauto.
+  simpl. right. destruct H as (b0 & delta0 & U & V).
+  exists b0, delta0; split; auto.
+  eapply Mem.perm_free_3; eauto.
 Qed.
 
 (** Preservation of a global environment by a memory injection *)
@@ -836,7 +836,7 @@ Lemma globalenv_inject_incr:
 Proof.
   intros. destruct H1 as (A & B & C). destruct A as (bound & D & E).
   split; [|split]; auto.
-  exists bound; split; auto. 
+  exists bound; split; auto.
   inv E; constructor; intros.
 - eauto.
 - destruct (j b1) as [[b0 delta0]|] eqn:JB1.
@@ -860,7 +860,7 @@ Lemma external_call_parallel_rule:
   /\ inject_separated j j' m1 m2.
 Proof.
   intros until vargs2; intros CALL SEP ARGS.
-  destruct SEP as (A & B & C). simpl in A. 
+  destruct SEP as (A & B & C). simpl in A.
   exploit external_call_mem_inject; eauto.
   eapply globalenv_inject_preserves_globals. eapply sep_pick1; eauto.
   intros (j' & vres2 & m2' & CALL' & RES & INJ' & UNCH1 & UNCH2 & INCR & ISEP).
@@ -877,11 +877,11 @@ Proof.
   eelim C; eauto. simpl. exists b0, delta; auto.
 - red; intros. destruct H as (b0 & delta & J' & E).
   destruct (j b0) as [[b' delta'] | ] eqn:J.
-+ erewrite INCR in J' by eauto. inv J'. 
-  eelim C; eauto. simpl. exists b0, delta; split; auto. apply MAXPERMS; auto. 
++ erewrite INCR in J' by eauto. inv J'.
+  eelim C; eauto. simpl. exists b0, delta; split; auto. apply MAXPERMS; auto.
   eapply Mem.valid_block_inject_1; eauto.
 + exploit ISEP; eauto. intros (X & Y). elim Y. eapply m_valid; eauto.
-Qed.  
+Qed.
 
 Lemma alloc_parallel_rule_2:
   forall (F V: Type) (ge: Genv.t F V) m1 sz1 m1' b1 m2 sz2 m2' b2 P j lo hi delta,
@@ -898,19 +898,19 @@ Lemma alloc_parallel_rule_2:
   /\ inject_incr j j'
   /\ j' b1 = Some(b2, delta).
 Proof.
-  intros. 
+  intros.
   set (j1 := fun b => if eq_block b b1 then Some(b2, delta) else j b).
   assert (X: inject_incr j j1).
-  { unfold j1; red; intros. destruct (eq_block b b1); auto. 
-    subst b. eelim Mem.fresh_block_alloc. eexact H0. 
+  { unfold j1; red; intros. destruct (eq_block b b1); auto.
+    subst b. eelim Mem.fresh_block_alloc. eexact H0.
     eapply Mem.valid_block_inject_1. eauto. apply sep_proj1 in H. eexact H. }
   assert (Y: inject_separated j j1 m1 m2).
-  { unfold j1; red; intros. destruct (eq_block b0 b1). 
+  { unfold j1; red; intros. destruct (eq_block b0 b1).
   - inversion H9; clear H9; subst b3 delta0 b0. split; eapply Mem.fresh_block_alloc; eauto.
   - congruence. }
   rewrite sep_swap in H. eapply globalenv_inject_incr with (j' := j1) in H; eauto. rewrite sep_swap in H.
   clear X Y.
-  exploit alloc_parallel_rule; eauto. 
+  exploit alloc_parallel_rule; eauto.
   intros (j' & A & B & C & D).
   exists j'; split; auto.
   rewrite sep_swap4 in A. rewrite sep_swap4. apply globalenv_inject_incr with j1 m1; auto.
diff --git a/common/Smallstep.v b/common/Smallstep.v
index 9c91243a..c269013b 100644
--- a/common/Smallstep.v
+++ b/common/Smallstep.v
@@ -786,7 +786,7 @@ End SIMULATION_SEQUENCES.
 Lemma compose_forward_simulations:
   forall L1 L2 L3, forward_simulation L1 L2 -> forward_simulation L2 L3 -> forward_simulation L1 L3.
 Proof.
-  intros L1 L2 L3 S12 S23. 
+  intros L1 L2 L3 S12 S23.
   destruct S12 as [index order match_states props].
   destruct S23 as [index' order' match_states' props'].
 
@@ -1632,7 +1632,7 @@ Theorem factor_forward_simulation:
   forward_simulation L1 L2 -> single_events L2 ->
   forward_simulation (atomic L1) L2.
 Proof.
-  intros L1 L2 FS L2single. 
+  intros L1 L2 FS L2single.
   destruct FS as [index order match_states sim].
   apply Forward_simulation with order (ffs_match L1 L2 match_states); constructor.
 - (* wf *)
@@ -1727,7 +1727,7 @@ Theorem factor_backward_simulation:
   backward_simulation L1 L2 -> single_events L1 -> well_behaved_traces L2 ->
   backward_simulation L1 (atomic L2).
 Proof.
-  intros L1 L2 BS L1single L2wb. 
+  intros L1 L2 BS L1single L2wb.
   destruct BS as [index order match_states sim].
   apply Backward_simulation with order (fbs_match L1 L2 match_states); constructor.
 - (* wf *)
diff --git a/common/Values.v b/common/Values.v
index cfabb7a5..d086c69f 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -16,7 +16,6 @@
 (** This module defines the type of values that is used in the dynamic
   semantics of all our intermediate languages. *)
 
-Require Archi.
 Require Import Coqlib.
 Require Import AST.
 Require Import Integers.
@@ -668,6 +667,12 @@ Definition modlu (v1 v2: val): option val :=
   | _, _ => None
   end.
 
+Definition addl_carry (v1 v2 cin: val): val :=
+  match v1, v2, cin with
+  | Vlong n1, Vlong n2, Vlong c => Vlong(Int64.add_carry n1 n2 c)
+  | _, _, _ => Vundef
+  end.
+
 Definition subl_overflow (v1 v2: val) : val :=
   match v1, v2 with
   | Vlong n1, Vlong n2 => Vint (Int.repr (Int64.unsigned (Int64.sub_overflow n1 n2 Int64.zero)))
@@ -734,6 +739,15 @@ Definition shrxl (v1 v2: val): option val :=
   | _, _ => None
   end.
 
+Definition shrl_carry (v1 v2: val): val :=
+  match v1, v2 with
+  | Vlong n1, Vint n2 =>
+     if Int.ltu n2 Int64.iwordsize'
+     then Vlong(Int64.shr_carry' n1 n2)
+     else Vundef
+  | _, _ => Vundef
+  end.
+
 Definition roll (v1 v2: val): val :=
   match v1, v2 with
   | Vlong n1, Vint n2 => Vlong(Int64.rol n1 (Int64.repr (Int.unsigned n2)))
@@ -746,9 +760,9 @@ Definition rorl (v1 v2: val): val :=
   | _, _ => Vundef
   end.
 
-Definition rolml (v: val) (amount mask: int64): val :=
+Definition rolml (v: val) (amount: int) (mask: int64): val :=
   match v with
-  | Vlong n => Vlong(Int64.rolm n amount mask)
+  | Vlong n => Vlong(Int64.rolm n (Int64.repr (Int.unsigned amount)) mask)
   | _ => Vundef
   end.
 
@@ -1073,7 +1087,7 @@ Proof.
   symmetry. auto with ptrofs.
   symmetry. rewrite Int.add_commut. auto with ptrofs.
 - destruct (eq_block b b0); auto.
-  f_equal. rewrite Ptrofs.add_commut. rewrite Ptrofs.sub_add_r. auto with ptrofs. 
+  f_equal. rewrite Ptrofs.add_commut. rewrite Ptrofs.sub_add_r. auto with ptrofs.
 Qed.
 
 Theorem mul_commut: forall x y, mul x y = mul y x.
@@ -1133,6 +1147,28 @@ Proof.
   generalize (Int.eq_spec i0 Int.zero). rewrite Heqb; auto.
 Qed.
 
+Theorem modls_divls:
+  forall x y z,
+  modls x y = Some z -> exists v, divls x y = Some v /\ z = subl x (mull v y).
+Proof.
+  intros. destruct x; destruct y; simpl in *; try discriminate.
+  destruct (Int64.eq i0 Int64.zero
+        || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H.
+  exists (Vlong (Int64.divs i i0)); split; auto.
+  simpl. rewrite Int64.mods_divs. auto.
+Qed.
+
+Theorem modlu_divlu:
+  forall x y z,
+  modlu x y = Some z -> exists v, divlu x y = Some v /\ z = subl x (mull v y).
+Proof.
+  intros. destruct x; destruct y; simpl in *; try discriminate.
+  destruct (Int64.eq i0 Int64.zero) eqn:?; inv H.
+  exists (Vlong (Int64.divu i i0)); split; auto.
+  simpl. rewrite Int64.modu_divu. auto.
+  generalize (Int64.eq_spec i0 Int64.zero). rewrite Heqb; auto.
+Qed.
+
 Theorem divs_pow2:
   forall x n logn y,
   Int.is_power2 n = Some logn -> Int.ltu logn (Int.repr 31) = true ->
@@ -1282,7 +1318,7 @@ Proof.
   symmetry; apply zlt_true. change (Int.unsigned n < 32); omega.
   symmetry; apply zlt_true. unfold Int.sub. change (Int.unsigned (Int.repr 32)) with 32.
   assert (Int.unsigned n <> 0). { red; intros; elim H. rewrite <- (Int.repr_unsigned n), H0. auto. }
-  rewrite Int.unsigned_repr. 
+  rewrite Int.unsigned_repr.
   change (Int.unsigned Int.iwordsize) with 32; omega.
   assert (32 < Int.max_unsigned) by reflexivity. omega.
 Qed.
@@ -1384,7 +1420,7 @@ Proof.
   symmetry. auto with ptrofs.
   symmetry. rewrite Int64.add_commut. auto with ptrofs.
 - destruct (eq_block b b0); auto.
-  simpl; f_equal. rewrite Ptrofs.add_commut. rewrite Ptrofs.sub_add_r. auto with ptrofs. 
+  simpl; f_equal. rewrite Ptrofs.add_commut. rewrite Ptrofs.sub_add_r. auto with ptrofs.
 - rewrite Int64.add_commut. rewrite Int64.sub_add_r. auto.
 Qed.
 
@@ -1462,7 +1498,7 @@ Proof.
          || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq n Int64.mone); inv H3.
   simpl. rewrite H0. decEq. decEq.
   generalize (Int64.is_power2'_correct _ _ H); intro.
-  unfold Int64.shrx'. rewrite Int64.shl'_mul_two_p. rewrite <- H1. 
+  unfold Int64.shrx'. rewrite Int64.shl'_mul_two_p. rewrite <- H1.
   rewrite Int64.mul_commut. rewrite Int64.mul_one.
   rewrite Int64.repr_unsigned. auto.
 Qed.
@@ -1491,6 +1527,19 @@ Proof.
   simpl. decEq. symmetry. eapply Int64.modu_and; eauto.
 Qed.
 
+Theorem shrxl_carry:
+  forall x y z,
+  shrxl x y = Some z ->
+  addl (shrl x y) (shrl_carry x y) = z.
+Proof.
+  intros. destruct x; destruct y; simpl in H; inv H.
+  destruct (Int.ltu i0 (Int.repr 63)) eqn:?; inv H1.
+  exploit Int.ltu_inv; eauto. change (Int.unsigned (Int.repr 63)) with 63. intros.
+  assert (Int.ltu i0 Int64.iwordsize' = true).
+    unfold Int.ltu. apply zlt_true. change (Int.unsigned Int64.iwordsize') with 64. omega.
+  simpl. rewrite H0. simpl. decEq. rewrite Int64.shrx'_carry; auto.
+Qed.
+
 Theorem shrxl_shrl_2:
   forall n x z,
   shrxl x (Vint n) = Some z ->
@@ -1511,7 +1560,7 @@ Proof.
   symmetry; apply zlt_true. change (Int.unsigned n < 64); omega.
   symmetry; apply zlt_true. unfold Int.sub. change (Int.unsigned (Int.repr 64)) with 64.
   assert (Int.unsigned n <> 0). { red; intros; elim H. rewrite <- (Int.repr_unsigned n), H0. auto. }
-  rewrite Int.unsigned_repr. 
+  rewrite Int.unsigned_repr.
   change (Int.unsigned Int64.iwordsize') with 64; omega.
   assert (64 < Int.max_unsigned) by reflexivity. omega.
 Qed.
@@ -1603,7 +1652,7 @@ Theorem swap_cmpu_bool:
 Proof.
   assert (E: forall c, cmp_different_blocks (swap_comparison c) = cmp_different_blocks c).
   { destruct c; auto. }
-  intros; unfold cmpu_bool. rewrite ! E. destruct Archi.ptr64 eqn:SF, x, y; auto.  
+  intros; unfold cmpu_bool. rewrite ! E. destruct Archi.ptr64 eqn:SF, x, y; auto.
 - rewrite Int.swap_cmpu. auto.
 - rewrite Int.swap_cmpu. auto.
 - destruct (eq_block b b0); subst.
@@ -1630,7 +1679,7 @@ Theorem swap_cmplu_bool:
 Proof.
   assert (E: forall c, cmp_different_blocks (swap_comparison c) = cmp_different_blocks c).
   { destruct c; auto. }
-  intros; unfold cmplu_bool. rewrite ! E. destruct Archi.ptr64 eqn:SF, x, y; auto.  
+  intros; unfold cmplu_bool. rewrite ! E. destruct Archi.ptr64 eqn:SF, x, y; auto.
 - rewrite Int64.swap_cmpu. auto.
 - destruct (eq_block b b0); subst.
   rewrite dec_eq_true.
@@ -1937,7 +1986,7 @@ Qed.
 Lemma offset_ptr_assoc:
   forall v d1 d2, offset_ptr (offset_ptr v d1) d2 = offset_ptr v (Ptrofs.add d1 d2).
 Proof.
-  intros. destruct v; simpl; auto. f_equal. apply Ptrofs.add_assoc. 
+  intros. destruct v; simpl; auto. f_equal. apply Ptrofs.add_assoc.
 Qed.
 
 (** * Values and memory injections *)
@@ -1988,7 +2037,7 @@ Hint Resolve inject_list_nil inject_list_cons.
 Lemma inject_ptrofs:
   forall mi i, inject mi (Vptrofs i) (Vptrofs i).
 Proof.
-  unfold Vptrofs; intros. destruct Archi.ptr64; auto. 
+  unfold Vptrofs; intros. destruct Archi.ptr64; auto.
 Qed.
 
 Hint Resolve inject_ptrofs.
@@ -2002,7 +2051,7 @@ Lemma load_result_inject:
   inject f v1 v2 ->
   inject f (Val.load_result chunk v1) (Val.load_result chunk v2).
 Proof.
-  intros. inv H; destruct chunk; simpl; try constructor; destruct Archi.ptr64; econstructor; eauto. 
+  intros. inv H; destruct chunk; simpl; try constructor; destruct Archi.ptr64; econstructor; eauto.
 Qed.
 
 Remark add_inject:
@@ -2012,11 +2061,11 @@ Remark add_inject:
   inject f (Val.add v1 v2) (Val.add v1' v2').
 Proof.
   intros. unfold Val.add. destruct Archi.ptr64 eqn:SF.
-- inv H; inv H0; constructor. 
+- inv H; inv H0; constructor.
 - inv H; inv H0; simpl; auto.
-+ econstructor; eauto. 
++ econstructor; eauto.
   rewrite ! Ptrofs.add_assoc. decEq. apply Ptrofs.add_commut.
-+ econstructor; eauto. 
++ econstructor; eauto.
   rewrite ! Ptrofs.add_assoc. decEq. apply Ptrofs.add_commut.
 Qed.
 
@@ -2029,7 +2078,7 @@ Proof.
   intros. unfold Val.sub. destruct Archi.ptr64 eqn:SF.
 - inv H; inv H0; constructor.
 - inv H; inv H0; simpl; auto.
-+ econstructor; eauto. 
++ econstructor; eauto.
   rewrite Ptrofs.sub_add_l. auto.
 + destruct (eq_block b1 b0); auto.
   subst. rewrite H1 in H. inv H. rewrite dec_eq_true.
@@ -2044,11 +2093,11 @@ Remark addl_inject:
 Proof.
   intros. unfold Val.addl. destruct Archi.ptr64 eqn:SF.
 - inv H; inv H0; simpl; auto.
-+ econstructor; eauto. 
++ econstructor; eauto.
   rewrite ! Ptrofs.add_assoc. decEq. apply Ptrofs.add_commut.
-+ econstructor; eauto. 
++ econstructor; eauto.
   rewrite ! Ptrofs.add_assoc. decEq. apply Ptrofs.add_commut.
-- inv H; inv H0; constructor. 
+- inv H; inv H0; constructor.
 Qed.
 
 Remark subl_inject:
@@ -2059,7 +2108,7 @@ Remark subl_inject:
 Proof.
   intros. unfold Val.subl. destruct Archi.ptr64 eqn:SF.
 - inv H; inv H0; simpl; auto.
-+ econstructor; eauto. 
++ econstructor; eauto.
   rewrite Ptrofs.sub_add_l. auto.
 + destruct (eq_block b1 b0); auto.
   subst. rewrite H1 in H. inv H. rewrite dec_eq_true.
@@ -2126,7 +2175,7 @@ Lemma cmpu_bool_inject:
 Proof.
   Local Opaque Int.add Ptrofs.add.
   intros.
-  unfold cmpu_bool in *; destruct Archi.ptr64; 
+  unfold cmpu_bool in *; destruct Archi.ptr64;
   inv H; simpl in H1; try discriminate; inv H0; simpl in H1; try discriminate; simpl; auto.
 - fold (weak_valid_ptr1 b1 (Ptrofs.unsigned ofs1)) in H1.
   fold (weak_valid_ptr2 b2 (Ptrofs.unsigned (Ptrofs.add ofs1 (Ptrofs.repr delta)))).
diff --git a/debug/Dwarfgen.ml b/debug/Dwarfgen.ml
index ee568042..ca5d783d 100644
--- a/debug/Dwarfgen.ml
+++ b/debug/Dwarfgen.ml
@@ -549,7 +549,7 @@ let diab_gen_compilation_section s defs acc =
   let cp = {
     compile_unit_name = Simple_string !file_name;
     compile_unit_range = Pc_pair (low_pc,high_pc);
-    compile_unit_dir = Simple_string (Sys.getcwd ());
+    compile_unit_dir = Simple_string (Filename.quote (Sys.getcwd ()));
     compile_unit_prod_name = Simple_string prod_name
   } in
   let cp = new_entry (next_id ()) (DW_TAG_compile_unit cp) in
@@ -620,7 +620,7 @@ let gen_gnu_debug_info sec_name var_section : debug_entries =
   let cp = {
     compile_unit_name = gnu_string_entry !file_name;
     compile_unit_range = r;
-    compile_unit_dir = gnu_string_entry (Sys.getcwd ());
+    compile_unit_dir = gnu_string_entry (Filename.quote (Sys.getcwd ()));
     compile_unit_prod_name = gnu_string_entry prod_name;
   } in
   let cp = new_entry (next_id ()) (DW_TAG_compile_unit cp) in
diff --git a/driver/Compiler.v b/driver/Compiler.v
index 5ced13e1..75247f71 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -292,7 +292,7 @@ Proof.
   set (p18 := CleanupLabels.transf_program p17) in *.
   destruct (partial_if debug Debugvar.transf_program p18) as [p19|e] eqn:P19; simpl in T; try discriminate.
   destruct (Stacking.transf_program p19) as [p20|e] eqn:P20; simpl in T; try discriminate.
-  unfold match_prog; simpl. 
+  unfold match_prog; simpl.
   exists p1; split. apply SimplExprproof.transf_program_match; auto.
   exists p2; split. apply SimplLocalsproof.match_transf_program; auto.
   exists p3; split. apply Cshmgenproof.transf_program_match; auto.
@@ -313,14 +313,14 @@ Proof.
   exists p18; split. apply CleanupLabelsproof.transf_program_match; auto.
   exists p19; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match.
   exists p20; split. apply Stackingproof.transf_program_match; auto.
-  exists tp; split. apply Asmgenproof.transf_program_match; auto. 
+  exists tp; split. apply Asmgenproof.transf_program_match; auto.
   reflexivity.
 Qed.
 
 (** * Semantic preservation *)
 
 (** We now prove that the whole CompCert compiler (as characterized by the
-  [match_prog] relation) preserves semantics by constructing 
+  [match_prog] relation) preserves semantics by constructing
   the following simulations:
 - Forward simulations from [Cstrategy] to [Asm]
   (composition of the forward simulations for each pass).
@@ -359,7 +359,7 @@ Proof.
   intros p tp M. unfold match_prog, pass_match in M; simpl in M.
 Ltac DestructM :=
   match goal with
-    [ H: exists p, _ /\ _ |- _ ] => 
+    [ H: exists p, _ /\ _ |- _ ] =>
       let p := fresh "p" in let M := fresh "M" in let MM := fresh "MM" in
       destruct H as (p & M & MM); clear H
   end.
@@ -467,11 +467,11 @@ Theorem separate_transf_c_program_correct:
   forall c_units asm_units c_program,
   nlist_forall2 (fun cu tcu => transf_c_program cu = OK tcu) c_units asm_units ->
   link_list c_units = Some c_program ->
-  exists asm_program, 
+  exists asm_program,
       link_list asm_units = Some asm_program
    /\ backward_simulation (Csem.semantics c_program) (Asm.semantics asm_program).
 Proof.
-  intros. 
+  intros.
   assert (nlist_forall2 match_prog c_units asm_units).
   { eapply nlist_forall2_imply. eauto. simpl; intros. apply transf_c_program_match; auto. }
   assert (exists asm_program, link_list asm_units = Some asm_program /\ match_prog c_program asm_program).
diff --git a/driver/Complements.v b/driver/Complements.v
index f7598758..d1bea1b3 100644
--- a/driver/Complements.v
+++ b/driver/Complements.v
@@ -80,7 +80,7 @@ Theorem transf_cstrategy_program_preservation:
 Proof.
   assert (WBT: forall p, well_behaved_traces (Cstrategy.semantics p)).
     intros. eapply ssr_well_behaved. apply Cstrategy.semantics_strongly_receptive.
-  intros. 
+  intros.
   assert (MATCH: match_prog p tp) by (apply transf_c_program_match; auto).
   intuition auto.
   eapply forward_simulation_behavior_improves; eauto.
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 9c4c724f..b96cd314 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -170,7 +170,7 @@ Separate Extraction
    Ctyping.typecheck_program
    Ctyping.epostincr Ctyping.epostdecr Ctyping.epreincr Ctyping.epredecr
    Ctypes.make_program
-   Conventions1.is_float_reg
+   Conventions1.callee_save_type Conventions1.is_float_reg
    Conventions1.int_caller_save_regs Conventions1.float_caller_save_regs
    Conventions1.int_callee_save_regs Conventions1.float_callee_save_regs
    Conventions1.dummy_int_reg Conventions1.dummy_float_reg
diff --git a/lib/BoolEqual.v b/lib/BoolEqual.v
index a5b543e1..c9e7bad5 100644
--- a/lib/BoolEqual.v
+++ b/lib/BoolEqual.v
@@ -17,7 +17,7 @@
 
 (** The [decide equality] tactic can generate a term of type
 [forall (x y: A), {x=y} + {x<>y}] if [A] is an inductive type.
-This term is a decidable equality function.  
+This term is a decidable equality function.
 
 Similarly, this module defines a [boolean_equality] tactic that generates
 a term of type [A -> A -> bool].  This term is a Boolean-valued equality
@@ -33,7 +33,7 @@ a decidable equality of type [forall (x y: A), {x=y} + {x<>y}].
 
 The advantage of the present tactics over the standard [decide equality]
 tactic is that the former produce smaller transparent definitions than
-the latter.  
+the latter.
 
 For a type [A] that has N constructors, [decide equality] produces a
 single term of size O(N^3), which must be kept transparent so that
@@ -91,7 +91,7 @@ Ltac bool_eq :=
   end.
 
 Lemma proj_sumbool_is_true:
-  forall (A: Type) (f: forall (x y: A), {x=y} + {x<>y}) (x: A), 
+  forall (A: Type) (f: forall (x y: A), {x=y} + {x<>y}) (x: A),
   proj_sumbool (f x x) = true.
 Proof.
   intros. unfold proj_sumbool. destruct (f x x). auto. elim n; auto.
@@ -119,7 +119,7 @@ Lemma proj_sumbool_true:
   forall (A: Type) (x y: A) (a: {x=y} + {x<>y}),
   proj_sumbool a = true -> x = y.
 Proof.
-  intros. destruct a. auto. discriminate.   
+  intros. destruct a. auto. discriminate.
 Qed.
 
 Ltac bool_eq_sound_case :=
diff --git a/lib/Decidableplus.v b/lib/Decidableplus.v
index 3bb6eee7..6383794d 100644
--- a/lib/Decidableplus.v
+++ b/lib/Decidableplus.v
@@ -30,14 +30,14 @@ Program Instance Decidable_not (P: Prop) (dP: Decidable P) : Decidable (~ P) :=
   Decidable_witness := negb (@Decidable_witness P dP)
 }.
 Next Obligation.
-  rewrite negb_true_iff. split. apply Decidable_complete_alt. apply Decidable_sound_alt. 
+  rewrite negb_true_iff. split. apply Decidable_complete_alt. apply Decidable_sound_alt.
 Qed.
 
 Program Instance Decidable_equiv (P Q: Prop) (dP: Decidable P) (dQ: Decidable Q) : Decidable (P <-> Q) := {
   Decidable_witness := Bool.eqb (@Decidable_witness P dP) (@Decidable_witness Q dQ)
 }.
 Next Obligation.
-  rewrite eqb_true_iff. 
+  rewrite eqb_true_iff.
   split; intros.
   split; intros; eapply Decidable_sound; [rewrite <- H | rewrite H]; eapply Decidable_complete; eauto.
   destruct (@Decidable_witness Q dQ) eqn:D.
@@ -65,7 +65,7 @@ Program Instance Decidable_implies (P Q: Prop) (dP: Decidable P) (dQ: Decidable
 Next Obligation.
   split.
 - intros. rewrite Decidable_complete in H by auto. eapply Decidable_sound; eauto.
-- intros. destruct (@Decidable_witness P dP) eqn:WP; auto. 
+- intros. destruct (@Decidable_witness P dP) eqn:WP; auto.
   eapply Decidable_complete. apply H. eapply Decidable_sound; eauto.
 Qed.
 
@@ -75,7 +75,7 @@ Program Definition Decidable_eq {A: Type} (eqdec: forall (x y: A), {x=y} + {x<>y
   Decidable_witness := proj_sumbool (eqdec x y)
 |}.
 Next Obligation.
-  split; intros. InvBooleans. auto. subst y. apply dec_eq_true. 
+  split; intros. InvBooleans. auto. subst y. apply dec_eq_true.
 Qed.
 
 Program Instance Decidable_eq_bool : forall (x y : bool), Decidable (eq x y) := {
@@ -112,14 +112,14 @@ Program Instance Decidable_le_Z : forall (x y: Z), Decidable (x <= y) := {
   Decidable_witness := Z.leb x y
 }.
 Next Obligation.
-  apply Z.leb_le. 
+  apply Z.leb_le.
 Qed.
 
 Program Instance Decidable_lt_Z : forall (x y: Z), Decidable (x < y) := {
   Decidable_witness := Z.ltb x y
 }.
 Next Obligation.
-  apply Z.ltb_lt. 
+  apply Z.ltb_lt.
 Qed.
 
 Program Instance Decidable_ge_Z : forall (x y: Z), Decidable (x >= y) := {
@@ -142,8 +142,8 @@ Program Instance Decidable_divides : forall (x y: Z), Decidable (x | y) := {
 Next Obligation.
   split.
   intros. apply Z.eqb_eq in H. exists (y / x). auto.
-  intros [k EQ]. apply Z.eqb_eq. 
-  destruct (Z.eq_dec x 0). 
+  intros [k EQ]. apply Z.eqb_eq.
+  destruct (Z.eq_dec x 0).
   subst x. rewrite Z.mul_0_r in EQ. subst y. reflexivity.
   assert (k = y / x).
   { apply Zdiv_unique_full with 0. red; omega. rewrite EQ; ring. }
@@ -152,7 +152,7 @@ Qed.
 
 (** Deciding properties over lists *)
 
-Program Instance Decidable_forall_in_list : 
+Program Instance Decidable_forall_in_list :
       forall (A: Type) (l: list A) (P: A -> Prop) (dP: forall x:A, Decidable (P x)),
       Decidable (forall x:A, In x l -> P x) := {
   Decidable_witness := List.forallb (fun x => @Decidable_witness (P x) (dP x)) l
@@ -160,10 +160,10 @@ Program Instance Decidable_forall_in_list :
 Next Obligation.
   rewrite List.forallb_forall. split; intros.
 - eapply Decidable_sound; eauto.
-- eapply Decidable_complete; eauto. 
+- eapply Decidable_complete; eauto.
 Qed.
 
-Program Instance Decidable_exists_in_list : 
+Program Instance Decidable_exists_in_list :
       forall (A: Type) (l: list A) (P: A -> Prop) (dP: forall x:A, Decidable (P x)),
       Decidable (exists x:A, In x l /\ P x) := {
   Decidable_witness := List.existsb (fun x => @Decidable_witness (P x) (dP x)) l
@@ -188,8 +188,8 @@ Program Instance Decidable_forall : forall (T: Type) (fT: Finite T) (P: T -> Pro
 }.
 Next Obligation.
   rewrite List.forallb_forall. split; intros.
-- eapply Decidable_sound; eauto. apply H. apply Finite_elements_spec. 
-- eapply Decidable_complete; eauto. 
+- eapply Decidable_sound; eauto. apply H. apply Finite_elements_spec.
+- eapply Decidable_complete; eauto.
 Qed.
 
 Program Instance Decidable_exists : forall (T: Type) (fT: Finite T) (P: T -> Prop) (dP: forall x:T, Decidable (P x)), Decidable (exists x, P x) := {
@@ -198,7 +198,7 @@ Program Instance Decidable_exists : forall (T: Type) (fT: Finite T) (P: T -> Pro
 Next Obligation.
   rewrite List.existsb_exists. split.
 - intros (x & A & B). exists x. eapply Decidable_sound; eauto.
-- intros (x & A). exists x; split. eapply Finite_elements_spec. eapply Decidable_complete; eauto. 
+- intros (x & A). exists x; split. eapply Finite_elements_spec. eapply Decidable_complete; eauto.
 Qed.
 
 (** Some examples of finite types. *)
diff --git a/lib/Floats.v b/lib/Floats.v
index 51b0c415..aa52b197 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -820,6 +820,75 @@ Proof.
 - omega.
 Qed.
 
+(** Conversions to/from 32-bit integers can be implemented by going through 64-bit integers. *)
+
+Remark ZofB_range_widen:
+  forall (f: float) n min1 max1 min2 max2,
+  ZofB_range _ _ f min1 max1 = Some n ->
+  min2 <= min1 -> max1 <= max2 ->
+  ZofB_range _ _ f min2 max2 = Some n.
+Proof.
+  intros. exploit ZofB_range_inversion; eauto. intros (A & B & C).
+  unfold ZofB_range; rewrite C.
+  replace (min2 <=? n) with true. replace (n <=? max2) with true. auto.
+  symmetry; apply Z.leb_le; omega.
+  symmetry; apply Z.leb_le; omega.
+Qed.
+
+Theorem to_int_to_long:
+  forall f n, to_int f = Some n -> to_long f = Some (Int64.repr (Int.signed n)).
+Proof.
+  unfold to_int, to_long; intros.
+  destruct (ZofB_range 53 1024 f Int.min_signed Int.max_signed) as [z|] eqn:Z; inv H.
+  exploit ZofB_range_inversion; eauto. intros (A & B & C).
+  replace (ZofB_range 53 1024 f Int64.min_signed Int64.max_signed) with (Some z).
+  simpl. rewrite Int.signed_repr; auto.
+  symmetry; eapply ZofB_range_widen; eauto. compute; congruence. compute; congruence.
+Qed.
+
+Theorem to_intu_to_longu:
+  forall f n, to_intu f = Some n -> to_longu f = Some (Int64.repr (Int.unsigned n)).
+Proof.
+  unfold to_intu, to_longu; intros.
+  destruct (ZofB_range 53 1024 f 0 Int.max_unsigned) as [z|] eqn:Z; inv H.
+  exploit ZofB_range_inversion; eauto. intros (A & B & C).
+  replace (ZofB_range 53 1024 f 0 Int64.max_unsigned) with (Some z).
+  simpl. rewrite Int.unsigned_repr; auto.
+  symmetry; eapply ZofB_range_widen; eauto. omega. compute; congruence.
+Qed.
+
+Theorem to_intu_to_long:
+  forall f n, to_intu f = Some n -> to_long f = Some (Int64.repr (Int.unsigned n)).
+Proof.
+  unfold to_intu, to_long; intros.
+  destruct (ZofB_range 53 1024 f 0 Int.max_unsigned) as [z|] eqn:Z; inv H.
+  exploit ZofB_range_inversion; eauto. intros (A & B & C).
+  replace (ZofB_range 53 1024 f Int64.min_signed Int64.max_signed) with (Some z).
+  simpl. rewrite Int.unsigned_repr; auto.
+  symmetry; eapply ZofB_range_widen; eauto. compute; congruence. compute; congruence.
+Qed.
+
+Theorem of_int_of_long:
+  forall n, of_int n = of_long (Int64.repr (Int.signed n)).
+Proof.
+  unfold of_int, of_long. intros. f_equal. rewrite Int64.signed_repr. auto.
+  generalize (Int.signed_range n). compute_this Int64.min_signed. compute_this Int64.max_signed. smart_omega.
+Qed.
+
+Theorem of_intu_of_longu:
+  forall n, of_intu n = of_longu (Int64.repr (Int.unsigned n)).
+Proof.
+  unfold of_intu, of_longu. intros. f_equal. rewrite Int64.unsigned_repr. auto.
+  generalize (Int.unsigned_range n). smart_omega.
+Qed.
+
+Theorem of_intu_of_long:
+  forall n, of_intu n = of_long (Int64.repr (Int.unsigned n)).
+Proof.
+  unfold of_intu, of_long. intros. f_equal. rewrite Int64.signed_repr. auto.
+  generalize (Int.unsigned_range n). compute_this Int64.min_signed; compute_this Int64.max_signed; smart_omega.
+Qed.
+
 End Float.
 
 (** * Single-precision FP numbers *)
diff --git a/lib/Integers.v b/lib/Integers.v
index b1fa982d..c44fa55f 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -1224,9 +1224,9 @@ Proof.
   { unfold Q, R, Z.div, Z.modulo. destruct (Z.div_eucl N D); auto. }
   rewrite E2. rewrite zle_true. auto.
   assert (unsigned d <> 0).
-  { red; intros. elim H. rewrite <- (repr_unsigned d). rewrite H0; auto. }   
+  { red; intros. elim H. rewrite <- (repr_unsigned d). rewrite H0; auto. }
   assert (0 < D).
-  { unfold D. generalize (unsigned_range d); intros. omega. } 
+  { unfold D. generalize (unsigned_range d); intros. omega. }
   assert (0 <= Q <= max_unsigned).
   { unfold Q. apply Zdiv_interval_2.
     rewrite <- E1; apply unsigned_range_2.
@@ -1262,7 +1262,7 @@ Proof.
   set (Q := Z.quot N D); set (R := Z.rem N D).
   assert (E2: Z.quotrem N D = (Q, R)).
   { unfold Q, R, Z.quot, Z.rem. destruct (Z.quotrem N D); auto. }
-  rewrite E2. 
+  rewrite E2.
   assert (min_signed <= N <= max_signed) by (rewrite H2; apply signed_range).
   assert (min_signed <= Q <= max_signed).
   { unfold Q. destruct (zeq D 1); [ | destruct (zeq D (-1))].
@@ -1284,7 +1284,7 @@ Proof.
       rewrite Z.mul_1_r. unfold min_signed, max_signed in H3; xomega.
       apply Zmult_lt_compat_l. generalize half_modulus_pos; omega. xomega. }
     rewrite Z.abs_lt in H4.
-    unfold min_signed, max_signed; omega. 
+    unfold min_signed, max_signed; omega.
   }
   unfold proj_sumbool; rewrite ! zle_true by omega; simpl.
   unfold Q, R; rewrite H2; auto.
@@ -2186,6 +2186,16 @@ Proof.
     + omega.
 Qed.
 
+Theorem sub_ltu:
+  forall x y,
+    ltu x y = true ->
+    0 <= unsigned y - unsigned x <= unsigned y.
+Proof.
+  intros.
+  generalize (ltu_inv x y H). intros .
+  split. omega. omega.
+Qed.
+
 Theorem shru_zero: forall x, shru x zero = x.
 Proof.
   bit_solve. rewrite unsigned_zero. rewrite zlt_true. f_equal; omega. omega.
@@ -4036,8 +4046,13 @@ Definition shru' (x: int) (y: Int.int): int :=
   repr (Z.shiftr (unsigned x) (Int.unsigned y)).
 Definition shr' (x: int) (y: Int.int): int :=
   repr (Z.shiftr (signed x) (Int.unsigned y)).
+Definition rol' (x: int) (y: Int.int): int :=
+  rol x (repr (Int.unsigned y)).
 Definition shrx' (x: int) (y: Int.int): int :=
   divs x (shl' one y).
+Definition shr_carry' (x: int) (y: Int.int): int :=
+  if lt x zero && negb (eq (and x (sub (shl' one y) one)) zero)
+  then one else zero.
 
 Lemma bits_shl':
   forall x y i,
@@ -4082,7 +4097,7 @@ Lemma shl'_mul_two_p:
   shl' x y = mul x (repr (two_p (Int.unsigned y))).
 Proof.
   intros. unfold shl', mul. apply eqm_samerepr.
-  rewrite Zshiftl_mul_two_p. apply eqm_mult. apply eqm_refl. apply eqm_unsigned_repr. 
+  rewrite Zshiftl_mul_two_p. apply eqm_mult. apply eqm_refl. apply eqm_unsigned_repr.
   generalize (Int.unsigned_range y); omega.
 Qed.
 
@@ -4099,11 +4114,50 @@ Proof.
   intros. rewrite shl'_one_two_p. apply shl'_mul_two_p.
 Qed.
 
+Theorem shl'_zero:
+  forall x, shl' x Int.zero = x.
+Proof.
+  intros. unfold shl'. rewrite Int.unsigned_zero. unfold Z.shiftl.
+  apply repr_unsigned.
+Qed.
+
+Theorem shru'_zero :
+  forall x, shru' x Int.zero = x.
+Proof.
+  intros. unfold shru'. rewrite Int.unsigned_zero. unfold Z.shiftr.
+  apply repr_unsigned.
+Qed.
+
+Theorem shr'_zero :
+  forall x, shr' x Int.zero = x.
+Proof.
+  intros. unfold shr'. rewrite Int.unsigned_zero. unfold Z.shiftr.
+  apply repr_signed.
+Qed.
+
 Theorem shrx'_zero:
   forall x, shrx' x Int.zero = x.
 Proof.
-  intros. change (shrx' x Int.zero) with (shrx x zero). apply shrx_zero. compute; auto. 
-Qed. 
+  intros. change (shrx' x Int.zero) with (shrx x zero). apply shrx_zero. compute; auto.
+Qed.
+
+Theorem shrx'_carry:
+  forall x y,
+  Int.ltu y (Int.repr 63) = true ->
+  shrx' x y = add (shr' x y) (shr_carry' x y).
+Proof.
+  intros. apply Int.ltu_inv in H. change (Int.unsigned (Int.repr 63)) with 63 in H.
+  set (y1 := Int64.repr (Int.unsigned y)).
+  assert (U: unsigned y1 = Int.unsigned y).
+  { apply unsigned_repr. assert (63 < max_unsigned) by reflexivity. omega. }
+  transitivity (shrx x y1).
+- unfold shrx', shrx, shl', shl. rewrite U; auto.
+- rewrite shrx_carry. 
++ f_equal. 
+  unfold shr, shr'. rewrite U; auto.
+  unfold shr_carry, shr_carry', shl, shl'. rewrite U; auto.
++ unfold ltu. apply zlt_true. rewrite U; tauto. 
+Qed.
 
 Theorem shrx'_shr_2:
   forall x y,
@@ -4119,7 +4173,7 @@ Proof.
   { unfold z; apply unsigned_repr; omega. }
   assert (B: unsigned (sub (repr 64) z) = Int.unsigned (Int.sub (Int.repr 64) y)).
   { unfold z. unfold sub, Int.sub.
-    change (unsigned (repr 64)) with 64.  
+    change (unsigned (repr 64)) with 64.
     change (Int.unsigned (Int.repr 64)) with 64.
     rewrite (unsigned_repr (Int.unsigned y)) by omega.
     rewrite unsigned_repr, Int.unsigned_repr by omega.
@@ -4172,7 +4226,7 @@ Proof.
   replace (shl' x y) with (shl x (repr (Int.unsigned y))).
   replace (shru' x z) with (shru x (repr (Int.unsigned z))).
   apply or_ror; auto. rewrite F, H1. reflexivity.
-  unfold shru, shru'; rewrite <- B; auto. 
+  unfold shru, shru'; rewrite <- B; auto.
   unfold shl, shl'; rewrite <- A; auto.
 Qed.
 
@@ -4190,7 +4244,7 @@ Proof.
   replace (shl' x y) with (shl x y').
   replace (shl' (shl x y') z) with (shl (shl x y') z').
   replace (shl' x (Int.add y z)) with (shl x (add y' z')).
-  apply shl_shl; auto. apply zlt_true. rewrite <- E. 
+  apply shl_shl; auto. apply zlt_true. rewrite <- E.
   change (unsigned iwordsize) with zwordsize. tauto.
   unfold shl, shl'. rewrite E; auto.
   unfold shl at 1, shl'. rewrite <- B; auto.
@@ -4211,7 +4265,7 @@ Proof.
   replace (shru' x y) with (shru x y').
   replace (shru' (shru x y') z) with (shru (shru x y') z').
   replace (shru' x (Int.add y z)) with (shru x (add y' z')).
-  apply shru_shru; auto. apply zlt_true. rewrite <- E. 
+  apply shru_shru; auto. apply zlt_true. rewrite <- E.
   change (unsigned iwordsize) with zwordsize. tauto.
   unfold shru, shru'. rewrite E; auto.
   unfold shru at 1, shru'. rewrite <- B; auto.
@@ -4232,7 +4286,7 @@ Proof.
   replace (shr' x y) with (shr x y').
   replace (shr' (shr x y') z) with (shr (shr x y') z').
   replace (shr' x (Int.add y z)) with (shr x (add y' z')).
-  apply shr_shr; auto. apply zlt_true. rewrite <- E. 
+  apply shr_shr; auto. apply zlt_true. rewrite <- E.
   change (unsigned iwordsize) with zwordsize. tauto.
   unfold shr, shr'. rewrite E; auto.
   unfold shr at 1, shr'. rewrite <- B; auto.
@@ -4256,8 +4310,8 @@ Proof.
   intros.
   destruct (list_in_map_inv _ _ _ H) as [i0 [EQ IN]].
   exploit Z_one_bits_range; eauto. intros R.
-  unfold Int.ltu. rewrite EQ. rewrite Int.unsigned_repr. 
-  change (Int.unsigned iwordsize') with zwordsize. apply zlt_true. omega. 
+  unfold Int.ltu. rewrite EQ. rewrite Int.unsigned_repr.
+  change (Int.unsigned iwordsize') with zwordsize. apply zlt_true. omega.
   assert (zwordsize < Int.max_unsigned) by reflexivity. omega.
 Qed.
 
@@ -4276,13 +4330,13 @@ Proof.
   { induction l; simpl; intros.
   - auto.
   - rewrite IHl by eauto. apply eqm_samerepr; apply eqm_add.
-  + rewrite shl'_one_two_p. rewrite Int.unsigned_repr. apply eqm_sym; apply eqm_unsigned_repr.  
+  + rewrite shl'_one_two_p. rewrite Int.unsigned_repr. apply eqm_sym; apply eqm_unsigned_repr.
     exploit (H a). auto. assert (zwordsize < Int.max_unsigned) by reflexivity. omega.
-  + apply eqm_sym; apply eqm_unsigned_repr. 
+  + apply eqm_sym; apply eqm_unsigned_repr.
   }
-  intros. rewrite <- (repr_unsigned x) at 1. unfold one_bits'. rewrite REC. 
+  intros. rewrite <- (repr_unsigned x) at 1. unfold one_bits'. rewrite REC.
   rewrite <- Z_one_bits_powerserie. auto. apply unsigned_range.
-  apply Z_one_bits_range.    
+  apply Z_one_bits_range.
 Qed.
 
 Lemma is_power2'_rng:
@@ -4290,7 +4344,7 @@ Lemma is_power2'_rng:
   is_power2' n = Some logn ->
   0 <= Int.unsigned logn < zwordsize.
 Proof.
-  unfold is_power2'; intros n logn P2. 
+  unfold is_power2'; intros n logn P2.
   destruct (Z_one_bits wordsize (unsigned n) 0) as [ | i [ | ? ?]] eqn:B; inv P2.
   assert (0 <= i < zwordsize).
   { apply Z_one_bits_range with (unsigned n). rewrite B; auto with coqlib. }
@@ -4303,7 +4357,7 @@ Theorem is_power2'_range:
   forall n logn,
   is_power2' n = Some logn -> Int.ltu logn iwordsize' = true.
 Proof.
-  intros. unfold Int.ltu. change (Int.unsigned iwordsize') with zwordsize. 
+  intros. unfold Int.ltu. change (Int.unsigned iwordsize') with zwordsize.
   apply zlt_true. generalize (is_power2'_rng _ _ H). tauto.
 Qed.
 
@@ -4321,13 +4375,13 @@ Proof.
   assert (zwordsize < Int.max_unsigned) by reflexivity.
   omega.
 Qed.
-   
+
 Theorem mul_pow2':
   forall x n logn,
   is_power2' n = Some logn ->
   mul x n = shl' x logn.
 Proof.
-  intros. rewrite shl'_mul. f_equal. rewrite shl'_one_two_p. 
+  intros. rewrite shl'_mul. f_equal. rewrite shl'_one_two_p.
   rewrite <- (repr_unsigned n). f_equal. apply is_power2'_correct; auto.
 Qed.
 
@@ -4855,6 +4909,43 @@ Proof.
   rewrite zlt_true by omega; auto.
 Qed.
 
+(** Utility proofs for mixed 32bit and 64bit arithmetic *)
+
+Remark int_unsigned_range:
+  forall x, 0 <= Int.unsigned x <= max_unsigned.
+Proof.
+  intros.
+  unfold max_unsigned. unfold modulus.
+  generalize (Int.unsigned_range x).
+  unfold Int.modulus in *.
+  change (wordsize) with  64%nat in *.
+  change (Int.wordsize) with 32%nat in *.
+  unfold two_power_nat. simpl.
+  omega.
+Qed.
+
+Remark int_unsigned_repr:
+  forall x, unsigned (repr (Int.unsigned x)) = Int.unsigned x.
+Proof.
+  intros. rewrite unsigned_repr. auto.
+  apply int_unsigned_range.
+Qed.
+
+Lemma int_sub_ltu:
+  forall x y,
+    Int.ltu x y= true ->
+    Int.unsigned (Int.sub y x) = unsigned (sub (repr (Int.unsigned y)) (repr (Int.unsigned x))).
+Proof.
+  intros. generalize (Int.sub_ltu x y H). intros. unfold Int.sub. unfold sub.
+  rewrite Int.unsigned_repr. rewrite unsigned_repr.
+  rewrite unsigned_repr by apply int_unsigned_range. rewrite int_unsigned_repr. reflexivity.
+  rewrite unsigned_repr by apply int_unsigned_range.
+  rewrite int_unsigned_repr. generalize (int_unsigned_range y).
+  omega.
+  generalize (Int.sub_ltu x y H). intros.
+  generalize (Int.unsigned_range_2 y). intros. omega.
+Qed.
+
 End Int64.
 
 Strategy 0 [Wordsize_64.wordsize].
@@ -4899,7 +4990,7 @@ Hypothesis _32: Archi.ptr64 = false.
 
 Lemma modulus_eq32: modulus = Int.modulus.
 Proof.
-  unfold modulus, wordsize. 
+  unfold modulus, wordsize.
   change Wordsize_Ptrofs.wordsize with (if Archi.ptr64 then 64%nat else 32%nat).
   rewrite _32. reflexivity.
 Qed.
@@ -4923,8 +5014,8 @@ Qed.
 Lemma agree32_signed:
   forall a b, agree32 a b -> Ptrofs.signed a = Int.signed b.
 Proof.
-  unfold agree32; intros. unfold signed, Int.signed, half_modulus, Int.half_modulus. 
-  rewrite modulus_eq32. rewrite H. auto. 
+  unfold agree32; intros. unfold signed, Int.signed, half_modulus, Int.half_modulus.
+  rewrite modulus_eq32. rewrite H. auto.
 Qed.
 
 Lemma agree32_of_int:
@@ -4932,7 +5023,7 @@ Lemma agree32_of_int:
 Proof.
   unfold of_int; intros. rewrite <- (Int.repr_unsigned b) at 2. apply agree32_repr.
 Qed.
- 
+
 Lemma agree32_of_ints:
   forall b, agree32 (of_ints b) b.
 Proof.
@@ -4942,7 +5033,7 @@ Qed.
 Lemma agree32_of_int_eq:
   forall a b, agree32 a b -> of_int b = a.
 Proof.
-  unfold agree32, of_int; intros. rewrite <- H. apply repr_unsigned. 
+  unfold agree32, of_int; intros. rewrite <- H. apply repr_unsigned.
 Qed.
 
 Lemma agree32_of_ints_eq:
@@ -4954,7 +5045,7 @@ Qed.
 Lemma agree32_to_int:
   forall a, agree32 a (to_int a).
 Proof.
-  unfold agree32, to_int; intros. rewrite <- (agree32_repr (unsigned a)). 
+  unfold agree32, to_int; intros. rewrite <- (agree32_repr (unsigned a)).
   rewrite repr_unsigned; auto.
 Qed.
 
@@ -4974,21 +5065,21 @@ Lemma agree32_add:
   forall a1 b1 a2 b2,
   agree32 a1 b1 -> agree32 a2 b2 -> agree32 (Ptrofs.add a1 a2) (Int.add b1 b2).
 Proof.
-  unfold agree32, Ptrofs.add, Int.add; intros. rewrite H, H0. apply agree32_repr. 
+  unfold agree32, Ptrofs.add, Int.add; intros. rewrite H, H0. apply agree32_repr.
 Qed.
 
 Lemma agree32_sub:
   forall a1 b1 a2 b2,
   agree32 a1 b1 -> agree32 a2 b2 -> agree32 (Ptrofs.sub a1 a2) (Int.sub b1 b2).
 Proof.
-  unfold agree32, Ptrofs.sub, Int.sub; intros. rewrite H, H0. apply agree32_repr. 
+  unfold agree32, Ptrofs.sub, Int.sub; intros. rewrite H, H0. apply agree32_repr.
 Qed.
 
 Lemma agree32_mul:
   forall a1 b1 a2 b2,
   agree32 a1 b1 -> agree32 a2 b2 -> agree32 (Ptrofs.mul a1 a2) (Int.mul b1 b2).
 Proof.
-  unfold agree32, Ptrofs.mul, Int.mul; intros. rewrite H, H0. apply agree32_repr. 
+  unfold agree32, Ptrofs.mul, Int.mul; intros. rewrite H, H0. apply agree32_repr.
 Qed.
 
 Lemma agree32_divs:
@@ -5004,7 +5095,7 @@ Lemma of_int_to_int:
 Proof.
   intros; unfold of_int, to_int. apply eqm_repr_eq. rewrite <- eqm32.
   apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
-Qed.   
+Qed.
 
 End AGREE32.
 
@@ -5014,7 +5105,7 @@ Hypothesis _64: Archi.ptr64 = true.
 
 Lemma modulus_eq64: modulus = Int64.modulus.
 Proof.
-  unfold modulus, wordsize. 
+  unfold modulus, wordsize.
   change Wordsize_Ptrofs.wordsize with (if Archi.ptr64 then 64%nat else 32%nat).
   rewrite _64. reflexivity.
 Qed.
@@ -5038,8 +5129,8 @@ Qed.
 Lemma agree64_signed:
   forall a b, agree64 a b -> Ptrofs.signed a = Int64.signed b.
 Proof.
-  unfold agree64; intros. unfold signed, Int64.signed, half_modulus, Int64.half_modulus. 
-  rewrite modulus_eq64. rewrite H. auto. 
+  unfold agree64; intros. unfold signed, Int64.signed, half_modulus, Int64.half_modulus.
+  rewrite modulus_eq64. rewrite H. auto.
 Qed.
 
 Lemma agree64_of_int:
@@ -5051,13 +5142,13 @@ Qed.
 Lemma agree64_of_int_eq:
   forall a b, agree64 a b -> of_int64 b = a.
 Proof.
-  unfold agree64, of_int64; intros. rewrite <- H. apply repr_unsigned. 
+  unfold agree64, of_int64; intros. rewrite <- H. apply repr_unsigned.
 Qed.
 
 Lemma agree64_to_int:
   forall a, agree64 a (to_int64 a).
 Proof.
-  unfold agree64, to_int64; intros. rewrite <- (agree64_repr (unsigned a)). 
+  unfold agree64, to_int64; intros. rewrite <- (agree64_repr (unsigned a)).
   rewrite repr_unsigned; auto.
 Qed.
 
@@ -5077,21 +5168,21 @@ Lemma agree64_add:
   forall a1 b1 a2 b2,
   agree64 a1 b1 -> agree64 a2 b2 -> agree64 (Ptrofs.add a1 a2) (Int64.add b1 b2).
 Proof.
-  unfold agree64, Ptrofs.add, Int.add; intros. rewrite H, H0. apply agree64_repr. 
+  unfold agree64, Ptrofs.add, Int.add; intros. rewrite H, H0. apply agree64_repr.
 Qed.
 
 Lemma agree64_sub:
   forall a1 b1 a2 b2,
   agree64 a1 b1 -> agree64 a2 b2 -> agree64 (Ptrofs.sub a1 a2) (Int64.sub b1 b2).
 Proof.
-  unfold agree64, Ptrofs.sub, Int.sub; intros. rewrite H, H0. apply agree64_repr. 
+  unfold agree64, Ptrofs.sub, Int.sub; intros. rewrite H, H0. apply agree64_repr.
 Qed.
 
 Lemma agree64_mul:
   forall a1 b1 a2 b2,
   agree64 a1 b1 -> agree64 a2 b2 -> agree64 (Ptrofs.mul a1 a2) (Int64.mul b1 b2).
 Proof.
-  unfold agree64, Ptrofs.mul, Int.mul; intros. rewrite H, H0. apply agree64_repr. 
+  unfold agree64, Ptrofs.mul, Int.mul; intros. rewrite H, H0. apply agree64_repr.
 Qed.
 
 Lemma agree64_divs:
@@ -5107,16 +5198,16 @@ Lemma of_int64_to_int64:
 Proof.
   intros; unfold of_int64, to_int64. apply eqm_repr_eq. rewrite <- eqm64.
   apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr.
-Qed.   
+Qed.
 
 End AGREE64.
 
-Hint Resolve 
+Hint Resolve
   agree32_repr agree32_of_int agree32_of_ints agree32_of_int_eq agree32_of_ints_eq
   agree32_to_int agree32_to_int_eq agree32_neg agree32_add agree32_sub agree32_mul agree32_divs
   agree64_repr agree64_of_int agree64_of_int_eq
   agree64_to_int agree64_to_int_eq agree64_neg agree64_add agree64_sub agree64_mul agree64_divs : ptrofs.
-              
+
 End Ptrofs.
 
 Strategy 0 [Wordsize_Ptrofs.wordsize].
diff --git a/lib/Maps.v b/lib/Maps.v
index e2d4e965..cfb866ba 100644
--- a/lib/Maps.v
+++ b/lib/Maps.v
@@ -819,8 +819,8 @@ Module PTree <: TREE.
       (fun i_x i_y => fst i_x = fst i_y /\ R (snd i_x) (snd i_y))
       (elements m) (elements n).
   Proof.
-    intros. apply elements_canonical_order'. 
-    intros. destruct (get i m) as [x|] eqn:GM. 
+    intros. apply elements_canonical_order'.
+    intros. destruct (get i m) as [x|] eqn:GM.
     exploit H; eauto. intros (y & P & Q). rewrite P; constructor; auto.
     destruct (get i n) as [y|] eqn:GN.
     exploit H0; eauto. intros (x & P & Q). congruence.
@@ -1265,7 +1265,7 @@ Module ITree(X: INDEXED_TYPE).
      | _, _ => False
     end.
   Proof.
-    unfold beq, get. intros. rewrite PTree.beq_correct in H. apply H. 
+    unfold beq, get. intros. rewrite PTree.beq_correct in H. apply H.
   Qed.
 
   Definition combine: forall (A B C: Type), (option A -> option B -> option C) -> t A -> t B -> t C := PTree.combine.
@@ -1636,8 +1636,8 @@ Proof.
   { induction l as [ | [k1 v1] l]; simpl; intros.
   - tauto.
   - apply IHl in H. unfold f in H. simpl in H. rewrite T.gsspec in H.
-    destruct H; auto. 
-    destruct (T.elt_eq k k1). inv H. auto. auto. 
+    destruct H; auto.
+    destruct (T.elt_eq k k1). inv H. auto. auto.
   }
   intros. apply REC in H. rewrite T.gempty in H. intuition congruence.
 Qed.
@@ -1650,7 +1650,7 @@ Proof.
             exists v, T.get k (fold_left f l m) = Some v).
   { induction l as [ | [k1 v1] l]; simpl; intros.
   - tauto.
-  - apply IHl. unfold f; rewrite T.gsspec. simpl. destruct (T.elt_eq k k1). 
+  - apply IHl. unfold f; rewrite T.gsspec. simpl. destruct (T.elt_eq k k1).
     right; econstructor; eauto.
     intuition congruence.
   }
@@ -1669,7 +1669,7 @@ Lemma of_list_unique:
   forall k v l1 l2,
   ~In k (map fst l2) -> T.get k (of_list (l1 ++ (k, v) :: l2)) = Some v.
 Proof.
-  intros. unfold of_list. rewrite fold_left_app. simpl. 
+  intros. unfold of_list. rewrite fold_left_app. simpl.
   rewrite of_list_unchanged by auto. unfold f; apply T.gss.
 Qed.
 
@@ -1683,8 +1683,8 @@ Proof.
   { induction l as [ | [k1 v1] l]; simpl; intros.
     contradiction.
     inv H. destruct H0.
-    inv H. rewrite of_list_unchanged by auto. apply T.gss. 
-    apply IHl; auto. 
+    inv H. rewrite of_list_unchanged by auto. apply T.gss.
+    apply IHl; auto.
   }
   intros; apply REC; auto.
 Qed.
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index 10dc5534..5d11aad1 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -27,11 +27,14 @@ Definition big_endian := true.
 Definition align_int64 := 8%Z.
 Definition align_float64 := 8%Z.
 
-Definition splitlong := true.
+(** Can we use the 64-bit extensions to the PowerPC architecture? *)
+Parameter ppc64 : bool.
+
+Definition splitlong := negb ppc64.
 
 Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
 Proof.
-  unfold splitlong, ptr64; congruence.
+  reflexivity.
 Qed.
 
 Program Definition default_pl_64 : bool * nan_pl 53 :=
@@ -51,7 +54,4 @@ Definition float_of_single_preserves_sNaN := true.
 Global Opaque ptr64 big_endian splitlong
               default_pl_64 choose_binop_pl_64
               default_pl_32 choose_binop_pl_32
-              float_of_single_preserves_sNaN.
-
-(** Can we use the 64-bit extensions to the PowerPC architecture? *)
-Parameter ppc64: bool.
+              float_of_single_preserves_sNaN.
+\ No newline at end of file
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index cc554eb1..746a610b 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -136,17 +136,24 @@ Definition label := positive.
 
 Inductive instruction : Type :=
   | Padd: ireg -> ireg -> ireg -> instruction                 (**r integer addition *)
+  | Padd64: ireg -> ireg -> ireg -> instruction               (**r integer addition (PPC64) *)
   | Paddc: ireg -> ireg -> ireg -> instruction                (**r integer addition and set carry *)
   | Padde: ireg -> ireg -> ireg -> instruction                (**r integer addition with carry *)
   | Paddi: ireg -> ireg -> constant -> instruction            (**r add immediate *)
+  | Paddi64: ireg -> ireg -> int64 -> instruction             (**r add immediate (PPC64) *)
   | Paddic: ireg -> ireg -> constant -> instruction           (**r add immediate and set carry *)
   | Paddis: ireg -> ireg -> constant -> instruction           (**r add immediate high *)
+  | Paddis64: ireg -> ireg -> int64 -> instruction            (**r add immediate high (PPC64) *)
   | Paddze: ireg -> ireg -> instruction                       (**r add carry *)
-  | Pallocframe: Z -> ptrofs -> ptrofs -> instruction               (**r allocate new stack frame (pseudo) *)
+  | Paddze64: ireg -> ireg -> instruction                     (**r add carry (PPC64) *)
+  | Pallocframe: Z -> ptrofs -> ptrofs -> instruction         (**r allocate new stack frame (pseudo) *)
   | Pand_: ireg -> ireg -> ireg -> instruction                (**r bitwise and *)
+  | Pand_64: ireg -> ireg -> ireg -> instruction              (**r bitwise and (PPC64) *)
   | Pandc: ireg -> ireg -> ireg -> instruction                (**r bitwise and-complement *)
   | Pandi_: ireg -> ireg -> constant -> instruction           (**r and immediate and set conditions *)
+  | Pandi_64: ireg -> ireg -> int64 -> instruction            (**r and immediate and set conditions (PPC64) *)
   | Pandis_: ireg -> ireg -> constant -> instruction          (**r and immediate high and set conditions *)
+  | Pandis_64: ireg -> ireg -> int64 -> instruction           (**r and immediate high and set conditions (PPC64) *)
   | Pb: label -> instruction                                  (**r unconditional branch *)
   | Pbctr: signature -> instruction                           (**r branch to contents of register CTR *)
   | Pbctrl: signature -> instruction                          (**r branch to contents of CTR and link *)
@@ -158,10 +165,15 @@ Inductive instruction : Type :=
   | Pbt: crbit -> label -> instruction                        (**r branch if true *)
   | Pbtbl: ireg -> list label -> instruction                  (**r N-way branch through a jump table (pseudo) *)
   | Pcmpb: ireg -> ireg -> ireg -> instruction                (**r compare bytes *)
+  | Pcmpld: ireg -> ireg -> instruction                       (**r unsigned integer comparison (PPC64) *)
+  | Pcmpldi: ireg -> int64 -> instruction                     (**r same, with immediate argument (PPC64) *)
   | Pcmplw: ireg -> ireg -> instruction                       (**r unsigned integer comparison *)
   | Pcmplwi: ireg -> constant -> instruction                  (**r same, with immediate argument *)
+  | Pcmpd: ireg -> ireg -> instruction                        (**r signed integer comparison (PPC64) *)
+  | Pcmpdi: ireg -> int64 -> instruction                      (**r same, with immediate argument (PPC64) *)
   | Pcmpw: ireg -> ireg -> instruction                        (**r signed integer comparison *)
   | Pcmpwi: ireg -> constant -> instruction                   (**r same, with immediate argument *)
+  | Pcntlzd: ireg -> ireg -> instruction                      (**r count leading zeros (PPC64) *)
   | Pcntlzw: ireg -> ireg -> instruction                      (**r count leading zeros *)
   | Pcreqv: crbit -> crbit -> crbit -> instruction            (**r not-xor between condition bits *)
   | Pcror: crbit -> crbit -> crbit -> instruction             (**r or between condition bits *)
@@ -173,23 +185,28 @@ Inductive instruction : Type :=
   | Pdcbtls: int -> ireg -> ireg -> instruction               (**r data cache block touch and lock *)
   | Pdcbz: ireg -> ireg -> instruction                        (**r data cache block zero *)
   | Pdivw: ireg -> ireg -> ireg -> instruction                (**r signed division *)
+  | Pdivd: ireg -> ireg -> ireg -> instruction                (**r signed division (PPC64) *)
   | Pdivwu: ireg -> ireg -> ireg -> instruction               (**r unsigned division *)
+  | Pdivdu: ireg -> ireg -> ireg -> instruction               (**r unsigned division (PPC64) *)
   | Peieio: instruction                                       (**r EIEIO barrier *)
   | Peqv: ireg -> ireg -> ireg -> instruction                 (**r bitwise not-xor *)
   | Pextsb: ireg -> ireg -> instruction                       (**r 8-bit sign extension *)
   | Pextsh: ireg -> ireg -> instruction                       (**r 16-bit sign extension *)
   | Pextsw: ireg -> ireg -> instruction                       (**r 64-bit sign extension (PPC64) *)
-  | Pfreeframe: Z -> ptrofs -> instruction                       (**r deallocate stack frame and restore previous frame (pseudo) *)
+  | Pextzw: ireg -> ireg -> instruction                       (**r 64-bit zero extension (pseudo, PPC64) *)
+  | Pfreeframe: Z -> ptrofs -> instruction                    (**r deallocate stack frame and restore previous frame (pseudo) *)
   | Pfabs: freg -> freg -> instruction                        (**r float absolute value *)
   | Pfabss: freg -> freg -> instruction                       (**r float absolute value *)
   | Pfadd: freg -> freg -> freg -> instruction                (**r float addition *)
   | Pfadds: freg -> freg -> freg -> instruction               (**r float addition *)
   | Pfcmpu: freg -> freg -> instruction                       (**r float comparison *)
   | Pfcfi: freg -> ireg -> instruction                        (**r signed-int-to-float conversion (pseudo, PPC64) *)
+  | Pfcfl: freg -> ireg -> instruction                        (**r signed-long-to-float conversion (pseudo, PPC64) *)
   | Pfcfiu: freg -> ireg -> instruction                       (**r unsigned-int-to-float conversion (pseudo, PPC64) *)
   | Pfcfid: freg -> freg -> instruction                       (**r signed-long-to-float conversion (PPC64) *)
   | Pfcti: ireg -> freg -> instruction                        (**r float-to-signed-int conversion, round towards 0 (pseudo) *)
   | Pfctiu: ireg -> freg -> instruction                       (**r float-to-unsigned-int conversion, round towards 0 (pseudo, PPC64) *)
+  | Pfctid: ireg -> freg -> instruction                       (**r float-to-signed-int conversion, round towards 0 (pseudo, PPC64) *)
   | Pfctidz: freg -> freg -> instruction                      (**r float-to-signed-long conversion, round towards 0 (PPC64) *)
   | Pfctiw: freg -> freg -> instruction                       (**r float-to-signed-int conversion, round by default *)
   | Pfctiwz: freg -> freg -> instruction                      (**r float-to-signed-int conversion, round towards 0 *)
@@ -219,6 +236,10 @@ Inductive instruction : Type :=
   | Picbtls: int -> ireg -> ireg -> instruction               (**r instruction cache block touch and lock set *)
   | Plbz: ireg -> constant -> ireg -> instruction             (**r load 8-bit unsigned int *)
   | Plbzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Pld: ireg -> constant -> ireg -> instruction              (**r load 64-bit int (PPC64) *)
+  | Pldx: ireg -> ireg -> ireg -> instruction                 (**r same, with 2 index regs *)
+  | Pld_a: ireg -> constant -> ireg -> instruction            (**r load 64-bit quantity to int reg (PPC64) *)
+  | Pldx_a: ireg -> ireg -> ireg -> instruction               (**r same, with 2 index regs *)
   | Plfd: freg -> constant -> ireg -> instruction             (**r load 64-bit float *)
   | Plfdx: freg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plfd_a: freg -> constant -> ireg -> instruction           (**r load 64-bit quantity to float reg *)
@@ -230,6 +251,10 @@ Inductive instruction : Type :=
   | Plhbrx: ireg -> ireg -> ireg -> instruction               (**r load 16-bit int and reverse endianness *)
   | Plhz: ireg -> constant -> ireg -> instruction             (**r load 16-bit unsigned int *)
   | Plhzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Pldi: ireg -> int64 -> instruction                        (**r load 64-bit int constant (PPC64) *)
+  | Plmake: ireg -> ireg -> ireg -> instruction               (**r build an int64 from 2 ints (pseudo, PPC64) *)
+  | Pllo: ireg -> instruction                                 (**r extract low 32 bits of an int64 (pseudo, PPC64) *)
+  | Plhi: ireg -> ireg -> instruction                         (**r extract high 32 bits of an int64 (pseudo, PPC64) *)
   | Plfi: freg -> float -> instruction                        (**r load float constant *)
   | Plfis: freg -> float32 -> instruction                     (**r load float constant *)
   | Plwz: ireg -> constant -> ireg -> instruction             (**r load 32-bit int *)
@@ -250,25 +275,41 @@ Inductive instruction : Type :=
   | Pmtspr: int -> ireg -> instruction                        (**r move to special register *)
   | Pmulli: ireg -> ireg -> constant -> instruction           (**r integer multiply immediate *)
   | Pmullw: ireg -> ireg -> ireg -> instruction               (**r integer multiply *)
+  | Pmulld: ireg -> ireg -> ireg -> instruction               (**r integer multiply (PPC64) *)
   | Pmulhw: ireg -> ireg -> ireg -> instruction               (**r multiply high signed *)
   | Pmulhwu: ireg -> ireg -> ireg -> instruction              (**r multiply high signed *)
+  | Pmulhd: ireg -> ireg -> ireg -> instruction               (**r multiply high double word signed (PPC64) *)
+  | Pmulhdu: ireg -> ireg -> ireg -> instruction              (**r multiply high double word unsigned (PPC64) *)
   | Pnand: ireg -> ireg -> ireg -> instruction                (**r bitwise not-and *)
   | Pnor: ireg -> ireg -> ireg -> instruction                 (**r bitwise not-or *)
+  | Pnor64: ireg -> ireg -> ireg -> instruction               (**r bitwise not-or (PPC64) *)
   | Por: ireg -> ireg -> ireg -> instruction                  (**r bitwise or *)
+  | Por64: ireg -> ireg -> ireg -> instruction                (**r bitwise or (PPC64) *)
   | Porc: ireg -> ireg -> ireg -> instruction                 (**r bitwise or-complement *)
   | Pori: ireg -> ireg -> constant -> instruction             (**r or with immediate *)
+  | Pori64: ireg -> ireg -> int64 -> instruction              (**r or with immediate (PPC64) *)
   | Poris: ireg -> ireg -> constant -> instruction            (**r or with immediate high *)
+  | Poris64: ireg -> ireg -> int64 -> instruction             (**r or with immediate high (PPC64) *)
   | Prldicl: ireg -> ireg -> int -> int -> instruction        (**r rotate and mask left (PPC64) *)
-  | Prldicr: ireg -> ireg -> int -> int -> instruction        (**r rotate and mask right (PPC64) *)
+  | Prldinm: ireg -> ireg -> int -> int64 -> instruction      (**r rotate and mask (PPC64) *)
+  | Prldimi: ireg -> ireg -> int -> int64 -> instruction      (**r rotate and insert (PPC64) *)
   | Prlwinm: ireg -> ireg -> int -> int -> instruction        (**r rotate and mask *)
   | Prlwimi: ireg -> ireg -> int -> int -> instruction        (**r rotate and insert *)
+  | Psld: ireg -> ireg -> ireg -> instruction                 (**r shift left 64 bits (PPC64) *)
   | Pslw: ireg -> ireg -> ireg -> instruction                 (**r shift left *)
+  | Psrad: ireg -> ireg -> ireg -> instruction                (**r shift right signed 64 bits (PPC64) *)
+  | Psradi: ireg -> ireg -> int -> instruction                (**r shift right signed immediate 64 bits (PPC64) *)
   | Psraw: ireg -> ireg -> ireg -> instruction                (**r shift right signed *)
   | Psrawi: ireg -> ireg -> int -> instruction                (**r shift right signed immediate *)
+  | Psrd: ireg -> ireg -> ireg -> instruction                 (**r shift right unsigned 64 bits (PPC64) *)
   | Psrw: ireg -> ireg -> ireg -> instruction                 (**r shift right unsigned *)
   | Pstb: ireg -> constant -> ireg -> instruction             (**r store 8-bit int *)
   | Pstbx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Pstd: ireg -> constant -> ireg -> instruction             (**r store 64-bit integer (PPC64) *)
+  | Pstdx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs (PPC64) *)
   | Pstdu: ireg -> constant -> ireg -> instruction            (**r store 64-bit integer with update (PPC64) *)
+  | Pstd_a: ireg -> constant -> ireg -> instruction           (**r store 64-bit quantity from int reg (PPC64) *)
+  | Pstdx_a: ireg -> ireg -> ireg -> instruction              (**r same, with 2 index regs (PPC64) *)
   | Pstfd: freg -> constant -> ireg -> instruction            (**r store 64-bit float *)
   | Pstfdu: freg -> constant -> ireg -> instruction           (**r store 64-bit float with update *)
   | Pstfdx: freg -> ireg -> ireg -> instruction               (**r same, with 2 index regs *)
@@ -288,15 +329,20 @@ Inductive instruction : Type :=
   | Pstwbrx: ireg -> ireg -> ireg -> instruction              (**r store 32-bit int with reverse endianness *)
   | Pstwcx_: ireg -> ireg -> ireg -> instruction              (**r store conditional *)
   | Psubfc: ireg -> ireg -> ireg -> instruction               (**r reversed integer subtraction *)
+  | Psubfc64: ireg -> ireg -> ireg -> instruction             (**r reversed integer subtraction (PPC64) *)
   | Psubfe: ireg -> ireg -> ireg -> instruction               (**r reversed integer subtraction with carry *)
   | Psubfze: ireg -> ireg -> instruction                      (**r integer opposite with carry *)
   | Psubfic: ireg -> ireg -> constant -> instruction          (**r integer subtraction from immediate *)
+  | Psubfic64: ireg -> ireg -> int64 -> instruction           (**r integer subtraction from immediate (PPC64) *)
   | Psync: instruction                                        (**r SYNC barrier *)
   | Plwsync: instruction                                      (**r LWSYNC barrier *)
   | Ptrap: instruction                                        (**r unconditional trap *)
   | Pxor: ireg -> ireg -> ireg -> instruction                 (**r bitwise xor *)
+  | Pxor64: ireg -> ireg -> ireg -> instruction               (**r bitwise xor (PPC64) *)
   | Pxori: ireg -> ireg -> constant -> instruction            (**r bitwise xor with immediate *)
+  | Pxori64: ireg -> ireg -> int64 -> instruction             (**r bitwise xor with immediate (PPC64) *)
   | Pxoris: ireg -> ireg -> constant -> instruction           (**r bitwise xor with immediate high *)
+  | Pxoris64: ireg -> ireg -> int64 -> instruction            (**r bitwise xor with immediate high (PPC64) *)
   | Plabel: label -> instruction                              (**r define a code label *)
   | Pbuiltin: external_function -> list (builtin_arg preg) -> builtin_res preg -> instruction (**r built-in function (pseudo) *)
   | Pcfi_adjust: int -> instruction                           (**r .cfi_adjust debug directive *)
@@ -452,7 +498,10 @@ Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
   when that register is used in argument position. *)
 
 Definition gpr_or_zero (rs: regset) (r: ireg) :=
-  if ireg_eq r GPR0 then Vzero else rs#r.
+  if ireg_eq r GPR0 then Vint Int.zero else rs#r.
+
+Definition gpr_or_zero_l (rs: regset) (r: ireg) :=
+  if ireg_eq r GPR0 then Vlong Int64.zero else rs#r.
 
 Variable ge: genv.
 
@@ -597,6 +646,18 @@ Definition compare_uint (rs: regset) (m: mem) (v1 v2: val) :=
     #CR0_2 <- (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2)
     #CR0_3 <- Vundef.
 
+Definition compare_slong (rs: regset) (v1 v2: val) :=
+  rs#CR0_0 <- (Val.of_optbool (Val.cmpl_bool Clt v1 v2))
+    #CR0_1 <- (Val.of_optbool (Val.cmpl_bool Cgt v1 v2))
+    #CR0_2 <- (Val.of_optbool (Val.cmpl_bool Ceq v1 v2))
+    #CR0_3 <- Vundef.
+
+Definition compare_ulong (rs: regset) (m: mem) (v1 v2: val) :=
+  rs#CR0_0 <- (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Clt v1 v2))
+    #CR0_1 <- (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Cgt v1 v2))
+    #CR0_2 <- (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Ceq v1 v2))
+    #CR0_3 <- Vundef.
+
 Definition compare_float (rs: regset) (v1 v2: val) :=
   rs#CR0_0 <- (Val.cmpf Clt v1 v2)
     #CR0_1 <- (Val.cmpf Cgt v1 v2)
@@ -619,6 +680,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   match i with
   | Padd rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.add rs#r1 rs#r2))) m
+  | Padd64 rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.addl rs#r1 rs#r2))) m
   | Paddc rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.add rs#r1 rs#r2)
                         #CARRY <- (Val.add_carry rs#r1 rs#r2 Vzero))) m
@@ -627,14 +690,21 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
                         #CARRY <- (Val.add_carry rs#r1 rs#r2 rs#CARRY))) m
   | Paddi rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.add (gpr_or_zero rs r1) (const_low cst)))) m
+  | Paddi64 rd r1 cst =>
+      Next (nextinstr (rs#rd <- (Val.addl (gpr_or_zero_l rs r1) (Vlong cst)))) m
   | Paddic rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.add (gpr_or_zero rs r1) (const_low cst))
                        #CARRY <- (Val.add_carry (gpr_or_zero rs r1) (const_low cst) Vzero))) m
   | Paddis rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.add (gpr_or_zero rs r1) (const_high cst)))) m
+  | Paddis64 rd r1 cst =>
+      Next (nextinstr (rs#rd <- (Val.addl (gpr_or_zero_l rs r1) (Vlong (Int64.shl cst (Int64.repr 16)))))) m
   | Paddze rd r1 =>
       Next (nextinstr (rs#rd <- (Val.add rs#r1 rs#CARRY)
                        #CARRY <- (Val.add_carry rs#r1 Vzero rs#CARRY))) m
+  | Paddze64 rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.addl rs#r1 rs#CARRY)
+                       #CARRY <- (Val.addl_carry rs#r1 (Vlong Int64.zero) rs#CARRY))) m
   | Pallocframe sz ofs _ =>
       let (m1, stk) := Mem.alloc m 0 sz in
       let sp := Vptr stk Ptrofs.zero in
@@ -645,14 +715,23 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pand_ rd r1 r2 =>
       let v := Val.and rs#r1 rs#r2 in
       Next (nextinstr (compare_sint (rs#rd <- v) v Vzero)) m
+  | Pand_64 rd r1 r2 =>
+      let v := Val.andl rs#r1 rs#r2 in
+      Next (nextinstr (compare_slong (rs#rd <- v) v (Vlong Int64.zero))) m
   | Pandc rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.and rs#r1 (Val.notint rs#r2)))) m
   | Pandi_ rd r1 cst =>
       let v := Val.and rs#r1 (const_low cst) in
       Next (nextinstr (compare_sint (rs#rd <- v) v Vzero)) m
+  | Pandi_64 rd r1 cst =>
+      let v := Val.andl rs#r1 (Vlong cst) in
+      Next (nextinstr (compare_slong (rs#rd <- v) v (Vlong Int64.zero))) m
   | Pandis_ rd r1 cst =>
       let v := Val.and rs#r1 (const_high cst) in
       Next (nextinstr (compare_sint (rs#rd <- v) v Vzero)) m
+  | Pandis_64 rd r1 cst =>
+      let v := Val.andl rs#r1 (Vlong (Int64.shl cst (Int64.repr 16))) in
+      Next (nextinstr (compare_slong (rs#rd <- v) v (Vlong Int64.zero))) m
   | Pb lbl =>
       goto_label f lbl rs m
   | Pbctr sg =>
@@ -684,10 +763,18 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
           end
       | _ => Stuck
       end
+  | Pcmpld r1 r2 =>
+      Next (nextinstr (compare_ulong rs m rs#r1 rs#r2)) m
+  | Pcmpldi r1 cst =>
+      Next (nextinstr (compare_ulong rs m rs#r1 (Vlong cst))) m
   | Pcmplw r1 r2 =>
       Next (nextinstr (compare_uint rs m rs#r1 rs#r2)) m
   | Pcmplwi r1 cst =>
       Next (nextinstr (compare_uint rs m rs#r1 (const_low cst))) m
+  | Pcmpd r1 r2 =>
+      Next (nextinstr (compare_slong rs rs#r1 rs#r2)) m
+  | Pcmpdi r1 cst =>
+      Next (nextinstr (compare_slong rs rs#r1 (Vlong cst))) m
   | Pcmpw r1 r2 =>
       Next (nextinstr (compare_sint rs rs#r1 rs#r2)) m
   | Pcmpwi r1 cst =>
@@ -696,14 +783,22 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#(reg_of_crbit bd) <- (Val.or rs#(reg_of_crbit b1) rs#(reg_of_crbit b2)))) m
   | Pdivw rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.divs rs#r1 rs#r2)))) m
+  | Pdivd rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.divls rs#r1 rs#r2)))) m
   | Pdivwu rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.divu rs#r1 rs#r2)))) m
+  | Pdivdu rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.divlu rs#r1 rs#r2)))) m
   | Peqv rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.notint (Val.xor rs#r1 rs#r2)))) m
   | Pextsb rd r1 =>
       Next (nextinstr (rs#rd <- (Val.sign_ext 8 rs#r1))) m
   | Pextsh rd r1 =>
       Next (nextinstr (rs#rd <- (Val.sign_ext 16 rs#r1))) m
+  | Pextsw rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.longofint rs#r1))) m
+  | Pextzw rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.longofintu rs#r1))) m
   | Pfreeframe sz ofs =>
       match Mem.loadv Mint32 m (Val.offset_ptr rs#GPR1 ofs) with
       | None => Stuck
@@ -729,12 +824,16 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (compare_float rs rs#r1 rs#r2)) m
   | Pfcfi rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofint rs#r1)))) m
+  | Pfcfl rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatoflong rs#r1)))) m
   | Pfcfiu rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofintu rs#r1)))) m
   | Pfcti rd r1 =>
       Next (nextinstr (rs#FPR13 <- Vundef #rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
   | Pfctiu rd r1 =>
       Next (nextinstr (rs#FPR13 <- Vundef #rd <- (Val.maketotal (Val.intuoffloat rs#r1)))) m
+  | Pfctid rd r1 =>
+      Next (nextinstr (rs#FPR13 <- Vundef #rd <- (Val.maketotal (Val.longoffloat rs#r1)))) m
   | Pfdiv rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.divf rs#r1 rs#r2))) m
   | Pfdivs rd r1 r2 =>
@@ -763,12 +862,20 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       load1 Mint8unsigned rd cst r1 rs m
   | Plbzx rd r1 r2 =>
       load2 Mint8unsigned rd r1 r2 rs m
+  | Pld rd cst r1 =>
+      load1 Mint64 rd cst r1 rs m
+  | Pldx rd r1 r2 =>
+      load2 Mint64 rd r1 r2 rs m
+  | Pld_a rd cst r1 =>
+      load1 Many64 rd cst r1 rs m
+  | Pldx_a rd r1 r2 =>
+      load2 Many64 rd r1 r2 rs m
   | Plfd rd cst r1 =>
       load1 Mfloat64 rd cst r1 rs m
-  | Plfdx rd r1 r2 =>
-      load2 Mfloat64 rd r1 r2 rs m
   | Plfd_a rd cst r1 =>
       load1 Many64 rd cst r1 rs m
+  | Plfdx rd r1 r2 =>
+      load2 Mfloat64 rd r1 r2 rs m
   | Plfdx_a rd r1 r2 =>
       load2 Many64 rd r1 r2 rs m
   | Plfs rd cst r1 =>
@@ -783,6 +890,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       load1 Mint16unsigned rd cst r1 rs m
   | Plhzx rd r1 r2 =>
       load2 Mint16unsigned rd r1 r2 rs m
+  | Pldi rd i =>
+       Next (nextinstr (rs #GPR12 <- Vundef #rd <- (Vlong i))) m
+  | Plmake rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.longofwords rs#r1 rs#r2))) m
+  | Pllo rd =>
+      Next (nextinstr (rs#rd <- (Val.loword rs#rd))) m
+  | Plhi rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.hiword rs#r1))) m
   | Plfi rd f =>
       Next (nextinstr (rs #GPR12 <- Vundef #rd <- (Vfloat f))) m
   | Plfis rd f =>
@@ -809,39 +924,71 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#rd <- (Val.mul rs#r1 (const_low cst)))) m
   | Pmullw rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.mul rs#r1 rs#r2))) m
+  | Pmulld rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mull rs#r1 rs#r2))) m
   | Pmulhw rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.mulhs rs#r1 rs#r2))) m
   | Pmulhwu rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.mulhu rs#r1 rs#r2))) m
+  | Pmulhd rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mullhs rs#r1 rs#r2))) m
+  | Pmulhdu rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mullhu rs#r1 rs#r2))) m
   | Pnand rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.notint (Val.and rs#r1 rs#r2)))) m
   | Pnor rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.notint (Val.or rs#r1 rs#r2)))) m
+  | Pnor64 rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.notl (Val.orl rs#r1 rs#r2)))) m
   | Por rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.or rs#r1 rs#r2))) m
+  | Por64 rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.orl rs#r1 rs#r2))) m
   | Porc rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.or rs#r1 (Val.notint rs#r2)))) m
   | Pori rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.or rs#r1 (const_low cst)))) m
+  | Pori64 rd r1 cst =>
+      Next (nextinstr (rs#rd <- (Val.orl rs#r1 (Vlong cst)))) m
   | Poris rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.or rs#r1 (const_high cst)))) m
+  | Poris64 rd r1 cst =>
+      Next (nextinstr (rs#rd <- (Val.orl rs#r1 (Vlong (Int64.shl cst (Int64.repr 16)))))) m
+  | Prldinm rd r1 amount mask =>
+      Next (nextinstr (rs#rd <- (Val.rolml rs#r1 amount mask))) m
   | Prlwinm rd r1 amount mask =>
       Next (nextinstr (rs#rd <- (Val.rolm rs#r1 amount mask))) m
   | Prlwimi rd r1 amount mask =>
       Next (nextinstr (rs#rd <- (Val.or (Val.and rs#rd (Vint (Int.not mask)))
                                      (Val.rolm rs#r1 amount mask)))) m
+  | Psld rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shll rs#r1 rs#r2))) m
   | Pslw rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.shl rs#r1 rs#r2))) m
+  | Psrad rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shrl rs#r1 rs#r2) #CARRY <- (Val.shrl_carry rs#r1 rs#r2))) m
+  | Psradi rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.shrl rs#r1 (Vint n)) #CARRY <- (Val.shrl_carry rs#r1 (Vint n)))) m
   | Psraw rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.shr rs#r1 rs#r2) #CARRY <- (Val.shr_carry rs#r1 rs#r2))) m
   | Psrawi rd r1 n =>
       Next (nextinstr (rs#rd <- (Val.shr rs#r1 (Vint n)) #CARRY <- (Val.shr_carry rs#r1 (Vint n)))) m
+  | Psrd rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shrlu rs#r1 rs#r2))) m
   | Psrw rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.shru rs#r1 rs#r2))) m
   | Pstb rd cst r1 =>
       store1 Mint8unsigned rd cst r1 rs m
   | Pstbx rd r1 r2 =>
       store2 Mint8unsigned rd r1 r2 rs m
+  | Pstd rd cst r1 =>
+      store1 Mint64 rd cst r1 rs m
+  | Pstdx rd r1 r2 =>
+      store2 Mint64 rd r1 r2 rs m
+  | Pstd_a rd cst r1 =>
+      store1 Many64 rd cst r1 rs m
+  | Pstdx_a rd r1 r2 =>
+      store2 Many64 rd r1 r2 rs m
   | Pstfd rd cst r1 =>
       store1 Mfloat64 rd cst r1 rs m
   | Pstfdx rd r1 r2 =>
@@ -869,18 +1016,28 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Psubfc rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.sub rs#r2 rs#r1)
                        #CARRY <- (Val.add_carry rs#r2 (Val.notint rs#r1) Vone))) m
+  | Psubfc64 rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.subl rs#r2 rs#r1) #CARRY <- Vundef)) m
   | Psubfe rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.add (Val.add rs#r2 (Val.notint rs#r1)) rs#CARRY)
                        #CARRY <- (Val.add_carry rs#r2 (Val.notint rs#r1) rs#CARRY))) m
   | Psubfic rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.sub (const_low cst) rs#r1)
                        #CARRY <- (Val.add_carry (const_low cst) (Val.notint rs#r1) Vone))) m
+  | Psubfic64 rd r1 cst =>
+      Next (nextinstr (rs#rd <- (Val.subl (Vlong cst) rs#r1) #CARRY <- Vundef)) m
   | Pxor rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.xor rs#r1 rs#r2))) m
+  | Pxor64 rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.xorl rs#r1 rs#r2))) m
   | Pxori rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.xor rs#r1 (const_low cst)))) m
+  | Pxori64 rd r1 cst =>
+      Next (nextinstr (rs#rd <- (Val.xorl rs#r1 (Vlong cst)))) m
   | Pxoris rd r1 cst =>
       Next (nextinstr (rs#rd <- (Val.xor rs#r1 (const_high cst)))) m
+  | Pxoris64 rd r1 cst =>
+      Next (nextinstr (rs#rd <- (Val.xorl rs#r1 (Vlong (Int64.shl cst (Int64.repr 16)))))) m
   | Plabel lbl =>
       Next (nextinstr rs) m
   | Pcfi_rel_offset ofs =>
@@ -891,6 +1048,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       directly by [Asmgen], so we do not model them. *)
   | Pbdnz _
   | Pcmpb _ _ _
+  | Pcntlzd _ _
   | Pcntlzw _ _
   | Pcreqv _ _ _
   | Pcrxor _ _ _
@@ -900,7 +1058,6 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pdcbtst _ _ _
   | Pdcbtls _ _ _
   | Pdcbz _ _
-  | Pextsw _ _
   | Peieio
   | Pfcfid _ _
   | Pfctidz _ _
@@ -928,7 +1085,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pmfspr _ _
   | Pmtspr _ _
   | Prldicl _ _ _ _
-  | Prldicr _ _ _ _
+  | Prldimi _ _ _ _
   | Pstdu _ _ _
   | Pstwbrx _ _ _
   | Pstwcx_ _ _ _
@@ -1067,7 +1224,7 @@ Proof.
   { intros. inv H; inv H0; congruence. }
   assert (B: forall p v1 v2,
              extcall_arg_pair rs m p v1 -> extcall_arg_pair rs m p v2 -> v1 = v2).
-  { intros. inv H; inv H0. 
+  { intros. inv H; inv H0.
     eapply A; eauto.
     f_equal; eapply A; eauto. }
   assert (C: forall ll vl1, list_forall2 (extcall_arg_pair rs m) ll vl1 ->
@@ -1107,7 +1264,7 @@ Ltac Equalities :=
 (* initial states *)
   inv H; inv H0. f_equal. congruence.
 (* final no step *)
-  inv H. red; intros; red; intros. inv H; rewrite H0 in *; discriminate. 
+  inv H. red; intros; red; intros. inv H; rewrite H0 in *; discriminate.
 (* final states *)
   inv H; inv H0. congruence.
 Qed.
diff --git a/powerpc/AsmToJSON.ml b/powerpc/AsmToJSON.ml
index 1c8b9934..343d9b65 100644
--- a/powerpc/AsmToJSON.ml
+++ b/powerpc/AsmToJSON.ml
@@ -78,6 +78,7 @@ type instruction_arg =
   | Ireg of ireg
   | Freg of freg
   | Constant of constant
+  | Long of Integers.Int.int
   | Crbit of crbit
   | ALabel of positive
   | Float32 of Floats.float32
@@ -88,6 +89,7 @@ let p_arg oc = function
   | Ireg ir -> p_ireg oc ir
   | Freg fr -> p_freg oc fr
   | Constant c -> p_constant oc c
+  | Long i ->  p_jsingle_object oc "Integer" p_int64 i
   | Crbit cr -> p_crbit oc cr
   | ALabel lbl -> p_label oc lbl
   | Float32 f -> p_float32_constant oc f
@@ -101,18 +103,25 @@ let p_instruction oc ic =
   let sep oc = if !first then first := false else output_string oc ", " in
   let instruction n args = fprintf oc "\n%t{%a,%a}" sep inst_name n p_args args in
   let instruction = function
-  | Padd (ir1,ir2,ir3) -> instruction "Padd" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Padd (ir1,ir2,ir3)
+  | Padd64 (ir1,ir2,ir3) -> instruction "Padd" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Paddc (ir1,ir2,ir3) -> instruction "Paddc" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Padde (ir1,ir2,ir3) -> instruction "Padde" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Paddi (ir1,ir2,c) -> instruction "Paddi" [Ireg ir1; Ireg ir2; Constant c]
+  | Paddi64 (ir1,ir2,n) -> instruction "Paddi" [Ireg ir1; Ireg ir2; Constant (Cint n)] (* FIXME, ugly, immediates are int64 but always fit into 16bits *)
   | Paddic  (ir1,ir2,c) -> instruction "Paddic" [Ireg ir1; Ireg ir2; Constant c]
   | Paddis  (ir1,ir2,c) -> instruction "Paddis" [Ireg ir1; Ireg ir2; Constant c]
-  | Paddze (ir1,ir2) -> instruction "Paddze" [Ireg ir1; Ireg ir2]
+  | Paddis64 (ir1,ir2,n) -> instruction "Paddis" [Ireg ir1; Ireg ir2; Constant (Cint n)]
+  | Paddze (ir1,ir2)
+  | Paddze64 (ir1,ir2) -> instruction "Paddze" [Ireg ir1; Ireg ir2]
   | Pallocframe _ -> assert false (* Should not occur *)
-  | Pand_ (ir1,ir2,ir3) -> instruction "Pand_" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pand_ (ir1,ir2,ir3)
+  | Pand_64 (ir1,ir2,ir3) -> instruction "Pand_" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pandc (ir1,ir2,ir3) -> instruction "Pandc" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pandi_ (ir1,ir2,c) -> instruction "Pandi_" [Ireg ir1; Ireg ir2; Constant c]
+  | Pandi_64 (ir1,ir2,n) -> instruction "Pandi_" [Ireg ir1; Ireg ir2; Constant (Cint n)]
   | Pandis_ (ir1,ir2,c) -> instruction "Pandis_" [Ireg ir1; Ireg ir2; Constant c]
+  | Pandis_64 (ir1,ir2,n) -> instruction "Pandis_" [Ireg ir1; Ireg ir2; Constant (Cint n)]
   | Pb l -> instruction "Pb" [ALabel l]
   | Pbctr s -> instruction "Pbctr" []
   | Pbctrl s -> instruction "Pbctrl" []
@@ -124,10 +133,15 @@ let p_instruction oc ic =
   | Pbt (cr,l) ->  instruction "Pbt" [Crbit cr; ALabel l]
   | Pbtbl (i,lb) -> instruction "Pbtbl" ((Ireg i)::(List.map (fun a -> ALabel a) lb))
   | Pcmpb (ir1,ir2,ir3) -> instruction "Pcmpb" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pcmpld (ir1,ir2) -> instruction "Pcmpld" [Ireg ir1; Ireg ir2]
+  | Pcmpldi (ir,n) -> instruction "Pcmpldi" [Ireg ir; Constant (Cint n)]
   | Pcmplw (ir1,ir2) -> instruction "Pcmplw" [Ireg ir1; Ireg ir2]
   | Pcmplwi (ir,c) -> instruction "Pcmplwi" [Ireg ir; Constant c]
+  | Pcmpd (ir1,ir2) -> instruction "Pcmpd" [Ireg ir1; Ireg ir2]
+  | Pcmpdi (ir,n) -> instruction "Pcmpdi" [Ireg ir; Constant (Cint n)]
   | Pcmpw (ir1,ir2) -> instruction "Pcmpw" [Ireg ir1; Ireg ir2]
   | Pcmpwi (ir,c) -> instruction "Pcmpwi" [Ireg ir; Constant c]
+  | Pcntlzd (ir1,ir2) -> instruction "Pcntlzd" [Ireg ir1; Ireg ir2]
   | Pcntlzw (ir1,ir2) -> instruction "Pcntlzw" [Ireg ir1; Ireg ir2]
   | Pcreqv (cr1,cr2,cr3) -> instruction "Pcreqv" [Crbit cr1; Crbit cr2; Crbit cr3]
   | Pcror (cr1,cr2,cr3) -> instruction "Pcror" [Crbit cr1; Crbit cr2; Crbit cr3]
@@ -139,23 +153,28 @@ let p_instruction oc ic =
   | Pdcbtls (n,ir1,ir2) ->  instruction "Pdcbtls" [Constant (Cint n); Ireg ir1; Ireg ir2]
   | Pdcbz (ir1,ir2) -> instruction "Pdcbz" [Ireg ir1; Ireg ir2]
   | Pdivw (ir1,ir2,ir3) -> instruction "Pdivw" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pdivd (ir1,ir2,ir3) -> instruction "Pdivd" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pdivwu (ir1,ir2,ir3) -> instruction "Pdivwu" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pdivdu (ir1,ir2,ir3) -> instruction "Pdivdu" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Peieio -> instruction "Peieio" []
   | Peqv (ir1,ir2,ir3) -> instruction "Peqv" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pextsb (ir1,ir2) -> instruction "Pextsb" [Ireg ir1; Ireg ir2]
   | Pextsh (ir1,ir2) -> instruction "Pextsh" [Ireg ir1; Ireg ir2]
   | Pextsw (ir1,ir2) -> instruction "Pextsw" [Ireg ir1; Ireg ir2]
-  | Pfreeframe (c,i) -> assert false (* Should not occur *)
+  | Pextzw (ir1,ir2) -> assert false (* Should not occur *)
+  | Pfreeframe (c,i) -> () (* Should not occur *)
   | Pfabs (fr1,fr2)
   | Pfabss (fr1,fr2) -> instruction "Pfabs" [Freg fr1; Freg fr2]
   | Pfadd (fr1,fr2,fr3) -> instruction "Pfadd" [Freg fr1; Freg fr2; Freg fr3]
   | Pfadds (fr1,fr2,fr3) -> instruction "Pfadds" [Freg fr1; Freg fr2; Freg fr3]
   | Pfcmpu (fr1,fr2) -> instruction "Pfcmpu" [Freg fr1; Freg fr2]
-  | Pfcfi (ir,fr) -> assert false (* Should not occur *)
+  | Pfcfi (ir,fr)
+  | Pfcfl (ir,fr) -> assert false (* Should not occur *)
   | Pfcfid (fr1,fr2) -> instruction "Pfcfid" [Freg fr1; Freg fr2]
   | Pfcfiu _ (* Should not occur *)
   | Pfcti _ (* Should not occur *)
-  | Pfctiu _ -> assert false (* Should not occur *)
+  | Pfctiu _ (* Should not occur *)
+  | Pfctid _ -> assert false (* Should not occur *)
   | Pfctidz (fr1,fr2) -> instruction "Pfctidz" [Freg fr1; Freg fr2]
   | Pfctiw (fr1,fr2) -> instruction "Pfctiw" [Freg fr1; Freg fr2]
   | Pfctiwz (fr1,fr2) -> instruction "Pfctiwz" [Freg fr1; Freg fr2]
@@ -186,6 +205,10 @@ let p_instruction oc ic =
   | Plwsync -> instruction "Plwsync" []
   | Plbz (ir1,c,ir2) -> instruction "Plbz" [Ireg ir1; Constant c; Ireg ir2]
   | Plbzx (ir1,ir2,ir3) -> instruction "Plbzx" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pld (ir1,c,ir2)
+  | Pld_a (ir1,c,ir2) -> instruction "Pld" [Ireg ir1; Constant c; Ireg ir2]
+  | Pldx (ir1,ir2,ir3)
+  | Pldx_a (ir1,ir2,ir3) -> instruction "Pldx" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Plfd (fr,c,ir)
   | Plfd_a (fr,c,ir) -> instruction "Plfd" [Freg fr; Constant c; Ireg ir]
   | Plfdx (fr,ir1,ir2)
@@ -197,6 +220,10 @@ let p_instruction oc ic =
   | Plhbrx (ir1,ir2,ir3) -> instruction "Plhbrx" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Plhz (ir1,c,ir2) -> instruction "Plhz" [Ireg ir1; Constant c; Ireg ir2]
   | Plhzx (ir1,ir2,ir3) -> instruction "Plhzx" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pldi (ir,c) -> instruction "Pldi" [Ireg ir; Long c] (* FIXME Cint is too small, we need Clong *)
+  | Plmake _ (* Should not occur *)
+  | Pllo _ (* Should not occur *)
+  | Plhi _ -> assert false (* Should not occur *)
   | Plfi (fr,fc) -> instruction "Plfi" [Freg fr; Float64 fc]
   | Plfis (fr,fc) -> instruction "Plfis" [Freg fr; Float32 fc]
   | Plwz (ir1,c,ir2)
@@ -215,26 +242,42 @@ let p_instruction oc ic =
   | Pmtlr ir -> instruction "Pmtlr" [Ireg ir]
   | Pmfspr(ir, n) -> instruction "Pmfspr" [Ireg ir; Constant (Cint n)]
   | Pmtspr(n, ir) -> instruction "Pmtspr" [Constant (Cint n); Ireg ir]
+  | Pmulhd (ir1,ir2,ir3) -> instruction "Pmulhd" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pmulhdu (ir1,ir2,ir3) -> instruction "Pmulhdu" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pmulld (ir1,ir2,ir3) -> instruction "Pmulld" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pmulli (ir1,ir2,c) -> instruction "Pmulli" [Ireg ir1; Ireg ir2; Constant c]
   | Pmullw (ir1,ir2,ir3) -> instruction "Pmullw" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pmulhw (ir1,ir2,ir3) -> instruction "Pmulhw" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pmulhwu (ir1,ir2,ir3) -> instruction "Pmulhwu" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pnand (ir1,ir2,ir3) -> instruction "Pnand" [Ireg ir1; Ireg ir2; Ireg ir3]
-  | Pnor (ir1,ir2,ir3) -> instruction "Pnor" [Ireg ir1; Ireg ir2; Ireg ir3]
-  | Por (ir1,ir2,ir3) -> instruction "Por" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pnor (ir1,ir2,ir3)
+  | Pnor64 (ir1,ir2,ir3) -> instruction "Pnor" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Por (ir1,ir2,ir3)
+  | Por64 (ir1,ir2,ir3) -> instruction "Por" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Porc (ir1,ir2,ir3) -> instruction "Porc" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pori (ir1,ir2,c) -> instruction "Pori" [Ireg ir1; Ireg ir2; Constant c]
+  | Pori64 (ir1,ir2,n) -> instruction "Pori" [Ireg ir1; Ireg ir2; Constant (Cint n)]
   | Poris (ir1,ir2,c) -> instruction "Poris" [Ireg ir1; Ireg ir2; Constant c]
+  | Poris64 (ir1,ir2,n) -> instruction "Poris" [Ireg ir1; Ireg ir2; Constant (Cint n)]
   | Prldicl (ir1,ir2,ic1,ic2) -> instruction "Prldicl" [Ireg ir1; Ireg ir2; Constant (Cint ic1); Constant (Cint ic2)]
-  | Prldicr (ir1,ir2,ic1,ic2) -> instruction "Prldicr" [Ireg ir1; Ireg ir2; Constant (Cint ic1); Constant (Cint ic2)]
+  | Prldinm (ir1,ir2,ic1,ic2) -> instruction "Prldinm" [Ireg ir1; Ireg ir2; Constant (Cint ic1); Constant (Cint ic2)]
+  | Prldimi  (ir1,ir2,ic1,ic2) ->instruction "Prldimi" [Ireg ir1; Ireg ir2; Constant (Cint ic1); Constant (Cint ic2)]
   | Prlwinm (ir1,ir2,ic1,ic2) -> instruction "Prlwinm" [Ireg ir1; Ireg ir2; Constant (Cint ic1); Constant (Cint ic2)]
   | Prlwimi  (ir1,ir2,ic1,ic2) ->instruction "Prlwimi" [Ireg ir1; Ireg ir2; Constant (Cint ic1); Constant (Cint ic2)]
+  | Psld (ir1,ir2,ir3) -> instruction "Psld" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pslw (ir1,ir2,ir3) -> instruction "Pslw" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Psrad (ir1,ir2,ir3) -> instruction "Psrad" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Psradi  (ir1,ir2,ic) -> instruction "Psradi" [Ireg ir1; Ireg ir2; Constant (Cint ic)]
   | Psraw (ir1,ir2,ir3) -> instruction "Psraw" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Psrawi  (ir1,ir2,ic) -> instruction "Psrawi" [Ireg ir1; Ireg ir2; Constant (Cint ic)]
+  | Psrd (ir1,ir2,ir3) -> instruction "Psrd" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Psrw (ir1,ir2,ir3) -> instruction "Psrw" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pstb (ir1,c,ir2) -> instruction "Pstb" [Ireg ir1; Constant c; Ireg ir2]
   | Pstbx (ir1,ir2,ir3) -> instruction "Pstbx" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pstd (ir1,c,ir2)
+  | Pstd_a (ir1,c,ir2) -> instruction "Pstd" [Ireg ir1; Constant c; Ireg ir2]
+  | Pstdx (ir1,ir2,ir3)
+  | Pstdx_a (ir1,ir2,ir3) -> instruction "Pstdx" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pstdu (ir1,c,ir2) -> instruction "Pstdu" [Ireg ir1; Constant c; Ireg ir2]
   | Pstfd (fr,c,ir)
   | Pstfd_a (fr,c,ir) -> instruction "Pstfd" [Freg fr; Constant c; Ireg ir]
@@ -254,15 +297,20 @@ let p_instruction oc ic =
   | Pstwux (ir1,ir2,ir3) -> instruction "Pstwux" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pstwbrx (ir1,ir2,ir3) -> instruction "Pstwbrx" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Pstwcx_ (ir1,ir2,ir3) -> instruction "Pstwcx_" [Ireg ir1; Ireg ir2; Ireg ir3]
-  | Psubfc (ir1,ir2,ir3) -> instruction "Psubfc" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Psubfc (ir1,ir2,ir3)
+  | Psubfc64 (ir1,ir2,ir3) -> instruction "Psubfc" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Psubfe (ir1,ir2,ir3) -> instruction "Psubfe" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Psubfze (ir1,ir2) -> instruction "Psubfze" [Ireg ir1; Ireg ir2]
   | Psubfic (ir1,ir2,c) -> instruction "Psubfic" [Ireg ir1; Ireg ir2; Constant c]
+  | Psubfic64 (ir1,ir2,n) -> instruction "Psubfic" [Ireg ir1; Ireg ir2; Constant (Cint n)]
   | Psync -> instruction "Psync" []
   | Ptrap -> instruction "Ptrap" []
-  | Pxor (ir1,ir2,ir3) -> instruction "Pxor" [Ireg ir1; Ireg ir2; Ireg ir3]
-  | Pxori (ir1,ir2,c) ->instruction "Pxori" [Ireg ir1; Ireg ir2; Constant c]
+  | Pxor (ir1,ir2,ir3)
+  | Pxor64 (ir1,ir2,ir3) -> instruction "Pxor" [Ireg ir1; Ireg ir2; Ireg ir3]
+  | Pxori (ir1,ir2,c) -> instruction "Pxori" [Ireg ir1; Ireg ir2; Constant c]
+  | Pxori64 (ir1,ir2,n) -> instruction "Pxori" [Ireg ir1; Ireg ir2; Constant (Cint n)]
   | Pxoris (ir1,ir2,c) -> instruction "Pxoris" [Ireg ir1; Ireg ir2; Constant c]
+  | Pxoris64 (ir1,ir2,n) -> instruction "Pxoris" [Ireg ir1; Ireg ir2; Constant (Cint n)]
   | Plabel l -> instruction "Plabel" [ALabel l]
   | Pbuiltin (ef,_,_) ->
     begin match ef with
@@ -271,7 +319,7 @@ let p_instruction oc ic =
       | _ -> ()
     end
   | Pcfi_adjust _  (* Only debug relevant *)
-  | Pcfi_rel_offset _ ->  () (* Only debug relevant *) in
+  | Pcfi_rel_offset _ -> () in (* Only debug relevant *)
   List.iter instruction ic
 
 let p_storage oc static =
diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml
index a27eeeb7..da229d0b 100644
--- a/powerpc/Asmexpand.ml
+++ b/powerpc/Asmexpand.ml
@@ -43,14 +43,19 @@ let _m1 = coqint_of_camlint (-1l)
 let _m4 = coqint_of_camlint (-4l)
 let _m8 = coqint_of_camlint (-8l)
 
+let _0L = Integers.Int64.zero
+let _32L = coqint_of_camlint64 32L
+let _64L = coqint_of_camlint64 64L
+let _m1L = coqint_of_camlint64 (-1L)
+let upper32 = coqint_of_camlint64 0xFFFF_FFFF_0000_0000L
+let lower32 = coqint_of_camlint64 0x0000_0000_FFFF_FFFFL
+
 let emit_loadimm r n =
   List.iter emit (Asmgen.loadimm r n [])
 
 let emit_addimm rd rs n =
   List.iter emit (Asmgen.addimm rd rs n [])
 
-
-
 (* Handling of annotations *)
 
 let expand_annot_val txt targ args res =
@@ -72,6 +77,8 @@ let expand_annot_val txt targ args res =
    So, use 64-bit accesses only if alignment >= 4.
    Note that lfd and stfd cannot trap on ill-formed floats. *)
 
+
+
 let offset_in_range ofs =
   Int.eq (Asmgen.high_s ofs) _0
 
@@ -184,6 +191,8 @@ let rec expand_builtin_vload_common chunk base offset res =
       emit (Plfs(res, offset, base))
   | (Mfloat64 | Many64), BR(FR res) ->
       emit (Plfd(res, offset, base))
+  | (Mint64 | Many64), BR(IR res) ->
+      emit (Pld(res, offset, base))
   | Mint64, BR_splitlong(BR(IR hi), BR(IR lo)) ->
       begin match offset_constant offset _4 with
       | Some offset' ->
@@ -243,6 +252,8 @@ let expand_builtin_vstore_common chunk base offset src =
       emit (Pstfs(src, offset, base))
   | (Mfloat64 | Many64), BA(FR src) ->
       emit (Pstfd(src, offset, base))
+  | (Mint64 | Many64), BA(IR src) ->
+      emit (Pstd(src, offset, base))
   | Mint64, BA_splitlong(BA(IR hi), BA(IR lo)) ->
       begin match offset_constant offset _4 with
       | Some offset' ->
@@ -361,6 +372,8 @@ let expand_builtin_inline name args res =
       emit (Pmulhwu(res, a1, a2))
   | ("__builtin_clz" | "__builtin_clzl"), [BA(IR a1)], BR(IR res) ->
       emit (Pcntlzw(res, a1))
+  | "__builtin_clzll", [BA(IR a1)], BR(IR res) ->
+      emit (Pcntlzd(res, a1))
   | "__builtin_clzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
       let lbl = new_label () in
       emit (Pcntlzw(GPR0, al));
@@ -376,6 +389,11 @@ let expand_builtin_inline name args res =
       emit (Pandc(res, GPR0, a1));        (* res := tmp & ~(x) *)
       emit (Pcntlzw(res, res));           (* res := #leading zeros *)
       emit (Psubfic(res, res, Cint _32))  (* res := 32 - #leading zeros *)
+  | "__builtin_ctzll", [BA(IR a1)], BR(IR res) ->
+      emit (Paddi64(GPR0, a1, _m1L));     (* tmp := x-1 *)
+      emit (Pandc(res, GPR0, a1));        (* res := tmp & ~(x) *)
+      emit (Pcntlzd(res, res));           (* res := #leading zeros *)
+      emit (Psubfic64(res, res, _64L))    (* res := 64 - #leading zeros *)
   | "__builtin_ctzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
       let lbl1 = new_label () in
       let lbl2 = new_label () in
@@ -530,22 +548,17 @@ let expand_builtin_inline name args res =
   | "__builtin_set_spr", _, _ ->
       raise (Error "the first argument of __builtin_set_spr must be a constant")
   (* Special registers in 32bit hybrid mode *)
-  | "__builtin_get_spr64", [BA_int n], BR_splitlong(BR(IR rh), BR(IR rl)) ->
-      if Archi.ppc64 then begin
-        emit (Pmfspr(rl, n));
-        emit (Prldicl(rh, rl, _32, _32));
-        emit (Prldicl(rl, rl, _0, _32))
-      end else
+  | "__builtin_get_spr64", [BA_int n], BR(IR r) ->
+      if Archi.ppc64 then
+        emit (Pmfspr(r, n))
+      else
         raise (Error "__builtin_get_spr64 is only supported for PPC64 targets")
   | "__builtin_get_spr64", _, _ ->
       raise (Error "the argument of __builtin_get_spr64 must be a constant")
-  | "__builtin_set_spr64", [BA_int n; BA_splitlong(BA(IR ah), BA(IR al))], _ ->
-      if Archi.ppc64 then begin
-        emit (Prldicr(GPR10, ah, _32, _31));
-        emit (Prldicl(al, al, _0, _32));
-        emit (Por(GPR10, GPR10, al));
-        emit (Pmtspr(n, GPR10))
-      end else
+  | "__builtin_set_spr64", [BA_int n; BA(IR a)], _ ->
+      if Archi.ppc64 then
+        emit (Pmtspr(n, a))
+      else
         raise (Error "__builtin_set_spr64 is only supported for PPC64 targets")
   | "__builtin_set_spr64", _, _ ->
       raise (Error "the first argument of __builtin_set_spr64 must be a constant")
@@ -692,6 +705,8 @@ let expand_instruction instr =
   | Pbctr sg | Pbctrl sg | Pbl(_, sg) | Pbs(_, sg) ->
       set_cr6 sg;
       emit instr
+  | Pextzw(r1, r2) ->
+      emit (Prldinm(r1, r2, _0L, lower32))
   | Pfreeframe(sz, ofs) ->
       let variadic = is_current_function_variadic () in
       let sz = camlint_of_coqint sz in
@@ -709,6 +724,14 @@ let expand_instruction instr =
       emit (Pfcfid(r1, r1));
       emit (Paddi(GPR1, GPR1, Cint _8));
       emit (Pcfi_adjust _m8)
+  | Pfcfl(r1, r2) ->
+      assert (Archi.ppc64);
+      emit (Pstdu(r2, Cint _m8, GPR1));
+      emit (Pcfi_adjust _8);
+      emit (Plfd(r1, Cint _0, GPR1));
+      emit (Pfcfid(r1, r1));
+      emit (Paddi(GPR1, GPR1, Cint _8));
+      emit (Pcfi_adjust _m8)
   | Pfcfiu(r1, r2) ->
       assert (Archi.ppc64);
       emit (Prldicl(GPR0, r2, _0, _32));
@@ -733,6 +756,14 @@ let expand_instruction instr =
       emit (Plwz(r1, Cint _4, GPR1));
       emit (Paddi(GPR1, GPR1, Cint _8));
       emit (Pcfi_adjust _m8)
+  | Pfctid(r1, r2) ->
+      assert (Archi.ppc64);
+      emit (Pfctidz(FPR13, r2));
+      emit (Pstfdu(FPR13, Cint _m8, GPR1));
+      emit (Pcfi_adjust _8);
+      emit (Pld(r1, Cint _0, GPR1));
+      emit (Paddi(GPR1, GPR1, Cint _8));
+      emit (Pcfi_adjust _m8)
   | Pfmake(rd, r1, r2) ->
       emit (Pstwu(r1, Cint _m8, GPR1));
       emit (Pcfi_adjust _8);
@@ -742,6 +773,20 @@ let expand_instruction instr =
       emit (Pcfi_adjust _m8);
   | Pfxdp(r1, r2) ->
       if r1 <> r2 then emit(Pfmr(r1, r2))
+  | Plmake(r1, rhi, rlo) ->
+      if r1 = rlo then
+        emit (Prldimi(r1, rhi, _32L, upper32))
+      else if r1 = rhi then begin
+        emit (Prldinm(r1, rhi, _32L, upper32));
+        emit (Prldimi(r1, rlo, _0L, lower32))
+      end else begin
+        emit (Pmr(r1, rlo));
+        emit (Prldimi(r1, rhi, _32L, upper32))
+      end
+  | Pllo r1 ->
+      ()   (* no computational content *)
+  | Plhi(r1, r2) ->
+      emit (Prldinm(r1, r2, _32L, lower32))
   | Pmfcrbit(r1, bit) ->
       emit (Pmfcr r1);
       emit (Prlwinm(r1, r1, Z.of_uint (1 + num_crbit bit), _1))
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 1c97c5b0..8c296f0a 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -120,6 +120,56 @@ Definition rolm (r1 r2: ireg) (amount mask: int) (k: code) :=
   else
     Prlwinm r1 r2 amount Int.mone :: andimm_base r1 r1 mask k.
 
+(** Smart constructors for 64-bit integer constants *)
+
+Definition low64_u (n: int64) := Int64.zero_ext 16 n.
+Definition low64_s (n: int64) := Int64.sign_ext 16 n.
+
+Definition loadimm64 (r: ireg) (n: int64) (k: code) :=
+  let lo_u := low64_u n in
+  let lo_s := low64_s n in
+  let hi_s := Int64.sign_ext 16 (Int64.shr n (Int64.repr 16)) in
+  if Int64.eq n lo_s then
+    Paddi64 r GPR0 n :: k
+  else if Int64.eq n (Int64.or (Int64.shl hi_s (Int64.repr 16)) lo_u) then
+    Paddis64 r GPR0 hi_s :: Pori64 r r lo_u :: k
+  else
+    Pldi r n :: k.
+
+Definition opimm64 (insn2: ireg -> ireg -> ireg -> instruction)
+                   (insn1: ireg -> ireg -> int64 -> instruction)
+                   (r1 r2: ireg) (n: int64) (ok: bool) (k: code) :=
+  if ok then
+    insn1 r1 r2 n :: k
+  else if ireg_eq r2 GPR12 then
+    Pmr GPR0 GPR12 :: loadimm64 GPR12 n (insn2 r1 GPR0 GPR12 :: k)
+  else
+    loadimm64 GPR0 n (insn2 r1 r2 GPR0 :: k).
+
+Definition addimm64 (r1 r2: ireg) (n: int64) (k : code) :=
+  opimm64 Padd64 Paddi64 r1 r2 n (Int64.eq n (low64_s n)) k.
+
+Definition orimm64 (r1 r2: ireg) (n: int64) (k : code) :=
+  opimm64 Por64 Pori64 r1 r2 n (Int64.eq n (low64_u n)) k.
+
+Definition xorimm64 (r1 r2: ireg) (n: int64) (k : code) :=
+  opimm64 Pxor64 Pxori64 r1 r2 n (Int64.eq n (low64_u n)) k.
+
+Definition andimm64_base (r1 r2: ireg) (n: int64) (k : code) :=
+  opimm64 Pand_64 Pandi_64 r1 r2 n (Int64.eq n (low64_u n)) k.
+
+Definition andimm64 (r1 r2: ireg) (n: int64) (k : code) :=
+  if is_rldl_mask n || is_rldr_mask n then
+    Prldinm r1 r2 Int.zero n :: k
+  else
+    andimm64_base r1 r2 n k.
+
+Definition rolm64 (r1 r2: ireg) (amount: int) (mask: int64) (k: code) :=
+  if is_rldl_mask mask || is_rldr_mask mask || is_rldl_mask (Int64.shru' mask amount) then
+    Prldinm r1 r2 amount mask :: k
+  else
+    Prldinm r1 r2 amount Int64.mone :: andimm64_base r1 r1 mask k.
+
 (** Accessing slots in the stack frame.  *)
 
 Definition accessind {A: Type}
@@ -136,7 +186,9 @@ Definition loadind (base: ireg) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: code) :=
   | Tint, IR r => OK(accessind Plwz Plwzx base ofs r k)
   | Tany32, IR r => OK(accessind Plwz_a Plwzx_a base ofs r k)
   | Tsingle, FR r => OK(accessind Plfs Plfsx base ofs r k)
+  | Tlong, IR r => OK(accessind Pld Pldx base ofs r k)
   | Tfloat, FR r => OK(accessind Plfd Plfdx base ofs r k)
+  | Tany64, IR r => OK(accessind Pld_a Pldx_a base ofs r k)
   | Tany64, FR r => OK(accessind Plfd_a Plfdx_a base ofs r k)
   | _, _ => Error (msg "Asmgen.loadind")
   end.
@@ -146,7 +198,9 @@ Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: code) :
   | Tint, IR r => OK(accessind Pstw Pstwx base ofs r k)
   | Tany32, IR r => OK(accessind Pstw_a Pstwx_a base ofs r k)
   | Tsingle, FR r => OK(accessind Pstfs Pstfsx base ofs r k)
+  | Tlong, IR r => OK(accessind Pstd Pstdx base ofs r k)
   | Tfloat, FR r => OK(accessind Pstfd Pstfdx base ofs r k)
+  | Tany64, IR r => OK(accessind Pstd_a Pstdx_a base ofs r k)
   | Tany64, FR r => OK(accessind Pstfd_a Pstfdx_a base ofs r k)
   | _, _ => Error (msg "Asmgen.storeind")
   end.
@@ -199,6 +253,26 @@ Definition transl_cond
       do r1 <- ireg_of a1; OK (andimm_base GPR0 r1 n k)
   | Cmasknotzero n, a1 :: nil =>
       do r1 <- ireg_of a1; OK (andimm_base GPR0 r1 n k)
+  | Ccompl c, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Pcmpd r1 r2 :: k)
+  | Ccomplu c, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Pcmpld r1 r2 :: k)
+  | Ccomplimm c n, a1 :: nil =>
+      do r1 <- ireg_of a1;
+      if Int64.eq n (low64_s n) then
+        OK (Pcmpdi r1 n :: k)
+      else if ireg_eq r1 GPR12 then
+        OK (Pmr GPR0 GPR12 :: loadimm64 GPR12 n (Pcmpd GPR0 GPR12 :: k))
+      else
+        OK (loadimm64 GPR0 n (Pcmpd r1 GPR0 :: k))
+  | Ccompluimm c n, a1 :: nil =>
+      do r1 <- ireg_of a1;
+      if Int64.eq n (low64_u n) then
+        OK (Pcmpldi r1 n :: k)
+      else if ireg_eq r1 GPR12 then
+        OK (Pmr GPR0 GPR12 :: loadimm64 GPR12 n (Pcmpld GPR0 GPR12 :: k))
+      else
+        OK (loadimm64 GPR0 n (Pcmpld r1 GPR0 :: k))
   | _, _ =>
       Error(msg "Asmgen.transl_cond")
   end.
@@ -238,6 +312,10 @@ Definition crbit_for_cond (cond: condition) :=
   | Cnotcompf cmp => let p := crbit_for_fcmp cmp in (fst p, negb (snd p))
   | Cmaskzero n => (CRbit_2, true)
   | Cmasknotzero n => (CRbit_2, false)
+  | Ccompl cmp => crbit_for_icmp cmp
+  | Ccomplu cmp => crbit_for_icmp cmp
+  | Ccomplimm cmp n => crbit_for_icmp cmp
+  | Ccompluimm cmp n => crbit_for_icmp cmp
   end.
 
 (** Recognition of comparisons [>= 0] and [< 0]. *)
@@ -509,8 +587,96 @@ Definition transl_op
   | Ofloatofwords, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- freg_of res;
       OK (Pfmake r r1 r2 :: k)
+  | Omakelong, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res; OK (Plmake r r1 r2 :: k)
+  | Olowlong, a1 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- ireg_of res; OK (Pllo r :: k)
+  | Ohighlong, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res; OK (Plhi r r1 :: k)
   | Ocmp cmp, _ =>
       transl_cond_op cmp args res k
+(*c PPC64 operations *)
+  | Olongconst n, nil =>
+      do r <- ireg_of res; OK (loadimm64 r n k)
+  | Ocast32signed, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (Pextsw r r1 :: k)
+  | Ocast32unsigned, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (Pextzw r r1 :: k)
+  | Oaddl, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Padd64 r r1 r2 :: k)
+  | Oaddlimm n, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (addimm64 r r1 n k)
+  | Osubl, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Psubfc64 r r2 r1 :: k)
+  | Onegl, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (Psubfic64 r r1 Int64.zero :: k)
+  | Omull, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Pmulld r r1 r2 :: k)
+  | Omullhs, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+        OK (Pmulhd r r1 r2 :: k)
+  | Omullhu, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+        OK (Pmulhdu r r1 r2 :: k)
+  | Odivl, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Pdivd r r1 r2 :: k)
+  | Odivlu, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Pdivdu r r1 r2 :: k)
+  | Oandl,  a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Pand_64 r r1 r2 :: k)
+  | Oandlimm n, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (andimm64 r r1 n k)
+  | Oorl,  a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Por64 r r1 r2 :: k)
+  | Oorlimm n, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (orimm64 r r1 n k)
+  | Oxorl,  a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Pxor64 r r1 r2 :: k)
+  | Oxorlimm n, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (xorimm64 r r1 n k)
+  | Onotl, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (Pnor64 r r1 r1 :: k)
+  | Oshll, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Psld r r1 r2 :: k)
+  | Oshrl, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Psrad r r1 r2 :: k)
+  | Oshrlimm n, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (Psradi r r1 n :: k)
+  | Oshrlu, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+      OK (Psrd r r1 r2 :: k)
+  | Orolml amount mask, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (rolm64 r r1 amount mask k)
+  | Oshrxlimm n, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- ireg_of res;
+      OK (Psradi r r1 n :: Paddze64 r r :: k)
+  | Olongoffloat, a1 :: nil =>
+      do r1 <- freg_of a1; do r <- ireg_of res;
+      OK (Pfctid r r1 :: k)
+  | Ofloatoflong, a1 :: nil =>
+      do r1 <- ireg_of a1; do r <- freg_of res;
+      OK (Pfcfl r r1 :: k)
   | _, _ =>
       Error(msg "Asmgen.transl_op")
   end.
@@ -588,6 +754,9 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
   | Mint32 =>
       do r <- ireg_of dst;
       transl_memory_access (Plwz r) (Plwzx r) addr args GPR12 k
+  | Mint64 =>
+      do r <- ireg_of dst;
+      transl_memory_access (Pld r) (Pldx r) addr args GPR12 k
   | Mfloat32 =>
       do r <- freg_of dst;
       transl_memory_access (Plfs r) (Plfsx r) addr args GPR12 k
@@ -611,6 +780,9 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing)
   | Mint32  =>
       do r <- ireg_of src;
       transl_memory_access (Pstw r) (Pstwx r) addr args temp k
+  | Mint64  =>
+      do r <- ireg_of src;
+      transl_memory_access (Pstd r) (Pstdx r) addr args temp k
   | Mfloat32 =>
       do r <- freg_of src;
       transl_memory_access (Pstfs r) (Pstfsx r) addr args temp k
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index 0610b242..bf75d2e0 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -179,6 +179,14 @@ Proof.
 Qed.
 Hint Resolve rolm_label: labels.
 
+Remark loadimm64_label:
+  forall r n k, tail_nolabel k (loadimm64 r n k).
+Proof.
+  unfold loadimm64; intros.
+  destruct Int64.eq. TailNoLabel. destruct Int64.eq; TailNoLabel.
+Qed.
+Hint Resolve loadimm64_label: labels.
+
 Remark loadind_label:
   forall base ofs ty dst k c,
   loadind base ofs ty dst k = OK c -> tail_nolabel k c.
@@ -232,12 +240,27 @@ Remark transl_op_label:
 Proof.
 Opaque Int.eq.
   unfold transl_op; intros; destruct op; TailNoLabel.
-  destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
-  destruct (symbol_is_small_data i i0). TailNoLabel. destruct (symbol_is_rel_data i i0); TailNoLabel.
-  destruct (symbol_is_small_data i i0). TailNoLabel. destruct (symbol_is_rel_data i i0); TailNoLabel.
-  destruct (Int.eq (high_s i) Int.zero); TailNoLabel; eapply tail_nolabel_trans; TailNoLabel.
-  destruct (Int.eq (high_s i) Int.zero); TailNoLabel; eapply tail_nolabel_trans; TailNoLabel.
-  eapply transl_cond_op_label; eauto.
+- destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
+- destruct (symbol_is_small_data i i0). TailNoLabel. destruct (symbol_is_rel_data i i0); TailNoLabel.
+- destruct (symbol_is_small_data i i0). TailNoLabel. destruct (symbol_is_rel_data i i0); TailNoLabel.
+- destruct (Int.eq (high_s i) Int.zero); TailNoLabel; eapply tail_nolabel_trans; TailNoLabel.
+- destruct (Int.eq (high_s i) Int.zero); TailNoLabel; eapply tail_nolabel_trans; TailNoLabel.
+- unfold addimm64, opimm64. destruct Int64.eq. TailNoLabel.
+  destruct ireg_eq; [apply tail_nolabel_cons; unfold nolabel;auto|]; eapply tail_nolabel_trans; TailNoLabel.
+- unfold andimm64, andimm64_base, opimm64.
+  destruct (is_rldl_mask i || is_rldr_mask i). TailNoLabel.
+  destruct Int64.eq. TailNoLabel.
+  destruct ireg_eq; [apply tail_nolabel_cons; unfold nolabel;auto|]; eapply tail_nolabel_trans; TailNoLabel.
+- unfold orimm64, opimm64. destruct Int64.eq. TailNoLabel.
+  destruct ireg_eq; [apply tail_nolabel_cons; unfold nolabel;auto|]; eapply tail_nolabel_trans; TailNoLabel.
+- unfold xorimm64, opimm64. destruct Int64.eq. TailNoLabel.
+  destruct ireg_eq; [apply tail_nolabel_cons; unfold nolabel;auto|]; eapply tail_nolabel_trans; TailNoLabel.
+- unfold rolm64, andimm64_base, opimm64.
+  destruct (is_rldl_mask i0 || is_rldr_mask i0 || is_rldl_mask (Int64.shru' i0 i)). TailNoLabel.
+  apply tail_nolabel_cons; unfold nolabel; auto.
+  destruct Int64.eq. TailNoLabel.
+  destruct ireg_eq; [apply tail_nolabel_cons; unfold nolabel;auto|]; eapply tail_nolabel_trans; TailNoLabel.
+- eapply transl_cond_op_label; eauto.
 Qed.
 
 Remark transl_memory_access_label:
@@ -760,12 +783,12 @@ Opaque loadind.
   destruct (snd (crbit_for_cond cond)).
   (* Pbt, taken *)
   econstructor; econstructor; econstructor; split. eexact A.
-  split. eapply agree_exten; eauto with asmgen.
+  split. eapply agree_undef_regs; eauto with asmgen.
   split. simpl. rewrite B. reflexivity.
   auto with asmgen.
   (* Pbf, taken *)
   econstructor; econstructor; econstructor; split. eexact A.
-  split. eapply agree_exten; eauto with asmgen.
+  split. eapply agree_undef_regs; eauto with asmgen.
   split. simpl. rewrite B. reflexivity.
   auto with asmgen.
 
@@ -779,7 +802,7 @@ Opaque loadind.
   destruct (snd (crbit_for_cond cond)).
   apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
   apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
-  split. eapply agree_exten; eauto with asmgen.
+  split. eapply agree_undef_regs; eauto with asmgen.
   intros; Simpl.
   split. simpl. congruence.
   Simpl. 
@@ -865,7 +888,7 @@ Local Transparent destroyed_by_jumptable.
   apply exec_straight_two with rs4 m3'.
   simpl. unfold store1. rewrite gpr_or_zero_not_zero.
   change (rs3 GPR1) with sp. change (rs3 GPR0) with (rs0 LR).
-  simpl const_low. rewrite ATLR. erewrite storev_offset_ptr by eexact P. auto. congruence.   
+  simpl const_low. rewrite ATLR. erewrite storev_offset_ptr by eexact P. auto. congruence.
   auto. auto. auto.
   left; exists (State rs5 m3'); split.
   eapply exec_straight_steps_1; eauto. omega. constructor.
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index 9fee580c..e5736277 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -167,6 +167,18 @@ Proof.
 Qed.
 Hint Resolve gpr_or_zero_not_zero gpr_or_zero_zero: asmgen.
 
+Lemma gpr_or_zero_l_not_zero:
+  forall rs r, r <> GPR0 -> gpr_or_zero_l rs r = rs#r.
+Proof.
+  intros. unfold gpr_or_zero_l. case (ireg_eq r GPR0); tauto.
+Qed.
+Lemma gpr_or_zero_l_zero:
+  forall rs, gpr_or_zero_l rs GPR0 = Vlong Int64.zero.
+Proof.
+  intros. reflexivity.
+Qed.
+Hint Resolve gpr_or_zero_l_not_zero gpr_or_zero_l_zero: asmgen.
+
 Lemma ireg_of_not_GPR0:
   forall m r, ireg_of m = OK r -> IR r <> IR GPR0.
 Proof.
@@ -280,6 +292,30 @@ Proof.
   intros. unfold compare_uint. Simpl.
 Qed.
 
+Lemma compare_slong_spec:
+  forall rs v1 v2,
+  let rs1 := nextinstr (compare_slong rs v1 v2) in
+     rs1#CR0_0 = Val.of_optbool (Val.cmpl_bool Clt v1 v2)
+  /\ rs1#CR0_1 = Val.of_optbool (Val.cmpl_bool Cgt v1 v2)
+  /\ rs1#CR0_2 = Val.of_optbool (Val.cmpl_bool Ceq v1 v2)
+  /\ forall r', r' <> CR0_0 -> r' <> CR0_1 -> r' <> CR0_2 -> r' <> CR0_3 -> r' <> PC -> rs1#r' = rs#r'.
+Proof.
+  intros. unfold rs1. split. reflexivity. split. reflexivity. split. reflexivity.
+  intros. unfold compare_slong. Simpl.
+Qed.
+
+Lemma compare_ulong_spec:
+  forall rs m v1 v2,
+  let rs1 := nextinstr (compare_ulong rs m v1 v2) in
+     rs1#CR0_0 = Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Clt v1 v2)
+  /\ rs1#CR0_1 = Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Cgt v1 v2)
+  /\ rs1#CR0_2 = Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Ceq v1 v2)
+  /\ forall r', r' <> CR0_0 -> r' <> CR0_1 -> r' <> CR0_2 -> r' <> CR0_3 -> r' <> PC -> rs1#r' = rs#r'.
+Proof.
+  intros. unfold rs1. split. reflexivity. split. reflexivity. split. reflexivity.
+  intros. unfold compare_ulong. Simpl.
+Qed.
+
 (** Loading a constant. *)
 
 Lemma loadimm_correct:
@@ -493,6 +529,183 @@ Proof.
   intros. rewrite D; auto. unfold rs1; Simpl.
 Qed.
 
+(** Load int64 constant. *)
+
+Lemma loadimm64_correct:
+  forall r n k rs m,
+  exists rs',
+     exec_straight ge fn (loadimm64 r n k) rs m  k rs' m
+  /\ rs'#r = Vlong n
+  /\ forall r': preg, r' <> r -> r' <> GPR12 -> r' <> PC -> rs'#r' = rs#r'.
+Proof.
+  intros. unfold loadimm64.
+  set (hi_s := Int64.sign_ext 16 (Int64.shr n (Int64.repr 16))).
+  set (hi' := Int64.shl hi_s (Int64.repr 16)).
+  destruct (Int64.eq n (low64_s n)).
+  (* addi *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  rewrite Int64.add_zero_l. intuition Simpl.
+  (* addis + ori *)
+  predSpec Int64.eq Int64.eq_spec n (Int64.or hi' (low64_u n)).
+  econstructor; split. eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
+  split. Simpl. rewrite Int64.add_zero_l. simpl; f_equal; auto.
+  intros. Simpl.
+  (* ldi *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  intuition Simpl.
+Qed.
+
+(** Add int64 immediate. *)
+
+Lemma addimm64_correct:
+  forall r1 r2 n k rs m,
+  r2 <> GPR0 ->
+  exists rs',
+     exec_straight ge fn (addimm64 r1 r2 n k) rs m  k rs' m
+  /\ rs'#r1 = Val.addl rs#r2 (Vlong n)
+  /\ forall r': preg, r' <> r1 -> r' <> GPR0 -> r' <> GPR12 -> r' <> PC -> rs'#r' = rs#r'.
+Proof.
+  intros. unfold addimm64, opimm64. destruct (Int64.eq n (low64_s n)); [|destruct (ireg_eq r2 GPR12)].
+- (* addi *)
+  econstructor; split. apply exec_straight_one.
+  simpl. rewrite gpr_or_zero_l_not_zero; eauto.
+  reflexivity.
+  intuition Simpl.
+- (* move-loadimm-add *)
+  subst r2.
+  edestruct (loadimm64_correct GPR12 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_step. simpl; reflexivity. auto.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. Simpl.
+  intros. Simpl. rewrite C by auto. Simpl.
+- (* loadimm-add *)
+  edestruct (loadimm64_correct GPR0 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. Simpl.
+  intros. Simpl.
+Qed.
+
+(** Or int64 immediate. *)
+
+Lemma orimm64_correct:
+  forall r1 r2 n k rs m,
+  r2 <> GPR0 ->
+  exists rs',
+     exec_straight ge fn (orimm64 r1 r2 n k) rs m  k rs' m
+  /\ rs'#r1 = Val.orl rs#r2 (Vlong n)
+  /\ forall r': preg, r' <> r1 -> r' <> GPR0 -> r' <> GPR12 -> r' <> PC -> rs'#r' = rs#r'.
+Proof.
+  intros. unfold orimm64, opimm64. destruct (Int64.eq n (low64_u n)); [|destruct (ireg_eq r2 GPR12)].
+- (* ori *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. reflexivity.
+  intuition Simpl.
+- (* move-loadimm-or *)
+  subst r2.
+  edestruct (loadimm64_correct GPR12 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_step. simpl; reflexivity. auto.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. Simpl.
+  intros. Simpl. rewrite C by auto. Simpl.
+- (* loadimm-or *)
+  edestruct (loadimm64_correct GPR0 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. Simpl.
+  intros. Simpl.
+Qed.
+
+(** Xor int64 immediate. *)
+
+Lemma xorimm64_correct:
+  forall r1 r2 n k rs m,
+  r2 <> GPR0 ->
+  exists rs',
+     exec_straight ge fn (xorimm64 r1 r2 n k) rs m  k rs' m
+  /\ rs'#r1 = Val.xorl rs#r2 (Vlong n)
+  /\ forall r': preg, r' <> r1 -> r' <> GPR0 -> r' <> GPR12 -> r' <> PC -> rs'#r' = rs#r'.
+Proof.
+  intros. unfold xorimm64, opimm64. destruct (Int64.eq n (low64_u n)); [|destruct (ireg_eq r2 GPR12)].
+- (* xori *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. reflexivity.
+  intuition Simpl.
+- (* move-loadimm-xor *)
+  subst r2.
+  edestruct (loadimm64_correct GPR12 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_step. simpl; reflexivity. auto.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. Simpl.
+  intros. Simpl. rewrite C by auto. Simpl.
+- (* loadimm-xor *)
+  edestruct (loadimm64_correct GPR0 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. Simpl.
+  intros. Simpl.
+Qed.
+
+(** And int64 immediate. *)
+
+Lemma andimm64_base_correct:
+  forall r1 r2 n k rs m,
+  r2 <> GPR0 ->
+  exists rs',
+     exec_straight ge fn (andimm64_base r1 r2 n k) rs m  k rs' m
+  /\ rs'#r1 = Val.andl rs#r2 (Vlong n)
+  /\ forall r': preg, r' <> r1 -> r' <> GPR12 -> important_preg r' = true  -> rs'#r' = rs#r'.
+Proof.
+  intros. unfold andimm64_base, opimm64. destruct (Int64.eq n (low64_u n)); [|destruct (ireg_eq r2 GPR12)].
+- (* andi *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. reflexivity.
+  unfold compare_slong; intuition Simpl.
+- (* move-loadimm-and *)
+  subst r2.
+  edestruct (loadimm64_correct GPR12 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_step. simpl; reflexivity. auto.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. unfold compare_slong; Simpl.
+  intros. unfold compare_slong; Simpl. rewrite C by auto with asmgen. Simpl.
+- (* loadimm-xor *)
+  edestruct (loadimm64_correct GPR0 n) as (rs2 & A & B & C).
+  econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; reflexivity. auto.
+  split. rewrite B, C by eauto with asmgen. unfold compare_slong; Simpl.
+  intros. unfold compare_slong; Simpl.
+Qed.
+
+Lemma andimm64_correct:
+  forall r1 r2 n k rs m,
+  r2 <> GPR0 ->
+  exists rs',
+     exec_straight ge fn (andimm64 r1 r2 n k) rs m  k rs' m
+  /\ rs'#r1 = Val.andl rs#r2 (Vlong n)
+  /\ forall r': preg, r' <> r1 -> r' <> GPR12 -> important_preg r' = true  -> rs'#r' = rs#r'.
+Proof.
+  intros. unfold andimm64. destruct (is_rldl_mask n || is_rldr_mask n).
+- econstructor; split. eapply exec_straight_one. simpl; reflexivity. reflexivity.
+  split. Simpl. destruct (rs r2); simpl; auto. rewrite Int64.rolm_zero. auto.
+  intros; Simpl.
+- apply andimm64_base_correct; auto.
+Qed.
+
+(** Rotate-and-mask for int64 *)
+
+Lemma rolm64_correct:
+  forall r1 r2 amount mask k rs m,
+  r1 <> GPR0 ->
+  exists rs',
+     exec_straight ge fn (rolm64 r1 r2 amount mask k) rs m  k rs' m
+  /\ rs'#r1 = Val.rolml rs#r2 amount mask
+  /\ forall r': preg, r' <> r1 -> r' <> GPR12 -> important_preg r' = true  -> rs'#r' = rs#r'.
+Proof.
+  intros. unfold rolm64.
+  destruct (is_rldl_mask mask || is_rldr_mask mask || is_rldl_mask (Int64.shru' mask amount)).
+- econstructor; split. eapply exec_straight_one. simpl; reflexivity. reflexivity.
+  intuition Simpl.
+- edestruct (andimm64_base_correct r1 r1 mask k) as (rs2 & A & B & C); auto.
+  econstructor; split.
+  eapply exec_straight_step. simpl; reflexivity. reflexivity. eexact A.
+  split. rewrite B. Simpl. destruct (rs r2); simpl; auto. unfold Int64.rolm.
+  rewrite Int64.and_assoc, Int64.and_mone_l; auto.
+  intros; Simpl. rewrite C by auto. Simpl.
+Qed.
+
 (** Indexed memory loads. *)
 
 Lemma accessind_load_correct:
@@ -541,8 +754,10 @@ Proof.
   unfold loadind; intros. destruct ty; try discriminate; destruct (preg_of dst); inv H; simpl in H0.
   apply accessind_load_correct with (inj := IR) (chunk := Mint32); auto with asmgen.
   apply accessind_load_correct with (inj := FR) (chunk := Mfloat64); auto with asmgen.
+  apply accessind_load_correct with (inj := IR) (chunk := Mint64); auto with asmgen.
   apply accessind_load_correct with (inj := FR) (chunk := Mfloat32); auto with asmgen.
   apply accessind_load_correct with (inj := IR) (chunk := Many32); auto with asmgen.
+  apply accessind_load_correct with (inj := IR) (chunk := Many64); auto with asmgen.
   apply accessind_load_correct with (inj := FR) (chunk := Many64); auto with asmgen.
 Qed.
 
@@ -593,8 +808,10 @@ Proof.
   destruct ty; try discriminate; destruct (preg_of src) ; inv H; simpl in H0.
   apply accessind_store_correct with (inj := IR) (chunk := Mint32); auto with asmgen.
   apply accessind_store_correct with (inj := FR) (chunk := Mfloat64); auto with asmgen.
+  apply accessind_store_correct with (inj := IR) (chunk := Mint64); auto with asmgen.
   apply accessind_store_correct with (inj := FR) (chunk := Mfloat32); auto with asmgen.
   apply accessind_store_correct with (inj := IR) (chunk := Many32); auto with asmgen.
+  apply accessind_store_correct with (inj := IR) (chunk := Many64); auto with asmgen.
   apply accessind_store_correct with (inj := FR) (chunk := Many64); auto with asmgen.
 Qed.
 
@@ -669,7 +886,7 @@ Lemma transl_cond_correct_1:
        (if snd (crbit_for_cond cond)
         then Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)
         else Val.notbool (Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)))
-  /\ forall r, important_preg r = true -> rs'#r = rs#r.
+  /\ forall r, important_preg r = true  -> preg_notin r (destroyed_by_cond cond) ->  rs'#r = rs#r.
 Proof.
   intros.
 Opaque Int.eq.
@@ -755,6 +972,64 @@ Opaque Int.eq.
   fold (option_map negb (Some (Int.eq (Int.and i0 i) Int.zero))).
   rewrite Val.notbool_negb_3. rewrite Val.notbool_idem4. auto.
   auto with asmgen.
+- (* Ccompl *)
+  destruct (compare_slong_spec rs (rs x) (rs x0)) as [A [B [C D]]].
+  econstructor; split.
+  apply exec_straight_one. simpl; reflexivity. reflexivity.
+  rewrite <- Val.notbool_negb_3. rewrite <- Val.negate_cmpl_bool.
+  split. case c0; simpl; auto.
+  auto with asmgen.
+- (* Ccomplu *)
+  destruct (compare_ulong_spec rs m (rs x) (rs x0)) as [A [B [C D]]].
+  econstructor; split.
+  apply exec_straight_one. simpl; reflexivity. reflexivity.
+  rewrite <- Val.notbool_negb_3. rewrite <- Val.negate_cmplu_bool.
+  split. case c0; simpl; auto.
+  auto with asmgen.
+- (* Ccomplimm *)
+  rewrite <- Val.notbool_negb_3. rewrite <- Val.negate_cmpl_bool.
+  destruct (Int64.eq i (low64_s i)); [|destruct (ireg_eq x GPR12)]; inv EQ0.
++ destruct (compare_slong_spec rs (rs x) (Vlong i)) as [A [B [C D]]].
+  econstructor; split.
+  apply exec_straight_one. simpl; reflexivity. reflexivity.
+  split. case c0; simpl; auto. auto with asmgen.
++ destruct (loadimm64_correct GPR12 i (Pcmpd GPR0 GPR12 :: k) (nextinstr (rs#GPR0 <- (rs#GPR12))) m) as [rs1 [EX1 [RES1 OTH1]]].
+  destruct (compare_slong_spec rs1 (rs GPR12) (Vlong i)) as [A [B [C D]]].
+  assert (SAME: rs1 GPR0 = rs GPR12) by (apply OTH1; eauto with asmgen).
+  econstructor; split.
+  eapply exec_straight_step. simpl;reflexivity. reflexivity.
+  eapply exec_straight_trans. eexact EX1. eapply exec_straight_one. simpl;reflexivity. reflexivity.
+  split. rewrite RES1, SAME. destruct c0; simpl; auto.
+  simpl; intros. rewrite RES1, SAME. rewrite D by eauto with asmgen. rewrite OTH1 by eauto with asmgen. Simpl.
++ destruct (loadimm64_correct GPR0 i (Pcmpd x GPR0 :: k) rs m) as [rs1 [EX1 [RES1 OTH1]]].
+  destruct (compare_slong_spec rs1 (rs x) (Vlong i)) as [A [B [C D]]].
+  assert (SAME: rs1 x = rs x) by (apply OTH1; eauto with asmgen).
+  econstructor; split.
+  eapply exec_straight_trans. eexact EX1. eapply exec_straight_one. simpl;reflexivity. reflexivity.
+  split. rewrite RES1, SAME. destruct c0; simpl; auto.
+  simpl; intros. rewrite RES1, SAME. rewrite D; eauto with asmgen.
+- (* Ccompluimm *)
+  rewrite <- Val.notbool_negb_3. rewrite <- Val.negate_cmplu_bool.
+  destruct (Int64.eq i (low64_u i)); [|destruct (ireg_eq x GPR12)]; inv EQ0.
++ destruct (compare_ulong_spec rs m (rs x) (Vlong i)) as [A [B [C D]]].
+  econstructor; split.
+  apply exec_straight_one. simpl; reflexivity. reflexivity.
+  split. case c0; simpl; auto. auto with asmgen.
++ destruct (loadimm64_correct GPR12 i (Pcmpld GPR0 GPR12 :: k) (nextinstr (rs#GPR0 <- (rs#GPR12))) m) as [rs1 [EX1 [RES1 OTH1]]].
+  destruct (compare_ulong_spec rs1 m (rs GPR12) (Vlong i)) as [A [B [C D]]].
+  assert (SAME: rs1 GPR0 = rs GPR12) by (apply OTH1; eauto with asmgen).
+  econstructor; split.
+  eapply exec_straight_step. simpl;reflexivity. reflexivity.
+  eapply exec_straight_trans. eexact EX1. eapply exec_straight_one. simpl;reflexivity. reflexivity.
+  split. rewrite RES1, SAME. destruct c0; simpl; auto.
+  simpl; intros. rewrite RES1, SAME. rewrite D by eauto with asmgen. rewrite OTH1 by eauto with asmgen. Simpl.
++ destruct (loadimm64_correct GPR0 i (Pcmpld x GPR0 :: k) rs m) as [rs1 [EX1 [RES1 OTH1]]].
+  destruct (compare_ulong_spec rs1 m (rs x) (Vlong i)) as [A [B [C D]]].
+  assert (SAME: rs1 x = rs x) by (apply OTH1; eauto with asmgen).
+  econstructor; split.
+  eapply exec_straight_trans. eexact EX1. eapply exec_straight_one. simpl;reflexivity. reflexivity.
+  split. rewrite RES1, SAME. destruct c0; simpl; auto.
+  simpl; intros. rewrite RES1, SAME. rewrite D; eauto with asmgen.
 Qed.
 
 Lemma transl_cond_correct_2:
@@ -767,7 +1042,7 @@ Lemma transl_cond_correct_2:
        (if snd (crbit_for_cond cond)
         then Val.of_bool b
         else Val.notbool (Val.of_bool b))
-  /\ forall r, important_preg r = true -> rs'#r = rs#r.
+  /\ forall r, important_preg r = true -> preg_notin r (destroyed_by_cond cond)  -> rs'#r = rs#r.
 Proof.
   intros.
   replace (Val.of_bool b)
@@ -788,13 +1063,14 @@ Lemma transl_cond_correct_3:
        (if snd (crbit_for_cond cond)
         then Val.of_bool b
         else Val.notbool (Val.of_bool b))
-  /\ agree ms sp rs'.
+  /\ agree (Mach.undef_regs (destroyed_by_cond cond) ms) sp rs'.
 Proof.
   intros.
   exploit transl_cond_correct_2. eauto.
     eapply eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto.
   intros [rs' [A [B C]]].
-  exists rs'; split. eauto. split. auto. apply agree_exten with rs; auto with asmgen.
+  exists rs'; split. eauto. split. auto. apply agree_undef_regs with rs; auto. intros r D.
+  apply C. apply important_data_preg_1; auto.
 Qed.
 
 (** Translation of condition operators *)
@@ -851,7 +1127,7 @@ Lemma transl_cond_op_correct:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ rs'#(preg_of r) = Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)
-  /\ forall r', important_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r'.
+  /\ forall r', important_preg r' = true -> preg_notin r' (destroyed_by_cond cond) ->  r' <> preg_of r -> rs'#r' = rs#r'.
 Proof.
   intros until args. unfold transl_cond_op.
   destruct (classify_condition cond args); intros; monadInv H; simpl;
@@ -921,49 +1197,49 @@ Proof.
   assert (SAME: forall v1 v2, v1 = v2 -> Val.lessdef v2 v1). { intros; subst; auto. }
 Opaque Int.eq.
   intros. unfold transl_op in H; destruct op; ArgsInv; simpl in H0; try (inv H0); try TranslOpSimpl.
-  (* Omove *)
+- (* Omove *)
   destruct (preg_of res) eqn:RES; destruct (preg_of m0) eqn:ARG; inv H.
   TranslOpSimpl.
   TranslOpSimpl.
-  (* Ointconst *)
+- (* Ointconst *)
   destruct (loadimm_correct x i k rs m) as [rs' [A [B C]]].
   exists rs'. rewrite B. auto with asmgen.
-  (* Oaddrsymbol *)
+- (* Oaddrsymbol *)
   set (v' := Genv.symbol_address ge i i0).
   destruct (symbol_is_small_data i i0) eqn:SD; [ | destruct (symbol_is_rel_data i i0) ].
-  (* small data *)
++ (* small data *)
 Opaque Val.add.
   econstructor; split. apply exec_straight_one; simpl; reflexivity.
   split. apply SAME. Simpl. rewrite small_data_area_addressing by auto. apply add_zero_symbol_address.
   intros; Simpl.
-  (* relative data *)
++ (* relative data *)
   econstructor; split. eapply exec_straight_two; simpl; reflexivity.
   split. apply SAME. Simpl. rewrite gpr_or_zero_not_zero by eauto with asmgen. Simpl.
   apply low_high_half_zero.
   intros; Simpl.
-  (* absolute data *)
++ (* absolute data *)
   econstructor; split. eapply exec_straight_two; simpl; reflexivity.
   split. apply SAME. Simpl. rewrite gpr_or_zero_not_zero; eauto with asmgen. Simpl.
   apply low_high_half_zero.
   intros; Simpl.
-  (* Oaddrstack *)
+- (* Oaddrstack *)
   destruct (addimm_correct x GPR1 (Ptrofs.to_int i) k rs m) as [rs' [EX [RES OTH]]]; eauto with asmgen.
   exists rs'; split. auto. split; auto with asmgen.
-  rewrite RES. destruct (rs GPR1); simpl; auto. 
+  rewrite RES. destruct (rs GPR1); simpl; auto.
 Transparent Val.add.
   simpl. rewrite Ptrofs.of_int_to_int; auto.
 Opaque Val.add.
-  (* Oaddimm *)
+- (* Oaddimm *)
   destruct (addimm_correct x0 x i k rs m) as [rs' [A [B C]]]; eauto with asmgen.
   exists rs'; auto with asmgen.
-  (* Oaddsymbol *)
+- (* Oaddsymbol *)
   destruct (symbol_is_small_data i i0) eqn:SD; [ | destruct (symbol_is_rel_data i i0) ].
-  (* small data *)
++ (* small data *)
   econstructor; split. eapply exec_straight_two; simpl; reflexivity.
   split. apply SAME. Simpl. rewrite (Val.add_commut (rs x)). f_equal.
   rewrite small_data_area_addressing by auto. apply add_zero_symbol_address.
   intros; Simpl.
-  (* relative data *)
++ (* relative data *)
   econstructor; split. eapply exec_straight_trans.
   eapply exec_straight_two; simpl; reflexivity.
   eapply exec_straight_two; simpl; reflexivity.
@@ -971,12 +1247,12 @@ Opaque Val.add.
   Simpl. rewrite ! gpr_or_zero_zero. rewrite ! gpr_or_zero_not_zero by eauto with asmgen. Simpl.
   rewrite low_high_half_zero. auto.
   intros; Simpl.
-  (* absolute data *)
++ (* absolute data *)
   econstructor; split. eapply exec_straight_two; simpl; reflexivity.
   split. Simpl. rewrite ! gpr_or_zero_not_zero by (eauto with asmgen). Simpl.
   rewrite Val.add_assoc. rewrite (Val.add_commut (rs x)). rewrite low_high_half. auto.
   intros; Simpl.
-  (* Osubimm *)
+- (* Osubimm *)
   case (Int.eq (high_s i) Int.zero).
   TranslOpSimpl.
   destruct (loadimm_correct GPR0 i (Psubfc x0 x GPR0 :: k) rs m) as [rs1 [EX [RES OTH]]].
@@ -984,7 +1260,7 @@ Opaque Val.add.
   eapply exec_straight_trans. eexact EX. apply exec_straight_one; simpl; reflexivity.
   split. Simpl. rewrite RES. rewrite OTH; eauto with asmgen.
   intros; Simpl.
-  (* Omulimm *)
+- (* Omulimm *)
   case (Int.eq (high_s i) Int.zero).
   TranslOpSimpl.
   destruct (loadimm_correct GPR0 i (Pmullw x0 x GPR0 :: k) rs m) as [rs1 [EX [RES OTH]]].
@@ -992,61 +1268,111 @@ Opaque Val.add.
   eapply exec_straight_trans. eexact EX. apply exec_straight_one; simpl; reflexivity.
   split. Simpl. rewrite RES. rewrite OTH; eauto with asmgen.
   intros; Simpl.
-  (* Odivs *)
+- (* Odivs *)
   replace v with (Val.maketotal (Val.divs (rs x) (rs x0))).
   TranslOpSimpl.
   rewrite H1; auto.
-  (* Odivu *)
+- (* Odivu *)
   replace v with (Val.maketotal (Val.divu (rs x) (rs x0))).
   TranslOpSimpl.
   rewrite H1; auto.
-  (* Oand *)
+- (* Oand *)
   set (v' := Val.and (rs x) (rs x0)) in *.
   pose (rs1 := rs#x1 <- v').
   destruct (compare_sint_spec rs1 v' Vzero) as [A [B [C D]]].
   econstructor; split. apply exec_straight_one; simpl; reflexivity.
   split. rewrite D; auto with asmgen. unfold rs1; Simpl.
   intros. rewrite D; auto with asmgen. unfold rs1; Simpl.
-  (* Oandimm *)
+- (* Oandimm *)
   destruct (andimm_correct x0 x i k rs m) as [rs' [A [B C]]]. eauto with asmgen.
   exists rs'; auto with asmgen.
-  (* Oorimm *)
+- (* Oorimm *)
   destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]].
   exists rs'; auto with asmgen.
-  (* Oxorimm *)
+- (* Oxorimm *)
   destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]].
   exists rs'; auto with asmgen.
-  (* Onor *)
+- (* Onor *)
   replace (Val.notint (rs x))
      with (Val.notint (Val.or (rs x) (rs x))).
   TranslOpSimpl.
   destruct (rs x); simpl; auto. rewrite Int.or_idem. auto.
-  (* Oshrximm *)
+- (* Oshrximm *)
   econstructor; split.
   eapply exec_straight_two; simpl; reflexivity.
   split. Simpl. apply SAME. apply Val.shrx_carry. auto.
   intros; Simpl.
-  (* Orolm *)
+- (* Orolm *)
   destruct (rolm_correct x0 x i i0 k rs m) as [rs' [A [B C]]]. eauto with asmgen.
   exists rs'; auto.
+- (* Olongconst *)
+  destruct (loadimm64_correct x i k rs m) as [rs' [A [B C]]].
+  exists rs'; auto with asmgen.
+- (* Oaddlimm *)
+  destruct (addimm64_correct x0 x i k rs m) as [rs' [A [B C]]]. eauto with asmgen.
+  exists rs'; auto with asmgen.
+- (* Odivl *)
+  replace v with (Val.maketotal (Val.divls (rs x) (rs x0))).
+  TranslOpSimpl.
+  rewrite H1; auto.
+- (* Odivlu *)
+  replace v with (Val.maketotal (Val.divlu (rs x) (rs x0))).
+  TranslOpSimpl.
+  rewrite H1; auto.
+- (* Oandl *)
+  set (v' := Val.andl (rs x) (rs x0)) in *.
+  pose (rs1 := rs#x1 <- v').
+  destruct (compare_slong_spec rs1 v' (Vlong Int64.zero)) as [A [B [C D]]].
+  econstructor; split. apply exec_straight_one; simpl; reflexivity.
+  split. rewrite D; auto with asmgen. unfold rs1; Simpl.
+  intros. rewrite D; auto with asmgen. unfold rs1; Simpl.
+- (* Oandlimm *)
+  destruct (andimm64_correct x0 x i k rs m) as [rs' [A [B C]]]. eauto with asmgen.
+  exists rs'; auto with asmgen.
+- (* Oorlimm *)
+  destruct (orimm64_correct x0 x i k rs m) as [rs' [A [B C]]]. eauto with asmgen.
+  exists rs'; auto with asmgen.
+- (* Oxorlimm *)
+  destruct (xorimm64_correct x0 x i k rs m) as [rs' [A [B C]]]. eauto with asmgen.
+  exists rs'; auto with asmgen.
+- (* Onotl *)
+  econstructor; split. eapply exec_straight_one; simpl; reflexivity.
+  split. Simpl. destruct (rs x); simpl; auto. rewrite Int64.or_idem; auto.
+  intros; Simpl.
+- (* Oshrxlimm *)
+  econstructor; split.
+  eapply exec_straight_two; simpl; reflexivity.
+  split. Simpl. apply SAME. apply Val.shrxl_carry. auto.
+  intros; Simpl.
+- (* Orolml *)
+  destruct (rolm64_correct x0 x i i0 k rs m) as [rs' [A [B C]]]. eauto with asmgen.
+  exists rs'; auto with asmgen.
+- (* Olongoffloat *)
+  replace v with (Val.maketotal (Val.longoffloat (rs x))).
+  TranslOpSimpl.
+  rewrite H1; auto.
+- (* Ofloatoflong *)
+  replace v with (Val.maketotal (Val.floatoflong (rs x))).
+  TranslOpSimpl.
+  rewrite H1; auto.
   (* Ointoffloat *)
-  replace v with (Val.maketotal (Val.intoffloat (rs x))).
+- replace v with (Val.maketotal (Val.intoffloat (rs x))).
   TranslOpSimpl.
   rewrite H1; auto.
   (* Ointuoffloat *)
-  replace v with (Val.maketotal (Val.intuoffloat (rs x))).
+- replace v with (Val.maketotal (Val.intuoffloat (rs x))).
   TranslOpSimpl.
   rewrite H1; auto.
   (* Ofloatofint *)
-  replace v with (Val.maketotal (Val.floatofint (rs x))).
+- replace v with (Val.maketotal (Val.floatofint (rs x))).
   TranslOpSimpl.
   rewrite H1; auto.
   (* Ofloatofintu *)
-  replace v with (Val.maketotal (Val.floatofintu (rs x))).
+- replace v with (Val.maketotal (Val.floatofintu (rs x))).
   TranslOpSimpl.
   rewrite H1; auto.
   (* Ocmp *)
-  destruct (transl_cond_op_correct c0 args res k rs m c) as [rs' [A [B C]]]; auto.
+- destruct (transl_cond_op_correct c0 args res k rs m c) as [rs' [A [B C]]]; auto.
   exists rs'; auto with asmgen.
 Qed.
 
@@ -1179,7 +1505,7 @@ Transparent Val.add.
   (* Ainstack *)
   set (ofs := Ptrofs.to_int i) in *.
   assert (L: Val.lessdef (Val.offset_ptr (rs GPR1) i) (Val.add (rs GPR1) (Vint ofs))).
-  { destruct (rs GPR1); simpl; auto. unfold ofs; rewrite Ptrofs.of_int_to_int; auto. } 
+  { destruct (rs GPR1); simpl; auto. unfold ofs; rewrite Ptrofs.of_int_to_int; auto. }
   destruct (Int.eq (high_s ofs) Int.zero); inv TR.
   apply MK1. simpl. rewrite gpr_or_zero_not_zero; eauto with asmgen. auto.
   set (rs1 := nextinstr (rs#temp <- (Val.add rs#GPR1 (Vint (Int.shl (high_s ofs) (Int.repr 16)))))).
@@ -1209,7 +1535,7 @@ Lemma transl_load_correct:
 Proof.
   intros.
   assert (LD: forall v, Val.lessdef a v -> v = a).
-  { intros. inv H2; auto. discriminate H1. }   
+  { intros. inv H2; auto. discriminate H1. }
   assert (BASE: forall mk1 mk2 k' chunk' v',
   transl_memory_access mk1 mk2 addr args GPR12 k' = OK c ->
   Mem.loadv chunk' m a = Some v' ->
@@ -1257,6 +1583,8 @@ Proof.
   eapply BASE; eauto; erewrite ireg_of_eq by eauto; auto.
 - (* Mint32 *)
   eapply BASE; eauto; erewrite ireg_of_eq by eauto; auto.
+- (* Mint64 *)
+  eapply BASE; eauto; erewrite ireg_of_eq by eauto; auto.
 - (* Mfloat32 *)
   eapply BASE; eauto; erewrite freg_of_eq by eauto; auto.
 - (* Mfloat64 *)
@@ -1277,7 +1605,7 @@ Proof.
 Local Transparent destroyed_by_store.
   intros.
   assert (LD: forall v, Val.lessdef a v -> v = a).
-  { intros. inv H2; auto. discriminate H1. }   
+  { intros. inv H2; auto. discriminate H1. }
   assert (TEMP0: int_temp_for src = GPR11 \/ int_temp_for src = GPR12).
     unfold int_temp_for. destruct (mreg_eq src R12); auto.
   assert (TEMP1: int_temp_for src <> GPR0).
@@ -1323,6 +1651,8 @@ Local Transparent destroyed_by_store.
   eapply BASE; eauto; erewrite ireg_of_eq by eauto; auto.
 - (* Mint32 *)
   eapply BASE; eauto; erewrite ireg_of_eq by eauto; auto.
+- (* Mint64 *)
+  eapply BASE; eauto; erewrite ireg_of_eq by eauto; auto.
 - (* Mfloat32 *)
   eapply BASE; eauto; erewrite freg_of_eq by eauto; auto.
 - (* Mfloat64 *)
@@ -1386,4 +1716,3 @@ Proof.
 Qed.
 
 End CONSTRUCTORS.
-
diff --git a/powerpc/ConstpropOp.vp b/powerpc/ConstpropOp.vp
index 403a7a77..8946ae27 100644
--- a/powerpc/ConstpropOp.vp
+++ b/powerpc/ConstpropOp.vp
@@ -39,7 +39,7 @@ Definition const_for_result (a: aval) : option operation :=
 
 Section STRENGTH_REDUCTION.
 
-Nondetfunction cond_strength_reduction 
+Nondetfunction cond_strength_reduction
               (cond: condition) (args: list reg) (vl: list aval) :=
   match cond, args, vl with
   | Ccomp c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
@@ -50,7 +50,7 @@ Nondetfunction cond_strength_reduction
       (Ccompuimm (swap_comparison c) n1, r2 :: nil)
   | Ccompu c, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
       (Ccompuimm c n2, r1 :: nil)
-  | _, _, _ => 
+  | _, _, _ =>
       (cond, args)
   end.
 
@@ -158,7 +158,7 @@ Definition make_cast8signed (r: reg) (a: aval) :=
 Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn Ptop 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
 
-Nondetfunction op_strength_reduction 
+Nondetfunction op_strength_reduction
               (op: operation) (args: list reg) (vl: list aval) :=
   match op, args, vl with
   | Ocast8signed, r1 :: nil, v1 :: nil => make_cast8signed r1 v1
diff --git a/powerpc/ConstpropOpproof.v b/powerpc/ConstpropOpproof.v
index bb0605ee..d2ebf52d 100644
--- a/powerpc/ConstpropOpproof.v
+++ b/powerpc/ConstpropOpproof.v
@@ -106,7 +106,7 @@ Proof.
   + (* global *)
     inv H2. exists (Genv.symbol_address ge id ofs); auto.
   + (* stack *)
-    inv H2. exists (Vptr sp ofs); split; auto. simpl. rewrite Ptrofs.add_zero_l; auto.  
+    inv H2. exists (Vptr sp ofs); split; auto. simpl. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 Lemma cond_strength_reduction_correct:
@@ -478,7 +478,7 @@ Remark shift_symbol_address:
   Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.of_int delta)) = Val.add (Genv.symbol_address ge id ofs) (Vint delta).
 Proof.
   intros. unfold Genv.symbol_address. destruct (Genv.find_symbol ge id); auto.
-Qed.   
+Qed.
 
 Lemma addr_strength_reduction_correct:
   forall addr args vl res,
@@ -491,7 +491,7 @@ Proof.
   destruct (addr_strength_reduction_match addr args vl); simpl;
   intros VL EA; InvApproxRegs; SimplVM; try (inv EA).
 - rewrite shift_symbol_address. econstructor; split; eauto. apply Val.add_lessdef; auto.
-- econstructor; split; eauto. 
+- econstructor; split; eauto.
   change (Val.lessdef (Val.add (Vint n1) rs#r2) (Genv.symbol_address ge symb (Ptrofs.add (Ptrofs.of_int n1) n2))).
   rewrite Ptrofs.add_commut. rewrite shift_symbol_address. rewrite Val.add_commut.
   apply Val.add_lessdef; auto.
diff --git a/powerpc/Conventions1.v b/powerpc/Conventions1.v
index b83ab6da..2793fbfb 100644
--- a/powerpc/Conventions1.v
+++ b/powerpc/Conventions1.v
@@ -18,6 +18,7 @@ Require Import Decidableplus.
 Require Import AST.
 Require Import Events.
 Require Import Locations.
+Require Archi.
 
 (** * Classification of machine registers *)
 
@@ -41,6 +42,38 @@ Definition is_callee_save (r: mreg): bool :=
   | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => true
   end.
 
+Definition destroyed_at_call :=
+  List.filter (fun r => negb (is_callee_save r)) all_mregs.
+
+(** The following definitions are used by the register allocator. *)
+
+(** When a PPC64 processor is used with the PPC32 ABI, the high 32 bits
+  of integer callee-save registers may not be preserved.  So,
+  declare all integer registers as having size 32 bits for the purpose
+  of determining which variables can go in callee-save registers. *)
+  
+Definition callee_save_type (r: mreg): typ :=
+  match r with
+  | R3  | R4  | R5  | R6  | R7  | R8  | R9  | R10  | R11 | R12
+  | R14 | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | R24
+  | R25 | R26 | R27 | R28 | R29 | R30 | R31 => Tany32
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
+  | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15
+  | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => Tany64
+  end.
+
+Definition is_float_reg (r: mreg): bool :=
+  match r with
+  | R3  | R4  | R5  | R6  | R7  | R8  | R9  | R10  | R11 | R12
+  | R14 | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | R24
+  | R25 | R26 | R27 | R28 | R29 | R30 | R31 => false
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
+  | F8  | F9  | F10 | F11 | F12 | F13
+  | F14 | F15 | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => true
+  end.
+
 Definition int_caller_save_regs :=
   R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: R12 :: nil.
 
@@ -55,23 +88,9 @@ Definition float_callee_save_regs :=
   F31 :: F30 :: F29 :: F28 :: F27 :: F26 :: F25 :: F24 :: F23 ::
   F22 :: F21 :: F20 :: F19 :: F18 :: F17 :: F16 :: F15 :: F14 :: nil.
 
-Definition destroyed_at_call :=
-  List.filter (fun r => negb (is_callee_save r)) all_mregs.
-
 Definition dummy_int_reg := R3.     (**r Used in [Coloring]. *)
 Definition dummy_float_reg := F0.   (**r Used in [Coloring]. *)
 
-Definition is_float_reg (r: mreg): bool :=
-  match r with
-  | R3  | R4  | R5  | R6  | R7  | R8  | R9  | R10  | R11 | R12
-  | R14 | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | R24
-  | R25 | R26 | R27 | R28 | R29 | R30 | R31 => false
-  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
-  | F8  | F9  | F10 | F11 | F12 | F13
-  | F14 | F15 | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
-  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => true
-  end.
-
 (** * Function calling conventions *)
 
 (** The functions in this section determine the locations (machine registers
@@ -97,7 +116,7 @@ Definition is_float_reg (r: mreg): bool :=
   registers [R3] or [F1] or [R3, R4], depending on the type of the returned value.
   We treat a function without result as a function with one integer result. *)
 
-Definition loc_result (s: signature) : rpair mreg :=
+Definition loc_result_32 (s: signature) : rpair mreg :=
   match s.(sig_res) with
   | None => One R3
   | Some (Tint | Tany32) => One R3
@@ -105,12 +124,24 @@ Definition loc_result (s: signature) : rpair mreg :=
   | Some Tlong => Twolong R3 R4
   end.
 
+Definition loc_result_64 (s: signature) : rpair mreg :=
+  match s.(sig_res) with
+  | None => One R3
+  | Some (Tint | Tlong | Tany32 | Tany64) => One R3
+  | Some (Tfloat | Tsingle) => One F1
+  end.
+
+Definition loc_result :=
+  if Archi.ptr64 then loc_result_64 else loc_result_32.
+
+(** The result registers have types compatible with that given in the signature. *)
+
 Lemma loc_result_type:
   forall sig,
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
-  intros. unfold proj_sig_res, loc_result.
-  destruct (sig_res sig) as [[]|]; simpl; destruct Archi.ppc64; auto.
+  intros. unfold proj_sig_res, loc_result, loc_result_32, loc_result_64, mreg_type.
+  destruct Archi.ptr64 eqn:?; destruct (sig_res sig) as [[]|]; destruct Archi.ppc64; simpl; auto.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -119,8 +150,8 @@ Lemma loc_result_caller_save:
   forall (s: signature),
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
-  intros.
-  unfold loc_result. destruct (sig_res s) as [[]|]; simpl; auto.
+  intros. unfold loc_result, loc_result_32, loc_result_64, is_callee_save;
+  destruct Archi.ptr64; destruct (sig_res s) as [[]|]; simpl; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -132,11 +163,13 @@ Lemma loc_result_pair:
   | Twolong r1 r2 =>
         r1 <> r2 /\ sg.(sig_res) = Some Tlong
      /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
-     /\ Archi.splitlong = true 
+     /\ Archi.ptr64 = false
   end.
 Proof.
-  intros; unfold loc_result; destruct (sig_res sg) as [[]|]; auto.
-  simpl; intuition congruence. 
+  intros; unfold loc_result, loc_result_32, loc_result_64, mreg_type;
+  destruct Archi.ptr64; destruct (sig_res sg) as [[]|]; destruct Archi.ppc64; simpl; auto.
+  split; auto. congruence.
+  split; auto. congruence.
 Qed.
 
 (** The location of the result depends only on the result part of the signature *)
@@ -144,7 +177,8 @@ Qed.
 Lemma loc_result_exten:
   forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
 Proof.
-  intros. unfold loc_result. rewrite H; auto.
+  intros. unfold loc_result, loc_result_32, loc_result_64.
+  destruct Archi.ptr64; rewrite H; auto.
 Qed.
 
 (** ** Location of function arguments *)
@@ -191,7 +225,10 @@ Fixpoint loc_arguments_rec
           Twolong (R r1) (R r2) :: loc_arguments_rec tys (ir + 2) fr ofs
       | _, _ =>
           let ofs := align ofs 2 in
-          Twolong (S Outgoing ofs Tint) (S Outgoing (ofs + 1) Tint) :: loc_arguments_rec tys ir fr (ofs + 2)
+          (if Archi.ptr64
+           then One (S Outgoing ofs Tlong)
+           else Twolong (S Outgoing ofs Tint) (S Outgoing (ofs + 1) Tint)) ::
+          loc_arguments_rec tys ir fr (ofs + 2)
       end
   end.
 
@@ -279,10 +316,12 @@ Opaque list_nth_z.
   destruct H. subst; split; left; eapply list_nth_z_in; eauto.
   eapply IHtyl; eauto.
   destruct H.
-  subst. split; (split; [omega|apply Z.divide_1_l]).
+  subst. destruct Archi.ptr64; [split|split;split]; try omega.
+  apply align_divides; omega. apply Z.divide_1_l. apply Z.divide_1_l.
   eapply Y; eauto. omega.
   destruct H.
-  subst. split; (split; [omega|apply Z.divide_1_l]).
+  subst. destruct Archi.ptr64; [split|split;split]; try omega.
+  apply align_divides; omega. apply Z.divide_1_l. apply Z.divide_1_l.
   eapply Y; eauto. omega.
 - (* single *)
   assert (ofs <= align ofs 2) by (apply align_le; omega).
@@ -386,12 +425,15 @@ Proof.
   set (ir' := align ir 2) in *.
   assert (DFL:
     In (S Outgoing ofs ty) (regs_of_rpairs
-                             (Twolong (S Outgoing (align ofs0 2) Tint)
-                                      (S Outgoing (align ofs0 2 + 1) Tint)
+                             ((if Archi.ptr64
+                               then One (S Outgoing (align ofs0 2) Tlong)
+                               else Twolong (S Outgoing (align ofs0 2) Tint)
+                                            (S Outgoing (align ofs0 2 + 1) Tint))
                           :: loc_arguments_rec tyl ir' fr (align ofs0 2 + 2))) ->
     ofs + typesize ty <= size_arguments_rec tyl ir' fr (align ofs0 2 + 2)).
-  { intros IN. destruct IN. inv H1.
-    transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above. 
+  { destruct Archi.ptr64; intros IN.
+  - destruct IN. inv H1. apply size_arguments_rec_above. auto.
+  - destruct IN. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
     destruct H1. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
     auto. }
   destruct (list_nth_z int_param_regs ir'); auto.
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index ce9c3542..6f2c6a03 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -86,7 +86,7 @@ Definition mreg_type (r: mreg): typ :=
   match r with
   | R3  | R4  | R5  | R6  | R7  | R8  | R9  | R10  | R11 | R12
   | R14 | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | R24
-  | R25 | R26 | R27 | R28 | R29 | R30 | R31 => Tany32
+  | R25 | R26 | R27 | R28 | R29 | R30 | R31 => if Archi.ppc64 then Tany64 else Tany32
   | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
   | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15
   | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
@@ -159,11 +159,25 @@ Definition register_by_name (s: string) : option mreg :=
 
 (** ** Destroyed registers, preferred registers *)
 
+Definition destroyed_by_cond (cond: condition): list mreg :=
+  match cond with
+  | Ccomplimm _ _ | Ccompluimm _ _ => R12 :: nil
+  | _ => nil
+  end.
+
 Definition destroyed_by_op (op: operation): list mreg :=
   match op with
   | Ofloatconst _ => R12 :: nil
   | Osingleconst _ => R12 :: nil
+  | Olongconst _ => R12 :: nil
   | Ointoffloat | Ointuoffloat => F13 :: nil
+  | Olongoffloat => F13 :: nil
+  | Oaddlimm _ => R12 :: nil
+  | Oandlimm _ => R12 :: nil
+  | Oorlimm _ => R12 :: nil
+  | Oxorlimm _ => R12 :: nil
+  | Orolml _ _ => R12 :: nil
+  | Ocmp c => destroyed_by_cond c
   | _ => nil
   end.
 
@@ -173,9 +187,6 @@ Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg
 Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
   R11 :: R12 :: nil.
 
-Definition destroyed_by_cond (cond: condition): list mreg :=
-  nil.
-
 Definition destroyed_by_jumptable: list mreg :=
   R12 :: nil.
 
@@ -239,11 +250,13 @@ Global Opaque
 
 (** Two-address operations.  Return [true] if the first argument and
   the result must be in the same location *and* are unconstrained
-  by [mregs_for_operation]. There is only one: rotate-mask-insert. *)
+  by [mregs_for_operation]. *)
 
 Definition two_address_op (op: operation) : bool :=
   match op with
   | Oroli _ _ => true
+  | Olowlong => true
+  | Ofloatofsingle => true
   | _ => false
   end.
 
diff --git a/powerpc/NeedOp.v b/powerpc/NeedOp.v
index 956b5d43..9a579cc5 100644
--- a/powerpc/NeedOp.v
+++ b/powerpc/NeedOp.v
@@ -51,6 +51,11 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Oshrximm n => op1 (default nv)
   | Orolm amount mask => op1 (rolm nv amount mask)
   | Oroli amount mask => op1 (default nv)
+  | Olongconst n => nil
+  | Ocast32signed | Ocast32unsigned | Onegl | Onotl => op1 (default nv)
+  | Oaddl | Osubl | Omull | Omullhs | Omullhu | Odivl | Odivlu | Oandl | Oorl | Oxorl | Oshll | Oshrl | Oshrlu => op2 (default nv)
+  | Oaddlimm _ | Oandlimm _ | Oorlimm _ | Oxorlimm _ | Oshrlimm _ | Oshrxlimm _=> op1 (default nv)
+  | Orolml _ _ | Olongoffloat | Ofloatoflong => op1 (default nv)
   | Onegf | Oabsf => op1 (default nv)
   | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
   | Onegfs | Oabsfs => op1 (default nv)
diff --git a/powerpc/Op.v b/powerpc/Op.v
index e171c7d4..de4eee48 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -35,7 +35,6 @@ Require Import Globalenvs.
 Require Import Events.
 
 Set Implicit Arguments.
-Local Transparent Archi.ptr64.
 
 (** Conditions (boolean-valued operators). *)
 
@@ -43,11 +42,16 @@ Inductive condition : Type :=
   | Ccomp: comparison -> condition      (**r signed integer comparison *)
   | Ccompu: comparison -> condition     (**r unsigned integer comparison *)
   | Ccompimm: comparison -> int -> condition (**r signed integer comparison with a constant *)
-  | Ccompuimm: comparison -> int -> condition  (**r unsigned integer comparison with a constant *)
+  | Ccompuimm: comparison -> int -> condition (**r unsigned integer comparison with a constant *)
   | Ccompf: comparison -> condition     (**r floating-point comparison *)
   | Cnotcompf: comparison -> condition  (**r negation of a floating-point comparison *)
   | Cmaskzero: int -> condition         (**r test [(arg & constant) == 0] *)
-  | Cmasknotzero: int -> condition.     (**r test [(arg & constant) != 0] *)
+  | Cmasknotzero: int -> condition      (**r test [(arg & constant) != 0] *)
+(*c PPC64 specific conditions: *)
+  | Ccompl: comparison -> condition     (**r signed int64 comparison *)
+  | Ccomplu: comparison -> condition    (**r unsigned int64 comparison *)
+  | Ccomplimm: comparison -> int64 -> condition (**r signed int64 comparison with a constant *)
+  | Ccompluimm: comparison -> int64 -> condition. (**r unsigned int64 comparison with a constant *)
 
 (** Arithmetic and logical operations.  In the descriptions, [rd] is the
   result of the operation and [r1], [r2], etc, are the arguments. *)
@@ -58,7 +62,7 @@ Inductive operation : Type :=
   | Ofloatconst: float -> operation     (**r [rd] is set to the given float constant *)
   | Osingleconst: float32 -> operation  (**r [rd] is set to the given float constant *)
   | Oaddrsymbol: ident -> ptrofs -> operation (**r [rd] is set to the the address of the symbol plus the offset *)
-  | Oaddrstack: ptrofs -> operation        (**r [rd] is set to the stack pointer plus the given offset *)
+  | Oaddrstack: ptrofs -> operation     (**r [rd] is set to the stack pointer plus the given offset *)
 (*c Integer arithmetic: *)
   | Ocast8signed: operation             (**r [rd] is 8-bit sign extension of [r1] *)
   | Ocast16signed: operation            (**r [rd] is 16-bit sign extension of [r1] *)
@@ -92,6 +96,34 @@ Inductive operation : Type :=
   | Oshru: operation                    (**r [rd = r1 >> r2] (unsigned) *)
   | Orolm: int -> int -> operation      (**r rotate left and mask *)
   | Oroli: int -> int -> operation      (**r rotate left and insert *)
+(*c PPC64 64-bit integer arithmetic: *)
+  | Olongconst: int64 -> operation      (**r [rd] is set to the given int64 constant *)
+  | Ocast32signed: operation            (**r [rd] is 64-bit sign extension of [r1] *)
+  | Ocast32unsigned: operation          (**r [rd] is 64-bit zero extension of [r1] *)
+  | Oaddl: operation                    (**r [rd = r1 + r2] *)
+  | Oaddlimm: int64 -> operation        (**r [rd = r1 + n] *)
+  | Osubl: operation                    (**r [rd = r1 - r2] *)
+  | Onegl: operation                    (**r [rd = - r1] *)
+  | Omull: operation                    (**r [rd = r1 * r2] *)
+  | Omullhs: operation                   (**r [rd = high part of r1 * r2, signed] *)
+  | Omullhu: operation                  (**r [rd = high part of r1 * r2, unsigned] *)
+  | Odivl: operation                    (**r [rd = r1 / r2] (signed) *)
+  | Odivlu: operation                   (**r [rd = r1 / r2] (unsigned) *)
+  | Oandl: operation                    (**r [rd = r1 & r2] *)
+  | Oandlimm: int64 -> operation        (**r [rd = r1 & n] *)
+  | Oorl: operation                     (**r [rd = r1 | r2] *)
+  | Oorlimm: int64 -> operation         (**r [rd = r1 | n] *)
+  | Oxorl: operation                    (**r [rd = r1 ^ r2] *)
+  | Oxorlimm: int64 -> operation        (**r [rd = r1 ^ n] *)
+  | Onotl: operation                    (**r [rd = ~r1] *)
+  | Oshll: operation                    (**r [rd = r1 << r2] *)
+  | Oshrl: operation                    (**r [rd = r1 >> r2] (signed) *)
+  | Oshrlimm: int -> operation          (**r [rd = r1 >> n] (signed) *)
+  | Oshrxlimm: int -> operation         (**r [rd = r1 / 2^n] (signed) *)
+  | Oshrlu: operation                   (**r [rd = r1 >> r2] (unsigned) *)
+  | Orolml: int -> int64 -> operation   (**r rotate left and mask *)
+  | Olongoffloat: operation             (**r [rd = signed_int64_of_float(r1)] *)
+  | Ofloatoflong: operation             (**r [rd = float_of_signed_int64(r1)] *)
 (*c Floating-point arithmetic: *)
   | Onegf: operation                    (**r [rd = - r1] *)
   | Oabsf: operation                    (**r [rd = abs(r1)] *)
@@ -120,27 +152,28 @@ Inductive operation : Type :=
 (*c Boolean tests: *)
   | Ocmp: condition -> operation.       (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
 
+
 (** Addressing modes.  [r1], [r2], etc, are the arguments to the
   addressing. *)
 
 Inductive addressing: Type :=
-  | Aindexed: int -> addressing         (**r Address is [r1 + offset] *)
-  | Aindexed2: addressing               (**r Address is [r1 + r2] *)
+  | Aindexed: int -> addressing            (**r Address is [r1 + offset] *)
+  | Aindexed2: addressing                  (**r Address is [r1 + r2] *)
   | Aglobal: ident -> ptrofs -> addressing (**r Address is [symbol + offset] *)
-  | Abased: ident -> ptrofs -> addressing (**r Address is [symbol + offset + r1] *)
+  | Abased: ident -> ptrofs -> addressing  (**r Address is [symbol + offset + r1] *)
   | Ainstack: ptrofs -> addressing.        (**r Address is [stack_pointer + offset] *)
 
 (** Comparison functions (used in module [CSE]). *)
 
 Definition eq_condition (x y: condition) : {x=y} + {x<>y}.
 Proof.
-  generalize Int.eq_dec; intro.
+  generalize Int.eq_dec Int64.eq_dec; intro.
   assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
   decide equality.
 Defined.
 
 Definition beq_operation: forall (x y: operation), bool.
-  generalize Int.eq_dec Ptrofs.eq_dec ident_eq Float.eq_dec Float32.eq_dec  eq_condition; boolean_equality.
+  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec ident_eq Float.eq_dec Float32.eq_dec  eq_condition; boolean_equality.
 Defined.
 
 Definition eq_operation (x y: operation): {x=y} + {x<>y}.
@@ -150,7 +183,7 @@ Defined.
 
 Definition eq_addressing (x y: addressing) : {x=y} + {x<>y}.
 Proof.
-  generalize Int.eq_dec Ptrofs.eq_dec ident_eq; intro.
+  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec ident_eq; intro.
   decide equality.
 Defined.
 
@@ -173,6 +206,10 @@ Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool
   | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
   | Cmaskzero n, v1 :: nil => Val.maskzero_bool v1 n
   | Cmasknotzero n, v1 :: nil => option_map negb (Val.maskzero_bool v1 n)
+  | Ccompl c, v1 :: v2 :: nil => Val.cmpl_bool c v1 v2
+  | Ccomplu c, v1 :: v2 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 v2
+  | Ccomplimm c n, v1 :: nil => Val.cmpl_bool c v1 (Vlong n)
+  | Ccompluimm c n, v1 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 (Vlong n)
   | _, _ => None
   end.
 
@@ -219,6 +256,33 @@ Definition eval_operation
   | Orolm amount mask, v1::nil => Some (Val.rolm v1 amount mask)
   | Oroli amount mask, v1::v2::nil =>
       Some(Val.or (Val.and v1 (Vint (Int.not mask))) (Val.rolm v2 amount mask))
+  | Olongconst n, nil => Some (Vlong n)
+  | Ocast32signed, v1::nil => Some (Val.longofint v1)
+  | Ocast32unsigned, v1::nil => Some (Val.longofintu v1)
+  | Oaddl, v1::v2::nil => Some (Val.addl v1 v2)
+  | Oaddlimm n, v1::nil => Some (Val.addl v1 (Vlong n))
+  | Osubl, v1::v2::nil => Some (Val.subl v1 v2)
+  | Onegl, v1::nil => Some (Val.negl v1)
+  | Omull, v1::v2::nil => Some (Val.mull v1 v2)
+  | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
+  | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
+  | Odivl, v1::v2::nil => Val.divls v1 v2
+  | Odivlu, v1::v2::nil => Val.divlu v1 v2
+  | Oandl, v1::v2::nil => Some(Val.andl v1 v2)
+  | Oandlimm n, v1::nil => Some (Val.andl v1 (Vlong n))
+  | Oorl, v1::v2::nil => Some(Val.orl v1 v2)
+  | Oorlimm n, v1::nil => Some (Val.orl v1 (Vlong n))
+  | Oxorl, v1::v2::nil => Some(Val.xorl v1 v2)
+  | Oxorlimm n, v1::nil => Some (Val.xorl v1 (Vlong n))
+  | Onotl, v1::nil => Some(Val.notl v1)
+  | Oshll, v1::v2::nil => Some (Val.shll v1 v2)
+  | Oshrl, v1::v2::nil => Some (Val.shrl v1 v2)
+  | Oshrlimm n, v1::nil => Some (Val.shrl v1 (Vint n))
+  | Oshrxlimm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrlu, v1::v2::nil => Some (Val.shrlu v1 v2)
+  | Orolml amount mask, v1::nil => Some (Val.rolml v1 amount mask)
+  | Olongoffloat, v1::nil => Val.longoffloat v1
+  | Ofloatoflong, v1::nil => Val.floatoflong v1
   | Onegf, v1::nil => Some(Val.negf v1)
   | Oabsf, v1::nil => Some(Val.absf v1)
   | Oaddf, v1::v2::nil => Some(Val.addf v1 v2)
@@ -295,6 +359,10 @@ Definition type_of_condition (c: condition) : list typ :=
   | Cnotcompf _ => Tfloat :: Tfloat :: nil
   | Cmaskzero _ => Tint :: nil
   | Cmasknotzero _ => Tint :: nil
+  | Ccompl _ => Tlong :: Tlong :: nil
+  | Ccomplu _ => Tlong :: Tlong :: nil
+  | Ccomplimm _ _ => Tlong :: nil
+  | Ccompluimm _ _ => Tlong :: nil
   end.
 
 Definition type_of_operation (op: operation) : list typ * typ :=
@@ -337,6 +405,33 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Oshru => (Tint :: Tint :: nil, Tint)
   | Orolm _ _ => (Tint :: nil, Tint)
   | Oroli _ _ => (Tint :: Tint :: nil, Tint)
+  | Olongconst _ => (nil, Tlong)
+  | Ocast32signed => (Tint :: nil, Tlong)
+  | Ocast32unsigned => (Tint :: nil, Tlong)
+  | Oaddl => (Tlong :: Tlong :: nil, Tlong)
+  | Oaddlimm _ => (Tlong :: nil, Tlong)
+  | Osubl => (Tlong :: Tlong :: nil, Tlong)
+  | Onegl => (Tlong :: nil, Tlong)
+  | Omull => (Tlong :: Tlong :: nil, Tlong)
+  | Omullhs => (Tlong :: Tlong :: nil, Tlong)
+  | Omullhu => (Tlong :: Tlong :: nil, Tlong)
+  | Odivl => (Tlong :: Tlong :: nil, Tlong)
+  | Odivlu => (Tlong :: Tlong :: nil, Tlong)
+  | Oandl => (Tlong :: Tlong :: nil, Tlong)
+  | Oandlimm _ => (Tlong :: nil, Tlong)
+  | Oorl => (Tlong :: Tlong :: nil, Tlong)
+  | Oorlimm _ => (Tlong :: nil, Tlong)
+  | Oxorl => (Tlong :: Tlong :: nil, Tlong)
+  | Oxorlimm _ => (Tlong :: nil, Tlong)
+  | Onotl => (Tlong :: nil, Tlong)
+  | Oshll => (Tlong :: Tint :: nil, Tlong)
+  | Oshrl => (Tlong :: Tint :: nil, Tlong)
+  | Oshrlimm _ => (Tlong :: nil, Tlong)
+  | Oshrxlimm _ => (Tlong :: nil, Tlong)
+  | Oshrlu => (Tlong :: Tint :: nil, Tlong)
+  | Orolml _ _ => (Tlong :: nil, Tlong)
+  | Olongoffloat => (Tfloat :: nil, Tlong)
+  | Ofloatoflong => (Tlong :: nil, Tfloat)
   | Onegf => (Tfloat :: nil, Tfloat)
   | Oabsf => (Tfloat :: nil, Tfloat)
   | Oaddf => (Tfloat :: Tfloat :: nil, Tfloat)
@@ -428,6 +523,35 @@ Proof with (try exact I; try reflexivity).
   destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   destruct v0...
   destruct v0; destruct v1...
+  exact I.
+  destruct v0...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1; simpl in *; inv H0.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
+  destruct v0; destruct v1; simpl in *; inv H0. destruct (Int64.eq i0 Int64.zero); inv H2...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
+  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  destruct v0; simpl... destruct (Int.ltu i Int64.iwordsize')...
+  destruct v0; simpl in *; inv H0. destruct (Int.ltu i (Int.repr 63)); inv H2...
+  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  destruct v0...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
+  destruct v0; simpl in H0; inv H0...
   destruct v0...
   destruct v0...
   destruct v0; destruct v1...
@@ -491,6 +615,10 @@ Definition negate_condition (cond: condition): condition :=
   | Cnotcompf c => Ccompf c
   | Cmaskzero n => Cmasknotzero n
   | Cmasknotzero n => Cmaskzero n
+  | Ccompl c => Ccompl(negate_comparison c)
+  | Ccomplu c => Ccomplu(negate_comparison c)
+  | Ccomplimm c n => Ccomplimm (negate_comparison c) n
+  | Ccompluimm c n => Ccompluimm (negate_comparison c) n
   end.
 
 Lemma eval_negate_condition:
@@ -506,6 +634,10 @@ Proof.
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0); auto. destruct b; auto.
   repeat (destruct vl; auto).
   repeat (destruct vl; auto). destruct (Val.maskzero_bool v i) as [[]|]; auto.
+  repeat (destruct vl; auto). apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmplu_bool.
 Qed.
 
 (** Shifting stack-relative references.  This is used in [Stacking]. *)
@@ -571,7 +703,7 @@ Lemma eval_offset_addressing:
   eval_addressing ge sp addr args = Some v ->
   eval_addressing ge sp addr' args = Some(Val.add v (Vint (Int.repr delta))).
 Proof.
-  intros. 
+  intros.
   assert (D: Ptrofs.repr delta = Ptrofs.of_int (Int.repr delta)) by (symmetry; auto with ptrofs).
   destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst.
 - rewrite Val.add_assoc; auto.
@@ -599,6 +731,8 @@ Definition op_depends_on_memory (op: operation) : bool :=
   match op with
   | Ocmp (Ccompu _) => true
   | Ocmp (Ccompuimm _ _) => true
+  | Ocmp (Ccomplu _) => Archi.ppc64
+  | Ocmp (Ccompluimm _ _) => Archi.ppc64
   | _ => false
   end.
 
@@ -736,6 +870,10 @@ Proof.
   inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; try discriminate; auto.
   inv H3; try discriminate; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; try discriminate; auto.
+  inv H3; try discriminate; auto.
 Qed.
 
 Ltac TrivialExists :=
@@ -773,7 +911,7 @@ Proof.
     destruct (Int.eq i0 Int.zero
          || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. TrivialExists.
   inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+  destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
   inv H4; inv H2; simpl; auto.
   inv H4; simpl; auto.
   inv H4; inv H2; simpl; auto.
@@ -796,6 +934,36 @@ Proof.
   inv H4; simpl; auto.
   inv H4; simpl; auto.
   inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H3; simpl in H1; inv H1. simpl.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. TrivialExists.
+  inv H4; inv H3; simpl in H1; inv H1. simpl.
+    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  inv H4; simpl; auto. destruct (Int.ltu i Int64.iwordsize'); auto.
+  inv H4; simpl in *; inv H1. destruct (Int.ltu i (Int.repr 63)); inv H2. econstructor; eauto.
+  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  inv H4; simpl; auto.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
+  exists (Vlong i); auto.
+  inv H4; simpl in H1; inv H1; simpl. TrivialExists.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
@@ -993,7 +1161,7 @@ Proof.
   rewrite eval_shift_stack_addressing.
   eapply eval_addressing_inj with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
   intros. apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 Lemma eval_operation_inject:
@@ -1013,7 +1181,7 @@ Proof.
   intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
   intros; eapply Mem.different_pointers_inject; eauto.
   intros. apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 End EVAL_INJECT.
@@ -1042,14 +1210,14 @@ End EVAL_INJECT.
 *)
 
 Inductive rlw_state: Type :=
-  | RLW_S0 : rlw_state
-  | RLW_S1 : rlw_state
-  | RLW_S2 : rlw_state
-  | RLW_S3 : rlw_state
-  | RLW_S4 : rlw_state
-  | RLW_S5 : rlw_state
-  | RLW_S6 : rlw_state
-  | RLW_Sbad : rlw_state.
+  | RLW_S0
+  | RLW_S1
+  | RLW_S2
+  | RLW_S3
+  | RLW_S4
+  | RLW_S5
+  | RLW_S6
+  | RLW_Sbad.
 
 Definition rlw_transition (s: rlw_state) (b: bool) : rlw_state :=
   match s, b with
@@ -1082,13 +1250,71 @@ Definition rlw_accepting (s: rlw_state) : bool :=
   | RLW_Sbad => false
   end.
 
-Fixpoint is_rlw_mask_rec (n: nat) (s: rlw_state) (x: Z) {struct n} : bool :=
+Fixpoint is_mask_rec {A: Type} (trans: A -> bool -> A) (accept: A -> bool)
+                     (n: nat) (s: A) (x: Z) {struct n} : bool :=
   match n with
   | O =>
-      rlw_accepting s
+      accept s
   | S m =>
-      is_rlw_mask_rec m (rlw_transition s (Z.odd x)) (Z.div2 x)
+      is_mask_rec trans accept m (trans s (Z.odd x)) (Z.div2 x)
   end.
 
 Definition is_rlw_mask (x: int) : bool :=
-  is_rlw_mask_rec Int.wordsize RLW_S0 (Int.unsigned x).
+  is_mask_rec rlw_transition rlw_accepting Int.wordsize RLW_S0 (Int.unsigned x).
+
+(** For the 64-bit [rldicl] and [rldicr] instructions, the acceptable
+  masks are [1111100000] and [0000011111], that is, some ones in the
+  high bits and some zeroes in the low bits, or conversely.  All ones
+  is OK, but not all zeroes.  The corresponding automata are:
+<<
+      0          1
+     / \        / \
+     \ /        \ /          (accepting: [1])
+     [0] --1--> [1]
+
+
+                 1          0
+                / \        / \
+                \ /        \ /      (accepting: [1], [2])
+     [0] --1--> [1] --0--> [2]
+>>
+*)
+
+Inductive rll_state: Type := RLL_S0 | RLL_S1 | RLL_Sbad.
+
+Definition rll_transition (s: rll_state) (b: bool) : rll_state :=
+  match s, b with
+  | RLL_S0, false => RLL_S0
+  | RLL_S0, true => RLL_S1
+  | RLL_S1, true => RLL_S1
+  | _, _ => RLL_Sbad
+  end.
+
+Definition rll_accepting (s: rll_state) : bool :=
+  match s with
+  | RLL_S1 => true
+  | _ => false
+  end.
+
+Inductive rlr_state: Type := RLR_S0 | RLR_S1 | RLR_S2 | RLR_Sbad.
+
+Definition rlr_transition (s: rlr_state) (b: bool) : rlr_state :=
+  match s, b with
+  | RLR_S0, true => RLR_S1
+  | RLR_S1, false => RLR_S2
+  | RLR_S1, true => RLR_S1
+  | RLR_S2, false => RLR_S2
+  | _, _ => RLR_Sbad
+  end.
+
+Definition rlr_accepting (s: rlr_state) : bool :=
+  match s with
+  | RLR_S1 | RLR_S2 => true
+  | _ => false
+  end.
+
+Definition is_rldl_mask (x: int64) : bool :=    (*r 0s in the high bits, 1s in the low bits *)
+  is_mask_rec rll_transition rll_accepting Int64.wordsize RLL_S0 (Int64.unsigned x).
+
+Definition is_rldr_mask (x: int64) : bool :=    (*r 1s in the high bits, 0s in the low bits *)
+  is_mask_rec rlr_transition rlr_accepting Int64.wordsize RLR_S0 (Int64.unsigned x).
diff --git a/powerpc/PrintOp.ml b/powerpc/PrintOp.ml
index a3fac2c3..cffaafdb 100644
--- a/powerpc/PrintOp.ml
+++ b/powerpc/PrintOp.ml
@@ -110,6 +110,32 @@ let print_operation reg pp = function
   | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
   | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
   | Ocmp c, args -> print_condition reg pp (c, args)
+  | Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
+  | Ocast32signed, [r1] -> fprintf pp "int32signed(%a)" reg r1
+  | Ocast32unsigned, [r1] -> fprintf pp "int32unsigned(%a)" reg r1
+  | Oaddl, [r1;r2] -> fprintf pp "%a +l %a" reg r1 reg r2
+  | Oaddlimm n, [r1] -> fprintf pp "%a +l %Ld" reg r1 (camlint64_of_coqint n)
+  | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2
+  | Onegl, [r1] -> fprintf pp "-l %a" reg r1
+  | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2
+  | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2
+  | Odivlu, [r1;r2] -> fprintf pp "%a /lu %a" reg r1 reg r2
+  | Oandl, [r1;r2] -> fprintf pp "%a &l %a" reg r1 reg r2
+  | Oandlimm n, [r1] -> fprintf pp "%a &l %Ld" reg r1 (camlint64_of_coqint n)
+  | Oorl, [r1;r2] -> fprintf pp "%a |l %a" reg r1 reg r2
+  | Oorlimm n, [r1] -> fprintf pp "%a |l %Ld" reg r1 (camlint64_of_coqint n)
+  | Oxorl, [r1;r2] -> fprintf pp "%a ^l %a" reg r1 reg r2
+  | Oxorlimm n, [r1] -> fprintf pp "%a ^l %Ld" reg r1 (camlint64_of_coqint n)
+  | Onotl, [r1] -> fprintf pp "~l %a" reg r1
+  | Oshll, [r1;r2] -> fprintf pp "%a <<l %a" reg r1 reg r2
+  | Oshrl, [r1;r2] -> fprintf pp "%a >>ls %a" reg r1 reg r2
+  | Oshrlimm n, [r1] -> fprintf pp "%a >>ls %ld" reg r1 (camlint_of_coqint n)
+  | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2
+  | Orolml(n,m), [r1] ->
+      fprintf pp "(%a rol %Ld) &l 0x%Lx"
+              reg r1 (camlint64_of_coqint n) (camlint64_of_coqint m)
+  | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
+  | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
   | _ -> fprintf pp "<bad operator>"
 
 let print_addressing reg pp = function
diff --git a/powerpc/SelectLong.vp b/powerpc/SelectLong.vp
index cc7a38f6..5f13774b 100644
--- a/powerpc/SelectLong.vp
+++ b/powerpc/SelectLong.vp
@@ -18,4 +18,337 @@ Require Import AST Integers Floats.
 Require Import Op CminorSel.
 Require Import SelectOp SplitLong.
 
-(** This file is empty because we use the default implementation provided in [SplitLong]. *)
+Local Open Scope cminorsel_scope.
+Local Open Scope string_scope.
+
+Section SELECT.
+
+Context {hf: helper_functions}.
+
+Definition longconst (n: int64) : expr :=
+  if Archi.splitlong then SplitLong.longconst n else Eop (Olongconst n) Enil.
+
+Definition is_longconst (e: expr) :=
+  if Archi.splitlong then SplitLong.is_longconst e else
+  match e with
+  | Eop (Olongconst n) Enil => Some n
+  | _ => None
+  end.
+
+Definition intoflong (e: expr) :=
+  if Archi.splitlong then SplitLong.intoflong e else
+  match is_longconst e with
+  | Some n => Eop (Ointconst (Int.repr (Int64.unsigned n))) Enil
+  | None =>  Eop Olowlong (e ::: Enil)
+  end.
+
+Definition longofint (e: expr) :=
+  if Archi.splitlong then SplitLong.longofint e else
+  match is_intconst e with
+  | Some n => longconst (Int64.repr (Int.signed n))
+  | None =>  Eop Ocast32signed (e ::: Enil)
+  end.
+
+Definition longofintu (e: expr) :=
+  if Archi.splitlong then SplitLong.longofintu e else
+  match is_intconst e with
+  | Some n => longconst (Int64.repr (Int.unsigned n))
+  | None =>  Eop Ocast32unsigned (e ::: Enil)
+  end.
+
+Nondetfunction notl (e: expr) :=
+  if Archi.splitlong then SplitLong.notl e else
+  match e with
+  | Eop (Olongconst n) Enil => longconst (Int64.not n)
+  | Eop Onotl (t1:::Enil) => t1
+  | _ => Eop Onotl (e:::Enil)
+  end.
+
+Nondetfunction andlimm (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then longconst Int64.zero else
+  if Int64.eq n1 Int64.mone then e2 else
+  match e2 with
+  | Eop (Olongconst n2) Enil =>
+      longconst (Int64.and n1 n2)
+  | Eop (Oandlimm n2) (t2:::Enil) =>
+      Eop (Oandlimm (Int64.and n1 n2)) (t2:::Enil)
+  | Eop (Orolml amount2 mask2) (t2:::Enil) =>
+      Eop (Orolml amount2 (Int64.and n1 mask2)) (t2:::Enil)
+  | _ =>
+      Eop (Oandlimm n1) (e2:::Enil)
+  end.
+
+Nondetfunction andl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.andl e1 e2 else
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => andlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => andlimm n2 t1
+  | _, _ => Eop Oandl (e1:::e2:::Enil)
+  end.
+
+Nondetfunction orlimm (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then e2 else
+  if Int64.eq n1 Int64.mone then longconst Int64.mone else
+  match e2 with
+  | Eop (Olongconst n2) Enil => longconst (Int64.or n1 n2)
+  | Eop (Oorlimm n2) (t2:::Enil) => Eop (Oorlimm (Int64.or n1 n2)) (t2:::Enil)
+  | _ => Eop (Oorlimm n1) (e2:::Enil)
+  end.
+
+Nondetfunction orl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.orl e1 e2 else
+  match e1, e2 with
+  | Eop (Orolml amount1  mask1) (t1:::Enil), Eop (Orolml amount2 mask2) (t2:::Enil) =>
+      if Int.eq amount1 amount2 && same_expr_pure t1 t2
+      then Eop (Orolml amount1 (Int64.or mask1 mask2)) (t1:::Enil)
+      else Eop Oorl (e1:::e2:::Enil)
+  | Eop (Olongconst n1) Enil, t2 => orlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => orlimm n2 t1
+  | _, _ => Eop Oorl (e1:::e2:::Enil)
+  end.
+
+Nondetfunction xorlimm (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then e2 else
+  if Int64.eq n1 Int64.mone then notl e2 else
+  match e2 with
+  | Eop (Olongconst n2) Enil => longconst (Int64.xor n1 n2)
+  | Eop (Oxorlimm n2) (t2:::Enil) => Eop (Oxorlimm (Int64.xor n1 n2)) (t2:::Enil)
+  | Eop Onotl (t2:::Enil) => Eop (Oxorlimm (Int64.not n1)) (t2:::Enil)
+  | _ => Eop (Oxorlimm n1) (e2:::Enil)
+  end.
+
+Nondetfunction xorl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.xorl e1 e2 else
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => xorlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => xorlimm n2 t1
+  | _, _ => Eop Oxorl (e1:::e2:::Enil)
+  end.
+
+Nondetfunction rolml (e1: expr) (amount2: int) (mask2: int64) :=
+  if Int.eq amount2 Int.zero then andlimm mask2 e1 else
+  match e1 with
+  | Eop (Olongconst n1) Enil =>
+      longconst (Int64.and (Int64.rol' n1 amount2) mask2)
+  | Eop (Orolml amount1 mask1) (t1:::Enil) =>
+      Eop (Orolml (Int.modu (Int.add amount1 amount2) Int64.iwordsize')
+                  (Int64.and (Int64.rol' mask1 amount2) mask2))
+          (t1:::Enil)
+  | Eop (Oandlimm mask1) (t1:::Enil) =>
+      Eop (Orolml amount2
+                  (Int64.and (Int64.rol' mask1 amount2) mask2))
+          (t1:::Enil)
+  | _ =>
+      Eop (Orolml amount2 mask2) (e1:::Enil)
+  end.
+
+Definition shllimm (e1: expr) (n: int) :=
+  if Archi.splitlong then SplitLong.shllimm e1 n else
+  if Int.eq n Int.zero then e1 else
+  if negb (Int.ltu n Int64.iwordsize') then
+    Eop Oshll (e1:::Eop (Ointconst n) Enil:::Enil)
+  else
+    let n' := Int64.repr (Int.unsigned n) in
+    rolml e1 n (Int64.shl Int64.mone n').
+
+Definition shrluimm (e1: expr) (n: int) :=
+  if Archi.splitlong then SplitLong.shrluimm e1 n else
+  if Int.eq n Int.zero then e1 else
+  if negb (Int.ltu n Int64.iwordsize') then
+    Eop Oshrlu (e1:::Eop (Ointconst n) Enil:::Enil)
+  else
+    let n' := Int64.repr (Int.unsigned n) in
+    rolml e1 (Int.sub Int64.iwordsize' n) (Int64.shru Int64.mone n').
+
+Nondetfunction shrlimm (e1: expr) (n: int) :=
+  if Archi.splitlong then SplitLong.shrlimm e1 n else
+  if Int.eq n Int.zero then e1 else
+  if negb (Int.ltu n Int64.iwordsize') then
+    Eop Oshrl (e1:::Eop (Ointconst n) Enil:::Enil)
+  else
+  match e1 with
+  | Eop (Olongconst n1) Enil =>
+      Eop (Olongconst(Int64.shr' n1 n)) Enil
+  | Eop (Oshrlimm n1) (t1:::Enil) =>
+      if Int.ltu (Int.add n n1) Int64.iwordsize'
+      then Eop (Oshrlimm (Int.add n n1)) (t1:::Enil)
+      else Eop (Oshrlimm n) (e1:::Enil)
+  | _ =>
+      Eop (Oshrlimm n) (e1:::Enil)
+  end.
+
+Definition shll (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.shll e1 e2 else
+  match is_intconst e2 with
+  | Some n2 => shllimm e1 n2
+  | None => Eop Oshll (e1:::e2:::Enil)
+  end.
+
+Definition shrl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.shrl e1 e2 else
+  match is_intconst e2 with
+  | Some n2 => shrlimm e1 n2
+  | None => Eop Oshrl (e1:::e2:::Enil)
+  end.
+
+Definition shrlu (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.shrlu e1 e2 else
+  match is_intconst e2 with
+  | Some n2 => shrluimm e1 n2
+  | _ => Eop Oshrlu (e1:::e2:::Enil)
+  end.
+
+Nondetfunction addlimm (n: int64) (e: expr) :=
+  if Int64.eq n Int64.zero then e else
+  match e with
+  | Eop (Olongconst m) Enil       => longconst (Int64.add m n)
+  | Eop (Oaddlimm m) (t ::: Enil) => Eop (Oaddlimm(Int64.add m n)) (t ::: Enil)
+  | _                             => Eop (Oaddlimm n) (e ::: Enil)
+  end.
+
+Nondetfunction addl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.addl e1 e2 else
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => addlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => addlimm n2 t1
+  | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm (Int64.add n1 n2) (Eop Oaddl (t1:::t2:::Enil))
+  | Eop (Oaddlimm n1) (t1:::Enil), t2 =>
+      addlimm n1 (Eop Oaddl (t1:::t2:::Enil))
+  | t1, Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm n2 (Eop Oaddl (t1:::t2:::Enil))
+  | _, _ =>
+      Eop Oaddl (e1:::e2:::Enil)
+  end.
+
+Definition negl (e: expr) :=
+  if Archi.splitlong then SplitLong.negl e else
+  match is_longconst e with
+  | Some n => longconst (Int64.neg n)
+  | None =>  Eop Onegl (e ::: Enil)
+  end.
+
+Nondetfunction subl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.subl e1 e2 else
+  match e1, e2 with
+  | t1, Eop (Olongconst n2) Enil => addlimm (Int64.neg n2) t1
+  | _, _ =>
+      Eop Osubl (e1:::e2:::Enil)
+  end.
+
+Definition mullimm_base (n1: int64) (e2: expr) :=
+  match Int64.one_bits' n1 with
+  | i :: nil =>
+      shllimm e2 i
+  | i :: j :: nil =>
+      Elet e2 (addl (shllimm (Eletvar 0) i) (shllimm (Eletvar 0) j))
+  | _ =>
+       Eop Omull (e2:::longconst n1:::Enil)
+  end.
+
+Nondetfunction mullimm (n1: int64) (e2: expr) :=
+  if Archi.splitlong then SplitLong.mullimm n1 e2
+  else if Int64.eq n1 Int64.zero then longconst Int64.zero
+  else if Int64.eq n1 Int64.one then e2
+  else match e2 with
+  | Eop (Olongconst n2) Enil => longconst (Int64.mul n1 n2)
+  | _ => mullimm_base n1 e2
+  end.
+
+Nondetfunction mull (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.mull e1 e2 else
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => mullimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => mullimm n2 t1
+  | _, _ => Eop Omull (e1:::e2:::Enil)
+  end.
+
+Definition mullhu (e1: expr) (n2: int64) :=
+  if Archi.splitlong then SplitLong.mullhu e1 n2 else
+  Eop Omullhu (e1 ::: longconst n2 ::: Enil).
+
+Definition mullhs (e1: expr) (n2: int64) :=
+  if Archi.splitlong then SplitLong.mullhs e1 n2 else
+  Eop Omullhs (e1 ::: longconst n2 ::: Enil).
+
+Definition shrxlimm (e: expr) (n: int) :=
+  if Archi.splitlong then SplitLong.shrxlimm e n else
+  if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil).
+
+
+Definition modl_aux (divop: operation) (e1 e2: expr) :=
+  Elet e1
+    (Elet (lift e2)
+      (Eop Osubl (Eletvar 1 :::
+                  Eop Omull (Eop divop (Eletvar 1 ::: Eletvar 0 ::: Enil) :::
+                             Eletvar 0 :::
+                             Enil) :::
+                  Enil))).
+
+Definition divls_base (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.divls_base e1 e2 else Eop Odivl (e1:::e2:::Enil).
+
+Definition divlu_base (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.divlu_base e1 e2 else Eop Odivlu (e1:::e2:::Enil).
+
+Definition modls_base (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.modls_base e1 e2 else
+  let default := modl_aux Odivl e1 e2 in
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 =>
+      if Int64.eq Int64.zero n2 then default else
+      if Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone then default
+      else longconst (Int64.mods n1 n2)
+  | _, _ => default
+  end.
+
+Definition modlu_base (e1 e2: expr) :=
+  if Archi.splitlong then SplitLong.modlu_base e1 e2 else
+  let default := modl_aux Odivlu e1 e2 in
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 =>
+      if Int64.eq Int64.zero n2
+      then default
+      else longconst (Int64.modu n1 n2)
+  | _, Some n2 =>
+      match Int64.is_power2 n2 with
+      | Some _ => andlimm (Int64.sub n2 Int64.one) e1
+      | _ => default
+      end
+  | _, _ => default
+  end.
+
+Definition cmplu (c: comparison) (e1 e2: expr) :=
+  if Archi.splitlong then SplitLong.cmplu c e1 e2 else
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 =>
+      Eop (Ointconst (if Int64.cmpu c n1 n2 then Int.one else Int.zero)) Enil
+  | Some n1, None => Eop (Ocmp (Ccompluimm (swap_comparison c) n1)) (e2:::Enil)
+  | None, Some n2 => Eop (Ocmp (Ccompluimm c n2)) (e1:::Enil)
+  | None, None => Eop (Ocmp (Ccomplu c)) (e1:::e2:::Enil)
+  end.
+
+Definition cmpl (c: comparison) (e1 e2: expr) :=
+  if Archi.splitlong then SplitLong.cmpl c e1 e2 else
+  match is_longconst e1, is_longconst e2 with
+  | Some n1, Some n2 =>
+      Eop (Ointconst (if Int64.cmp c n1 n2 then Int.one else Int.zero)) Enil
+  | Some n1, None => Eop (Ocmp (Ccomplimm (swap_comparison c) n1)) (e2:::Enil)
+  | None, Some n2 => Eop (Ocmp (Ccomplimm c n2)) (e1:::Enil)
+  | None, None => Eop (Ocmp (Ccompl c)) (e1:::e2:::Enil)
+  end.
+
+Definition longoffloat (e: expr) :=
+  if Archi.splitlong
+  then SplitLong.longoffloat e
+  else Eop Olongoffloat (e:::Enil).
+
+Definition floatoflong (e: expr) :=
+  if Archi.splitlong
+  then SplitLong.floatoflong e
+  else Eop Ofloatoflong (e:::Enil).
+
+Definition longofsingle (arg: expr) :=
+  longoffloat (floatofsingle arg).
+
+End SELECT.
diff --git a/powerpc/SelectLongproof.v b/powerpc/SelectLongproof.v
index a82c082c..3e5e82d3 100644
--- a/powerpc/SelectLongproof.v
+++ b/powerpc/SelectLongproof.v
@@ -10,7 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Instruction selection for 64-bit integer operations *)
+(** Correctness of instruction selection for 64-bit integer operations *)
 
 Require Import String Coqlib Maps Integers Floats Errors.
 Require Archi.
@@ -19,4 +19,625 @@ Require Import Cminor Op CminorSel.
 Require Import SelectOp SelectOpproof SplitLong SplitLongproof.
 Require Import SelectLong.
 
-(** This file is empty because we use the default implementation provided in [SplitLong]. *)
+Local Open Scope cminorsel_scope.
+Local Open Scope string_scope.
+
+(** * Correctness of the instruction selection functions for 64-bit operators *)
+
+Section CMCONSTR.
+
+Variable prog: program.
+Variable hf: helper_functions.
+Hypothesis HELPERS: helper_functions_declared prog hf.
+Let ge := Genv.globalenv prog.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
+Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop :=
+  forall le a x,
+  eval_expr ge sp e m le a x ->
+  exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v.
+
+Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop :=
+  forall le a x b y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v.
+
+Definition partial_unary_constructor_sound (cstr: expr -> expr) (sem: val -> option val) : Prop :=
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  sem x = Some y ->
+  exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef y v.
+
+Definition partial_binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> option val) : Prop :=
+  forall le a x b y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  sem x y = Some z ->
+  exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef z v.
+
+Theorem eval_longconst:
+  forall le n, eval_expr ge sp e m le (longconst n) (Vlong n).
+Proof.
+  unfold longconst; intros; destruct Archi.splitlong.
+  apply SplitLongproof.eval_longconst.
+  EvalOp.
+Qed.
+
+Lemma is_longconst_sound:
+  forall v a n le,
+  is_longconst a = Some n -> eval_expr ge sp e m le a v -> v = Vlong n.
+Proof with (try discriminate).
+  intros. unfold is_longconst in *. destruct Archi.splitlong.
+  eapply SplitLongproof.is_longconst_sound; eauto.
+  assert (a = Eop (Olongconst n) Enil).
+  { destruct a... destruct o... destruct e0... congruence. }
+  subst a. InvEval. auto.
+Qed.
+
+Theorem eval_intoflong: unary_constructor_sound intoflong Val.loword.
+Proof.
+  unfold intoflong; destruct Archi.splitlong. apply SplitLongproof.eval_intoflong.
+  red; intros. destruct (is_longconst a) as [n|] eqn:C.
+- TrivialExists. simpl. erewrite (is_longconst_sound x) by eauto. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_longofintu: unary_constructor_sound longofintu Val.longofintu.
+Proof.
+  unfold longofintu; destruct Archi.splitlong. apply SplitLongproof.eval_longofintu.
+  red; intros. destruct (is_intconst a) as [n|] eqn:C.
+- econstructor; split. apply eval_longconst.
+  exploit is_intconst_sound; eauto. intros; subst x. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_longofint: unary_constructor_sound longofint Val.longofint.
+Proof.
+  unfold longofint; destruct Archi.splitlong. apply SplitLongproof.eval_longofint.
+  red; intros. destruct (is_intconst a) as [n|] eqn:C.
+- econstructor; split. apply eval_longconst.
+  exploit is_intconst_sound; eauto. intros; subst x. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_notl: unary_constructor_sound notl Val.notl.
+Proof.
+  unfold notl; destruct Archi.splitlong. apply SplitLongproof.eval_notl.
+  red; intros. destruct (notl_match a).
+- InvEval. econstructor; split. apply eval_longconst. auto.
+- InvEval. subst. exists v1; split; auto. destruct v1; simpl; auto. rewrite Int64.not_involutive; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_andlimm: forall n, unary_constructor_sound (andlimm n) (fun v => Val.andl v (Vlong n)).
+Proof.
+  unfold andlimm; intros; red; intros.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists (Vlong Int64.zero); split. apply eval_longconst.
+  subst. destruct x; simpl; auto. rewrite Int64.and_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  exists x; split. assumption.
+  subst. destruct x; simpl; auto. rewrite Int64.and_mone; auto.
+  destruct (andlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.and_commut; auto.
+- TrivialExists. simpl. rewrite Val.andl_assoc. rewrite Int64.and_commut; auto.
+- TrivialExists. simpl. destruct v1; simpl; auto. unfold Int64.rolm. rewrite Int64.and_assoc.
+  rewrite  (Int64.and_commut mask2 n). reflexivity.
+- TrivialExists.
+Qed.
+
+Theorem eval_andl: binary_constructor_sound andl Val.andl.
+Proof.
+  unfold andl; destruct Archi.splitlong. apply SplitLongproof.eval_andl.
+  red; intros. destruct (andl_match a b).
+- InvEval. rewrite Val.andl_commut. apply eval_andlimm; auto.
+- InvEval. apply eval_andlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_orlimm: forall n, unary_constructor_sound (orlimm n) (fun v => Val.orl v (Vlong n)).
+Proof.
+  unfold orlimm; intros; red; intros.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.or_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  econstructor; split. apply eval_longconst. subst. destruct x; simpl; auto. rewrite Int64.or_mone; auto.
+  destruct (orlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.or_commut; auto.
+- TrivialExists. simpl. rewrite Val.orl_assoc. rewrite Int64.or_commut; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_orl: binary_constructor_sound orl Val.orl.
+Proof.
+  unfold orl; destruct Archi.splitlong. apply SplitLongproof.eval_orl.
+  red; intros.
+  assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop Oorl (a:::b:::Enil)) v /\ Val.lessdef (Val.orl x y) v) by TrivialExists.
+  assert (ROLM: forall v n1 n2 m1 m2,
+             n1 = n2 ->
+             Val.lessdef (Val.orl (Val.rolml v n1 m1) (Val.rolml v n2 m2))
+                         (Val.rolml v n1 (Int64.or m1 m2))).
+  { intros. destruct v; simpl; auto. unfold Int64.rolm.
+    rewrite Int64.and_or_distrib. rewrite H1. auto. }
+  destruct (orl_match a b).
+- predSpec Int.eq Int.eq_spec amount1 amount2; simpl.
+   destruct (same_expr_pure t1 t2) eqn:?; auto. InvEval.
+   exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst.
+   exists (Val.rolml v0 amount2 (Int64.or mask1 mask2)); split. EvalOp.
+   apply ROLM; auto. auto.
+- InvEval. rewrite Val.orl_commut. apply eval_orlimm; auto.
+- InvEval. apply eval_orlimm; auto.
+- apply DEFAULT.
+Qed.
+
+Theorem eval_xorlimm: forall n, unary_constructor_sound (xorlimm n) (fun v => Val.xorl v (Vlong n)).
+Proof.
+  unfold xorlimm; intros; red; intros.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.xor_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  replace (Val.xorl x (Vlong n)) with (Val.notl x). apply eval_notl; auto.
+  subst n. destruct x; simpl; auto.
+  destruct (xorlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.xor_commut; auto.
+- TrivialExists. simpl. rewrite Val.xorl_assoc. rewrite Int64.xor_commut; auto.
+- TrivialExists. simpl. destruct v1; simpl; auto. unfold Int64.not.
+  rewrite Int64.xor_assoc. apply f_equal. apply f_equal. apply f_equal.
+  apply Int64.xor_commut.
+- TrivialExists.
+Qed.
+
+Theorem eval_xorl: binary_constructor_sound xorl Val.xorl.
+Proof.
+  unfold xorl; destruct Archi.splitlong. apply SplitLongproof.eval_xorl.
+  red; intros. destruct (xorl_match a b).
+- InvEval. rewrite Val.xorl_commut. apply eval_xorlimm; auto.
+- InvEval. apply eval_xorlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_rolml: forall amount mask, unary_constructor_sound (fun v => rolml v amount mask) (fun v => Val.rolml v amount mask).
+Proof.
+  unfold rolml. intros; red; intros.
+  predSpec Int.eq Int.eq_spec amount Int.zero.
+  rewrite H0.
+  exploit (eval_andlimm). eauto. intros (x0 & (H1 & H2)).
+  exists x0. split. apply H1. destruct x; auto. simpl. unfold Int64.rolm.
+  change (Int64.repr (Int.unsigned Int.zero)) with Int64.zero. rewrite Int64.rol_zero.
+  apply H2.
+  destruct (rolml_match a).
+- econstructor; split. apply eval_longconst. simpl. InvEval. unfold Val.rolml. auto.
+- InvEval. TrivialExists. simpl. rewrite <- H. 
+  unfold Val.rolml; destruct v1; simpl; auto.
+  rewrite Int64.rolm_rolm by (exists (two_p (64-6)); auto).
+  f_equal. f_equal. f_equal.
+  unfold Int64.add. rewrite ! Int64.int_unsigned_repr. unfold Int.add. 
+  set (a := Int.unsigned amount1 + Int.unsigned amount).
+  unfold Int.modu, Int64.modu. 
+  change (Int.unsigned Int64.iwordsize') with 64.
+  change (Int64.unsigned Int64.iwordsize) with 64.
+  f_equal.
+  rewrite Int.unsigned_repr. 
+  apply Int.eqmod_mod_eq. omega. 
+  apply Int.eqmod_trans with a.
+  apply Int.eqmod_divides with Int.modulus. apply Int.eqm_sym. apply Int.eqm_unsigned_repr.
+  exists (two_p (32-6)); auto.
+  apply Int.eqmod_divides with Int64.modulus. apply Int64.eqm_unsigned_repr.
+  exists (two_p (64-6)); auto.
+  assert (0 <= Int.unsigned (Int.repr a) mod 64 < 64) by (apply Z_mod_lt; omega).
+  assert (64 < Int.max_unsigned) by (compute; auto).
+  omega.
+- InvEval. TrivialExists. simpl. rewrite <- H.
+  unfold Val.rolml; destruct v1; simpl; auto. unfold Int64.rolm.
+  rewrite Int64.rol_and. rewrite Int64.and_assoc. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_shllimm: forall n, unary_constructor_sound (fun e => shllimm e n) (fun v => Val.shll v (Vint n)).
+Proof.
+  intros; unfold shllimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shllimm; auto.
+  red; intros.
+  assert (ROLM: forall n1 v,
+             Int.ltu n1 Int64.iwordsize' = true ->
+             Val.shll v (Vint n1) = Val.rolml v n1 (Int64.shl Int64.mone (Int64.repr (Int.unsigned n1)))).
+  { intros. destruct v; auto. simpl. rewrite H0. rewrite <- Int64.shl_rolm. unfold Int64.shl.
+    rewrite Int64.int_unsigned_repr. constructor. unfold Int64.ltu. rewrite Int64.int_unsigned_repr.
+    apply H0. }
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  exists x; split; auto. subst n; destruct x; simpl; auto.
+  destruct (Int.ltu Int.zero Int64.iwordsize'); auto.
+  change (Int64.shl' i Int.zero) with (Int64.shl i Int64.zero). rewrite Int64.shl_zero; auto.
+  destruct (Int.ltu n Int64.iwordsize') eqn:LT; simpl.
+- rewrite ROLM by apply LT. apply eval_rolml. auto.
+- TrivialExists. constructor; eauto.  constructor. EvalOp. simpl; eauto. constructor.
+  constructor.
+Qed.
+
+Theorem eval_shrluimm: forall n, unary_constructor_sound (fun e => shrluimm e n) (fun v => Val.shrlu v (Vint n)).
+Proof.
+  unfold shrluimm; destruct Archi.splitlong. apply SplitLongproof.eval_shrluimm. auto.
+  red; intros.
+  assert (ROLM: forall n1 v,
+             Int.ltu n1 Int64.iwordsize' = true ->
+             Val.shrlu v (Vint n1) = Val.rolml v (Int.sub Int64.iwordsize' n1) (Int64.shru Int64.mone (Int64.repr (Int.unsigned n1)))).
+  { intros. destruct v; auto. simpl. rewrite H0.
+    rewrite Int64.int_sub_ltu by apply H0. rewrite Int64.repr_unsigned. rewrite <- Int64.shru_rolm. unfold Int64.shru'.  unfold Int64.shru.
+    rewrite Int64.unsigned_repr. reflexivity. apply Int64.int_unsigned_range.
+    unfold Int64.ltu. rewrite Int64.int_unsigned_repr. auto.
+  }
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  exists x. split. apply H. destruct x; simpl; auto. rewrite H0. rewrite Int64.shru'_zero. constructor.
+  destruct (Int.ltu n Int64.iwordsize') eqn:LT; simpl.
+- rewrite ROLM by apply LT. apply eval_rolml. auto.
+- TrivialExists. constructor; eauto.  constructor. EvalOp. simpl; eauto. constructor.
+  constructor.
+Qed.
+
+Theorem eval_shrlimm: forall n, unary_constructor_sound (fun e => shrlimm e n) (fun v => Val.shrl v (Vint n)).
+Proof.
+  intros; unfold shrlimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrlimm; auto.
+  red; intros.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  exists x; split; auto. subst n; destruct x; simpl; auto.
+  destruct (Int.ltu Int.zero Int64.iwordsize'); auto.
+  change (Int64.shr' i Int.zero) with (Int64.shr i Int64.zero). rewrite Int64.shr_zero; auto.
+  destruct (Int.ltu n Int64.iwordsize') eqn:LT; simpl.
+  assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshrlimm n) (a:::Enil)) v
+                         /\  Val.lessdef (Val.shrl x (Vint n)) v) by TrivialExists.
+  destruct (shrlimm_match a); InvEval.
+- TrivialExists. simpl; rewrite LT; auto.
+- destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto.
+  subst. econstructor; split. EvalOp. simpl; eauto.
+  destruct v1; simpl; auto. rewrite LT'.
+  destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto.
+  simpl; rewrite LT. rewrite Int.add_commut, Int64.shr'_shr'; auto. rewrite Int.add_commut; auto.
+- apply DEFAULT.
+- TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
+Qed.
+
+Theorem eval_shll: binary_constructor_sound shll Val.shll.
+Proof.
+  unfold shll. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shll; auto.
+  red; intros. destruct (is_intconst b) as [n2|] eqn:C.
+- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shllimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_shrlu: binary_constructor_sound shrlu Val.shrlu.
+Proof.
+  unfold shrlu. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrlu; auto.
+  red; intros. destruct (is_intconst b) as [n2|] eqn:C.
+- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shrluimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_shrl: binary_constructor_sound shrl Val.shrl.
+Proof.
+  unfold shrl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrl; auto.
+  red; intros. destruct (is_intconst b) as [n2|] eqn:C.
+- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shrlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_negl: unary_constructor_sound negl Val.negl.
+Proof.
+  unfold negl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_negl; auto.
+  red; intros. destruct (is_longconst a) as [n|] eqn:C.
+- exploit is_longconst_sound; eauto. intros EQ; subst x.
+  econstructor; split. apply eval_longconst. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_addlimm: forall n, unary_constructor_sound (addlimm n) (fun v => Val.addl v (Vlong n)).
+Proof.
+  unfold addlimm.
+  red; intros. predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists x. split; auto. rewrite H0. destruct x; auto. simpl. rewrite Int64.add_zero. constructor.
+  destruct (addlimm_match a).
+- econstructor; split. apply eval_longconst. simpl. InvEval. unfold Val.rolml. auto.
+- InvEval. TrivialExists. simpl. rewrite <- H. rewrite Val.addl_assoc. reflexivity.
+- InvEval. TrivialExists.
+Qed.
+
+
+Theorem eval_addl: binary_constructor_sound addl Val.addl.
+Proof.
+  unfold addl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_addl; auto.
+  red; intros. destruct (addl_match a b); InvEval; subst.
+- exploit (eval_addlimm n1); eauto. intros (n & (H1 & H2)). exists n. split; auto.
+  rewrite Val.addl_commut. exact H2.
+- exploit (eval_addlimm n2). apply H. auto.
+- rewrite Val.addl_permut_4. simpl.
+  apply eval_addlimm; EvalOp.
+- rewrite Val.addl_assoc. rewrite Val.addl_permut. rewrite Val.addl_commut.
+  apply eval_addlimm; EvalOp.
+- rewrite Val.addl_commut. rewrite Val.addl_assoc. rewrite Val.addl_permut.
+  rewrite Val.addl_commut. apply eval_addlimm; EvalOp. rewrite Val.addl_commut.
+  constructor.
+- TrivialExists.
+Qed.
+
+Theorem eval_subl: binary_constructor_sound subl Val.subl.
+Proof.
+  unfold subl. destruct Archi.splitlong eqn:SL.
+  apply SplitLongproof.eval_subl. apply Archi.splitlong_ptr32; auto.
+  red; intros; destruct (subl_match a b); InvEval.
+- rewrite Val.subl_addl_opp. apply eval_addlimm; auto.
+-  TrivialExists.
+Qed.
+
+Theorem eval_mullimm_base: forall n, unary_constructor_sound (mullimm_base n) (fun v => Val.mull v (Vlong n)).
+Proof.
+  intros; unfold mullimm_base. red. intros.
+  assert (DEFAULT: exists v : val, eval_expr ge sp e m le (Eop Omull (a ::: longconst n ::: Enil)) v
+                              /\ Val.lessdef (Val.mull x (Vlong n)) v).
+  { TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto. }
+  generalize (Int64.one_bits'_decomp n); intros D.
+  destruct (Int64.one_bits' n) as [ | i [ | j [ | ? ? ]]] eqn:B; auto.
+- replace (Val.mull x (Vlong n)) with (Val.shll x (Vint i)).
+  apply eval_shllimm; auto.
+  simpl in D. rewrite D, Int64.add_zero. destruct x; simpl; auto.
+  rewrite (Int64.one_bits'_range n) by (rewrite B; auto with coqlib).
+  rewrite Int64.shl'_mul; auto.
+- set (le' := x :: le).
+  assert (A0: eval_expr ge sp e m le' (Eletvar O) x) by (constructor; reflexivity).
+  exploit (eval_shllimm i). eexact A0. intros (v1 & A1 & B1).
+  exploit (eval_shllimm j). eexact A0. intros (v2 & A2 & B2).
+  exploit (eval_addl). eexact A1. eexact A2. intros (v3 & A3 & B3).
+  exists v3; split. econstructor; eauto.
+  rewrite D. simpl. rewrite Int64.add_zero. destruct x; auto.
+  simpl in *.
+  rewrite (Int64.one_bits'_range n) in B1 by (rewrite B; auto with coqlib).
+  rewrite (Int64.one_bits'_range n) in B2 by (rewrite B; auto with coqlib).
+  inv B1; inv B2. simpl in B3; inv B3.
+  rewrite Int64.mul_add_distr_r. rewrite <- ! Int64.shl'_mul. auto.
+Qed.
+
+Theorem eval_mullimm: forall n, unary_constructor_sound (mullimm n) (fun v => Val.mull v (Vlong n)).
+Proof.
+  unfold mullimm. intros.
+  destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_mullimm; eauto.
+  red; intros. predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists (Vlong Int64.zero).
+  split. apply eval_longconst. destruct x; simpl; auto.
+  subst n; rewrite Int64.mul_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.one.
+  exists x; split; auto.
+  destruct x; simpl; auto. subst n; rewrite Int64.mul_one; auto.
+  destruct (mullimm_match a); InvEval.
+- econstructor; split. apply eval_longconst. rewrite Int64.mul_commut; auto.
+- exploit (eval_mullimm_base n); eauto.
+Qed.
+
+Theorem eval_mull: binary_constructor_sound mull Val.mull.
+Proof.
+  unfold mull. destruct Archi.splitlong eqn:SL.
+  apply SplitLongproof.eval_mull; auto.
+  red; intros. destruct (mull_match a b).
+- exploit (eval_mullimm n1); eauto. intros (n & (H1 & H2)). InvEval. exists n. split; auto.
+  rewrite Val.mull_commut. exact H2.
+- exploit (eval_mullimm n2). apply H. InvEval. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_mullhu:
+  forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)).
+Proof.
+  unfold mullhu; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhu; auto.
+  red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
+Qed.
+
+Theorem eval_mullhs:
+  forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)).
+Proof.
+  unfold mullhs; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhs; auto.
+  red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
+Qed.
+
+Theorem eval_shrxlimm:
+  forall le a n x z,
+  eval_expr ge sp e m le a x ->
+  Val.shrxl x (Vint n) = Some z ->
+  exists v, eval_expr ge sp e m le (shrxlimm a n) v /\ Val.lessdef z v.
+Proof.
+  unfold shrxlimm. intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_shrxlimm; eauto.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto.
+  change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
+Proof.
+  unfold divls_base; red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_divls_base; eauto.
+  TrivialExists.
+Qed.
+
+Lemma eval_modl_aux:
+  forall divop semdivop,
+  (forall sp x y m, eval_operation ge sp divop (x :: y :: nil) m = semdivop x y) ->
+  forall le a b x y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  semdivop x y = Some z ->
+  eval_expr ge sp e m le (modl_aux divop a b) (Val.subl x (Val.mull z y)).
+Proof.
+  intros; unfold modl_aux.
+  eapply eval_Elet. eexact H0. eapply eval_Elet.
+  apply eval_lift. eexact H1.
+  eapply eval_Eop. eapply eval_Econs.
+  eapply eval_Eletvar. simpl; reflexivity.
+  eapply eval_Econs. eapply eval_Eop.
+  eapply eval_Econs. eapply eval_Eop.
+  eapply eval_Econs. apply eval_Eletvar. simpl; reflexivity.
+  eapply eval_Econs. apply eval_Eletvar. simpl; reflexivity.
+  apply eval_Enil.
+  rewrite H. eauto.
+  eapply eval_Econs. apply eval_Eletvar. simpl; reflexivity.
+  apply eval_Enil.
+  simpl; reflexivity. apply eval_Enil.
+  reflexivity.
+Qed.
+
+Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
+Proof.
+  unfold modls_base. red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_modls_base; eauto.
+  assert (DEFAULT: exists v : val, eval_expr ge sp e m le (modl_aux Odivl a b) v /\ Val.lessdef z v).
+  exploit Val.modls_divls; eauto. intros [v [A B]].
+  { subst. econstructor; split; eauto.
+    apply eval_modl_aux with (semdivop := Val.divls); auto. }
+
+  destruct (is_longconst a) as [n1|] eqn:A. exploit is_longconst_sound. eauto. eauto. intros.
+  destruct (is_longconst b) as [n2|] eqn:B; auto. exploit is_longconst_sound. eauto. eauto. intros.
+  predSpec Int64.eq Int64.eq_spec Int64.zero n2; simpl.
+  (* n1 mod n2, n2 = 0 *)
+  auto.
+  predSpec Int64.eq Int64.eq_spec n1 (Int64.repr Int64.min_signed); predSpec Int64.eq Int64.eq_spec n2 Int64.mone; simpl; auto; subst.
+- (* signed_min mod n2 | n2 != 0, n2 !- =1 *)
+  econstructor; split. apply eval_longconst.
+  unfold Val.modls in H1.
+  rewrite Int64.eq_false in H1; auto.
+  rewrite (Int64.eq_false n2 Int64.mone H6) in H1.
+  inversion H1. auto.
+- (* n1 mod -1, n1 !- signed_min *)
+  econstructor; split. apply eval_longconst.
+  unfold Val.modls in H1.
+  rewrite Int64.eq_false in H1; auto.
+  rewrite Int64.eq_false in H1; auto.
+  inversion H1. auto.
+- (* other valid cases *)
+  econstructor; split. apply eval_longconst.
+  unfold Val.modls in H1.
+  rewrite Int64.eq_false in H1; auto.
+  rewrite Int64.eq_false in H1; auto.
+  inversion H1.
+  auto.
+- (* fallback *)
+  apply DEFAULT.
+Qed.
+
+
+Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
+Proof.
+  unfold divlu_base; red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_divlu_base; eauto.
+  TrivialExists.
+Qed.
+
+Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
+Proof.
+  unfold modlu_base; red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_modlu_base; eauto.
+  assert (DEFAULT: exists v : val, eval_expr ge sp e m le (modl_aux Odivlu a b) v /\ Val.lessdef z v).
+  exploit Val.modlu_divlu; eauto. intros [v [A B]].
+  subst. econstructor; split; eauto.
+  apply eval_modl_aux with (semdivop := Val.divlu); auto.
+  (* n1 and n2 are longconsts *)
+  destruct (is_longconst a) as [n1|] eqn:A. exploit is_longconst_sound; eauto.
+  destruct (is_longconst b) as [n2|] eqn:B; auto. exploit is_longconst_sound; eauto. intros.
+  predSpec Int64.eq Int64.eq_spec Int64.zero n2; simpl.
+  (* n2 = 0 *)
+-  auto.
+  (* n2 != 0 *)
+-  econstructor; split. apply eval_longconst.
+  rewrite H2 in H1.
+  rewrite H3 in H1.
+  unfold Val.modlu in H1.
+  rewrite Int64.eq_false in H1; auto.
+  inversion H1. auto.
+-  (* n1 no longconst, n2 is longconst *)
+  destruct (is_longconst b) as [n2|] eqn:B; auto. exploit is_longconst_sound; eauto. intros.
+  destruct (Int64.is_power2 n2) eqn:C; auto.
+  (* n2 is power of 2 *)
+  exploit eval_andlimm. apply H. intros. destruct H3.
+  exists x0.  split. apply H3.
+  replace z with (Val.andl x (Vlong (Int64.sub n2 Int64.one))). apply H3.
+  apply (Val.modlu_pow2 x n2 i z); congruence.
+Qed.
+
+Theorem eval_cmplu:
+  forall c le a x b y v,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.cmplu (Mem.valid_pointer m) c x y = Some v ->
+  eval_expr ge sp e m le (cmplu c a b) v.
+Proof.
+  unfold cmplu; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_cmplu; eauto using Archi.splitlong_ptr32.
+  unfold Val.cmplu in H1.
+  destruct (Val.cmplu_bool (Mem.valid_pointer m) c x y) as [vb|] eqn:C; simpl in H1; inv H1.
+  destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2;
+  try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto));
+  try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto));
+  subst.
+- simpl in C; inv C. EvalOp. destruct (Int64.cmpu c n1 n2); reflexivity.
+- EvalOp. simpl. rewrite Val.swap_cmplu_bool. rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+Qed.
+
+Theorem eval_cmpl:
+  forall c le a x b y v,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.cmpl c x y = Some v ->
+  eval_expr ge sp e m le (cmpl c a b) v.
+Proof.
+  unfold cmpl; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_cmpl; eauto.
+  unfold Val.cmpl in H1.
+  destruct (Val.cmpl_bool c x y) as [vb|] eqn:C; simpl in H1; inv H1.
+  destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2;
+  try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto));
+  try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto));
+  subst.
+- simpl in C; inv C. EvalOp. destruct (Int64.cmp c n1 n2); reflexivity.
+- EvalOp. simpl. rewrite Val.swap_cmpl_bool. rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+Qed.
+
+Theorem eval_longoffloat:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longoffloat x = Some y ->
+  exists v, eval_expr ge sp e m le (longoffloat a) v /\ Val.lessdef y v.
+Proof.
+  unfold longoffloat. intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_longoffloat; eauto.
+  TrivialExists.
+Qed.
+
+Theorem eval_floatoflong:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.floatoflong x = Some y ->
+  exists v, eval_expr ge sp e m le (floatoflong a) v /\ Val.lessdef y v.
+Proof.
+  unfold floatoflong. intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_floatoflong; eauto.
+  TrivialExists.
+Qed.
+
+Theorem eval_longofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (longofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold longofsingle.
+  destruct x; simpl in H0; inv H0. destruct (Float32.to_long f) as [n|] eqn:EQ; simpl in H2; inv H2.
+  exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B.
+  apply Float32.to_long_double in EQ.
+  eapply eval_longoffloat; eauto. simpl.
+  change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto.
+Qed.
+
+End CMCONSTR.
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index 0a4b3ef6..2d9ae7a5 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -239,7 +239,7 @@ Definition mulhu (e1: expr) (e2: expr) := Eop Omulhu (e1 ::: e2 ::: Enil).
 
 (** ** Bitwise and, or, xor *)
 
-Nondetfunction andimm (n1: int) (e2: expr) := 
+Nondetfunction andimm (n1: int) (e2: expr) :=
   if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else
   if Int.eq n1 Int.mone then e2 else
   match e2 with
@@ -249,7 +249,7 @@ Nondetfunction andimm (n1: int) (e2: expr) :=
       let n := Int.and n1 n2 in
       if Int.eq (Int.shru (Int.shl n amount) amount) n
       && Int.ltu amount Int.iwordsize
-      then rolm t2 (Int.sub Int.iwordsize amount) 
+      then rolm t2 (Int.sub Int.iwordsize amount)
                    (Int.and (Int.shru Int.mone amount) n)
       else Eop (Oandimm n) (Eop (Oshrimm amount) (t2:::Enil) ::: Enil)
   | Eop (Oandimm n2) (t2:::Enil) =>
@@ -259,7 +259,7 @@ Nondetfunction andimm (n1: int) (e2: expr) :=
   | Eop (Oshrimm amount) (t2:::Enil) =>
       if Int.eq (Int.shru (Int.shl n1 amount) amount) n1
       && Int.ltu amount Int.iwordsize
-      then rolm t2 (Int.sub Int.iwordsize amount) 
+      then rolm t2 (Int.sub Int.iwordsize amount)
                    (Int.and (Int.shru Int.mone amount) n1)
       else Eop (Oandimm n1) (e2:::Enil)
   | _ =>
@@ -396,14 +396,14 @@ Nondetfunction compimm (default: comparison -> int -> condition)
         Eop (Ocmp (negate_condition c)) el
       else if Int.eq_dec n2 Int.one then
         Eop (Ocmp c) el
-      else 
+      else
         Eop (Ointconst Int.zero) Enil
   | Cne, Eop (Ocmp c) el =>
       if Int.eq_dec n2 Int.zero then
         Eop (Ocmp c) el
       else if Int.eq_dec n2 Int.one then
         Eop (Ocmp (negate_condition c)) el
-      else 
+      else
         Eop (Ointconst Int.one) Enil
   | Ceq, Eop (Oandimm n1) (t1 ::: Enil) =>
       if Int.eq_dec n2 Int.zero then
@@ -483,7 +483,8 @@ Nondetfunction floatofintu (e: expr) :=
   | Eop (Ointconst n) Enil =>
       Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ =>
-      if Archi.ppc64 then Eop Ofloatofintu (e ::: Enil) else
+      if Archi.ppc64 then
+        Eop Ofloatofintu (e ::: Enil)  else
         subf (Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: e ::: Enil))
              (Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Int.zero)) Enil)
   end.
@@ -493,7 +494,8 @@ Nondetfunction floatofint (e: expr) :=
   | Eop (Ointconst n) Enil =>
       Eop (Ofloatconst (Float.of_int n)) Enil
   | _ =>
-      if Archi.ppc64 then Eop Ofloatofint (e ::: Enil) else
+      if Archi.ppc64 then
+        Eop Ofloatofint (e ::: Enil) else
         subf (Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil
                                  ::: addimm Float.ox8000_0000 e ::: Enil))
              (Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Float.ox8000_0000)) Enil)
@@ -517,7 +519,7 @@ Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
 (** ** Recognition of addressing modes for load and store operations *)
 
 Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
-  match chunk with Mint64 => false | _ => true end.
+  match chunk with Mint64 => Archi.ppc64 | _ => true end.
 
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
   match e with
@@ -539,6 +541,7 @@ Nondetfunction builtin_arg (e: expr) :=
   | Eop (Ointconst n) Enil => BA_int n
   | Eop (Oaddrsymbol id ofs) Enil => BA_addrglobal id ofs
   | Eop (Oaddrstack ofs) Enil => BA_addrstack ofs
+  | Eop (Olongconst n) Enil => BA_long n
   | Eop Omakelong (Eop (Ointconst h) Enil ::: Eop (Ointconst l) Enil ::: Enil) =>
         BA_long (Int64.ofwords h l)
   | Eop Omakelong (h ::: l ::: Enil) => BA_splitlong (BA h) (BA l)
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index 7f3da409..31ddf304 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -160,7 +160,7 @@ Remark shift_symbol_address:
   Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.of_int delta)) = Val.add (Genv.symbol_address ge id ofs) (Vint delta).
 Proof.
   intros. unfold Genv.symbol_address. destruct (Genv.find_symbol ge id); auto.
-Qed.   
+Qed.
 
 Theorem eval_addimm:
   forall n, unary_constructor_sound (addimm n) (fun x => Val.add x (Vint n)).
@@ -172,7 +172,7 @@ Proof.
   case (addimm_match a); intros; InvEval; simpl; TrivialExists; simpl.
   rewrite Int.add_commut. auto.
   unfold Genv.symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. rewrite Ptrofs.add_commut; auto.
-  destruct sp; simpl; auto. rewrite Ptrofs.add_assoc. do 3 f_equal. apply Ptrofs.add_commut.   
+  destruct sp; simpl; auto. rewrite Ptrofs.add_assoc. do 3 f_equal. apply Ptrofs.add_commut.
   subst. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
   subst. rewrite Ptrofs.add_commut. rewrite shift_symbol_address. rewrite ! Val.add_assoc. f_equal. f_equal. apply Val.add_commut.
 Qed.
@@ -207,7 +207,7 @@ Proof.
   repeat rewrite Val.add_assoc. decEq. apply Val.add_commut.
 - subst. TrivialExists.
     econstructor. EvalOp. simpl. reflexivity. econstructor. eauto. constructor.
-    simpl. rewrite Val.add_permut, Val.add_commut. do 2 f_equal.  
+    simpl. rewrite Val.add_permut, Val.add_commut. do 2 f_equal.
     destruct sp; simpl; auto. rewrite Ptrofs.add_assoc; auto.
 - replace (Val.add x y) with
     (Val.add (Genv.symbol_address ge s (Ptrofs.add ofs (Ptrofs.of_int n))) (Val.add v1 v0)).
@@ -847,6 +847,7 @@ Proof.
   intros; unfold intoffloat. TrivialExists.
 Qed.
 
+
 Theorem eval_intuoffloat:
   forall le a x y,
   eval_expr ge sp e m le a x ->
@@ -1032,6 +1033,7 @@ Proof.
 - constructor.
 - constructor.
 - constructor.
+- constructor.
 - simpl in H5. inv H5. constructor.
 - subst v. constructor; auto.
 - inv H. InvEval. simpl in H6; inv H6. constructor; auto.
@@ -1040,4 +1042,3 @@ Proof.
 Qed.
 
 End CMCONSTR.
-
diff --git a/powerpc/Stacklayout.v b/powerpc/Stacklayout.v
index 2b78fd11..17104b33 100644
--- a/powerpc/Stacklayout.v
+++ b/powerpc/Stacklayout.v
@@ -93,7 +93,7 @@ Local Opaque Z.add Z.mul sepconj range.
   apply range_drop_right with 8. omega.
   apply range_split. omega.
   apply range_split_2. fold ol; omega. omega.
-  apply range_split. omega. 
+  apply range_split. omega.
   apply range_split. omega.
   apply range_drop_right with ostkdata. omega.
   eapply sep_drop2. eexact H.
diff --git a/powerpc/TargetPrinter.ml b/powerpc/TargetPrinter.ml
index 68cd001b..cb5f2304 100644
--- a/powerpc/TargetPrinter.ml
+++ b/powerpc/TargetPrinter.ml
@@ -359,6 +359,26 @@ module Target (System : SYSTEM):TARGET =
       assert (!count = 2 || (!count = 0 && !last));
       (!mb, !me-1)
 
+    (* Encoding 64-bit masks for rldic PPC64 instructions *)
+
+    let rolm64_mask n =
+      let rec leftmost_one pos mask =
+        assert (pos < 64);
+        let mask' = Int64.shift_right_logical mask 1 in
+        if Int64.logand n mask = 0L
+        then leftmost_one (pos + 1) mask'
+        else (pos, rightmost_one (pos + 1) mask')
+      and rightmost_one pos mask =
+        if pos >= 64 then
+          63
+        else if Int64.logand n mask > 0L then
+          rightmost_one (pos + 1) (Int64.shift_right_logical mask 1)
+        else if Int64.logand n (Int64.pred mask) = 0L then
+          pos - 1
+        else
+          assert false
+      in leftmost_one 0 0x8000_0000_0000_0000L
+
     (* Determine if the displacement of a conditional branch fits the short form *)
 
     let short_cond_branch tbl pc lbl_dest =
@@ -370,7 +390,7 @@ module Target (System : SYSTEM):TARGET =
     (* Printing of instructions *)
 
     let print_instruction oc tbl pc fallthrough = function
-      | Padd(r1, r2, r3) ->
+      | Padd(r1, r2, r3) | Padd64(r1, r2, r3) ->
           fprintf oc "	add	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Paddc(r1, r2, r3) ->
           fprintf oc "	addc	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
@@ -378,22 +398,30 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	adde	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Paddi(r1, r2, c) ->
           fprintf oc "	addi	%a, %a, %a\n" ireg r1 ireg_or_zero r2 constant c
+      | Paddi64(r1, r2, n) ->
+          fprintf oc "	addi	%a, %a, %Ld\n" ireg r1 ireg_or_zero r2 (camlint64_of_coqint n)
       | Paddic(r1, r2, c) ->
           fprintf oc "	addic	%a, %a, %a\n" ireg r1 ireg_or_zero r2 constant c
       | Paddis(r1, r2, c) ->
           fprintf oc "	addis	%a, %a, %a\n" ireg r1 ireg_or_zero r2 constant c
-      | Paddze(r1, r2) ->
+      | Paddis64(r1, r2, n) ->
+          fprintf oc "	addis	%a, %a, %Ld\n" ireg r1 ireg_or_zero r2 (camlint64_of_coqint n)
+      | Paddze(r1, r2) | Paddze64(r1, r2) ->
           fprintf oc "	addze	%a, %a\n" ireg r1 ireg r2
       | Pallocframe(sz, ofs, _) ->
           assert false
-      | Pand_(r1, r2, r3) ->
+      | Pand_(r1, r2, r3) | Pand_64(r1, r2, r3) ->
           fprintf oc "	and.	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pandc(r1, r2, r3) ->
           fprintf oc "	andc	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pandi_(r1, r2, c) ->
           fprintf oc "	andi.	%a, %a, %a\n" ireg r1 ireg r2 constant c
+      | Pandi_64(r1, r2, n) ->
+          fprintf oc "	andi.	%a, %a, %Ld\n" ireg r1 ireg r2 (camlint64_of_coqint n)
       | Pandis_(r1, r2, c) ->
           fprintf oc "	andis.	%a, %a, %a\n" ireg r1 ireg r2 constant c
+      | Pandis_64(r1, r2, n) ->
+          fprintf oc "	andis.	%a, %a, %Ld\n" ireg r1 ireg r2 (camlint64_of_coqint n)
       | Pb lbl ->
           fprintf oc "	b	%a\n" label (transl_label lbl)
       | Pbctr sg ->
@@ -445,14 +473,24 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "%s end pseudoinstr btbl\n" comment
       | Pcmpb (r1, r2, r3) ->
           fprintf oc "	cmpb	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pcmpld(r1, r2) ->
+          fprintf oc "	cmpld	%a, %a, %a\n" creg 0 ireg r1 ireg r2
+      | Pcmpldi(r1, n) ->
+          fprintf oc "	cmpldi	%a, %a, %Ld\n" creg 0 ireg r1 (camlint64_of_coqint n)
       | Pcmplw(r1, r2) ->
           fprintf oc "	cmplw	%a, %a, %a\n" creg 0 ireg r1 ireg r2
       | Pcmplwi(r1, c) ->
           fprintf oc "	cmplwi	%a, %a, %a\n" creg 0 ireg r1 constant c
+      | Pcmpd(r1, r2) ->
+          fprintf oc "	cmpd	%a, %a, %a\n" creg 0 ireg r1 ireg r2
+      | Pcmpdi(r1, n) ->
+          fprintf oc "	cmpdi	%a, %a, %Ld\n" creg 0 ireg r1 (camlint64_of_coqint n)
       | Pcmpw(r1, r2) ->
           fprintf oc "	cmpw	%a, %a, %a\n" creg 0 ireg r1 ireg r2
       | Pcmpwi(r1, c) ->
           fprintf oc "	cmpwi	%a, %a, %a\n" creg 0 ireg r1 constant c
+      | Pcntlzd(r1, r2) ->
+          fprintf oc "	cntlzd	%a, %a\n" ireg r1 ireg r2
       | Pcntlzw(r1, r2) ->
           fprintf oc "	cntlzw	%a, %a\n" ireg r1 ireg r2
       | Pcreqv(c1, c2, c3) ->
@@ -477,6 +515,10 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	divw	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pdivwu(r1, r2, r3) ->
           fprintf oc "	divwu	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pdivd(r1, r2, r3) ->
+          fprintf oc "	divd	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pdivdu(r1, r2, r3) ->
+          fprintf oc "	divdu	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Peieio ->
           fprintf oc "	eieio\n"
       | Peqv(r1, r2, r3) ->
@@ -487,6 +529,8 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	extsh	%a, %a\n" ireg r1 ireg r2
       | Pextsw(r1, r2) ->
           fprintf oc "	extsw	%a, %a\n" ireg r1 ireg r2
+      | Pextzw(r1, r2) ->
+          assert false
       | Pfreeframe(sz, ofs) ->
           assert false
       | Pfabs(r1, r2) | Pfabss(r1, r2) ->
@@ -499,12 +543,16 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	fcmpu	%a, %a, %a\n" creg 0 freg r1 freg r2
       | Pfcfi(r1, r2) ->
           assert false
+      | Pfcfl(r1, r2) ->
+          assert false
       | Pfcfid(r1, r2) ->
           fprintf oc "	fcfid	%a, %a\n" freg r1 freg r2
       | Pfcfiu(r1, r2) ->
           assert false
       | Pfcti(r1, r2) ->
           assert false
+      | Pfctid(r1, r2) ->
+          assert false
       | Pfctidz(r1, r2) ->
           fprintf oc "	fctidz	%a, %a\n" freg r1 freg r2
       | Pfctiu(r1, r2) ->
@@ -565,6 +613,10 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	lbz	%a, %a(%a)\n" ireg r1 constant c ireg r2
       | Plbzx(r1, r2, r3) ->
           fprintf oc "	lbzx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pld(r1, c, r2) | Pld_a(r1, c, r2) ->
+          fprintf oc "	ld	%a, %a(%a)\n" ireg r1 constant c ireg r2
+      | Pldx(r1, r2, r3) | Pldx_a(r1, r2, r3) ->
+          fprintf oc "	ldx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Plfd(r1, c, r2)  | Plfd_a(r1, c, r2) ->
           fprintf oc "	lfd	%a, %a(%a)\n" freg r1 constant c ireg r2
       | Plfdx(r1, r2, r3) | Plfdx_a(r1, r2, r3) ->
@@ -583,6 +635,17 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	lhz	%a, %a(%a)\n" ireg r1 constant c ireg r2
       | Plhzx(r1, r2, r3) ->
           fprintf oc "	lhzx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pldi(r1, c) ->
+          let lbl = new_label() in
+          fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
+          fprintf oc "	ld	%a, %a(%a) %s %Ld\n" ireg r1 label_low lbl ireg GPR12 comment (camlint64_of_coqint c);
+          int64_literals := (lbl, camlint64_of_coqint c) :: !int64_literals;
+      | Plmake(_, _, _) ->
+          assert false
+      | Pllo _ ->
+          assert false
+      | Plhi(_, _) ->
+          assert false
       | Plfi(r1, c) ->
           let lbl = new_label() in
           fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
@@ -621,6 +684,8 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	mfspr	%a, %ld\n" ireg rd (camlint_of_coqint spr)
       | Pmtspr(spr, rs) ->
           fprintf oc "	mtspr	%ld, %a\n" (camlint_of_coqint spr) ireg rs
+      | Pmulld(r1, r2, r3) ->
+          fprintf oc "	mulld	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pmulli(r1, r2, c) ->
           fprintf oc "	mulli	%a, %a, %a\n" ireg r1 ireg r2 constant c
       | Pmullw(r1, r2, r3) ->
@@ -629,24 +694,51 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	mulhw	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pmulhwu(r1, r2, r3) ->
           fprintf oc "	mulhwu	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pmulhd (r1,r2,r3) ->
+          fprintf oc "	mulhd	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pmulhdu (r1,r2,r3) ->
+          fprintf oc "	mulhdu	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pnand(r1, r2, r3) ->
           fprintf oc "	nand	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
-      | Pnor(r1, r2, r3) ->
+      | Pnor(r1, r2, r3) | Pnor64(r1, r2, r3) ->
           fprintf oc "	nor	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
-      | Por(r1, r2, r3) ->
+      | Por(r1, r2, r3) | Por64(r1, r2, r3) ->
           fprintf oc "	or	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Porc(r1, r2, r3) ->
           fprintf oc "	orc	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pori(r1, r2, c) ->
           fprintf oc "	ori	%a, %a, %a\n" ireg r1 ireg r2 constant c
+      | Pori64(r1, r2, n) ->
+          fprintf oc "	ori	%a, %a, %Ld\n" ireg r1 ireg r2 (camlint64_of_coqint n)
       | Poris(r1, r2, c) ->
           fprintf oc "	oris	%a, %a, %a\n" ireg r1 ireg r2 constant c
+      | Poris64(r1, r2, n) ->
+          fprintf oc "	oris	%a, %a, %Ld\n" ireg r1 ireg r2 (camlint64_of_coqint n)
       | Prldicl(r1, r2, c1, c2) ->
           fprintf oc "	rldicl	%a, %a, %ld, %ld\n"
             ireg r1 ireg r2 (camlint_of_coqint c1) (camlint_of_coqint c2)
-      | Prldicr(r1, r2, c1, c2) ->
-          fprintf oc "	rldicr	%a, %a, %ld, %ld\n"
-            ireg r1 ireg r2 (camlint_of_coqint c1) (camlint_of_coqint c2)
+      | Prldinm(r1, r2, c1, c2) ->
+          let amount = camlint64_of_coqint c1 in
+          let mask = camlint64_of_coqint c2 in
+          let (first, last) = rolm64_mask mask in
+          if last = 63 then
+            fprintf oc "	rldicl	%a, %a, %Ld, %d %s 0x%Lx\n"
+              ireg r1 ireg r2 amount first comment mask
+          else if first = 0 then
+            fprintf oc "	rldicr	%a, %a, %Ld, %d %s 0x%Lx\n"
+              ireg r1 ireg r2 amount last comment mask
+          else if last = 63 - Int64.to_int amount then
+            fprintf oc "	rldic	%a, %a, %Ld, %d %s 0x%Lx\n"
+              ireg r1 ireg r2 amount first comment mask
+          else
+            assert false
+      | Prldimi(r1, r2, c1, c2) ->
+          let amount = camlint64_of_coqint c1 in
+          let mask = camlint64_of_coqint c2 in
+          let (first, last) = rolm64_mask mask in
+          assert (last = 63 - Int64.to_int amount);
+          fprintf oc "	rldimi	%a, %a, %Ld, %d %s 0x%Lx\n"
+            ireg r1 ireg r2 amount first comment mask
       | Prlwinm(r1, r2, c1, c2) ->
           let (mb, me) = rolm_mask (camlint_of_coqint c2) in
           fprintf oc "	rlwinm	%a, %a, %ld, %d, %d %s 0x%lx\n"
@@ -657,18 +749,30 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	rlwimi	%a, %a, %ld, %d, %d %s 0x%lx\n"
             ireg r1 ireg r2 (camlint_of_coqint c1) mb me
             comment (camlint_of_coqint c2)
+      | Psld(r1, r2, r3) ->
+          fprintf oc "	sld	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pslw(r1, r2, r3) ->
           fprintf oc "	slw	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Psrad(r1, r2, r3) ->
+          fprintf oc "	srad	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Psradi(r1, r2, c) ->
+          fprintf oc "	sradi	%a, %a, %ld\n" ireg r1 ireg r2 (camlint_of_coqint c)
       | Psraw(r1, r2, r3) ->
           fprintf oc "	sraw	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Psrawi(r1, r2, c) ->
           fprintf oc "	srawi	%a, %a, %ld\n" ireg r1 ireg r2 (camlint_of_coqint c)
+      | Psrd(r1, r2, r3) ->
+          fprintf oc "	srd	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Psrw(r1, r2, r3) ->
           fprintf oc "	srw	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pstb(r1, c, r2) ->
           fprintf oc "	stb	%a, %a(%a)\n" ireg r1 constant c ireg r2
       | Pstbx(r1, r2, r3) ->
           fprintf oc "	stbx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+      | Pstd(r1, c, r2) | Pstd_a(r1, c, r2) ->
+          fprintf oc "	std	%a, %a(%a)\n" ireg r1 constant c ireg r2
+      | Pstdx(r1, r2, r3) | Pstdx_a(r1, r2, r3) ->
+          fprintf oc "	stdx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pstdu(r1, c, r2) ->
           fprintf oc "	stdu	%a, %a(%a)\n" ireg r1 constant c ireg r2
       | Pstfd(r1, c, r2) | Pstfd_a(r1, c, r2) ->
@@ -699,7 +803,7 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	stwbrx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pstwcx_(r1, r2, r3) ->
           fprintf oc "	stwcx.	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
-      | Psubfc(r1, r2, r3) ->
+      | Psubfc(r1, r2, r3) | Psubfc64(r1, r2, r3) ->
           fprintf oc "	subfc	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Psubfe(r1, r2, r3) ->
           fprintf oc "	subfe	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
@@ -707,16 +811,22 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	subfze	%a, %a\n" ireg r1 ireg r2
       | Psubfic(r1, r2, c) ->
           fprintf oc "	subfic	%a, %a, %a\n" ireg r1 ireg r2 constant c
+      | Psubfic64(r1, r2, n) ->
+          fprintf oc "	subfic	%a, %a, %Ld\n" ireg r1 ireg r2 (camlint64_of_coqint n)
       | Psync ->
           fprintf oc "	sync\n"
       | Ptrap ->
           fprintf oc "	trap\n"
-      | Pxor(r1, r2, r3) ->
+      | Pxor(r1, r2, r3) | Pxor64(r1, r2, r3) ->
           fprintf oc "	xor	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
       | Pxori(r1, r2, c) ->
           fprintf oc "	xori	%a, %a, %a\n" ireg r1 ireg r2 constant c
+      | Pxori64(r1, r2, n) ->
+          fprintf oc "	xori	%a, %a, %Ld\n" ireg r1 ireg r2 (camlint64_of_coqint n)
       | Pxoris(r1, r2, c) ->
           fprintf oc "	xoris	%a, %a, %a\n" ireg r1 ireg r2 constant c
+      | Pxoris64(r1, r2, n) ->
+          fprintf oc "	xoris	%a, %a, %Ld\n" ireg r1 ireg r2 (camlint64_of_coqint n)
       | Plabel lbl ->
           if (not fallthrough) && !Clflags.option_falignbranchtargets > 0 then
             fprintf oc "	.balign	%d\n" !Clflags.option_falignbranchtargets;
@@ -826,12 +936,16 @@ module Target (System : SYSTEM):TARGET =
     let print_fun_info = elf_print_fun_info
 
     let emit_constants oc lit =
-      if !float64_literals <> [] || !float32_literals <> [] then begin
+      if !float64_literals <> [] || !float32_literals <> []
+                                 || !int64_literals <> [] then begin
         section oc lit;
         fprintf oc "	.balign 8\n";
+        List.iter (print_literal64 oc) !int64_literals;
+        int64_literals := [];
         List.iter (print_literal64 oc) !float64_literals;
+        float64_literals := [];
         List.iter (print_literal32 oc) !float32_literals;
-        float64_literals := []; float32_literals := []
+        float32_literals := []
       end
 
     let print_optional_fun_info _ = ()
diff --git a/powerpc/ValueAOp.v b/powerpc/ValueAOp.v
index 8081f557..f7f65e9e 100644
--- a/powerpc/ValueAOp.v
+++ b/powerpc/ValueAOp.v
@@ -34,6 +34,10 @@ Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
   | Cmaskzero n, v1 :: nil => maskzero v1 n
   | Cmasknotzero n, v1 :: nil => cnot (maskzero v1 n)
+  | Ccompl c, v1 :: v2 :: nil => cmpl_bool c v1 v2
+  | Ccomplu c, v1 :: v2 :: nil => cmplu_bool c v1 v2
+  | Ccomplimm c n, v1 :: nil => cmpl_bool c v1 (L n)
+  | Ccompluimm c n, v1 :: nil => cmplu_bool c v1 (L n)
   | _, _ => Bnone
   end.
 
@@ -87,6 +91,33 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Oshru, v1::v2::nil => shru v1 v2
   | Orolm amount mask, v1::nil => rolm v1 amount mask
   | Oroli amount mask, v1::v2::nil => or (and v1 (I (Int.not mask))) (rolm v2 amount mask)
+  | Olongconst n, nil => L n
+  | Ocast32signed, v1::nil => longofint v1
+  | Ocast32unsigned, v1::nil => longofintu v1
+  | Oaddl, v1::v2::nil => addl v1 v2
+  | Oaddlimm n, v1::nil => addl v1 (L n)
+  | Osubl, v1::v2::nil => subl v1 v2
+  | Onegl, v1::nil => negl v1
+  | Omull, v1::v2::nil => mull v1 v2
+  | Omullhs, v1::v2::nil => mullhs v1 v2
+  | Omullhu, v1::v2::nil => mullhu v1 v2
+  | Odivl, v1::v2::nil => divls v1 v2
+  | Odivlu, v1::v2::nil => divlu v1 v2
+  | Oandl, v1::v2::nil => andl v1 v2
+  | Oandlimm n, v1::nil => andl v1 (L n)
+  | Oorl, v1::v2::nil => orl v1 v2
+  | Oorlimm n, v1::nil => orl v1 (L n)
+  | Oxorl, v1::v2::nil => xorl v1 v2
+  | Oxorlimm n, v1::nil => xorl v1 (L n)
+  | Onotl, v1::nil => notl v1
+  | Oshll, v1::v2::nil => shll v1 v2
+  | Oshrl, v1::v2::nil => shrl v1 v2
+  | Oshrlimm n, v1::nil => shrl v1 (I n)
+  | Oshrxlimm n, v1::nil => shrxl v1 (I n)
+  | Oshrlu, v1::v2::nil => shrlu v1 v2
+  | Orolml amount mask, v1::nil => rolml v1 amount mask
+  | Olongoffloat, v1::nil => longoffloat v1
+  | Ofloatoflong, v1::nil => floatoflong v1
   | Onegf, v1::nil => negf v1
   | Oabsf, v1::nil => absf v1
   | Oaddf, v1::v2::nil => addf v1 v2
@@ -177,9 +208,9 @@ Proof.
   destruct (propagate_float_constants tt); constructor.
   rewrite Ptrofs.add_zero_l; eauto with va.
   fold (Val.sub (Vint i) a1). auto with va.
+  apply rolml_sound; auto.
   apply floatofwords_sound; auto.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
 Qed.
 
 End SOUNDNESS.
-
diff --git a/powerpc/extractionMachdep.v b/powerpc/extractionMachdep.v
index b0f05536..b5ae048d 100644
--- a/powerpc/extractionMachdep.v
+++ b/powerpc/extractionMachdep.v
@@ -24,7 +24,7 @@ Extract Constant Asm.ireg_eq => "fun (x: ireg) (y: ireg) -> x = y".
 Extract Constant Asm.freg_eq => "fun (x: freg) (y: freg) -> x = y".
 Extract Constant Asm.preg_eq => "fun (x: preg) (y: preg) -> x = y".
 
-(* Choice of PPC variant *)
+(* Choice of PPC splitlong *)
 Extract Constant Archi.ppc64 =>
   "begin match Configuration.model with
    | ""ppc64"" -> true
diff --git a/runtime/Makefile b/runtime/Makefile
index b819991d..213779a4 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -10,8 +10,18 @@ ARCH=x86_32
 endif
 endif
 
+ifeq ($(ARCH),powerpc)
+ifeq ($(MODEL),ppc64)
+ARCH=powerpc64
+else ifeq ($(MODEL),e5500)
+ARCH=powerpc64
+endif
+endif
+
 ifeq ($(ARCH),x86_64)
 OBJS=i64_dtou.o i64_utod.o i64_utof.o vararg.o
+else ifeq ($(ARCH),powerpc64)
+OBJS=i64_dtou.o i64_stof.o i64_utod.o i64_utof.o vararg.o
 else
 OBJS=i64_dtos.o i64_dtou.o i64_sar.o i64_sdiv.o i64_shl.o \
   i64_shr.o i64_smod.o i64_stod.o i64_stof.o \
@@ -28,14 +38,6 @@ INCLUDES=include/float.h include/stdarg.h include/stdbool.h \
 
 VPATH=$(ARCH)
 
-ifeq ($(ARCH),powerpc)
-ifeq ($(MODEL),ppc64)
-VPATH=powerpc/ppc64 $(ARCH)
-else ifeq ($(MODEL),e5500)
-VPATH=powerpc/ppc64 $(ARCH)
-endif
-endif
-
 ifeq ($(strip $(HAS_RUNTIME_LIB)),true)
 all: $(LIB)
 else
diff --git a/runtime/README b/runtime/README
index 5d824300..f38ed894 100644
--- a/runtime/README
+++ b/runtime/README
@@ -1,11 +1,11 @@
 This is the support library for CompCert-generated code.
+
 It provides helper functions for:
 - 64-bit integer arithmetic
 - implementing the va_arg macro from <stdarg.h>
 
 The implementation is written in assembly language in the
-arm/ ia32/ powerpc/ directories.
+arm/ powerpc/ powerpc64/ riscV/ x86_32/ x86_64/ directories.
 
 The c/ directory contains a C implementation of the 64-bit integer functions.
 It is provided for reference and as a guide for the asm implementations.
-
diff --git a/runtime/powerpc/ppc64/i64_dtos.s b/runtime/powerpc/ppc64/i64_dtos.s
deleted file mode 100644
index 95f7f700..00000000
--- a/runtime/powerpc/ppc64/i64_dtos.s
+++ /dev/null
@@ -1,52 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-        .text
-
-### Conversion from double float to signed long	
-
-        .balign 16
-        .globl __i64_dtos
-__i64_dtos:
-	fctidz f1, f1
-        stfdu f1, -16(r1)
-        lwz r3, 0(r1)
-        lwz r4, 4(r1)
-        addi r1, r1, 16
-        blr
-        .type __i64_dtos, @function
-        .size __i64_dtos, .-__i64_dtos
-        
-\ No newline at end of file
diff --git a/runtime/powerpc/ppc64/i64_sar.s b/runtime/powerpc/ppc64/i64_sar.s
deleted file mode 100644
index 4fc4451e..00000000
--- a/runtime/powerpc/ppc64/i64_sar.s
+++ /dev/null
@@ -1,51 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-        .text
-
-# Shift right signed	
-	
-        .balign 16
-        .globl __i64_sar
-__i64_sar:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-	srad r4, r4, r5
-        srdi r3, r4, 32        # split r4 into (r3,r4)
-        blr
-        .type __i64_sar, @function
-        .size __i64_sar, .-__i64_sar
-
-        
-\ No newline at end of file
diff --git a/runtime/powerpc/ppc64/i64_sdiv.s b/runtime/powerpc/ppc64/i64_sdiv.s
deleted file mode 100644
index 2bf5b574..00000000
--- a/runtime/powerpc/ppc64/i64_sdiv.s
+++ /dev/null
@@ -1,52 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-        .text
-
-### Signed division	
-	
-        .balign 16
-        .globl __i64_sdiv
-__i64_sdiv:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-        rldimi r6, r5, 32, 0   # reassemble (r5,r6) as a 64-bit integer in r6
-	divd r4, r4, r6
-        srdi r3, r4, 32        # split r4 into (r3,r4)
-	blr
-        .type __i64_sdiv, @function
-        .size __i64_sdiv, .-__i64_sdiv
-	
-        
-\ No newline at end of file
diff --git a/runtime/powerpc/ppc64/i64_shl.s b/runtime/powerpc/ppc64/i64_shl.s
deleted file mode 100644
index 955de565..00000000
--- a/runtime/powerpc/ppc64/i64_shl.s
+++ /dev/null
@@ -1,50 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-        .text
-
-# Shift left
-	
-        .balign 16
-        .globl __i64_shl
-__i64_shl:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-	sld r4, r4, r5
-        srdi r3, r4, 32        # split r4 into (r3,r4)
-        blr
-        .type __i64_shl, @function
-        .size __i64_shl, .-__i64_shl
-        
-\ No newline at end of file
diff --git a/runtime/powerpc/ppc64/i64_shr.s b/runtime/powerpc/ppc64/i64_shr.s
deleted file mode 100644
index ca5ac9b2..00000000
--- a/runtime/powerpc/ppc64/i64_shr.s
+++ /dev/null
@@ -1,51 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-        .text
-
-# Shift right unsigned
-	
-        .balign 16
-        .globl __i64_shr
-__i64_shr:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-	srd r4, r4, r5
-        srdi r3, r4, 32        # split r4 into (r3,r4)
-        blr
-        .type __i64_shr, @function
-        .size __i64_shr, .-__i64_shr
-	
-        
-\ No newline at end of file
diff --git a/runtime/powerpc/ppc64/i64_stod.s b/runtime/powerpc/ppc64/i64_stod.s
deleted file mode 100644
index 3636d0b5..00000000
--- a/runtime/powerpc/ppc64/i64_stod.s
+++ /dev/null
@@ -1,50 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-### Conversion from signed long to double float	
-
-        .balign 16
-        .globl __i64_stod
-__i64_stod:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-	stdu r4, -16(r1)
-        lfd f1, 0(r1)
-        fcfid f1, f1
-        addi r1, r1, 16
-        blr
-        .type __i64_stod, @function
-        .size __i64_stod, .-__i64_stod
-
diff --git a/runtime/powerpc/ppc64/i64_udiv.s b/runtime/powerpc/ppc64/i64_udiv.s
deleted file mode 100644
index a6a3bcb3..00000000
--- a/runtime/powerpc/ppc64/i64_udiv.s
+++ /dev/null
@@ -1,51 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-        .text
-
-### Unsigned division	
-
-        .balign 16
-        .globl __i64_udiv
-__i64_udiv:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-        rldimi r6, r5, 32, 0   # reassemble (r5,r6) as a 64-bit integer in r6
-	divdu r4, r4, r6
-        srdi r3, r4, 32        # split r4 into (r3,r4)
-        blr
-        .type __i64_udiv, @function
-        .size __i64_udiv, .-__i64_udiv
-
diff --git a/runtime/powerpc/ppc64/i64_umod.s b/runtime/powerpc/ppc64/i64_umod.s
deleted file mode 100644
index 6bda1903..00000000
--- a/runtime/powerpc/ppc64/i64_umod.s
+++ /dev/null
@@ -1,53 +0,0 @@
-# *****************************************************************
-#
-#               The Compcert verified compiler
-#
-#           Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-#  en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in the
-#       documentation and/or other materials provided with the distribution.
-#     * Neither the name of the <organization> nor the
-#       names of its contributors may be used to endorse or promote products
-#       derived from this software without specific prior written permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
-
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
-
-        .text
-
-### Unsigned modulus
-
-        .balign 16
-        .globl __i64_umod
-__i64_umod:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-        rldimi r6, r5, 32, 0   # reassemble (r5,r6) as a 64-bit integer in r6
-	divdu r0, r4, r6
-	mulld r0, r0, r6
-        subf r4, r0, r4
-        srdi r3, r4, 32        # split r4 into (r3,r4)
-        blr
-        .type __i64_umod, @function
-        .size __i64_umod, .-__i64_umod
-
diff --git a/runtime/powerpc/ppc64/i64_dtou.s b/runtime/powerpc64/i64_dtou.s
index 60d5c9bf..60d5c9bf 100644
--- a/runtime/powerpc/ppc64/i64_dtou.s
+++ b/runtime/powerpc64/i64_dtou.s
diff --git a/runtime/powerpc/ppc64/i64_stof.s b/runtime/powerpc64/i64_stof.s
index 8830d594..8830d594 100644
--- a/runtime/powerpc/ppc64/i64_stof.s
+++ b/runtime/powerpc64/i64_stof.s
diff --git a/runtime/powerpc/ppc64/i64_utod.s b/runtime/powerpc64/i64_utod.s
index ddde91dd..ddde91dd 100644
--- a/runtime/powerpc/ppc64/i64_utod.s
+++ b/runtime/powerpc64/i64_utod.s
diff --git a/runtime/powerpc/ppc64/i64_smod.s b/runtime/powerpc64/i64_utof.s
index 35be366d..2617cbda 100644
--- a/runtime/powerpc/ppc64/i64_smod.s
+++ b/runtime/powerpc64/i64_utof.s
@@ -32,23 +32,33 @@
 #
 # *********************************************************************
 
-# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
 
         .text
 
-## Signed remainder	
-	
+### Conversion from unsigned long to single float	
+
         .balign 16
-        .globl __i64_smod
-__i64_smod:
-        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
-        rldimi r6, r5, 32, 0   # reassemble (r5,r6) as a 64-bit integer in r6
-	divd r0, r4, r6
-	mulld r0, r0, r6
-        subf r4, r0, r4
-        srdi r3, r4, 32        # split r4 into (r3,r4)
+        .globl __i64_utof
+__i64_utof:
+	mflr r9
+   # Check whether X < 2^53	
+        andis. r0, r3, 0xFFE0       # test bits 53...63 of X
+        beq 1f
+   # X is large enough that double rounding can occur.
+   # Avoid it by nudging X away from the points where double rounding
+   # occurs (the "round to odd" technique)
+        rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X
+        addi r0, r0, 0x7FF      # r0 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-31 of r0 are 0
+        or r4, r4, r0           # correct bit number 12 of X
+        rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
+   # Convert to double, then round to single	
+1:      bl __i64_utod
+        mtlr r9
+        frsp f1, f1
         blr
-        .type __i64_smod, @function
-        .size __i64_smod, .-__i64_smod
-
-        
-\ No newline at end of file
+        .type __i64_utof, @function
+        .size __i64_utof, .-__i64_utof
+	
diff --git a/runtime/powerpc64/vararg.s b/runtime/powerpc64/vararg.s
new file mode 100644
index 00000000..8d7e62c8
--- /dev/null
+++ b/runtime/powerpc64/vararg.s
@@ -0,0 +1,163 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for variadic functions <stdarg.h>.  IA32 version
+
+# typedef struct {
+#   unsigned char ireg;    // index of next integer register
+#   unsigned char freg;    // index of next FP register
+#   char * stk;            // pointer to next argument in stack
+#   struct {
+#     int iregs[8];
+#     double fregs[8];
+#   } * regs;              // pointer to saved register area
+# } va_list[1];
+#
+# unsigned int __compcert_va_int32(va_list ap);
+# unsigned long long __compcert_va_int64(va_list ap);
+# double __compcert_va_float64(va_list ap);
+
+        .text
+
+        .balign 16
+        .globl __compcert_va_int32
+__compcert_va_int32:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 0(r3)       # r4 = ap->ireg = next integer register
+        cmplwi  r4, 8
+        bge     1f
+  # Next argument was passed in an integer register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+        rlwinm  r6, r4, 2, 0, 29 # r6 = r4 * 4
+	addi    r4, r4, 1       # increment ap->ireg
+        stb     r4, 0(r3)
+        lwzx    r3, r5, r6      # load argument in r3
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+        addi    r5, r5, 4       # advance ap->stk by 4
+        stw     r5, 4(r3)
+	lwz     r3, -4(r5)      # load argument in r3
+        blr
+        .type   __compcert_va_int32, @function
+        .size   __compcert_va_int32, .-__compcert_va_int32
+
+        .balign 16
+        .globl __compcert_va_int64
+__compcert_va_int64:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 0(r3)       # r4 = ap->ireg = next integer register
+        cmplwi  r4, 7
+        bge     1f
+  # Next argument was passed in two consecutive integer register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+	addi    r4, r4, 3       # round r4 up to an even number and add 2
+        rlwinm  r4, r4, 0, 0, 30 
+        rlwinm  r6, r4, 2, 0, 29 # r6 = r4 * 4
+	add     r5, r5, r6      # r5 = address of argument + 8
+        stb     r4, 0(r3)       # update ap->ireg
+        lwz     r3, -8(r5)      # load argument in r3:r4
+        lwz     r4, -4(r5)
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+	li      r4, 8
+        stb     r4, 0(r3)       # set ap->ireg = 8 so that no ireg is left
+	addi    r5, r5, 15      # round r5 to a multiple of 8 and add 8
+        rlwinm  r5, r5, 0, 0, 28
+	stw     r5, 4(r3)       # update ap->stk
+        lwz     r3, -8(r5)      # load argument in r3:r4
+        lwz     r4, -4(r5)
+        blr
+        .type   __compcert_va_int64, @function
+        .size   __compcert_va_int64, .-__compcert_va_int64
+
+        .balign 16
+        .globl __compcert_va_float64
+__compcert_va_float64:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 1(r3)       # r4 = ap->freg = next float register
+        cmplwi  r4, 8
+        bge     1f
+  # Next argument was passed in a FP register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+        rlwinm  r6, r4, 3, 0, 28 # r6 = r4 * 8
+	add     r5, r5, r6
+        lfd     f1, 32(r5)      # load argument in f1
+	addi    r4, r4, 1       # increment ap->freg
+        stb     r4, 1(r3)
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+	addi    r5, r5, 15      # round r5 to a multiple of 8 and add 8
+        rlwinm  r5, r5, 0, 0, 28
+        lfd     f1, -8(r5)      # load argument in f1
+	stw     r5, 4(r3)       # update ap->stk
+        blr
+        .type   __compcert_va_float64, @function
+        .size   __compcert_va_float64, .-__compcert_va_int64
+
+        .balign 16
+        .globl __compcert_va_composite
+__compcert_va_composite:
+	b       __compcert_va_int32
+        .type   __compcert_va_composite, @function
+        .size   __compcert_va_composite, .-__compcert_va_composite
+
+# Save integer and FP registers at beginning of vararg function        
+
+        .balign 16
+        .globl  __compcert_va_saveregs
+__compcert_va_saveregs:
+        lwz     r11, 0(r1)      # r11 point to top of our frame
+        stwu    r3, -96(r11)    # register save area is 96 bytes below
+        stw     r4, 4(r11)
+        stw     r5, 8(r11)
+        stw     r6, 12(r11)
+        stw     r7, 16(r11)
+        stw     r8, 20(r11)
+        stw     r9, 24(r11)
+        stw     r10, 28(r11)
+	bf      6, 1f           # don't save FP regs if CR6 bit is clear
+        stfd    f1, 32(r11)
+        stfd    f2, 40(r11)
+        stfd    f3, 48(r11)
+        stfd    f4, 56(r11)
+        stfd    f5, 64(r11)
+        stfd    f6, 72(r11)
+        stfd    f7, 80(r11)
+        stfd    f8, 88(r11)
+1:      blr
+        .type   __compcert_va_saveregs, @function
+        .size   __compcert_va_saveregs, .-__compcert_va_saveregs
diff --git a/x86/Asm.v b/x86/Asm.v
index 1c204b02..8b873e48 100644
--- a/x86/Asm.v
+++ b/x86/Asm.v
@@ -209,7 +209,7 @@ Inductive instruction: Type :=
   | Pcmpl_ri (r1: ireg) (n: int)
   | Pcmpq_ri (r1: ireg) (n: int64)
   | Ptestl_rr (r1 r2: ireg)
-  | Ptestq_rr (r1 r2: ireg) 
+  | Ptestq_rr (r1 r2: ireg)
   | Ptestl_ri (r1: ireg) (n: int)
   | Ptestq_ri (r1: ireg) (n: int64)
   | Pcmov (c: testcond) (rd: ireg) (r1: ireg)
@@ -792,7 +792,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pxorl_rr rd r1 =>
       Next (nextinstr_nf (rs#rd <- (Val.xor rs#rd rs#r1))) m
   | Pxorq_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.xorl rs#rd rs#r1))) m 
+      Next (nextinstr_nf (rs#rd <- (Val.xorl rs#rd rs#r1))) m
   | Pxorl_ri rd n =>
       Next (nextinstr_nf (rs#rd <- (Val.xor rs#rd (Vint n)))) m
   | Pxorq_ri rd n =>
@@ -1145,7 +1145,7 @@ Proof.
   { intros. inv H; inv H0; congruence. }
   assert (B: forall p v1 v2,
              extcall_arg_pair rs m p v1 -> extcall_arg_pair rs m p v2 -> v1 = v2).
-  { intros. inv H; inv H0. 
+  { intros. inv H; inv H0.
     eapply A; eauto.
     f_equal; eapply A; eauto. }
   assert (C: forall ll vl1, list_forall2 (extcall_arg_pair rs m) ll vl1 ->
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index 90dc0e69..8e69061e 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -29,7 +29,7 @@ let _1 = Integers.Int.one
 let _2 = coqint_of_camlint 2l
 let _4 = coqint_of_camlint 4l
 let _8 = coqint_of_camlint 8l
-  
+
 let _0z = Z.zero
 let _1z = Z.one
 let _2z = Z.of_sint 2
@@ -49,7 +49,7 @@ let _Plea (r, addr) =
 let align n a =
   if n >= 0 then (n + a - 1) land (-a) else n land (-a)
 
-let sp_adjustment_32 sz =  
+let sp_adjustment_32 sz =
   let sz = Z.to_int sz in
   (* Preserve proper alignment of the stack *)
   let sz = align sz (stack_alignment ()) in
@@ -72,7 +72,7 @@ let sp_adjustment_64 sz =
     (* The top 8 bytes have already been allocated by the "call" instruction. *)
     (sz - 8, -1)
   end
-  
+
 (* Built-ins.  They come in two flavors:
    - annotation statements: take their arguments in registers or stack
    locations; generate no code;
@@ -500,7 +500,7 @@ let expand_instruction instr =
        emit (Pleaq (RAX, addr1));
        emit (Pmovq_mr (addr2, RAX));
        current_function_stacksize := Int64.of_int fullsz
-     end else begin     
+     end else begin
        let sz = sp_adjustment_32 sz in
        (* Allocate frame *)
        let sz' = Z.of_uint sz in
@@ -512,7 +512,7 @@ let expand_instruction instr =
        emit (Pleal (RAX,addr1));
        emit (Pmovl_mr (addr2,RAX));
        PrintAsmaux.current_function_stacksize := Int32.of_int sz
-     end  
+     end
   | Pfreeframe(sz, ofs_ra, ofs_link) ->
      if Archi.ptr64 then begin
        let (sz, _) = sp_adjustment_64 sz in
diff --git a/x86/Asmgenproof.v b/x86/Asmgenproof.v
index e56dc429..6caf4531 100644
--- a/x86/Asmgenproof.v
+++ b/x86/Asmgenproof.v
@@ -780,7 +780,7 @@ Opaque loadind.
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H5); intro NOOV.
   set (rs1 := rs0 #RAX <- Vundef #RDX <- Vundef).
-  exploit (find_label_goto_label f tf lbl rs1); eauto. 
+  exploit (find_label_goto_label f tf lbl rs1); eauto.
   intros [tc' [rs' [A [B C]]]].
   exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
   left; econstructor; split.
@@ -789,7 +789,7 @@ Opaque loadind.
   simpl. rewrite <- H9.  unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
   econstructor; eauto.
 Transparent destroyed_by_jumptable.
-  apply agree_undef_regs with rs0; auto. 
+  apply agree_undef_regs with rs0; auto.
   simpl; intros. destruct H8. rewrite C by auto with asmgen. unfold rs1; Simplifs.
   congruence.
 
@@ -834,7 +834,7 @@ Transparent destroyed_by_jumptable.
   left; econstructor; split.
   apply plus_one. econstructor; eauto.
   simpl. rewrite Ptrofs.unsigned_zero. simpl. eauto.
-  simpl. rewrite C. simpl in F, P. 
+  simpl. rewrite C. simpl in F, P.
   replace (chunk_of_type Tptr) with Mptr in F, P by (unfold Tptr, Mptr; destruct Archi.ptr64; auto).
   rewrite (sp_val _ _ _ AG) in F. rewrite F.
   rewrite ATLR. rewrite P. eauto.
@@ -883,7 +883,7 @@ Proof.
   econstructor; eauto.
   constructor.
   apply Mem.extends_refl.
-  split. reflexivity. simpl. 
+  split. reflexivity. simpl.
   unfold Vnullptr; destruct Archi.ptr64; congruence.
   intros. rewrite Regmap.gi. auto.
   unfold Genv.symbol_address.
diff --git a/x86/Asmgenproof1.v b/x86/Asmgenproof1.v
index 6191ea39..aade95d2 100644
--- a/x86/Asmgenproof1.v
+++ b/x86/Asmgenproof1.v
@@ -254,7 +254,7 @@ Proof.
   set (rs5 := nextinstr_nf (rs4#RAX <- v4)).
   assert (X: forall v1 v2,
              Val.addl v1 (Val.addl v2 (Vlong Int64.zero)) = Val.addl v1 v2).
-  { intros. unfold Val.addl; destruct Archi.ptr64 eqn:SF, v0; auto; destruct v5; auto. 
+  { intros. unfold Val.addl; destruct Archi.ptr64 eqn:SF, v0; auto; destruct v5; auto.
     rewrite Int64.add_zero; auto.
     rewrite Ptrofs.add_zero; auto.
     rewrite Int64.add_zero; auto.
diff --git a/x86/CombineOpproof.v b/x86/CombineOpproof.v
index f59e582b..a7024501 100644
--- a/x86/CombineOpproof.v
+++ b/x86/CombineOpproof.v
@@ -125,7 +125,7 @@ Theorem combine_addr_32_sound:
 Proof.
   intros. functional inversion H; subst.
   (* indexed - lea *)
-  UseGetSound. simpl. unfold offset_addressing in H7. destruct (addressing_valid (offset_addressing_total a n)); inv H7. 
+  UseGetSound. simpl. unfold offset_addressing in H7. destruct (addressing_valid (offset_addressing_total a n)); inv H7.
   eapply eval_offset_addressing_total_32; eauto.
 Qed.
 
@@ -136,7 +136,7 @@ Theorem combine_addr_64_sound:
 Proof.
   intros. functional inversion H; subst.
   (* indexed - leal *)
-  UseGetSound. simpl. unfold offset_addressing in H7. destruct (addressing_valid (offset_addressing_total a n)); inv H7. 
+  UseGetSound. simpl. unfold offset_addressing in H7. destruct (addressing_valid (offset_addressing_total a n)); inv H7.
   eapply eval_offset_addressing_total_64; eauto.
 Qed.
 
diff --git a/x86/ConstpropOp.vp b/x86/ConstpropOp.vp
index 0bf143d2..759d7c16 100644
--- a/x86/ConstpropOp.vp
+++ b/x86/ConstpropOp.vp
@@ -46,7 +46,7 @@ Definition const_for_result (a: aval) : option operation :=
   one if some of its arguments are statically known.  These are again
   large pattern-matchings expressed in indirect style. *)
 
-Nondetfunction cond_strength_reduction 
+Nondetfunction cond_strength_reduction
               (cond: condition) (args: list reg) (vl: list aval) :=
   match cond, args, vl with
   | Ccomp c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
@@ -65,7 +65,7 @@ Nondetfunction cond_strength_reduction
       (Ccompluimm (swap_comparison c) n1, r2 :: nil)
   | Ccomplu c, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
       (Ccompluimm c n2, r1 :: nil)
-  | _, _, _ => 
+  | _, _, _ =>
       (cond, args)
   end.
 
@@ -350,7 +350,7 @@ Definition make_cast16signed (r: reg) (a: aval) :=
 Definition make_cast16unsigned (r: reg) (a: aval) :=
   if vincl a (Uns Ptop 16) then (Omove, r :: nil) else (Ocast16unsigned, r :: nil).
 
-Nondetfunction op_strength_reduction 
+Nondetfunction op_strength_reduction
               (op: operation) (args: list reg) (vl: list aval) :=
   match op, args, vl with
   | Ocast8signed, r1 :: nil, v1 :: nil => make_cast8signed r1 v1
diff --git a/x86/ConstpropOpproof.v b/x86/ConstpropOpproof.v
index 4f582f86..5eb46e34 100644
--- a/x86/ConstpropOpproof.v
+++ b/x86/ConstpropOpproof.v
@@ -85,7 +85,7 @@ Lemma eval_Olea_ptr:
   forall a el,
   eval_operation ge (Vptr sp Ptrofs.zero) (Olea_ptr a) el m = eval_addressing ge (Vptr sp Ptrofs.zero) a el.
 Proof.
-  unfold Olea_ptr, eval_addressing; intros. destruct Archi.ptr64; auto. 
+  unfold Olea_ptr, eval_addressing; intros. destruct Archi.ptr64; auto.
 Qed.
 
 Lemma const_for_result_correct:
@@ -112,12 +112,12 @@ Proof.
     exists (Genv.symbol_address ge id Ptrofs.zero); auto.
   * inv H2. exists (Genv.symbol_address ge id ofs); split.
     rewrite eval_Olea_ptr. apply eval_addressing_Aglobal.
-    auto.  
+    auto.
   + (* stack *)
     inv H2. exists (Vptr sp ofs); split.
-    rewrite eval_Olea_ptr. rewrite eval_addressing_Ainstack.  
+    rewrite eval_Olea_ptr. rewrite eval_addressing_Ainstack.
     simpl. rewrite Ptrofs.add_zero_l; auto.
-    auto.  
+    auto.
 Qed.
 
 Lemma cond_strength_reduction_correct:
@@ -152,8 +152,8 @@ Local Opaque Val.add.
   assert (B: forall x y z, Int.repr (Int.signed x * y + z) = Int.add (Int.mul x (Int.repr y)) (Int.repr z)).
   { intros; apply Int.eqm_samerepr; apply Int.eqm_add; auto with ints.
     unfold Int.mul; auto using Int.eqm_signed_unsigned with ints. }
-  intros until res; intros VL EA. 
-  unfold addr_strength_reduction_32_generic; destruct (addr_strength_reduction_32_generic_match addr args vl); 
+  intros until res; intros VL EA.
+  unfold addr_strength_reduction_32_generic; destruct (addr_strength_reduction_32_generic_match addr args vl);
   simpl in *; InvApproxRegs; SimplVM; try (inv EA).
 - econstructor; split; eauto. rewrite A, Val.add_assoc, Val.add_permut. auto.
 - econstructor; split; eauto. rewrite A, Val.add_assoc. auto.
@@ -178,19 +178,19 @@ Proof.
              Val.add (Genv.symbol_address ge symb ofs) (Vint (Int.repr n))).
   { intros. rewrite <- A. apply Genv.shift_symbol_address_32; auto. }
 Local Opaque Val.add.
-  destruct (addr_strength_reduction_32_match addr args vl); 
+  destruct (addr_strength_reduction_32_match addr args vl);
   simpl in *; InvApproxRegs; SimplVM; FuncInv; subst; rewrite ?SF.
 - econstructor; split; eauto. rewrite B. apply Val.add_lessdef; auto.
 - econstructor; split; eauto. rewrite Ptrofs.add_zero_l.
 Local Transparent Val.add.
   inv H0; auto. rewrite H2. simpl; rewrite SF, A. auto.
-- econstructor; split; eauto. 
-  unfold Ptrofs.add at 2. rewrite B. 
+- econstructor; split; eauto.
+  unfold Ptrofs.add at 2. rewrite B.
   fold (Ptrofs.add n1 (Ptrofs.of_int n2)).
   rewrite Genv.shift_symbol_address_32 by auto.
   rewrite ! Val.add_assoc. apply Val.add_lessdef; auto.
 - econstructor; split; eauto.
-  unfold Ptrofs.add at 2. rewrite B. 
+  unfold Ptrofs.add at 2. rewrite B.
   fold (Ptrofs.add n2 (Ptrofs.of_int n1)).
   rewrite Genv.shift_symbol_address_32 by auto.
   rewrite ! Val.add_assoc. rewrite Val.add_permut. apply Val.add_lessdef; auto.
@@ -203,7 +203,7 @@ Local Transparent Val.add.
   apply Val.lessdef_same; do 3 f_equal. auto with ptrofs.
 - econstructor; split; eauto. rewrite B. rewrite ! Val.add_assoc. rewrite (Val.add_commut (Vint (Int.repr ofs))).
   apply Val.add_lessdef; auto.
-- econstructor; split; eauto. rewrite B. rewrite (Val.add_commut e#r1). rewrite ! Val.add_assoc. 
+- econstructor; split; eauto. rewrite B. rewrite (Val.add_commut e#r1). rewrite ! Val.add_assoc.
   rewrite (Val.add_commut (Vint (Int.repr ofs))). apply Val.add_lessdef; auto.
 - econstructor; split; eauto. rewrite B. rewrite Genv.shift_symbol_address_32 by auto.
   rewrite ! Val.add_assoc. apply Val.add_lessdef; auto.
@@ -229,8 +229,8 @@ Local Opaque Val.addl.
   assert (B: forall x y z, Int64.repr (Int64.signed x * y + z) = Int64.add (Int64.mul x (Int64.repr y)) (Int64.repr z)).
   { intros; apply Int64.eqm_samerepr; apply Int64.eqm_add; auto with ints.
     unfold Int64.mul; auto using Int64.eqm_signed_unsigned with ints. }
-  intros until res; intros VL EA. 
-  unfold addr_strength_reduction_64_generic; destruct (addr_strength_reduction_64_generic_match addr args vl); 
+  intros until res; intros VL EA.
+  unfold addr_strength_reduction_64_generic; destruct (addr_strength_reduction_64_generic_match addr args vl);
   simpl in *; InvApproxRegs; SimplVM; try (inv EA).
 - econstructor; split; eauto. rewrite A, Val.addl_assoc, Val.addl_permut. auto.
 - econstructor; split; eauto. rewrite A, Val.addl_assoc. auto.
@@ -256,19 +256,19 @@ Proof.
              Val.addl (Genv.symbol_address ge symb ofs) (Vlong (Int64.repr n))).
   { intros. rewrite <- A. apply Genv.shift_symbol_address_64; auto. }
 Local Opaque Val.addl.
-  destruct (addr_strength_reduction_64_match addr args vl); 
+  destruct (addr_strength_reduction_64_match addr args vl);
   simpl in *; InvApproxRegs; SimplVM; FuncInv; subst; rewrite ?SF.
 - econstructor; split; eauto. rewrite B. apply Val.addl_lessdef; auto.
 - econstructor; split; eauto. rewrite Ptrofs.add_zero_l.
 Local Transparent Val.addl.
   inv H0; auto. rewrite H2. simpl; rewrite SF, A. auto.
-- econstructor; split; eauto. 
-  unfold Ptrofs.add at 2. rewrite B. 
+- econstructor; split; eauto.
+  unfold Ptrofs.add at 2. rewrite B.
   fold (Ptrofs.add n1 (Ptrofs.of_int64 n2)).
   rewrite Genv.shift_symbol_address_64 by auto.
   rewrite ! Val.addl_assoc. apply Val.addl_lessdef; auto.
 - econstructor; split; eauto.
-  unfold Ptrofs.add at 2. rewrite B. 
+  unfold Ptrofs.add at 2. rewrite B.
   fold (Ptrofs.add n2 (Ptrofs.of_int64 n1)).
   rewrite Genv.shift_symbol_address_64 by auto.
   rewrite ! Val.addl_assoc. rewrite Val.addl_permut. apply Val.addl_lessdef; auto.
@@ -350,8 +350,8 @@ Proof.
   intros. unfold make_addimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros.
   subst. exists (e#r); split; auto.
-  destruct (e#r); simpl; auto; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto. 
-  exists (Val.add e#r (Vint n)); split; auto. simpl. rewrite Int.repr_signed; auto. 
+  destruct (e#r); simpl; auto; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto.
+  exists (Val.add e#r (Vint n)); split; auto. simpl. rewrite Int.repr_signed; auto.
 Qed.
 
 Lemma make_shlimm_correct:
@@ -514,7 +514,7 @@ Proof.
   predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
   subst. exists (e#r); split; auto.
   destruct (e#r); simpl; auto; rewrite ? Int64.add_zero, ? Ptrofs.add_zero; auto.
-  exists (Val.addl e#r (Vlong n)); split; auto. simpl. rewrite Int64.repr_signed; auto. 
+  exists (Val.addl e#r (Vlong n)); split; auto. simpl. rewrite Int64.repr_signed; auto.
 Qed.
 
 Lemma make_shllimm_correct:
@@ -606,8 +606,8 @@ Proof.
   econstructor; split. simpl; eauto.
   rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
   simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
+  erewrite Int64.is_power2'_range by eauto.
+  erewrite Int64.divu_pow2' by eauto.  auto.
   exists v; auto.
 Qed.
 
@@ -621,7 +621,7 @@ Proof.
   intros; unfold make_modluimm.
   destruct (Int64.is_power2 n) eqn:?.
   exists v; split; auto. simpl. decEq.
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. 
+  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
   simpl. erewrite Int64.modu_and by eauto. auto.
   exists v; auto.
 Qed.
@@ -830,7 +830,7 @@ Proof.
   InvApproxRegs; SimplVM; inv H0.
   replace (Val.subl e#r1 (Vlong n2)) with (Val.addl e#r1 (Vlong (Int64.neg n2))).
   apply make_addlimm_correct; auto.
-  unfold Val.addl, Val.subl. destruct Archi.ptr64 eqn:SF, e#r1; auto. 
+  unfold Val.addl, Val.subl. destruct Archi.ptr64 eqn:SF, e#r1; auto.
   rewrite Int64.sub_add_opp; auto.
   rewrite Ptrofs.sub_add_opp. do 2 f_equal. auto with ptrofs.
   rewrite Int64.sub_add_opp; auto.
diff --git a/x86/Conventions1.v b/x86/Conventions1.v
index dbc8b064..ecfb85bf 100644
--- a/x86/Conventions1.v
+++ b/x86/Conventions1.v
@@ -63,6 +63,8 @@ Definition destroyed_at_call :=
 Definition dummy_int_reg := AX.     (**r Used in [Regalloc]. *)
 Definition dummy_float_reg := X0.   (**r Used in [Regalloc]. *)
 
+Definition callee_save_type := mreg_type.
+  
 Definition is_float_reg (r: mreg) :=
   match r with
   | AX | BX | CX | DX | SI | DI | BP
@@ -146,11 +148,11 @@ Lemma loc_result_pair:
   | One _ => True
   | Twolong r1 r2 =>
        r1 <> r2 /\ sg.(sig_res) = Some Tlong
-    /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true 
-    /\ Archi.splitlong = true
+    /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
+    /\ Archi.ptr64 = false
   end.
 Proof.
-  intros. change Archi.splitlong with (negb Archi.ptr64).
+  intros. 
   unfold loc_result, loc_result_32, loc_result_64, mreg_type;
   destruct Archi.ptr64; destruct (sig_res sg) as [[]|]; auto.
   split; auto. congruence.
@@ -162,7 +164,7 @@ Lemma loc_result_exten:
   forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
 Proof.
   intros. unfold loc_result, loc_result_32, loc_result_64.
-  destruct Archi.ptr64; rewrite H; auto.  
+  destruct Archi.ptr64; rewrite H; auto.
 Qed.
 
 (** ** Location of function arguments *)
@@ -310,7 +312,7 @@ Opaque list_nth_z.
   { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1.
     subst. left. eapply list_nth_z_in; eauto.
     eapply IHtyl; eauto.
-    subst. split. omega. assumption. 
+    subst. split. omega. assumption.
     eapply Y; eauto. omega. }
   assert (B: forall ty, In p
       match list_nth_z float_param_regs fr with
@@ -321,7 +323,7 @@ Opaque list_nth_z.
   { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1.
     subst. right. eapply list_nth_z_in; eauto.
     eapply IHtyl; eauto.
-    subst. split. omega. assumption. 
+    subst. split. omega. assumption.
     eapply Y; eauto. omega. }
   destruct a; eauto.
 Qed.
@@ -337,15 +339,15 @@ Proof.
   assert (X: forall l, loc_argument_64_charact 0 l -> loc_argument_acceptable l).
   { unfold loc_argument_64_charact, loc_argument_acceptable.
     destruct l as [r | [] ofs ty]; auto.  intros [C|C]; auto.
-    intros [C D]. split; auto. apply Zdivide_trans with 2; auto. 
+    intros [C D]. split; auto. apply Zdivide_trans with 2; auto.
     exists (2 / typealign ty); destruct ty; reflexivity.
   }
-  exploit loc_arguments_64_charact; eauto using Zdivide_0. 
+  exploit loc_arguments_64_charact; eauto using Zdivide_0.
   unfold forall_rpair; destruct p; intuition auto.
 - (* 32 bits *)
   assert (X: forall l, loc_argument_32_charact 0 l -> loc_argument_acceptable l).
   { destruct l as [r | [] ofs ty]; simpl; intuition auto. rewrite H2; apply Z.divide_1_l. }
-  exploit loc_arguments_32_charact; eauto. 
+  exploit loc_arguments_32_charact; eauto.
   unfold forall_rpair; destruct p; intuition auto.
 Qed.
 
@@ -373,14 +375,14 @@ Proof.
     | Some _ => size_arguments_64 tyl (ir + 1) fr ofs0
     | None => size_arguments_64 tyl ir fr (ofs0 + 2)
     end).
-  { destruct (list_nth_z int_param_regs ir); eauto. 
+  { destruct (list_nth_z int_param_regs ir); eauto.
     apply Zle_trans with (ofs0 + 2); auto. omega. }
   assert (B: ofs0 <=
     match list_nth_z float_param_regs fr with
     | Some _ => size_arguments_64 tyl ir (fr + 1) ofs0
     | None => size_arguments_64 tyl ir fr (ofs0 + 2)
     end).
-  { destruct (list_nth_z float_param_regs fr); eauto. 
+  { destruct (list_nth_z float_param_regs fr); eauto.
     apply Zle_trans with (ofs0 + 2); auto. omega. }
   destruct a; auto.
 Qed.
@@ -420,7 +422,7 @@ Proof.
   contradiction.
   assert (T: forall ty0, typesize ty0 <= 2).
   { destruct ty0; simpl; omega. }
-  assert (A: forall ty0, 
+  assert (A: forall ty0,
              In (S Outgoing ofs ty) (regs_of_rpairs
               match list_nth_z int_param_regs ir with
               | Some ireg =>
@@ -435,9 +437,9 @@ Proof.
   { intros. destruct (list_nth_z int_param_regs ir); simpl in H0; destruct H0.
   - discriminate.
   - eapply IHtyl; eauto.
-  - inv H0. apply Zle_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.  
+  - inv H0. apply Zle_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.
   - eapply IHtyl; eauto. }
-  assert (B: forall ty0, 
+  assert (B: forall ty0,
              In (S Outgoing ofs ty) (regs_of_rpairs
               match list_nth_z float_param_regs fr with
               | Some ireg =>
@@ -452,7 +454,7 @@ Proof.
   { intros. destruct (list_nth_z float_param_regs fr); simpl in H0; destruct H0.
   - discriminate.
   - eapply IHtyl; eauto.
-  - inv H0. apply Zle_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.  
+  - inv H0. apply Zle_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.
   - eapply IHtyl; eauto. }
   destruct a; eauto.
 Qed.
diff --git a/x86/Machregs.v b/x86/Machregs.v
index 04be0cd6..ffaf2531 100644
--- a/x86/Machregs.v
+++ b/x86/Machregs.v
@@ -58,7 +58,7 @@ Proof.
 Qed.
 
 Instance Decidable_eq_mreg : forall (x y: mreg), Decidable (eq x y) := Decidable_eq mreg_eq.
-  
+
 Instance Finite_mreg : Finite mreg := {
   Finite_elements := all_mregs;
   Finite_elements_spec := all_mregs_complete
@@ -151,7 +151,7 @@ Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mre
   match chunk with
   | Mint8signed | Mint8unsigned => if Archi.ptr64 then nil else AX :: CX :: nil
   | _ => nil
-  end. 
+  end.
 
 Definition destroyed_by_cond (cond: condition): list mreg :=
   nil.
diff --git a/x86/NeedOp.v b/x86/NeedOp.v
index 09013cdd..68ecc745 100644
--- a/x86/NeedOp.v
+++ b/x86/NeedOp.v
@@ -225,7 +225,7 @@ Proof.
 - eapply needs_of_addressing_32_sound; eauto.
 - change (eval_addressing64 ge (Vptr sp Ptrofs.zero) a args')
     with (eval_operation ge (Vptr sp Ptrofs.zero) (Oleal a) args' m').
-  eapply default_needs_of_operation_sound; eauto. 
+  eapply default_needs_of_operation_sound; eauto.
   destruct a; simpl in H0; auto.
 - destruct (eval_condition cond args m) as [b|] eqn:EC; simpl in H2.
   erewrite needs_of_condition_sound by eauto.
diff --git a/x86/Op.v b/x86/Op.v
index 43133cfa..18f9e092 100644
--- a/x86/Op.v
+++ b/x86/Op.v
@@ -393,14 +393,14 @@ Remark eval_addressing_Aglobal:
   forall (F V: Type) (genv: Genv.t F V) sp id ofs,
   eval_addressing genv sp (Aglobal id ofs) nil = Some (Genv.symbol_address genv id ofs).
 Proof.
-  intros. unfold eval_addressing, eval_addressing32, eval_addressing64; destruct Archi.ptr64; auto. 
+  intros. unfold eval_addressing, eval_addressing32, eval_addressing64; destruct Archi.ptr64; auto.
 Qed.
 
 Remark eval_addressing_Ainstack:
   forall (F V: Type) (genv: Genv.t F V) sp ofs,
   eval_addressing genv sp (Ainstack ofs) nil = Some (Val.offset_ptr sp ofs).
 Proof.
-  intros. unfold eval_addressing, eval_addressing32, eval_addressing64; destruct Archi.ptr64; auto. 
+  intros. unfold eval_addressing, eval_addressing32, eval_addressing64; destruct Archi.ptr64; auto.
 Qed.
 
 Remark eval_addressing_Ainstack_inv:
@@ -605,7 +605,7 @@ Corollary type_of_addressing_sound:
   eval_addressing genv sp addr vl = Some v ->
   Val.has_type v Tptr.
 Proof.
-  unfold eval_addressing, Tptr; intros. 
+  unfold eval_addressing, Tptr; intros.
   destruct Archi.ptr64; eauto using type_of_addressing64_sound, type_of_addressing32_sound.
 Qed.
 
@@ -815,7 +815,7 @@ Lemma eval_shift_stack_addressing32:
   eval_addressing32 ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
   eval_addressing32 ge (Vptr sp (Ptrofs.repr delta)) addr vl.
 Proof.
-  intros. 
+  intros.
   assert (A: forall i, Ptrofs.add Ptrofs.zero (Ptrofs.add i (Ptrofs.repr delta)) = Ptrofs.add (Ptrofs.repr delta) i).
   { intros. rewrite Ptrofs.add_zero_l. apply Ptrofs.add_commut. }
   destruct addr; simpl; rewrite ?A; reflexivity.
@@ -826,7 +826,7 @@ Lemma eval_shift_stack_addressing64:
   eval_addressing64 ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
   eval_addressing64 ge (Vptr sp (Ptrofs.repr delta)) addr vl.
 Proof.
-  intros. 
+  intros.
   assert (A: forall i, Ptrofs.add Ptrofs.zero (Ptrofs.add i (Ptrofs.repr delta)) = Ptrofs.add (Ptrofs.repr delta) i).
   { intros. rewrite Ptrofs.add_zero_l. apply Ptrofs.add_commut. }
   destruct addr; simpl; rewrite ?A; reflexivity.
@@ -837,7 +837,7 @@ Lemma eval_shift_stack_addressing:
   eval_addressing ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
   eval_addressing ge (Vptr sp (Ptrofs.repr delta)) addr vl.
 Proof.
-  intros. unfold eval_addressing. 
+  intros. unfold eval_addressing.
   destruct Archi.ptr64; auto using eval_shift_stack_addressing32, eval_shift_stack_addressing64.
 Qed.
 
@@ -1234,7 +1234,7 @@ Proof.
   inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
   inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
-  inv H4; simpl in H1; try discriminate. simpl. destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists. 
+  inv H4; simpl in H1; try discriminate. simpl. destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
   inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
   inv H4; simpl; auto.
@@ -1426,7 +1426,7 @@ Proof.
   rewrite eval_shift_stack_addressing.
   eapply eval_addressing_inj with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
   intros. apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 Lemma eval_operation_inject:
@@ -1446,7 +1446,7 @@ Proof.
   intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
   intros; eapply Mem.different_pointers_inject; eauto.
   intros. apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 End EVAL_INJECT.
diff --git a/x86/SelectLongproof.v b/x86/SelectLongproof.v
index 2262a70b..3bef632d 100644
--- a/x86/SelectLongproof.v
+++ b/x86/SelectLongproof.v
@@ -399,7 +399,7 @@ Theorem eval_mullimm: forall n, unary_constructor_sound (mullimm n) (fun v => Va
 Proof.
   unfold mullimm. intros; red; intros.
   destruct Archi.splitlong eqn:SL.
-  eapply SplitLongproof.eval_mullimm; eauto.  
+  eapply SplitLongproof.eval_mullimm; eauto.
   predSpec Int64.eq Int64.eq_spec n Int64.zero.
   exists (Vlong Int64.zero); split. apply eval_longconst.
   destruct x; simpl; auto. subst n; rewrite Int64.mul_zero; auto.
@@ -426,14 +426,14 @@ Proof.
 - TrivialExists.
 Qed.
 
-Theorem eval_mullhu: 
+Theorem eval_mullhu:
   forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)).
 Proof.
   unfold mullhu; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhu; auto.
   red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
 Qed.
 
-Theorem eval_mullhs: 
+Theorem eval_mullhs:
   forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)).
 Proof.
   unfold mullhs; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhs; auto.
diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp
index 2037760f..1200c3d7 100644
--- a/x86/SelectOp.vp
+++ b/x86/SelectOp.vp
@@ -369,14 +369,14 @@ Nondetfunction compimm (default: comparison -> int -> condition)
         Eop (Ocmp (negate_condition c)) el
       else if Int.eq_dec n2 Int.one then
         Eop (Ocmp c) el
-      else 
+      else
         Eop (Ointconst Int.zero) Enil
   | Cne, Eop (Ocmp c) el =>
       if Int.eq_dec n2 Int.zero then
         Eop (Ocmp c) el
       else if Int.eq_dec n2 Int.one then
         Eop (Ocmp (negate_condition c)) el
-      else 
+      else
         Eop (Ointconst Int.one) Enil
   | Ceq, Eop (Oandimm n1) (t1 ::: Enil) =>
       if Int.eq_dec n2 Int.zero then
@@ -420,7 +420,7 @@ Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
 
 (** ** Integer conversions *)
 
-Nondetfunction cast8unsigned (e: expr) := 
+Nondetfunction cast8unsigned (e: expr) :=
   match e with
   | Eop (Ointconst n) Enil =>
       Eop (Ointconst (Int.zero_ext 8 n)) Enil
@@ -438,7 +438,7 @@ Nondetfunction cast8signed (e: expr) :=
       Eop Ocast8signed (e ::: Enil)
   end.
 
-Nondetfunction cast16unsigned (e: expr) := 
+Nondetfunction cast16unsigned (e: expr) :=
   match e with
   | Eop (Ointconst n) Enil =>
       Eop (Ointconst (Int.zero_ext 16 n)) Enil
diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v
index 1728c39d..e2e0b830 100644
--- a/x86/SelectOpproof.v
+++ b/x86/SelectOpproof.v
@@ -115,7 +115,7 @@ Lemma eval_Olea_ptr:
   forall a el m,
   eval_operation ge sp (Olea_ptr a) el m = eval_addressing ge sp a el.
 Proof.
-  unfold Olea_ptr, eval_addressing; intros. destruct Archi.ptr64; auto. 
+  unfold Olea_ptr, eval_addressing; intros. destruct Archi.ptr64; auto.
 Qed.
 
 Theorem eval_addrsymbol:
@@ -162,7 +162,7 @@ Proof.
 + TrivialExists; simpl. rewrite Int.add_commut. auto.
 + inv H0. simpl in H6. TrivialExists. simpl.
   erewrite eval_offset_addressing_total_32 by eauto. rewrite Int.repr_signed; auto.
-+ TrivialExists. simpl. rewrite Int.repr_signed; auto. 
++ TrivialExists. simpl. rewrite Int.repr_signed; auto.
 Qed.
 
 Theorem eval_add: binary_constructor_sound add Val.add.
@@ -172,7 +172,7 @@ Proof.
   assert (B: forall id ofs n, Archi.ptr64 = false ->
              Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.repr n)) =
              Val.add (Genv.symbol_address ge id ofs) (Vint (Int.repr n))).
-  { intros. replace (Ptrofs.repr n) with (Ptrofs.of_int (Int.repr n)) by auto with ptrofs. 
+  { intros. replace (Ptrofs.repr n) with (Ptrofs.of_int (Int.repr n)) by auto with ptrofs.
     apply Genv.shift_symbol_address_32; auto. }
   red; intros until y.
   unfold add; case (add_match a b); intros; InvEval.
@@ -193,7 +193,7 @@ Proof.
 - TrivialExists.
 - TrivialExists. simpl. repeat rewrite Val.add_assoc. decEq; decEq. apply Val.add_commut.
 - TrivialExists. simpl. rewrite Val.add_assoc; auto.
-- TrivialExists. simpl. 
+- TrivialExists. simpl.
   unfold Val.add; destruct Archi.ptr64, x, y; auto.
   + rewrite Int.add_zero; auto.
   + rewrite Int.add_zero; auto.
@@ -324,7 +324,7 @@ Proof.
   exploit (eval_shlimm j (x :: le) (Eletvar 0) x). constructor; auto. intros [v2 [A2 B2]].
   exploit eval_add. eexact A1. eexact A2. intros [v3 [A3 B3]].
   exists v3; split. econstructor; eauto.
-  rewrite D; simpl; rewrite Int.add_zero. 
+  rewrite D; simpl; rewrite Int.add_zero.
   replace (Vint (Int.add (Int.shl Int.one i) (Int.shl Int.one j)))
      with (Val.add (Val.shl Vone (Vint i)) (Val.shl Vone (Vint j))).
   rewrite Val.mul_add_distr_r.
@@ -936,12 +936,12 @@ Proof.
                      /\ eval_addressing ge sp (Aindexed 0) vl = Some v).
   { intros. exists (v :: nil); split. constructor; auto. constructor. auto. }
   unfold addressing; case (addressing_match a); intros.
-- destruct (negb Archi.ptr64 && addressing_valid addr) eqn:E. 
-+ inv H. InvBooleans. apply negb_true_iff in H. unfold eval_addressing; rewrite H. 
+- destruct (negb Archi.ptr64 && addressing_valid addr) eqn:E.
++ inv H. InvBooleans. apply negb_true_iff in H. unfold eval_addressing; rewrite H.
   exists vl; auto.
 + apply D; auto.
-- destruct (Archi.ptr64 && addressing_valid addr) eqn:E. 
-+ inv H. InvBooleans. unfold eval_addressing; rewrite H. 
+- destruct (Archi.ptr64 && addressing_valid addr) eqn:E.
++ inv H. InvBooleans. unfold eval_addressing; rewrite H.
   exists vl; auto.
 + apply D; auto.
 - apply D; auto.
diff --git a/x86/Stacklayout.v b/x86/Stacklayout.v
index 44fd43b2..22c68099 100644
--- a/x86/Stacklayout.v
+++ b/x86/Stacklayout.v
@@ -72,7 +72,7 @@ Local Opaque Z.add Z.mul sepconj range.
   assert (0 <= 4 * b.(bound_outgoing)) by omega.
   assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
   assert (olink + w <= ocs) by (unfold ocs; omega).
-  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
+  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
   assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
   assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega).
   assert (ostkdata + bound_stack_data b <= oretaddr) by (apply align_le; omega).
@@ -88,13 +88,13 @@ Local Opaque Z.add Z.mul sepconj range.
   rewrite sep_swap34.
 (* Apply range_split and range_split2 repeatedly *)
   unfold fe_ofs_arg.
-  apply range_split_2. fold olink. omega. omega. 
+  apply range_split_2. fold olink. omega. omega.
   apply range_split. omega.
   apply range_split_2. fold ol. omega. omega.
   apply range_drop_right with ostkdata. omega.
   rewrite sep_swap.
   apply range_drop_left with (ostkdata + bound_stack_data b). omega.
-  rewrite sep_swap. 
+  rewrite sep_swap.
   exact H.
 Qed.
 
@@ -115,11 +115,11 @@ Proof.
   assert (0 <= 4 * b.(bound_outgoing)) by omega.
   assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
   assert (olink + w <= ocs) by (unfold ocs; omega).
-  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
+  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
   assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
   assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega).
   assert (ostkdata + bound_stack_data b <= oretaddr) by (apply align_le; omega).
-  split. omega. omega. 
+  split. omega. omega.
 Qed.
 
 Lemma frame_env_aligned:
diff --git a/x86_32/Archi.v b/x86_32/Archi.v
index 29073be8..8e96b4f1 100644
--- a/x86_32/Archi.v
+++ b/x86_32/Archi.v
@@ -31,7 +31,7 @@ Definition splitlong := negb ptr64.
 
 Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
 Proof.
-  unfold splitlong. destruct ptr64; simpl; congruence. 
+  unfold splitlong. destruct ptr64; simpl; congruence.
 Qed.
 
 Program Definition default_pl_64 : bool * nan_pl 53 :=
diff --git a/x86_64/Archi.v b/x86_64/Archi.v
index 7b1136c8..7b5301df 100644
--- a/x86_64/Archi.v
+++ b/x86_64/Archi.v
@@ -31,7 +31,7 @@ Definition splitlong := negb ptr64.
 
 Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
 Proof.
-  unfold splitlong. destruct ptr64; simpl; congruence. 
+  unfold splitlong. destruct ptr64; simpl; congruence.
 Qed.
 
 Program Definition default_pl_64 : bool * nan_pl 53 :=