55 files changed, 12921 insertions, 10999 deletions
diff --git a/backend/Allocation.v b/backend/Allocation.v
index 74c85cfe..eab5233d 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -1,7 +1,7 @@
-(** Register allocation, spilling, reloading and explicitation of
-   calling conventions. *)
+(** Register allocation. *)
 
 Require Import Coqlib.
+Require Import Errors.
 Require Import Maps.
 Require Import Lattice.
 Require Import AST.
@@ -16,15 +16,14 @@ Require Import Kildall.
 Require Import Locations.
 Require Import Conventions.
 Require Import Coloring.
-Require Import Parallelmove.
 
 (** * Liveness analysis over RTL *)
 
 (** A register [r] is live at a point [p] if there exists a path
-    from [p] to some instruction that uses [r] as argument,
-    and [r] is not redefined along this path.
-    Liveness can be computed by a backward dataflow analysis.
-    The analysis operates over sets of (live) pseudo-registers. *)
+  from [p] to some instruction that uses [r] as argument,
+  and [r] is not redefined along this path.
+  Liveness can be computed by a backward dataflow analysis.
+  The analysis operates over sets of (live) pseudo-registers. *)
 
 Notation reg_live := Regset.add.
 Notation reg_dead := Regset.remove.
@@ -50,25 +49,25 @@ Fixpoint reg_list_dead
   end.
 
 (** Here is the transfer function for the dataflow analysis.
-    Since this is a backward dataflow analysis, it takes as argument
-    the abstract register set ``after'' the given instruction,
-    i.e. the registers that are live after; and it returns as result
-    the abstract register set ``before'' the given instruction,
-    i.e. the registers that must be live before.
-    The general relation between ``live before'' and ``live after''
-    an instruction is that a register is live before if either
-    it is one of the arguments of the instruction, or it is not the result
-    of the instruction and it is live after.
-    However, if the result of a side-effect-free instruction is not 
-    live ``after'', the whole instruction will be removed later
-    (since it computes a useless result), thus its arguments need not
-    be live ``before''. *)
+  Since this is a backward dataflow analysis, it takes as argument
+  the abstract register set ``after'' the given instruction,
+  i.e. the registers that are live after; and it returns as result
+  the abstract register set ``before'' the given instruction,
+  i.e. the registers that must be live before.
+  The general relation between ``live before'' and ``live after''
+  an instruction is that a register is live before if either
+  it is one of the arguments of the instruction, or it is not the result
+  of the instruction and it is live after.
+  However, if the result of a side-effect-free instruction is not 
+  live ``after'', the whole instruction will be removed later
+  (since it computes a useless result), thus its arguments need not
+  be live ``before''. *)
 
 Definition transfer
             (f: RTL.function) (pc: node) (after: Regset.t) : Regset.t :=
   match f.(fn_code)!pc with
   | None =>
-      after
+      Regset.empty
   | Some i =>
       match i with
       | Inop s =>
@@ -88,18 +87,20 @@ Definition transfer
       | Icall sig ros args res s =>
           reg_list_live args
            (reg_sum_live ros (reg_dead res after))
+      | Itailcall sig ros args =>
+	  reg_list_live args (reg_sum_live ros Regset.empty)
       | Ialloc arg res s =>
           reg_live arg (reg_dead res after)
       | Icond cond args ifso ifnot =>
           reg_list_live args after
       | Ireturn optarg =>
-          reg_option_live optarg after
+          reg_option_live optarg Regset.empty
       end
   end.
 
 (** The liveness analysis is then obtained by instantiating the
-    general framework for backward dataflow analysis provided by
-    module [Kildall].  *)
+  general framework for backward dataflow analysis provided by
+  module [Kildall].  *)
 
 Module RegsetLat := LFSet(Regset).
 Module DS := Backward_Dataflow_Solver(RegsetLat)(NodeSetBackward).
@@ -107,315 +108,108 @@ Module DS := Backward_Dataflow_Solver(RegsetLat)(NodeSetBackward).
 Definition analyze (f: RTL.function): option (PMap.t Regset.t) :=
   DS.fixpoint (successors f) f.(fn_nextpc) (transfer f) nil.
 
-(** * Spilling and reloading *)
-
-(** LTL operations, like those of the target processor, operate only
-  over machine registers, but not over stack slots.  Consider
-  the RTL instruction
-<<
-        r1 <- Iop(Oadd, r1 :: r2 :: nil)
->>
-  and assume that [r1] and [r2] are assigned to stack locations [S s1]
-  and [S s2], respectively.  The translated LTL code must load these
-  stack locations into temporary integer registers (this is called
-  ``reloading''), perform the [Oadd] operation over these temporaries,
-  leave the result in a temporary, then store the temporary back to
-  stack location [S s1] (this is called ``spilling'').  In other term,
-  the generated LTL code has the following shape:
-<<
-        IT1 <- Bgetstack s1;
-        IT2 <- Bgetstack s2;
-        IT1 <- Bop(Oadd, IT1 :: IT2 :: nil);
-        Bsetstack s1 IT1;
->>
-  This section provides functions that assist in choosing appropriate
-  temporaries and inserting the required spilling and reloading
-  operations. *)
-
-(** ** Allocation of temporary registers for reloading and spilling. *)
-
-(** [reg_for l] returns a machine register appropriate for working
-    over the location [l]: either the machine register [m] if [l = R m],
-    or a temporary register of [l]'s type if [l] is a stack slot. *)
-
-Definition reg_for (l: loc) : mreg :=
-  match l with
-  | R r => r
-  | S s => match slot_type s with Tint => IT1 | Tfloat => FT1 end
-  end.
-
-(** [regs_for ll] is similar, for a list of locations [ll] of length
-    at most 3.  We ensure that distinct temporaries are used for
-    different elements of [ll]. *)
-
-Fixpoint regs_for_rec (locs: list loc) (itmps ftmps: list mreg)
-                      {struct locs} : list mreg :=
-  match locs, itmps, ftmps with
-  | l1 :: ls, it1 :: its, ft1 :: fts =>
-      match l1 with
-      | R r => r
-      | S s => match slot_type s with Tint => it1 | Tfloat => ft1 end
-      end
-      :: regs_for_rec ls its fts
-  | _, _, _ => nil
-  end.
-
-Definition regs_for (locs: list loc) :=
-  regs_for_rec locs (IT1 :: IT2 :: IT3 :: nil) (FT1 :: FT2 :: FT3 :: nil).
-
-(** ** Insertion of LTL reloads, stores and moves *)
+(** * Translation from RTL to LTL *)
 
 Require Import LTL.
 
-(** [add_spill src dst k] prepends to [k] the instructions needed
-  to assign location [dst] the value of machine register [mreg]. *)
-
-Definition add_spill (src: mreg) (dst: loc) (k: block) :=
-  match dst with
-  | R rd => if mreg_eq src rd then k else Bop Omove (src :: nil) rd k
-  | S sl => Bsetstack src sl k
-  end.
-
-(** [add_reload src dst k] prepends to [k] the instructions needed
-  to assign machine register [mreg] the value of the location [src]. *)
-
-Definition add_reload (src: loc) (dst: mreg) (k: block) :=
-  match src with
-  | R rs => if mreg_eq rs dst then k else Bop Omove (rs :: nil) dst k
-  | S sl => Bgetstack sl dst k
-  end.
-
-(** [add_reloads] is similar for a list of locations (as sources)
-  and a list of machine registers (as destinations).  *)
-
-Fixpoint add_reloads (srcs: list loc) (dsts: list mreg) (k: block)
-                                            {struct srcs} : block :=
-  match srcs, dsts with
-  | s1 :: sl, t1 :: tl =>
-      add_reload s1 t1 (add_reloads sl tl k)
-  | _, _ =>
-      k
-  end.
-
-(** [add_move src dst k] prepends to [k] the instructions that copy
-  the value of location [src] into location [dst]. *)
-
-Definition add_move (src dst: loc) (k: block) :=
-  if Loc.eq src dst then k else
-    match src, dst with
-    | R rs, _ =>
-        add_spill rs dst k
-    | _, R rd =>
-        add_reload src rd k
-    | S ss, S sd =>
-        let tmp :=
-          match slot_type ss with Tint => IT1 | Tfloat => FT1 end in
-        add_reload src tmp (add_spill tmp dst k)
-    end.
-
-(** [parallel_move srcs dsts k] is similar, but for a list of
-  source locations and a list of destination locations of the same
-  length.  This is a parallel move, meaning that arbitrary overlap
-  between the sources and destinations is permitted. *)
-
-Definition parallel_move (srcs dsts: list loc) (k: block) : block :=
-  List.fold_right
-    (fun p k => add_move (fst p) (snd p) k)
-     k (parmove srcs dsts).
-
-(** ** Constructors for LTL instructions *)
-
-(** The following functions generate LTL instructions operating
-  over the given locations.  Appropriate reloading and spilling operations
-  are added around the core LTL instruction. *)
-
-Definition add_op (op: operation) (args: list loc) (res: loc) (s: node) :=
-  match is_move_operation op args with
-  | Some src =>
-      add_move src res (Bgoto s)
-  | None =>
-      let rargs := regs_for args in
-      let rres := reg_for res in
-      add_reloads args rargs (Bop op rargs rres (add_spill rres res (Bgoto s)))
-  end.
-
-Definition add_load (chunk: memory_chunk) (addr: addressing)
-                    (args: list loc) (dst: loc) (s: node) :=
-  let rargs := regs_for args in
-  let rdst := reg_for dst in
-  add_reloads args rargs
-    (Bload chunk addr rargs rdst (add_spill rdst dst (Bgoto s))).
-
-Definition add_store (chunk: memory_chunk) (addr: addressing)
-                     (args: list loc) (src: loc) (s: node) :=
-  match regs_for (src :: args) with
-  | nil => Breturn                      (* never happens *)
-  | rsrc :: rargs =>
-      add_reloads (src :: args) (rsrc :: rargs)
-        (Bstore chunk addr rargs rsrc (Bgoto s))
-  end.
-
-Definition add_alloc (arg: loc) (res: loc) (s: node) :=
-  add_reload arg Conventions.loc_alloc_argument
-    (Balloc (add_spill Conventions.loc_alloc_result res (Bgoto s))).
-
-(** For function calls, we also add appropriate moves to and from
-  the canonical locations for function arguments and function results,
-  as dictated by the calling conventions. *)
-
-Definition add_call (sig: signature) (ros: loc + ident)
-                    (args: list loc) (res: loc) (s: node) :=
-  let rargs := loc_arguments sig in
-  let rres  := loc_result sig in
-  match ros with
-  | inl fn =>
-      (add_reload fn IT3
-        (parallel_move args rargs
-          (Bcall sig (inl _ IT3) (add_spill rres res (Bgoto s)))))
-  | inr id =>
-      parallel_move args rargs
-        (Bcall sig (inr _ id) (add_spill rres res (Bgoto s)))
-  end.
-
-Definition add_cond (cond: condition) (args: list loc) (ifso ifnot: node) :=
-  let rargs := regs_for args in
-  add_reloads args rargs (Bcond cond rargs ifso ifnot).
-
-(** For function returns, we add the appropriate move of the result
-  to the conventional location for the function result.  If the function
-  returns with no value, we explicitly set the function result register
-  to the [Vundef] value, for consistency with RTL's semantics. *)
-
-Definition add_return (sig: signature) (optarg: option loc) :=
-  match optarg with
-  | Some arg => add_reload arg (loc_result sig) Breturn
-  | None     => Bop Oundef nil (loc_result sig) Breturn
-  end.
-  
-(** For function entry points, we move from the parameter locations
-  dictated by the calling convention to the locations of the function
-  parameters.  We also explicitly set to [Vundef] the locations
-  of pseudo-registers that are live at function entry but are not
-  parameters, again for consistency with RTL's semantics. *)
-
-Fixpoint add_undefs (ll: list loc) (b: block) {struct ll} : block :=
-  match ll with
-  | nil => b
-  | R r :: ls => Bop Oundef nil r (add_undefs ls b)
-  | S s :: ls => add_undefs ls b
-  end.
-
-Definition add_entry (sig: signature) (params: list loc) (undefs: list loc)
-                     (s: node) :=
-  parallel_move (loc_parameters sig) params (add_undefs undefs (Bgoto s)).
-
-(** * Translation from RTL to LTL *)
-
-(** Each [RTL] instruction translates to an [LTL] basic block.
+(** Each [RTL] instruction translates to an [LTL] instruction.
   The register assignment [assign] returned by register allocation
   is applied to the arguments and results of the RTL
-  instruction, followed by an invocation of the appropriate [LTL]
-  constructor function that will deal with spilling, reloading and
-  calling conventions.  In addition, dead instructions are eliminated.
+  instruction.  Moreover, dead instructions and redundant moves
+  are eliminated (turned into a [Lnop] instruction).
   Dead instructions are instructions without side-effects ([Iop] and
   [Iload]) whose result register is dead, i.e. whose result value
-  is never used. *)
+  is never used.  Redundant moves are moves whose source and destination
+  are assigned the same location. *)
+
+Definition is_redundant_move
+    (op: operation) (args: list reg) (res: reg) (assign: reg -> loc) : bool :=
+  match is_move_operation op args with
+  | None => false
+  | Some src => if Loc.eq (assign src) (assign res) then true else false
+  end.
 
 Definition transf_instr
          (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc)
-         (pc: node) (instr: RTL.instruction) : LTL.block :=
+         (pc: node) (instr: RTL.instruction) : LTL.instruction :=
   match instr with
   | Inop s =>
-      Bgoto s
+      Lnop s
   | Iop op args res s =>
       if Regset.mem res live!!pc then
-        add_op op (List.map assign args) (assign res) s
+        if is_redundant_move op args res assign then
+          Lnop s
+        else 
+          Lop op (List.map assign args) (assign res) s
       else
-        Bgoto s
+        Lnop s
   | Iload chunk addr args dst s =>
       if Regset.mem dst live!!pc then
-        add_load chunk addr (List.map assign args) (assign dst) s
+        Lload chunk addr (List.map assign args) (assign dst) s
       else
-        Bgoto s
+        Lnop s
   | Istore chunk addr args src s =>
-      add_store chunk addr (List.map assign args) (assign src) s
+      Lstore chunk addr (List.map assign args) (assign src) s
   | Icall sig ros args res s =>
-      add_call sig (sum_left_map assign ros) (List.map assign args)
-                   (assign res) s
+      Lcall sig (sum_left_map assign ros) (List.map assign args)
+                (assign res) s
+  | Itailcall sig ros args =>
+      Ltailcall sig (sum_left_map assign ros) (List.map assign args)
   | Ialloc arg res s =>
-      add_alloc (assign arg) (assign res) s
+      Lalloc (assign arg) (assign res) s
   | Icond cond args ifso ifnot =>
-      add_cond cond (List.map assign args) ifso ifnot
+      Lcond cond (List.map assign args) ifso ifnot
   | Ireturn optarg =>
-      add_return f.(RTL.fn_sig) (option_map assign optarg)
+      Lreturn (option_map assign optarg)
   end.
 
-Definition transf_entrypoint
-     (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc)
-     (newcode: LTL.code) : LTL.code :=
-  let oldentry := RTL.fn_entrypoint f in
-  let newentry := RTL.fn_nextpc f in
-  let undefs :=
-    Regset.elements (reg_list_dead (RTL.fn_params f)
-                                   (transfer f oldentry live!!oldentry)) in
-  PTree.set
-    newentry
-    (add_entry (RTL.fn_sig f)
-               (List.map assign (RTL.fn_params f))
-               (List.map assign undefs)
-               oldentry)
-    newcode.
-
-Lemma transf_entrypoint_wf:
+Lemma transf_body_wf:
   forall (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc),
-  let tc1 := PTree.map (transf_instr f live assign) (RTL.fn_code f) in
-  let tc2 := transf_entrypoint f live assign tc1 in
-  forall (pc: node), Plt pc (Psucc (RTL.fn_nextpc f)) \/ tc2!pc = None.
+  let tc := PTree.map (transf_instr f live assign) (RTL.fn_code f) in
+  forall (pc: node), Plt pc (RTL.fn_nextpc f) \/ tc!pc = None.
 Proof.
-  intros. case (plt pc (Psucc (RTL.fn_nextpc f))); intro.
-  left. auto. 
-  right. 
-  assert (pc <> RTL.fn_nextpc f).
-  red; intro. subst pc. elim n. apply Plt_succ.
-  assert (~ (Plt pc (RTL.fn_nextpc f))).
-  red; intro. elim n. apply Plt_trans_succ; auto.
-  unfold tc2. unfold transf_entrypoint. 
-  rewrite PTree.gso; auto. 
-  unfold tc1. rewrite PTree.gmap. 
-  elim (RTL.fn_code_wf f pc); intro.
-  contradiction. unfold option_map. rewrite H1. auto.
+  intros. elim (RTL.fn_code_wf f pc); intro.
+  left. auto. right. unfold tc. rewrite PTree.gmap.
+  unfold option_map. rewrite H. auto.
 Qed.
 
+Definition transf_fun (f: RTL.function) (live: PMap.t Regset.t)
+                      (assign: reg -> loc) : LTL.function :=
+  LTL.mkfunction
+     (RTL.fn_sig f)
+     (List.map assign (RTL.fn_params f))
+     (RTL.fn_stacksize f)
+     (PTree.map (transf_instr f live assign) (RTL.fn_code f))
+     (RTL.fn_entrypoint f)
+     (RTL.fn_nextpc f)
+     (transf_body_wf f live assign).
+
 (** The translation of a function performs liveness analysis,
   construction and coloring of the inference graph, and per-instruction
   transformation as described above. *)
 
-Definition transf_function (f: RTL.function) : option LTL.function :=
+Definition live0 (f: RTL.function) (live: PMap.t Regset.t) :=
+  transfer f f.(RTL.fn_entrypoint) live!!(f.(RTL.fn_entrypoint)).
+
+Open Scope string_scope.
+
+Definition transf_function (f: RTL.function) : res LTL.function :=
   match type_function f with
-  | None => None
-  | Some env =>
+  | Error msg => Error msg
+  | OK env =>
     match analyze f with
-    | None => None
+    | None => Error (msg "Liveness analysis failure")
     | Some live =>
-      let pc0 := f.(RTL.fn_entrypoint) in
-      let live0 := transfer f pc0 live!!pc0 in
-      match regalloc f live live0 env with
-      | None => None
-      | Some assign =>
-          Some (LTL.mkfunction
-            (RTL.fn_sig f)
-            (RTL.fn_stacksize f)
-            (transf_entrypoint f live assign
-                   (PTree.map (transf_instr f live assign) (RTL.fn_code f)))
-            (RTL.fn_nextpc f)
-            (transf_entrypoint_wf f live assign))
-      end
+        match regalloc f live (live0 f live) env with
+        | None => Error (msg "Incorrect graph coloring")
+        | Some assign => OK (transf_fun f live assign)
+        end
     end
   end.
 
-Definition transf_fundef (fd: RTL.fundef) : option LTL.fundef :=
-  transf_partial_fundef transf_function fd.
+Definition transf_fundef (fd: RTL.fundef) : res LTL.fundef :=
+  AST.transf_partial_fundef transf_function fd.
 
-Definition transf_program (p: RTL.program) : option LTL.program :=
+Definition transf_program (p: RTL.program) : res LTL.program :=
   transform_partial_program transf_fundef p.
 
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index f0b2968f..1b5a4156 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -1,16 +1,17 @@
 (** Correctness proof for the [Allocation] pass (translation from
   RTL to LTL). *)
 
-Require Import Relations.
 Require Import FSets.
 Require Import SetoidList.
 Require Import Coqlib.
+Require Import Errors.
 Require Import Maps.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
 Require Import Mem.
 Require Import Events.
+Require Import Smallstep.
 Require Import Globalenvs.
 Require Import Op.
 Require Import Registers.
@@ -20,7 +21,6 @@ Require Import Locations.
 Require Import Conventions.
 Require Import Coloring.
 Require Import Coloringproof.
-Require Import Parallelmove.
 Require Import Allocation.
 
 (** * Semantic properties of calling conventions *)
@@ -53,9 +53,27 @@ Proof.
   auto.
   case (In_dec Loc.eq (R (loc_result sig)) destroyed_at_call); intro.
   auto.
-  elim n0. apply loc_result_acceptable.
+  elim n0. apply loc_result_caller_save.
 Qed.
 
+(** Function arguments for a tail call are preserved by a
+    [return_regs] operation. *)
+
+Lemma return_regs_arguments:
+  forall sig caller callee,
+  tailcall_possible sig ->
+  map (LTL.return_regs caller callee) (loc_arguments sig) =
+  map callee (loc_arguments sig).
+Proof.
+  intros. apply list_map_exten; intros.
+  generalize (H x H0). destruct x; intro.
+  unfold LTL.return_regs.
+  destruct (In_dec Loc.eq (R m) temporaries). auto.
+  destruct (In_dec Loc.eq (R m) destroyed_at_call). auto.
+  elim n0. eapply arguments_caller_save; eauto.
+  contradiction.
+Qed. 
+
 (** Acceptable locations that are not destroyed at call keep
   their values across a call. *)
 
@@ -73,31 +91,22 @@ Proof.
   auto.
 Qed.
 
-(** * Correctness condition for the liveness analysis *)
-
-(** The liveness information computed by the dataflow analysis is
-  correct in the following sense: all registers live ``before''
-  an instruction are live ``after'' all of its predecessors. *)
+(** Characterization of parallel assignments. *)
 
-Lemma analyze_correct:
-  forall (f: function) (live: PMap.t Regset.t) (n s: node),
-  analyze f = Some live ->
-  f.(fn_code)!n <> None ->
-  f.(fn_code)!s <> None ->
-  In s (successors f n) ->
-  RegsetLat.ge live!!n (transfer f s live!!s).
+Lemma parmov_spec:
+  forall ls srcs dsts,
+  Loc.norepet dsts -> List.length srcs = List.length dsts ->
+  List.map (LTL.parmov srcs dsts ls) dsts = List.map ls srcs /\
+  (forall l, Loc.notin l dsts -> LTL.parmov srcs dsts ls l = ls l).
 Proof.
-  intros.
-  eapply DS.fixpoint_solution.
-  unfold analyze in H. eexact H.
-  elim (fn_code_wf f n); intro. auto. contradiction.
-  elim (fn_code_wf f s); intro. auto. contradiction.
+  induction srcs; destruct dsts; simpl; intros; try discriminate.
   auto.
+  inv H. inv H0. destruct (IHsrcs _ H4 H1). split.
+  f_equal. apply Locmap.gss. rewrite <- H. apply list_map_exten; intros.
+  symmetry. apply Locmap.gso. eapply Loc.in_notin_diff; eauto. 
+  intros x [A B]. rewrite Locmap.gso; auto. apply Loc.diff_sym; auto.
 Qed.
 
-Definition live0 (f: RTL.function) (live: PMap.t Regset.t) :=
-  transfer f f.(RTL.fn_entrypoint) live!!(f.(RTL.fn_entrypoint)).
-
 (** * Properties of allocated locations *)
 
 (** We list here various properties of the locations [alloc r],
@@ -112,19 +121,6 @@ Variable live: PMap.t Regset.t.
 Variable alloc: reg -> loc.
 Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc.
 
-Lemma loc_acceptable_noteq_diff:
-  forall l1 l2,
-  loc_acceptable l1 -> l1 <> l2 -> Loc.diff l1 l2.
-Proof.
-  unfold loc_acceptable, Loc.diff; destruct l1; destruct l2;
-  try (destruct s); try (destruct s0); intros; auto; try congruence.
-  case (zeq z z0); intro. 
-  compare t t0; intro.
-  subst z0; subst t0; tauto.
-  tauto. tauto.
-  contradiction. contradiction.
-Qed.
-
 Lemma regalloc_noteq_diff:
   forall r1 l2,
   alloc r1 <> l2 -> Loc.diff (alloc r1) l2.
@@ -134,18 +130,6 @@ Proof.
   auto.
 Qed.   
 
-Lemma loc_acceptable_notin_notin:
-  forall r ll,
-  loc_acceptable r ->
-  ~(In r ll) -> Loc.notin r ll.
-Proof.
-  induction ll; simpl; intros.
-  auto.
-  split. apply loc_acceptable_noteq_diff. assumption. 
-  apply sym_not_equal. tauto. 
-  apply IHll. assumption. tauto. 
-Qed.
-
 Lemma regalloc_notin_notin:
   forall r ll,
   ~(In (alloc r) ll) -> Loc.notin (alloc r) ll.
@@ -153,6 +137,15 @@ Proof.
   intros. apply loc_acceptable_notin_notin. 
   eapply regalloc_acceptable; eauto. auto.
 Qed.
+
+Lemma regalloc_notin_notin_2:
+  forall l rl,
+  ~(In l (map alloc rl)) -> Loc.notin l (map alloc rl).
+Proof.
+  induction rl; simpl; intros. auto. 
+  split. apply Loc.diff_sym. apply regalloc_noteq_diff. tauto.
+  apply IHrl. tauto.
+Qed.
   
 Lemma regalloc_norepet_norepet:
   forall rl,
@@ -215,7 +208,7 @@ Hypothesis REGALLOC: regalloc f flive (live0 f flive) env = Some assign.
   of [assign r] can be arbitrary. *)
 
 Definition agree (live: Regset.t) (rs: regset) (ls: locset) : Prop :=
-  forall (r: reg), Regset.In r live -> ls (assign r) = rs#r.
+  forall (r: reg), Regset.In r live -> Val.lessdef (rs#r) (ls (assign r)).
 
 (** What follows is a long list of lemmas expressing properties
   of the [agree_live_regs] predicate that are useful for the 
@@ -232,6 +225,24 @@ Proof.
   apply H0. apply H. auto.
 Qed.
 
+Lemma agree_succ:
+  forall n s rs ls live,
+  analyze f = Some live ->
+  In s (RTL.successors f n) ->
+  agree live!!n rs ls ->
+  agree (transfer f s live!!s) rs ls.
+Proof.
+  intros.
+  elim (RTL.fn_code_wf f n); intro.
+  elim (RTL.fn_code_wf f s); intro.
+  apply agree_increasing with (live!!n).
+  eapply DS.fixpoint_solution. unfold analyze in H; eauto.
+  auto. auto. auto. auto.
+  unfold transfer. rewrite H3.
+  red; intros. elim (Regset.empty_1 _ H4).
+  unfold RTL.successors in H0; rewrite H2 in H0; elim H0.
+Qed.
+
 (** Some useful special cases of [agree_increasing]. *)
 
 
@@ -266,7 +277,7 @@ Qed.
 
 Lemma agree_eval_reg:
   forall r live rs ls,
-  agree (reg_live r live) rs ls -> ls (assign r) = rs#r.
+  agree (reg_live r live) rs ls -> Val.lessdef (rs#r) (ls (assign r)).
 Proof.
   intros. apply H. apply Regset.add_1. auto. 
 Qed.
@@ -276,11 +287,11 @@ Qed.
 Lemma agree_eval_regs:
   forall rl live rs ls,
   agree (reg_list_live rl live) rs ls ->
-  List.map ls (List.map assign rl) = rs##rl.
+  Val.lessdef_list (rs##rl) (List.map ls (List.map assign rl)).
 Proof.
   induction rl; simpl; intros.
-  reflexivity.
-  apply (f_equal2 (@cons val)). 
+  constructor.
+  constructor.
   apply agree_eval_reg with live. 
   apply agree_reg_list_live with rl. auto.
   eapply IHrl. eexact H.
@@ -322,21 +333,18 @@ Qed.
   are mapped to locations other than that of [r]. *)
 
 Lemma agree_assign_live:
-  forall live r rs ls ls' v,
+  forall live r rs ls v v',
   (forall s,
      Regset.In s live -> s <> r -> assign s <> assign r) ->
-  ls' (assign r) = v ->
-  (forall l, Loc.diff l (assign r) -> Loc.notin l temporaries -> ls' l = ls l) ->
+  Val.lessdef v v' ->
   agree (reg_dead r live) rs ls ->
-  agree live (rs#r <- v) ls'.
+  agree live (rs#r <- v) (Locmap.set (assign r) v' ls).
 Proof.
-  unfold agree; intros.
-  case (Reg.eq r r0); intro.
-  subst r0. rewrite Regmap.gss. assumption.
-  rewrite Regmap.gso; auto.
-  rewrite H1. apply H2. apply Regset.remove_2; auto. 
-  eapply regalloc_noteq_diff. eauto. apply H. auto.  auto.
-  eapply regalloc_not_temporary; eauto.
+  unfold agree; intros. rewrite Regmap.gsspec.
+  destruct (peq r0 r).
+  subst r0. rewrite Locmap.gss. auto.
+  rewrite Locmap.gso. apply H1. apply Regset.remove_2; auto.
+  eapply regalloc_noteq_diff. eauto. apply sym_not_equal. apply H. auto.  auto.
 Qed.
 
 (** This is a special case of the previous lemma where the value [v]
@@ -347,30 +355,47 @@ Qed.
   are mapped to locations other than that of [res]. *)
 
 Lemma agree_move_live:
-  forall live arg res rs (ls ls': locset),
+  forall live arg res rs (ls: locset),
   (forall r,
      Regset.In r live -> r <> res -> r <> arg -> 
      assign r <> assign res) ->
-  ls' (assign res) = ls (assign arg) ->
-  (forall l, Loc.diff l (assign res) -> Loc.notin l temporaries -> ls' l = ls l) ->
   agree (reg_live arg (reg_dead res live)) rs ls ->
-  agree live (rs#res <- (rs#arg)) ls'.
+  agree live (rs#res <- (rs#arg)) (Locmap.set (assign res) (ls (assign arg)) ls).
 Proof.
-  unfold agree; intros. 
-  case (Reg.eq res r); intro.
-  subst r. rewrite Regmap.gss. rewrite H0. apply H2.
+  unfold agree; intros. rewrite Regmap.gsspec. destruct (peq r res). 
+  subst r. rewrite Locmap.gss. apply H0.
   apply Regset.add_1; auto.
-  rewrite Regmap.gso; auto.
-  case (Loc.eq (assign r) (assign res)); intro.
-  rewrite e. rewrite H0.
-  case (Reg.eq arg r); intro.
-  subst r. apply H2. apply Regset.add_1; auto.
-  elim (H r); auto.
-  rewrite H1. apply H2. 
-  case (Reg.eq arg r); intro. subst r. apply Regset.add_1; auto.
-  apply Regset.add_2. apply Regset.remove_2. auto. auto. 
-  eapply regalloc_noteq_diff; eauto.
-  eapply regalloc_not_temporary; eauto.
+  destruct (Reg.eq r arg).
+  subst r.
+  replace (Locmap.set (assign res) (ls (assign arg)) ls (assign arg))
+     with (ls (assign arg)).
+  apply H0. apply Regset.add_1. auto.
+  symmetry. destruct (Loc.eq (assign arg) (assign res)).
+  rewrite <- e. apply Locmap.gss.
+  apply Locmap.gso. eapply regalloc_noteq_diff; eauto.
+
+  rewrite Locmap.gso. apply H0. apply Regset.add_2. apply Regset.remove_2. auto. auto.
+  eapply regalloc_noteq_diff; eauto. apply sym_not_equal. apply H; auto.
+Qed.
+
+(** Yet another special case corresponding to the case of 
+  a redundant move. *)
+
+Lemma agree_redundant_move_live:
+  forall live arg res rs (ls: locset),
+  (forall r,
+     Regset.In r live -> r <> res -> r <> arg -> 
+     assign r <> assign res) ->
+  agree (reg_live arg (reg_dead res live)) rs ls ->
+  assign res = assign arg ->
+  agree live (rs#res <- (rs#arg)) ls.
+Proof.
+  intros. 
+  apply agree_exten with (Locmap.set (assign res) (ls (assign arg)) ls).
+  eapply agree_move_live; eauto. 
+  intros. symmetry. rewrite H1. destruct (Loc.eq l (assign arg)).
+  subst l. apply Locmap.gss.
+  apply Locmap.gso. eapply regalloc_noteq_diff; eauto. 
 Qed.
 
 (** This complicated lemma states agreement between the states after
@@ -384,7 +409,7 @@ Lemma agree_call:
     ~(In (assign r) Conventions.destroyed_at_call)) ->
   (forall r,
     Regset.In r live -> r <> res -> assign r <> assign res) ->
-  ls' (assign res) = v ->
+  Val.lessdef v (ls' (assign res)) ->
   (forall l,
     Loc.notin l destroyed_at_call -> loc_acceptable l -> Loc.diff l (assign res) ->
     ls' l = ls l) ->
@@ -413,757 +438,33 @@ Lemma agree_init_regs:
   (forall r1 r2,
     In r1 rl -> Regset.In r2 live -> r1 <> r2 ->
     assign r1 <> assign r2) ->
-  List.map ls (List.map assign rl) = vl ->
-  agree (reg_list_dead rl live) (Regmap.init Vundef) ls ->
+  Val.lessdef_list vl (List.map ls (List.map assign rl)) ->
   agree live (init_regs vl rl) ls.
 Proof.
   induction rl; simpl; intros.
-  assumption.
-  destruct vl. discriminate. 
-  assert (agree (reg_dead a live) (init_regs vl rl) ls).
-  apply IHrl. intros. apply H. tauto.
-  eapply Regset.remove_3; eauto.
-  auto. congruence. assumption.
+  red; intros. rewrite Regmap.gi. auto.
+  inv H0. 
+  assert (agree live (init_regs vl1 rl) ls).
+  apply IHrl. intros. apply H. tauto. auto. auto. auto.
   red; intros. case (Reg.eq a r); intro.
-  subst r. rewrite Regmap.gss. congruence. 
-  rewrite Regmap.gso; auto. apply H2.
-  apply Regset.remove_2; auto. 
+  subst r. rewrite Regmap.gss. auto.
+  rewrite Regmap.gso; auto.
 Qed.
 
 Lemma agree_parameters:
   forall vl ls,
   let params := f.(RTL.fn_params) in
-  List.map ls (List.map assign params) = vl ->
-  (forall r,
-     Regset.In r (reg_list_dead params (live0 f flive)) ->
-     ls (assign r) = Vundef) ->
-  agree (live0 f flive) (init_regs vl params) ls.
+  Val.lessdef_list vl (List.map ls (List.map assign params)) ->
+  agree (live0 f flive)
+        (init_regs vl params) 
+        ls.
 Proof.
   intros. apply agree_init_regs. 
   intros. eapply regalloc_correct_3; eauto.
-  assumption. 
-  red; intros. rewrite Regmap.gi. auto.
-Qed.
-
-End AGREE.
-
-(** * Correctness of the LTL constructors *)
-
-(** This section proves theorems that establish the correctness of the
-  LTL constructor functions such as [add_op].  The theorems are of
-  the general form ``the generated LTL instructions execute and
-  modify the location set in the expected way: the result location(s)
-  contain the expected values and other, non-temporary locations keep
-  their values''. *)
-
-Section LTL_CONSTRUCTORS.
-
-Variable ge: LTL.genv.
-Variable sp: val.
-
-Lemma reg_for_spec:
-  forall l,
-  R(reg_for l) = l \/ In (R (reg_for l)) temporaries.
-Proof.
-  intros. unfold reg_for. destruct l. tauto.
-  case (slot_type s); simpl; tauto.
-Qed.
-
-Lemma add_reload_correct:
-  forall src dst k rs m,
-  exists rs',
-  exec_instrs ge sp (add_reload src dst k) rs m E0 k rs' m /\
-  rs' (R dst) = rs src /\
-  forall l, Loc.diff (R dst) l -> rs' l = rs l.
-Proof.
-  intros. unfold add_reload. destruct src.
-  case (mreg_eq m0 dst); intro.
-  subst dst. exists rs. split. apply exec_refl. tauto.
-  exists (Locmap.set (R dst) (rs (R m0)) rs).
-  split. apply exec_one; apply exec_Bop.  reflexivity. 
-  split. apply Locmap.gss. 
-  intros; apply Locmap.gso; auto.
-  exists (Locmap.set (R dst) (rs (S s)) rs).
-  split. apply exec_one; apply exec_Bgetstack. 
-  split. apply Locmap.gss. 
-  intros; apply Locmap.gso; auto.
-Qed.
-
-Lemma add_spill_correct:
-  forall src dst k rs m,
-  exists rs',
-  exec_instrs ge sp (add_spill src dst k) rs m E0 k rs' m /\
-  rs' dst = rs (R src) /\
-  forall l, Loc.diff dst l -> rs' l = rs l.
-Proof.
-  intros. unfold add_spill. destruct dst.
-  case (mreg_eq src m0); intro.
-  subst src. exists rs. split. apply exec_refl. tauto.
-  exists (Locmap.set (R m0) (rs (R src)) rs).
-  split. apply exec_one. apply exec_Bop. reflexivity.
-  split. apply Locmap.gss.
-  intros; apply Locmap.gso; auto.
-  exists (Locmap.set (S s) (rs (R src)) rs).
-  split. apply exec_one. apply exec_Bsetstack. 
-  split. apply Locmap.gss.
-  intros; apply Locmap.gso; auto.
-Qed.
-
-Lemma add_reloads_correct_rec:
-  forall srcs itmps ftmps k rs m,
-  (List.length srcs <= List.length itmps)%nat ->
-  (List.length srcs <= List.length ftmps)%nat ->
-  (forall r, In (R r) srcs -> In r itmps -> False) ->
-  (forall r, In (R r) srcs -> In r ftmps -> False) ->
-  list_disjoint itmps ftmps ->
-  list_norepet itmps ->
-  list_norepet ftmps ->
-  exists rs',
-  exec_instrs ge sp (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m E0 k rs' m /\
-  reglist (regs_for_rec srcs itmps ftmps) rs' = map rs srcs /\
-  (forall r, ~(In r itmps) -> ~(In r ftmps) -> rs' (R r) = rs (R r)) /\
-  (forall s, rs' (S s) = rs (S s)).
-Proof.
-  induction srcs; simpl; intros.
-  (* base case *)
-  exists rs. split. apply exec_refl. tauto.
-  (* inductive case *)
-  destruct itmps; simpl in H. omegaContradiction.
-  destruct ftmps; simpl in H0. omegaContradiction.
-  assert (R1: (length srcs <= length itmps)%nat). omega.
-  assert (R2: (length srcs <= length ftmps)%nat). omega.
-  assert (R3: forall r, In (R r) srcs -> In r itmps -> False).
-    intros. apply H1 with r. tauto. auto with coqlib. 
-  assert (R4: forall r, In (R r) srcs -> In r ftmps -> False).
-    intros. apply H2 with r. tauto. auto with coqlib.
-  assert (R5: list_disjoint itmps ftmps).
-    eapply list_disjoint_cons_left.
-    eapply list_disjoint_cons_right. eauto.
-  assert (R6: list_norepet itmps).
-    inversion H4; auto.
-  assert (R7: list_norepet ftmps).
-    inversion H5; auto.
-  destruct a.
-  (* a is a register *)
-  generalize (IHsrcs itmps ftmps k rs m R1 R2 R3 R4 R5 R6 R7).
-  intros [rs' [EX [RES [OTH1 OTH2]]]].
-  exists rs'. split.
-  unfold add_reload. case (mreg_eq m2 m2); intro; tauto.
-  split. simpl. apply (f_equal2 (@cons val)). 
-  apply OTH1. 
-  red; intro; apply H1 with m2. tauto. auto with coqlib.
-  red; intro; apply H2 with m2. tauto. auto with coqlib.
-  assumption.
-  split. intros. apply OTH1. simpl in H6; tauto. simpl in H7; tauto.
-  auto.
-  (* a is a stack location *)
-  set (tmp := match slot_type s with Tint => m0 | Tfloat => m1 end).
-  assert (NI: ~(In tmp itmps)).
-    unfold tmp; case (slot_type s).
-    inversion H4; auto. 
-    apply list_disjoint_notin with (m1 :: ftmps). 
-    apply list_disjoint_sym. apply list_disjoint_cons_left with m0.
-    auto. auto with coqlib.
-  assert (NF: ~(In tmp ftmps)).
-    unfold tmp; case (slot_type s).
-    apply list_disjoint_notin with (m0 :: itmps). 
-    apply list_disjoint_cons_right with m1.
-    auto. auto with coqlib.
-    inversion H5; auto. 
-  generalize
-    (add_reload_correct (S s) tmp
-       (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m).
-  intros [rs1 [EX1 [RES1 OTH]]].     
-  generalize (IHsrcs itmps ftmps k rs1 m R1 R2 R3 R4 R5 R6 R7).
-  intros [rs' [EX [RES [OTH1 OTH2]]]].
-  exists rs'.
-  split. eapply exec_trans; eauto. traceEq.
-  split. simpl. apply (f_equal2 (@cons val)). 
-  rewrite OTH1; auto.
-  rewrite RES. apply list_map_exten. intros.
-  symmetry. apply OTH. 
-  destruct x; try exact I. simpl. red; intro; subst m2.
-  generalize H6; unfold tmp. case (slot_type s).
-  intro. apply H1 with m0. tauto. auto with coqlib.
-  intro. apply H2 with m1. tauto. auto with coqlib.
-  split. intros. simpl in H6; simpl in H7.
-  rewrite OTH1. apply OTH. 
-  simpl. unfold tmp. case (slot_type s); tauto.
-  tauto. tauto.
-  intros. rewrite OTH2. apply OTH. exact I.
-Qed.
-
-Lemma add_reloads_correct:
-  forall srcs k rs m,
-  (List.length srcs <= 3)%nat ->
-  Loc.disjoint srcs temporaries ->
-  exists rs',
-  exec_instrs ge sp (add_reloads srcs (regs_for srcs) k) rs m E0 k rs' m /\
-  reglist (regs_for srcs) rs' = List.map rs srcs /\
-  forall l, Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros.
-  pose (itmps := IT1 :: IT2 :: IT3 :: nil).
-  pose (ftmps := FT1 :: FT2 :: FT3 :: nil).
-  assert (R1: (List.length srcs <= List.length itmps)%nat).
-    unfold itmps; simpl; assumption.
-  assert (R2: (List.length srcs <= List.length ftmps)%nat).
-    unfold ftmps; simpl; assumption.
-  assert (R3: forall r, In (R r) srcs -> In r itmps -> False).
-    intros. assert (In (R r) temporaries). 
-    simpl in H2; simpl; intuition congruence.
-    generalize (H0 _ _ H1 H3). simpl. tauto.
-  assert (R4: forall r, In (R r) srcs -> In r ftmps -> False).
-    intros. assert (In (R r) temporaries). 
-    simpl in H2; simpl; intuition congruence.
-    generalize (H0 _ _ H1 H3). simpl. tauto.
-  assert (R5: list_disjoint itmps ftmps).
-    red; intros r1 r2; simpl; intuition congruence.
-  assert (R6: list_norepet itmps).
-    unfold itmps. NoRepet.
-  assert (R7: list_norepet ftmps).
-    unfold ftmps. NoRepet.
-  generalize (add_reloads_correct_rec srcs itmps ftmps k rs m
-                R1 R2 R3 R4 R5 R6 R7).
-  intros [rs' [EX [RES [OTH1 OTH2]]]].
-  exists rs'. split. exact EX. 
-  split. exact RES.
-  intros. destruct l. apply OTH1.
-  generalize (Loc.notin_not_in _ _ H1). simpl. intuition congruence.
-  generalize (Loc.notin_not_in _ _ H1). simpl. intuition congruence.
-  apply OTH2.
-Qed.
-
-Lemma add_move_correct:
-  forall src dst k rs m,
-  exists rs',
-  exec_instrs ge sp (add_move src dst k) rs m E0 k rs' m /\
-  rs' dst = rs src /\
-  forall l, Loc.diff l dst -> Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> rs' l = rs l.
-Proof.
-  intros; unfold add_move.
-  case (Loc.eq src dst); intro.
-  subst dst. exists rs. split. apply exec_refl. tauto.
-  destruct src.
-  (* src is a register *)
-  generalize (add_spill_correct m0 dst k rs m); intros [rs' [EX [RES OTH]]].
-  exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto.
-  destruct dst.
-  (* src is a stack slot, dst a register *)
-  generalize (add_reload_correct (S s) m0 k rs m); intros [rs' [EX [RES OTH]]].
-  exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto.
-  (* src and dst are stack slots *)
-  set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end).
-  generalize (add_reload_correct (S s) tmp (add_spill tmp (S s0) k) rs m);
-  intros [rs1 [EX1 [RES1 OTH1]]].
-  generalize (add_spill_correct tmp (S s0) k rs1 m);
-  intros [rs2 [EX2 [RES2 OTH2]]].
-  exists rs2. split.
-  eapply exec_trans; eauto. traceEq.
-  split. congruence.
-  intros. rewrite OTH2. apply OTH1. 
-  apply Loc.diff_sym. unfold tmp; case (slot_type s); auto.
-  apply Loc.diff_sym; auto.
-Qed.
-
-Lemma effect_move_sequence:
-  forall k moves rs m,
-  let k' := List.fold_right (fun p k => add_move (fst p) (snd p) k) k moves in
-  exists rs',
-  exec_instrs ge sp k' rs m E0 k rs' m /\
-  effect_seqmove moves rs rs'.
-Proof.
-  induction moves; intros until m; simpl.
-  exists rs; split. constructor. constructor.
-  destruct a as [src dst]; simpl.
-  set (k1 := fold_right
-              (fun (p : loc * loc) (k : block) => add_move (fst p) (snd p) k)
-              k moves) in *.
-  destruct (add_move_correct src dst k1 rs m) as [rs1 [A [B C]]].
-  destruct (IHmoves rs1 m) as [rs' [D E]].
-  exists rs'; split. 
-  eapply exec_trans; eauto. traceEq.
-  econstructor; eauto. red. tauto. 
-Qed.
-
-Theorem parallel_move_correct:
-  forall srcs dsts k rs m,
-  List.length srcs = List.length dsts ->
-  Loc.no_overlap srcs dsts ->
-  Loc.norepet dsts ->
-  Loc.disjoint srcs temporaries ->
-  Loc.disjoint dsts temporaries ->
-  exists rs',
-  exec_instrs ge sp (parallel_move srcs dsts k) rs m E0 k rs' m /\
-  List.map rs' dsts = List.map rs srcs /\
-  rs' (R IT3) = rs (R IT3) /\
-  forall l, Loc.notin l dsts -> Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. 
-  generalize (effect_move_sequence k (parmove srcs dsts) rs m).
-  intros [rs' [EXEC EFFECT]].
-  exists rs'. split. exact EXEC. 
-  apply effect_parmove; auto.
-Qed.
- 
-Lemma add_op_correct:
-  forall op args res s rs m v,
-  (List.length args <= 3)%nat ->
-  Loc.disjoint args temporaries ->
-  eval_operation ge sp op (List.map rs args) = Some v ->
-  exists rs',
-  exec_block ge sp (add_op op args res s) rs m E0 (Cont s) rs' m /\
-  rs' res =  v /\
-  forall l, Loc.diff l res -> Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. unfold add_op. 
-  caseEq (is_move_operation op args).
-  (* move *)
-  intros arg IMO. 
-  generalize (is_move_operation_correct op args IMO). 
-  intros [EQ1 EQ2]. subst op; subst args.   
-  generalize (add_move_correct arg res (Bgoto s) rs m).
-  intros [rs' [EX [RES OTHER]]].
-  exists rs'. split. 
-  apply exec_Bgoto. exact EX. 
-  split. simpl in H1. congruence.
-  intros. unfold temporaries in H3; simpl in H3. 
-  apply OTHER. assumption. tauto. tauto. 
-  (* other ops *)
-  intros.
-  set (rargs := regs_for args). set (rres := reg_for res).
-  generalize (add_reloads_correct args
-                (Bop op rargs rres (add_spill rres res (Bgoto s)))
-                rs m H H0).
-  intros [rs1 [EX1 [RES1 OTHER1]]].
-  pose (rs2 := Locmap.set (R rres) v rs1).
-  generalize (add_spill_correct rres res (Bgoto s) rs2 m).
-  intros [rs3 [EX3 [RES3 OTHER3]]].
-  exists rs3.
-  split. apply exec_Bgoto. eapply exec_trans. eexact EX1. 
-  eapply exec_trans; eauto. 
-  apply exec_one. unfold rs2. apply exec_Bop. 
-  unfold rargs. rewrite RES1. auto. traceEq.
-  split. rewrite RES3. unfold rs2; apply Locmap.gss. 
-  intros. rewrite OTHER3. unfold rs2. rewrite Locmap.gso.
-  apply OTHER1. assumption. 
-  apply Loc.diff_sym. unfold rres. elim (reg_for_spec res); intro.
-  rewrite H5; auto. 
-  eapply Loc.in_notin_diff; eauto. apply Loc.diff_sym; auto.
-Qed.
-
-Lemma add_load_correct:
-  forall chunk addr args res s rs m a v,
-  (List.length args <= 2)%nat ->
-  Loc.disjoint args temporaries ->
-  eval_addressing ge sp addr (List.map rs args) = Some a ->
-  loadv chunk m a = Some v ->
-  exists rs',
-  exec_block ge sp (add_load chunk addr args res s) rs m E0 (Cont s) rs' m /\
-  rs' res = v /\
-  forall l, Loc.diff l res -> Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. unfold add_load. 
-  set (rargs := regs_for args). set (rres := reg_for res).
-  assert (LL: (List.length args <= 3)%nat). omega.
-  generalize (add_reloads_correct args
-                (Bload chunk addr rargs rres (add_spill rres res (Bgoto s)))
-                rs m LL H0).
-  intros [rs1 [EX1 [RES1 OTHER1]]].
-  pose (rs2 := Locmap.set (R rres) v rs1).
-  generalize (add_spill_correct rres res (Bgoto s) rs2 m).
-  intros [rs3 [EX3 [RES3 OTHER3]]].
-  exists rs3.
-  split. apply exec_Bgoto. eapply exec_trans; eauto.
-  eapply exec_trans; eauto. 
-  apply exec_one.  unfold rs2. apply exec_Bload with a.
-  unfold rargs; rewrite RES1. assumption. assumption. traceEq.
-  split. rewrite RES3. unfold rs2; apply Locmap.gss.
-  intros. rewrite OTHER3. unfold rs2. rewrite Locmap.gso. 
-  apply OTHER1. assumption. 
-  apply Loc.diff_sym. unfold rres. elim (reg_for_spec res); intro.
-  rewrite H5; auto.
-  eapply Loc.in_notin_diff; eauto. apply Loc.diff_sym; auto.
-Qed.
-
-Lemma add_store_correct:
-  forall chunk addr args src s rs m m' a,
-  (List.length args <= 2)%nat ->
-  Loc.disjoint args temporaries ->
-  Loc.notin src temporaries ->
-  eval_addressing ge sp addr (List.map rs args) = Some a ->
-  storev chunk m a (rs src) = Some m' ->
-  exists rs',
-  exec_block ge sp (add_store chunk addr args src s) rs m E0 (Cont s) rs' m' /\
-  forall l, Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. 
-  assert (LL: (List.length (src :: args) <= 3)%nat).
-    simpl. omega.
-  assert (DISJ: Loc.disjoint (src :: args) temporaries).
-    red; intros. elim H4; intro. subst x1. 
-    eapply Loc.in_notin_diff; eauto.
-    auto with coqlib.
-  unfold add_store. caseEq (regs_for (src :: args)).
-  unfold regs_for; simpl; intro; discriminate.
-  intros rsrc rargs EQ. 
-  generalize (add_reloads_correct (src :: args)
-               (Bstore chunk addr rargs rsrc (Bgoto s))
-               rs m LL DISJ).
-  intros [rs1 [EX1 [RES1 OTHER1]]].
-  rewrite EQ in RES1. simpl in RES1. injection RES1. 
-  intros RES2 RES3. 
-  exists rs1.
-  split. apply exec_Bgoto. 
-  eapply exec_trans. rewrite <- EQ. eexact EX1. 
-  apply exec_one. apply exec_Bstore with a. 
-  rewrite RES2. assumption. rewrite RES3. assumption. traceEq.
-  exact OTHER1.
-Qed.
-
-Lemma add_alloc_correct:
-  forall arg res s rs m m' sz b,
-  rs arg = Vint sz ->
-  Mem.alloc m 0 (Int.signed sz) = (m', b) ->
-  exists rs',
-  exec_block ge sp (add_alloc arg res s) rs m E0 (Cont s) rs' m' /\
-  rs' res = Vptr b Int.zero /\
-  forall l,
-    Loc.diff l (R Conventions.loc_alloc_argument) ->
-    Loc.diff l (R Conventions.loc_alloc_result) ->
-    Loc.diff l res -> 
-    rs' l = rs l.
-Proof.
-  intros; unfold add_alloc.
-  generalize (add_reload_correct arg loc_alloc_argument
-                (Balloc (add_spill loc_alloc_result res (Bgoto s)))
-                rs m).
-  intros [rs1 [EX1 [RES1 OTHER1]]].
-  pose (rs2 := Locmap.set (R loc_alloc_result) (Vptr b Int.zero) rs1).
-  generalize (add_spill_correct loc_alloc_result res (Bgoto s) rs2 m').
-  intros [rs3 [EX3 [RES3 OTHER3]]].
-  exists rs3.
-  split. apply exec_Bgoto. eapply exec_trans. eexact EX1.
-  eapply exec_trans. apply exec_one. eapply exec_Balloc; eauto. congruence. 
-  fold rs2. eexact EX3. reflexivity. traceEq. 
-  split. rewrite RES3; unfold rs2. apply Locmap.gss.
-  intros. rewrite OTHER3. unfold rs2. rewrite Locmap.gso.
-  apply OTHER1. apply Loc.diff_sym; auto.
-  apply Loc.diff_sym; auto.
-  apply Loc.diff_sym; auto.
-Qed.
-
-Lemma add_cond_correct:
-  forall cond args ifso ifnot rs m b s,
-  (List.length args <= 3)%nat ->
-  Loc.disjoint args temporaries ->
-  eval_condition cond (List.map rs args) = Some b ->
-  s = (if b then ifso else ifnot) ->
-  exists rs',
-  exec_block ge sp (add_cond cond args ifso ifnot) rs m E0 (Cont s) rs' m /\
-  forall l, Loc.notin l temporaries -> rs' l = rs l.
-Proof.
-  intros. unfold add_cond.
-  set (rargs := regs_for args).
-  generalize (add_reloads_correct args
-               (Bcond cond rargs ifso ifnot)
-               rs m H H0).
-  intros [rs1 [EX1 [RES1 OTHER1]]].
-  fold rargs in EX1.
-  exists rs1.
-  split. destruct b; subst s.
-  eapply exec_Bcond_true. eexact EX1. 
-  unfold rargs; rewrite RES1. assumption. 
-  eapply exec_Bcond_false. eexact EX1.
-  unfold rargs; rewrite RES1. assumption. 
-  exact OTHER1.
-Qed.
-
-Definition find_function2 (los: loc + ident) (ls: locset) : option fundef :=
-  match los with
-  | inl l => Genv.find_funct ge (ls l)
-  | inr symb =>
-      match Genv.find_symbol ge symb with
-      | None => None
-      | Some b => Genv.find_funct_ptr ge b
-      end
-  end.
-
-Lemma add_call_correct:
-  forall f vargs m t vres m' sig los args res s ls
-    (EXECF:
-      forall lsi,
-        List.map lsi (loc_arguments (funsig f)) = vargs ->
-        exists lso,
-            exec_function ge f lsi m t lso m'
-         /\ lso (R (loc_result (funsig f))) = vres)
-    (FIND: find_function2 los ls = Some f)
-    (SIG: sig = funsig f)
-    (VARGS: List.map ls args = vargs)
-    (LARGS: List.length args = List.length sig.(sig_args))
-    (AARGS: locs_acceptable args)
-    (RES: loc_acceptable res),
-  exists ls',
-  exec_block ge sp (add_call sig los args res s) ls m t (Cont s) ls' m' /\
-  ls' res = vres /\
-  forall l,
-    Loc.notin l destroyed_at_call -> loc_acceptable l -> Loc.diff l res ->
-    ls' l = ls l.
-Proof.
-  intros until los. 
-  case los; intro fn; intros; simpl in FIND; rewrite <- SIG in EXECF; unfold add_call.
-  (* indirect call *)
-  assert (LEN: List.length args = List.length (loc_arguments sig)).
-    rewrite LARGS. symmetry. apply loc_arguments_length.
-  pose (DISJ := locs_acceptable_disj_temporaries args AARGS).
-  generalize (add_reload_correct fn IT3
-       (parallel_move args (loc_arguments sig)
-          (Bcall sig (inl ident IT3)
-             (add_spill (loc_result sig) res (Bgoto s))))
-       ls m).
-  intros [ls1 [EX1 [RES1 OTHER1]]].
-  generalize
-    (parallel_move_correct args (loc_arguments sig)
-        (Bcall sig (inl ident IT3)
-             (add_spill (loc_result sig) res (Bgoto s)))
-        ls1 m LEN 
-        (no_overlap_arguments args sig AARGS)
-        (loc_arguments_norepet sig)
-        DISJ 
-        (loc_arguments_not_temporaries sig)).
-  intros [ls2 [EX2 [RES2 [TMP2 OTHER2]]]].
-  assert (PARAMS: List.map ls2 (loc_arguments sig) = vargs).
-    rewrite <- VARGS. rewrite RES2. 
-    apply list_map_exten. intros. symmetry. apply OTHER1.
-    apply Loc.diff_sym. apply DISJ. auto. simpl; tauto.
-  generalize (EXECF ls2 PARAMS).
-  intros [ls3 [EX3 RES3]].
-  pose (ls4 := return_regs ls2 ls3).
-  generalize (add_spill_correct (loc_result sig) res
-                (Bgoto s) ls4 m').
-  intros [ls5 [EX5 [RES5 OTHER5]]].
-  exists ls5.
-  (* Execution *)
-  split. apply exec_Bgoto. 
-  eapply exec_trans. eexact EX1.
-  eapply exec_trans. eexact EX2.
-  eapply exec_trans. apply exec_one. apply exec_Bcall with f.
-  unfold find_function. rewrite TMP2. rewrite RES1. 
-  assumption. assumption. eexact EX3.
-  eexact EX5. reflexivity. reflexivity. traceEq.
-  (* Result *)
-  split. rewrite RES5. unfold ls4. rewrite return_regs_result. 
-  assumption.
-  (* Other regs *)
-  intros. rewrite OTHER5; auto.
-  unfold ls4; rewrite return_regs_not_destroyed; auto. 
-  rewrite OTHER2. apply OTHER1. 
-  apply Loc.diff_sym. apply Loc.in_notin_diff with temporaries.  
-  apply temporaries_not_acceptable; auto. simpl; tauto.
-  apply arguments_not_preserved; auto.
-  apply temporaries_not_acceptable; auto.
-  apply Loc.diff_sym; auto.
-  (* direct call *)
-  assert (LEN: List.length args = List.length (loc_arguments sig)).
-    rewrite LARGS. symmetry. apply loc_arguments_length.
-  pose (DISJ := locs_acceptable_disj_temporaries args AARGS).
-  generalize
-    (parallel_move_correct args (loc_arguments sig)
-        (Bcall sig (inr mreg fn)
-             (add_spill (loc_result sig) res (Bgoto s)))
-        ls m LEN 
-        (no_overlap_arguments args sig AARGS)
-        (loc_arguments_norepet sig)
-        DISJ (loc_arguments_not_temporaries sig)).
-  intros [ls2 [EX2 [RES2 [TMP2 OTHER2]]]].
-  assert (PARAMS: List.map ls2 (loc_arguments sig) = vargs).
-    rewrite <- VARGS. rewrite RES2. auto.
-  generalize (EXECF ls2 PARAMS).
-  intros [ls3 [EX3 RES3]].
-  pose (ls4 := return_regs ls2 ls3).
-  generalize (add_spill_correct (loc_result sig) res
-                (Bgoto s) ls4 m').
-  intros [ls5 [EX5 [RES5 OTHER5]]].
-  exists ls5.
-  (* Execution *)
-  split. apply exec_Bgoto. 
-  eapply exec_trans. eexact EX2.
-  eapply exec_trans. apply exec_one. apply exec_Bcall with f.
-  unfold find_function. assumption. assumption. eexact EX3.
-  eexact EX5. reflexivity. traceEq.
-  (* Result *)
-  split. rewrite RES5. 
-  unfold ls4. rewrite return_regs_result. 
-  assumption.
-  (* Other regs *)
-  intros. rewrite OTHER5; auto.
-  unfold ls4; rewrite return_regs_not_destroyed; auto. 
-  apply OTHER2.
-  apply arguments_not_preserved; auto.
-  apply temporaries_not_acceptable; auto.
-  apply Loc.diff_sym; auto.
-Qed.
-
-Lemma add_undefs_correct:
-  forall res b ls m,
-  (forall l, In l res -> loc_acceptable l) ->
-  (forall ofs ty, In (S (Local ofs ty)) res -> ls (S (Local ofs ty)) = Vundef) ->
-  exists ls',
-  exec_instrs ge sp (add_undefs res b) ls m E0 b ls' m /\
-  (forall l, In l res -> ls' l = Vundef) /\
-  (forall l, Loc.notin l res -> ls' l = ls l).
-Proof.
-  induction res; simpl; intros.
-  exists ls. split. apply exec_refl. tauto.
-  assert (ACC: forall l, In l res -> loc_acceptable l).
-    intros. apply H. tauto.
-  destruct a.
-  (* a is a register *)
-  pose (ls1 := Locmap.set (R m0) Vundef ls).
-  assert (UNDEFS: forall ofs ty, In (S (Local ofs ty)) res -> ls1 (S (Local ofs ty)) = Vundef).
-    intros. unfold ls1; rewrite Locmap.gso. auto. red; auto.
-  generalize (IHres b (Locmap.set (R m0) Vundef ls) m ACC UNDEFS).
-  intros [ls2 [EX2 [RES2 OTHER2]]].
-  exists ls2. split.
-  eapply exec_trans. apply exec_one. apply exec_Bop. 
-  simpl; reflexivity. eexact EX2. traceEq.
-  split. intros. case (In_dec Loc.eq l res); intro.
-  apply RES2; auto. 
-  rewrite OTHER2. elim H1; intro.
-  subst l. apply Locmap.gss.
-  contradiction.
-  apply loc_acceptable_notin_notin; auto.  
-  intros. rewrite OTHER2. apply Locmap.gso. 
-  apply Loc.diff_sym; tauto. tauto.
-  (* a is a stack location *)
-  assert (UNDEFS: forall ofs ty, In (S (Local ofs ty)) res -> ls (S (Local ofs ty)) = Vundef).
-    intros. apply H0. tauto.
-  generalize (IHres b ls m ACC UNDEFS).
-  intros [ls2 [EX2 [RES2 OTHER2]]].
-  exists ls2. split. assumption.
-  split. intros. case (In_dec Loc.eq l res); intro.
   auto.
-  rewrite OTHER2. elim H1; intro. 
-  subst l. generalize (H (S s) (in_eq _ _)). 
-  unfold loc_acceptable; destruct s; intuition auto.
-  contradiction.
-  apply loc_acceptable_notin_notin; auto. 
-  intros. apply OTHER2. tauto.
-Qed.
-
-Lemma add_entry_correct:
-  forall sig params undefs s ls m,
-  List.length params = List.length sig.(sig_args) ->
-  Loc.norepet params ->
-  locs_acceptable params ->
-  Loc.disjoint params undefs ->
-  locs_acceptable undefs ->
-  (forall ofs ty, ls (S (Local ofs ty)) = Vundef) ->
-  exists ls',
-  exec_block ge sp (add_entry sig params undefs s) ls m E0 (Cont s) ls' m /\
-  List.map ls' params = List.map ls (loc_parameters sig) /\
-  (forall l, In l undefs -> ls' l = Vundef).
-Proof.
-  intros. 
-  assert (List.length (loc_parameters sig) = List.length params).
-    unfold loc_parameters. rewrite list_length_map. 
-    rewrite loc_arguments_length. auto.
-  assert (DISJ: Loc.disjoint params temporaries).
-    apply locs_acceptable_disj_temporaries; auto.
-  generalize (parallel_move_correct _ _ (add_undefs undefs (Bgoto s))
-                ls m H5
-                (no_overlap_parameters _ _ H1)
-                H0 (loc_parameters_not_temporaries sig) DISJ).
-  intros [ls1 [EX1 [RES1 [TMP1 OTHER1]]]].
-  assert (forall ofs ty, In (S (Local ofs ty)) undefs -> ls1 (S (Local ofs ty)) = Vundef).
-    intros. rewrite OTHER1. auto. apply Loc.disjoint_notin with undefs.
-    apply Loc.disjoint_sym. auto. auto.
-    simpl; tauto.
-  generalize (add_undefs_correct undefs (Bgoto s) ls1 m H3 H6).
-  intros [ls2 [EX2 [RES2 OTHER2]]].
-  exists ls2.
-  split. apply exec_Bgoto. unfold add_entry. 
-  eapply exec_trans. eexact EX1. eexact EX2. traceEq.
-  split. rewrite <- RES1. apply list_map_exten. 
-  intros. symmetry. apply OTHER2. eapply Loc.disjoint_notin; eauto.
-  exact RES2.
-Qed.
-
-Lemma add_return_correct:
-  forall sig optarg ls m,
-  exists ls',
-  exec_block ge sp (add_return sig optarg) ls m E0 Return ls' m /\
-  match optarg with
-  | Some arg => ls' (R (loc_result sig)) = ls arg
-  | None => ls' (R (loc_result sig)) = Vundef
-  end.
-Proof.
-  intros. unfold add_return.
-  destruct optarg.
-  generalize (add_reload_correct l (loc_result sig) Breturn ls m).
-  intros [ls1 [EX1 [RES1 OTH1]]].
-  exists ls1.
-    split. apply exec_Breturn. assumption. assumption.
-  exists (Locmap.set (R (loc_result sig)) Vundef ls).
-    split. apply exec_Breturn. apply exec_one. 
-    apply exec_Bop. reflexivity. apply Locmap.gss. 
-Qed.
-
-End LTL_CONSTRUCTORS.
-
-(** * Exploitation of the typing hypothesis *)
-
-(** Register allocation is applied to RTL code that passed type inference
-  (see file [RTLtyping]), and therefore is well-typed in the type system
-  of [RTLtyping].  We exploit this hypothesis to obtain information on
-  the number of arguments to operations, addressing modes and conditions. *)
-
-Remark length_type_of_condition:
-  forall (c: condition), (List.length (type_of_condition c) <= 3)%nat.
-Proof.
-  destruct c; unfold type_of_condition; simpl; omega.
-Qed.
-
-Remark length_type_of_operation:
-  forall (op: operation), (List.length (fst (type_of_operation op)) <= 3)%nat.
-Proof.
-  destruct op; unfold type_of_operation; simpl; try omega.
-  apply length_type_of_condition.
-Qed.
-
-Remark length_type_of_addressing:
-  forall (addr: addressing), (List.length (type_of_addressing addr) <= 2)%nat.
-Proof.
-  destruct addr; unfold type_of_addressing; simpl; omega.
 Qed.
 
-Lemma length_op_args:
-  forall (env: regenv) (op: operation) (args: list reg) (res: reg),
-  (List.map env args, env res) = type_of_operation op ->
-  (List.length args <= 3)%nat.
-Proof.
-  intros. rewrite <- (list_length_map env). 
-  generalize (length_type_of_operation op).
-  rewrite <- H. simpl. auto.
-Qed.
-
-Lemma length_addr_args:
-  forall (env: regenv) (addr: addressing) (args: list reg),
-  List.map env args = type_of_addressing addr ->
-  (List.length args <= 2)%nat.
-Proof.
-  intros. rewrite <- (list_length_map env). 
-  rewrite H. apply length_type_of_addressing.
-Qed.
-
-Lemma length_cond_args:
-  forall (env: regenv) (cond: condition) (args: list reg),
-  List.map env args = type_of_condition cond ->
-  (List.length args <= 3)%nat.
-Proof.
-  intros. rewrite <- (list_length_map env). 
-  rewrite H. apply length_type_of_condition.
-Qed.
+End AGREE.
 
 (** * Preservation of semantics *)
 
@@ -1176,7 +477,7 @@ Section PRESERVATION.
 
 Variable prog: RTL.program.
 Variable tprog: LTL.program.
-Hypothesis TRANSF: transf_program prog = Some tprog.
+Hypothesis TRANSF: transf_program prog = OK tprog.
 
 Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
@@ -1193,740 +494,460 @@ Lemma functions_translated:
   forall (v: val) (f: RTL.fundef),
   Genv.find_funct ge v = Some f ->
   exists tf,
-  Genv.find_funct tge v = Some tf /\ transf_fundef f = Some tf.
-Proof.  
-  intros. 
-  generalize 
-   (Genv.find_funct_transf_partial transf_fundef TRANSF H).
-  case (transf_fundef f).
-  intros tf [A B]. exists tf. tauto.
-  intros [A B]. elim B. reflexivity.
-Qed.
+  Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_transf_partial transf_fundef TRANSF).
 
 Lemma function_ptr_translated:
-  forall (b: Values.block) (f: RTL.fundef),
+  forall (b: block) (f: RTL.fundef),
   Genv.find_funct_ptr ge b = Some f ->
   exists tf,
-  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = Some tf.
-Proof.  
-  intros. 
-  generalize 
-   (Genv.find_funct_ptr_transf_partial transf_fundef TRANSF H).
-  case (transf_fundef f).
-  intros tf [A B]. exists tf. tauto.
-  intros [A B]. elim B. reflexivity.
-Qed.
+  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial transf_fundef TRANSF).
 
 Lemma sig_function_translated:
   forall f tf,
-  transf_fundef f = Some tf ->
+  transf_fundef f = OK tf ->
   LTL.funsig tf = RTL.funsig f.
 Proof.
-  intros f tf. destruct f; simpl. 
+  intros f tf. destruct f; simpl.
   unfold transf_function.
   destruct (type_function f).
   destruct (analyze f).
-  destruct (regalloc f t). 
-  intro EQ; injection EQ; intro EQ1; rewrite <- EQ1; simpl; auto.
-  congruence. congruence. congruence.
-  intro EQ; inversion EQ; subst tf. reflexivity.
-Qed.
-
-Lemma entrypoint_function_translated:
-  forall f tf,
-  transf_function f = Some tf ->
-  tf.(LTL.fn_entrypoint) = f.(RTL.fn_nextpc).
-Proof.
-  intros f tf. unfold transf_function.
-  destruct (type_function f).
-  destruct (analyze f).
-  destruct (regalloc f t). 
-  intro EQ; injection EQ; intro EQ1; rewrite <- EQ1; simpl; auto.
-  intros; discriminate.
-  intros; discriminate.
-  intros; discriminate.
+  destruct (regalloc f t).
+  intro. monadInv H. inv EQ. auto.  
+  simpl; congruence. simpl; congruence. simpl; congruence.
+  intro EQ; inv EQ. auto.
 Qed.
 
 (** The proof of semantic preservation is a simulation argument
   based on diagrams of the following form:
 <<
-        pc, rs, m ------------------- pc, ls, m
-            |                             |
-            |                             |
-            v                             v
-        pc', rs', m' ---------------- Cont pc', ls', m'
+           st1 --------------- st2
+            |                   |
+           t|                   |t
+            |                   |
+            v                   v
+           st1'--------------- st2'
 >>
   Hypotheses: the left vertical arrow represents a transition in the
-  original RTL code.  The top horizontal bar expresses agreement between
-  [rs] and [ls] over the pseudo-registers live before the RTL instruction
-  at [pc].  
+  original RTL code.  The top horizontal bar is the [match_states]
+  relation defined below.  It implies agreement between
+  the RTL register map [rs] and the LTL location map [ls]
+  over the pseudo-registers live before the RTL instruction at [pc].  
 
-  Conclusions: the right vertical arrow is an [exec_blocks] transition
+  Conclusions: the right vertical arrow is an [exec_instrs] transition
   in the LTL code generated by translation of the current function.
-  The bottom horizontal bar expresses agreement between [rs'] and [ls']
-  over the pseudo-registers live after the RTL instruction at [pc]
-  (which implies agreement over the pseudo-registers live before
-  the instruction at [pc']).
-
-  We capture these diagrams in the following propositions parameterized
-  by the transition in the original RTL code (the left arrow).
+  The bottom horizontal bar is the [match_states] relation.
 *)
 
-Definition exec_instr_prop
-        (c: RTL.code) (sp: val)
-        (pc: node) (rs: regset) (m: mem) (t: trace)
-        (pc': node) (rs': regset) (m': mem) : Prop :=
-  forall f env live assign ls
-         (CF: c = f.(RTL.fn_code))
-         (WT: wt_function f env)
-         (ASG: regalloc f live (live0 f live) env = Some assign)
-         (AG: agree assign (transfer f pc live!!pc) rs ls),
-  let tc := PTree.map (transf_instr f live assign) c in
-  exists ls',
-    exec_blocks tge tc sp pc ls m t (Cont pc') ls' m' /\
-    agree assign live!!pc rs' ls'.
-
-Definition exec_instrs_prop
-        (c: RTL.code) (sp: val)
-        (pc: node) (rs: regset) (m: mem) (t: trace)
-        (pc': node) (rs': regset) (m': mem) : Prop :=
-  forall f env live assign ls,
-  forall (CF: c = f.(RTL.fn_code))
-         (WT: wt_function f env)
-         (ANL: analyze f = Some live)
-         (ASG: regalloc f live (live0 f live) env = Some assign)
-         (AG: agree assign (transfer f pc live!!pc) rs ls)
-         (VALIDPC': c!pc' <> None),
-  let tc := PTree.map (transf_instr f live assign) c in
-  exists ls',
-    exec_blocks tge tc sp pc ls m t (Cont pc') ls' m' /\
-    agree assign (transfer f pc' live!!pc') rs' ls'.
-
-Definition exec_function_prop
-        (f: RTL.fundef) (args: list val) (m: mem)
-        (t: trace) (res: val) (m': mem) : Prop :=
-  forall ls tf,
-  transf_fundef f = Some tf ->
-  List.map ls (Conventions.loc_arguments (funsig tf)) = args ->
-  exists ls',
-    LTL.exec_function tge tf ls m t ls' m' /\
-    ls' (R (Conventions.loc_result (funsig tf))) = res.
-
-(** The simulation proof is by structural induction over the RTL evaluation
-  derivation.  We prove each case of the proof as a separate lemma.
-  There is one lemma for each RTL evaluation rule.  Each lemma concludes
-  one of the [exec_*_prop] predicates, and takes the induction hypotheses
-  (if any) as hypotheses also expressed with the [exec_*_prop] predicates.
-*)
+Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signature -> Prop :=
+  | match_stackframes_nil: forall ty_args,
+      match_stackframes nil nil (mksignature ty_args (Some Tint))
+  | match_stackframes_cons:
+      forall s ts sig res f sp pc rs ls env live assign,
+      match_stackframes s ts (RTL.fn_sig f) ->
+      wt_function f env ->
+      analyze f = Some live ->
+      regalloc f live (live0 f live) env = Some assign ->
+      (forall rv ls1,
+        (forall l, Loc.notin l destroyed_at_call -> loc_acceptable l -> ls1 l = ls l) ->
+        Val.lessdef rv (ls1 (R (loc_result sig))) ->
+        agree assign (transfer f pc live!!pc)
+              (rs#res <- rv)
+              (Locmap.set (assign res) (ls1 (R (loc_result sig))) ls1)) ->
+      match_stackframes
+        (RTL.Stackframe res (RTL.fn_code f) sp pc rs :: s)
+        (LTL.Stackframe (assign res) (transf_fun f live assign) sp ls pc :: ts)
+        sig.
+
+Inductive match_states: RTL.state -> LTL.state -> Prop :=
+  | match_states_intro:
+      forall s f sp pc rs m ts ls tm live assign env
+      (STACKS: match_stackframes s ts (RTL.fn_sig f))
+      (WT: wt_function f env)
+      (ANL: analyze f = Some live)
+      (ASG: regalloc f live (live0 f live) env = Some assign)
+      (AG: agree assign (transfer f pc live!!pc) rs ls)
+      (MMD: Mem.lessdef m tm),
+      match_states (RTL.State s (RTL.fn_code f) sp pc rs m)
+                   (LTL.State ts (transf_fun f live assign) sp pc ls tm)
+  | match_states_call:
+      forall s f args m ts tf ls tm,
+      match_stackframes s ts (RTL.funsig f) ->
+      transf_fundef f = OK tf ->
+      Val.lessdef_list args (List.map ls (loc_arguments (funsig tf))) ->
+      Mem.lessdef m tm ->
+      (forall l, Loc.notin l destroyed_at_call -> loc_acceptable l -> ls l = parent_locset ts l) ->
+      match_states (RTL.Callstate s f args m)
+                   (LTL.Callstate ts tf ls tm)
+  | match_states_return:
+      forall s v m ts sig ls tm,
+      match_stackframes s ts sig ->
+      Val.lessdef v (ls (R (loc_result sig))) ->
+      Mem.lessdef m tm ->
+      (forall l, Loc.notin l destroyed_at_call -> loc_acceptable l -> ls l = parent_locset ts l) ->
+      match_states (RTL.Returnstate s v m)
+                   (LTL.Returnstate ts sig ls tm).
+
+Remark match_stackframes_change:
+  forall s ts sig,
+  match_stackframes s ts sig ->
+  forall sig',
+  sig_res sig' = sig_res sig ->
+  match_stackframes s ts sig'.
+Proof.
+  induction 1; intros.
+  destruct sig'. simpl in H; inv H. constructor.
+  assert (loc_result sig' = loc_result sig).
+  unfold loc_result. rewrite H4; auto.
+  econstructor; eauto.
+  rewrite H5. auto. 
+Qed.
+
+(** The simulation proof is by case analysis over the RTL transition
+  taken in the source program. *)
 
 Ltac CleanupHyps :=
   match goal with
+  | H: (match_states _ _) |- _ =>
+      inv H; CleanupHyps
   | H1: (PTree.get _ _ = Some _),
-    H2: (_ = RTL.fn_code _),
-    H3: (agree _ (transfer _ _ _) _ _) |- _ =>
-      unfold transfer in H3; rewrite <- H2 in H3; rewrite H1 in H3;
-      simpl in H3;
-      CleanupHyps
+    H2: (agree _ (transfer _ _ _) _ _) |- _ =>
+      unfold transfer in H2; rewrite H1 in H2; simpl in H2; CleanupHyps
+  | _ => idtac
+  end.
+
+Ltac WellTypedHyp :=
+  match goal with
   | H1: (PTree.get _ _ = Some _),
-    H2: (_ = RTL.fn_code _),
-    H3: (wt_function _ _) |- _ =>
-      let H := fresh in
+    H2: (wt_function _ _) |- _ =>
       let R := fresh "WTI" in (
-      generalize (wt_instrs _ _ H3); intro H;
-      rewrite <- H2 in H; generalize (H _ _ H1);
-      intro R; clear H; clear H3);
-      CleanupHyps
+      generalize (wt_instrs _ _ H2 _ _ H1); intro R)
   | _ => idtac
   end.
 
-Ltac CleanupGoal :=
+Ltac TranslInstr :=
   match goal with
-  | H1: (PTree.get _ _ = Some _) |- _ =>
-      eapply exec_blocks_one;
-      [rewrite PTree.gmap; rewrite H1;
-       unfold option_map; unfold transf_instr; reflexivity
-      |idtac]
+  | H: (PTree.get _ _ = Some _) |- _ =>
+      simpl; rewrite PTree.gmap; rewrite H; simpl; auto
   end.
 
-Lemma transl_Inop_correct:
- forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : regset) (m : mem) (pc' : RTL.node),
-  c ! pc = Some (Inop pc') ->
-  exec_instr_prop c sp pc rs m E0 pc' rs m.
+Ltac MatchStates :=
+  match goal with
+  | |- match_states (RTL.State _ _ _ _ _ _) (LTL.State _ _ _ _ _ _) =>
+      eapply match_states_intro; eauto; MatchStates
+  | H: (PTree.get ?pc _ = Some _) |- agree _ _ _ _ =>
+      eapply agree_succ with (n := pc); eauto; MatchStates
+  | H: (PTree.get _ _ = Some _) |- In _ (RTL.successors _ _) =>
+      unfold RTL.successors; rewrite H; auto with coqlib
+  | _ => idtac
+  end.
+
+
+Lemma transl_find_function:
+  forall ros f args lv rs ls alloc,
+  RTL.find_function ge ros rs = Some f ->
+  agree alloc (reg_list_live args (reg_sum_live ros lv)) rs ls ->
+  exists tf,
+    LTL.find_function tge (sum_left_map alloc ros) ls = Some tf /\
+    transf_fundef f = OK tf.
 Proof.
-  intros; red; intros; CleanupHyps.
-  exists ls. split.
-  CleanupGoal. apply exec_Bgoto. apply exec_refl.
-  assumption.
+  intros; destruct ros; simpl in *.
+  assert (Val.lessdef (rs#r) (ls (alloc r))).
+    eapply agree_eval_reg. eapply agree_reg_list_live; eauto.
+  inv H1. apply functions_translated. auto.
+  exploit Genv.find_funct_inv; eauto. intros [b EQ]. congruence.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
+  apply function_ptr_translated. auto. discriminate.
 Qed.
 
-Lemma transl_Iop_correct:
-  forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : Regmap.t val) (m : mem) (op : operation) (args : list reg)
-    (res : reg) (pc' : RTL.node) (v: val),
-  c ! pc = Some (Iop op args res pc') ->
-  eval_operation ge sp op (rs ## args) = Some v ->
-  exec_instr_prop c sp pc rs m E0 pc' (rs # res <- v) m.
+Theorem transl_step_correct:
+  forall s1 t s2, RTL.step ge s1 t s2 ->
+  forall s1', match_states s1 s1' ->
+  exists s2', LTL.step tge s1' t s2' /\ match_states s2 s2'.
 Proof.
-  intros; red; intros; CleanupHyps.
+  induction 1; intros; CleanupHyps; WellTypedHyp.
+
+  (* Inop *)
+  econstructor; split.
+  eapply exec_Lnop. TranslInstr. MatchStates.
+
+  (* Iop *)
+  generalize (PTree.gmap (transf_instr f live assign) pc (RTL.fn_code f)).
+  rewrite H. simpl. 
   caseEq (Regset.mem res live!!pc); intro LV;
   rewrite LV in AG.
   generalize (Regset.mem_2 _ _ LV). intro LV'.
-  assert (LL: (List.length (List.map assign args) <= 3)%nat).
-    rewrite list_length_map. 
-    inversion WTI. simpl; omega. simpl; omega.
-    eapply length_op_args. eauto.
-  assert (DISJ: Loc.disjoint (List.map assign args) temporaries).
-    eapply regalloc_disj_temporaries; eauto.
-  assert (eval_operation tge sp op (map ls (map assign args)) = Some v).
-    replace (map ls (map assign args)) with rs##args. 
-    rewrite (eval_operation_preserved symbols_preserved). assumption.
-    symmetry. eapply agree_eval_regs; eauto.
-  generalize (add_op_correct tge sp op 
-               (List.map assign args) (assign res)
-               pc' ls m v LL DISJ H1).
-  intros [ls' [EX [RES OTHER]]].
-  exists ls'. split. 
-  CleanupGoal. rewrite LV. exact EX. 
-  rewrite CF in H. 
   generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
-  unfold correct_alloc_instr. 
+  unfold correct_alloc_instr, is_redundant_move.
   caseEq (is_move_operation op args).
   (* Special case for moves *)
   intros arg IMO CORR.
   generalize (is_move_operation_correct _ _ IMO).
   intros [EQ1 EQ2]. subst op; subst args. 
-  injection H0; intro. rewrite <- H2. 
-  apply agree_move_live with f env live ls; auto. 
-  rewrite RES. rewrite <- H2. symmetry. eapply agree_eval_reg.
-  simpl in AG. eexact AG.
+  injection H0; intro.
+  destruct (Loc.eq (assign arg) (assign res)); intro CODE.
+  (* sub-case: redundant move *)
+  econstructor; split. eapply exec_Lnop; eauto.
+  MatchStates. 
+  rewrite <- H1. eapply agree_redundant_move_live; eauto.
+  (* sub-case: non-redundant move *)
+  econstructor; split. eapply exec_Lop; eauto. simpl. eauto.
+  MatchStates.
+  rewrite <- H1. eapply agree_move_live; eauto.
   (* Not a move *)
-  intros INMO CORR.
-  apply agree_assign_live with f env live ls; auto.
+  intros INMO CORR CODE.
+  assert (exists v1,
+          eval_operation tge sp op (map ls (map assign args)) tm = Some v1
+          /\ Val.lessdef v v1).
+    apply eval_operation_lessdef with m (rs##args); auto.
+    eapply agree_eval_regs; eauto.
+    rewrite (eval_operation_preserved symbols_preserved). assumption.
+  destruct H1 as [v1 [EV VMD]].
+  econstructor; split. eapply exec_Lop; eauto. 
+  MatchStates. 
+  apply agree_assign_live with f env live; auto.
   eapply agree_reg_list_live; eauto.
   (* Result is not live, instruction turned into a nop *)
-  exists ls. split.
-  CleanupGoal. rewrite LV. 
-  apply exec_Bgoto; apply exec_refl.
-  apply agree_assign_dead; auto. 
+  intro CODE. econstructor; split. eapply exec_Lnop; eauto.
+  MatchStates. apply agree_assign_dead; auto. 
   red; intro. exploit Regset.mem_1; eauto. congruence.
-Qed.
 
-Lemma transl_Iload_correct:
- forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : Regmap.t val) (m : mem) (chunk : memory_chunk)
-    (addr : addressing) (args : list reg) (dst : reg) (pc' : RTL.node)
-    (a v : val),
-  c ! pc = Some (Iload chunk addr args dst pc') ->
-  eval_addressing ge sp addr rs ## args = Some a ->
-  loadv chunk m a = Some v ->
-  exec_instr_prop c sp pc rs m E0 pc' rs # dst <- v m.
-Proof.
-  intros; red; intros; CleanupHyps.
+  (* Iload *)
   caseEq (Regset.mem dst live!!pc); intro LV;
   rewrite LV in AG.
   (* dst is live *)
   exploit Regset.mem_2; eauto. intro LV'.
-  assert (LL: (List.length (List.map assign args) <= 2)%nat).
-    rewrite list_length_map. 
-    inversion WTI. 
-    eapply length_addr_args. eauto.
-  assert (DISJ: Loc.disjoint (List.map assign args) temporaries).
-    eapply regalloc_disj_temporaries; eauto.
-  assert (EADDR:
-      eval_addressing tge sp addr (map ls (map assign args)) = Some a).
-    rewrite <- H0. 
-    replace (rs##args) with (map ls (map assign args)).
-    apply eval_addressing_preserved. exact symbols_preserved.
+  assert (exists a',
+      eval_addressing tge sp addr (map ls (map assign args)) = Some a'
+      /\ Val.lessdef a a').
+    apply eval_addressing_lessdef with (rs##args). 
     eapply agree_eval_regs; eauto.
-  generalize (add_load_correct tge sp chunk addr
-               (List.map assign args) (assign dst)
-               pc' ls m _ _ LL DISJ EADDR H1).
-  intros [ls' [EX [RES OTHER]]].
-  exists ls'. split. CleanupGoal. rewrite LV. exact EX. 
-  rewrite CF in H. 
+    rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  destruct H2 as [a' [EVAL VMD]].
+  exploit Mem.loadv_lessdef; eauto. 
+  intros [v' [LOADV VMD2]].
+  econstructor; split.
+  eapply exec_Lload; eauto. TranslInstr. rewrite LV; auto.
   generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
   unfold correct_alloc_instr. intro CORR.
+  MatchStates. 
   eapply agree_assign_live; eauto.
   eapply agree_reg_list_live; eauto.
   (* dst is dead *)
-  exists ls. split.
-  CleanupGoal. rewrite LV. 
-  apply exec_Bgoto; apply exec_refl.
-  apply agree_assign_dead; auto.
+  econstructor; split.
+  eapply exec_Lnop. TranslInstr. rewrite LV; auto.
+  MatchStates. apply agree_assign_dead; auto.
   red; intro; exploit Regset.mem_1; eauto. congruence.
-Qed.
 
-Lemma transl_Istore_correct:
- forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : Regmap.t val) (m : mem) (chunk : memory_chunk)
-    (addr : addressing) (args : list reg) (src : reg) (pc' : RTL.node)
-    (a : val) (m' : mem),
-  c ! pc = Some (Istore chunk addr args src pc') ->
-  eval_addressing ge sp addr rs ## args = Some a ->
-  storev chunk m a rs # src = Some m' ->
-  exec_instr_prop c sp pc rs m E0 pc' rs m'.
-Proof.
-  intros; red; intros; CleanupHyps.
-  assert (LL: (List.length (List.map assign args) <= 2)%nat).
-    rewrite list_length_map. 
-    inversion WTI. 
-    eapply length_addr_args. eauto.
-  assert (DISJ: Loc.disjoint (List.map assign args) temporaries).
-    eapply regalloc_disj_temporaries; eauto.
-  assert (SRC: Loc.notin (assign src) temporaries).
-    eapply regalloc_not_temporary; eauto.
-  assert (EADDR:
-      eval_addressing tge sp addr (map ls (map assign args)) = Some a).
-    rewrite <- H0.
-    replace (rs##args) with (map ls (map assign args)).
-    apply eval_addressing_preserved. exact symbols_preserved.
+  (* Istore *)
+  assert (exists a',
+      eval_addressing tge sp addr (map ls (map assign args)) = Some a'
+      /\ Val.lessdef a a').
+    apply eval_addressing_lessdef with (rs##args). 
     eapply agree_eval_regs; eauto.
-  assert (ESRC: ls (assign src) = rs#src).
+    rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  destruct H2 as [a' [EVAL VMD]].
+  assert (ESRC: Val.lessdef rs#src (ls (assign src))).
     eapply agree_eval_reg. eapply agree_reg_list_live. eauto.
-  rewrite <- ESRC in H1.
-  generalize (add_store_correct tge sp chunk addr
-               (List.map assign args) (assign src)
-               pc' ls m m' a LL DISJ SRC EADDR H1).
-  intros [ls' [EX RES]].
-  exists ls'. split. CleanupGoal. exact EX. 
-  rewrite CF in H. 
-  generalize (regalloc_correct_1 f env live _ _ _ _ ASG H).
-  unfold correct_alloc_instr. intro CORR.
-  eapply agree_exten. eauto. 
-  eapply agree_reg_live. eapply agree_reg_list_live. eauto.
-  assumption.
-Qed.  
-
-Lemma transl_Icall_correct:
- forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : regset) (m : mem) (sig : signature) (ros : reg + ident)
-    (args : list reg) (res : reg) (pc' : RTL.node)
-    (f : RTL.fundef) (vres : val) (m' : mem) (t: trace),
-  c ! pc = Some (Icall sig ros args res pc') ->
-  RTL.find_function ge ros rs = Some f ->
-  RTL.funsig f = sig ->
-  RTL.exec_function ge f (rs##args) m t vres m' ->
-  exec_function_prop f (rs##args) m t vres m' ->
-  exec_instr_prop c sp pc rs m t pc' (rs#res <- vres) m'.
-Proof.
-  intros; red; intros; CleanupHyps.
-  set (los := sum_left_map assign ros).
-  assert (FIND: exists tf,
-            find_function2 tge los ls = Some tf /\
-            transf_fundef f = Some tf).
-    unfold los. destruct ros; simpl; simpl in H0.
-    apply functions_translated. 
-    replace (ls (assign r)) with rs#r. assumption.
-    simpl in AG. symmetry; eapply agree_eval_reg.
-    eapply agree_reg_list_live; eauto.
-    rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
-    apply function_ptr_translated. auto.
-    discriminate.
-  elim FIND; intros tf [AFIND TRF]; clear FIND.
-  assert (ASIG: sig = funsig tf).
-    rewrite (sig_function_translated _ _ TRF). auto.
-  generalize (fun ls => H3 ls tf TRF); intro AEXECF.
-  assert (AVARGS: List.map ls (List.map assign args) = rs##args).
-    eapply agree_eval_regs; eauto.
-  assert (ALARGS: List.length (List.map assign args) = 
-                                   List.length sig.(sig_args)).
-    inversion WTI. rewrite <- H10. 
-    repeat rewrite list_length_map. auto.
-  assert (AACCEPT: locs_acceptable (List.map assign args)).
-    eapply regsalloc_acceptable; eauto.
-  rewrite CF in H. 
-  generalize (regalloc_correct_1 f0 env live _ _ _ _ ASG H).
-  unfold correct_alloc_instr. intros [CORR1 CORR2].
-  assert (ARES: loc_acceptable (assign res)).
-    eapply regalloc_acceptable; eauto.
-  generalize (add_call_correct tge sp tf _ _ _ _ _ _ _ _ _ pc' _
-                AEXECF AFIND ASIG AVARGS ALARGS
-                AACCEPT ARES).
-  intros [ls' [EX [RES OTHER]]].
-  exists ls'.
-  split. rewrite CF. CleanupGoal. exact EX.
-  simpl. eapply agree_call; eauto.
-Qed.
-
-Lemma transl_Ialloc_correct:
-  forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : Regmap.t val) (m : mem) (pc': RTL.node) (arg res: reg)
-    (sz: int) (m': mem) (b: Values.block),
-  c ! pc = Some (Ialloc arg res pc') ->
-  rs#arg = Vint sz ->
-  Mem.alloc m 0 (Int.signed sz) = (m', b) ->
-  exec_instr_prop c sp pc rs m E0 pc' (rs # res <- (Vptr b Int.zero)) m'.
-Proof.
-  intros; red; intros; CleanupHyps.
-  assert (SZ: ls (assign arg) = Vint sz).
+  assert (exists tm', storev chunk tm a' (ls (assign src)) = Some tm'
+                     /\ Mem.lessdef m' tm').
+    eapply Mem.storev_lessdef; eauto. 
+  destruct H2 as [m1' [STORE MMD']].
+  econstructor; split.
+  eapply exec_Lstore; eauto. TranslInstr.
+  MatchStates. eapply agree_reg_live. eapply agree_reg_list_live. eauto.
+
+  (* Icall *)
+  exploit transl_find_function; eauto.  intros [tf [TFIND TF]].
+  generalize (regalloc_correct_1 f0 env live _ _ _ _ ASG H).
+  unfold correct_alloc_instr. intros [CORR1 CORR2].
+  exploit (parmov_spec ls (map assign args) 
+                            (loc_arguments (RTL.funsig f))).
+  apply loc_arguments_norepet.
+  rewrite loc_arguments_length. inv WTI.
+  rewrite <- H7. repeat rewrite list_length_map. auto.
+  intros [PM1 PM2].  
+  econstructor; split. 
+  eapply exec_Lcall; eauto. TranslInstr.
+  rewrite (sig_function_translated _ _ TF). eauto.
+  rewrite (sig_function_translated _ _ TF).
+  econstructor; eauto.
+  econstructor; eauto.
+  intros. eapply agree_succ with (n := pc); eauto.
+  unfold RTL.successors; rewrite H; auto with coqlib.
+  eapply agree_call with (ls := ls); eauto.
+  rewrite Locmap.gss. auto.
+  intros. rewrite Locmap.gso. rewrite H1; auto. apply PM2; auto.
+  eapply arguments_not_preserved; eauto. apply Loc.diff_sym; auto.
+  rewrite (sig_function_translated _ _ TF).
+  change Regset.elt with reg in PM1. 
+  rewrite PM1. eapply agree_eval_regs; eauto. 
+
+  (* Itailcall *)
+  exploit transl_find_function; eauto.  intros [tf [TFIND TF]].
+  exploit (parmov_spec ls (map assign args) 
+                            (loc_arguments (RTL.funsig f))).
+  apply loc_arguments_norepet.
+  rewrite loc_arguments_length. inv WTI.
+  rewrite <- H6. repeat rewrite list_length_map. auto.
+  intros [PM1 PM2].  
+  econstructor; split. 
+  eapply exec_Ltailcall; eauto. TranslInstr.
+  rewrite (sig_function_translated _ _ TF). eauto.
+  rewrite (sig_function_translated _ _ TF).
+  econstructor; eauto.
+  apply match_stackframes_change with (RTL.fn_sig f0); auto.
+  inv WTI. auto.
+  rewrite (sig_function_translated _ _ TF).
+  rewrite return_regs_arguments. 
+  change Regset.elt with reg in PM1. 
+  rewrite PM1. eapply agree_eval_regs; eauto.
+  inv WTI; auto.
+  apply free_lessdef; auto.
+  intros. rewrite return_regs_not_destroyed; auto. 
+
+  (* Ialloc *)
+  assert (Val.lessdef (Vint sz) (ls (assign arg))).
     rewrite <- H0. eapply agree_eval_reg. eauto.
-  generalize (add_alloc_correct tge sp (assign arg) (assign res)
-                                pc' ls m m' sz b SZ H1).
-  intros [ls' [EX [RES OTHER]]].
-  exists ls'. 
-  split. CleanupGoal. exact EX. 
-  rewrite CF in H.
+  inversion H2. subst v. 
+  pose (ls1 := Locmap.set (R loc_alloc_argument) (ls (assign arg)) ls).
+  pose (ls2 := Locmap.set (R loc_alloc_result) (Vptr b Int.zero) ls1).
+  pose (ls3 := Locmap.set (assign res) (ls2 (R loc_alloc_result)) ls2).
+  caseEq (alloc tm 0 (Int.signed sz)). intros tm' b1 ALLOC1.
+  exploit Mem.alloc_lessdef; eauto. intros [EQ MMD1]. subst b1.
+  exists (State ts (transf_fun f live assign) sp pc' ls3 tm'); split.
+  unfold ls3, ls2, ls1. eapply exec_Lalloc; eauto. TranslInstr.
   generalize (regalloc_correct_1 f env live _ _ _ _ ASG H). 
   unfold correct_alloc_instr.  intros [CORR1 CORR2].
-  eapply agree_call with (args := arg :: nil) (ros := inr reg 1%positive) (ls := ls) (ls' := ls'); eauto.
-  intros. apply OTHER. 
-  eapply Loc.in_notin_diff; eauto. 
-  unfold loc_alloc_argument, destroyed_at_call; simpl; tauto.
-  eapply Loc.in_notin_diff; eauto. 
+  MatchStates.
+  eapply agree_call with (args := arg :: nil) (ros := inr reg 1%positive) (ls := ls); eauto.
+  unfold ls3; rewrite Locmap.gss. 
+  unfold ls2; rewrite Locmap.gss. auto.
+  intros. unfold ls3; rewrite Locmap.gso.
+  unfold ls2; rewrite Locmap.gso. 
+  unfold ls1; apply Locmap.gso.
+  apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto. 
   unfold loc_alloc_argument, destroyed_at_call; simpl; tauto.
-  auto.
-Qed.
-
-Lemma transl_Icond_true_correct:
- forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : Regmap.t val) (m : mem) (cond : condition) (args : list reg)
-    (ifso ifnot : RTL.node),
-  c ! pc = Some (Icond cond args ifso ifnot) ->
-  eval_condition cond rs ## args = Some true ->
-  exec_instr_prop c sp pc rs m E0 ifso rs m.
-Proof.
-  intros; red; intros; CleanupHyps.
-  assert (LL: (List.length (map assign args) <= 3)%nat).
-    rewrite list_length_map. inversion WTI. 
-    eapply length_cond_args. eauto.
-  assert (DISJ: Loc.disjoint (map assign args) temporaries).
-    eapply regalloc_disj_temporaries; eauto.
-  assert (COND: eval_condition cond (map ls (map assign args)) = Some true).
-    replace (map ls (map assign args)) with rs##args. assumption.
-    symmetry. eapply agree_eval_regs; eauto.
-  generalize (add_cond_correct tge sp _ _ _ ifnot _ m _ _
-               LL DISJ COND (refl_equal ifso)).
-  intros [ls' [EX OTHER]].
-  exists ls'. split.
-  CleanupGoal. assumption.
-  eapply agree_exten. eauto. eapply agree_reg_list_live. eauto. 
-  assumption.
-Qed.
-
-Lemma transl_Icond_false_correct:
- forall (c : PTree.t instruction) (sp: val) (pc : positive)
-    (rs : Regmap.t val) (m : mem) (cond : condition) (args : list reg)
-    (ifso ifnot : RTL.node),
-  c ! pc = Some (Icond cond args ifso ifnot) ->
-  eval_condition cond rs ## args = Some false ->
-  exec_instr_prop c sp pc rs m E0 ifnot rs m.
-Proof.
-  intros; red; intros; CleanupHyps.
-  assert (LL: (List.length (map assign args) <= 3)%nat).
-    rewrite list_length_map. inversion WTI. 
-    eapply length_cond_args. eauto.
-  assert (DISJ: Loc.disjoint (map assign args) temporaries).
-    eapply regalloc_disj_temporaries; eauto.
-  assert (COND: eval_condition cond (map ls (map assign args)) = Some false).
-    replace (map ls (map assign args)) with rs##args. assumption.
-    symmetry. eapply agree_eval_regs; eauto.
-  generalize (add_cond_correct tge sp _ _ ifso _ _ m _ _
-               LL DISJ COND (refl_equal ifnot)).
-  intros [ls' [EX OTHER]].
-  exists ls'. split.
-  CleanupGoal. assumption.
-  eapply agree_exten. eauto. eapply agree_reg_list_live. eauto. 
-  assumption.
-Qed.
-
-Lemma transl_refl_correct:
- forall (c : RTL.code) (sp: val) (pc : RTL.node) (rs : regset)
-    (m : mem), exec_instrs_prop c sp pc rs m E0 pc rs m.
-Proof.
-  intros; red; intros. 
-  exists ls. split. apply exec_blocks_refl. assumption.
-Qed.
-
-Lemma transl_one_correct:
- forall (c : RTL.code) (sp: val) (pc : RTL.node) (rs : regset)
-    (m : mem) (t: trace) (pc' : RTL.node) (rs' : regset) (m' : mem),
-  RTL.exec_instr ge c sp pc rs m t pc' rs' m' ->
-  exec_instr_prop c sp pc rs m t pc' rs' m' ->
-  exec_instrs_prop c sp pc rs m t pc' rs' m'.
-Proof.
-  intros; red; intros.
-  generalize (H0 f env live assign ls CF WT ASG AG).
-  intros [ls' [EX AG']].
-  exists ls'. split.
-  exact EX.
-  apply agree_increasing with live!!pc.
-  apply analyze_correct. auto. 
-  rewrite <- CF. eapply exec_instr_present; eauto.
-  rewrite <- CF. auto.
-  eapply RTL.successors_correct.
-  rewrite <- CF. eexact H. exact AG'.
-Qed.
-
-Lemma transl_trans_correct:
- forall (c : RTL.code) (sp: val) (pc1 : RTL.node) (rs1 : regset)
-    (m1 : mem) (t1: trace) (pc2 : RTL.node) (rs2 : regset) (m2 : mem)
-    (t2: trace) (pc3 : RTL.node) (rs3 : regset) (m3 : mem) (t3: trace),
-  RTL.exec_instrs ge c sp pc1 rs1 m1 t1 pc2 rs2 m2 ->
-  exec_instrs_prop c sp pc1 rs1 m1 t1 pc2 rs2 m2 ->
-  RTL.exec_instrs ge c sp pc2 rs2 m2 t2 pc3 rs3 m3 ->
-  exec_instrs_prop c sp pc2 rs2 m2 t2 pc3 rs3 m3 ->
-  t3 = t1 ** t2 ->
-  exec_instrs_prop c sp pc1 rs1 m1 t3 pc3 rs3 m3.
-Proof.
-  intros; red; intros.
-  assert (VALIDPC2: c!pc2 <> None).
-    eapply exec_instrs_present; eauto.
-  generalize (H0 f env live assign ls CF WT ANL ASG AG VALIDPC2).
-  intros [ls1 [EX1 AG1]]. 
-  generalize (H2 f env live assign ls1 CF WT ANL ASG AG1 VALIDPC'). 
-  intros [ls2 [EX2 AG2]].
-  exists ls2. split.
-  eapply exec_blocks_trans. eexact EX1. eexact EX2. auto.
-  exact AG2.
-Qed.
-
-Remark regset_mem_reg_list_dead:
-  forall rl r live,
-  Regset.In r (reg_list_dead rl live) ->
-  ~(In r rl) /\ Regset.In r live.
-Proof.
-  induction rl; simpl; intros.
-  tauto.
-  elim (IHrl r (reg_dead a live) H). intros.
-  assert (a <> r). red; intro; subst r.
-  exploit Regset.remove_1; eauto. 
-  intuition. eapply Regset.remove_3; eauto.
-Qed. 
-
-Lemma transf_entrypoint_correct:
-  forall f env live assign c ls args sp m,
-  wt_function f env ->
-  regalloc f live (live0 f live) env = Some assign ->
-  c!(RTL.fn_nextpc f) = None ->
-  List.map ls (loc_parameters (RTL.fn_sig f)) = args ->
-  (forall ofs ty, ls (S (Local ofs ty)) = Vundef) ->
-  let tc := transf_entrypoint f live assign c in
-  exists ls',
-  exec_blocks tge tc sp (RTL.fn_nextpc f) ls m E0
-                        (Cont (RTL.fn_entrypoint f)) ls' m /\
-  agree assign (transfer f (RTL.fn_entrypoint f) live!!(RTL.fn_entrypoint f))
-        (init_regs args (RTL.fn_params f)) ls'.
-Proof.
-  intros until m.
-  unfold transf_entrypoint. 
-  set (oldentry := RTL.fn_entrypoint f).
-  set (newentry := RTL.fn_nextpc f).
-  set (params := RTL.fn_params f).
-  set (undefs := Regset.elements (reg_list_dead params (transfer f oldentry live!!oldentry))).
-  intros.
-
-  assert (A1: List.length (List.map assign params) =
-              List.length (RTL.fn_sig f).(sig_args)).
-    rewrite <- (wt_params _ _ H).  
-    repeat (rewrite list_length_map). auto.
-  assert (A2: Loc.norepet (List.map assign (RTL.fn_params f))).
-    eapply regalloc_norepet_norepet; eauto.
-    eapply regalloc_correct_2; eauto.
-    eapply wt_norepet; eauto.
-  assert (A3: locs_acceptable (List.map assign (RTL.fn_params f))).
-    eapply regsalloc_acceptable; eauto.
-  assert (A4: Loc.disjoint 
-                (List.map assign (RTL.fn_params f))
-                (List.map assign undefs)).
-    red. intros ap au INAP INAU.
-    generalize (list_in_map_inv _ _ _ INAP).
-    intros [p [AP INP]]. clear INAP; subst ap.
-    generalize (list_in_map_inv _ _ _ INAU).
-    intros [u [AU INU]]. clear INAU; subst au.
-    assert (INU': InA Regset.E.eq u undefs).
-      rewrite InA_alt. exists u; intuition. 
-    generalize (Regset.elements_2 _ _ INU'). intro.
-    generalize (regset_mem_reg_list_dead _ _ _ H4).
-    intros [A B]. 
-    eapply regalloc_noteq_diff; eauto.
-    eapply regalloc_correct_3; eauto.
-    red; intro; subst u. elim (A INP).
-  assert (A5: forall l, In l (List.map assign undefs) -> loc_acceptable l).
-    intros. 
-    generalize (list_in_map_inv _ _ _ H4).
-    intros [r [AR INR]]. clear H4; subst l.
-    eapply regalloc_acceptable; eauto.
-  generalize (add_entry_correct
-    tge sp (RTL.fn_sig f)
-    (List.map assign (RTL.fn_params f))
-    (List.map assign undefs)
-    oldentry ls m A1 A2 A3 A4 A5 H3).
-  intros [ls1 [EX1 [PARAMS1 UNDEFS1]]].
-  exists ls1. 
-  split. eapply exec_blocks_one.
-  rewrite PTree.gss. reflexivity.
-  assumption.
-  change (transfer f oldentry live!!oldentry)
-    with (live0 f live).
-  unfold params; eapply agree_parameters; eauto.
-  change Regset.elt with reg in PARAMS1. 
-  rewrite PARAMS1. assumption.
-  fold oldentry; fold params. intros.
-  apply UNDEFS1. apply in_map. 
-  unfold undefs.
-  change (transfer f oldentry live!!oldentry)
-    with (live0 f live).
-  exploit Regset.elements_1; eauto.
-  rewrite InA_alt. intros [r' [C D]]. hnf in C. subst r'. auto.
-Qed.
-
-Lemma transl_function_correct:
- forall (f : RTL.function) (m m1 : mem) (stk : Values.block)
-    (args : list val) (t: trace) (pc : RTL.node) (rs : regset) (m2 : mem)
-    (or : option reg) (vres : val),
-  alloc m 0 (RTL.fn_stacksize f) = (m1, stk) ->
-  RTL.exec_instrs ge (RTL.fn_code f) (Vptr stk Int.zero)
-    (RTL.fn_entrypoint f) (init_regs args (fn_params f)) m1 t pc rs m2 ->
-  exec_instrs_prop (RTL.fn_code f) (Vptr stk Int.zero)
-    (RTL.fn_entrypoint f) (init_regs args (fn_params f)) m1 t pc rs m2 ->
-  (RTL.fn_code f) ! pc = Some (Ireturn or) ->
-  vres = regmap_optget or Vundef rs ->
-  exec_function_prop (Internal f) args m t vres (free m2 stk).
-Proof.
-  intros; red; intros until tf.
-  unfold transf_fundef, transf_partial_fundef, transf_function.
-  caseEq (type_function f).
-  intros env TRF. 
-  caseEq (analyze f).
-  intros live ANL.
-  change (transfer f (RTL.fn_entrypoint f) live!!(RTL.fn_entrypoint f))
-    with (live0 f live).
-  caseEq (regalloc f live (live0 f live) env).
-  intros alloc ASG.
-  set (tc1 := PTree.map (transf_instr f live alloc) (RTL.fn_code f)).
-  set (tc2 := transf_entrypoint f live alloc tc1).
-  intro EQ; injection EQ; intro TF; clear EQ. intro VARGS. 
-  generalize (type_function_correct _ _ TRF); intro WTF.
-  assert (NEWINSTR: tc1!(RTL.fn_nextpc f) = None).
-    unfold tc1; rewrite PTree.gmap. unfold option_map.
-    elim (RTL.fn_code_wf f (fn_nextpc f)); intro.
-    elim (Plt_ne _ _ H4). auto.
-    rewrite H4. auto.
-  pose (ls1 := call_regs ls).
-  assert (VARGS1: List.map ls1 (loc_parameters (RTL.fn_sig f)) = args).
-    replace (RTL.fn_sig f) with (funsig tf).
-    rewrite <- VARGS. unfold loc_parameters.
-    rewrite list_map_compose. apply list_map_exten.
-    intros. symmetry. unfold ls1. eapply call_regs_param_of_arg; eauto.
-    rewrite <- TF; reflexivity.
-  assert (VUNDEFS: forall (ofs : Z) (ty : typ), ls1 (S (Local ofs ty)) = Vundef).
-    intros. reflexivity.    
-  generalize (transf_entrypoint_correct f env live alloc
-                tc1 ls1 args (Vptr stk Int.zero) m1
-                WTF ASG NEWINSTR VARGS1 VUNDEFS).
-  fold tc2. intros [ls2 [EX2 AGREE2]].
-  assert (VALIDPC: f.(RTL.fn_code)!pc <> None). congruence.
-  generalize (H1 f env live alloc ls2
-               (refl_equal _) WTF ANL ASG AGREE2 VALIDPC).
-  fold tc1. intros [ls3 [EX3 AGREE3]].
-  generalize (add_return_correct tge (Vptr stk Int.zero) (RTL.fn_sig f) 
-                            (option_map alloc or) ls3 m2).
-  intros [ls4 [EX4 RES4]].
-  exists ls4. 
-  (* Execution *)
-  split. rewrite <- TF; apply exec_funct_internal with m1; simpl.
-  assumption.
-  fold ls1. 
-  eapply exec_blocks_trans. eexact EX2. 
-  apply exec_blocks_extends with tc1.
-  intro p. unfold tc2. unfold transf_entrypoint.
-  case (peq p (fn_nextpc f)); intro.
-  subst p. right. unfold tc1; rewrite PTree.gmap. 
-  elim (RTL.fn_code_wf f (fn_nextpc f)); intro.
-  elim (Plt_ne _ _ H4); auto. rewrite H4; reflexivity.
-  left; apply PTree.gso; auto.
-  eapply exec_blocks_trans. eexact EX3.
-  eapply exec_blocks_one. 
-  unfold tc1. rewrite PTree.gmap. rewrite H2. simpl. reflexivity.
-  eexact EX4. reflexivity. traceEq.
-  destruct or.
-  simpl in RES4. simpl in H3.
-  rewrite H3. rewrite <- TF; simpl. rewrite RES4.
-  eapply agree_eval_reg; eauto.
-  unfold transfer in AGREE3; rewrite H2 in AGREE3. 
-  unfold reg_option_live in AGREE3. eexact AGREE3.
-  simpl in RES4. simpl in H3.
-  rewrite <- TF; simpl. congruence.
-  intros; discriminate.
-  intros; discriminate.
-  intros; discriminate.
-Qed.
+  apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto. 
+  unfold loc_alloc_result, destroyed_at_call; simpl; tauto.
+  apply Loc.diff_sym; auto.
 
-Lemma transl_external_function_correct:
-  forall (ef : external_function) (args : list val) (m : mem)
-         (t: trace) (res: val),
-  event_match ef args t res ->
-  exec_function_prop (External ef) args m t res m.
-Proof.
-  intros; red; intros.
-  simpl in H0. 
-  simplify_eq H0; intro.
-  exists (Locmap.set (R (loc_result (funsig tf))) res ls); split.
-  subst tf. eapply exec_funct_external; eauto. 
-  apply Locmap.gss.
+  (* Icond, true *)
+  assert (COND: eval_condition cond (map ls (map assign args)) tm = Some true).
+    eapply eval_condition_lessdef; eauto. 
+    eapply agree_eval_regs; eauto.
+  econstructor; split.
+  eapply exec_Lcond_true; eauto. TranslInstr.
+  MatchStates. eapply agree_reg_list_live. eauto.
+  (* Icond, false *)
+  assert (COND: eval_condition cond (map ls (map assign args)) tm = Some false).
+    eapply eval_condition_lessdef; eauto. 
+    eapply agree_eval_regs; eauto.
+  econstructor; split.
+  eapply exec_Lcond_false; eauto. TranslInstr.
+  MatchStates. eapply agree_reg_list_live. eauto.
+
+  (* Ireturn *)
+  econstructor; split.
+  eapply exec_Lreturn; eauto. TranslInstr; eauto.
+  econstructor; eauto.
+  rewrite return_regs_result.  
+  inv WTI. destruct or; simpl in *. 
+  rewrite Locmap.gss. eapply agree_eval_reg; eauto.
+  constructor. 
+  apply free_lessdef; auto.
+  intros. apply return_regs_not_destroyed; auto.
+
+  (* internal function *)
+  generalize H6. simpl.  unfold transf_function.
+  caseEq (type_function f); simpl; try congruence. intros env TYP.
+  assert (WTF: wt_function f env). apply type_function_correct; auto.
+  caseEq (analyze f); simpl; try congruence. intros live ANL.
+  caseEq (regalloc f live (live0 f live) env); simpl; try congruence.
+  intros alloc ALLOC EQ. inv EQ. simpl in *.
+  caseEq (Mem.alloc tm 0 (RTL.fn_stacksize f)). intros tm' stk' MEMALLOC.
+  exploit alloc_lessdef; eauto. intros [EQ LDM]. subst stk'.
+  econstructor; split.
+  eapply exec_function_internal; simpl; eauto.
+  simpl. econstructor; eauto.
+  apply agree_init_regs. intros; eapply regalloc_correct_3; eauto.
+  inv WTF.
+  exploit (parmov_spec (call_regs ls) 
+                       (loc_parameters (RTL.fn_sig f))
+                       (map alloc (RTL.fn_params f))).
+  eapply regalloc_norepet_norepet; eauto.
+  eapply regalloc_correct_2; eauto.
+  rewrite loc_parameters_length. symmetry. 
+  transitivity (length (map env (RTL.fn_params f))).
+  repeat rewrite list_length_map. auto.
+  rewrite wt_params; auto.
+  intros [PM1 PM2].
+  change Regset.elt with reg in PM1. rewrite PM1.
+  replace (map (call_regs ls) (loc_parameters (RTL.fn_sig f)))
+     with (map ls (loc_arguments (RTL.fn_sig f))).
+  auto.
+  symmetry. unfold loc_parameters. rewrite list_map_compose.
+  apply list_map_exten. intros. symmetry. eapply call_regs_param_of_arg; eauto.
+
+  (* external function *)
+  injection H6; intro EQ; inv EQ.
+  exploit event_match_lessdef; eauto. intros [tres [A B]].
+  econstructor; split.
+  eapply exec_function_external; eauto.
+  eapply match_states_return; eauto.
+  rewrite Locmap.gss. auto.
+  intros. rewrite <- H10; auto. apply Locmap.gso.
+  apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto.
+  apply loc_result_caller_save. 
+
+  (* return *)
+  inv H3. 
+  econstructor; split.
+  eapply exec_return; eauto.
+  econstructor; eauto.
 Qed.
 
-(** The correctness of the code transformation is obtained by
-  structural induction (and case analysis on the last rule used)
-  on the RTL evaluation derivation.
-  This is a 3-way mutual induction, using [exec_instr_prop],
-  [exec_instrs_prop] and [exec_function_prop] as the induction
-  hypotheses, and the lemmas above as cases for the induction. *)
-
-Theorem transl_function_correctness:
-  forall f args m t res m',
-  RTL.exec_function ge f args m t res m' ->
-  exec_function_prop f args m t res m'.
-Proof
-  (exec_function_ind_3 ge 
-          exec_instr_prop 
-          exec_instrs_prop
-          exec_function_prop
-         
-          transl_Inop_correct
-          transl_Iop_correct
-          transl_Iload_correct
-          transl_Istore_correct
-          transl_Icall_correct
-          transl_Ialloc_correct
-          transl_Icond_true_correct
-          transl_Icond_false_correct
-
-          transl_refl_correct
-          transl_one_correct
-          transl_trans_correct
-
-          transl_function_correct
-          transl_external_function_correct).
-
 (** The semantic equivalence between the original and transformed programs
   follows easily. *)
 
-Theorem transl_program_correct:
-  forall (t: trace) (r: val),
-  RTL.exec_program prog t r -> LTL.exec_program tprog t r.
+Lemma transf_initial_states:
+  forall st1, RTL.initial_state prog st1 ->
+  exists st2, LTL.initial_state tprog st2 /\ match_states st1 st2.
 Proof.
-  intros t r [b [f [m [FIND1 [FIND2 [SIG EXEC]]]]]].
-  generalize (function_ptr_translated _ _ FIND2).
-  intros [tf [TFIND TRF]].
+  intros. inversion H.
+  exploit function_ptr_translated; eauto. intros [tf [FIND TR]].
   assert (SIG2: funsig tf = mksignature nil (Some Tint)).
-    rewrite <- SIG. apply sig_function_translated; auto.
-  assert (VPARAMS: map (Locmap.init Vundef) (loc_arguments (funsig tf)) = nil).
-    rewrite SIG2. reflexivity.
-  generalize (transl_function_correctness _ _ _ _ _ _ EXEC
-                (Locmap.init Vundef) tf TRF VPARAMS).
-  intros [ls' [TEXEC RES]].
-  red. exists b; exists tf; exists ls'; exists m.
-  split. rewrite symbols_preserved. 
-  rewrite (transform_partial_program_main _ _ TRANSF).
-  assumption. 
-  split. assumption.
-  split. assumption.
-  split. replace (Genv.init_mem tprog) with (Genv.init_mem prog).
-  assumption. symmetry. 
-  exact (Genv.init_mem_transf_partial _ _ TRANSF).
-  assumption.
+    rewrite <- H2. apply sig_function_translated; auto.
+  assert (VPARAMS: Val.lessdef_list nil (map (Locmap.init Vundef) (loc_arguments (funsig tf)))).
+    rewrite SIG2. simpl. constructor.
+  assert (GENV: (Genv.init_mem tprog) = (Genv.init_mem prog)).
+    exact (Genv.init_mem_transf_partial _ _ TRANSF).
+  assert (MMD: Mem.lessdef (Genv.init_mem prog) (Genv.init_mem tprog)).
+    rewrite GENV. apply Mem.lessdef_refl.
+  exists (LTL.Callstate nil tf (Locmap.init Vundef) (Genv.init_mem tprog)); split.
+  econstructor; eauto.
+  rewrite symbols_preserved. 
+  rewrite (transform_partial_program_main _ _ TRANSF).  auto.
+  constructor; auto. rewrite H2; constructor. 
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> RTL.final_state st1 r -> LTL.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv H3. econstructor.
+  inv H4. auto. 
+Qed.
+
+Theorem transf_program_correct:
+  forall (beh: program_behavior),
+  RTL.exec_program prog beh -> LTL.exec_program tprog beh.
+Proof.
+  unfold RTL.exec_program, LTL.exec_program; intros.
+  eapply simulation_step_preservation; eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact transl_step_correct. 
 Qed.
 
 End PRESERVATION.
diff --git a/backend/Alloctyping.v b/backend/Alloctyping.v
index 4c4c4f76..c0abf0d6 100644
--- a/backend/Alloctyping.v
+++ b/backend/Alloctyping.v
@@ -2,6 +2,7 @@
 
 Require Import Coqlib.
 Require Import Maps.
+Require Import Errors.
 Require Import AST.
 Require Import Op.
 Require Import Registers.
@@ -15,7 +16,6 @@ Require Import Allocproof.
 Require Import RTLtyping.
 Require Import LTLtyping.
 Require Import Conventions.
-Require Import Parallelmove.
 
 (** This file proves that register allocation (the translation from
   RTL to LTL defined in file [Allocation]) preserves typing:
@@ -30,9 +30,10 @@ Variable live: PMap.t Regset.t.
 Variable alloc: reg -> loc.
 Variable tf: LTL.function.
 
-Hypothesis TYPE_RTL: type_function f = Some env.
+Hypothesis TYPE_RTL: type_function f = OK env.
+Hypothesis LIVE: analyze f = Some live.
 Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc.
-Hypothesis TRANSL: transf_function f = Some tf.
+Hypothesis TRANSL: transf_function f = OK tf.
 
 Lemma wt_rtl_function: RTLtyping.wt_function f env.
 Proof.
@@ -51,473 +52,90 @@ Proof.
   intros. symmetry. apply alloc_type. 
 Qed.
 
-(** [loc_read_ok l] states whether location [l] is well-formed in an
-  argument context (for reading). *)
-
-Definition loc_read_ok (l: loc) : Prop :=
-  match l with R r => True | S s => slot_bounded tf s end.
-
-(** [loc_write_ok l] states whether location [l] is well-formed in a
-  result context (for writing). *)
-
-Definition loc_write_ok (l: loc) : Prop :=
-  match l with 
-  | R r => True
-  | S (Incoming _ _) => False
-  | S s => slot_bounded tf s end.
-
-Definition locs_read_ok (ll: list loc) : Prop :=
-  forall l, In l ll -> loc_read_ok l.
-
-Definition locs_write_ok (ll: list loc) : Prop :=
-  forall l, In l ll -> loc_write_ok l.
-
-Remark loc_write_ok_read_ok:
-  forall l, loc_write_ok l -> loc_read_ok l.
-Proof.
-  destruct l; simpl. auto.
-  destruct s; tauto.
-Qed.
-Hint Resolve loc_write_ok_read_ok: allocty.
-
-Remark locs_write_ok_read_ok:
-  forall ll, locs_write_ok ll -> locs_read_ok ll.
-Proof.
-  unfold locs_write_ok, locs_read_ok. auto with allocty.
-Qed.
-Hint Resolve locs_write_ok_read_ok: allocty.
-
-Lemma alloc_write_ok:
-  forall r, loc_write_ok (alloc r).
+Lemma alloc_acceptable: 
+  forall r, loc_acceptable (alloc r).
 Proof.
-  intros. generalize (regalloc_acceptable _ _ _ _ _ r ALLOC).
-  destruct (alloc r); simpl. auto. 
-  destruct s; try contradiction. simpl. omega.
+  intros. eapply regalloc_acceptable; eauto.
 Qed.
-Hint Resolve alloc_write_ok: allocty.
 
-Lemma allocs_write_ok:
-  forall rl, locs_write_ok (List.map alloc rl).
+Lemma allocs_acceptable:
+  forall rl, locs_acceptable (List.map alloc rl).
 Proof.
-  intros; red; intros.
-  generalize (list_in_map_inv _ _ _ H). intros [r [EQ IN]].
-  subst l. auto with allocty.
+  intros. eapply regsalloc_acceptable; eauto.
 Qed.
-Hint Resolve allocs_write_ok: allocty.
-
-(** * Typing LTL constructors *)
-
-(** We show that the LTL constructor functions defined in [Allocation]
-  generate well-typed LTL basic blocks, given sufficient typing
-  and well-formedness hypotheses over the locations involved. *)
 
-Lemma wt_add_reload:
-  forall src dst k,
-  loc_read_ok src ->
-  Loc.type src = mreg_type dst ->
-  wt_block tf k ->
-  wt_block tf (add_reload src dst k).
+Remark transf_unroll:
+  tf = transf_fun f live alloc.
 Proof.
-  intros. unfold add_reload. destruct src. 
-  case (mreg_eq m dst); intro. auto. apply wt_Bopmove. exact H0. auto.
-  apply wt_Bgetstack. exact H0. exact H. auto.
+  generalize TRANSL. unfold transf_function.
+  rewrite TYPE_RTL. rewrite LIVE. rewrite ALLOC. congruence.
 Qed.
 
-Lemma wt_add_spill:
-  forall src dst k,
-  loc_write_ok dst ->
-  mreg_type src = Loc.type dst ->
-  wt_block tf k ->
-  wt_block tf (add_spill src dst k).
+Lemma valid_successor_transf:
+  forall s,
+  RTLtyping.valid_successor f s ->
+  LTLtyping.valid_successor tf s.
 Proof.
-  intros. unfold add_spill. destruct dst. 
-  case (mreg_eq src m); intro. auto. 
-  apply wt_Bopmove. exact H0. auto.
-  apply wt_Bsetstack. generalize H. simpl. destruct s; auto.
-  symmetry. exact H0. generalize H. simpl. destruct s; auto. contradiction.
-  auto.
-Qed.
+  unfold RTLtyping.valid_successor, LTLtyping.valid_successor.
+  intros s [i AT].
+  rewrite transf_unroll; simpl. rewrite PTree.gmap. 
+  rewrite AT. exists (transf_instr f live alloc s i). auto.
+Qed.  
 
-Lemma wt_add_reloads:
-  forall srcs dsts k,
-  locs_read_ok srcs ->
-  List.map Loc.type srcs = List.map mreg_type dsts ->
-  wt_block tf k ->
-  wt_block tf (add_reloads srcs dsts k).
-Proof.
-  induction srcs; intros.
-  destruct dsts. simpl; auto. simpl in H0; discriminate. 
-  destruct dsts; simpl in H0. discriminate. simpl. 
-  apply wt_add_reload. apply H; apply in_eq. congruence. 
-  apply IHsrcs. red; intros; apply H; apply in_cons; auto.
-  congruence. auto.
-Qed.
-
-Lemma wt_reg_for:
-  forall l, mreg_type (reg_for l) = Loc.type l.
-Proof.
-  intros. destruct l; simpl. auto.
-  case (slot_type s); reflexivity.
-Qed.
-Hint Resolve wt_reg_for: allocty.
-
-Lemma wt_regs_for_rec:
-  forall locs itmps ftmps,
-  (List.length locs <= List.length itmps)%nat ->
-  (List.length locs <= List.length ftmps)%nat ->
-  (forall r, In r itmps -> mreg_type r = Tint) ->
-  (forall r, In r ftmps -> mreg_type r = Tfloat) ->
-  List.map mreg_type (regs_for_rec locs itmps ftmps) = List.map Loc.type locs.
-Proof.
-  induction locs; intros.
-  simpl. auto.
-  destruct itmps; simpl in H. omegaContradiction.
-  destruct ftmps; simpl in H0. omegaContradiction.
-  simpl. apply (f_equal2 (@cons typ)). 
-  destruct a. reflexivity. simpl. case (slot_type s).
-  apply H1; apply in_eq. apply H2; apply in_eq.
-  apply IHlocs. omega. omega. 
-  intros; apply H1; apply in_cons; auto.
-  intros; apply H2; apply in_cons; auto.
-Qed.
-
-Lemma wt_regs_for:
-  forall locs,
-  (List.length locs <= 3)%nat ->
-  List.map mreg_type (regs_for locs) = List.map Loc.type locs.
-Proof.
-  intros. unfold regs_for. apply wt_regs_for_rec.
-  simpl. auto. simpl. auto. 
-  simpl; intros; intuition; subst r; reflexivity.
-  simpl; intros; intuition; subst r; reflexivity.
-Qed.
-Hint Resolve wt_regs_for: allocty.
-
-Lemma wt_add_move:
-  forall src dst b,
-  loc_read_ok src -> loc_write_ok dst ->
-  Loc.type src = Loc.type dst ->
-  wt_block tf b ->
-  wt_block tf (add_move src dst b).
-Proof.
-  intros. unfold add_move. 
-  case (Loc.eq src dst); intro.
-  auto.
-  destruct src. apply wt_add_spill; auto. 
-  destruct dst. apply wt_add_reload; auto.
-  set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end).
-  apply wt_add_reload. auto. 
-  simpl. unfold tmp. case (slot_type s); reflexivity.
-  apply wt_add_spill. auto.
-  simpl. simpl in H1. rewrite <- H1. unfold tmp. case (slot_type s); reflexivity.
-  auto.
-Qed.
-
-Lemma wt_add_moves:
-  forall b moves,
-  (forall s d, In (s, d) moves ->
-      loc_read_ok s /\ loc_write_ok d /\ Loc.type s = Loc.type d) ->
-  wt_block tf b ->
-  wt_block tf 
-    (List.fold_right (fun p k => add_move (fst p) (snd p) k) b moves).
-Proof.
-  induction moves; simpl; intros.
-  auto.
-  destruct a as [s d]. simpl. 
-  elim (H s d). intros A [B C]. apply wt_add_move; auto. auto. 
-Qed.
-
-Theorem wt_parallel_move:
- forall srcs dsts b,
- List.map Loc.type srcs = List.map Loc.type dsts ->
- locs_read_ok srcs ->
- locs_write_ok dsts -> wt_block tf b ->  wt_block tf (parallel_move srcs dsts b).
-Proof.
-  intros. unfold parallel_move. apply wt_add_moves; auto.
-  intros. 
-  elim (parmove_prop_2 _ _ _ _ H3); intros A B.
-  split. destruct A as [C|[C|C]]. 
-  apply H0; auto. subst s; exact I. subst s; exact I.
-  split. destruct B as [C|[C|C]]. 
-  apply H1; auto. subst d; exact I. subst d; exact I.
-  eapply parmove_prop_3; eauto.
-Qed.
- 
-Lemma wt_add_op_move:
-  forall src res s,
-  Loc.type src = Loc.type res ->
-  loc_read_ok src -> loc_write_ok res ->
-  wt_block tf (add_op Omove (src :: nil) res s).
-Proof.
-  intros. unfold add_op. simpl. 
-  apply wt_add_move. auto. auto. auto. constructor. 
-Qed.
-
-Lemma wt_add_op_undef:
-  forall res s,
-  loc_write_ok res ->
-  wt_block tf (add_op Oundef nil res s).
-Proof.
-  intros. unfold add_op. simpl. 
-  apply wt_Bopundef. apply wt_add_spill. auto. auto with allocty. 
-  constructor.
-Qed.
-
-Lemma wt_add_op_others:
-  forall op args res s,
-  op <> Omove -> op <> Oundef ->
-  (List.map Loc.type args, Loc.type res) = type_of_operation op ->
-  locs_read_ok args ->
-  loc_write_ok res ->
-  wt_block tf (add_op op args res s).
-Proof.
-  intros. unfold add_op. 
-  caseEq (is_move_operation op args). intros.
-  generalize (is_move_operation_correct op args H4). tauto.
-  intro. assert ((List.length args <= 3)%nat).
-    replace (length args) with (length (fst (type_of_operation op))).
-    apply Allocproof.length_type_of_operation. 
-    rewrite <- H1. simpl. apply list_length_map. 
-  generalize (wt_regs_for args H5); intro.
-  generalize (wt_reg_for res); intro.
-  apply wt_add_reloads. auto. auto. 
-  apply wt_Bop. auto. auto. congruence. 
-  apply wt_add_spill. auto. auto. constructor.
-Qed.
-
-Lemma wt_add_load:
-  forall chunk addr args dst s,
-  List.map Loc.type args = type_of_addressing addr ->
-  Loc.type dst = type_of_chunk chunk ->
-  locs_read_ok args ->
-  loc_write_ok dst ->
-  wt_block tf (add_load chunk addr args dst s).
-Proof.
-  intros. unfold add_load.
-  assert ((List.length args <= 2)%nat).
-    replace (length args) with (length (type_of_addressing addr)).
-    apply Allocproof.length_type_of_addressing.
-    rewrite <- H. apply list_length_map.
-  assert ((List.length args <= 3)%nat). omega.
-  generalize (wt_regs_for args H4); intro.
-  generalize (wt_reg_for dst); intro.
-  apply wt_add_reloads. auto. auto. 
-  apply wt_Bload. congruence. congruence. 
-  apply wt_add_spill. auto. auto. constructor.
-Qed.
-
-Lemma wt_add_store:
-  forall chunk addr args src s,
-  List.map Loc.type args = type_of_addressing addr ->
-  Loc.type src = type_of_chunk chunk ->
-  locs_read_ok args ->
-  loc_read_ok src ->
-  wt_block tf (add_store chunk addr args src s).
-Proof.
-  intros. unfold add_store.
-  assert ((List.length args <= 2)%nat).
-    replace (length args) with (length (type_of_addressing addr)).
-    apply Allocproof.length_type_of_addressing.
-    rewrite <- H. apply list_length_map.
-  assert ((List.length (src :: args) <= 3)%nat). simpl. omega.
-  generalize (wt_regs_for (src :: args) H4); intro.
-  caseEq (regs_for (src :: args)).
-  intro. constructor.
-  intros rsrc rargs EQ. rewrite EQ in H5. simpl in H5.
-  apply wt_add_reloads. 
-  red; intros. elim H6; intro. subst l; auto. auto. 
-  simpl. congruence. 
-  apply wt_Bstore. congruence. congruence. constructor.
-Qed.
-
-Lemma wt_add_call:
-  forall sig los args res s,
-  match los with inl l => Loc.type l = Tint | inr s => True end ->
-  List.map Loc.type args = sig.(sig_args) ->
-  Loc.type res = match sig.(sig_res) with None => Tint | Some ty => ty end ->
-  locs_read_ok args ->
-  match los with inl l => loc_read_ok l | inr s => True end ->
-  loc_write_ok res ->
-  wt_block tf (add_call sig los args res s).
-Proof.
-  intros. 
-  assert (locs_write_ok (loc_arguments sig)).
-    red; intros. generalize (loc_arguments_acceptable sig l H5).
-    destruct l; simpl. auto. 
-    destruct s0; try contradiction. simpl. omega.
-  unfold add_call. destruct los.
-  apply wt_add_reload. auto. simpl; congruence. 
-  apply wt_parallel_move. rewrite loc_arguments_type. auto.
-  auto. auto. 
-  apply wt_Bcall. reflexivity. 
-  apply wt_add_spill. auto. 
-  rewrite loc_result_type. auto. constructor.
-  apply wt_parallel_move. rewrite loc_arguments_type. auto.
-  auto. auto. 
-  apply wt_Bcall. auto.
-  apply wt_add_spill. auto. 
-  rewrite loc_result_type. auto. constructor.
-Qed.
-
-Lemma wt_add_alloc:
-  forall arg res s,
-  Loc.type arg = Tint -> Loc.type res = Tint ->
-  loc_read_ok arg -> loc_write_ok res ->
-  wt_block tf (add_alloc arg res s).
-Proof.
-  intros. 
-  unfold add_alloc. apply wt_add_reload. auto. rewrite H; reflexivity.
-  apply wt_Balloc. apply wt_add_spill. auto. rewrite H0; reflexivity.
-  apply wt_Bgoto. 
-Qed.
-
-Lemma wt_add_cond:
-  forall cond args ifso ifnot,
-  List.map Loc.type args = type_of_condition cond ->
-  locs_read_ok args ->
-  wt_block tf (add_cond cond args ifso ifnot).
-Proof.
-  intros. 
-  assert ((List.length args) <= 3)%nat.
-    replace (length args) with (length (type_of_condition cond)).
-    apply Allocproof.length_type_of_condition. 
-    rewrite <- H. apply list_length_map. 
-  generalize (wt_regs_for args H1). intro.  
-  unfold add_cond. apply wt_add_reloads.
-  auto. auto. 
-  apply wt_Bcond. congruence. 
-Qed.
-
-Lemma wt_add_return:
-  forall sig optarg,
-  option_map Loc.type optarg = sig.(sig_res) ->
-  match optarg with None => True | Some arg => loc_read_ok arg end ->
-  wt_block tf (add_return sig optarg).
-Proof.
-  intros. unfold add_return. destruct optarg.
-  apply wt_add_reload. auto. rewrite loc_result_type. 
-  simpl in H. destruct (sig_res sig). congruence. discriminate.
-  constructor.
-  apply wt_Bopundef. constructor.
-Qed.
-
-Lemma wt_add_undefs:
-  forall ll b,
-  wt_block tf b -> wt_block tf (add_undefs ll b).
-Proof.
-  induction ll; intros.
-  simpl. auto. 
-  simpl. destruct a. apply wt_Bopundef. auto. auto.
-Qed.
-
-Lemma wt_add_entry:
-  forall params undefs s,
-  List.map Loc.type params = sig_args (RTL.fn_sig f) ->
-  locs_write_ok params ->
-  wt_block tf (add_entry (RTL.fn_sig f) params undefs s).
-Proof.
-  set (sig := RTL.fn_sig f).
-  assert (sig = tf.(fn_sig)).
-    unfold sig. 
-    assert (transf_fundef (Internal f) = Some (Internal tf)).
-      unfold transf_fundef, transf_partial_fundef. rewrite TRANSL; auto.
-    generalize (Allocproof.sig_function_translated _ _ H). simpl; auto.
-  assert (locs_read_ok (loc_parameters sig)).
-    red; unfold loc_parameters. intros. 
-    generalize (list_in_map_inv _ _ _ H0). intros [l1 [EQ IN]].
-    subst l. generalize (loc_arguments_acceptable _ _ IN).
-    destruct l1. simpl. auto. 
-    destruct s; try contradiction. 
-    simpl; intros. split. omega. rewrite <- H. 
-    apply loc_arguments_bounded. auto.
-  intros. unfold add_entry.
-  apply wt_parallel_move. rewrite loc_parameters_type. auto.
-  auto. auto. 
-  apply wt_add_undefs. constructor.
-Qed.
+Hint Resolve alloc_acceptable allocs_acceptable: allocty.
+Hint Rewrite alloc_type alloc_types: allocty.
+Hint Resolve valid_successor_transf: allocty.
 
 (** * Type preservation during translation from RTL to LTL *)
 
+Ltac WT := 
+  constructor; auto with allocty; autorewrite with allocty; auto.
+
 Lemma wt_transf_instr:
   forall pc instr,
-  RTLtyping.wt_instr env f.(RTL.fn_sig) instr ->
-  wt_block tf (transf_instr f live alloc pc instr).
+  RTLtyping.wt_instr env f instr ->
+  wt_instr tf (transf_instr f live alloc pc instr).
 Proof.
-  intros. inversion H; simpl.
+  intros. inv H; simpl.
   (* nop *)
-  constructor.
+  WT.
   (* move *)
-  case (Regset.mem r live!!pc). 
-  apply wt_add_op_move; auto with allocty.
-  repeat rewrite alloc_type. auto. constructor.
-  (* undef *)
-  case (Regset.mem r live!!pc). 
-  apply wt_add_op_undef; auto with allocty.
-  constructor.
+  destruct (Regset.mem r live!!pc).
+  destruct (is_redundant_move Omove (r1 :: nil) r alloc); WT.
+  WT.
   (* other ops *)
-  case (Regset.mem res live!!pc). 
-  apply wt_add_op_others; auto with allocty. 
-  rewrite alloc_types. rewrite alloc_type. auto. 
-  constructor.
+  destruct (Regset.mem res live!!pc).
+  destruct (is_redundant_move op args res alloc); WT.
+  WT.
   (* load *)
-  case (Regset.mem dst live!!pc). 
-  apply wt_add_load; auto with allocty. 
-  rewrite alloc_types. auto. rewrite alloc_type. auto. 
-  constructor.
+  destruct (Regset.mem dst live!!pc); WT.
   (* store *)
-  apply wt_add_store; auto with allocty.
-  rewrite alloc_types. auto. rewrite alloc_type. auto. 
+  WT.
   (* call *)
-  apply wt_add_call. 
-  destruct ros; simpl. rewrite alloc_type; auto. auto. 
-  rewrite alloc_types; auto.
-  rewrite alloc_type. auto. 
-  auto with allocty.
+  WT.
+  destruct ros; simpl. autorewrite with allocty; auto. auto.
+  destruct ros; simpl; auto with allocty.
+  (* tailcall *)
+  WT.
+  destruct ros; simpl. autorewrite with allocty; auto. auto.
   destruct ros; simpl; auto with allocty.
-  auto with allocty.
+  rewrite transf_unroll; auto.
   (* alloc *)
-  apply wt_add_alloc; auto with allocty.
-  rewrite alloc_type; auto. rewrite alloc_type; auto.
+  WT.
   (* cond *)
-  apply wt_add_cond. rewrite alloc_types; auto. auto with allocty.
+  WT.
   (* return *)
-  apply wt_add_return. 
-  destruct optres; simpl. rewrite alloc_type. exact H0. exact H0.
+  WT.
+  rewrite transf_unroll; simpl. 
+  destruct optres; simpl. autorewrite with allocty. auto. auto.
   destruct optres; simpl; auto with allocty.
 Qed.
 
-Lemma wt_transf_instrs:
-  let c := PTree.map (transf_instr f live alloc) (RTL.fn_code f) in
-  forall pc b, c!pc = Some b -> wt_block tf b.
-Proof.
-  intros until b.
-  unfold c. rewrite PTree.gmap. caseEq (RTL.fn_code f)!pc. 
-  intros instr EQ. simpl. intros. injection H; intro; subst b.
-  apply wt_transf_instr. eapply RTLtyping.wt_instrs; eauto.
-  apply wt_rtl_function. 
-  simpl; intros; discriminate.
-Qed.
-
-Lemma wt_transf_entrypoint:
-  let c := transf_entrypoint f live alloc
-            (PTree.map (transf_instr f live alloc) (RTL.fn_code f)) in
-  (forall pc b, c!pc = Some b -> wt_block tf b).
-Proof.
-  simpl. unfold transf_entrypoint. 
-  intros pc b. rewrite PTree.gsspec. 
-  case (peq pc (fn_nextpc f)); intros.
-  injection H; intro; subst b. 
-  apply wt_add_entry.
-  rewrite alloc_types. eapply RTLtyping.wt_params. apply wt_rtl_function.
-  auto with allocty. 
-  apply wt_transf_instrs with pc; auto.
-Qed.
-
 End TYPING_FUNCTION.
 
 Lemma wt_transf_function:
   forall f tf,
-  transf_function f = Some tf -> wt_function tf.
+  transf_function f = OK tf -> wt_function tf.
 Proof.
   intros. generalize H; unfold transf_function.
   caseEq (type_function f). intros env TYP. 
@@ -527,19 +145,27 @@ Proof.
     with (live0 f live).
   caseEq (regalloc f live (live0 f live) env).
   intros alloc ALLOC.
-  intro EQ; injection EQ; intro; subst tf.
-  red. simpl. intros. eapply wt_transf_entrypoint; eauto.
-  intros; discriminate.
-  intros; discriminate.
-  intros; discriminate.
-Qed. 
+  intro EQ; injection EQ; intro.
+  assert (RTLtyping.wt_function f env). apply type_function_correct; auto.
+  inversion H1.
+  constructor; rewrite <- H0; simpl.
+  rewrite (alloc_types _ _ _ _ ALLOC). auto.
+  eapply regsalloc_acceptable; eauto.
+  eapply regalloc_norepet_norepet; eauto.
+  eapply regalloc_correct_2; eauto.
+  intros until instr. rewrite PTree.gmap. 
+  caseEq (RTL.fn_code f)!pc; simpl; intros.
+  inversion H3. eapply wt_transf_instr; eauto. congruence. discriminate.
+  eapply valid_successor_transf; eauto. congruence. 
+  congruence. congruence. congruence.
+Qed.
 
 Lemma wt_transf_fundef:
   forall f tf,
-  transf_fundef f = Some tf -> wt_fundef tf.
+  transf_fundef f = OK tf -> wt_fundef tf.
 Proof.
   intros until tf; destruct f; simpl. 
-  caseEq (transf_function f). intros g TF EQ. inversion EQ.
+  caseEq (transf_function f); simpl. intros g TF EQ. inversion EQ.
   constructor. eapply wt_transf_function; eauto.
   congruence.
   intros. inversion H. constructor. 
@@ -547,7 +173,7 @@ Qed.
 
 Lemma program_typing_preserved:
   forall (p: RTL.program) (tp: LTL.program),
-  transf_program p = Some tp ->
+  transf_program p = OK tp ->
   LTLtyping.wt_program tp.
 Proof.
   intros; red; intros.
diff --git a/backend/Bounds.v b/backend/Bounds.v
new file mode 100644
index 00000000..a0f09ce8
--- /dev/null
+++ b/backend/Bounds.v
@@ -0,0 +1,357 @@
+(** Computation of resource bounds forr Linear code. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Op.
+Require Import Locations.
+Require Import Linear.
+Require Import Lineartyping.
+Require Import Conventions.
+
+(** * Resource bounds for a function *)
+
+(** The [bounds] record capture how many local and outgoing stack slots
+  and callee-save registers are used by a function. *)
+
+(** We demand that all bounds are positive or null,
+  and moreover [bound_outgoing] is greater or equal to 6.
+  These properties are used later to reason about the layout of
+  the activation record. *)
+
+Record bounds : Set := mkbounds {
+  bound_int_local: Z;
+  bound_float_local: Z;
+  bound_int_callee_save: Z;
+  bound_float_callee_save: Z;
+  bound_outgoing: Z;
+  bound_int_local_pos: bound_int_local >= 0;
+  bound_float_local_pos: bound_float_local >= 0;
+  bound_int_callee_save_pos: bound_int_callee_save >= 0;
+  bound_float_callee_save_pos: bound_float_callee_save >= 0;
+  bound_outgoing_pos: bound_outgoing >= 6
+}.
+
+(** The following predicates define the correctness of a set of bounds
+    for the code of a function. *)
+
+Section BELOW.
+
+Variable funct: function.
+Variable b: bounds.
+
+Definition mreg_within_bounds (r: mreg) :=
+  match mreg_type r with
+  | Tint => index_int_callee_save r < bound_int_callee_save b
+  | Tfloat => index_float_callee_save r < bound_float_callee_save b
+  end.
+
+Definition slot_within_bounds (s: slot) :=
+  match s with
+  | Local ofs Tint => 0 <= ofs < bound_int_local b
+  | Local ofs Tfloat => 0 <= ofs < bound_float_local b
+  | Outgoing ofs ty => 14 <= ofs /\ ofs + typesize ty <= bound_outgoing b
+  | Incoming ofs ty => 14 <= ofs /\ ofs + typesize ty <= size_arguments funct.(fn_sig)
+  end.
+
+Definition instr_within_bounds (i: instruction) :=
+  match i with
+  | Lgetstack s r => slot_within_bounds s /\ mreg_within_bounds r
+  | Lsetstack r s => slot_within_bounds s
+  | Lop op args res => mreg_within_bounds res
+  | Lload chunk addr args dst => mreg_within_bounds dst
+  | Lcall sig ros => size_arguments sig <= bound_outgoing b
+  | _ => True
+  end.
+
+End BELOW.
+
+Definition function_within_bounds (f: function) (b: bounds) : Prop :=
+  forall instr, In instr f.(fn_code) -> instr_within_bounds f b instr.
+
+(** * Inference of resource bounds for a function *)
+
+(** The resource bounds for a function are computed by a linear scan
+  of its instructions. *)
+
+Section BOUNDS.
+
+Variable f: function.
+
+(** In the proof of the [Stacking] pass, we only need to bound the
+  registers written by an instruction.  Therefore, this function
+  returns these registers, ignoring registers used only as
+  arguments. *)
+
+Definition regs_of_instr (i: instruction) : list mreg :=
+  match i with
+  | Lgetstack s r => r :: nil
+  | Lsetstack r s => r :: nil
+  | Lop op args res => res :: nil
+  | Lload chunk addr args dst => dst :: nil
+  | Lstore chunk addr args src => nil
+  | Lcall sig ros => nil
+  | Ltailcall sig ros => nil
+  | Lalloc => nil
+  | Llabel lbl => nil
+  | Lgoto lbl => nil
+  | Lcond cond args lbl => nil
+  | Lreturn => nil
+  end.
+
+Definition slots_of_instr (i: instruction) : list slot :=
+  match i with
+  | Lgetstack s r => s :: nil
+  | Lsetstack r s => s :: nil
+  | _ => nil
+  end.
+
+Definition max_over_list (A: Set) (valu: A -> Z) (l: list A) : Z :=
+  List.fold_left (fun m l => Zmax m (valu l)) l 0.
+
+Definition max_over_instrs (valu: instruction -> Z) : Z :=
+  max_over_list instruction valu f.(fn_code).
+
+Definition max_over_regs_of_instr (valu: mreg -> Z) (i: instruction) : Z :=
+  max_over_list mreg valu (regs_of_instr i).
+
+Definition max_over_slots_of_instr (valu: slot -> Z) (i: instruction) : Z :=
+  max_over_list slot valu (slots_of_instr i).
+
+Definition max_over_regs_of_funct (valu: mreg -> Z) : Z :=
+  max_over_instrs (max_over_regs_of_instr valu).
+
+Definition max_over_slots_of_funct (valu: slot -> Z) : Z :=
+  max_over_instrs (max_over_slots_of_instr valu).
+
+Definition int_callee_save (r: mreg) := 1 + index_int_callee_save r.
+
+Definition float_callee_save (r: mreg) := 1 + index_float_callee_save r.
+
+Definition int_local (s: slot) :=
+  match s with Local ofs Tint => 1 + ofs | _ => 0 end.
+
+Definition float_local (s: slot) :=
+  match s with Local ofs Tfloat => 1 + ofs | _ => 0 end.
+
+Definition outgoing_slot (s: slot) :=
+  match s with Outgoing ofs ty => ofs + typesize ty | _ => 0 end.
+
+Definition outgoing_space (i: instruction) :=
+  match i with Lcall sig _ => size_arguments sig | _ => 0 end.
+
+Lemma max_over_list_pos:
+  forall (A: Set) (valu: A -> Z) (l: list A),
+  max_over_list A valu l >= 0.
+Proof.
+  intros until valu. unfold max_over_list.
+  assert (forall l z, fold_left (fun x y => Zmax x (valu y)) l z >= z).
+  induction l; simpl; intros.
+  omega. apply Zge_trans with (Zmax z (valu a)). 
+  auto. apply Zle_ge. apply Zmax1. auto.
+Qed.
+
+Lemma max_over_slots_of_funct_pos:
+  forall (valu: slot -> Z), max_over_slots_of_funct valu >= 0.
+Proof.
+  intros. unfold max_over_slots_of_funct.
+  unfold max_over_instrs. apply max_over_list_pos.
+Qed.
+
+Lemma max_over_regs_of_funct_pos:
+  forall (valu: mreg -> Z), max_over_regs_of_funct valu >= 0.
+Proof.
+  intros. unfold max_over_regs_of_funct.
+  unfold max_over_instrs. apply max_over_list_pos.
+Qed.
+
+Remark Zmax_6: forall x, Zmax 6 x >= 6.
+Proof.
+  intros. apply Zle_ge. apply Zmax_bound_l. omega.
+Qed.
+ 
+Definition function_bounds :=
+  mkbounds
+    (max_over_slots_of_funct int_local)
+    (max_over_slots_of_funct float_local)
+    (max_over_regs_of_funct int_callee_save)
+    (max_over_regs_of_funct float_callee_save)
+    (Zmax 6
+          (Zmax (max_over_instrs outgoing_space)
+                (max_over_slots_of_funct outgoing_slot)))
+    (max_over_slots_of_funct_pos int_local)
+    (max_over_slots_of_funct_pos float_local)
+    (max_over_regs_of_funct_pos int_callee_save)
+    (max_over_regs_of_funct_pos float_callee_save)
+    (Zmax_6 _).
+
+(** We now show the correctness of the inferred bounds. *)
+
+Lemma max_over_list_bound:
+  forall (A: Set) (valu: A -> Z) (l: list A) (x: A),
+  In x l -> valu x <= max_over_list A valu l.
+Proof.
+  intros until x. unfold max_over_list.
+  assert (forall c z,
+            let f := fold_left (fun x y => Zmax x (valu y)) c z in
+            z <= f /\ (In x c -> valu x <= f)).
+    induction c; simpl; intros.
+    split. omega. tauto.
+    elim (IHc (Zmax z (valu a))); intros. 
+    split. apply Zle_trans with (Zmax z (valu a)). apply Zmax1. auto. 
+    intro H1; elim H1; intro. 
+    subst a. apply Zle_trans with (Zmax z (valu x)). 
+    apply Zmax2. auto. auto.
+  intro. elim (H l 0); intros. auto.
+Qed.
+
+Lemma max_over_instrs_bound:
+  forall (valu: instruction -> Z) i,
+  In i f.(fn_code) -> valu i <= max_over_instrs valu.
+Proof.
+  intros. unfold max_over_instrs. apply max_over_list_bound; auto.
+Qed.
+
+Lemma max_over_regs_of_funct_bound:
+  forall (valu: mreg -> Z) i r,
+  In i f.(fn_code) -> In r (regs_of_instr i) ->
+  valu r <= max_over_regs_of_funct valu.
+Proof.
+  intros. unfold max_over_regs_of_funct. 
+  apply Zle_trans with (max_over_regs_of_instr valu i).
+  unfold max_over_regs_of_instr. apply max_over_list_bound. auto.
+  apply max_over_instrs_bound. auto.
+Qed.
+
+Lemma max_over_slots_of_funct_bound:
+  forall (valu: slot -> Z) i s,
+  In i f.(fn_code) -> In s (slots_of_instr i) ->
+  valu s <= max_over_slots_of_funct valu.
+Proof.
+  intros. unfold max_over_slots_of_funct. 
+  apply Zle_trans with (max_over_slots_of_instr valu i).
+  unfold max_over_slots_of_instr. apply max_over_list_bound. auto.
+  apply max_over_instrs_bound. auto.
+Qed.
+
+Lemma int_callee_save_bound:
+  forall i r,
+  In i f.(fn_code) -> In r (regs_of_instr i) ->
+  index_int_callee_save r < bound_int_callee_save function_bounds.
+Proof.
+  intros. apply Zlt_le_trans with (int_callee_save r).
+  unfold int_callee_save. omega.
+  unfold function_bounds, bound_int_callee_save. 
+  eapply max_over_regs_of_funct_bound; eauto.
+Qed.
+
+Lemma float_callee_save_bound:
+  forall i r,
+  In i f.(fn_code) -> In r (regs_of_instr i) ->
+  index_float_callee_save r < bound_float_callee_save function_bounds.
+Proof.
+  intros. apply Zlt_le_trans with (float_callee_save r).
+  unfold float_callee_save. omega.
+  unfold function_bounds, bound_float_callee_save. 
+  eapply max_over_regs_of_funct_bound; eauto.
+Qed.
+
+Lemma int_local_slot_bound:
+  forall i ofs,
+  In i f.(fn_code) -> In (Local ofs Tint) (slots_of_instr i) ->
+  ofs < bound_int_local function_bounds.
+Proof.
+  intros. apply Zlt_le_trans with (int_local (Local ofs Tint)).
+  unfold int_local. omega.
+  unfold function_bounds, bound_int_local.
+  eapply max_over_slots_of_funct_bound; eauto.
+Qed.
+
+Lemma float_local_slot_bound:
+  forall i ofs,
+  In i f.(fn_code) -> In (Local ofs Tfloat) (slots_of_instr i) ->
+  ofs < bound_float_local function_bounds.
+Proof.
+  intros. apply Zlt_le_trans with (float_local (Local ofs Tfloat)).
+  unfold float_local. omega.
+  unfold function_bounds, bound_float_local.
+  eapply max_over_slots_of_funct_bound; eauto.
+Qed.
+
+Lemma outgoing_slot_bound:
+  forall i ofs ty,
+  In i f.(fn_code) -> In (Outgoing ofs ty) (slots_of_instr i) ->
+  ofs + typesize ty <= bound_outgoing function_bounds.
+Proof.
+  intros. change (ofs + typesize ty) with (outgoing_slot (Outgoing ofs ty)).
+  unfold function_bounds, bound_outgoing.
+  apply Zmax_bound_r. apply Zmax_bound_r. 
+  eapply max_over_slots_of_funct_bound; eauto.
+Qed.
+
+Lemma size_arguments_bound:
+  forall sig ros,
+  In (Lcall sig ros) f.(fn_code) ->
+  size_arguments sig <= bound_outgoing function_bounds.
+Proof.
+  intros. change (size_arguments sig) with (outgoing_space (Lcall sig ros)).
+  unfold function_bounds, bound_outgoing.
+  apply Zmax_bound_r. apply Zmax_bound_l.
+  apply max_over_instrs_bound; auto.
+Qed.
+
+(** Consequently, all machine registers or stack slots mentioned by one
+  of the instructions of function [f] are within bounds. *)
+
+Lemma mreg_is_within_bounds:
+  forall i, In i f.(fn_code) ->
+  forall r, In r (regs_of_instr i) ->
+  mreg_within_bounds function_bounds r.
+Proof.
+  intros. unfold mreg_within_bounds. 
+  case (mreg_type r).
+  eapply int_callee_save_bound; eauto.
+  eapply float_callee_save_bound; eauto.
+Qed.
+
+Lemma slot_is_within_bounds:
+  forall i, In i f.(fn_code) -> 
+  forall s, In s (slots_of_instr i) -> Lineartyping.slot_valid f s ->
+  slot_within_bounds f function_bounds s.
+Proof.
+  intros. unfold slot_within_bounds. 
+  destruct s.
+  destruct t.
+  split. exact H1. eapply int_local_slot_bound; eauto.
+  split. exact H1. eapply float_local_slot_bound; eauto.
+  exact H1.
+  split. exact H1. eapply outgoing_slot_bound; eauto.
+Qed.
+
+(** It follows that every instruction in the function is within bounds, 
+    in the sense of the [instr_within_bounds] predicate. *)
+
+Lemma instr_is_within_bounds:
+  forall i,
+  In i f.(fn_code) ->
+  Lineartyping.wt_instr f i ->
+  instr_within_bounds f function_bounds i.
+Proof.
+  intros; 
+  destruct i;
+  generalize (mreg_is_within_bounds _ H); generalize (slot_is_within_bounds _ H); 
+  simpl; intros; auto.
+  inv H0. split; auto.
+  inv H0; auto.
+  eapply size_arguments_bound; eauto.
+Qed.
+
+Lemma function_is_within_bounds:
+  Lineartyping.wt_code f f.(fn_code) ->
+  function_within_bounds f function_bounds.
+Proof.
+  intros; red; intros. apply instr_is_within_bounds; auto.
+Qed.
+
+End BOUNDS.
+
diff --git a/backend/CSE.v b/backend/CSE.v
index 68010133..a7901d61 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -251,7 +251,7 @@ Definition equation_holds
      (vres: valnum) (rh: rhs) : Prop :=
   match rh with
   | Op op vl =>
-      eval_operation ge sp op (List.map valuation vl) =
+      eval_operation ge sp op (List.map valuation vl) m =
       Some (valuation vres)
   | Load chunk addr vl =>
       exists a,
@@ -337,6 +337,8 @@ Definition transfer (f: function) (pc: node) (before: numbering) :=
           kill_loads before
       | Icall sig ros args res s =>
           empty_numbering
+      | Itailcall sig ros args =>
+          empty_numbering
       | Ialloc arg res s =>
           add_unknown before res
       | Icond cond args ifso ifnot =>
@@ -373,7 +375,6 @@ Definition is_trivial_op (op: operation) : bool :=
   | Ointconst _ => true
   | Oaddrsymbol _ _ => true
   | Oaddrstack _ => true
-  | Oundef => true
   | _ => false
   end.
 
@@ -426,7 +427,8 @@ Definition transf_function (f: function) : function :=
         f.(fn_nextpc)
         (transf_code_wf f approxs f.(fn_code_wf)).
 
-Definition transf_fundef := AST.transf_fundef transf_function.
+Definition transf_fundef (f: fundef) : fundef :=
+  AST.transf_fundef transf_function f.
 
 Definition transf_program (p: program) : program :=
   transform_program transf_fundef p.
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 79657c55..d46a39f1 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -9,6 +9,7 @@ Require Import Values.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Registers.
 Require Import RTL.
@@ -218,6 +219,7 @@ Proof.
   apply wf_add_load; auto.
   apply wf_kill_loads; auto.
   apply wf_empty_numbering.
+  apply wf_empty_numbering.
   apply wf_add_unknown; auto.
 Qed.
 
@@ -387,7 +389,7 @@ Definition rhs_evals_to
     (valu: valnum -> val) (rh: rhs) (v: val) : Prop :=
   match rh with
   | Op op vl => 
-      eval_operation ge sp op (List.map valu vl) = Some v
+      eval_operation ge sp op (List.map valu vl) m = Some v
   | Load chunk addr vl =>
       exists a,
       eval_addressing ge sp addr (List.map valu vl) = Some a /\
@@ -468,7 +470,7 @@ Lemma add_op_satisfiable:
   forall n rs op args dst v,
   wf_numbering n ->
   numbering_satisfiable ge sp rs m n ->
-  eval_operation ge sp op rs##args = Some v ->
+  eval_operation ge sp op rs##args m = Some v ->
   numbering_satisfiable ge sp (rs#dst <- v) m (add_op n dst op args).
 Proof.
   intros. inversion H0.
@@ -545,7 +547,7 @@ Proof.
   intros. destruct H0 as [valu [A B]].
   exists valu; split; intros.
   generalize (A _ _ H0). destruct rh; simpl.
-  auto.
+  intro. eapply eval_operation_alloc; eauto. 
   intros [addr [C D]]. exists addr; split. auto.
   destruct addr; simpl in *; try discriminate. 
   eapply Mem.load_alloc_other; eauto. 
@@ -569,17 +571,21 @@ Proof.
 Qed.
 
 Lemma kill_load_satisfiable:
-  forall n rs m',
+  forall n rs chunk addr v m',
+  Mem.storev chunk m addr v = Some m' ->
   numbering_satisfiable ge sp rs m n ->
   numbering_satisfiable ge sp rs m' (kill_loads n).
 Proof.
-  intros. inversion H. inversion H0.
+  intros. inversion H0. inversion H1.
   generalize (kill_load_eqs_incl n.(num_eqs)). intro.
   exists x. split; intros.
-  generalize (H1 _ _ (H3 _ H4)). 
-  generalize (kill_load_eqs_ops _ _ _ H4).
-  destruct rh; simpl. auto. tauto.
-  apply H2. assumption.
+  generalize (H2 _ _ (H4 _ H5)). 
+  generalize (kill_load_eqs_ops _ _ _ H5).
+  destruct rh; simpl.
+  intros. destruct addr; simpl in H; try discriminate. 
+  eapply eval_operation_store; eauto.
+  tauto.
+  apply H3. assumption.
 Qed.
 
 (** Correctness of [reg_valnum]: if it returns a register [r],
@@ -633,7 +639,7 @@ Lemma find_op_correct:
   wf_numbering n ->
   numbering_satisfiable ge sp rs m n ->
   find_op n op args = Some r ->
-  eval_operation ge sp op rs##args = Some rs#r.
+  eval_operation ge sp op rs##args m = Some rs#r.
 Proof.
   intros until r. intros WF [valu NH].
   unfold find_op. caseEq (valnum_regs n args). intros n' vl VR FIND.
@@ -664,29 +670,6 @@ Qed.
 
 End SATISFIABILITY.
 
-(** The transfer function preserves satisfiability of numberings. *)
-
-Lemma transfer_correct:
-  forall ge c sp pc rs m t pc' rs' m' f n,
-  exec_instr ge c sp pc rs m t pc' rs' m' ->
-  c = f.(fn_code) ->
-  wf_numbering n ->
-  numbering_satisfiable ge sp rs m n ->
-  numbering_satisfiable ge sp rs' m' (transfer f pc n).
-Proof.
-  induction 1; intros; subst c; unfold transfer; rewrite H; auto.
-  (* Iop *)
-  eapply add_op_satisfiable; eauto.
-  (* Iload *)
-  eapply add_load_satisfiable; eauto.
-  (* Istore *)
-  eapply kill_load_satisfiable; eauto.
-  (* Icall *)
-  apply empty_numbering_satisfiable.
-  (* Ialloc *)
-  apply add_unknown_satisfiable; auto. eapply alloc_satisfiable; eauto.
-Qed.
-
 (** The numberings associated to each instruction by the static analysis
   are inductively satisfiable, in the following sense: the numbering
   at the function entry point is satisfiable, and for any RTL execution 
@@ -694,43 +677,25 @@ Qed.
   satisfiability at [pc']. *)
 
 Theorem analysis_correct_1:
-  forall ge c sp pc rs m t pc' rs' m' f,
-  exec_instr ge c sp pc rs m t pc' rs' m' ->
-  c = f.(fn_code) ->
-  numbering_satisfiable ge sp rs m (analyze f)!!pc ->
-  numbering_satisfiable ge sp rs' m' (analyze f)!!pc'.
+  forall ge sp rs m f pc pc',
+  In pc' (successors f pc) ->
+  numbering_satisfiable ge sp rs m (transfer f pc (analyze f)!!pc) ->
+  numbering_satisfiable ge sp rs m (analyze f)!!pc'.
 Proof.
-  intros until f. intros EXEC CODE.
+  intros until pc'.
   generalize (wf_analyze f pc).
   unfold analyze.
   caseEq (Solver.fixpoint (successors f) (fn_nextpc f) 
                           (transfer f) (fn_entrypoint f)).
-  intros res FIXPOINT WF NS.
-  assert (numbering_satisfiable ge sp rs' m' (transfer f pc res!!pc)).
-    eapply transfer_correct; eauto.
+  intros res FIXPOINT WF SUCC NS.
   assert (Numbering.ge res!!pc' (transfer f pc res!!pc)).
     eapply Solver.fixpoint_solution; eauto.
     elim (fn_code_wf f pc); intro. auto.
-    rewrite <- CODE in H0.
-    elim (exec_instr_present _ _ _ _ _ _ _ _ _ _ EXEC H0).
-    rewrite CODE in EXEC. eapply successors_correct; eauto.
-  apply H0. auto.
+    unfold successors in SUCC; rewrite H in SUCC; contradiction.
+  apply H. auto.
   intros. rewrite PMap.gi. apply empty_numbering_satisfiable.
 Qed.
 
-Theorem analysis_correct_N:
-  forall ge c sp pc rs m t pc' rs' m' f,
-  exec_instrs ge c sp pc rs m t pc' rs' m' ->
-  c = f.(fn_code) ->
-  numbering_satisfiable ge sp rs m (analyze f)!!pc ->
-  numbering_satisfiable ge sp rs' m' (analyze f)!!pc'.
-Proof.
-  induction 1; intros.
-  assumption.
-  eapply analysis_correct_1; eauto.
-  eauto.
-Qed.
-
 Theorem analysis_correct_entry:
   forall ge sp rs m f,
   numbering_satisfiable ge sp rs m (analyze f)!!(f.(fn_entrypoint)).
@@ -773,49 +738,67 @@ Lemma funct_ptr_translated:
   Genv.find_funct_ptr tge b = Some (transf_fundef f).
 Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog).
 
-Lemma sig_translated:
-  forall (f: RTL.fundef),
-  funsig (transf_fundef f) = funsig f.
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
 Proof.
-  intros; case f; intros; reflexivity.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs f,
+  find_function ge ros rs = Some f ->
+  find_function tge ros rs = Some (transf_fundef f).
+Proof.
+  intros until f; destruct ros; simpl.
+  intro. apply functions_translated; auto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge i); intro.
+  apply funct_ptr_translated; auto.
+  discriminate.
 Qed.
 
 (** The proof of semantic preservation is a simulation argument using
   diagrams of the following form:
 <<
-      pc, rs, m ------------------------ pc, rs, m
-          |                                  |
-          |                                  |
-          v                                  v
-     pc', rs', m' --------------------- pc', rs', m'
+           st1 --------------- st2
+            |                   |
+           t|                   |t
+            |                   |
+            v                   v
+           st1'--------------- st2'
 >>
   Left: RTL execution in the original program.  Right: RTL execution in
-  the optimized program.  Precondition (top): the numbering at [pc]
-  (returned by the static analysis) is satisfiable.  Postcondition: none. 
+  the optimized program.  Precondition (top) and postcondition (bottom):
+  agreement between the states, including the fact that
+  the numbering at [pc] (returned by the static analysis) is satisfiable.
 *)
 
-Definition exec_instr_prop
-        (c: code) (sp: val)
-        (pc: node) (rs: regset) (m: mem) (t: trace)
-        (pc': node) (rs': regset) (m': mem) : Prop :=
-  forall f
-         (CF: c = f.(RTL.fn_code))
-         (SAT: numbering_satisfiable ge sp rs m (analyze f)!!pc),
-  exec_instr tge (transf_code (analyze f) c) sp pc rs m t pc' rs' m'.
-
-Definition exec_instrs_prop
-        (c: code) (sp: val)
-        (pc: node) (rs: regset) (m: mem) (t: trace)
-        (pc': node) (rs': regset) (m': mem) : Prop :=
-  forall f
-         (CF: c = f.(RTL.fn_code))
-         (SAT: numbering_satisfiable ge sp rs m (analyze f)!!pc),
-  exec_instrs tge (transf_code (analyze f) c) sp pc rs m t pc' rs' m'.
-
-Definition exec_function_prop
-        (f: RTL.fundef) (args: list val) (m: mem) (t: trace)
-        (res: val) (m': mem) : Prop :=
-  exec_function tge (transf_fundef f) args m t res m'.
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  match_stackframes_intro:
+    forall res c sp pc rs f,
+    c = f.(RTL.fn_code) ->
+    (forall v m, numbering_satisfiable ge sp (rs#res <- v) m (analyze f)!!pc) ->
+    match_stackframes
+      (Stackframe res c sp pc rs)
+      (Stackframe res (transf_code (analyze f) c) sp pc rs).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s c sp pc rs m s' f
+             (CF: c = f.(RTL.fn_code))
+             (SAT: numbering_satisfiable ge sp rs m (analyze f)!!pc)
+             (STACKS: list_forall2 match_stackframes s s'),
+      match_states (State s c sp pc rs m)
+                   (State s' (transf_code (analyze f) c) sp pc rs m)
+  | match_states_call:
+      forall s f args m s',
+      list_forall2 match_stackframes s s' ->
+      match_states (Callstate s f args m)
+                   (Callstate s' (transf_fundef f) args m)
+  | match_states_return:
+      forall s s' v m,
+      list_forall2 match_stackframes s s' ->
+      match_states (Returnstate s v m)
+                   (Returnstate s' v m).
 
 Ltac TransfInstr :=
   match goal with
@@ -826,34 +809,49 @@ Ltac TransfInstr :=
         unfold option_map; rewrite H1; reflexivity ]
   end.
 
-(** The proof of simulation is by structural induction on the evaluation
-  derivation for the source program. *)
+(** The proof of simulation is a case analysis over the transition
+  in the source code. *)
 
-Lemma transf_function_correct:
-  forall f args m t res m',
-  exec_function ge f args m t res m' ->
-  exec_function_prop f args m t res m'.
+Lemma transf_step_correct:
+  forall s1 t s2, step ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1'),
+  exists s2', step tge s1' t s2' /\ match_states s2 s2'.
 Proof.
-  apply (exec_function_ind_3 ge
-          exec_instr_prop exec_instrs_prop exec_function_prop);
-  intros; red; intros; try TransfInstr.
+  induction 1; intros; inv MS; try (TransfInstr; intro C).
+
   (* Inop *)
-  intro; apply exec_Inop; auto.
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' rs m); split.
+  apply exec_Inop; auto.
+  econstructor; eauto. 
+  apply analysis_correct_1 with pc. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H; auto.
+
   (* Iop *)
-  assert (eval_operation tge sp op rs##args = Some v).
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' (rs#res <- v) m); split.
+  assert (eval_operation tge sp op rs##args m = Some v).
     rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  generalize C; clear C.
   case (is_trivial_op op).
   intro. eapply exec_Iop'; eauto. 
   caseEq (find_op (analyze f)!!pc op args). intros r FIND CODE.
   eapply exec_Iop'; eauto. simpl. 
-  assert (eval_operation ge sp op rs##args = Some rs#r).
+  assert (eval_operation ge sp op rs##args m = Some rs#r).
     eapply find_op_correct; eauto.
     eapply wf_analyze; eauto.
   congruence.
-  intros. eapply exec_Iop'; eauto. 
+  intros. eapply exec_Iop'; eauto.
+  econstructor; eauto.
+  apply analysis_correct_1 with pc.
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. 
+  eapply add_op_satisfiable; eauto. apply wf_analyze.
+
   (* Iload *)
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' (rs#dst <- v) m); split.
   assert (eval_addressing tge sp addr rs##args = Some a).
     rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  generalize C; clear C.
   caseEq (find_load (analyze f)!!pc chunk addr args). intros r FIND CODE.
   eapply exec_Iop'; eauto. simpl. 
   assert (exists a, eval_addressing ge sp addr rs##args = Some a
@@ -862,52 +860,125 @@ Proof.
     eapply wf_analyze; eauto.
   elim H3; intros a' [A B].
   congruence.
-  intros. eapply exec_Iload'; eauto. 
+  intros. eapply exec_Iload'; eauto.
+  econstructor; eauto.
+  apply analysis_correct_1 with pc.
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. 
+  eapply add_load_satisfiable; eauto. apply wf_analyze.
+
   (* Istore *)
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' rs m'); split.
   assert (eval_addressing tge sp addr rs##args = Some a).
     rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-  intro; eapply exec_Istore; eauto. 
+  eapply exec_Istore; eauto.
+  econstructor; eauto.
+  apply analysis_correct_1 with pc. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H.
+  eapply kill_load_satisfiable; eauto. 
+
   (* Icall *)
-  assert (find_function tge ros rs = Some (transf_fundef f)).
-    destruct ros; simpl in H0; simpl.
-    apply functions_translated; auto.
-    rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
-    apply funct_ptr_translated; auto. discriminate.
-  intro; eapply exec_Icall with (f := transf_fundef f); eauto. 
-  generalize (sig_translated f); congruence.
+  exploit find_function_translated; eauto. intro FIND'.
+  econstructor; split.
+  eapply exec_Icall with (f := transf_fundef f); eauto.
+  apply sig_preserved. 
+  econstructor; eauto. 
+  constructor; auto. 
+  econstructor; eauto. 
+  intros. apply analysis_correct_1 with pc.
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. 
+  apply empty_numbering_satisfiable.
+
+  (* Itailcall *)
+  exploit find_function_translated; eauto. intro FIND'.
+  econstructor; split.
+  eapply exec_Itailcall with (f := transf_fundef f); eauto.
+  apply sig_preserved. 
+  econstructor; eauto. 
+
   (* Ialloc *)
-  intro; eapply exec_Ialloc; eauto.
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' (rs#res <- (Vptr b Int.zero)) m'); split.
+  eapply exec_Ialloc; eauto.
+  econstructor; eauto.
+  apply analysis_correct_1 with pc. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. 
+  apply add_unknown_satisfiable. apply wf_analyze; auto.
+  eapply alloc_satisfiable; eauto.
+
   (* Icond true *)
-  intro; eapply exec_Icond_true; eauto. 
+  econstructor; split.
+  eapply exec_Icond_true; eauto. 
+  econstructor; eauto.
+  apply analysis_correct_1 with pc. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H; auto.
+
   (* Icond false *)
-  intro; eapply exec_Icond_false; eauto. 
-  (* refl *)
-  apply exec_refl.
-  (* one *)
-  apply exec_one; auto.
-  (* trans *)
-  eapply exec_trans; eauto. apply H2; auto. 
-  eapply analysis_correct_N; eauto.
+  econstructor; split.
+  eapply exec_Icond_false; eauto. 
+  econstructor; eauto.
+  apply analysis_correct_1 with pc. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H; auto.
+
+  (* Ireturn *)
+  econstructor; split.
+  eapply exec_Ireturn; eauto.
+  constructor; auto.
+
   (* internal function *)
-  intro. unfold transf_function; simpl; eapply exec_funct_internal; simpl; eauto. 
-  eapply H1; eauto. eapply analysis_correct_entry; eauto.
+  simpl. econstructor; split.
+  eapply exec_function_internal; eauto. 
+  simpl. econstructor; eauto.
+  apply analysis_correct_entry. 
+
   (* external function *)
-  unfold transf_function; simpl. apply exec_funct_external; auto.
+  simpl. econstructor; split.
+  eapply exec_function_external; eauto.
+  econstructor; eauto.
+
+  (* return *)
+  inv H3. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  econstructor; eauto. 
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, initial_state prog st1 ->
+  exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H. 
+  exists (Callstate nil (transf_fundef f) nil (Genv.init_mem tprog)); split.
+  econstructor; eauto.
+  change (prog_main tprog) with (prog_main prog).
+  rewrite symbols_preserved. eauto.
+  apply funct_ptr_translated; auto.
+  rewrite <- H2. apply sig_preserved. 
+  replace (Genv.init_mem tprog) with (Genv.init_mem prog).
+  constructor. constructor. auto.
+  symmetry. unfold tprog, transf_program. apply Genv.init_mem_transf.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv H4. constructor. 
 Qed.
 
 Theorem transf_program_correct:
-  forall (t: trace) (r: val),
-  exec_program prog t r -> exec_program tprog t r.
-Proof.
-  intros t r [fptr [f [m [FINDS [FINDF [SIG EXEC]]]]]].
-  red. exists fptr; exists (transf_fundef f); exists m. 
-  split. change (prog_main tprog) with (prog_main prog).
-  rewrite symbols_preserved. assumption.
-  split. apply funct_ptr_translated; auto.
-  split. generalize (sig_translated f); congruence.
-  apply transf_function_correct. 
-  unfold tprog, transf_program. rewrite Genv.init_mem_transf. 
-  exact EXEC.
+  forall (beh: program_behavior),
+  exec_program prog beh -> exec_program tprog beh.
+Proof.
+  unfold exec_program; intros.
+  eapply simulation_step_preservation; eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact transf_step_correct. 
 Qed.
 
 End PRESERVATION.
diff --git a/backend/Cminor.v b/backend/Cminor.v
index 9ed5e19d..2b9945ac 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -8,41 +8,79 @@ Require Import Floats.
 Require Import Events.
 Require Import Values.
 Require Import Mem.
-Require Import Op.
 Require Import Globalenvs.
+Require Import Switch.
 
 (** * Abstract syntax *)
 
 (** Cminor is a low-level imperative language structured in expressions,
-  statements, functions and programs.  Expressions include
-  reading local variables, reading and writing store locations,
-  arithmetic operations, function calls, and conditional expressions
-  (similar to [e1 ? e2 : e3] in C).  The [Elet] and [Eletvar] constructs
-  enable sharing the computations of subexpressions.  De Bruijn notation
-  is used: [Eletvar n] refers to the value bound by then [n+1]-th enclosing
-  [Elet] construct.
-
-  A variant [condexpr] of [expr] is used to represent expressions
-  which are evaluated for their boolean value only and not their exact value.
-*)
+  statements, functions and programs.  We first define the constants
+  and operators that occur within expressions. *)
+
+Inductive constant : Set :=
+  | Ointconst: int -> constant     (**r integer constant *)
+  | Ofloatconst: float -> constant (**r floating-point constant *)
+  | Oaddrsymbol: ident -> int -> constant (**r address of the symbol plus the offset *)
+  | Oaddrstack: int -> constant.   (**r stack pointer plus the given offset *)
+
+Inductive unary_operation : Set :=
+  | Ocast8unsigned: unary_operation        (**r 8-bit zero extension  *)
+  | Ocast8signed: unary_operation          (**r 8-bit sign extension  *)
+  | Ocast16unsigned: unary_operation       (**r 16-bit zero extension  *)
+  | Ocast16signed: unary_operation         (**r 16-bit sign extension *)
+  | Onegint: unary_operation               (**r integer opposite *)
+  | Onotbool: unary_operation              (**r boolean negation  *)
+  | Onotint: unary_operation               (**r bitwise complement  *)
+  | Onegf: unary_operation                 (**r float opposite *)
+  | Oabsf: unary_operation                 (**r float absolute value *)
+  | Osingleoffloat: unary_operation        (**r float truncation *)
+  | Ointoffloat: unary_operation           (**r integer to float *)
+  | Ofloatofint: unary_operation           (**r float to signed integer *)
+  | Ofloatofintu: unary_operation.         (**r float to unsigned integer *)
+
+Inductive binary_operation : Set :=
+  | Oadd: binary_operation                 (**r integer addition *)
+  | Osub: binary_operation                 (**r integer subtraction *)
+  | Omul: binary_operation                 (**r integer multiplication *)
+  | Odiv: binary_operation                 (**r integer signed division *)
+  | Odivu: binary_operation                (**r integer unsigned division *)
+  | Omod: binary_operation                 (**r integer signed modulus *)
+  | Omodu: binary_operation                (**r integer unsigned modulus *)
+  | Oand: binary_operation                 (**r bitwise ``and'' *)
+  | Oor: binary_operation                  (**r bitwise ``or'' *)
+  | Oxor: binary_operation                 (**r bitwise ``xor'' *)
+  | Oshl: binary_operation                 (**r left shift *)
+  | Oshr: binary_operation                 (**r right signed shift *)
+  | Oshru: binary_operation                (**r right unsigned shift *)
+  | Oaddf: binary_operation                (**r float addition *)
+  | Osubf: binary_operation                (**r float subtraction *)
+  | Omulf: binary_operation                (**r float multiplication *)
+  | Odivf: binary_operation                (**r float division *)
+  | Ocmp: comparison -> binary_operation   (**r integer signed comparison *)
+  | Ocmpu: comparison -> binary_operation  (**r integer unsigned comparison *)
+  | Ocmpf: comparison -> binary_operation. (**r float comparison *)
+
+(** Expressions include reading local variables, constants and
+  arithmetic operations, reading and writing store locations,
+  function calls, and conditional expressions
+  (similar to [e1 ? e2 : e3] in C).
+  The [Elet] and [Eletvar] constructs enable sharing the computations
+  of subexpressions.  De Bruijn notation is used: [Eletvar n] refers
+  to the value bound by then [n+1]-th enclosing [Elet] construct. *)
 
 Inductive expr : Set :=
   | Evar : ident -> expr
-  | Eop : operation -> exprlist -> expr
-  | Eload : memory_chunk -> addressing -> exprlist -> expr
-  | Estore : memory_chunk -> addressing -> exprlist -> expr -> expr
+  | Econst : constant -> expr
+  | Eunop : unary_operation -> expr -> expr
+  | Ebinop : binary_operation -> expr -> expr -> expr
+  | Eload : memory_chunk -> expr -> expr
+  | Estore : memory_chunk -> expr -> expr -> expr
   | Ecall : signature -> expr -> exprlist -> expr
-  | Econdition : condexpr -> expr -> expr -> expr
+  | Econdition : expr -> expr -> expr -> expr
   | Elet : expr -> expr -> expr
   | Eletvar : nat -> expr
   | Ealloc : expr -> expr
 
-with condexpr : Set :=
-  | CEtrue: condexpr
-  | CEfalse: condexpr
-  | CEcond: condition -> exprlist -> condexpr
-  | CEcondition : condexpr -> condexpr -> condexpr -> condexpr
-
 with exprlist : Set :=
   | Enil: exprlist
   | Econs: expr -> exprlist -> exprlist.
@@ -57,12 +95,13 @@ Inductive stmt : Set :=
   | Sexpr: expr -> stmt
   | Sassign : ident -> expr -> stmt
   | Sseq: stmt -> stmt -> stmt
-  | Sifthenelse: condexpr -> stmt -> stmt -> stmt
+  | Sifthenelse: expr -> stmt -> stmt -> stmt
   | Sloop: stmt -> stmt
   | Sblock: stmt -> stmt
   | Sexit: nat -> stmt
   | Sswitch: expr -> list (int * nat) -> nat -> stmt
-  | Sreturn: option expr -> stmt.
+  | Sreturn: option expr -> stmt
+  | Stailcall: signature -> expr -> exprlist -> stmt.
 
 (** Functions are composed of a signature, a list of parameter names,
   a list of local variables, and a  statement representing
@@ -97,30 +136,31 @@ Definition funsig (fd: fundef) :=
 Inductive outcome: Set :=
   | Out_normal: outcome                (**r continue in sequence *)
   | Out_exit: nat -> outcome           (**r terminate [n+1] enclosing blocks *)
-  | Out_return: option val -> outcome. (**r return immediately to caller *)
-
-Definition outcome_result_value
-                 (out: outcome) (ot: option typ) (v: val) : Prop :=
-  match out, ot with
-  | Out_normal, None => v = Vundef
-  | Out_return None, None => v = Vundef
-  | Out_return (Some v'), Some ty => v = v'
-  | _, _ => False
-  end.
+  | Out_return: option val -> outcome  (**r return immediately to caller *)
+  | Out_tailcall_return: val -> outcome. (**r already returned to caller via a tailcall *)
 
 Definition outcome_block (out: outcome) : outcome :=
   match out with
-  | Out_normal => Out_normal
   | Out_exit O => Out_normal
   | Out_exit (S n) => Out_exit n
-  | Out_return optv => Out_return optv
+  | out => out
+  end.
+
+Definition outcome_result_value
+    (out: outcome) (retsig: option typ) (vres: val) : Prop :=
+  match out, retsig with
+  | Out_normal, None => vres = Vundef
+  | Out_return None, None => vres = Vundef
+  | Out_return (Some v), Some ty => vres = v
+  | Out_tailcall_return v, _ => vres = v
+  | _, _ => False
   end.
 
-Fixpoint switch_target (n: int) (dfl: nat) (cases: list (int * nat))
-                       {struct cases} : nat :=
-  match cases with
-  | nil => dfl
-  | (n1, lbl1) :: rem => if Int.eq n n1 then lbl1 else switch_target n dfl rem
+Definition outcome_free_mem
+    (out: outcome) (m: mem) (sp: block) : mem :=
+  match out with
+  | Out_tailcall_return _ => m
+  | _ => Mem.free m sp
   end.
 
 (** Three kinds of evaluation environments are involved:
@@ -154,10 +194,105 @@ Section RELSEM.
 
 Variable ge: genv.
 
-(** Evaluation of an expression: [eval_expr ge sp le e m a m' v]
+(** Evaluation of constants and operator applications.
+    [None] is returned when the computation is undefined, e.g.
+    if arguments are of the wrong types, or in case of an integer division
+    by zero. *)
+
+Definition eval_constant (sp: val) (cst: constant) : option val :=
+  match cst with
+  | Ointconst n => Some (Vint n)
+  | Ofloatconst n => Some (Vfloat n)
+  | Oaddrsymbol s ofs =>
+      match Genv.find_symbol ge s with
+      | None => None
+      | Some b => Some (Vptr b ofs)
+      end
+  | Oaddrstack ofs =>
+      match sp with
+      | Vptr b n => Some (Vptr b (Int.add n ofs))
+      | _ => None
+      end
+  end.
+
+Definition eval_unop (op: unary_operation) (arg: val) : option val :=
+  match op, arg with
+  | Ocast8unsigned, _ => Some (Val.cast8unsigned arg)
+  | Ocast8signed, _ => Some (Val.cast8signed arg)
+  | Ocast16unsigned, _ => Some (Val.cast16unsigned arg)
+  | Ocast16signed, _ => Some (Val.cast16signed arg)
+  | Onegint, Vint n1 => Some (Vint (Int.neg n1))
+  | Onotbool, Vint n1 => Some (Val.of_bool (Int.eq n1 Int.zero))
+  | Onotbool, Vptr b1 n1 => Some Vfalse
+  | Onotint, Vint n1 => Some (Vint (Int.not n1))
+  | Onegf, Vfloat f1 => Some (Vfloat (Float.neg f1))
+  | Oabsf, Vfloat f1 => Some (Vfloat (Float.abs f1))
+  | Osingleoffloat, _ => Some (Val.singleoffloat arg)
+  | Ointoffloat, Vfloat f1 => Some (Vint (Float.intoffloat f1))
+  | Ofloatofint, Vint n1 => Some (Vfloat (Float.floatofint n1))
+  | Ofloatofintu, Vint n1 => Some (Vfloat (Float.floatofintu n1))
+  | _, _ => None
+  end.
+
+Definition eval_compare_null (c: comparison) (n: int) : option val :=
+  if Int.eq n Int.zero 
+  then match c with Ceq => Some Vfalse | Cne => Some Vtrue | _ => None end
+  else None.
+
+Definition eval_binop
+            (op: binary_operation) (arg1 arg2: val) (m: mem): option val :=
+  match op, arg1, arg2 with
+  | Oadd, Vint n1, Vint n2 => Some (Vint (Int.add n1 n2))
+  | Oadd, Vint n1, Vptr b2 n2 => Some (Vptr b2 (Int.add n2 n1))
+  | Oadd, Vptr b1 n1, Vint n2 => Some (Vptr b1 (Int.add n1 n2))
+  | Osub, Vint n1, Vint n2 => Some (Vint (Int.sub n1 n2))
+  | Osub, Vptr b1 n1, Vint n2 => Some (Vptr b1 (Int.sub n1 n2))
+  | Osub, Vptr b1 n1, Vptr b2 n2 =>
+      if eq_block b1 b2 then Some (Vint (Int.sub n1 n2)) else None
+  | Omul, Vint n1, Vint n2 => Some (Vint (Int.mul n1 n2))
+  | Odiv, Vint n1, Vint n2 =>
+      if Int.eq n2 Int.zero then None else Some (Vint (Int.divs n1 n2))
+  | Odivu, Vint n1, Vint n2 =>
+      if Int.eq n2 Int.zero then None else Some (Vint (Int.divu n1 n2))
+  | Omod, Vint n1, Vint n2 =>
+      if Int.eq n2 Int.zero then None else Some (Vint (Int.mods n1 n2))
+  | Omodu, Vint n1, Vint n2 =>
+      if Int.eq n2 Int.zero then None else Some (Vint (Int.modu n1 n2))
+  | Oand, Vint n1, Vint n2 => Some (Vint (Int.and n1 n2))
+  | Oor, Vint n1, Vint n2 => Some (Vint (Int.or n1 n2))
+  | Oxor, Vint n1, Vint n2 => Some (Vint (Int.xor n1 n2))
+  | Oshl, Vint n1, Vint n2 =>
+      if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shl n1 n2)) else None
+  | Oshr, Vint n1, Vint n2 =>
+      if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shr n1 n2)) else None
+  | Oshru, Vint n1, Vint n2 =>
+      if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shru n1 n2)) else None
+   | Oaddf, Vfloat f1, Vfloat f2 => Some (Vfloat (Float.add f1 f2))
+  | Osubf, Vfloat f1, Vfloat f2 => Some (Vfloat (Float.sub f1 f2))
+  | Omulf, Vfloat f1, Vfloat f2 => Some (Vfloat (Float.mul f1 f2))
+  | Odivf, Vfloat f1, Vfloat f2 => Some (Vfloat (Float.div f1 f2))
+  | Ocmp c, Vint n1, Vint n2 =>
+      Some (Val.of_bool(Int.cmp c n1 n2))
+  | Ocmp c, Vptr b1 n1, Vptr b2 n2 =>
+      if valid_pointer m b1 (Int.signed n1)
+      && valid_pointer m b2 (Int.signed n2) then
+        if eq_block b1 b2 then Some(Val.of_bool(Int.cmp c n1 n2)) else None
+      else
+        None
+  | Ocmp c, Vptr b1 n1, Vint n2 => eval_compare_null c n2
+  | Ocmp c, Vint n1, Vptr b2 n2 => eval_compare_null c n1
+  | Ocmpu c, Vint n1, Vint n2 =>
+      Some (Val.of_bool(Int.cmpu c n1 n2))
+  | Ocmpf c, Vfloat f1, Vfloat f2 =>
+      Some (Val.of_bool (Float.cmp c f1 f2))
+  | _, _, _ => None
+  end.
+
+(** Evaluation of an expression: [eval_expr ge sp le e m a t m' v]
   states that expression [a], in initial local environment [e] and
   memory state [m], evaluates to value [v].  [m'] is the final
   memory state, reflecting memory stores possibly performed by [a].
+  [t] is the trace of I/O events generated during the evaluation.
   Expressions do not assign variables, therefore the local environment
   [e] is unchanged.  [ge] and [le] are the global environment and let
   environment respectively, and are unchanged during evaluation.  [sp]
@@ -172,25 +307,34 @@ Inductive eval_expr:
       forall sp le e m id v,
       PTree.get id e = Some v ->
       eval_expr sp le e m (Evar id) E0 m v
-  | eval_Eop:
-      forall sp le e m op al t m1 vl v,
-      eval_exprlist sp le e m al t m1 vl ->
-      eval_operation ge sp op vl = Some v ->
-      eval_expr sp le e m (Eop op al) t m1 v
+  | eval_Econst:
+      forall sp le e m cst v,
+      eval_constant sp cst = Some v ->
+      eval_expr sp le e m (Econst cst) E0 m v
+  | eval_Eunop:
+      forall sp le e m op a t m1 v1 v,
+      eval_expr sp le e m a t m1 v1 ->
+      eval_unop op v1 = Some v ->
+      eval_expr sp le e m (Eunop op a) t m1 v
+  | eval_Ebinop:
+      forall sp le e m op a1 a2 t1 m1 v1 t2 m2 v2 t v,
+      eval_expr sp le e m a1 t1 m1 v1 ->
+      eval_expr sp le e m1 a2 t2 m2 v2 ->
+      eval_binop op v1 v2 m2 = Some v ->
+      t = t1 ** t2 ->
+      eval_expr sp le e m (Ebinop op a1 a2) t m2 v
   | eval_Eload:
-      forall sp le e m chunk addr al t m1 v vl a,
-      eval_exprlist sp le e m al t m1 vl ->
-      eval_addressing ge sp addr vl = Some a ->
-      Mem.loadv chunk m1 a = Some v ->
-      eval_expr sp le e m (Eload chunk addr al) t m1 v
+      forall sp le e m chunk a t m1 v1 v,
+      eval_expr sp le e m a t m1 v1 ->
+      Mem.loadv chunk m1 v1 = Some v ->
+      eval_expr sp le e m (Eload chunk a) t m1 v
   | eval_Estore:
-      forall sp le e m chunk addr al b t t1 m1 vl t2 m2 m3 v a,
-      eval_exprlist sp le e m al t1 m1 vl ->
-      eval_expr sp le e m1 b t2 m2 v ->
-      eval_addressing ge sp addr vl = Some a ->
-      Mem.storev chunk m2 a v = Some m3 ->
+      forall sp le e m chunk a1 a2 t t1 m1 v1 t2 m2 v2 m3,
+      eval_expr sp le e m a1 t1 m1 v1 ->
+      eval_expr sp le e m1 a2 t2 m2 v2 ->
+      Mem.storev chunk m2 v1 v2 = Some m3 ->
       t = t1 ** t2 ->
-      eval_expr sp le e m (Estore chunk addr al b) t m3 v
+      eval_expr sp le e m (Estore chunk a1 a2) t m3 v2
   | eval_Ecall:
       forall sp le e m sig a bl t t1 m1 t2 m2 t3 m3 vf vargs vres f,
       eval_expr sp le e m a t1 m1 vf ->
@@ -201,11 +345,12 @@ Inductive eval_expr:
       t = t1 ** t2 ** t3 ->
       eval_expr sp le e m (Ecall sig a bl) t m3 vres
   | eval_Econdition:
-      forall sp le e m a b c t t1 m1 v1 t2 m2 v2,
-      eval_condexpr sp le e m a t1 m1 v1 ->
-      eval_expr sp le e m1 (if v1 then b else c) t2 m2 v2 ->
+      forall sp le e m a1 a2 a3 t t1 m1 v1 b1 t2 m2 v2,
+      eval_expr sp le e m a1 t1 m1 v1 ->
+      Val.bool_of_val v1 b1 ->
+      eval_expr sp le e m1 (if b1 then a2 else a3) t2 m2 v2 ->
       t = t1 ** t2 ->
-      eval_expr sp le e m (Econdition a b c) t m2 v2
+      eval_expr sp le e m (Econdition a1 a2 a3) t m2 v2
   | eval_Elet:
       forall sp le e m a b t t1 m1 v1 t2 m2 v2,
       eval_expr sp le e m a t1 m1 v1 ->
@@ -222,33 +367,6 @@ Inductive eval_expr:
       Mem.alloc m1 0 (Int.signed n) = (m2, b) ->
       eval_expr sp le e m (Ealloc a) t m2 (Vptr b Int.zero)
 
-(** Evaluation of a condition expression:
-  [eval_condexpr ge sp le e m a m' b]
-  states that condition expression [a] evaluates to the boolean value [b].
-  The other parameters are as in [eval_expr].
-*)
-
-with eval_condexpr:
-         val -> letenv -> env ->
-         mem -> condexpr -> trace -> mem -> bool -> Prop :=
-  | eval_CEtrue:
-      forall sp le e m,
-      eval_condexpr sp le e m CEtrue E0 m true
-  | eval_CEfalse:
-      forall sp le e m,
-      eval_condexpr sp le e m CEfalse E0 m false
-  | eval_CEcond:
-      forall sp le e m cond al t1 m1 vl b,
-      eval_exprlist sp le e m al t1 m1 vl ->
-      eval_condition cond vl = Some b ->
-      eval_condexpr sp le e m (CEcond cond al) t1 m1 b
-  | eval_CEcondition:
-      forall sp le e m a b c t t1 m1 vb1 t2 m2 vb2,
-      eval_condexpr sp le e m a t1 m1 vb1 ->
-      eval_condexpr sp le e m1 (if vb1 then b else c) t2 m2 vb2 ->
-      t = t1 ** t2 ->
-      eval_condexpr sp le e m (CEcondition a b c) t m2 vb2
-
 (** Evaluation of a list of expressions:
   [eval_exprlist ge sp le al m a m' vl]
   states that the list [al] of expressions evaluate 
@@ -272,6 +390,7 @@ with eval_exprlist:
 (** Evaluation of a function invocation: [eval_funcall ge m f args m' res]
   means that the function [f], applied to the arguments [args] in
   memory state [m], returns the value [res] in modified memory state [m'].
+  [t] is the trace of observable events generated during the invocation.
 *)
 
 with eval_funcall:
@@ -283,7 +402,7 @@ with eval_funcall:
       set_locals f.(fn_vars) (set_params vargs f.(fn_params)) = e ->
       exec_stmt (Vptr sp Int.zero) e m1 f.(fn_body) t e2 m2 out ->
       outcome_result_value out f.(fn_sig).(sig_res) vres ->
-      eval_funcall m (Internal f) vargs t (Mem.free m2 sp) vres
+      eval_funcall m (Internal f) vargs t (outcome_free_mem out m2 sp) vres
   | eval_funcall_external:
       forall ef m args t res,
       event_match ef args t res ->
@@ -291,7 +410,10 @@ with eval_funcall:
 
 (** Execution of a statement: [exec_stmt ge sp e m s e' m' out]
   means that statement [s] executes with outcome [out].
-  The other parameters are as in [eval_expr]. *)
+  [e] is the initial environment and [m] is the initial memory state.
+  [e'] is the final environment, reflecting variable assignments performed
+  by [s].  [m'] is the final memory state, reflecting memory stores
+  performed by [s].  The other parameters are as in [eval_expr]. *)
 
 with exec_stmt:
          val ->
@@ -309,9 +431,10 @@ with exec_stmt:
       eval_expr sp nil e m a t m1 v ->
       exec_stmt sp e m (Sassign id a) t (PTree.set id v e) m1 Out_normal
   | exec_Sifthenelse:
-      forall sp e m a s1 s2 t t1 m1 v1 t2 e2 m2 out,
-      eval_condexpr sp nil e m a t1 m1 v1 ->
-      exec_stmt sp e m1 (if v1 then s1 else s2) t2 e2 m2 out ->
+      forall sp e m a s1 s2 t t1 m1 v1 b1 t2 e2 m2 out,
+      eval_expr sp nil e m a t1 m1 v1 ->
+      Val.bool_of_val v1 b1 ->
+      exec_stmt sp e m1 (if b1 then s1 else s2) t2 e2 m2 out ->
       t = t1 ** t2 ->
       exec_stmt sp e m (Sifthenelse a s1 s2) t e2 m2 out
   | exec_Sseq_continue:
@@ -354,13 +477,29 @@ with exec_stmt:
   | exec_Sreturn_some:
       forall sp e m a t m1 v,
       eval_expr sp nil e m a t m1 v ->
-      exec_stmt sp e m (Sreturn (Some a)) t e m1 (Out_return (Some v)).
+      exec_stmt sp e m (Sreturn (Some a)) t e m1 (Out_return (Some v))
+  | exec_Stailcall:
+      forall sp e m sig a bl t t1 m1 t2 m2 t3 m3 vf vargs vres f,
+      eval_expr (Vptr sp Int.zero) nil e m a t1 m1 vf ->
+      eval_exprlist (Vptr sp Int.zero) nil e m1 bl t2 m2 vargs ->
+      Genv.find_funct ge vf = Some f ->
+      funsig f = sig ->
+      eval_funcall (Mem.free m2 sp) f vargs t3 m3 vres ->
+      t = t1 ** t2 ** t3 ->
+      exec_stmt (Vptr sp Int.zero) e m (Stailcall sig a bl) t e m3 (Out_tailcall_return vres).
+
+Scheme eval_expr_ind4 := Minimality for eval_expr Sort Prop
+  with eval_exprlist_ind4 := Minimality for eval_exprlist Sort Prop
+  with eval_funcall_ind4 := Minimality for eval_funcall Sort Prop
+  with exec_stmt_ind4 := Minimality for exec_stmt Sort Prop.
 
 End RELSEM.
 
-(** Execution of a whole program: [exec_program p r]
+(** Execution of a whole program: [exec_program p t r]
   holds if the application of [p]'s main function to no arguments
-  in the initial memory state for [p] eventually returns value [r]. *)
+  in the initial memory state for [p] performs the input/output
+  operations described in the trace [t], and eventually returns value [r].
+*)
 
 Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
   let ge := Genv.globalenv p in
diff --git a/backend/CminorSel.v b/backend/CminorSel.v
new file mode 100644
index 00000000..331105ea
--- /dev/null
+++ b/backend/CminorSel.v
@@ -0,0 +1,296 @@
+(** The Cminor language after instruction selection. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Events.
+Require Import Values.
+Require Import Mem.
+Require Import Cminor.
+Require Import Op.
+Require Import Globalenvs.
+Require Import Switch.
+
+(** * Abstract syntax *)
+
+(** CminorSel programs share the general structure of Cminor programs:
+  functions, statements and expressions.  However, CminorSel uses
+  machine-dependent operations, addressing modes and conditions,
+  as defined in module [Op] and used in lower-level intermediate
+  languages ([RTL] and below).  Moreover, a variant [condexpr] of [expr]
+  is used to represent expressions which are evaluated for their
+  boolean value only and not their exact value.
+*)
+
+Inductive expr : Set :=
+  | Evar : ident -> expr
+  | Eop : operation -> exprlist -> expr
+  | Eload : memory_chunk -> addressing -> exprlist -> expr
+  | Estore : memory_chunk -> addressing -> exprlist -> expr -> expr
+  | Ecall : signature -> expr -> exprlist -> expr
+  | Econdition : condexpr -> expr -> expr -> expr
+  | Elet : expr -> expr -> expr
+  | Eletvar : nat -> expr
+  | Ealloc : expr -> expr
+
+with condexpr : Set :=
+  | CEtrue: condexpr
+  | CEfalse: condexpr
+  | CEcond: condition -> exprlist -> condexpr
+  | CEcondition : condexpr -> condexpr -> condexpr -> condexpr
+
+with exprlist : Set :=
+  | Enil: exprlist
+  | Econs: expr -> exprlist -> exprlist.
+
+(** Statements are as in Cminor, except that the condition of an
+  if/then/else conditional is a [condexpr]. *)
+
+Inductive stmt : Set :=
+  | Sskip: stmt
+  | Sexpr: expr -> stmt
+  | Sassign : ident -> expr -> stmt
+  | Sseq: stmt -> stmt -> stmt
+  | Sifthenelse: condexpr -> stmt -> stmt -> stmt
+  | Sloop: stmt -> stmt
+  | Sblock: stmt -> stmt
+  | Sexit: nat -> stmt
+  | Sswitch: expr -> list (int * nat) -> nat -> stmt
+  | Sreturn: option expr -> stmt
+  | Stailcall: signature -> expr -> exprlist -> stmt.
+
+Record function : Set := mkfunction {
+  fn_sig: signature;
+  fn_params: list ident;
+  fn_vars: list ident;
+  fn_stackspace: Z;
+  fn_body: stmt
+}.
+
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+
+Definition funsig (fd: fundef) :=
+  match fd with
+  | Internal f => f.(fn_sig)
+  | External ef => ef.(ef_sig)
+  end.
+
+(** * Operational semantics *)
+
+(** Three kinds of evaluation environments are involved:
+- [genv]: global environments, define symbols and functions;
+- [env]: local environments, map local variables to values;
+- [lenv]: let environments, map de Bruijn indices to values.
+*)
+
+Definition genv := Genv.t fundef.
+
+Section RELSEM.
+
+Variable ge: genv.
+
+(** The evaluation predicates have the same general shape as those
+    of Cminor.  Refer to the description of Cminor semantics for
+    the meaning of the parameters of the predicates.
+    One additional predicate is introduced:
+    [eval_condexpr ge sp le e m a t m' b], meaning that the conditional
+    expression [a] evaluates to the boolean [b]. *)
+
+Inductive eval_expr:
+         val -> letenv -> env ->
+         mem -> expr -> trace -> mem -> val -> Prop :=
+  | eval_Evar:
+      forall sp le e m id v,
+      PTree.get id e = Some v ->
+      eval_expr sp le e m (Evar id) E0 m v
+  | eval_Eop:
+      forall sp le e m op al t m1 vl v,
+      eval_exprlist sp le e m al t m1 vl ->
+      eval_operation ge sp op vl m1 = Some v ->
+      eval_expr sp le e m (Eop op al) t m1 v
+  | eval_Eload:
+      forall sp le e m chunk addr al t m1 v vl a,
+      eval_exprlist sp le e m al t m1 vl ->
+      eval_addressing ge sp addr vl = Some a ->
+      Mem.loadv chunk m1 a = Some v ->
+      eval_expr sp le e m (Eload chunk addr al) t m1 v
+  | eval_Estore:
+      forall sp le e m chunk addr al b t t1 m1 vl t2 m2 m3 v a,
+      eval_exprlist sp le e m al t1 m1 vl ->
+      eval_expr sp le e m1 b t2 m2 v ->
+      eval_addressing ge sp addr vl = Some a ->
+      Mem.storev chunk m2 a v = Some m3 ->
+      t = t1 ** t2 ->
+      eval_expr sp le e m (Estore chunk addr al b) t m3 v
+  | eval_Ecall:
+      forall sp le e m sig a bl t t1 m1 t2 m2 t3 m3 vf vargs vres f,
+      eval_expr sp le e m a t1 m1 vf ->
+      eval_exprlist sp le e m1 bl t2 m2 vargs ->
+      Genv.find_funct ge vf = Some f ->
+      funsig f = sig ->
+      eval_funcall m2 f vargs t3 m3 vres ->
+      t = t1 ** t2 ** t3 ->
+      eval_expr sp le e m (Ecall sig a bl) t m3 vres
+  | eval_Econdition:
+      forall sp le e m a b c t t1 m1 v1 t2 m2 v2,
+      eval_condexpr sp le e m a t1 m1 v1 ->
+      eval_expr sp le e m1 (if v1 then b else c) t2 m2 v2 ->
+      t = t1 ** t2 ->
+      eval_expr sp le e m (Econdition a b c) t m2 v2
+  | eval_Elet:
+      forall sp le e m a b t t1 m1 v1 t2 m2 v2,
+      eval_expr sp le e m a t1 m1 v1 ->
+      eval_expr sp (v1::le) e m1 b t2 m2 v2 ->
+      t = t1 ** t2 ->
+      eval_expr sp le e m (Elet a b) t m2 v2
+  | eval_Eletvar:
+      forall sp le e m n v,
+      nth_error le n = Some v ->
+      eval_expr sp le e m (Eletvar n) E0 m v
+  | eval_Ealloc:
+      forall sp le e m a t m1 n m2 b,
+      eval_expr sp le e m a t m1 (Vint n) ->
+      Mem.alloc m1 0 (Int.signed n) = (m2, b) ->
+      eval_expr sp le e m (Ealloc a) t m2 (Vptr b Int.zero)
+
+with eval_condexpr:
+         val -> letenv -> env ->
+         mem -> condexpr -> trace -> mem -> bool -> Prop :=
+  | eval_CEtrue:
+      forall sp le e m,
+      eval_condexpr sp le e m CEtrue E0 m true
+  | eval_CEfalse:
+      forall sp le e m,
+      eval_condexpr sp le e m CEfalse E0 m false
+  | eval_CEcond:
+      forall sp le e m cond al t1 m1 vl b,
+      eval_exprlist sp le e m al t1 m1 vl ->
+      eval_condition cond vl m1 = Some b ->
+      eval_condexpr sp le e m (CEcond cond al) t1 m1 b
+  | eval_CEcondition:
+      forall sp le e m a b c t t1 m1 vb1 t2 m2 vb2,
+      eval_condexpr sp le e m a t1 m1 vb1 ->
+      eval_condexpr sp le e m1 (if vb1 then b else c) t2 m2 vb2 ->
+      t = t1 ** t2 ->
+      eval_condexpr sp le e m (CEcondition a b c) t m2 vb2
+
+with eval_exprlist:
+         val -> letenv -> env ->
+         mem -> exprlist -> trace -> mem -> list val -> Prop :=
+  | eval_Enil:
+      forall sp le e m,
+      eval_exprlist sp le e m Enil E0 m nil
+  | eval_Econs:
+      forall sp le e m a bl t t1 m1 v t2 m2 vl,
+      eval_expr sp le e m a t1 m1 v ->
+      eval_exprlist sp le e m1 bl t2 m2 vl ->
+      t = t1 ** t2 ->
+      eval_exprlist sp le e m (Econs a bl) t m2 (v :: vl)
+
+with eval_funcall:
+        mem -> fundef -> list val -> trace ->
+        mem -> val -> Prop :=
+  | eval_funcall_internal:
+      forall m f vargs m1 sp e t e2 m2 out vres,
+      Mem.alloc m 0 f.(fn_stackspace) = (m1, sp) ->
+      set_locals f.(fn_vars) (set_params vargs f.(fn_params)) = e ->
+      exec_stmt (Vptr sp Int.zero) e m1 f.(fn_body) t e2 m2 out ->
+      outcome_result_value out f.(fn_sig).(sig_res) vres ->
+      eval_funcall m (Internal f) vargs t (outcome_free_mem out m2 sp) vres
+  | eval_funcall_external:
+      forall ef m args t res,
+      event_match ef args t res ->
+      eval_funcall m (External ef) args t m res
+
+with exec_stmt:
+         val ->
+         env -> mem -> stmt -> trace ->
+         env -> mem -> outcome -> Prop :=
+  | exec_Sskip:
+      forall sp e m,
+      exec_stmt sp e m Sskip E0 e m Out_normal
+  | exec_Sexpr:
+      forall sp e m a t m1 v,
+      eval_expr sp nil e m a t m1 v ->
+      exec_stmt sp e m (Sexpr a) t e m1 Out_normal
+  | exec_Sassign:
+      forall sp e m id a t m1 v,
+      eval_expr sp nil e m a t m1 v ->
+      exec_stmt sp e m (Sassign id a) t (PTree.set id v e) m1 Out_normal
+  | exec_Sifthenelse:
+      forall sp e m a s1 s2 t t1 m1 v1 t2 e2 m2 out,
+      eval_condexpr sp nil e m a t1 m1 v1 ->
+      exec_stmt sp e m1 (if v1 then s1 else s2) t2 e2 m2 out ->
+      t = t1 ** t2 ->
+      exec_stmt sp e m (Sifthenelse a s1 s2) t e2 m2 out
+  | exec_Sseq_continue:
+      forall sp e m t s1 t1 e1 m1 s2 t2 e2 m2 out,
+      exec_stmt sp e m s1 t1 e1 m1 Out_normal ->
+      exec_stmt sp e1 m1 s2 t2 e2 m2 out ->
+      t = t1 ** t2 ->
+      exec_stmt sp e m (Sseq s1 s2) t e2 m2 out
+  | exec_Sseq_stop:
+      forall sp e m t s1 s2 e1 m1 out,
+      exec_stmt sp e m s1 t e1 m1 out ->
+      out <> Out_normal ->
+      exec_stmt sp e m (Sseq s1 s2) t e1 m1 out
+  | exec_Sloop_loop:
+      forall sp e m s t t1 e1 m1 t2 e2 m2 out,
+      exec_stmt sp e m s t1 e1 m1 Out_normal ->
+      exec_stmt sp e1 m1 (Sloop s) t2 e2 m2 out ->
+      t = t1 ** t2 ->
+      exec_stmt sp e m (Sloop s) t e2 m2 out
+  | exec_Sloop_stop:
+      forall sp e m t s e1 m1 out,
+      exec_stmt sp e m s t e1 m1 out ->
+      out <> Out_normal ->
+      exec_stmt sp e m (Sloop s) t e1 m1 out
+  | exec_Sblock:
+      forall sp e m s t e1 m1 out,
+      exec_stmt sp e m s t e1 m1 out ->
+      exec_stmt sp e m (Sblock s) t e1 m1 (outcome_block out)
+  | exec_Sexit:
+      forall sp e m n,
+      exec_stmt sp e m (Sexit n) E0 e m (Out_exit n)
+  | exec_Sswitch:
+      forall sp e m a cases default t1 m1 n,
+      eval_expr sp nil e m a t1 m1 (Vint n) ->
+      exec_stmt sp e m (Sswitch a cases default)
+                t1 e m1 (Out_exit (switch_target n default cases))
+  | exec_Sreturn_none:
+      forall sp e m,
+      exec_stmt sp e m (Sreturn None) E0 e m (Out_return None)
+  | exec_Sreturn_some:
+      forall sp e m a t m1 v,
+      eval_expr sp nil e m a t m1 v ->
+      exec_stmt sp e m (Sreturn (Some a)) t e m1 (Out_return (Some v))
+  | exec_Stailcall:
+      forall sp e m sig a bl t t1 m1 t2 m2 t3 m3 vf vargs vres f,
+      eval_expr (Vptr sp Int.zero) nil e m a t1 m1 vf ->
+      eval_exprlist (Vptr sp Int.zero) nil e m1 bl t2 m2 vargs ->
+      Genv.find_funct ge vf = Some f ->
+      funsig f = sig ->
+      eval_funcall (Mem.free m2 sp) f vargs t3 m3 vres ->
+      t = t1 ** t2 ** t3 ->
+      exec_stmt (Vptr sp Int.zero) e m (Stailcall sig a bl) t e m3 (Out_tailcall_return vres).
+
+Scheme eval_expr_ind5 := Minimality for eval_expr Sort Prop
+  with eval_condexpr_ind5 := Minimality for eval_condexpr Sort Prop
+  with eval_exprlist_ind5 := Minimality for eval_exprlist Sort Prop
+  with eval_funcall_ind5 := Minimality for eval_funcall Sort Prop
+  with exec_stmt_ind5 := Minimality for exec_stmt Sort Prop.
+
+End RELSEM.
+
+Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
+  let ge := Genv.globalenv p in
+  let m0 := Genv.init_mem p in
+  exists b, exists f, exists m,
+  Genv.find_symbol ge p.(prog_main) = Some b /\
+  Genv.find_funct_ptr ge b = Some f /\
+  funsig f = mksignature nil (Some Tint) /\
+  eval_funcall ge m0 f nil t m r.
+
diff --git a/backend/Coloring.v b/backend/Coloring.v
index 0b8a4ccc..57f7d594 100644
--- a/backend/Coloring.v
+++ b/backend/Coloring.v
@@ -150,6 +150,8 @@ Definition add_edges_instr
           (add_interf_op res live
             (add_interf_call
               (Regset.remove res live) destroyed_at_call_regs g)))
+  | Itailcall sig ros args =>
+      add_prefs_call args (loc_arguments sig) g
   | Ialloc arg res s =>
       add_pref_mreg arg loc_alloc_argument
         (add_pref_mreg res loc_alloc_result
diff --git a/backend/Coloringproof.v b/backend/Coloringproof.v
index f3801d07..ce24030d 100644
--- a/backend/Coloringproof.v
+++ b/backend/Coloringproof.v
@@ -325,6 +325,7 @@ Proof.
   eapply graph_incl_trans; [idtac|apply add_pref_mreg_incl].
   eapply graph_incl_trans; [idtac|apply add_interf_op_incl].
   apply add_interf_call_incl.
+  apply add_prefs_call_incl.
   eapply graph_incl_trans; [idtac|apply add_pref_mreg_incl].
   eapply graph_incl_trans; [idtac|apply add_pref_mreg_incl].
   eapply graph_incl_trans; [idtac|apply add_interf_op_incl].
diff --git a/backend/Constprop.v b/backend/Constprop.v
index d34c6eed..fecfb19f 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -632,6 +632,8 @@ Definition transfer (f: function) (pc: node) (before: D.t) :=
           before
       | Icall sig ros args res s =>
           D.set res Unknown before
+      | Itailcall sig ros args =>
+          before
       | Ialloc arg res s =>
           D.set res Unknown before
       | Icond cond args ifso ifnot =>
@@ -649,9 +651,12 @@ Definition transfer (f: function) (pc: node) (before: D.t) :=
 
 Module DS := Dataflow_Solver(D)(NodeSetForward).
 
-Definition analyze (f: RTL.function): option (PMap.t D.t) :=
-  DS.fixpoint (successors f) f.(fn_nextpc) (transfer f) 
-              ((f.(fn_entrypoint), D.top) :: nil).
+Definition analyze (f: RTL.function): PMap.t D.t :=
+  match DS.fixpoint (successors f) f.(fn_nextpc) (transfer f) 
+                    ((f.(fn_entrypoint), D.top) :: nil) with
+  | None => PMap.init D.top
+  | Some res => res
+  end.
 
 (** * Code transformation *)
 
@@ -986,6 +991,16 @@ End STRENGTH_REDUCTION.
     and similarly for the addressing modes of load and store instructions.
     Other instructions are unchanged. *)
 
+Definition transf_ros (approx: D.t) (ros: reg + ident) : reg + ident :=
+  match ros with
+  | inl r =>
+      match D.get r approx with
+      | S symb ofs => if Int.eq ofs Int.zero then inr _ symb else ros
+      | _ => ros
+      end
+  | inr s => ros
+  end.
+
 Definition transf_instr (approx: D.t) (instr: instruction) :=
   match instr with
   | Iop op args res s =>
@@ -1007,16 +1022,9 @@ Definition transf_instr (approx: D.t) (instr: instruction) :=
       let (addr', args') := addr_strength_reduction approx addr args in
       Istore chunk addr' args' src s      
   | Icall sig ros args res s =>
-      let ros' :=
-        match ros with
-        | inl r =>
-            match D.get r approx with
-            | S symb ofs => if Int.eq ofs Int.zero then inr _ symb else ros
-            | _ => ros
-            end
-        | inr s => ros
-        end in
-      Icall sig ros' args res s
+      Icall sig (transf_ros approx ros) args res s
+  | Itailcall sig ros args =>
+      Itailcall sig (transf_ros approx ros) args
   | Ialloc arg res s =>
       Ialloc arg res s
   | Icond cond args s1 s2 =>
@@ -1048,20 +1056,18 @@ Proof.
 Qed.
 
 Definition transf_function (f: function) : function :=
-  match analyze f with
-  | None => f
-  | Some approxs =>
-      mkfunction
-        f.(fn_sig)
-        f.(fn_params)
-        f.(fn_stacksize)
-        (transf_code approxs f.(fn_code))
-        f.(fn_entrypoint)
-        f.(fn_nextpc)
-        (transf_code_wf f approxs f.(fn_code_wf))
-  end.
+  let approxs := analyze f in
+  mkfunction
+    f.(fn_sig)
+    f.(fn_params)
+    f.(fn_stacksize)
+    (transf_code approxs f.(fn_code))
+    f.(fn_entrypoint)
+    f.(fn_nextpc)
+    (transf_code_wf f approxs f.(fn_code_wf)).
 
-Definition transf_fundef := AST.transf_fundef transf_function.
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
 
 Definition transf_program (p: program) : program :=
   transform_program transf_fundef p.
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index ee241873..dfa828bf 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -9,6 +9,7 @@ Require Import Values.
 Require Import Events.
 Require Import Mem.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Registers.
 Require Import RTL.
@@ -41,6 +42,13 @@ Definition val_match_approx (a: approx) (v: val) : Prop :=
 Definition regs_match_approx (a: D.t) (rs: regset) : Prop :=
   forall r, val_match_approx (D.get r a) rs#r.
 
+Lemma regs_match_approx_top:
+  forall rs, regs_match_approx D.top rs.
+Proof.
+  intros. red; intros. simpl. rewrite PTree.gempty. 
+  unfold Approx.top, val_match_approx. auto.
+Qed.
+
 Lemma val_match_approx_increasing:
   forall a1 a2 v,
   Approx.ge a1 a2 -> val_match_approx a2 v -> val_match_approx a1 v.
@@ -123,10 +131,10 @@ Ltac InvVLMA :=
   approximations returned by [eval_static_operation]. *)
 
 Lemma eval_static_condition_correct:
-  forall cond al vl b,
+  forall cond al vl m b,
   val_list_match_approx al vl ->
   eval_static_condition cond al = Some b ->
-  eval_condition cond vl = Some b.
+  eval_condition cond vl m = Some b.
 Proof.
   intros until b.
   unfold eval_static_condition. 
@@ -135,9 +143,9 @@ Proof.
 Qed.
 
 Lemma eval_static_operation_correct:
-  forall op sp al vl v,
+  forall op sp al vl m v,
   val_list_match_approx al vl ->
-  eval_operation ge sp op vl = Some v ->
+  eval_operation ge sp op vl m = Some v ->
   val_match_approx (eval_static_operation op al) v.
 Proof.
   intros until v.
@@ -183,7 +191,7 @@ Proof.
   rewrite <- H3. replace v0 with (Vfloat n1). reflexivity. congruence.
 
   caseEq (eval_static_condition c vl0).
-  intros. generalize (eval_static_condition_correct _ _ _ _ H H1).
+  intros. generalize (eval_static_condition_correct _ _ _ m _ H H1).
   intro. rewrite H2 in H0. 
   destruct b; injection H0; intro; subst v; simpl; auto.
   intros; simpl; auto.
@@ -194,80 +202,40 @@ Proof.
   auto.
 Qed.
 
-(** The correctness of the transfer function follows: if the register
-  state before a transition matches the static approximations at that
-  program point, the register state after that transition matches
-  the static approximation returned by the transfer function. *)
-
-Lemma transfer_correct:
-  forall f c sp pc rs m t pc' rs' m' ra,
-  exec_instr ge c sp pc rs m t pc' rs' m' ->
-  c = f.(fn_code) ->
-  regs_match_approx ra rs ->
-  regs_match_approx (transfer f pc ra) rs'.
-Proof.
-  induction 1; intros; subst c; unfold transfer; rewrite H; auto.
-  (* Iop *)
-  apply regs_match_approx_update. 
-  apply eval_static_operation_correct with sp rs##args. 
-  eapply approx_regs_val_list. auto. auto. auto.
-  (* Iload *)
-  apply regs_match_approx_update; auto. simpl; auto.
-  (* Icall *)
-  apply regs_match_approx_update; auto. simpl; auto.
-  (* Ialloc *)
-  apply regs_match_approx_update; auto. simpl; auto.
-Qed.
-
 (** The correctness of the static analysis follows from the results
   above and the fact that the result of the static analysis is
   a solution of the forward dataflow inequations. *)
 
 Lemma analyze_correct_1:
-  forall f approxs,
-  analyze f = Some approxs ->
-  forall c sp pc rs m t pc' rs' m',
-  exec_instr ge c sp pc rs m t pc' rs' m' ->
-  c = f.(fn_code) ->
-  regs_match_approx approxs!!pc rs ->
-  regs_match_approx approxs!!pc' rs'.
+  forall f pc rs pc',
+  In pc' (successors f pc) ->
+  regs_match_approx (transfer f pc (analyze f)!!pc) rs ->
+  regs_match_approx (analyze f)!!pc' rs.
 Proof.
-  intros. 
+  intros until pc'. unfold analyze. 
+  caseEq (DS.fixpoint (successors f) (fn_nextpc f) (transfer f)
+                      ((fn_entrypoint f, D.top) :: nil)).
+  intros approxs; intros.
   apply regs_match_approx_increasing with (transfer f pc approxs!!pc).
-  eapply DS.fixpoint_solution. 
-  unfold analyze in H. eexact H. 
+  eapply DS.fixpoint_solution; eauto.
   elim (fn_code_wf f pc); intro. auto. 
-  generalize (exec_instr_present _ _ _ _ _ _ _ _ _ _ H0).
-  rewrite H1. intro. contradiction.
-  eapply successors_correct. rewrite <- H1. eauto.
-  eapply transfer_correct; eauto.
-Qed.
-
-Lemma analyze_correct_2:
-  forall f approxs,
-  analyze f = Some approxs ->
-  forall c sp pc rs m t pc' rs' m',
-  exec_instrs ge c sp pc rs m t pc' rs' m' ->
-  c = f.(fn_code) ->
-  regs_match_approx approxs!!pc rs ->
-  regs_match_approx approxs!!pc' rs'.
-Proof.
-  intros f approxs ANL. induction 1.
+  unfold successors in H0; rewrite H2 in H0; simpl; contradiction.
   auto.
-  intros. eapply analyze_correct_1; eauto.
-  eauto.
+  intros. rewrite PMap.gi. apply regs_match_approx_top. 
 Qed.
 
 Lemma analyze_correct_3:
-  forall f approxs rs,
-  analyze f = Some approxs ->
-  regs_match_approx approxs!!(f.(fn_entrypoint)) rs.
+  forall f rs,
+  regs_match_approx (analyze f)!!(f.(fn_entrypoint)) rs.
 Proof.
-  intros. 
+  intros. unfold analyze. 
+  caseEq (DS.fixpoint (successors f) (fn_nextpc f) (transfer f)
+                      ((fn_entrypoint f, D.top) :: nil)).
+  intros approxs; intros.
   apply regs_match_approx_increasing with D.top.
-  eapply DS.fixpoint_entry. unfold analyze in H; eexact H.
-  auto with coqlib.
-  red; intros. rewrite D.get_top. simpl; auto.
+  eapply DS.fixpoint_entry; eauto. auto with coqlib.
+  apply regs_match_approx_top. 
+  intros. rewrite PMap.gi. apply regs_match_approx_top.
 Qed.
 
 (** * Correctness of strength reduction *)
@@ -296,9 +264,9 @@ Proof.
 Qed.
 
 Lemma cond_strength_reduction_correct:
-  forall cond args,
+  forall cond args m,
   let (cond', args') := cond_strength_reduction approx cond args in
-  eval_condition cond' rs##args' = eval_condition cond rs##args.
+  eval_condition cond' rs##args' m = eval_condition cond rs##args m.
 Proof.
   intros. unfold cond_strength_reduction.
   case (cond_strength_reduction_match cond args); intros.
@@ -312,7 +280,6 @@ Proof.
   caseEq (intval approx r1); intros.
   simpl. rewrite (intval_correct _ _ H). 
   destruct (rs#r2); auto. rewrite Int.swap_cmpu. auto.
-  destruct c; reflexivity.
   caseEq (intval approx r2); intros.
   simpl. rewrite (intval_correct _ _ H0). auto.
   auto.
@@ -320,10 +287,10 @@ Proof.
 Qed.
 
 Lemma make_addimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_addimm n r in
-  eval_operation ge sp Oadd (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Oadd (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_addimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -333,10 +300,10 @@ Proof.
 Qed.
   
 Lemma make_shlimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_shlimm n r in
-  eval_operation ge sp Oshl (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Oshl (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_shlimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -347,10 +314,10 @@ Proof.
 Qed.
 
 Lemma make_shrimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_shrimm n r in
-  eval_operation ge sp Oshr (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Oshr (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_shrimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -359,10 +326,10 @@ Proof.
 Qed.
 
 Lemma make_shruimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_shruimm n r in
-  eval_operation ge sp Oshru (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Oshru (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_shruimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -373,10 +340,10 @@ Proof.
 Qed.
 
 Lemma make_mulimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_mulimm n r in
-  eval_operation ge sp Omul (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Omul (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_mulimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -384,8 +351,8 @@ Proof.
   generalize (Int.eq_spec n Int.one); case (Int.eq n Int.one); intros.
   subst n. simpl in H1. simpl. FuncInv. rewrite Int.mul_one in H0. congruence.
   caseEq (Int.is_power2 n); intros.
-  replace (eval_operation ge sp Omul (rs # r :: Vint n :: nil))
-     with (eval_operation ge sp Oshl (rs # r :: Vint i :: nil)).
+  replace (eval_operation ge sp Omul (rs # r :: Vint n :: nil) m)
+     with (eval_operation ge sp Oshl (rs # r :: Vint i :: nil) m).
   apply make_shlimm_correct. 
   simpl. generalize (Int.is_power2_range _ _ H1). 
   change (Z_of_nat wordsize) with 32. intro. rewrite H2.
@@ -394,10 +361,10 @@ Proof.
 Qed.
 
 Lemma make_andimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_andimm n r in
-  eval_operation ge sp Oand (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Oand (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_andimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -408,10 +375,10 @@ Proof.
 Qed.
 
 Lemma make_orimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_orimm n r in
-  eval_operation ge sp Oor (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Oor (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_orimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -422,10 +389,10 @@ Proof.
 Qed.
 
 Lemma make_xorimm_correct:
-  forall n r v,
+  forall n r m v,
   let (op, args) := make_xorimm n r in
-  eval_operation ge sp Oxor (rs#r :: Vint n :: nil) = Some v ->
-  eval_operation ge sp op rs##args = Some v.
+  eval_operation ge sp Oxor (rs#r :: Vint n :: nil) m = Some v ->
+  eval_operation ge sp op rs##args m = Some v.
 Proof.
   intros; unfold make_xorimm.
   generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros.
@@ -434,18 +401,18 @@ Proof.
 Qed.
 
 Lemma op_strength_reduction_correct:
-  forall op args v,
+  forall op args m v,
   let (op', args') := op_strength_reduction approx op args in
-  eval_operation ge sp op rs##args = Some v ->
-  eval_operation ge sp op' rs##args' = Some v.
+  eval_operation ge sp op rs##args m = Some v ->
+  eval_operation ge sp op' rs##args' m = Some v.
 Proof.
   intros; unfold op_strength_reduction;
   case (op_strength_reduction_match op args); intros; simpl List.map.
   (* Oadd *)
   caseEq (intval approx r1); intros.
   rewrite (intval_correct _ _ H). 
-  replace (eval_operation ge sp Oadd (Vint i :: rs # r2 :: nil))
-     with (eval_operation ge sp Oadd (rs # r2 :: Vint i :: nil)).
+  replace (eval_operation ge sp Oadd (Vint i :: rs # r2 :: nil) m)
+     with (eval_operation ge sp Oadd (rs # r2 :: Vint i :: nil) m).
   apply make_addimm_correct. 
   simpl. destruct rs#r2; auto. rewrite Int.add_commut; auto.
   caseEq (intval approx r2); intros.
@@ -456,16 +423,16 @@ Proof.
   rewrite (intval_correct _ _ H) in H0. assumption. 
   caseEq (intval approx r2); intros.
   rewrite (intval_correct _ _ H0). 
-  replace (eval_operation ge sp Osub (rs # r1 :: Vint i :: nil))
-     with (eval_operation ge sp Oadd (rs # r1 :: Vint (Int.neg i) :: nil)).
+  replace (eval_operation ge sp Osub (rs # r1 :: Vint i :: nil) m)
+     with (eval_operation ge sp Oadd (rs # r1 :: Vint (Int.neg i) :: nil) m).
   apply make_addimm_correct.
   simpl. destruct rs#r1; auto; rewrite Int.sub_add_opp; auto. 
   assumption.
   (* Omul *)
   caseEq (intval approx r1); intros.
   rewrite (intval_correct _ _ H). 
-  replace (eval_operation ge sp Omul (Vint i :: rs # r2 :: nil))
-     with (eval_operation ge sp Omul (rs # r2 :: Vint i :: nil)).
+  replace (eval_operation ge sp Omul (Vint i :: rs # r2 :: nil) m)
+     with (eval_operation ge sp Omul (rs # r2 :: Vint i :: nil) m).
   apply make_mulimm_correct. 
   simpl. destruct rs#r2; auto. rewrite Int.mul_commut; auto.
   caseEq (intval approx r2); intros.
@@ -485,8 +452,8 @@ Proof.
   caseEq (intval approx r2); intros.
   caseEq (Int.is_power2 i); intros.
   rewrite (intval_correct _ _ H).
-  replace (eval_operation ge sp Odivu (rs # r1 :: Vint i :: nil))
-     with (eval_operation ge sp Oshru (rs # r1 :: Vint i0 :: nil)).
+  replace (eval_operation ge sp Odivu (rs # r1 :: Vint i :: nil) m)
+     with (eval_operation ge sp Oshru (rs # r1 :: Vint i0 :: nil) m).
   apply make_shruimm_correct. 
   simpl. destruct rs#r1; auto. 
   change 32 with (Z_of_nat wordsize). 
@@ -499,8 +466,8 @@ Proof.
   (* Oand *)
   caseEq (intval approx r1); intros.
   rewrite (intval_correct _ _ H). 
-  replace (eval_operation ge sp Oand (Vint i :: rs # r2 :: nil))
-     with (eval_operation ge sp Oand (rs # r2 :: Vint i :: nil)).
+  replace (eval_operation ge sp Oand (Vint i :: rs # r2 :: nil) m)
+     with (eval_operation ge sp Oand (rs # r2 :: Vint i :: nil) m).
   apply make_andimm_correct. 
   simpl. destruct rs#r2; auto. rewrite Int.and_commut; auto.
   caseEq (intval approx r2); intros.
@@ -509,8 +476,8 @@ Proof.
   (* Oor *)
   caseEq (intval approx r1); intros.
   rewrite (intval_correct _ _ H). 
-  replace (eval_operation ge sp Oor (Vint i :: rs # r2 :: nil))
-     with (eval_operation ge sp Oor (rs # r2 :: Vint i :: nil)).
+  replace (eval_operation ge sp Oor (Vint i :: rs # r2 :: nil) m)
+     with (eval_operation ge sp Oor (rs # r2 :: Vint i :: nil) m).
   apply make_orimm_correct. 
   simpl. destruct rs#r2; auto. rewrite Int.or_commut; auto.
   caseEq (intval approx r2); intros.
@@ -519,8 +486,8 @@ Proof.
   (* Oxor *)
   caseEq (intval approx r1); intros.
   rewrite (intval_correct _ _ H). 
-  replace (eval_operation ge sp Oxor (Vint i :: rs # r2 :: nil))
-     with (eval_operation ge sp Oxor (rs # r2 :: Vint i :: nil)).
+  replace (eval_operation ge sp Oxor (Vint i :: rs # r2 :: nil) m)
+     with (eval_operation ge sp Oxor (rs # r2 :: Vint i :: nil) m).
   apply make_xorimm_correct. 
   simpl. destruct rs#r2; auto. rewrite Int.xor_commut; auto.
   caseEq (intval approx r2); intros.
@@ -647,261 +614,329 @@ Let tge := Genv.globalenv tprog.
 
 Lemma symbols_preserved:
   forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof (Genv.find_symbol_transf transf_fundef prog).
+Proof.
+  intros; unfold ge, tge, tprog, transf_program. 
+  apply Genv.find_symbol_transf.
+Qed.
 
 Lemma functions_translated:
-  forall (v: val) (f: RTL.fundef),
+  forall (v: val) (f: fundef),
   Genv.find_funct ge v = Some f ->
   Genv.find_funct tge v = Some (transf_fundef f).
-Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog).
+Proof.  
+  intros.
+  exact (Genv.find_funct_transf transf_fundef H).
+Qed.
 
 Lemma function_ptr_translated:
-  forall (v: block) (f: RTL.fundef),
-  Genv.find_funct_ptr ge v = Some f ->
-  Genv.find_funct_ptr tge v = Some (transf_fundef f).
-Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog).
+  forall (b: block) (f: fundef),
+  Genv.find_funct_ptr ge b = Some f ->
+  Genv.find_funct_ptr tge b = Some (transf_fundef f).
+Proof.  
+  intros. 
+  exact (Genv.find_funct_ptr_transf transf_fundef H).
+Qed.
 
-Lemma sig_translated:
-  forall (f: RTL.fundef),
+Lemma sig_function_translated:
+  forall f,
   funsig (transf_fundef f) = funsig f.
 Proof.
-  intro. case f; intros; simpl. 
-  unfold transf_function. case (analyze f0); intros; reflexivity.
-  reflexivity.
+  intros. destruct f; reflexivity.
+Qed.
+
+Lemma transf_ros_correct:
+  forall ros rs f approx,
+  regs_match_approx ge approx rs ->
+  find_function ge ros rs = Some f ->
+  find_function tge (transf_ros approx ros) rs = Some (transf_fundef f).
+Proof.
+  intros until approx; intro MATCH.
+  destruct ros; simpl.
+  intro.
+  exploit functions_translated; eauto. intro FIND.  
+  caseEq (D.get r approx); intros; auto.
+  generalize (Int.eq_spec i0 Int.zero); case (Int.eq i0 Int.zero); intros; auto.
+  generalize (MATCH r). rewrite H0. intros [b [A B]].
+  rewrite <- symbols_preserved in A.
+  rewrite B in FIND. rewrite H1 in FIND. 
+  rewrite Genv.find_funct_find_funct_ptr in FIND.
+  simpl. rewrite A. auto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
+  intro. apply function_ptr_translated. auto.
+  congruence.
 Qed.
 
 (** The proof of semantic preservation is a simulation argument
   based on diagrams of the following form:
 <<
-        pc, rs, m ------------------- pc, rs, m
-            |                             |
-            |                             |
-            v                             v
-        pc', rs', m' ---------------- pc', rs', m'
+           st1 --------------- st2
+            |                   |
+           t|                   |t
+            |                   |
+            v                   v
+           st1'--------------- st2'
 >>
   The left vertical arrow represents a transition in the
-  original RTL code.  The top horizontal bar expresses that the values
-  of registers [rs] match their compile-time approximations at point [pc].
+  original RTL code.  The top horizontal bar is the [match_states]
+  invariant between the initial state [st1] in the original RTL code
+  and an initial state [st2] in the transformed code.
+  This invariant expresses that all code fragments appearing in [st2]
+  are obtained by [transf_code] transformation of the corresponding
+  fragments in [st1].  Moreover, the values of registers in [st1]
+  must match their compile-time approximations at the current program
+  point.
   These two parts of the diagram are the hypotheses.  In conclusions,
   we want to prove the other two parts: the right vertical arrow,
   which is a transition in the transformed RTL code, and the bottom
-  horizontal bar, which means that [rs'] matches the compile-time
-  approximations at [pc'].
-
-  To help express those diagrams, we define the following propositions
-  parameterized by the transition in the original RTL code (left arrow)
-  and summarizing the three other arrows of the diagram.  *)
-
-Definition exec_instr_prop
-        (c: code) (sp: val)
-        (pc: node) (rs: regset) (m: mem) (t: trace)
-        (pc': node) (rs': regset) (m': mem) : Prop :=
-  exec_instr tge c sp pc rs m t pc' rs' m' /\
-  forall f approxs
-         (CF: c = f.(RTL.fn_code))
-         (ANL: analyze f = Some approxs)
-         (MATCH: regs_match_approx ge approxs!!pc rs),
-  exec_instr tge (transf_code approxs c) sp pc rs m t pc' rs' m'.
-
-Definition exec_instrs_prop
-        (c: code) (sp: val)
-        (pc: node) (rs: regset) (m: mem) (t: trace)
-        (pc': node) (rs': regset) (m': mem) : Prop :=
-  exec_instrs tge c sp pc rs m t pc' rs' m' /\
-  forall f approxs
-         (CF: c = f.(RTL.fn_code))
-         (ANL: analyze f = Some approxs)
-         (MATCH: regs_match_approx ge approxs!!pc rs),
-  exec_instrs tge (transf_code approxs c) sp pc rs m t pc' rs' m'.
-
-Definition exec_function_prop
-        (f: RTL.fundef) (args: list val) (m: mem) (t: trace)
-        (res: val) (m': mem) : Prop :=
-  exec_function tge (transf_fundef f) args m t res m'.
+  horizontal bar, which means that the [match_state] predicate holds
+  between the final states [st1'] and [st2']. *)
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+   match_stackframe_intro:
+      forall res c sp pc rs f,
+      c = f.(RTL.fn_code) ->
+      (forall v, regs_match_approx ge (analyze f)!!pc (rs#res <- v)) ->
+    match_stackframes
+        (Stackframe res c sp pc rs)
+        (Stackframe res (transf_code (analyze f) c) sp pc rs).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s c sp pc rs m f s'
+           (CF: c = f.(RTL.fn_code))
+           (MATCH: regs_match_approx ge (analyze f)!!pc rs)
+           (STACKS: list_forall2 match_stackframes s s'),
+      match_states (State s c sp pc rs m)
+                   (State s' (transf_code (analyze f) c) sp pc rs m)
+  | match_states_call:
+      forall s f args m s',
+      list_forall2 match_stackframes s s' ->
+      match_states (Callstate s f args m)
+                   (Callstate s' (transf_fundef f) args m)
+  | match_states_return:
+      forall s s' v m,
+      list_forall2 match_stackframes s s' ->
+      match_states (Returnstate s v m)
+                   (Returnstate s' v m).
 
 Ltac TransfInstr :=
   match goal with
-  | H1: (PTree.get ?pc ?c = Some ?instr),
-    H2: (analyze ?f = Some ?approxs) |- _ =>
-      cut ((transf_code approxs c)!pc = Some(transf_instr approxs!!pc instr));
+  | H1: (PTree.get ?pc ?c = Some ?instr), f: function |- _ =>
+      cut ((transf_code (analyze f) c)!pc = Some(transf_instr (analyze f)!!pc instr));
       [ simpl
       | unfold transf_code; rewrite PTree.gmap; 
         unfold option_map; rewrite H1; reflexivity ]
   end.
 
-(** The predicates above serve as induction hypotheses in the proof of
-  simulation, which proceeds by induction over the
-  evaluation derivation of the original code. *)
+(** The proof of simulation proceeds by case analysis on the transition
+  taken in the source code. *)
 
-Lemma transf_funct_correct:
-  forall f args m t res m',
-  exec_function ge f args m t res m' ->
-  exec_function_prop f args m t res m'.
+Lemma transf_step_correct:
+  forall s1 t s2,
+  step ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1'),
+  exists s2', step tge s1' t s2' /\ match_states s2 s2'.
 Proof.
-  apply (exec_function_ind_3 ge
-          exec_instr_prop exec_instrs_prop exec_function_prop);
-  intros; red.
+  induction 1; intros; inv MS.
+
   (* Inop *)
-  split; [idtac| intros; TransfInstr].
-  apply exec_Inop; auto.
-  intros; apply exec_Inop; auto.
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' rs m); split.
+  TransfInstr; intro. eapply exec_Inop; eauto.
+  econstructor; eauto.
+  eapply analyze_correct_1 with (pc := pc); eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. auto.
+
   (* Iop *)
-  split; [idtac| intros; TransfInstr].
-  apply exec_Iop with op args. auto. 
-  rewrite (eval_operation_preserved symbols_preserved). auto.
-  caseEq (op_strength_reduction approxs!!pc op args);
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' (rs#res <- v) m); split.
+  TransfInstr. caseEq (op_strength_reduction (analyze f)!!pc op args);
   intros op' args' OSR.
-  assert (eval_operation tge sp op' rs##args' = Some v).
+  assert (eval_operation tge sp op' rs##args' m = Some v).
     rewrite (eval_operation_preserved symbols_preserved). 
-    generalize (op_strength_reduction_correct ge approxs!!pc sp rs
-                  MATCH op args v).
+    generalize (op_strength_reduction_correct ge (analyze f)!!pc sp rs
+                  MATCH op args m v).
     rewrite OSR; simpl. auto.
   generalize (eval_static_operation_correct ge op sp
-               (approx_regs args approxs!!pc) rs##args v
+               (approx_regs args (analyze f)!!pc) rs##args m v
                (approx_regs_val_list _ _ _ args MATCH) H0).
-  case (eval_static_operation op (approx_regs args approxs!!pc)); intros;
-  simpl in H1;
+  case (eval_static_operation op (approx_regs args (analyze f)!!pc)); intros;
+  simpl in H2;
   eapply exec_Iop; eauto; simpl.
-  simpl in H2; congruence.
-  simpl in H2; congruence.
+  congruence.
+  congruence.
   elim H2; intros b [A B]. rewrite symbols_preserved. 
   rewrite A; rewrite B; auto.
+  econstructor; eauto. 
+  eapply analyze_correct_1 with (pc := pc); eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. 
+  apply regs_match_approx_update; auto. 
+  eapply eval_static_operation_correct; eauto.
+  apply approx_regs_val_list; auto.
+
   (* Iload *)
-  split; [idtac| intros; TransfInstr].
-  eapply exec_Iload; eauto. 
-  rewrite (eval_addressing_preserved symbols_preserved). auto.
-  caseEq (addr_strength_reduction approxs!!pc addr args);
+  caseEq (addr_strength_reduction (analyze f)!!pc addr args);
   intros addr' args' ASR.
   assert (eval_addressing tge sp addr' rs##args' = Some a).
     rewrite (eval_addressing_preserved symbols_preserved). 
-    generalize (addr_strength_reduction_correct ge approxs!!pc sp rs
+    generalize (addr_strength_reduction_correct ge (analyze f)!!pc sp rs
                   MATCH addr args).
     rewrite ASR; simpl. congruence. 
-  intro. eapply exec_Iload; eauto. 
+  TransfInstr. rewrite ASR. intro.
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' (rs#dst <- v) m); split.
+  eapply exec_Iload; eauto.
+  econstructor; eauto. 
+  apply analyze_correct_1 with pc; auto. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. 
+  apply regs_match_approx_update; auto. simpl; auto.
+
   (* Istore *)
-  split; [idtac| intros; TransfInstr].
-  eapply exec_Istore; eauto.
-  rewrite (eval_addressing_preserved symbols_preserved). auto.
-  caseEq (addr_strength_reduction approxs!!pc addr args);
+  caseEq (addr_strength_reduction (analyze f)!!pc addr args);
   intros addr' args' ASR.
   assert (eval_addressing tge sp addr' rs##args' = Some a).
     rewrite (eval_addressing_preserved symbols_preserved). 
-    generalize (addr_strength_reduction_correct ge approxs!!pc sp rs
+    generalize (addr_strength_reduction_correct ge (analyze f)!!pc sp rs
                   MATCH addr args).
-    rewrite ASR; simpl. congruence.
-  intro. eapply exec_Istore; eauto. 
+    rewrite ASR; simpl. congruence. 
+  TransfInstr. rewrite ASR. intro.
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' rs m'); split.
+  eapply exec_Istore; eauto.
+  econstructor; eauto. 
+  apply analyze_correct_1 with pc; auto. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. auto. 
+
   (* Icall *)
-  assert (find_function tge ros rs = Some (transf_fundef f)).
-  generalize H0; unfold find_function; destruct ros.
-  apply functions_translated. 
-  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
-  apply function_ptr_translated. congruence.
-  assert (funsig (transf_fundef f) = sig).
-    generalize (sig_translated f). congruence.
-  split; [idtac| intros; TransfInstr].
-  eapply exec_Icall; eauto.
-  set (ros' :=
-     match ros with
-     | inl r =>
-         match D.get r approxs !! pc with
-         | Novalue => ros
-         | Unknown => ros
-         | I _ => ros
-         | F _ => ros
-         | S symb ofs => if Int.eq ofs Int.zero then inr reg symb else ros
-         end
-     | inr _ => ros
-     end).
-  intros; eapply exec_Icall; eauto. 
-  unfold ros'; destruct ros; auto.
-  caseEq (D.get r approxs!!pc); intros; auto.
-  generalize (Int.eq_spec i0 Int.zero); case (Int.eq i0 Int.zero); intros; auto.
-  generalize (MATCH r). rewrite H7. intros [b [A B]].
-  rewrite <- symbols_preserved in A. 
-  generalize H4. simpl. rewrite A. rewrite B. subst i0. 
-  rewrite Genv.find_funct_find_funct_ptr. auto.
+  exploit transf_ros_correct; eauto. intro FIND'.
+  TransfInstr; intro.
+  econstructor; split.
+  eapply exec_Icall; eauto. apply sig_function_translated; auto.
+  constructor; auto. constructor; auto.
+  econstructor; eauto. 
+  intros. apply analyze_correct_1 with pc; auto.
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H.
+  apply regs_match_approx_update; auto. simpl. auto.
+
+  (* Itailcall *)
+  exploit transf_ros_correct; eauto. intros FIND'.
+  TransfInstr; intro.
+  econstructor; split.
+  eapply exec_Itailcall; eauto. apply sig_function_translated; auto.
+  constructor; auto. 
 
   (* Ialloc *)
-  split; [idtac|intros; TransfInstr].
-  eapply exec_Ialloc; eauto. 
-  intros. eapply exec_Ialloc; eauto. 
+  TransfInstr; intro.
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp pc' (rs#res <- (Vptr b Int.zero)) m'); split.
+  eapply exec_Ialloc; eauto.
+  econstructor; eauto.
+  apply analyze_correct_1 with pc; auto. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H. 
+  apply regs_match_approx_update; auto. simpl; auto.
 
   (* Icond, true *)
-  split; [idtac| intros; TransfInstr].
-  eapply exec_Icond_true; eauto.
-  caseEq (cond_strength_reduction approxs!!pc cond args);
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp ifso rs m); split. 
+  caseEq (cond_strength_reduction (analyze f)!!pc cond args);
   intros cond' args' CSR.
-  assert (eval_condition cond' rs##args' = Some true).
+  assert (eval_condition cond' rs##args' m = Some true).
     generalize (cond_strength_reduction_correct
-                  ge approxs!!pc rs MATCH cond args).
+                  ge (analyze f)!!pc rs MATCH cond args m).
     rewrite CSR.  intro. congruence.
-  caseEq (eval_static_condition cond (approx_regs args approxs!!pc)).
+  TransfInstr. rewrite CSR. 
+  caseEq (eval_static_condition cond (approx_regs args (analyze f)!!pc)).
   intros b ESC. 
-  generalize (eval_static_condition_correct ge cond _ _ _
+  generalize (eval_static_condition_correct ge cond _ _ m _
                (approx_regs_val_list _ _ _ args MATCH) ESC); intro.
   replace b with true. intro; eapply exec_Inop; eauto. congruence.
-  intros. eapply exec_Icond_true; eauto. 
+  intros. eapply exec_Icond_true; eauto.
+  econstructor; eauto.
+  apply analyze_correct_1 with pc; auto. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H; auto.
 
   (* Icond, false *)
-  split; [idtac| intros; TransfInstr].
-  eapply exec_Icond_false; eauto.
-  caseEq (cond_strength_reduction approxs!!pc cond args);
+  exists (State s' (transf_code (analyze f) (fn_code f)) sp ifnot rs m); split. 
+  caseEq (cond_strength_reduction (analyze f)!!pc cond args);
   intros cond' args' CSR.
-  assert (eval_condition cond' rs##args' = Some false).
+  assert (eval_condition cond' rs##args' m = Some false).
     generalize (cond_strength_reduction_correct
-                  ge approxs!!pc rs MATCH cond args).
+                  ge (analyze f)!!pc rs MATCH cond args m).
     rewrite CSR.  intro. congruence.
-  caseEq (eval_static_condition cond (approx_regs args approxs!!pc)).
+  TransfInstr. rewrite CSR. 
+  caseEq (eval_static_condition cond (approx_regs args (analyze f)!!pc)).
   intros b ESC. 
-  generalize (eval_static_condition_correct ge cond _ _ _
+  generalize (eval_static_condition_correct ge cond _ _ m _
                (approx_regs_val_list _ _ _ args MATCH) ESC); intro.
   replace b with false. intro; eapply exec_Inop; eauto. congruence.
-  intros. eapply exec_Icond_false; eauto. 
-
-  (* refl *)
-  split. apply exec_refl. intros. apply exec_refl.
-  (* one *)
-  elim H0; intros.
-  split. apply exec_one; auto.
-  intros. apply exec_one. eapply H2; eauto.
-  (* trans *)
-  elim H0; intros. elim H2; intros.
-  split.
-  apply exec_trans with t1 pc2 rs2 m2 t2; auto.
-  intros; apply exec_trans with t1 pc2 rs2 m2 t2.
-  eapply H5; eauto. eapply H7; eauto.
-  eapply analyze_correct_2; eauto. auto.
+  intros. eapply exec_Icond_false; eauto.
+  econstructor; eauto.
+  apply analyze_correct_1 with pc; auto. 
+  unfold successors; rewrite H; auto with coqlib.
+  unfold transfer; rewrite H; auto.
+
+  (* Ireturn *)
+  exists (Returnstate s' (regmap_optget or Vundef rs) (free m stk)); split.
+  eapply exec_Ireturn; eauto. TransfInstr; auto.
+  constructor; auto.
 
   (* internal function *)
-  elim H1; intros.
-  simpl. unfold transf_function. 
-  caseEq (analyze f). 
-  intros approxs ANL.
-  eapply exec_funct_internal; simpl; eauto. 
-  eapply H5. reflexivity. auto. 
+  simpl. unfold transf_function.
+  econstructor; split.
+  eapply exec_function_internal; simpl; eauto.
+  simpl. econstructor; eauto.
   apply analyze_correct_3; auto.
-  TransfInstr; auto.
-  intros. eapply exec_funct_internal; eauto.
+
   (* external function *)
-  unfold transf_function; simpl. apply exec_funct_external; auto.
+  simpl. econstructor; split.
+  eapply exec_function_external; eauto.
+  constructor; auto.
+
+  (* return *)
+  inv H3. inv H1. 
+  econstructor; split.
+  eapply exec_return; eauto. 
+  econstructor; eauto. 
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, initial_state prog st1 ->
+  exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto. intro FIND.
+  exists (Callstate nil (transf_fundef f) nil (Genv.init_mem tprog)); split.
+  econstructor; eauto.
+  replace (prog_main tprog) with (prog_main prog).
+  rewrite symbols_preserved. eauto.
+  reflexivity.
+  rewrite <- H2. apply sig_function_translated.
+  replace (Genv.init_mem tprog) with (Genv.init_mem prog).
+  constructor. constructor. auto.
+  symmetry. unfold tprog, transf_program. apply Genv.init_mem_transf. 
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv H4. constructor. 
 Qed.
 
 (** The preservation of the observable behavior of the program then
-  follows. *)
+  follows, using the generic preservation theorem
+  [Smallstep.simulation_step_preservation]. *)
 
 Theorem transf_program_correct:
-  forall (t: trace) (r: val),
-  exec_program prog t r -> exec_program tprog t r.
+  forall (beh: program_behavior),
+  exec_program prog beh -> exec_program tprog beh.
 Proof.
-  intros t r [b [f [m [SYMB [FIND [SIG EXEC]]]]]].
-  red. exists b. exists (transf_fundef f). exists m. 
-  split. change (prog_main tprog) with (prog_main prog).
-  rewrite symbols_preserved. auto.
-  split. apply function_ptr_translated; auto.
-  split. generalize (sig_translated f). congruence.
-  apply transf_funct_correct. 
-  unfold tprog, transf_program. rewrite Genv.init_mem_transf. 
-  exact EXEC.
+  unfold exec_program; intros.
+  eapply simulation_step_preservation; eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact transf_step_correct. 
 Qed.
 
 End PRESERVATION.
diff --git a/backend/Conventions.v b/backend/Conventions.v
index d621e7c0..9d005b34 100644
--- a/backend/Conventions.v
+++ b/backend/Conventions.v
@@ -247,6 +247,31 @@ Proof.
   apply temporaries_not_acceptable. auto. 
 Qed.
 
+Lemma loc_acceptable_noteq_diff:
+  forall l1 l2,
+  loc_acceptable l1 -> l1 <> l2 -> Loc.diff l1 l2.
+Proof.
+  unfold loc_acceptable, Loc.diff; destruct l1; destruct l2;
+  try (destruct s); try (destruct s0); intros; auto; try congruence.
+  case (zeq z z0); intro. 
+  compare t t0; intro.
+  subst z0; subst t0; tauto.
+  tauto. tauto.
+  contradiction. contradiction.
+Qed.
+
+Lemma loc_acceptable_notin_notin:
+  forall r ll,
+  loc_acceptable r ->
+  ~(In r ll) -> Loc.notin r ll.
+Proof.
+  induction ll; simpl; intros.
+  auto.
+  split. apply loc_acceptable_noteq_diff. assumption. 
+  apply sym_not_equal. tauto. 
+  apply IHll. assumption. tauto. 
+Qed.
+
 (** * Function calling conventions *)
 
 (** The functions in this section determine the locations (machine registers
@@ -292,9 +317,20 @@ Proof.
   reflexivity.
 Qed.
 
-(** The result location is a caller-save register. *)
+(** The result location is acceptable. *)
 
 Lemma loc_result_acceptable:
+  forall sig, loc_acceptable (R (loc_result sig)).
+Proof.
+  intros. unfold loc_acceptable. red.
+  unfold loc_result. destruct (sig_res sig).
+  destruct t; simpl; NotOrEq.
+  simpl; NotOrEq.
+Qed.
+
+(** The result location is a caller-save register. *)
+
+Lemma loc_result_caller_save:
   forall (s: signature), In (R (loc_result s)) destroyed_at_call.
 Proof.
   intros; unfold loc_result.
@@ -309,7 +345,7 @@ Lemma loc_result_not_callee_save:
   forall (s: signature),
   ~(In (loc_result s) int_callee_save_regs \/ In (loc_result s) float_callee_save_regs).
 Proof.
-  intros. generalize (loc_result_acceptable s).
+  intros. generalize (loc_result_caller_save s).
   generalize (int_callee_save_not_destroyed (loc_result s)).
   generalize (float_callee_save_not_destroyed (loc_result s)).
   tauto.
@@ -340,16 +376,18 @@ Fixpoint loc_arguments_rec
   | nil => nil
   | Tint :: tys =>
       match iregl with
-      | nil => S (Outgoing ofs Tint)
-      | ireg :: _ => R ireg
-      end ::
-      loc_arguments_rec tys (list_drop1 iregl) fregl (ofs + 1)
+      | nil =>
+          S (Outgoing ofs Tint) :: loc_arguments_rec tys nil fregl (ofs + 1)
+      | ireg :: iregs =>
+          R ireg :: loc_arguments_rec tys iregs fregl ofs
+      end
   | Tfloat :: tys =>
       match fregl with
-      | nil => S (Outgoing ofs Tfloat)
-      | freg :: _ => R freg
-      end ::
-      loc_arguments_rec tys (list_drop2 iregl) (list_drop1 fregl) (ofs + 2)
+      | nil =>
+          S (Outgoing ofs Tfloat) :: loc_arguments_rec tys iregl nil (ofs + 2)
+      | freg :: fregs =>
+          R freg :: loc_arguments_rec tys (list_drop2 iregl) fregs ofs
+      end
   end.
 
 Definition int_param_regs :=
@@ -361,28 +399,45 @@ Definition float_param_regs :=
   when calling a function with signature [s].  *)
 
 Definition loc_arguments (s: signature) : list loc :=
-  loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 6.
+  loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 14.
 
 (** [size_arguments s] returns the number of [Outgoing] slots used
   to call a function with signature [s]. *)
 
-Fixpoint size_arguments_rec (tyl: list typ) : Z :=
+Fixpoint size_arguments_rec
+    (tyl: list typ) (iregl: list mreg) (fregl: list mreg)
+    (ofs: Z) {struct tyl} : Z :=
   match tyl with
-  | nil => 6
-  | Tint :: tys => 1 + size_arguments_rec tys
-  | Tfloat :: tys => 2 + size_arguments_rec tys
+  | nil => ofs
+  | Tint :: tys =>
+      match iregl with
+      | nil => size_arguments_rec tys nil fregl (ofs + 1)
+      | ireg :: iregs => size_arguments_rec tys iregs fregl ofs
+      end
+  | Tfloat :: tys =>
+      match fregl with
+      | nil => size_arguments_rec tys iregl nil (ofs + 2)
+      | freg :: fregs => size_arguments_rec tys (list_drop2 iregl) fregs ofs
+      end
   end.
 
 Definition size_arguments (s: signature) : Z :=
-  size_arguments_rec s.(sig_args).
+  size_arguments_rec s.(sig_args) int_param_regs float_param_regs 14.
+
+(** A tail-call is possible for a signature if the corresponding
+    arguments are all passed in registers. *)
+
+Definition tailcall_possible (s: signature) : Prop :=
+  forall l, In l (loc_arguments s) ->
+  match l with R _ => True | S _ => False end.
 
 (** Argument locations are either non-temporary registers or [Outgoing] 
-  stack slots at offset 6 or more. *)
+  stack slots at offset 14 or more. *)
 
 Definition loc_argument_acceptable (l: loc) : Prop :=
   match l with
   | R r => ~(In l temporaries)
-  | S (Outgoing ofs ty) => ofs >= 6
+  | S (Outgoing ofs ty) => ofs >= 14
   | _ => False
   end.
 
@@ -397,16 +452,18 @@ Remark loc_arguments_rec_charact:
 Proof.
   induction tyl; simpl loc_arguments_rec; intros.
   elim H.
-  destruct a; elim H; intros.
-  destruct iregl; subst l. omega. left; auto with coqlib.
-  generalize (IHtyl _ _ _ _ H0). 
-  destruct l. intros [A|B]. left; apply list_drop1_incl; auto. tauto.
-  destruct s; try contradiction. omega.
-  destruct fregl; subst l. omega. right; auto with coqlib.
-  generalize (IHtyl _ _ _ _ H0). 
-  destruct l. intros [A|B]. left; apply list_drop2_incl; auto. 
-  right; apply list_drop1_incl; auto.
-  destruct s; try contradiction. omega.
+  destruct a. 
+  destruct iregl; elim H; intro. 
+  subst l. omega.
+  generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
+  subst l. auto with coqlib.
+  generalize (IHtyl _ _ _ _ H0). destruct l; auto. simpl; intuition.
+  destruct fregl; elim H; intro. 
+  subst l. omega.
+  generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
+  subst l. auto with coqlib.
+  generalize (IHtyl _ _ _ _ H0). destruct l; auto.
+  intros [A|B]. left; apply list_drop2_incl; auto. right; auto with coqlib.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -432,12 +489,15 @@ Remark loc_arguments_rec_notin_reg:
 Proof.
   induction tyl; simpl; intros.
   auto.
-  destruct a; simpl; split.
-  destruct iregl. auto. red; intro; subst m. apply H. auto with coqlib.
-  apply IHtyl. red; intro. apply H. apply list_drop1_incl; auto. auto.
-  destruct fregl. auto. red; intro; subst m. apply H0. auto with coqlib.
-  apply IHtyl. red; intro. apply H. apply list_drop2_incl; auto.
-  red; intro. apply H0. apply list_drop1_incl; auto.
+  destruct a. 
+  destruct iregl; simpl. auto.
+  simpl in H. split. apply sym_not_equal. tauto.
+  apply IHtyl. tauto. tauto.
+  destruct fregl; simpl. auto.
+  simpl in H0. split. apply sym_not_equal. tauto.
+  apply IHtyl. 
+  red; intro. apply H. apply list_drop2_incl. auto.
+  tauto.
 Qed.
 
 Remark loc_arguments_rec_notin_local:
@@ -446,11 +506,9 @@ Remark loc_arguments_rec_notin_local:
 Proof.
   induction tyl; simpl; intros.
   auto.
-  destruct a; simpl; split.
-  destruct iregl. auto. auto.
-  apply IHtyl. 
-  destruct fregl. auto. auto.
-  apply IHtyl. 
+  destruct a.
+  destruct iregl; simpl; auto.
+  destruct fregl; simpl; auto.
 Qed.
 
 Remark loc_arguments_rec_notin_outgoing:
@@ -460,11 +518,13 @@ Remark loc_arguments_rec_notin_outgoing:
 Proof.
   induction tyl; simpl; intros.
   auto.
-  destruct a; simpl; split.
-  destruct iregl. omega. auto.
-  apply IHtyl. omega.
-  destruct fregl. omega. auto.
-  apply IHtyl. omega.
+  destruct a.
+  destruct iregl; simpl. 
+  split. omega. eapply IHtyl. omega.
+  auto.
+  destruct fregl; simpl. 
+  split. omega. eapply IHtyl. omega.
+  auto.
 Qed.
 
 Lemma loc_arguments_norepet:
@@ -477,21 +537,21 @@ Proof.
     Loc.norepet (loc_arguments_rec tyl iregl fregl ofs)).
   induction tyl; simpl; intros.
   constructor.
-  destruct a; constructor. 
-  destruct iregl. 
-  apply loc_arguments_rec_notin_outgoing. simpl; omega.
-  apply loc_arguments_rec_notin_reg. simpl. inversion H. auto.
+  destruct a. 
+  destruct iregl; constructor.
+  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
+  apply loc_arguments_rec_notin_reg. inversion H. auto.
   apply list_disjoint_notin with (m :: iregl); auto with coqlib.
-  apply IHtyl. apply list_drop1_norepet; auto. auto. 
-  red; intros. apply H1. apply list_drop1_incl; auto. auto.
-  destruct fregl. 
-  apply loc_arguments_rec_notin_outgoing. simpl; omega.
-  apply loc_arguments_rec_notin_reg. simpl. 
+  apply IHtyl. inv H; auto. auto.
+  eapply list_disjoint_cons_left; eauto.
+  destruct fregl; constructor.
+  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
+  apply loc_arguments_rec_notin_reg.
   red; intro. apply (H1 m m). apply list_drop2_incl; auto. 
-  auto with coqlib. auto.
-  simpl. inversion H0. auto.
-  apply IHtyl. apply list_drop2_norepet; auto. apply list_drop1_norepet; auto.
-  red; intros. apply H1. apply list_drop2_incl; auto. apply list_drop1_incl; auto.
+  auto with coqlib. auto. inv H0; auto.
+  apply IHtyl. apply list_drop2_norepet; auto.
+  inv H0; auto. 
+  red; intros. apply H1. apply list_drop2_incl; auto. auto with coqlib.
 
   intro. unfold loc_arguments. apply H.
   unfold int_param_regs. NoRepet.
@@ -501,32 +561,42 @@ Qed.
 
 (** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
 
+Remark size_arguments_rec_above:
+  forall tyl iregl fregl ofs0,
+  ofs0 <= size_arguments_rec tyl iregl fregl ofs0.
+Proof.
+  induction tyl; simpl; intros.
+  omega.
+  destruct a.
+  destruct iregl. apply Zle_trans with (ofs0 + 1); auto; omega. auto.
+  destruct fregl. apply Zle_trans with (ofs0 + 2); auto; omega. auto.
+Qed.
+
+Lemma size_arguments_above:
+  forall s, size_arguments s >= 14.
+Proof.
+  intros; unfold size_arguments. apply Zle_ge. 
+  apply size_arguments_rec_above.
+Qed.
+
 Lemma loc_arguments_bounded:
   forall (s: signature) (ofs: Z) (ty: typ),
   In (S (Outgoing ofs ty)) (loc_arguments s) ->
   ofs + typesize ty <= size_arguments s.
 Proof.
   intros.
-  assert (forall tyl, size_arguments_rec tyl >= 6).
-    induction tyl; unfold size_arguments_rec; fold size_arguments_rec; intros.
-    omega.
-    destruct a; omega.
   assert (forall tyl iregl fregl ofs0,
     In (S (Outgoing ofs ty)) (loc_arguments_rec tyl iregl fregl ofs0) ->
-    ofs + typesize ty <= size_arguments_rec tyl + ofs0 - 6).
-  induction tyl; simpl loc_arguments_rec; intros.
-  elim H1.
-  unfold size_arguments_rec; fold size_arguments_rec.
-  destruct a. 
-  elim H1; intro. destruct iregl; simplify_eq H2; intros. 
-  subst ty; subst ofs. generalize (H0 tyl). simpl typesize. omega.
-  generalize (IHtyl _ _ _ H2). omega. 
-  elim H1; intro. destruct fregl; simplify_eq H2; intros. 
-  subst ty; subst ofs. generalize (H0 tyl). simpl typesize. omega.
-  generalize (IHtyl _ _ _ H2). omega. 
-  replace (size_arguments s) with (size_arguments s + 6 - 6).
-  unfold size_arguments. eapply H1. unfold loc_arguments in H. eauto.
-  omega.
+    ofs + typesize ty <= size_arguments_rec tyl iregl fregl ofs0).
+  induction tyl; simpl; intros.
+  elim H0.
+  destruct a. destruct iregl; elim H0; intro.
+  inv H1. simpl. apply size_arguments_rec_above. auto.
+  discriminate. auto. 
+  destruct fregl; elim H0; intro.
+  inv H1. simpl. apply size_arguments_rec_above. auto.
+  discriminate. auto. 
+  unfold size_arguments. eapply H0. unfold loc_arguments in H. eauto.
 Qed.
 
 (** Temporary registers do not overlap with argument locations. *)
@@ -543,6 +613,18 @@ Proof.
 Qed.
 Hint Resolve loc_arguments_not_temporaries: locs.
 
+(** Argument registers are caller-save. *)
+
+Lemma arguments_caller_save:
+  forall sig r,
+  In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call.
+Proof.
+  unfold loc_arguments; intros.
+  elim (loc_arguments_rec_charact _ _ _ _ _ H); simpl.
+  ElimOrEq; intuition.
+  ElimOrEq; intuition.
+Qed.
+
 (** Callee-save registers do not overlap with argument locations. *)
 
 Lemma arguments_not_preserved:
@@ -571,7 +653,9 @@ Proof.
     List.length (loc_arguments_rec tyl iregl fregl ofs) = List.length tyl).
   induction tyl; simpl; intros.
   auto.
-  destruct a; simpl; decEq; auto.
+  destruct a. 
+  destruct iregl; simpl; decEq; auto.
+  destruct fregl; simpl; decEq; auto.
   intros. unfold loc_arguments. auto.
 Qed.
 
@@ -586,14 +670,10 @@ Proof.
     List.map Loc.type (loc_arguments_rec tyl iregl fregl ofs) = tyl).
   induction tyl; simpl; intros.
   auto.
-  destruct a; simpl; apply (f_equal2 (@cons typ)).
-  destruct iregl.  reflexivity. simpl. apply H. auto with coqlib.
-  apply IHtyl.
-  intros. apply H. apply list_drop1_incl. auto. auto.
-  destruct fregl.  reflexivity. simpl. apply H0. auto with coqlib.
-  apply IHtyl.
-  intros. apply H. apply list_drop2_incl. auto. 
-  intros. apply H0. apply list_drop1_incl. auto.
+  destruct a; [destruct iregl|destruct fregl]; simpl;
+  f_equal; eauto with coqlib.
+  apply IHtyl. intros. apply H. apply list_drop2_incl; auto.
+  eauto with coqlib.
 
   intros. unfold loc_arguments. apply H. 
   intro; simpl. ElimOrEq; reflexivity.
@@ -618,6 +698,30 @@ Proof.
   destruct s; try tauto. destruct s0; tauto.
 Qed.
 
+(** A tailcall is possible if and only if the size of arguments is 14. *)
+
+Lemma tailcall_possible_size:
+  forall s, tailcall_possible s <-> size_arguments s = 14.
+Proof.
+  intro; split; intro.
+  assert (forall tyl iregl fregl ofs,
+    (forall l, In l (loc_arguments_rec tyl iregl fregl ofs) ->
+               match l with R _ => True | S _ => False end) ->
+    size_arguments_rec tyl iregl fregl ofs = ofs).
+  induction tyl; simpl; intros.
+  auto.
+  destruct a. destruct iregl. elim (H0 _ (in_eq _ _)).
+  apply IHtyl; intros. apply H0. auto with coqlib.
+  destruct fregl. elim (H0 _ (in_eq _ _)).
+  apply IHtyl; intros. apply H0. auto with coqlib.
+  unfold size_arguments. apply H0. assumption.
+  red; intros.
+  generalize (loc_arguments_acceptable s l H0). 
+  destruct l; simpl. auto. destruct s0; intro; auto.
+  generalize (loc_arguments_bounded _ _ _ H0). 
+  generalize (typesize_pos t). omega.
+Qed.
+
 (** ** Location of function parameters *)
 
 (** A function finds the values of its parameter in the same locations
@@ -645,6 +749,13 @@ Proof.
   destruct s; reflexivity.
 Qed.
 
+Lemma loc_parameters_length:
+  forall sg, List.length (loc_parameters sg) = List.length sg.(sig_args).
+Proof.
+  intros. unfold loc_parameters. rewrite list_length_map. 
+  apply loc_arguments_length.
+Qed.
+
 Lemma loc_parameters_not_temporaries:
   forall sig, Loc.disjoint (loc_parameters sig) temporaries.
 Proof.
@@ -676,7 +787,7 @@ Proof.
   intros; simpl. tauto.
 Qed.
 
-(** ** Location of argument and result of dynamic allocation *)
+(** ** Location of argument and result for dynamic memory allocation *)
 
 Definition loc_alloc_argument := R3.
 Definition loc_alloc_result := R3.
diff --git a/backend/LTL.v b/backend/LTL.v
index 0dc97020..edb8ecc5 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -3,7 +3,6 @@
   LTL (``Location Transfer Language'') is the target language
   for register allocation and the source language for linearization. *)
 
-Require Import Relations.
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
@@ -12,54 +11,38 @@ Require Import Values.
 Require Import Events.
 Require Import Mem.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
-Require Conventions.
+Require Import Conventions.
 
 (** * Abstract syntax *)
 
-(** LTL is close to RTL, but uses locations instead of pseudo-registers,
-   and basic blocks instead of single instructions as nodes of its
-   control-flow graph. *)
+(** LTL is close to RTL, but uses locations instead of pseudo-registers. *)
 
 Definition node := positive.
 
-(** A basic block is a sequence of instructions terminated by
-    a [Bgoto], [Bcond] or [Breturn] instruction.  (This invariant
-    is enforced by the following inductive type definition.)
-    The instructions behave like the similarly-named instructions
-    of RTL.  They take machine registers (type [mreg]) as arguments
-    and results.  Two new instructions are added: [Bgetstack]
-    and [Bsetstack], which are ``move'' instructions between
-    a machine register and a stack slot. *)
-
-Inductive block: Set :=
-  | Bgetstack: slot -> mreg -> block -> block
-  | Bsetstack: mreg -> slot -> block -> block
-  | Bop: operation -> list mreg -> mreg -> block -> block
-  | Bload: memory_chunk -> addressing -> list mreg -> mreg -> block -> block
-  | Bstore: memory_chunk -> addressing -> list mreg -> mreg -> block -> block
-  | Bcall: signature -> mreg + ident -> block -> block
-  | Balloc: block -> block
-  | Bgoto: node -> block
-  | Bcond: condition -> list mreg -> node -> node -> block
-  | Breturn: block.
-
-Definition code: Set := PTree.t block.
-
-(** Unlike in RTL, parameter passing (passing values of the arguments
-  to a function call to the parameters of the called function) is done
-  via conventional locations (machine registers and stack slots).
-  Consequently, the [Bcall] instruction has no list of argument registers,
-  and function descriptions have no list of parameter registers. *)
+Inductive instruction: Set :=
+  | Lnop: node -> instruction
+  | Lop: operation -> list loc -> loc -> node -> instruction
+  | Lload: memory_chunk -> addressing -> list loc -> loc -> node -> instruction
+  | Lstore: memory_chunk -> addressing -> list loc -> loc -> node -> instruction
+  | Lcall: signature -> loc + ident -> list loc -> loc -> node -> instruction
+  | Ltailcall: signature -> loc + ident -> list loc -> instruction
+  | Lalloc: loc -> loc -> node -> instruction
+  | Lcond: condition -> list loc -> node -> node -> instruction
+  | Lreturn: option loc -> instruction.
+
+Definition code: Set := PTree.t instruction.
 
 Record function: Set := mkfunction {
   fn_sig: signature;
+  fn_params: list loc;
   fn_stacksize: Z;
   fn_code: code;
   fn_entrypoint: node;
-  fn_code_wf:
-    forall (pc: node), Plt pc (Psucc fn_entrypoint) \/ fn_code!pc = None
+  fn_nextpc: node;
+  fn_code_wf: forall (pc: node), Plt pc fn_nextpc \/ fn_code!pc = None
 }.
 
 Definition fundef := AST.fundef function.
@@ -107,7 +90,7 @@ Definition call_regs (caller: locset) : locset :=
   set [callee] of the callee at the return instruction.
 - Callee-save machine registers have the same values as in the caller
   before the call.
-- Caller-save and temporary machine registers have the same values
+- Caller-save machine registers have the same values
   as in the callee at the return point.
 - Stack slots have the same values as in the caller before the call.
 *)
@@ -125,11 +108,72 @@ Definition return_regs (caller callee: locset) : locset :=
     | S s => caller (S s)
     end.
 
+(** [parmov srcs dsts ls] performs the parallel assignment
+  of locations [dsts] to the values of the corresponding locations
+  [srcs].  *)
+
+Fixpoint parmov (srcs dsts: list loc) (ls: locset) {struct srcs} : locset :=
+  match srcs, dsts with
+  | s1 :: sl, d1 :: dl => Locmap.set d1 (ls s1) (parmov sl dl ls)
+  | _, _ => ls
+  end.
+
+Definition set_result_reg (s: signature) (or: option loc) (ls: locset) :=
+  match or with 
+  | Some r => Locmap.set (R (loc_result s)) (ls r) ls
+  | None => ls
+  end.
+
+(** The components of an LTL execution state are:
+
+- [State cs f sp pc ls m]: [f] is the function currently executing.
+  [sp] is the stack pointer (as in RTL).  [pc] is the current
+  program point (CFG node) within the code of [f].
+  [ls] maps locations to their current values. [m] is the current
+  memory state.
+- [Callstate cs f ls m]:
+  [f] is the function definition that we are calling.
+  [ls] is the values of locations just before the call.
+  [m] is the current memory state.
+- [Returnstate cs sig ls m]:
+  [sig] is the signature of the function that just returned.
+  [ls] is the values of locations just before the return.
+  [m] is the current memory state.
+
+[cs] is a list of stack frames [Stackframe res f sp ls pc],
+where [res] is the location that will receive the result of the call,
+[f] is the calling function, [sp] its stack pointer,
+[ls] the values of locations just before the call,
+and [pc] the program point within [f] of the successor of the
+[Lcall] instruction. *)
+
+Inductive stackframe : Set :=
+  | Stackframe:
+      forall (res: loc) (f: function) (sp: val) (ls: locset) (pc: node), 
+      stackframe.
+
+Inductive state : Set :=
+  | State:
+      forall (stack: list stackframe) (f: function) (sp: val)
+             (pc: node) (ls: locset) (m: mem), state
+  | Callstate:
+      forall (stack: list stackframe) (f: fundef) (ls: locset) (m: mem),
+      state
+  | Returnstate:
+      forall (stack: list stackframe) (sig: signature) (ls: locset) (m: mem),
+      state.
+
+Definition parent_locset (stack: list stackframe) : locset :=
+  match stack with
+  | nil => Locmap.init Vundef
+  | Stackframe res f sp ls pc :: stack' => ls
+  end.
+
 Variable ge: genv.
 
-Definition find_function (ros: mreg + ident) (rs: locset) : option fundef :=
-  match ros with
-  | inl r => Genv.find_funct ge (rs (R r))
+Definition find_function (los: loc + ident) (rs: locset) : option fundef :=
+  match los with
+  | inl l => Genv.find_funct ge (rs l)
   | inr symb =>
       match Genv.find_symbol ge symb with
       | None => None
@@ -137,158 +181,140 @@ Definition find_function (ros: mreg + ident) (rs: locset) : option fundef :=
       end
   end.
 
-Definition reglist (rl: list mreg) (rs: locset) : list val :=
-  List.map (fun r => rs (R r)) rl.
-
-(** The dynamic semantics of LTL, like that of RTL, is a combination
-  of small-step transition semantics and big-step semantics.
-  Function calls are treated in big-step style so that they appear
-  as a single transition in the caller function.  Other instructions
-  are treated in purely small-step style, as a single transition.
-
-  The introduction of basic blocks increases the number of inductive
-  predicates needed to express the semantics:
-- [exec_instr ge sp b ls m b' ls' m'] is the execution of the first
-  instruction of block [b].  [b'] is the remainder of the block.
-- [exec_instrs ge sp b ls m b' ls' m'] is similar, but executes
-  zero, one or several instructions at the beginning of block [b].
-- [exec_block ge sp b ls m out ls' m'] executes all instructions
-  of block [b].  The outcome [out] is either [Cont s], indicating
-  that the block terminates by branching to block labeled [s],
-  or [Return], indicating that the block terminates by returning
-  from the current function.
-- [exec_blocks ge code sp pc ls m out ls' m'] executes a sequence
-  of zero, one or several blocks, starting at the block labeled [pc].
-  [code] is the control-flow graph for the current function.
-  The outcome [out] indicates how the last block in this sequence
-  terminates: by branching to another block or by returning from the
-  function.
-- [exec_function ge f ls m ls' m'] executes the body of function [f],
-  from its entry point to the first [Lreturn] instruction encountered.
-
-  In all these predicates, [ls] and [ls'] are the location sets
-  (values of locations) at the beginning and end of the transitions,
-  respectively.
+(** The main difference between the LTL transition relation
+  and the RTL transition relation is the handling of function calls.
+  In RTL, arguments and results to calls are transmitted via
+  [vargs] and [vres] components of [Callstate] and [Returnstate],
+  respectively.  The semantics takes care of transferring these values
+  between the pseudo-registers of the caller and of the callee.
+  
+  In lower-level intermediate languages (e.g [Linear], [Mach], [PPC]),
+  arguments and results are transmitted implicitly: the generated
+  code for the caller arranges for arguments to be left in conventional
+  registers and stack locations, as determined by the calling conventions,
+  where the function being called will find them.  Similarly,
+  conventional registers will be used to pass the result value back
+  to the caller.
+
+  In LTL, we take an hybrid view of argument and result passing.
+  The LTL code does not contain (yet) instructions for moving
+  arguments and results to the conventional registers.  However,
+  the dynamic semantics "goes through the motions" of such code:
+- The [exec_Lcall] transition from [State] to [Callstate]
+  leaves the values of arguments in the conventional locations
+  given by [loc_arguments].
+- The [exec_function_internal] transition from [Callstate] to [State]
+  changes the view of stack slots ([Outgoing] slots slide to
+  [Incoming] slots as per [call_regs]), then recovers the
+  values of parameters from the conventional locations given by
+  [loc_parameters].
+- The [exec_Lreturn] transition from [State] to [Returnstate]
+  moves the result value to the conventional location [loc_result],
+  then restores the values of callee-save locations from
+  the location state of the caller, using [return_regs].
+- The [exec_return] transition from [Returnstate] to [State]
+  reads the result value from the conventional location [loc_result],
+  then stores it in the result location for the [Lcall] instruction.
+
+This complicated protocol will make it much easier to prove
+the correctness of the [Stacking] pass later, which inserts actual
+code that performs all the shuffling of arguments and results
+described above.
 *)
 
-Inductive outcome: Set :=
-  | Cont: node -> outcome
-  | Return: outcome.
-
-Inductive exec_instr: val ->
-                      block -> locset -> mem -> trace ->
-                      block -> locset -> mem -> Prop :=
-  | exec_Bgetstack:
-      forall sp sl r b rs m,
-      exec_instr sp (Bgetstack sl r b) rs m
-                 E0 b (Locmap.set (R r) (rs (S sl)) rs) m
-  | exec_Bsetstack:
-      forall sp r sl b rs m,
-      exec_instr sp (Bsetstack r sl b) rs m
-                 E0 b (Locmap.set (S sl) (rs (R r)) rs) m
-  | exec_Bop:
-      forall sp op args res b rs m v,
-      eval_operation ge sp op (reglist args rs) = Some v ->
-      exec_instr sp (Bop op args res b) rs m
-                 E0 b (Locmap.set (R res) v rs) m
-  | exec_Bload:
-      forall sp chunk addr args dst b rs m a v,
-      eval_addressing ge sp addr (reglist args rs) = Some a ->
-      loadv chunk m a = Some v ->
-      exec_instr sp (Bload chunk addr args dst b) rs m
-                 E0 b (Locmap.set (R dst) v rs) m
-  | exec_Bstore:
-      forall sp chunk addr args src b rs m m' a,
-      eval_addressing ge sp addr (reglist args rs) = Some a ->
-      storev chunk m a (rs (R src)) = Some m' ->
-      exec_instr sp (Bstore chunk addr args src b) rs m
-                 E0 b rs m'
-  | exec_Bcall:
-      forall sp sig ros b rs m t f rs' m',
-      find_function ros rs = Some f ->
-      sig = funsig f ->
-      exec_function f rs m t rs' m' ->
-      exec_instr sp (Bcall sig ros b) rs m
-                  t b (return_regs rs rs') m'
-  | exec_Balloc:
-      forall sp b rs m sz m' blk,
-      rs (R Conventions.loc_alloc_argument) = Vint sz ->
-      Mem.alloc m 0 (Int.signed sz) = (m', blk) ->
-      exec_instr sp (Balloc b) rs m E0 b
-                 (Locmap.set (R Conventions.loc_alloc_result) 
-                             (Vptr blk Int.zero) rs) m'
-
-with exec_instrs: val ->
-                  block -> locset -> mem -> trace ->
-                  block -> locset -> mem -> Prop :=
-  | exec_refl:
-      forall sp b rs m,
-      exec_instrs sp b rs m E0 b rs m
-  | exec_one:
-      forall sp b1 rs1 m1 t b2 rs2 m2,
-      exec_instr sp b1 rs1 m1 t b2 rs2 m2 ->
-      exec_instrs sp b1 rs1 m1 t b2 rs2 m2
-  | exec_trans:
-      forall sp b1 rs1 m1 t t1 b2 rs2 m2 t2 b3 rs3 m3,
-      exec_instrs sp b1 rs1 m1 t1 b2 rs2 m2 ->
-      exec_instrs sp b2 rs2 m2 t2 b3 rs3 m3 ->
-      t = t1 ** t2 ->
-      exec_instrs sp b1 rs1 m1 t b3 rs3 m3
-
-with exec_block: val ->
-                 block -> locset -> mem -> trace ->
-                 outcome -> locset -> mem -> Prop :=
-  | exec_Bgoto:
-      forall sp b rs m t s rs' m',
-      exec_instrs sp b rs m t (Bgoto s) rs' m' ->
-      exec_block sp b rs m t (Cont s) rs' m'
-  | exec_Bcond_true:
-      forall sp b rs m t cond args ifso ifnot rs' m',
-      exec_instrs sp b rs m t (Bcond cond args ifso ifnot) rs' m' ->
-      eval_condition cond (reglist args rs') = Some true ->
-      exec_block sp b rs m t (Cont ifso) rs' m'
-  | exec_Bcond_false:
-      forall sp b rs m t cond args ifso ifnot rs' m',
-      exec_instrs sp b rs m t (Bcond cond args ifso ifnot) rs' m' ->
-      eval_condition cond (reglist args rs') = Some false ->
-      exec_block sp b rs m t (Cont ifnot) rs' m'
-  | exec_Breturn:
-      forall sp b rs m t rs' m',
-      exec_instrs sp b rs m t Breturn rs' m' ->
-      exec_block sp b rs m t Return rs' m'
-
-with exec_blocks: code -> val ->
-                  node -> locset -> mem -> trace ->
-                  outcome -> locset -> mem -> Prop :=
-  | exec_blocks_refl:
-      forall c sp pc rs m,
-      exec_blocks c sp pc rs m E0 (Cont pc) rs m
-  | exec_blocks_one:
-      forall c sp pc b m rs t out rs' m',
-      c!pc = Some b ->
-      exec_block sp b rs m t out rs' m' ->
-      exec_blocks c sp pc rs m t out rs' m'
-  | exec_blocks_trans:
-      forall c sp pc1 rs1 m1 t t1 pc2 rs2 m2 t2 out rs3 m3,
-      exec_blocks c sp pc1 rs1 m1 t1 (Cont pc2) rs2 m2 ->
-      exec_blocks c sp pc2 rs2 m2 t2 out rs3 m3 ->
-      t = t1 ** t2 ->
-      exec_blocks c sp pc1 rs1 m1 t out rs3 m3
-
-with exec_function: fundef -> locset -> mem -> trace ->
-                                locset -> mem -> Prop :=
-  | exec_funct_internal:
-      forall f rs m m1 stk t rs2 m2,
-      alloc m 0 f.(fn_stacksize) = (m1, stk) ->
-      exec_blocks f.(fn_code) (Vptr stk Int.zero)
-                  f.(fn_entrypoint) (call_regs rs) m1 t Return rs2 m2 ->
-      exec_function (Internal f) rs m t rs2 (free m2 stk)
-  | exec_funct_external:
-      forall ef args res rs1 rs2 m t,
+Inductive step: state -> trace -> state -> Prop :=
+  | exec_Lnop:
+      forall s f sp pc rs m pc',
+      (fn_code f)!pc = Some(Lnop pc') ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' rs m)
+  | exec_Lop:
+      forall s f sp pc rs m op args res pc' v,
+      (fn_code f)!pc = Some(Lop op args res pc') ->
+      eval_operation ge sp op (map rs args) m = Some v ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (Locmap.set res v rs) m)
+  | exec_Lload:
+      forall s f sp pc rs m chunk addr args dst pc' a v,
+      (fn_code f)!pc = Some(Lload chunk addr args dst pc') ->
+      eval_addressing ge sp addr (map rs args) = Some a ->
+      Mem.loadv chunk m a = Some v ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (Locmap.set dst v rs) m)
+  | exec_Lstore:
+      forall s f sp pc rs m chunk addr args src pc' a m',
+      (fn_code f)!pc = Some(Lstore chunk addr args src pc') ->
+      eval_addressing ge sp addr (map rs args) = Some a ->
+      Mem.storev chunk m a (rs src) = Some m' ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' rs m')
+  | exec_Lcall:
+      forall s f sp pc rs m sig ros args res pc' f',
+      (fn_code f)!pc = Some(Lcall sig ros args res pc') ->
+      find_function ros rs = Some f' ->
+      funsig f' = sig ->
+      let rs1 := parmov args (loc_arguments sig) rs in
+      step (State s f sp pc rs m)
+        E0 (Callstate (Stackframe res f sp rs1 pc' :: s) f' rs1 m)
+  | exec_Ltailcall:
+      forall s f stk pc rs m sig ros args f',
+      (fn_code f)!pc = Some(Ltailcall sig ros args) ->
+      find_function ros rs = Some f' ->
+      funsig f' = sig ->
+      let rs1 := parmov args (loc_arguments sig) rs in
+      let rs2 := return_regs (parent_locset s) rs1 in
+      step (State s f (Vptr stk Int.zero) pc rs m)
+        E0 (Callstate s f' rs2 (Mem.free m stk))
+  | exec_Lalloc:
+      forall s f sp pc rs m pc' arg res sz m' b,
+      (fn_code f)!pc = Some(Lalloc arg res pc') ->
+      rs arg = Vint sz ->
+      Mem.alloc m 0 (Int.signed sz) = (m', b) ->
+      let rs1 := Locmap.set (R loc_alloc_argument) (rs arg) rs in
+      let rs2 := Locmap.set (R loc_alloc_result) (Vptr b Int.zero) rs1 in
+      let rs3 := Locmap.set res (rs2 (R loc_alloc_result)) rs2 in
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' rs3 m')
+  | exec_Lcond_true:
+      forall s f sp pc rs m cond args ifso ifnot,
+      (fn_code f)!pc = Some(Lcond cond args ifso ifnot) ->
+      eval_condition cond (map rs args) m = Some true ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp ifso rs m)
+  | exec_Lcond_false:
+      forall s f sp pc rs m cond args ifso ifnot,
+      (fn_code f)!pc = Some(Lcond cond args ifso ifnot) ->
+      eval_condition cond (map rs args) m = Some false ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp ifnot rs m)
+  | exec_Lreturn:
+      forall s f stk pc rs m or,
+      (fn_code f)!pc = Some(Lreturn or) ->
+      let rs1 := set_result_reg f.(fn_sig) or rs in
+      let rs2 := return_regs (parent_locset s) rs1 in
+      step (State s f (Vptr stk Int.zero) pc rs m)
+        E0 (Returnstate s f.(fn_sig) rs2 (Mem.free m stk))
+  | exec_function_internal:
+      forall s f rs m m' stk,
+      Mem.alloc m 0 f.(fn_stacksize) = (m', stk) ->
+      let rs1 := call_regs rs in
+      let rs2 := parmov (loc_parameters f.(fn_sig)) f.(fn_params) rs1 in
+      step (Callstate s (Internal f) rs m)
+        E0 (State s f (Vptr stk Int.zero) f.(fn_entrypoint) rs2 m')
+  | exec_function_external:
+      forall s ef t res rs m,
+      let args := map rs (Conventions.loc_arguments ef.(ef_sig)) in
       event_match ef args t res ->
-      args = List.map rs1 (Conventions.loc_arguments ef.(ef_sig)) ->
-      rs2 = Locmap.set (R (Conventions.loc_result ef.(ef_sig))) res rs1 ->
-      exec_function (External ef) rs1 m t rs2 m.
+      let rs1 :=
+        Locmap.set (R (Conventions.loc_result ef.(ef_sig))) res rs in
+      step (Callstate s (External ef) rs m)
+         t (Returnstate s ef.(ef_sig) rs1 m)
+  | exec_return:
+      forall res f sp rs0 pc s sig rs m,
+      let rs1 := Locmap.set res (rs (R (loc_result sig))) rs in
+      step (Returnstate (Stackframe res f sp rs0 pc :: s) 
+                        sig rs m)
+        E0 (State s f sp pc rs1 m).
 
 End RELSEM.
 
@@ -297,87 +323,41 @@ End RELSEM.
   main function, to be found in the machine register dictated
   by the calling conventions. *)
 
-Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
-  let ge := Genv.globalenv p in
-  let m0 := Genv.init_mem p in
-  exists b, exists f, exists rs, exists m,
-  Genv.find_symbol ge p.(prog_main) = Some b /\
-  Genv.find_funct_ptr ge b  = Some f /\
-  funsig f = mksignature nil (Some Tint) /\
-  exec_function ge f (Locmap.init Vundef) m0 t rs m /\
-  rs (R (Conventions.loc_result (funsig f))) = r.
-
-(** We remark that the [exec_blocks] relation is stable if
-  the control-flow graph is extended by adding new basic blocks
-  at previously unused graph nodes. *)
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall b f,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some b ->
+      Genv.find_funct_ptr ge b = Some f ->
+      funsig f = mksignature nil (Some Tint) ->
+      initial_state p (Callstate nil f (Locmap.init Vundef) m0).
 
-Section EXEC_BLOCKS_EXTENDS.
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall sig rs m r,
+      rs (R (loc_result sig)) = Vint r ->
+      final_state (Returnstate nil sig rs m) r.
 
-Variable ge: genv.
-Variable c1 c2: code.
-Hypothesis EXT: forall pc, c2!pc = c1!pc \/ c1!pc = None.
-
-Lemma exec_blocks_extends:
-  forall sp pc1 rs1 m1 t out rs2 m2,
-  exec_blocks ge c1 sp pc1 rs1 m1 t out rs2 m2 ->
-  exec_blocks ge c2 sp pc1 rs1 m1 t out rs2 m2.
-Proof.
-  induction 1. 
-  apply exec_blocks_refl.
-  apply exec_blocks_one with b. 
-    elim (EXT pc); intro; congruence. assumption.
-  eapply exec_blocks_trans; eauto.
-Qed.
-
-End EXEC_BLOCKS_EXTENDS.
+Definition exec_program (p: program) (beh: program_behavior) : Prop :=
+  program_behaves step (initial_state p) final_state (Genv.globalenv p) beh.
 
 (** * Operations over LTL *)
 
-(** Computation of the possible successors of a basic block.
-  This is used for dataflow analyses. *)
-
-Fixpoint successors_aux (b: block) : list node :=
-  match b with
-  | Bgetstack s r b => successors_aux b
-  | Bsetstack r s b => successors_aux b
-  | Bop op args res b => successors_aux b
-  | Bload chunk addr args dst b => successors_aux b
-  | Bstore chunk addr args src b => successors_aux b
-  | Bcall sig ros b => successors_aux b
-  | Balloc b => successors_aux b
-  | Bgoto n => n :: nil
-  | Bcond cond args ifso ifnot => ifso :: ifnot :: nil
-  | Breturn => nil
-  end.
+(** Computation of the possible successors of an instruction.
+  This is used in particular for dataflow analyses. *)
 
 Definition successors (f: function) (pc: node) : list node :=
   match f.(fn_code)!pc with
   | None => nil
-  | Some b => successors_aux b
+  | Some i =>
+      match i with
+      | Lnop s => s :: nil
+      | Lop op args res s => s :: nil
+      | Lload chunk addr args dst s => s :: nil
+      | Lstore chunk addr args src s => s :: nil
+      | Lcall sig ros args res s => s :: nil
+      | Ltailcall sig ros args => nil
+      | Lalloc arg res s => s :: nil
+      | Lcond cond args ifso ifnot => ifso :: ifnot :: nil
+      | Lreturn optarg => nil
+      end
   end.
-
-Lemma successors_aux_invariant:
-  forall ge sp b rs m t b' rs' m',
-  exec_instrs ge sp b rs m t b' rs' m' ->
-  successors_aux b = successors_aux b'.
-Proof.
-  induction 1; simpl; intros.
-  reflexivity.
-  inversion H; reflexivity.
-  transitivity (successors_aux b2); auto.
-Qed.
-
-Lemma successors_correct:
-  forall ge f sp pc rs m t pc' rs' m' b,
-  f.(fn_code)!pc = Some b ->
-  exec_block ge sp b rs m t (Cont pc') rs' m' ->
-  In pc' (successors f pc).
-Proof.
-  intros. unfold successors. rewrite H. inversion H0.
-  rewrite (successors_aux_invariant _ _ _ _ _ _ _ _ _ H7); simpl.
-  tauto.
-  rewrite (successors_aux_invariant _ _ _ _ _ _ _ _ _ H2); simpl.
-  tauto.
-  rewrite (successors_aux_invariant _ _ _ _ _ _ _ _ _ H2); simpl.
-  tauto.
-Qed.
diff --git a/backend/LTLin.v b/backend/LTLin.v
new file mode 100644
index 00000000..368c13cd
--- /dev/null
+++ b/backend/LTLin.v
@@ -0,0 +1,255 @@
+(** The LTLin intermediate language: abstract syntax and semantcs *)
+
+(** The LTLin language is a variant of LTL where control-flow is not
+    expressed as a graph of basic blocks, but as a linear list of
+    instructions with explicit labels and ``goto'' instructions. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Mem.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Op.
+Require Import Locations.
+Require Import LTL.
+Require Import Conventions.
+
+(** * Abstract syntax *)
+
+Definition label := positive.
+
+(** LTLin instructions are similar to those of LTL.
+  Except the last three, these instructions continue in sequence
+  with the next instruction in the linear list of instructions.
+  Unconditional branches [Lgoto] and conditional branches [Lcond]
+  transfer control to the given code label.  Labels are explicitly
+  inserted in the instruction list as pseudo-instructions [Llabel]. *)
+
+Inductive instruction: Set :=
+  | Lop: operation -> list loc -> loc -> instruction
+  | Lload: memory_chunk -> addressing -> list loc -> loc -> instruction
+  | Lstore: memory_chunk -> addressing -> list loc -> loc -> instruction
+  | Lcall: signature -> loc + ident -> list loc -> loc -> instruction
+  | Ltailcall: signature -> loc + ident -> list loc -> instruction
+  | Lalloc: loc -> loc -> instruction
+  | Llabel: label -> instruction
+  | Lgoto: label -> instruction
+  | Lcond: condition -> list loc -> label -> instruction
+  | Lreturn: option loc -> instruction.
+
+Definition code: Set := list instruction.
+
+Record function: Set := mkfunction {
+  fn_sig: signature;
+  fn_params: list loc;
+  fn_stacksize: Z;
+  fn_code: code
+}.
+
+Definition fundef := AST.fundef function.
+
+Definition program := AST.program fundef unit.
+
+Definition funsig (fd: fundef) :=
+  match fd with
+  | Internal f => f.(fn_sig)
+  | External ef => ef.(ef_sig)
+  end.
+
+Definition genv := Genv.t fundef.
+Definition locset := Locmap.t.
+
+(** * Operational semantics *)
+
+(** Looking up labels in the instruction list.  *)
+
+Definition is_label (lbl: label) (instr: instruction) : bool :=
+  match instr with
+  | Llabel lbl' => if peq lbl lbl' then true else false
+  | _ => false
+  end.
+
+Lemma is_label_correct:
+  forall lbl instr,
+  if is_label lbl instr then instr = Llabel lbl else instr <> Llabel lbl.
+Proof.
+  intros.  destruct instr; simpl; try discriminate.
+  case (peq lbl l); intro; congruence.
+Qed.
+
+(** [find_label lbl c] returns a list of instruction, suffix of the
+  code [c], that immediately follows the [Llabel lbl] pseudo-instruction.
+  If the label [lbl] is multiply-defined, the first occurrence is
+  retained.  If the label [lbl] is not defined, [None] is returned. *)
+
+Fixpoint find_label (lbl: label) (c: code) {struct c} : option code :=
+  match c with
+  | nil => None
+  | i1 :: il => if is_label lbl i1 then Some il else find_label lbl il
+  end.
+
+(** The states of the dynamic semantics are similar to those used
+  in the LTL semantics (see module [LTL]).  The only difference
+  is that program points [pc] (nodes of the CFG in LTL) become
+  code sequences [c] (suffixes of the code of the current function).
+*)
+
+Inductive stackframe : Set :=
+  | Stackframe:
+      forall (res: loc) (f: function) (sp: val) (ls: locset) (c: code), 
+      stackframe.
+
+Inductive state : Set :=
+  | State:
+      forall (stack: list stackframe) (f: function) (sp: val)
+             (c: code) (ls: locset) (m: mem), state
+  | Callstate:
+      forall (stack: list stackframe) (f: fundef) (ls: locset) (m: mem),
+      state
+  | Returnstate:
+      forall (stack: list stackframe) (sig: signature) (ls: locset) (m: mem),
+      state.
+
+Definition parent_locset (stack: list stackframe) : locset :=
+  match stack with
+  | nil => Locmap.init Vundef
+  | Stackframe res f sp ls pc :: stack' => ls
+  end.
+
+Section RELSEM.
+
+Variable ge: genv.
+
+Definition find_function (ros: loc + ident) (rs: locset) : option fundef :=
+  match ros with
+  | inl r => Genv.find_funct ge (rs r)
+  | inr symb =>
+      match Genv.find_symbol ge symb with
+      | None => None
+      | Some b => Genv.find_funct_ptr ge b
+      end
+  end.
+
+Inductive step: state -> trace -> state -> Prop :=
+  | exec_Lop:
+      forall s f sp op args res b rs m v,
+      eval_operation ge sp op (map rs args) m = Some v ->
+      step (State s f sp (Lop op args res :: b) rs m)
+        E0 (State s f sp b (Locmap.set res v rs) m)
+  | exec_Lload:
+      forall s f sp chunk addr args dst b rs m a v,
+      eval_addressing ge sp addr (map rs args) = Some a ->
+      loadv chunk m a = Some v ->
+      step (State s f sp (Lload chunk addr args dst :: b) rs m)
+        E0 (State s f sp b (Locmap.set dst v rs) m)
+  | exec_Lstore:
+      forall s f sp chunk addr args src b rs m m' a,
+      eval_addressing ge sp addr (map rs args) = Some a ->
+      storev chunk m a (rs src) = Some m' ->
+      step (State s f sp (Lstore chunk addr args src :: b) rs m)
+        E0 (State s f sp b rs m')
+  | exec_Lcall:
+      forall s f sp sig ros args res b rs m f',
+      find_function ros rs = Some f' ->
+      sig = funsig f' ->
+      let rs1 := parmov args (loc_arguments sig) rs in
+      step (State s f sp (Lcall sig ros args res :: b) rs m)
+        E0 (Callstate (Stackframe res f sp rs1 b :: s) f' rs1 m)
+  | exec_Ltailcall:
+      forall s f stk sig ros args b rs m f',
+      find_function ros rs = Some f' ->
+      sig = funsig f' ->
+      let rs1 := parmov args (loc_arguments sig) rs in
+      let rs2 := return_regs (parent_locset s) rs1 in
+      step (State s f (Vptr stk Int.zero) (Ltailcall sig ros args :: b) rs m)
+        E0 (Callstate s f' rs2 (Mem.free m stk))
+  | exec_Lalloc:
+      forall s f sp arg res b rs m sz m' blk,
+      rs arg = Vint sz ->
+      Mem.alloc m 0 (Int.signed sz) = (m', blk) ->
+      let rs1 := Locmap.set (R loc_alloc_argument) (rs arg) rs in
+      let rs2 := Locmap.set (R loc_alloc_result) (Vptr blk Int.zero) rs1 in
+      let rs3 := Locmap.set res (rs2 (R loc_alloc_result)) rs2 in
+      step (State s f sp (Lalloc arg res :: b) rs m)
+        E0 (State s f sp b rs3 m')
+  | exec_Llabel:
+      forall s f sp lbl b rs m,
+      step (State s f sp (Llabel lbl :: b) rs m)
+        E0 (State s f sp b rs m)
+  | exec_Lgoto:
+      forall s f sp lbl b rs m b',
+      find_label lbl f.(fn_code) = Some b' ->
+      step (State s f sp (Lgoto lbl :: b) rs m)
+        E0 (State s f sp b' rs m)
+  | exec_Lcond_true:
+      forall s f sp cond args lbl b rs m b',
+      eval_condition cond (map rs args) m = Some true ->
+      find_label lbl f.(fn_code) = Some b' ->
+      step (State s f sp (Lcond cond args lbl :: b) rs m)
+        E0 (State s f sp b' rs m)
+  | exec_Lcond_false:
+      forall s f sp cond args lbl b rs m,
+      eval_condition cond (map rs args) m = Some false ->
+      step (State s f sp (Lcond cond args lbl :: b) rs m)
+        E0 (State s f sp b rs m)
+  | exec_Lreturn:
+      forall s f stk rs m or b,
+      let rs1 := set_result_reg f.(fn_sig) or rs in
+      let rs2 := return_regs (parent_locset s) rs1 in
+      step (State s f (Vptr stk Int.zero) (Lreturn or :: b) rs m)
+        E0 (Returnstate s f.(fn_sig) rs2 (Mem.free m stk))
+  | exec_function_internal:
+      forall s f rs m m' stk,
+      Mem.alloc m 0 f.(fn_stacksize) = (m', stk) ->
+      let rs1 := call_regs rs in
+      let rs2 := parmov (loc_parameters f.(fn_sig)) f.(fn_params) rs1 in
+      step (Callstate s (Internal f) rs m)
+        E0 (State s f (Vptr stk Int.zero) f.(fn_code) rs2 m')
+  | exec_function_external:
+      forall s ef t res rs m,
+      let args := map rs (Conventions.loc_arguments ef.(ef_sig)) in
+      event_match ef args t res ->
+      let rs1 :=
+        Locmap.set (R (Conventions.loc_result ef.(ef_sig))) res rs in
+      step (Callstate s (External ef) rs m)
+         t (Returnstate s ef.(ef_sig) rs1 m)
+  | exec_return:
+      forall res f sp rs0 b s sig rs m,
+      let rs1 := Locmap.set res (rs (R (loc_result sig))) rs in
+      step (Returnstate (Stackframe res f sp rs0 b :: s) sig rs m)
+        E0 (State s f sp b rs1 m).
+
+End RELSEM.
+
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall b f,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some b ->
+      Genv.find_funct_ptr ge b = Some f ->
+      funsig f = mksignature nil (Some Tint) ->
+      initial_state p (Callstate nil f (Locmap.init Vundef) m0).
+
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall sig rs m r,
+      rs (R (loc_result sig)) = Vint r ->
+      final_state (Returnstate nil sig rs m) r.
+
+Definition exec_program (p: program) (beh: program_behavior) : Prop :=
+  program_behaves step (initial_state p) final_state (Genv.globalenv p) beh.
+
+(** * Properties of the operational semantics *)
+
+Lemma find_label_is_tail:
+  forall lbl c c', find_label lbl c = Some c' -> is_tail c' c.
+Proof.
+  induction c; simpl; intros.
+  discriminate.
+  destruct (is_label lbl a). inv H. constructor. constructor.
+  constructor. auto.
+Qed.
+  
diff --git a/backend/LTLintyping.v b/backend/LTLintyping.v
new file mode 100644
index 00000000..06c50f8b
--- /dev/null
+++ b/backend/LTLintyping.v
@@ -0,0 +1,104 @@
+(** Typing rules for LTLin. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Op.
+Require Import RTL.
+Require Import Locations.
+Require Import LTLin.
+Require Import Conventions.
+
+(** The following predicates define a type system for LTLin similar to that
+  of LTL. *)
+
+Section WT_INSTR.
+
+Variable funsig: signature.
+
+Inductive wt_instr : instruction -> Prop :=
+  | wt_Lopmove:
+      forall r1 r,
+      Loc.type r1 = Loc.type r -> loc_acceptable r1 -> loc_acceptable r ->
+      wt_instr (Lop Omove (r1 :: nil) r)
+  | wt_Lop:
+      forall op args res,
+      op <> Omove ->
+      (List.map Loc.type args, Loc.type res) = type_of_operation op ->
+      locs_acceptable args -> loc_acceptable res ->
+      wt_instr (Lop op args res)
+  | wt_Lload:
+      forall chunk addr args dst,
+      List.map Loc.type args = type_of_addressing addr ->
+      Loc.type dst = type_of_chunk chunk ->
+      locs_acceptable args -> loc_acceptable dst ->
+      wt_instr (Lload chunk addr args dst)
+  | wt_Lstore:
+      forall chunk addr args src,
+      List.map Loc.type args = type_of_addressing addr ->
+      Loc.type src = type_of_chunk chunk ->
+      locs_acceptable args -> loc_acceptable src ->
+      wt_instr (Lstore chunk addr args src)
+  | wt_Lcall:
+      forall sig ros args res,
+      match ros with inl r => Loc.type r = Tint | inr s => True end ->
+      List.map Loc.type args = sig.(sig_args) ->
+      Loc.type res = match sig.(sig_res) with None => Tint | Some ty => ty end ->
+      match ros with inl r => loc_acceptable r | inr s => True end ->
+      locs_acceptable args -> loc_acceptable res ->
+      wt_instr (Lcall sig ros args res)
+  | wt_Ltailcall:
+      forall sig ros args,
+      match ros with inl r => Loc.type r = Tint | inr s => True end ->
+      List.map Loc.type args = sig.(sig_args) ->
+      match ros with inl r => loc_acceptable r | inr s => True end ->
+      locs_acceptable args -> 
+      sig.(sig_res) = funsig.(sig_res) ->
+      Conventions.tailcall_possible sig ->
+      wt_instr (Ltailcall sig ros args)
+  | wt_Lalloc:
+      forall arg res,
+      Loc.type arg = Tint -> Loc.type res = Tint ->
+      loc_acceptable arg -> loc_acceptable res ->
+      wt_instr (Lalloc arg res)
+  | wt_Llabel: forall lbl,
+      wt_instr (Llabel lbl)
+  | wt_Lgoto: forall lbl,
+      wt_instr (Lgoto lbl)
+  | wt_Lcond:
+      forall cond args lbl,
+      List.map Loc.type args = type_of_condition cond ->
+      locs_acceptable args ->
+      wt_instr (Lcond cond args lbl)
+  | wt_Lreturn: 
+      forall optres,
+      option_map Loc.type optres = funsig.(sig_res) ->
+      match optres with None => True | Some r => loc_acceptable r end ->
+      wt_instr (Lreturn optres).
+
+Definition wt_code (c: code) : Prop :=
+  forall i, In i c -> wt_instr i.
+
+End WT_INSTR.
+
+Record wt_function (f: function): Prop :=
+  mk_wt_function {
+    wt_params:
+      List.map Loc.type f.(fn_params) = f.(fn_sig).(sig_args);
+    wt_acceptable:
+      locs_acceptable f.(fn_params);
+    wt_norepet:
+      Loc.norepet f.(fn_params);
+    wt_instrs:
+      wt_code f.(fn_sig) f.(fn_code)
+}.
+
+Inductive wt_fundef: fundef -> Prop :=
+  | wt_fundef_external: forall ef,
+      wt_fundef (External ef)
+  | wt_function_internal: forall f,
+      wt_function f ->
+      wt_fundef (Internal f).
+
+Definition wt_program (p: program): Prop :=
+  forall i f, In (i, f) (prog_funct p) -> wt_fundef f.
diff --git a/backend/LTLtyping.v b/backend/LTLtyping.v
index 187c5cb8..646edc82 100644
--- a/backend/LTLtyping.v
+++ b/backend/LTLtyping.v
@@ -13,84 +13,99 @@ Require Import Conventions.
   of [RTL] (see file [RTLtyping]): it statically guarantees that operations
   and addressing modes are applied to the right number of arguments
   and that the arguments are of the correct types.  Moreover, it also
-  enforces some correctness conditions on the offsets of stack slots
-  accessed through [Bgetstack] and [Bsetstack] LTL instructions. *)
+  guarantees that the locations of arguments and results are "acceptable",
+  i.e. either non-temporary registers or [Local] stack locations. *)
 
-Section WT_BLOCK.
+Section WT_INSTR.
 
 Variable funct: function.
 
-Definition slot_bounded (s: slot) :=
-  match s with
-  | Local ofs ty => 0 <= ofs
-  | Outgoing ofs ty => 6 <= ofs
-  | Incoming ofs ty => 6 <= ofs /\ ofs + typesize ty <= size_arguments funct.(fn_sig)
-  end.
+Definition valid_successor (s: node) : Prop :=
+  exists i, funct.(fn_code)!s = Some i.
 
-Inductive wt_block : block -> Prop :=
-  | wt_Bgetstack:
-      forall s r b,
-      slot_type s = mreg_type r ->
-      slot_bounded s ->
-      wt_block b ->
-      wt_block (Bgetstack s r b)
-  | wt_Bsetstack:
-      forall r s b,
-      match s with Incoming _ _ => False | _ => True end ->
-      slot_type s = mreg_type r ->
-      slot_bounded s ->
-      wt_block b ->
-      wt_block (Bsetstack r s b)
-  | wt_Bopmove:
-      forall r1 r b,
-      mreg_type r1 = mreg_type r ->
-      wt_block b ->
-      wt_block (Bop Omove (r1 :: nil) r b)
-  | wt_Bopundef:
-      forall r b,
-      wt_block b ->
-      wt_block (Bop Oundef nil r b)
-  | wt_Bop:
-      forall op args res b,
-      op <> Omove -> op <> Oundef ->
-      (List.map mreg_type args, mreg_type res) = type_of_operation op ->
-      wt_block b ->
-      wt_block (Bop op args res b)
-  | wt_Bload:
-      forall chunk addr args dst b,
-      List.map mreg_type args = type_of_addressing addr ->
-      mreg_type dst = type_of_chunk chunk ->
-      wt_block b ->
-      wt_block (Bload chunk addr args dst b)
-  | wt_Bstore:
-      forall chunk addr args src b,
-      List.map mreg_type args = type_of_addressing addr ->
-      mreg_type src = type_of_chunk chunk ->
-      wt_block b ->
-      wt_block (Bstore chunk addr args src b)
-  | wt_Bcall:
-      forall sig ros b,
-      match ros with inl r => mreg_type r = Tint | _ => True end ->
-      wt_block b ->
-      wt_block (Bcall sig ros b)
-  | wt_Balloc:
-      forall b,
-      wt_block b ->
-      wt_block (Balloc b)
-  | wt_Bgoto:
-      forall lbl,
-      wt_block (Bgoto lbl)
-  | wt_Bcond:
-      forall cond args ifso ifnot,
-      List.map mreg_type args = type_of_condition cond ->
-      wt_block (Bcond cond args ifso ifnot)
-  | wt_Breturn: 
-      wt_block (Breturn).
+Inductive wt_instr : instruction -> Prop :=
+  | wt_Lnop:
+      forall s,
+      valid_successor s ->
+      wt_instr (Lnop s)
+  | wt_Lopmove:
+      forall r1 r s,
+      Loc.type r1 = Loc.type r -> loc_acceptable r1 -> loc_acceptable r ->
+      valid_successor s ->
+      wt_instr (Lop Omove (r1 :: nil) r s)
+  | wt_Lop:
+      forall op args res s,
+      op <> Omove ->
+      (List.map Loc.type args, Loc.type res) = type_of_operation op ->
+      locs_acceptable args -> loc_acceptable res ->
+      valid_successor s ->
+      wt_instr (Lop op args res s)
+  | wt_Lload:
+      forall chunk addr args dst s,
+      List.map Loc.type args = type_of_addressing addr ->
+      Loc.type dst = type_of_chunk chunk ->
+      locs_acceptable args -> loc_acceptable dst ->
+      valid_successor s ->
+      wt_instr (Lload chunk addr args dst s)
+  | wt_Lstore:
+      forall chunk addr args src s,
+      List.map Loc.type args = type_of_addressing addr ->
+      Loc.type src = type_of_chunk chunk ->
+      locs_acceptable args -> loc_acceptable src ->
+      valid_successor s ->
+      wt_instr (Lstore chunk addr args src s)
+  | wt_Lcall:
+      forall sig ros args res s,
+      match ros with inl r => Loc.type r = Tint | inr s => True end ->
+      List.map Loc.type args = sig.(sig_args) ->
+      Loc.type res = proj_sig_res sig ->
+      match ros with inl r => loc_acceptable r | inr s => True end ->
+      locs_acceptable args -> loc_acceptable res ->
+      valid_successor s ->
+      wt_instr (Lcall sig ros args res s)
+  | wt_Ltailcall:
+      forall sig ros args,
+      match ros with inl r => Loc.type r = Tint | inr s => True end ->
+      List.map Loc.type args = sig.(sig_args) ->
+      match ros with inl r => loc_acceptable r | inr s => True end ->
+      locs_acceptable args ->
+      sig.(sig_res) = funct.(fn_sig).(sig_res) ->
+      Conventions.tailcall_possible sig ->
+      wt_instr (Ltailcall sig ros args)
+  | wt_Lalloc:
+      forall arg res s,
+      Loc.type arg = Tint -> Loc.type res = Tint ->
+      loc_acceptable arg -> loc_acceptable res ->
+      valid_successor s ->
+      wt_instr (Lalloc arg res s)
+  | wt_Lcond:
+      forall cond args s1 s2,
+      List.map Loc.type args = type_of_condition cond ->
+      locs_acceptable args ->
+      valid_successor s1 -> valid_successor s2 ->
+      wt_instr (Lcond cond args s1 s2)
+  | wt_Lreturn: 
+      forall optres,
+      option_map Loc.type optres = funct.(fn_sig).(sig_res) ->
+      match optres with None => True | Some r => loc_acceptable r end ->
+      wt_instr (Lreturn optres).
 
-End WT_BLOCK.
+End WT_INSTR.
 
-Definition wt_function (f: function) : Prop :=
-  forall pc b, f.(fn_code)!pc = Some b -> wt_block f b.
+Record wt_function (f: function): Prop :=
+  mk_wt_function {
+    wt_params:
+      List.map Loc.type f.(fn_params) = f.(fn_sig).(sig_args);
+    wt_acceptable:
+      locs_acceptable f.(fn_params);
+    wt_norepet:
+      Loc.norepet f.(fn_params);
+    wt_instrs:
+      forall pc instr, 
+      f.(fn_code)!pc = Some instr -> wt_instr f instr;
+    wt_entrypoint:
+      valid_successor f f.(fn_entrypoint)
+}.
 
 Inductive wt_fundef: fundef -> Prop :=
   | wt_fundef_external: forall ef,
@@ -99,6 +114,5 @@ Inductive wt_fundef: fundef -> Prop :=
       wt_function f ->
       wt_fundef (Internal f).
 
-Definition wt_program (p: program) : Prop :=
+Definition wt_program (p: program): Prop :=
   forall i f, In (i, f) (prog_funct p) -> wt_fundef f.
-
diff --git a/backend/Linear.v b/backend/Linear.v
index 0f1a31f2..65803710 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -1,10 +1,10 @@
 (** The Linear intermediate language: abstract syntax and semantcs *)
 
-(** The Linear language is a variant of LTL where control-flow is not
-    expressed as a graph of basic blocks, but as a linear list of
-    instructions with explicit labels and ``goto'' instructions. *)
+(** The Linear language is a variant of LTLin where arithmetic
+    instructions operate on machine registers (type [mreg]) instead
+    of arbitrary locations.  Special instructions [Lgetstack] and
+    [Lsetstack] are provided to access stack slots. *)
 
-Require Import Relations.
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
@@ -13,24 +13,16 @@ Require Import Values.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
 Require Import LTL.
-Require Conventions.
+Require Import Conventions.
 
 (** * Abstract syntax *)
 
 Definition label := positive.
 
-(** Linear instructions are similar to their LTL counterpart:
-  arguments and results are machine registers, except for the
-  [Lgetstack] and [Lsetstack] instructions which are register-stack moves.
-  Except the last three, these instructions continue in sequence
-  with the next instruction in the linear list of instructions.
-  Unconditional branches [Lgoto] and conditional branches [Lcond]
-  transfer control to the given code label.  Labels are explicitly
-  inserted in the instruction list as pseudo-instructions [Llabel]. *)
-
 Inductive instruction: Set :=
   | Lgetstack: slot -> mreg -> instruction
   | Lsetstack: mreg -> slot -> instruction
@@ -38,6 +30,7 @@ Inductive instruction: Set :=
   | Lload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lcall: signature -> mreg + ident -> instruction
+  | Ltailcall: signature -> mreg + ident -> instruction
   | Lalloc: instruction
   | Llabel: label -> instruction
   | Lgoto: label -> instruction
@@ -108,121 +101,161 @@ Definition find_function (ros: mreg + ident) (rs: locset) : option fundef :=
       end
   end.
 
-(** [exec_instr ge f sp c ls m c' ls' m'] represents the execution
-  of the first instruction in the code sequence [c].  [ls] and [m]
-  are the initial location set and memory state, respectively.
-  [c'] is the current code sequence after execution of the instruction:
-  it is the tail of [c] if the instruction falls through. 
-  [ls'] and [m'] are the final location state and memory state. *)
+Definition reglist (rs: locset) (rl: list mreg) : list val :=
+  List.map (fun r => rs (R r)) rl.
+
+(** The components of a Linear execution state are:
+
+- [State cs f sp c rs m]: [f] is the function currently executing.
+  [sp] is the stack pointer.  [c] is the sequence of instructions
+  that remain to be executed.
+  [rs] maps locations to their current values. [m] is the current
+  memory state.
+
+- [Callstate cs f rs m]:
+  [f] is the function definition that we are calling.
+  [rs] is the values of locations just before the call.
+  [m] is the current memory state.
+
+- [Returnstate cs rs m]:
+  [rs] is the values of locations just before the return.
+  [m] is the current memory state.
+
+[cs] is a list of stack frames [Stackframe res f rs pc].
+[f] is the calling function, [sp] its stack pointer.
+[rs] the values of locations just before the call.
+[c] is the sequence of instructions following the call in the code of [f].
+*)
+
+Inductive stackframe: Set :=
+  | Stackframe:
+      forall (f: function) (sp: val) (rs: locset) (c: code),
+      stackframe.
+
+Inductive state: Set :=
+  | State:
+      forall (stack: list stackframe) (f: function) (sp: val)
+             (c: code) (rs: locset) (m: mem),
+      state
+  | Callstate:
+      forall (stack: list stackframe) (f: fundef) (rs: locset) (m: mem),
+      state
+  | Returnstate:
+      forall (stack: list stackframe) (rs: locset) (m: mem),
+      state.
 
-Inductive exec_instr: function -> val ->
-                      code -> locset -> mem -> trace ->
-                      code -> locset -> mem -> Prop :=
+(** [parent_locset cs] returns the mapping of values for locations
+  of the caller function. *)
+
+Definition parent_locset (stack: list stackframe) : locset :=
+  match stack with
+  | nil => Locmap.init Vundef
+  | Stackframe f sp ls c :: stack' => ls
+  end.
+
+Inductive step: state -> trace -> state -> Prop :=
   | exec_Lgetstack:
-      forall f sp sl r b rs m,
-      exec_instr f sp (Lgetstack sl r :: b) rs m
-                   E0 b (Locmap.set (R r) (rs (S sl)) rs) m
+      forall s f sp sl r b rs m,
+      step (State s f sp (Lgetstack sl r :: b) rs m)
+        E0 (State s f sp b (Locmap.set (R r) (rs (S sl)) rs) m)
   | exec_Lsetstack:
-      forall f sp r sl b rs m,
-      exec_instr f sp (Lsetstack r sl :: b) rs m
-                   E0 b (Locmap.set (S sl) (rs (R r)) rs) m
+      forall s f sp r sl b rs m,
+      step (State s f sp (Lsetstack r sl :: b) rs m)
+        E0 (State s f sp b (Locmap.set (S sl) (rs (R r)) rs) m)
   | exec_Lop:
-      forall f sp op args res b rs m v,
-      eval_operation ge sp op (reglist args rs) = Some v ->
-      exec_instr f sp (Lop op args res :: b) rs m
-                   E0 b (Locmap.set (R res) v rs) m
+      forall s f sp op args res b rs m v,
+      eval_operation ge sp op (reglist rs args) m = Some v ->
+      step (State s f sp (Lop op args res :: b) rs m)
+        E0 (State s f sp b (Locmap.set (R res) v rs) m)
   | exec_Lload:
-      forall f sp chunk addr args dst b rs m a v,
-      eval_addressing ge sp addr (reglist args rs) = Some a ->
+      forall s f sp chunk addr args dst b rs m a v,
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
       loadv chunk m a = Some v ->
-      exec_instr f sp (Lload chunk addr args dst :: b) rs m
-                   E0 b (Locmap.set (R dst) v rs) m
+      step (State s f sp (Lload chunk addr args dst :: b) rs m)
+        E0 (State s f sp b (Locmap.set (R dst) v rs) m)
   | exec_Lstore:
-      forall f sp chunk addr args src b rs m m' a,
-      eval_addressing ge sp addr (reglist args rs) = Some a ->
+      forall s f sp chunk addr args src b rs m m' a,
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
       storev chunk m a (rs (R src)) = Some m' ->
-      exec_instr f sp (Lstore chunk addr args src :: b) rs m
-                   E0 b rs m'
+      step (State s f sp (Lstore chunk addr args src :: b) rs m)
+        E0 (State s f sp b rs m')
   | exec_Lcall:
-      forall f sp sig ros b rs m t f' rs' m',
+      forall s f sp sig ros b rs m f',
       find_function ros rs = Some f' ->
       sig = funsig f' ->
-      exec_function f' rs m t rs' m' ->
-      exec_instr f sp (Lcall sig ros :: b) rs m
-                    t b (return_regs rs rs') m'
+      step (State s f sp (Lcall sig ros :: b) rs m)
+        E0 (Callstate (Stackframe f sp rs b:: s) f' rs m)
+  | exec_Ltailcall:
+      forall s f stk sig ros b rs m f',
+      find_function ros rs = Some f' ->
+      sig = funsig f' ->
+      step (State s f (Vptr stk Int.zero) (Ltailcall sig ros :: b) rs m)
+        E0 (Callstate s f' (return_regs (parent_locset s) rs) (Mem.free m stk))
   | exec_Lalloc:
-      forall f sp b rs m sz m' blk,
+      forall s f sp b rs m sz m' blk,
       rs (R Conventions.loc_alloc_argument) = Vint sz ->
       Mem.alloc m 0 (Int.signed sz) = (m', blk) ->
-      exec_instr f sp (Lalloc :: b) rs m
-                   E0 b (Locmap.set (R Conventions.loc_alloc_result) 
-                                    (Vptr blk Int.zero) rs) m'
+      step (State s f sp (Lalloc :: b) rs m)
+        E0 (State s f sp b
+                    (Locmap.set (R Conventions.loc_alloc_result)
+                                (Vptr blk Int.zero) rs)
+                    m')
   | exec_Llabel:
-      forall f sp lbl b rs m,
-      exec_instr f sp (Llabel lbl :: b) rs m
-                   E0 b rs m
+      forall s f sp lbl b rs m,
+      step (State s f sp (Llabel lbl :: b) rs m)
+        E0 (State s f sp b rs m)
   | exec_Lgoto:
-      forall f sp lbl b rs m b',
+      forall s f sp lbl b rs m b',
       find_label lbl f.(fn_code) = Some b' ->
-      exec_instr f sp (Lgoto lbl :: b) rs m
-                   E0 b' rs m
+      step (State s f sp (Lgoto lbl :: b) rs m)
+        E0 (State s f sp b' rs m)
   | exec_Lcond_true:
-      forall f sp cond args lbl b rs m b',
-      eval_condition cond (reglist args rs) = Some true ->
+      forall s f sp cond args lbl b rs m b',
+      eval_condition cond (reglist rs args) m = Some true ->
       find_label lbl f.(fn_code) = Some b' ->
-      exec_instr f sp (Lcond cond args lbl :: b) rs m
-                   E0 b' rs m
+      step (State s f sp (Lcond cond args lbl :: b) rs m)
+        E0 (State s f sp b' rs m)
   | exec_Lcond_false:
-      forall f sp cond args lbl b rs m,
-      eval_condition cond (reglist args rs) = Some false ->
-      exec_instr f sp (Lcond cond args lbl :: b) rs m
-                   E0 b rs m
-
-with exec_instrs: function -> val ->
-                  code -> locset -> mem -> trace ->
-                  code -> locset -> mem -> Prop :=
-  | exec_refl:
-      forall f sp b rs m,
-      exec_instrs f sp b rs m E0 b rs m
-  | exec_one:
-      forall f sp b1 rs1 m1 t b2 rs2 m2,
-      exec_instr f sp b1 rs1 m1 t b2 rs2 m2 ->
-      exec_instrs f sp b1 rs1 m1 t b2 rs2 m2
-  | exec_trans:
-      forall f sp b1 rs1 m1 t1 b2 rs2 m2 t2 b3 rs3 m3 t,
-      exec_instrs f sp b1 rs1 m1 t1 b2 rs2 m2 ->
-      exec_instrs f sp b2 rs2 m2 t2 b3 rs3 m3 ->
-      t = t1 ** t2 ->
-      exec_instrs f sp b1 rs1 m1 t b3 rs3 m3
-
-with exec_function: fundef -> locset -> mem -> trace -> locset -> mem -> Prop :=
-  | exec_funct_internal:
-      forall f rs m t m1 stk rs2 m2 b,
-      alloc m 0 f.(fn_stacksize) = (m1, stk) ->
-      exec_instrs f (Vptr stk Int.zero)
-                  f.(fn_code) (call_regs rs) m1
-                t (Lreturn :: b) rs2 m2 ->
-      exec_function (Internal f) rs m t rs2 (free m2 stk)
-  | exec_funct_external:
-      forall ef args res rs1 rs2 m t,
+      forall s f sp cond args lbl b rs m,
+      eval_condition cond (reglist rs args) m = Some false ->
+      step (State s f sp (Lcond cond args lbl :: b) rs m)
+        E0 (State s f sp b rs m)
+  | exec_Lreturn:
+      forall s f stk b rs m,
+      step (State s f (Vptr stk Int.zero) (Lreturn :: b) rs m)
+        E0 (Returnstate s (return_regs (parent_locset s) rs) (Mem.free m stk))
+  | exec_function_internal:
+      forall s f rs m m' stk,
+      alloc m 0 f.(fn_stacksize) = (m', stk) ->
+      step (Callstate s (Internal f) rs m)
+        E0 (State s f (Vptr stk Int.zero) f.(fn_code) (call_regs rs) m')
+  | exec_function_external:
+      forall s ef args res rs1 rs2 m t,
       event_match ef args t res ->
       args = List.map rs1 (Conventions.loc_arguments ef.(ef_sig)) ->
       rs2 = Locmap.set (R (Conventions.loc_result ef.(ef_sig))) res rs1 ->
-      exec_function (External ef) rs1 m t rs2 m.
-
-Scheme exec_instr_ind3 := Minimality for exec_instr Sort Prop
-  with exec_instrs_ind3 := Minimality for exec_instrs Sort Prop
-  with exec_function_ind3 := Minimality for exec_function Sort Prop.
+      step (Callstate s (External ef) rs1 m)
+         t (Returnstate s rs2 m)
+  | exec_return:
+      forall s f sp rs0 c rs m,
+      step (Returnstate (Stackframe f sp rs0 c :: s) rs m)
+        E0 (State s f sp c rs m).
 
 End RELSEM.
 
-Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
-  let ge := Genv.globalenv p in
-  let m0 := Genv.init_mem p in
-  exists b, exists f, exists rs, exists m,
-  Genv.find_symbol ge p.(prog_main) = Some b /\
-  Genv.find_funct_ptr ge b = Some f /\
-  funsig f = mksignature nil (Some Tint) /\
-  exec_function ge f (Locmap.init Vundef) m0 t rs m /\
-  rs (R (Conventions.loc_result (funsig f))) = r.
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall b f,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some b ->
+      Genv.find_funct_ptr ge b = Some f ->
+      funsig f = mksignature nil (Some Tint) ->
+      initial_state p (Callstate nil f (Locmap.init Vundef) m0).
+
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall rs m r,
+      rs (R R3) = Vint r ->
+      final_state (Returnstate nil rs m) r.
 
+Definition exec_program (p: program) (beh: program_behavior) : Prop :=
+  program_behaves step (initial_state p) final_state (Genv.globalenv p) beh.
diff --git a/backend/Linearize.v b/backend/Linearize.v
index 3151628c..305473ba 100644
--- a/backend/Linearize.v
+++ b/backend/Linearize.v
@@ -1,5 +1,5 @@
 (** Linearization of the control-flow graph: 
-    translation from LTL to Linear *)
+    translation from LTL to LTLin *)
 
 Require Import Coqlib.
 Require Import Maps.
@@ -9,24 +9,24 @@ Require Import Globalenvs.
 Require Import Op.
 Require Import Locations.
 Require Import LTL.
-Require Import Linear.
+Require Import LTLin.
 Require Import Kildall.
 Require Import Lattice.
 
-(** To translate from LTL to Linear, we must layout the basic blocks
+(** To translate from LTL to LTLin, we must lay out the basic blocks
   of the LTL control-flow graph in some linear order, and insert
   explicit branches and conditional branches to make sure that
   each basic block jumps to its successors as prescribed by the
   LTL control-flow graph.  However, branches are not necessary
-  if the fall-through behaviour of Linear instructions already
+  if the fall-through behaviour of LTLin instructions already
   implements the desired flow of control.  For instance,
-  consider the two LTL basic blocks
+  consider the two LTL instructions
 <<
-    L1: Bop op args res (Bgoto L2)
+    L1: Lop op args res L2
     L2: ...
 >>
-  If the blocks [L1] and [L2] are laid out consecutively in the Linear
-  code, we can generate the following Linear code:
+  If the instructions [L1] and [L2] are laid out consecutively in the LTLin
+  code, we can generate the following LTLin code:
 <<
     L1: Lop op args res
     L2: ...
@@ -49,19 +49,13 @@ Require Import Lattice.
 - Choosing an order for the basic blocks.  This returns an enumeration
   of CFG nodes stating that the basic blocks must be laid out in
   the order shown in the list.
-- Generate naive Linear code where each basic block branches explicitly
-  to its successors, even if one of these successors is the next basic
-  block.
-- Simplify the naive Linear code, removing unconditional branches to
-  a label that immediately follows:
-<<
-          ...                                  ...
-          Igoto L1;           becomes      L1: ...
-      L1: ...
->>
+- Generate LTLin code where each basic block branches explicitly
+  to its successors, except if one of these successors is the
+  immediately following instruction.
+
   The beauty of this approach is that correct code is generated
   under surprisingly weak hypotheses on the enumeration of
-  CFG nodes: it suffices that every reachable basic block occurs
+  CFG nodes: it suffices that every reachable instruction occurs
   exactly once in the enumeration.  While the enumeration heuristic we use
   is quite trivial, it is easy to implement more sophisticated
   trace picking heuristics: as long as they satisfy the property above,
@@ -73,10 +67,10 @@ Require Import Lattice.
 (** * Determination of the order of basic blocks *)
 
 (** We first compute a mapping from CFG nodes to booleans,
-  indicating whether a CFG basic block is reachable or not.
+  indicating whether a CFG instruction is reachable or not.
   This computation is a trivial forward dataflow analysis
   where the transfer function is the identity: the successors
-  of a reachable block are reachable, by the very
+  of a reachable instruction are reachable, by the very
   definition of reachability. *)
 
 Module DS := Dataflow_Solver(LBoolean)(NodeSetForward).
@@ -84,7 +78,7 @@ Module DS := Dataflow_Solver(LBoolean)(NodeSetForward).
 Definition reachable_aux (f: LTL.function) : option (PMap.t bool) :=
   DS.fixpoint
     (successors f)
-    (Psucc f.(fn_entrypoint))
+    (f.(fn_nextpc))
     (fun pc r => r)
     ((f.(fn_entrypoint), true) :: nil).
 
@@ -108,15 +102,17 @@ Definition enumerate (f: LTL.function) : list node :=
   let reach := reachable f in
   positive_rec (list node) nil
     (fun pc nodes => if reach!!pc then pc :: nodes else nodes)
-    (Psucc f.(fn_entrypoint)).
+    f.(fn_nextpc).
 
-(** * Translation from LTL to Linear *)
+(** * Translation from LTL to LTLin *)
 
 (** We now flatten the structure of the CFG graph, laying out
-  basic blocks consecutively in the order computed by [enumerate],
-  and inserting a branch at the end of every basic block.
+  LTL instructions consecutively in the order computed by [enumerate],
+  and inserting branches to the labels of sucessors if necessary.
+  Whether to insert a branch or not is determined by
+  the [starts_with] function below.
 
-  For blocks ending in a conditional branch [Bcond cond args s1 s2],
+  For LTL conditional branches [Lcond cond args s1 s2],
   we have two possible translations:
 <<
       Lcond cond args s1;       or     Lcond (not cond) args s2;
@@ -124,8 +120,8 @@ Definition enumerate (f: LTL.function) : list node :=
 >>
   We favour the first translation if [s2] is the label of the
   next instruction, and the second if [s1] is the label of the
-  next instruction, thus exhibiting more opportunities for
-  fall-through optimization later. *)
+  next instruction, thus avoiding the insertion of a redundant [Lgoto]
+  instruction. *)
 
 Fixpoint starts_with (lbl: label) (k: code) {struct k} : bool :=
   match k with
@@ -133,35 +129,35 @@ Fixpoint starts_with (lbl: label) (k: code) {struct k} : bool :=
   | _ => false
   end.
 
-Fixpoint linearize_block (b: block) (k: code) {struct b} : code :=
+Definition add_branch (s: label) (k: code) : code :=
+  if starts_with s k then k else Lgoto s :: k.
+
+Definition linearize_instr (b: LTL.instruction) (k: code) : code :=
   match b with
-  | Bgetstack s r b =>
-      Lgetstack s r :: linearize_block b k
-  | Bsetstack r s b =>
-      Lsetstack r s :: linearize_block b k
-  | Bop op args res b =>
-      Lop op args res :: linearize_block b k
-  | Bload chunk addr args dst b =>
-      Lload chunk addr args dst :: linearize_block b k
-  | Bstore chunk addr args src b =>
-      Lstore chunk addr args src :: linearize_block b k
-  | Bcall sig ros b =>
-      Lcall sig ros :: linearize_block b k
-  | Balloc b =>
-      Lalloc :: linearize_block b k
-  | Bgoto s =>
-      Lgoto s :: k
-  | Bcond cond args s1 s2 =>
+  | LTL.Lnop s =>
+      add_branch s k
+  | LTL.Lop op args res s =>
+      Lop op args res :: add_branch s k
+  | LTL.Lload chunk addr args dst s =>
+      Lload chunk addr args dst :: add_branch s k
+  | LTL.Lstore chunk addr args src s =>
+      Lstore chunk addr args src :: add_branch s k
+  | LTL.Lcall sig ros args res s =>
+      Lcall sig ros args res :: add_branch s k
+  | LTL.Ltailcall sig ros args =>
+      Ltailcall sig ros args :: k
+  | LTL.Lalloc arg res s =>
+      Lalloc arg res :: add_branch s k
+  | LTL.Lcond cond args s1 s2 =>
       if starts_with s1 k then
-        Lcond (negate_condition cond) args s2 :: Lgoto s1 :: k
+        Lcond (negate_condition cond) args s2 :: add_branch s1 k
       else
-        Lcond cond args s1 :: Lgoto s2 :: k
-  | Breturn =>
-      Lreturn :: k
+        Lcond cond args s1 :: add_branch s2 k
+  | LTL.Lreturn or =>
+      Lreturn or :: k
   end.
 
-(* Linearize a function body according to an enumeration of its
-   nodes.  *)
+(** Linearize a function body according to an enumeration of its nodes.  *)
 
 Fixpoint linearize_body (f: LTL.function) (enum: list node)
                         {struct enum} : code :=
@@ -170,47 +166,21 @@ Fixpoint linearize_body (f: LTL.function) (enum: list node)
   | pc :: rem =>
       match f.(LTL.fn_code)!pc with
       | None => linearize_body f rem
-      | Some b => Llabel pc :: linearize_block b (linearize_body f rem)
+      | Some b => Llabel pc :: linearize_instr b (linearize_body f rem)
       end
   end.
 
-Definition linearize_function (f: LTL.function) : Linear.function :=
+(** * Entry points for code linearization *)
+
+Definition transf_function (f: LTL.function) : LTLin.function :=
   mkfunction
     (LTL.fn_sig f)
+    (LTL.fn_params f)
     (LTL.fn_stacksize f)
-    (linearize_body f (enumerate f)).
-
-(** * Cleanup of the linearized code *)
-
-(** We now eliminate [Lgoto] instructions that branch to an
-  immediately following label: these are redundant, as the fall-through
-  behaviour obtained by removing the [Lgoto] instruction is
-  semantically equivalent. *)
-
-Fixpoint cleanup_code (c: code) {struct c} : code :=
-  match c with
-  | nil => nil
-  | Lgoto lbl :: c' => 
-      if starts_with lbl c'
-      then cleanup_code c'
-      else Lgoto lbl :: cleanup_code c'
-  | i :: c' =>
-      i :: cleanup_code c'
-  end.
-
-Definition cleanup_function (f: Linear.function) : Linear.function :=
-  mkfunction
-    (fn_sig f)
-    (fn_stacksize f)
-    (cleanup_code f.(fn_code)).
-
-(** * Entry points for code linearization *)
-
-Definition transf_function (f: LTL.function) : Linear.function :=
-  cleanup_function (linearize_function f).
+    (add_branch (LTL.fn_entrypoint f) (linearize_body f (enumerate f))).
 
-Definition transf_fundef (f: LTL.fundef) : Linear.fundef :=
+Definition transf_fundef (f: LTL.fundef) : LTLin.fundef :=
   AST.transf_fundef transf_function f.
 
-Definition transf_program (p: LTL.program) : Linear.program :=
+Definition transf_program (p: LTL.program) : LTLin.program :=
   transform_program transf_fundef p.
diff --git a/backend/Linearizeproof.v b/backend/Linearizeproof.v
index 5937fc34..c7299085 100644
--- a/backend/Linearizeproof.v
+++ b/backend/Linearizeproof.v
@@ -8,10 +8,12 @@ Require Import Values.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
 Require Import LTL.
-Require Import Linear.
+Require Import LTLtyping.
+Require Import LTLin.
 Require Import Linearize.
 Require Import Lattice.
 
@@ -46,6 +48,18 @@ Proof.
   destruct f; reflexivity.
 Qed.
 
+Lemma find_function_translated:
+  forall ros ls f,
+  LTL.find_function ge ros ls = Some f ->
+  find_function tge ros ls = Some (transf_fundef f).
+Proof.
+  intros until f. destruct ros; simpl.
+  intro. apply functions_translated; auto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
+  apply function_ptr_translated; auto.
+  congruence.
+Qed.
+
 (** * Correctness of reachability analysis *)
 
 (** The entry point of the function is reachable. *)
@@ -62,11 +76,10 @@ Proof.
   intros. apply PMap.gi.
 Qed.
 
-(** The successors of a reachable basic block are reachable. *)
+(** The successors of a reachable instruction are reachable. *)
 
 Lemma reachable_successors:
   forall f pc pc',
-  f.(LTL.fn_code)!pc <> None ->
   In pc' (successors f pc) ->
   (reachable f)!!pc = true ->
   (reachable f)!!pc' = true.
@@ -74,51 +87,19 @@ Proof.
   intro f. unfold reachable.
   caseEq (reachable_aux f).
   unfold reachable_aux. intro reach; intros.
+  elim (LTL.fn_code_wf f pc); intro.
   assert (LBoolean.ge reach!!pc' reach!!pc).
   change (reach!!pc) with ((fun pc r => r) pc (reach!!pc)).
-  eapply DS.fixpoint_solution. eexact H.
-  elim (fn_code_wf f pc); intro. auto. contradiction.
-  auto. 
+  eapply DS.fixpoint_solution. eexact H. auto. auto.
   elim H3; intro. congruence. auto.
+  unfold successors in H0. rewrite H2 in H0. contradiction.
   intros. apply PMap.gi.
 Qed.
 
-(* If we have a valid LTL transition from [pc] to [pc'], and
-  [pc] is reachable, then [pc'] is reachable. *)
-
-Lemma reachable_correct_1:
-  forall f sp pc rs m t pc' rs' m' b,
-  f.(LTL.fn_code)!pc = Some b ->
-  exec_block ge sp b rs m t (Cont pc') rs' m' ->
-  (reachable f)!!pc = true ->
-  (reachable f)!!pc' = true.
-Proof.
-  intros. apply reachable_successors with pc; auto.
-  congruence.
-  eapply successors_correct; eauto.
-Qed.
-
-Lemma reachable_correct_2:
-  forall c sp pc rs m t out rs' m',
-  exec_blocks ge c sp pc rs m t out rs' m' ->  
-  forall f pc',
-  c = f.(LTL.fn_code) ->
-  out = Cont pc' ->
-  (reachable f)!!pc = true ->
-  (reachable f)!!pc' = true.
-Proof.
-  induction 1; intros.
-  congruence.
-  eapply reachable_correct_1. rewrite <- H1; eauto. 
-  subst out; eauto. auto.
-  auto.
-Qed.
-
 (** * Properties of node enumeration *)
 
 (** An enumeration of CFG nodes is correct if the following conditions hold:
 - All nodes for reachable basic blocks must be in the list.
-- The function entry point is the first element in the list.
 - The list is without repetition (so that no code duplication occurs).
 
 We prove that our [enumerate] function satisfies all three. *)
@@ -131,10 +112,10 @@ Lemma enumerate_complete:
 Proof.
   intros. 
   assert (forall p, 
-    Plt p (Psucc f.(fn_entrypoint)) ->
+    Plt p f.(fn_nextpc) ->
     (reachable f)!!p = true ->
     In p (enumerate f)).
-  unfold enumerate. pattern (Psucc (fn_entrypoint f)).
+  unfold enumerate. pattern (fn_nextpc f).
   apply positive_Peano_ind. 
   intros. compute in H1. destruct p; discriminate. 
   intros. rewrite positive_rec_succ. elim (Plt_succ_inv _ _ H2); intro.
@@ -143,27 +124,15 @@ Proof.
   elim (LTL.fn_code_wf f pc); intro. auto. congruence.
 Qed. 
 
-Lemma enumerate_head:
-  forall f, exists l, enumerate f = f.(LTL.fn_entrypoint) :: l.
-Proof.
-  intro. unfold enumerate. rewrite positive_rec_succ. 
-  rewrite reachable_entrypoint.
-  exists (positive_rec (list node) nil
-    (fun (pc : positive) (nodes : list node) =>
-      if (reachable f) !! pc then pc :: nodes else nodes) 
-    (fn_entrypoint f) ).
-  auto.
-Qed.
-
 Lemma enumerate_norepet:
   forall f, list_norepet (enumerate f).
 Proof.
   intro. 
-  unfold enumerate. pattern (Psucc (fn_entrypoint f)).
+  unfold enumerate. pattern (fn_nextpc f).
   apply positive_Peano_ind.  
   rewrite positive_rec_base. constructor.
   intros. rewrite positive_rec_succ.
-  case (reachable f)!!x. auto.
+  case (reachable f)!!x.
   constructor. 
   assert (forall y,
     In y (positive_rec
@@ -183,9 +152,9 @@ Proof.
   auto.
 Qed.
 
-(** * Correctness of the cleanup pass *)
+(** * Properties related to labels *)
 
-(** If labels are globally unique and the Linear code [c] contains
+(** If labels are globally unique and the LTLin code [c] contains
   a subsequence [Llabel lbl :: c1], [find_label lbl c] returns [c1].
 *)
 
@@ -196,27 +165,6 @@ Fixpoint unique_labels (c: code) : Prop :=
   | i :: c => unique_labels c
   end.
 
-Inductive is_tail: code -> code -> Prop :=
-  | is_tail_refl:
-      forall c, is_tail c c
-  | is_tail_cons:
-      forall i c1 c2, is_tail c1 c2 -> is_tail c1 (i :: c2).
-
-Lemma is_tail_in:
-  forall i c1 c2, is_tail (i :: c1) c2 -> In i c2.
-Proof.
-  induction c2; simpl; intros.
-  inversion H.
-  inversion H. tauto. right; auto.
-Qed.
-
-Lemma is_tail_cons_left:
-  forall i c1 c2, is_tail (i :: c1) c2 -> is_tail c1 c2.
-Proof.
-  induction c2; intros; inversion H.
-  constructor. constructor. constructor. auto. 
-Qed.
-
 Lemma find_label_unique:
   forall lbl c1 c2 c3,
   is_tail (Llabel lbl :: c1) c2 ->
@@ -237,163 +185,51 @@ Qed.
 (** Correctness of the [starts_with] test. *)
 
 Lemma starts_with_correct:
-  forall lbl c1 c2 c3 f sp ls m,
+  forall lbl c1 c2 c3 s f sp ls m,
   is_tail c1 c2 ->
   unique_labels c2 ->
   starts_with lbl c1 = true ->
   find_label lbl c2 = Some c3 ->
-  exec_instrs tge f sp (cleanup_code c1) ls m 
-                    E0 (cleanup_code c3) ls m.
+  plus step tge (State s f sp c1 ls m)
+             E0 (State s f sp c3 ls m).
 Proof.
   induction c1.
   simpl; intros; discriminate.
   simpl starts_with. destruct a; try (intros; discriminate).
-  intros. apply exec_trans with E0 (cleanup_code c1) ls m E0.
-  simpl. 
-  constructor. constructor. 
+  intros. 
+  apply plus_left with E0 (State s f sp c1 ls m) E0.
+  simpl. constructor. 
   destruct (peq lbl l).
   subst l. replace c3 with c1. constructor.
   apply find_label_unique with lbl c2; auto.
+  apply plus_star. 
   apply IHc1 with c2; auto. eapply is_tail_cons_left; eauto.
   traceEq.
 Qed.
 
-(** Code cleanup preserves the labeling of the code. *)
-
-Lemma find_label_cleanup_code:
-  forall lbl c c',
-  find_label lbl c = Some c' ->
-  find_label lbl (cleanup_code c) = Some (cleanup_code c').
-Proof.
-  induction c.  
-  simpl. intros; discriminate.
-  generalize (is_label_correct lbl a). 
-  simpl find_label. case (is_label lbl a); intro.
-  subst a. intros. injection H; intros. subst c'. 
-  simpl. rewrite peq_true. auto.
-  intros. destruct a; auto. 
-  simpl. rewrite peq_false. auto.
-  congruence. 
-  case (starts_with l c). auto. simpl. auto.
-Qed.
-
-(** One transition in the original Linear code corresponds to zero
-  or one transitions in the cleaned-up code. *)
-
-Lemma cleanup_code_correct_1:
-  forall f sp c1 ls1 m1 t c2 ls2 m2,
-  exec_instr tge f sp c1 ls1 m1 t c2 ls2 m2 ->
-  is_tail c1 f.(fn_code) ->
-  unique_labels f.(fn_code) ->
-  exec_instrs tge (cleanup_function f) sp 
-                       (cleanup_code c1) ls1 m1
-                     t (cleanup_code c2) ls2 m2.
-Proof.
-  induction 1; simpl; intros;
-  try (apply exec_one; econstructor; eauto; fail).
-  (* goto *)
-  caseEq (starts_with lbl b); intro SW.
-  eapply starts_with_correct; eauto.
-  eapply is_tail_cons_left; eauto.
-  constructor. constructor. 
-  unfold cleanup_function; simpl. 
-  apply find_label_cleanup_code. auto.
-  (* cond, taken *)
-  constructor. apply exec_Lcond_true. auto.
-  unfold cleanup_function; simpl. 
-  apply find_label_cleanup_code. auto.
-  (* cond, not taken *)
-  constructor. apply exec_Lcond_false. auto.
-Qed. 
-
-Lemma is_tail_find_label:
-  forall lbl c2 c1,
-  find_label lbl c1 = Some c2 -> is_tail c2 c1.
-Proof.
-  induction c1; simpl.
-  intros; discriminate.
-  case (is_label lbl a). intro. injection H; intro. subst c2.
-  constructor. constructor.
-  intro. constructor. auto. 
-Qed.
-
-Lemma is_tail_exec_instr:
-  forall f sp c1 ls1 m1 t c2 ls2 m2,
-  exec_instr tge f sp c1 ls1 m1 t c2 ls2 m2 ->
-  is_tail c1 f.(fn_code) -> is_tail c2 f.(fn_code).
-Proof.
-  induction 1; intros;
-  try (eapply is_tail_cons_left; eauto; fail).
-  eapply is_tail_find_label; eauto.
-  eapply is_tail_find_label; eauto.
-Qed.
-
-Lemma is_tail_exec_instrs:
-  forall f sp c1 ls1 m1 t c2 ls2 m2,
-  exec_instrs tge f sp c1 ls1 m1 t c2 ls2 m2 ->
-  is_tail c1 f.(fn_code) -> is_tail c2 f.(fn_code).
-Proof.
-  induction 1; intros.
-  auto.
-  eapply is_tail_exec_instr; eauto.
-  auto.
-Qed.
-
-(** Zero, one or several transitions in the original Linear code correspond
-  to zero, one or several transitions in the cleaned-up code. *)
-
-Lemma cleanup_code_correct_2:
-  forall f sp c1 ls1 m1 t c2 ls2 m2,
-  exec_instrs tge f sp c1 ls1 m1 t c2 ls2 m2 ->
-  is_tail c1 f.(fn_code) ->
-  unique_labels f.(fn_code) ->
-  exec_instrs tge (cleanup_function f) sp 
-                       (cleanup_code c1) ls1 m1
-                     t (cleanup_code c2) ls2 m2.
-Proof.
-  induction 1; simpl; intros.
-  apply exec_refl.
-  eapply cleanup_code_correct_1; eauto.
-  apply exec_trans with t1 (cleanup_code b2) rs2 m2 t2.
-  auto. 
-  apply IHexec_instrs2; auto.
-  eapply is_tail_exec_instrs; eauto.
-  auto.
-Qed.
+(** Connection between [find_label] and linearization. *)
 
-Lemma cleanup_function_correct:
-  forall f ls1 m1 t ls2 m2,
-  exec_function tge (Internal f) ls1 m1 t ls2 m2 ->
-  unique_labels f.(fn_code) ->
-  exec_function tge (Internal (cleanup_function f)) ls1 m1 t ls2 m2. 
+Lemma find_label_add_branch:
+  forall lbl k s,
+  find_label lbl (add_branch s k) = find_label lbl k.
 Proof.
-  intros. inversion H; subst.
-  generalize (cleanup_code_correct_2 _ _ _ _ _ _ _ _ _ H3 (is_tail_refl _) H0).
-  simpl. intro.
-  econstructor; eauto.
+  intros. unfold add_branch. destruct (starts_with s k); auto.
 Qed.
 
-(** * Properties of linearized code *)
-
-(** We now state useful properties of the Linear code generated by
-  the naive LTL-to-Linear translation. *)
-
-(** Connection between [find_label] and linearization. *)
-
-Lemma find_label_lin_block:
+Lemma find_label_lin_instr:
   forall lbl k b,
-  find_label lbl (linearize_block b k) = find_label lbl k.
+  find_label lbl (linearize_instr b k) = find_label lbl k.
 Proof.
+  intros lbl k. generalize (find_label_add_branch lbl k); intro.
   induction b; simpl; auto.
-  case (starts_with n k); reflexivity.
+  case (starts_with n k); simpl; auto.
 Qed.
 
 Lemma find_label_lin_rec:
   forall f enum pc b,
   In pc enum ->
   f.(LTL.fn_code)!pc = Some b ->
-  exists k,
-  find_label pc (linearize_body f enum) = Some (linearize_block b k).
+  exists k, find_label pc (linearize_body f enum) = Some (linearize_instr b k).
 Proof.
   induction enum; intros.
   elim H.
@@ -403,7 +239,7 @@ Proof.
   assert (In pc enum). simpl in H. tauto.
   elim (IHenum pc b H1 H0). intros k FIND.
   exists k. simpl. destruct (LTL.fn_code f)!a. 
-  simpl. rewrite peq_false. rewrite find_label_lin_block. auto. auto.
+  simpl. rewrite peq_false. rewrite find_label_lin_instr. auto. auto.
   auto.
 Qed.
 
@@ -412,20 +248,56 @@ Lemma find_label_lin:
   f.(LTL.fn_code)!pc = Some b ->
   (reachable f)!!pc = true ->
   exists k,
-  find_label pc (fn_code (linearize_function f)) = Some (linearize_block b k).
+  find_label pc (fn_code (transf_function f)) = Some (linearize_instr b k).
 Proof.
-  intros. unfold linearize_function; simpl. apply find_label_lin_rec.
+  intros. unfold transf_function; simpl.
+  rewrite find_label_add_branch. apply find_label_lin_rec.
   eapply enumerate_complete; eauto. auto.
 Qed.
 
+Lemma find_label_lin_inv:
+  forall f pc b k ,
+  f.(LTL.fn_code)!pc = Some b ->
+  (reachable f)!!pc = true ->
+  find_label pc (fn_code (transf_function f)) = Some k ->
+  exists k', k = linearize_instr b k'.
+Proof.
+  intros. exploit find_label_lin; eauto. intros [k' FIND].
+  exists k'. congruence.
+Qed.
+
+Lemma find_label_lin_succ:
+  forall f s,
+  valid_successor f s ->
+  (reachable f)!!s = true ->
+  exists k,
+  find_label s (fn_code (transf_function f)) = Some k.
+Proof.
+  intros. destruct H as [i AT]. 
+  exploit find_label_lin; eauto. intros [k FIND].
+  exists (linearize_instr i k); auto.
+Qed.
+
 (** Unique label property for linearized code. *)
 
-Lemma label_in_lin_block:
+Lemma label_in_add_branch:
+  forall lbl s k,
+  In (Llabel lbl) (add_branch s k) -> In (Llabel lbl) k.
+Proof.
+  intros until k; unfold add_branch.
+  destruct (starts_with s k); simpl; intuition congruence.
+Qed.
+
+Lemma label_in_lin_instr:
   forall lbl k b,
-  In (Llabel lbl) (linearize_block b k) -> In (Llabel lbl) k.
+  In (Llabel lbl) (linearize_instr b k) -> In (Llabel lbl) k.
 Proof.
-  induction b; simpl; try (intuition congruence).
-  case (starts_with n k); simpl; intuition congruence.
+  induction b; simpl; intros;
+  try (apply label_in_add_branch with n; intuition congruence);
+  try (intuition congruence).
+  destruct (starts_with n k); simpl in H.
+  apply label_in_add_branch with n; intuition congruence.
+  apply label_in_add_branch with n0; intuition congruence.
 Qed.
 
 Lemma label_in_lin_rec:
@@ -436,16 +308,24 @@ Proof.
   simpl; auto.
   simpl. destruct (LTL.fn_code f)!a. 
   simpl. intros [A|B]. left; congruence. 
-  right. apply IHenum. eapply label_in_lin_block; eauto.
+  right. apply IHenum. eapply label_in_lin_instr; eauto.
   intro; right; auto.
 Qed.
 
-Lemma unique_labels_lin_block:
+Lemma unique_labels_add_branch:
+  forall lbl k,
+  unique_labels k -> unique_labels (add_branch lbl k).
+Proof.
+  intros; unfold add_branch. 
+  destruct (starts_with lbl k); simpl; intuition.
+Qed.
+
+Lemma unique_labels_lin_instr:
   forall k b,
-  unique_labels k -> unique_labels (linearize_block b k).
+  unique_labels k -> unique_labels (linearize_instr b k).
 Proof.
-  induction b; simpl; auto.
-  case (starts_with n k); simpl; auto.
+  induction b; intro; simpl; auto; try (apply unique_labels_add_branch; auto).
+  case (starts_with n k); simpl; apply unique_labels_add_branch; auto.
 Qed.
 
 Lemma unique_labels_lin_rec:
@@ -458,268 +338,339 @@ Proof.
   intro. simpl. destruct (LTL.fn_code f)!a. 
   simpl. split. red. intro. inversion H. elim H3.
   apply label_in_lin_rec with f. 
-  apply label_in_lin_block with b. auto.
-  apply unique_labels_lin_block. apply IHenum. inversion H; auto.
+  apply label_in_lin_instr with i. auto.
+  apply unique_labels_lin_instr. apply IHenum. inversion H; auto.
   apply IHenum. inversion H; auto.
 Qed.
 
-Lemma unique_labels_lin_function:
+Lemma unique_labels_transf_function:
   forall f,
-  unique_labels (fn_code (linearize_function f)).
+  unique_labels (fn_code (transf_function f)).
 Proof.
-  intros. unfold linearize_function; simpl.
+  intros. unfold transf_function; simpl.
+  apply unique_labels_add_branch. 
   apply unique_labels_lin_rec. apply enumerate_norepet. 
 Qed.
 
-(** * Correctness of linearization *)
+(** Correctness of [add_branch]. *)
+
+Lemma is_tail_find_label:
+  forall lbl c2 c1,
+  find_label lbl c1 = Some c2 -> is_tail c2 c1.
+Proof.
+  induction c1; simpl.
+  intros; discriminate.
+  case (is_label lbl a). intro. injection H; intro. subst c2.
+  constructor. constructor.
+  intro. constructor. auto. 
+Qed.
 
-(** The outcome of an LTL [exec_block] or [exec_blocks] execution
-  is valid if it corresponds to a reachable, existing basic block.
-  All outcomes occurring in an LTL program execution are actually
-  valid, but we will prove that along with the main simulation proof. *)
+Lemma is_tail_add_branch:
+  forall lbl c1 c2, is_tail (add_branch lbl c1) c2 -> is_tail c1 c2.
+Proof.
+  intros until c2. unfold add_branch. destruct (starts_with lbl c1).
+  auto. eauto with coqlib.
+Qed.
 
-Definition valid_outcome (f: LTL.function) (out: outcome) :=
-  match out with
-  | Cont s =>
-      (reachable f)!!s = true /\ exists b, f.(LTL.fn_code)!s = Some b
-  | Return => True
-  end.
+Lemma add_branch_correct:
+  forall lbl c k s f sp ls m,
+  is_tail k (transf_function f).(fn_code) ->
+  find_label lbl (transf_function f).(fn_code) = Some c ->
+  plus step tge (State s (transf_function f) sp (add_branch lbl k) ls m)
+             E0 (State s (transf_function f) sp c ls m).
+Proof.
+  intros. unfold add_branch.
+  caseEq (starts_with lbl k); intro SW.
+  eapply starts_with_correct; eauto.
+  apply unique_labels_transf_function.
+  apply plus_one. apply exec_Lgoto. auto.
+Qed.
 
-(** Auxiliary lemma used to establish the [valid_outcome] property. *)
+(** * Correctness of linearization *)
 
-Lemma exec_blocks_valid_outcome:
-  forall c sp pc ls1 m1 t out ls2 m2,
-  exec_blocks ge c sp pc ls1 m1 t out ls2 m2 ->
-  forall f,
-  c = f.(LTL.fn_code) ->
-  (reachable f)!!pc = true ->
-  valid_outcome f out ->
-  valid_outcome f (Cont pc).
+(** The proof of semantic preservation is a simulation argument
+  based on diagrams of the following form:
+<<
+           st1 --------------- st2
+            |                   |
+           t|                  +|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  The invariant (horizontal lines above) is the [match_states]
+  predicate defined below.  It captures the fact that the flow
+  of data is the same in the source and linearized codes.
+  Moreover, whenever the source state is at node [pc] in its
+  control-flow graph, the transformed state is at a code
+  sequence [c] that starts with the label [pc]. *)
+
+Inductive match_stackframes: LTL.stackframe -> LTLin.stackframe -> Prop :=
+  | match_stackframe_intro:
+      forall res f sp pc ls c,
+      (reachable f)!!pc = true ->
+      valid_successor f pc ->
+      is_tail c (fn_code (transf_function f)) ->
+      wt_function f ->
+      match_stackframes
+        (LTL.Stackframe res f sp ls pc)
+        (LTLin.Stackframe res (transf_function f) sp ls (add_branch pc c)).
+
+Inductive match_states: LTL.state -> LTLin.state -> Prop :=
+  | match_states_intro:
+      forall s f sp pc ls m ts c
+        (STACKS: list_forall2 match_stackframes s ts)
+        (REACH: (reachable f)!!pc = true)
+        (AT: find_label pc (fn_code (transf_function f)) = Some c)
+        (WTF: wt_function f),
+      match_states (LTL.State s f sp pc ls m)
+                   (LTLin.State ts (transf_function f) sp c ls m)
+  | match_states_call:
+      forall s f ls m ts,
+      list_forall2 match_stackframes s ts ->
+      wt_fundef f ->
+      match_states (LTL.Callstate s f ls m)
+                   (LTLin.Callstate ts (transf_fundef f) ls m)
+  | match_states_return:
+      forall s sig ls m ts,
+      list_forall2 match_stackframes s ts ->
+      match_states (LTL.Returnstate s sig ls m)
+                   (LTLin.Returnstate ts sig ls m).
+
+Remark parent_locset_match:
+  forall s ts,
+  list_forall2 match_stackframes s ts ->
+  parent_locset ts = LTL.parent_locset s.
 Proof.
-  induction 1.
-  auto.
-  intros. simpl. split. auto. exists b. congruence. 
-  intros. apply IHexec_blocks1. auto. auto.
-  apply IHexec_blocks2. auto. 
-  eapply reachable_correct_2. eexact H. auto. auto. auto.
-  auto.
+  induction 1; simpl; auto. inv H; auto. 
 Qed.
 
-(** Correspondence between an LTL outcome and a fragment of generated
-  Linear code. *)
-
-Inductive cont_for_outcome: LTL.function -> outcome -> code -> Prop :=
-  | co_return:
-      forall f k,
-      cont_for_outcome f Return (Lreturn :: k)
-  | co_goto:
-      forall f s k,
-      find_label s (linearize_function f).(fn_code) = Some k ->
-      cont_for_outcome f (Cont s) k.
-
-(** The simulation properties used in the inductive proof.
-  They are parameterized by an LTL execution, and state
-  that a matching execution is possible in the generated Linear code. *)
-
-Definition exec_instr_prop 
-  (sp: val) (b1: block) (ls1: locset) (m1: mem)
-  (t: trace) (b2: block) (ls2: locset) (m2: mem) : Prop :=
-  forall f k,
-  exec_instr tge (linearize_function f) sp
-                 (linearize_block b1 k) ls1 m1
-               t (linearize_block b2 k) ls2 m2.
-
-Definition exec_instrs_prop
-  (sp: val) (b1: block) (ls1: locset) (m1: mem)
-  (t: trace) (b2: block) (ls2: locset) (m2: mem) : Prop :=
-  forall f k,
-  exec_instrs tge (linearize_function f) sp
-                  (linearize_block b1 k) ls1 m1
-                t (linearize_block b2 k) ls2 m2.
-
-Definition exec_block_prop
-  (sp: val) (b: block) (ls1: locset) (m1: mem) 
-  (t: trace) (out: outcome) (ls2: locset) (m2: mem): Prop :=
-  forall f k,
-  valid_outcome f out ->
-  exists k',
-  exec_instrs tge (linearize_function f) sp
-                  (linearize_block b k) ls1 m1
-                t k' ls2 m2
-  /\ cont_for_outcome f out k'.
-
-Definition exec_blocks_prop
-  (c: LTL.code) (sp: val) (pc: node) (ls1: locset) (m1: mem) 
-         (t: trace) (out: outcome) (ls2: locset) (m2: mem): Prop :=
-  forall f k,
-  c = f.(LTL.fn_code) ->
-  (reachable f)!!pc = true ->
-  find_label pc (fn_code (linearize_function f)) = Some k ->
-  valid_outcome f out ->
-  exists k',
-  exec_instrs tge (linearize_function f) sp
-                  k ls1 m1
-                t k' ls2 m2
-  /\ cont_for_outcome f out k'.
-
-Definition exec_function_prop
-  (f: LTL.fundef) (ls1: locset) (m1: mem) (t: trace)
-  (ls2: locset) (m2: mem): Prop :=
-  exec_function tge (transf_fundef f) ls1 m1 t ls2 m2.
-
-Scheme exec_instr_ind5 := Minimality for LTL.exec_instr Sort Prop
-  with exec_instrs_ind5 := Minimality for LTL.exec_instrs Sort Prop
-  with exec_block_ind5 := Minimality for LTL.exec_block Sort Prop
-  with exec_blocks_ind5 := Minimality for LTL.exec_blocks Sort Prop
-  with exec_function_ind5 := Minimality for LTL.exec_function Sort Prop.
-
-(** The obligatory proof by structural induction on the LTL evaluation
-  derivation. *)
-
-Lemma transf_function_correct:
-  forall f ls1 m1 t ls2 m2,
-  LTL.exec_function ge f ls1 m1 t ls2 m2 ->
-  exec_function_prop f ls1 m1 t ls2 m2.
+Hypothesis wt_prog: wt_program prog.
+
+Theorem transf_step_correct:
+  forall s1 t s2, LTL.step ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1'),
+  exists s2', plus LTLin.step tge s1' t s2' /\ match_states s2 s2'.
 Proof.
-  apply (exec_function_ind5 ge
-           exec_instr_prop
-           exec_instrs_prop
-           exec_block_prop
-           exec_blocks_prop
-           exec_function_prop);
-  intros; red; intros; simpl.
-  (* getstack *)
-  constructor.
-  (* setstack *)
-  constructor.
-  (* op *)
-  constructor. rewrite <- H. apply eval_operation_preserved.
-  exact symbols_preserved.
-  (* load *)
-  apply exec_Lload with a. 
-  rewrite <- H. apply eval_addressing_preserved.
-  exact symbols_preserved.
-  auto.
-  (* store *)
-  apply exec_Lstore with a.
-  rewrite <- H. apply eval_addressing_preserved.
-  exact symbols_preserved.
-  auto.
-  (* call *)
-  apply exec_Lcall with (transf_fundef f).
-  generalize H. destruct ros; simpl.
-  intro; apply functions_translated; auto.
-  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
-  intro; apply function_ptr_translated; auto. congruence.
-  generalize (sig_preserved f). congruence.
-  apply H2. 
-  (* alloc *)
-  eapply exec_Lalloc; eauto. 
-  (* instr_refl *)
-  apply exec_refl.
-  (* instr_one *)
-  apply exec_one. apply H0. 
-  (* instr_trans *)
-  apply exec_trans with t1 (linearize_block b2 k) rs2 m2 t2.
-  apply H0. apply H2. auto.
-  (* goto *)
-  elim H1. intros REACH [b2 AT2]. 
-  generalize (H0 f k). simpl. intro.
-  elim (find_label_lin f s b2 AT2 REACH). intros k2 FIND.
-  exists (linearize_block b2 k2).
-  split. 
-  eapply exec_trans. eexact H2. constructor. constructor. auto. traceEq.
-  constructor. auto. 
-  (* cond, true *)
-  elim H2. intros REACH [b2 AT2]. 
-  elim (find_label_lin f ifso b2 AT2 REACH). intros k2 FIND.
-  exists (linearize_block b2 k2).
-  split.
-  generalize (H0 f k). simpl. 
-  case (starts_with ifso k); intro.
-  eapply exec_trans. eexact H3. 
-  eapply exec_trans. apply exec_one. apply exec_Lcond_false.
-  change false with (negb true). apply eval_negate_condition. auto.
-  apply exec_one. apply exec_Lgoto. auto. reflexivity. traceEq.
-  eapply exec_trans. eexact H3. 
-  apply exec_one. apply exec_Lcond_true.
-  auto. auto. traceEq.
-  constructor; auto.
-  (* cond, false *)
-  elim H2. intros REACH [b2 AT2]. 
-  elim (find_label_lin f ifnot b2 AT2 REACH). intros k2 FIND.
-  exists (linearize_block b2 k2).
-  split.
-  generalize (H0 f k). simpl. 
-  case (starts_with ifso k); intro.
-  eapply exec_trans. eexact H3. 
-  apply exec_one. apply exec_Lcond_true. 
-  change true with (negb false). apply eval_negate_condition. auto.
-  auto. traceEq.
-  eapply exec_trans. eexact H3.
-  eapply exec_trans. apply exec_one. apply exec_Lcond_false. auto.
-  apply exec_one. apply exec_Lgoto. auto. reflexivity. traceEq.
-  constructor; auto.
-  (* return *)
-  exists (Lreturn :: k). split. 
-  exact (H0 f k). constructor.
-  (* refl blocks *)
-  exists k. split. apply exec_refl. constructor. auto.
-  (* one blocks *)
-  subst c. elim (find_label_lin f pc b H H3). intros k' FIND.
-  assert (k = linearize_block b k'). congruence. subst k.
-  elim (H1 f k' H5). intros k'' [EXEC CFO].
-  exists k''. tauto.
-  (* trans blocks *)
-  assert ((reachable f)!!pc2 = true).
-    eapply reachable_correct_2. eexact H. auto. auto. auto.
-  assert (valid_outcome f (Cont pc2)).
-    eapply exec_blocks_valid_outcome; eauto.
-  generalize (H0 f k H4 H5 H6 H9). intros [k1 [EX1 CFO2]].
-  inversion CFO2. 
-  generalize (H2 f k1 H4 H8 H12 H7). intros [k2 [EX2 CFO3]].
-  exists k2. split. eapply exec_trans; eauto. auto.
-  (* internal function -- TA-DA! *)
-  assert (REACH0: (reachable f)!!(fn_entrypoint f) = true).
+  induction 1; intros; try (inv MS);
+  try (generalize (wt_instrs _ WTF _ _ H); intro WTI).
+  (* Lnop *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!pc' = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
+  econstructor; split.
+  eapply add_branch_correct; eauto.
+  eapply is_tail_add_branch. eapply is_tail_find_label. eauto.
+  econstructor; eauto. 
+  (* Lop *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!pc' = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
+  econstructor; split.
+  eapply plus_left'.
+  eapply exec_Lop with (v := v); eauto.
+  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  eapply add_branch_correct; eauto.
+  eapply is_tail_add_branch. eapply is_tail_cons_left. 
+  eapply is_tail_find_label. eauto.
+  traceEq.
+  econstructor; eauto.
+  (* Lload *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!pc' = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
+  econstructor; split.
+  eapply plus_left'.
+  eapply exec_Lload; eauto.
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply add_branch_correct; eauto.
+  eapply is_tail_add_branch. eapply is_tail_cons_left. 
+  eapply is_tail_find_label. eauto.
+  traceEq.
+  econstructor; eauto.
+  (* Lstore *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!pc' = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
+  econstructor; split.
+  eapply plus_left'.
+  eapply exec_Lstore; eauto.
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply add_branch_correct; eauto.
+  eapply is_tail_add_branch. eapply is_tail_cons_left. 
+  eapply is_tail_find_label. eauto.
+  traceEq.
+  econstructor; eauto.
+  (* Lcall *)
+  unfold rs1 in *. inv MS. fold rs1.
+  generalize (wt_instrs _ WTF _ _ H); intro WTI.
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!pc' = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  assert (VALID: valid_successor f pc'). inv WTI; auto.
+  econstructor; split.
+  apply plus_one. eapply exec_Lcall with (f' := transf_fundef f'); eauto.
+  apply find_function_translated; auto.
+  symmetry; apply sig_preserved.
+  econstructor; eauto.
+  constructor; auto. econstructor; eauto.
+  eapply is_tail_add_branch. eapply is_tail_cons_left. 
+  eapply is_tail_find_label. eauto.
+  destruct ros; simpl in H0.
+  eapply Genv.find_funct_prop; eauto.
+  destruct (Genv.find_symbol ge i); try discriminate.  
+  eapply Genv.find_funct_ptr_prop; eauto.
+
+  (* Ltailcall *)
+  unfold rs2, rs1 in *. inv MS. fold rs1; fold rs2.
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  econstructor; split.
+  apply plus_one. eapply exec_Ltailcall with (f' := transf_fundef f'); eauto.
+  apply find_function_translated; auto.
+  symmetry; apply sig_preserved.
+  rewrite (parent_locset_match _ _ STACKS).
+  econstructor; eauto.
+  destruct ros; simpl in H0.
+  eapply Genv.find_funct_prop; eauto.
+  destruct (Genv.find_symbol ge i); try discriminate.  
+  eapply Genv.find_funct_ptr_prop; eauto.
+
+  (* Lalloc *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!pc' = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
+  econstructor; split.
+  eapply plus_left'. 
+  eapply exec_Lalloc; eauto.
+  eapply add_branch_correct; eauto.
+  eapply is_tail_add_branch. eapply is_tail_cons_left. 
+  eapply is_tail_find_label. eauto.
+  traceEq.
+  econstructor; eauto.
+  (* Lcond true *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!ifso = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
+  destruct (starts_with ifso c').
+  econstructor; split.
+  eapply plus_left'.  
+  eapply exec_Lcond_false; eauto.
+  change false with (negb true). apply eval_negate_condition; auto.
+  eapply add_branch_correct; eauto.
+  eapply is_tail_add_branch. eapply is_tail_cons_left. 
+  eapply is_tail_find_label. eauto.
+  traceEq.
+  econstructor; eauto.
+  econstructor; split.
+  apply plus_one. eapply exec_Lcond_true; eauto.
+  econstructor; eauto.
+  (* Lcond false *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  assert (REACH': (reachable f)!!ifnot = true).
+  eapply reachable_successors; eauto.
+  unfold successors; rewrite H; auto with coqlib.
+  exploit find_label_lin_succ; eauto. inv WTI; auto. intros [c'' AT'].
+  destruct (starts_with ifso c').
+  econstructor; split.
+  apply plus_one. eapply exec_Lcond_true; eauto.
+  change true with (negb false). apply eval_negate_condition; auto.
+  econstructor; eauto.
+  econstructor; split.
+  eapply plus_left'. 
+  eapply exec_Lcond_false; eauto.
+  eapply add_branch_correct; eauto.
+  eapply is_tail_add_branch. eapply is_tail_cons_left. 
+  eapply is_tail_find_label. eauto.
+  traceEq.
+  econstructor; eauto.
+  (* Lreturn *)
+  destruct (find_label_lin_inv _ _ _ _ H REACH AT) as [c' EQ].
+  simpl in EQ. subst c.
+  econstructor; split.
+  apply plus_one. eapply exec_Lreturn; eauto.
+  rewrite (parent_locset_match _ _ STACKS).
+  econstructor; eauto. 
+  (* internal function *)
+  assert (REACH: (reachable f)!!(LTL.fn_entrypoint f) = true).
     apply reachable_entrypoint.
-  assert (VO2: valid_outcome f Return). simpl; auto.
-  assert (VO1: valid_outcome f (Cont (fn_entrypoint f))).
-    eapply exec_blocks_valid_outcome; eauto.
-  assert (exists k, fn_code (linearize_function f) = Llabel f.(fn_entrypoint) :: k).
-    unfold linearize_function; simpl. 
-    elim (enumerate_head f). intros tl EN. rewrite EN. 
-    simpl. elim VO1. intros REACH [b EQ]. rewrite EQ. 
-    exists (linearize_block b (linearize_body f tl)). auto.
-  elim H2; intros k EQ.
-  assert (find_label (fn_entrypoint f) (fn_code (linearize_function f)) =
-            Some k).
-    rewrite EQ. simpl. rewrite peq_true. auto.
-  generalize (H1 f k (refl_equal _) REACH0 H3 VO2). 
-  intros [k' [EX CO]].
-  inversion CO. subst k'. 
-  unfold transf_function. apply cleanup_function_correct.
-  econstructor. eauto. rewrite EQ. eapply exec_trans.
-  apply exec_one. constructor. eauto. traceEq.
-  apply unique_labels_lin_function.
+  inv H6. 
+  exploit find_label_lin_succ; eauto. inv H1; auto. intros [c'' AT'].
+  simpl. econstructor; split.
+  eapply plus_left'.  
+  eapply exec_function_internal; eauto.
+  simpl. eapply add_branch_correct. 
+  simpl. eapply is_tail_add_branch. constructor. eauto.
+  traceEq.
+  econstructor; eauto.
   (* external function *)
+  simpl. econstructor; split.
+  apply plus_one. eapply exec_function_external; eauto.
+  econstructor; eauto.
+  (* return *)
+  inv H4. inv H1.
+  exploit find_label_lin_succ; eauto. intros [c' AT].
+  econstructor; split.
+  eapply plus_left'.
+  eapply exec_return; eauto.
+  eapply add_branch_correct; eauto. traceEq.
   econstructor; eauto.
 Qed.
 
-End LINEARIZATION.
+Lemma transf_initial_states:
+  forall st1, LTL.initial_state prog st1 ->
+  exists st2, LTLin.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H. 
+  exists (Callstate nil (transf_fundef f) (Locmap.init Vundef) (Genv.init_mem tprog)); split.
+  econstructor; eauto.
+  change (prog_main tprog) with (prog_main prog).
+  rewrite symbols_preserved. eauto.
+  apply function_ptr_translated; auto.
+  rewrite <- H2. apply sig_preserved.
+  replace (Genv.init_mem tprog) with (Genv.init_mem prog).
+  constructor. constructor.
+  eapply Genv.find_funct_ptr_prop; eauto.
+  symmetry. unfold tprog, transf_program. apply Genv.init_mem_transf.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> LTL.final_state st1 r -> LTLin.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv H6. constructor. auto. 
+Qed.
 
 Theorem transf_program_correct:
-  forall (p: LTL.program) (t: trace) (r: val),
-  LTL.exec_program p t r ->
-  Linear.exec_program (transf_program p) t r.
+  forall (beh: program_behavior),
+  LTL.exec_program prog beh -> LTLin.exec_program tprog beh.
 Proof.
-  intros p t r [b [f [ls [m [FIND1 [FIND2 [SIG [EX RES]]]]]]]].
-  red. exists b; exists (transf_fundef f); exists ls; exists m.
-  split. change (prog_main (transf_program p)) with (prog_main p).
-  rewrite symbols_preserved; auto.
-  split. apply function_ptr_translated. auto.
-  split. generalize (sig_preserved f); congruence.
-  split. apply transf_function_correct. 
-  unfold transf_program. rewrite Genv.init_mem_transf. auto.
-  rewrite sig_preserved. exact RES.
+  unfold LTL.exec_program, exec_program; intros.
+  eapply simulation_plus_preservation; eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  eexact transf_step_correct.
 Qed.
 
+End LINEARIZATION.
diff --git a/backend/Linearizetyping.v b/backend/Linearizetyping.v
index 66926e9a..c930ca52 100644
--- a/backend/Linearizetyping.v
+++ b/backend/Linearizetyping.v
@@ -6,327 +6,72 @@ Require Import AST.
 Require Import Op.
 Require Import Locations.
 Require Import LTL.
-Require Import Linear.
-Require Import Linearize.
 Require Import LTLtyping.
-Require Import Lineartyping.
+Require Import LTLin.
+Require Import Linearize.
+Require Import LTLintyping.
 Require Import Conventions.
 
-(** * Validity of resource bounds *)
-
-(** In this section, we show that the resource bounds computed by
-  [function_bounds] are valid: all references to callee-save registers
-  and stack slots in the code of the function are within those bounds. *)
-
-Section BOUNDS.
-
-Variable f: Linear.function.
-Let b := function_bounds f.
-
-Lemma max_over_list_bound:
-  forall (A: Set) (valu: A -> Z) (l: list A) (x: A),
-  In x l -> valu x <= max_over_list A valu l.
-Proof.
-  intros until x. unfold max_over_list.
-  assert (forall c z,
-            let f := fold_left (fun x y => Zmax x (valu y)) c z in
-            z <= f /\ (In x c -> valu x <= f)).
-    induction c; simpl; intros.
-    split. omega. tauto.
-    elim (IHc (Zmax z (valu a))); intros. 
-    split. apply Zle_trans with (Zmax z (valu a)). apply Zmax1. auto. 
-    intro H1; elim H1; intro. 
-    subst a. apply Zle_trans with (Zmax z (valu x)). 
-    apply Zmax2. auto. auto.
-  intro. elim (H l 0); intros. auto.
-Qed.
-
-Lemma max_over_instrs_bound:
-  forall (valu: instruction -> Z) i,
-  In i f.(fn_code) -> valu i <= max_over_instrs f valu.
-Proof.
-  intros. unfold max_over_instrs. apply max_over_list_bound; auto.
-Qed.
-
-Lemma max_over_regs_of_funct_bound:
-  forall (valu: mreg -> Z) i r,
-  In i f.(fn_code) -> In r (regs_of_instr i) ->
-  valu r <= max_over_regs_of_funct f valu.
-Proof.
-  intros. unfold max_over_regs_of_funct. 
-  apply Zle_trans with (max_over_regs_of_instr valu i).
-  unfold max_over_regs_of_instr. apply max_over_list_bound. auto.
-  apply max_over_instrs_bound. auto.
-Qed.
-
-Lemma max_over_slots_of_funct_bound:
-  forall (valu: slot -> Z) i s,
-  In i f.(fn_code) -> In s (slots_of_instr i) ->
-  valu s <= max_over_slots_of_funct f valu.
-Proof.
-  intros. unfold max_over_slots_of_funct. 
-  apply Zle_trans with (max_over_slots_of_instr valu i).
-  unfold max_over_slots_of_instr. apply max_over_list_bound. auto.
-  apply max_over_instrs_bound. auto.
-Qed.
-
-Lemma int_callee_save_bound:
-  forall i r,
-  In i f.(fn_code) -> In r (regs_of_instr i) ->
-  index_int_callee_save r < bound_int_callee_save b.
-Proof.
-  intros. apply Zlt_le_trans with (int_callee_save r).
-  unfold int_callee_save. omega.
-  unfold b, function_bounds, bound_int_callee_save. 
-  eapply max_over_regs_of_funct_bound; eauto.
-Qed.
-
-Lemma float_callee_save_bound:
-  forall i r,
-  In i f.(fn_code) -> In r (regs_of_instr i) ->
-  index_float_callee_save r < bound_float_callee_save b.
-Proof.
-  intros. apply Zlt_le_trans with (float_callee_save r).
-  unfold float_callee_save. omega.
-  unfold b, function_bounds, bound_float_callee_save. 
-  eapply max_over_regs_of_funct_bound; eauto.
-Qed.
-
-Lemma int_local_slot_bound:
-  forall i ofs,
-  In i f.(fn_code) -> In (Local ofs Tint) (slots_of_instr i) ->
-  ofs < bound_int_local b.
-Proof.
-  intros. apply Zlt_le_trans with (int_local (Local ofs Tint)).
-  unfold int_local. omega.
-  unfold b, function_bounds, bound_int_local.
-  eapply max_over_slots_of_funct_bound; eauto.
-Qed.
-
-Lemma float_local_slot_bound:
-  forall i ofs,
-  In i f.(fn_code) -> In (Local ofs Tfloat) (slots_of_instr i) ->
-  ofs < bound_float_local b.
-Proof.
-  intros. apply Zlt_le_trans with (float_local (Local ofs Tfloat)).
-  unfold float_local. omega.
-  unfold b, function_bounds, bound_float_local.
-  eapply max_over_slots_of_funct_bound; eauto.
-Qed.
-
-Lemma outgoing_slot_bound:
-  forall i ofs ty,
-  In i f.(fn_code) -> In (Outgoing ofs ty) (slots_of_instr i) ->
-  ofs + typesize ty <= bound_outgoing b.
-Proof.
-  intros. change (ofs + typesize ty) with (outgoing_slot (Outgoing ofs ty)).
-  unfold b, function_bounds, bound_outgoing.
-  apply Zmax_bound_r. apply Zmax_bound_r. 
-  eapply max_over_slots_of_funct_bound; eauto.
-Qed.
-
-Lemma size_arguments_bound:
-  forall sig ros,
-  In (Lcall sig ros) f.(fn_code) ->
-  size_arguments sig <= bound_outgoing b.
-Proof.
-  intros. change (size_arguments sig) with (outgoing_space (Lcall sig ros)).
-  unfold b, function_bounds, bound_outgoing.
-  apply Zmax_bound_r. apply Zmax_bound_l.
-  apply max_over_instrs_bound; auto.
-Qed.
-
-End BOUNDS.
-
-(** Consequently, all machine registers or stack slots mentioned by one
-  of the instructions of function [f] are within bounds. *)
-
-Lemma mreg_is_bounded:
-  forall f i r,
-  In i f.(fn_code) -> In r (regs_of_instr i) ->
-  mreg_bounded f r.
-Proof.
-  intros. unfold mreg_bounded. 
-  case (mreg_type r).
-  eapply int_callee_save_bound; eauto.
-  eapply float_callee_save_bound; eauto.
-Qed.
-
-Lemma slot_is_bounded:
-  forall f i s,
-  In i (transf_function f).(fn_code) -> In s (slots_of_instr i) ->
-  LTLtyping.slot_bounded f s ->
-  slot_bounded (transf_function f) s.
-Proof.
-  intros. unfold slot_bounded. 
-  destruct s.
-  destruct t.
-  split. exact H1. eapply int_local_slot_bound; eauto.
-  split. exact H1. eapply float_local_slot_bound; eauto.
-  unfold linearize_function; simpl. exact H1.
-  split. exact H1. eapply outgoing_slot_bound; eauto.
-Qed.
-
-(** * Conservation property of the cleanup pass *)
-
-(** We show that the cleanup pass only deletes some [Lgoto] instructions:
-  all other instructions present in the output of naive linearization
-  are in the cleaned-up code, and all instructions in the cleaned-up code
-  are in the output of naive linearization. *)
-
-Lemma cleanup_code_conservation:
-  forall i,
-  match i with Lgoto _ => False | _ => True end ->
-  forall c,
-  In i c -> In i (cleanup_code c).
-Proof.
-  induction c; simpl.
-  auto.
-  intro.
-  assert (In i (a :: cleanup_code c)).
-    elim H0; intro. subst i. auto with coqlib.
-    apply in_cons. auto.
-  destruct a; auto. 
-  assert (In i (cleanup_code c)).
-    elim H1; intro. subst i. contradiction. auto.
-  case (starts_with l c). auto. apply in_cons; auto. 
-Qed.
-
-Lemma cleanup_function_conservation:
-  forall f i,
-  In i (linearize_function f).(fn_code) ->
-  match i with Lgoto _ => False | _ => True end ->
-  In i (transf_function f).(fn_code).
-Proof.
-  intros. unfold transf_function. unfold cleanup_function.
-  simpl. simpl in H0. eapply cleanup_code_conservation; eauto.
-Qed.
-
-Lemma cleanup_code_conservation_2:
-  forall i c, In i (cleanup_code c) -> In i c.
-Proof.
-  induction c; simpl.
-  auto.
-  assert (In i (a :: cleanup_code c) -> a = i \/ In i c).
-    intro. elim H; tauto.
-  destruct a; auto.
-  case (starts_with l c). auto. auto. 
-Qed.
+(** * Type preservation for the linearization pass *)
 
-Lemma cleanup_function_conservation_2:
-  forall f i,
-  In i (transf_function f).(fn_code) ->
-  In i (linearize_function f).(fn_code).
+Lemma wt_add_instr:
+  forall f i k, wt_instr f i -> wt_code f k -> wt_code f (i :: k).
 Proof.
-  simpl. intros. eapply cleanup_code_conservation_2; eauto.
+  intros; red; intros. elim H1; intro. subst i0; auto. auto.
 Qed.
 
-(** * Type preservation for the linearization pass *)
-
-Lemma linearize_block_incl:
-  forall k b, incl k (linearize_block b k).
+Lemma wt_add_branch:
+  forall f s k, wt_code f k -> wt_code f (add_branch s k).
 Proof.
-  induction b; simpl; auto with coqlib.
-  case (starts_with n k); auto with coqlib.
+  intros; unfold add_branch. destruct (starts_with s k).
+  auto. apply wt_add_instr; auto. constructor.
 Qed.
 
-Lemma wt_linearize_block:
-  forall f k,
-  (forall i, In i k -> wt_instr (transf_function f) i) ->
-  forall b i,
-  wt_block f b ->
-  incl (linearize_block b k) (linearize_function f).(fn_code) ->
-  In i (linearize_block b k) -> wt_instr (transf_function f) i.
+Lemma wt_linearize_instr:
+  forall f instr,
+  LTLtyping.wt_instr f instr ->
+  forall k,
+  wt_code f.(LTL.fn_sig) k ->
+  wt_code f.(LTL.fn_sig) (linearize_instr instr k).
 Proof.
-  induction b; simpl; intros;
-  try (generalize (cleanup_function_conservation 
-                     _ _ (H1 _ (in_eq _ _)) I));
-  inversion H0;
-  try (elim H2; intro; [subst i|eauto with coqlib]);
-  intros.
-  (* getstack *)
-  constructor. auto. eapply slot_is_bounded; eauto.
-  simpl; auto with coqlib. 
-  eapply mreg_is_bounded; eauto.
-  simpl; auto with coqlib. 
-  (* setstack *)
-  constructor. auto. auto. 
-  eapply slot_is_bounded; eauto. 
-  simpl; auto with coqlib. 
-  (* move *)
-  subst o; subst l. constructor. auto. 
-  eapply mreg_is_bounded; eauto.
-  simpl; auto with coqlib. 
-  (* undef *)
-  subst o; subst l. constructor. 
-  eapply mreg_is_bounded; eauto.
-  simpl; auto with coqlib. 
-  (* other ops *)
-  constructor; auto. 
-  eapply mreg_is_bounded; eauto.
-  simpl; auto with coqlib. 
-  (* load *)
-  constructor; auto.
-  eapply mreg_is_bounded; eauto.
-  simpl; auto with coqlib. 
-  (* store *)
-  constructor; auto.
-  (* call *)
-  constructor; auto. 
-  eapply size_arguments_bound; eauto.
-  (* alloc *)
-  constructor.
-  (* goto *)
-  constructor. 
-  (* cond *)
-  destruct (starts_with n k).
-  elim H2; intro.
-  subst i. constructor. 
-  rewrite H4. destruct c; reflexivity.
-  elim H8; intro.
-  subst i. constructor.
-  eauto with coqlib.
-  elim H2; intro.
-  subst i. constructor. auto.
-  elim H8; intro.
-  subst i. constructor.
-  eauto with coqlib.
-  (* return *)
-  constructor.
+  induction 1; simpl; intros.
+  apply wt_add_branch; auto.
+  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
+  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
+  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
+  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
+  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
+  apply wt_add_instr. constructor; auto. auto.
+  apply wt_add_instr. constructor; auto. apply wt_add_branch; auto.
+  destruct (starts_with s1 k); apply wt_add_instr.
+  constructor; auto. rewrite H. destruct cond; auto. 
+  apply wt_add_branch; auto.
+  constructor; auto. apply wt_add_branch; auto.
+  apply wt_add_instr. constructor; auto. auto.
 Qed.
 
 Lemma wt_linearize_body:
   forall f,
-  LTLtyping.wt_function f ->
-  forall enum i,
-  incl (linearize_body f enum) (linearize_function f).(fn_code) ->
-  In i (linearize_body f enum) -> wt_instr (transf_function f) i.
-Proof.
-  induction enum.
-  simpl; intros; contradiction.
-  intro i. simpl.
-  caseEq (LTL.fn_code f)!a. intros b EQ INCL IN.
-  elim IN; intro. subst i; constructor. 
-  apply wt_linearize_block with (linearize_body f enum) b.
-  intros; apply IHenum. 
-  apply incl_tran with (linearize_block b (linearize_body f enum)).
-  apply linearize_block_incl.
-  eauto with coqlib.
-  auto.
-  eapply H; eauto.
-  eauto with coqlib. auto.
+  (forall pc instr, 
+     f.(LTL.fn_code)!pc = Some instr -> LTLtyping.wt_instr f instr) ->
+  forall enum,
+  wt_code f.(LTL.fn_sig) (linearize_body f enum).
+Proof.
+  induction enum; simpl.
+  red; simpl; intros; contradiction.
+  caseEq ((LTL.fn_code f)!a); intros.
+  apply wt_add_instr. constructor. apply wt_linearize_instr; eauto with coqlib.
   auto.
-Qed. 
+Qed.
 
 Lemma wt_transf_function:
   forall f,
   LTLtyping.wt_function f ->
   wt_function (transf_function f). 
 Proof.
-  intros; red; intros.
-  apply wt_linearize_body with (enumerate f); auto.
-  simpl. apply incl_refl.
-  apply cleanup_function_conservation_2; auto.
+  intros. inv H. constructor; auto.
+  simpl. apply wt_add_branch. 
+  apply wt_linearize_body. auto. 
 Qed.
 
 Lemma wt_transf_fundef:
@@ -342,7 +87,7 @@ Qed.
 Lemma program_typing_preserved:
   forall (p: LTL.program),
   LTLtyping.wt_program p ->
-  Lineartyping.wt_program (transf_program p).
+  LTLintyping.wt_program (transf_program p).
 Proof.
   intros; red; intros.
   generalize (transform_program_function transf_fundef p i f H0).
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index cbe18311..baf522aa 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -1,4 +1,4 @@
-(** Typing rules and computation of stack bounds for Linear. *)
+(** Typing rules for Linear. *)
 
 Require Import Coqlib.
 Require Import Maps.
@@ -9,218 +9,55 @@ Require Import Locations.
 Require Import Linear.
 Require Import Conventions.
 
-(** * Resource bounds for a function *)
-
-(** The [bounds] record capture how many local and outgoing stack slots
-  and callee-save registers are used by a function. *)
-
-Record bounds : Set := mkbounds {
-  bound_int_local: Z;
-  bound_float_local: Z;
-  bound_int_callee_save: Z;
-  bound_float_callee_save: Z;
-  bound_outgoing: Z
-}.
-
-(** The resource bounds for a function are computed by a linear scan
-  of its instructions. *)
-
-Section BOUNDS.
-
-Variable f: function.
-
-Definition regs_of_instr (i: instruction) : list mreg :=
-  match i with
-  | Lgetstack s r => r :: nil
-  | Lsetstack r s => r :: nil
-  | Lop op args res => res :: args
-  | Lload chunk addr args dst => dst :: args
-  | Lstore chunk addr args src => src :: args
-  | Lcall sig (inl fn) => fn :: nil
-  | Lcall sig (inr _) => nil
-  | Lalloc => nil
-  | Llabel lbl => nil
-  | Lgoto lbl => nil
-  | Lcond cond args lbl => args
-  | Lreturn => nil
-  end.
-
-Definition slots_of_instr (i: instruction) : list slot :=
-  match i with
-  | Lgetstack s r => s :: nil
-  | Lsetstack r s => s :: nil
-  | _ => nil
-  end.
-
-Definition max_over_list (A: Set) (valu: A -> Z) (l: list A) : Z :=
-  List.fold_left (fun m l => Zmax m (valu l)) l 0.
-
-Definition max_over_instrs (valu: instruction -> Z) : Z :=
-  max_over_list instruction valu f.(fn_code).
-
-Definition max_over_regs_of_instr (valu: mreg -> Z) (i: instruction) : Z :=
-  max_over_list mreg valu (regs_of_instr i).
-
-Definition max_over_slots_of_instr (valu: slot -> Z) (i: instruction) : Z :=
-  max_over_list slot valu (slots_of_instr i).
-
-Definition max_over_regs_of_funct (valu: mreg -> Z) : Z :=
-  max_over_instrs (max_over_regs_of_instr valu).
-
-Definition max_over_slots_of_funct (valu: slot -> Z) : Z :=
-  max_over_instrs (max_over_slots_of_instr valu).
-
-Definition int_callee_save (r: mreg) := 1 + index_int_callee_save r.
-
-Definition float_callee_save (r: mreg) := 1 + index_float_callee_save r.
-
-Definition int_local (s: slot) :=
-  match s with Local ofs Tint => 1 + ofs | _ => 0 end.
-
-Definition float_local (s: slot) :=
-  match s with Local ofs Tfloat => 1 + ofs | _ => 0 end.
-
-Definition outgoing_slot (s: slot) :=
-  match s with Outgoing ofs ty => ofs + typesize ty | _ => 0 end.
-
-Definition outgoing_space (i: instruction) :=
-  match i with Lcall sig _ => size_arguments sig | _ => 0 end.
-
-Definition function_bounds :=
-  mkbounds
-    (max_over_slots_of_funct int_local)
-    (max_over_slots_of_funct float_local)
-    (max_over_regs_of_funct int_callee_save)
-    (max_over_regs_of_funct float_callee_save)
-    (Zmax 6
-          (Zmax (max_over_instrs outgoing_space)
-                (max_over_slots_of_funct outgoing_slot))).
-
-(** We show that bounds computed by [function_bounds] are all positive
-  or null, and moreover [bound_outgoing] is greater or equal to 6.
-  These properties are used later to reason about the layout of
-  the activation record. *)
-
-Lemma max_over_list_pos:
-  forall (A: Set) (valu: A -> Z) (l: list A),
-  max_over_list A valu l >= 0.
-Proof.
-  intros until valu. unfold max_over_list.
-  assert (forall l z, fold_left (fun x y => Zmax x (valu y)) l z >= z).
-  induction l; simpl; intros.
-  omega. apply Zge_trans with (Zmax z (valu a)). 
-  auto. apply Zle_ge. apply Zmax1. auto.
-Qed.
-
-Lemma max_over_slots_of_funct_pos:
-  forall (valu: slot -> Z), max_over_slots_of_funct valu >= 0.
-Proof.
-  intros. unfold max_over_slots_of_funct.
-  unfold max_over_instrs. apply max_over_list_pos.
-Qed.
-
-Lemma max_over_regs_of_funct_pos:
-  forall (valu: mreg -> Z), max_over_regs_of_funct valu >= 0.
-Proof.
-  intros. unfold max_over_regs_of_funct.
-  unfold max_over_instrs. apply max_over_list_pos.
-Qed.
-
-Lemma bound_int_local_pos:
-  bound_int_local function_bounds >= 0.
-Proof.
-  simpl. apply max_over_slots_of_funct_pos.
-Qed.
-
-Lemma bound_float_local_pos:
-  bound_float_local function_bounds >= 0.
-Proof.
-  simpl. apply max_over_slots_of_funct_pos.
-Qed.
-
-Lemma bound_int_callee_save_pos:
-  bound_int_callee_save function_bounds >= 0.
-Proof.
-  simpl. apply max_over_regs_of_funct_pos.
-Qed.
-
-Lemma bound_float_callee_save_pos:
-  bound_float_callee_save function_bounds >= 0.
-Proof.
-  simpl. apply max_over_regs_of_funct_pos.
-Qed.
-
-Lemma bound_outgoing_pos:
-  bound_outgoing function_bounds >= 6.
-Proof.
-  simpl. apply Zle_ge. apply Zmax_bound_l. omega.
-Qed.
-
-End BOUNDS.
-
-(** * Typing rules for Linear *)
-
-(** The typing rules for Linear are similar to those for LTL: we check
+(** The typing rules for Linear are similar to those for LTLin: we check
   that instructions receive the right number of arguments,
   and that the types of the argument and result registers agree
-  with what the instruction expects.  Moreover, we state that references
-  to callee-save registers and to stack slots are within the bounds
-  computed by [function_bounds].  This is true by construction of
-  [function_bounds], and is proved in [Linearizetyping], but it
-  is convenient to integrate this property within the typing judgement.
-*)
+  with what the instruction expects.  Moreover, we  also
+  enforces some correctness conditions on the offsets of stack slots
+  accessed through [Lgetstack] and [Lsetstack] Linear instructions. *)
 
 Section WT_INSTR.
 
 Variable funct: function.
-Let b := function_bounds funct.
 
-Definition mreg_bounded (r: mreg) :=
-  match mreg_type r with
-  | Tint => index_int_callee_save r < bound_int_callee_save b
-  | Tfloat => index_float_callee_save r < bound_float_callee_save b
+Definition slot_valid (s: slot) :=
+  match s with
+  | Local ofs ty => 0 <= ofs
+  | Outgoing ofs ty => 14 <= ofs
+  | Incoming ofs ty => 14 <= ofs /\ ofs + typesize ty <= size_arguments funct.(fn_sig)
   end.
 
-Definition slot_bounded (s: slot) :=
+Definition slot_writable (s: slot) :=
   match s with
-  | Local ofs Tint => 0 <= ofs < bound_int_local b
-  | Local ofs Tfloat => 0 <= ofs < bound_float_local b
-  | Outgoing ofs ty => 6 <= ofs /\ ofs + typesize ty <= bound_outgoing b
-  | Incoming ofs ty => 6 <= ofs /\ ofs + typesize ty <= size_arguments funct.(fn_sig)
+  | Local ofs ty => True
+  | Outgoing ofs ty => True
+  | Incoming ofs ty => False
   end.
 
 Inductive wt_instr : instruction -> Prop :=
   | wt_Lgetstack:
       forall s r,
       slot_type s = mreg_type r ->
-      slot_bounded s -> mreg_bounded r ->
+      slot_valid s ->
       wt_instr (Lgetstack s r)
   | wt_Lsetstack:
       forall s r,
-      match s with Incoming _ _ => False | _ => True end ->
       slot_type s = mreg_type r ->
-      slot_bounded s ->
+      slot_valid s -> slot_writable s ->
       wt_instr (Lsetstack r s)
   | wt_Lopmove:
       forall r1 r,
       mreg_type r1 = mreg_type r ->
-      mreg_bounded r ->
       wt_instr (Lop Omove (r1 :: nil) r)
-  | wt_Lopundef:
-      forall r,
-      mreg_bounded r ->
-      wt_instr (Lop Oundef nil r)
   | wt_Lop:
       forall op args res,
-      op <> Omove -> op <> Oundef ->
+      op <> Omove ->
       (List.map mreg_type args, mreg_type res) = type_of_operation op ->
-      mreg_bounded res ->
       wt_instr (Lop op args res)
   | wt_Lload:
       forall chunk addr args dst,
       List.map mreg_type args = type_of_addressing addr ->
       mreg_type dst = type_of_chunk chunk ->
-      mreg_bounded dst ->
       wt_instr (Lload chunk addr args dst)
   | wt_Lstore:
       forall chunk addr args src,
@@ -229,9 +66,13 @@ Inductive wt_instr : instruction -> Prop :=
       wt_instr (Lstore chunk addr args src)
   | wt_Lcall:
       forall sig ros,
-      size_arguments sig <= bound_outgoing b ->
       match ros with inl r => mreg_type r = Tint | _ => True end ->
       wt_instr (Lcall sig ros)
+  | wt_Ltailcall:
+      forall sig ros,
+      tailcall_possible sig ->
+      match ros with inl r => r = IT3 | _ => True end ->
+      wt_instr (Ltailcall sig ros)
   | wt_Lalloc:
       wt_instr Lalloc
   | wt_Llabel:
@@ -249,8 +90,11 @@ Inductive wt_instr : instruction -> Prop :=
 
 End WT_INSTR.
 
+Definition wt_code (f: function) (c: code) : Prop :=
+  forall instr, In instr c -> wt_instr f instr.
+
 Definition wt_function (f: function) : Prop :=
-  forall instr, In instr f.(fn_code) -> wt_instr f instr.
+  wt_code f f.(fn_code).
 
 Inductive wt_fundef: fundef -> Prop :=
   | wt_fundef_external: forall ef,
diff --git a/backend/Locations.v b/backend/Locations.v
index c97855ea..aaefc08c 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -434,7 +434,7 @@ End Loc.
 
 (** The [Locmap] module defines mappings from locations to values,
   used as evaluation environments for the semantics of the [LTL] 
-  and [Linear] intermediate languages.  *)
+  and [LTLin] intermediate languages.  *)
 
 Set Implicit Arguments.
 
diff --git a/backend/Mach.v b/backend/Mach.v
index f61620d1..05805ec5 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -1,7 +1,8 @@
-(** The Mach intermediate language: abstract syntax and semantics.
+(** The Mach intermediate language: abstract syntax.
 
   Mach is the last intermediate language before generation of assembly
-  code.
+  code.  This file defines the abstract syntax for Mach; two dynamic
+  semantics are given in modules [Machabstr] and [Machconcr].
 *)
 
 Require Import Coqlib.
@@ -14,7 +15,6 @@ Require Import Events.
 Require Import Globalenvs.
 Require Import Op.
 Require Import Locations.
-Require Conventions.
 
 (** * Abstract syntax *)
 
@@ -30,8 +30,8 @@ Require Conventions.
   the caller.
 
   These instructions implement a more concrete view of the activation
-  record than the the [Bgetstack] and [Bsetstack] instructions of
-  Linear: actual offsets are used instead of abstract stack slots; the
+  record than the the [Lgetstack] and [Lsetstack] instructions of
+  Linear: actual offsets are used instead of abstract stack slots, and the
   distinction between the caller's frame and the callee's frame is
   made explicit. *)
 
@@ -45,6 +45,7 @@ Inductive instruction: Set :=
   | Mload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mcall: signature -> mreg + ident -> instruction
+  | Mtailcall: signature -> mreg + ident -> instruction
   | Malloc: instruction
   | Mlabel: label -> instruction
   | Mgoto: label -> instruction
@@ -105,238 +106,25 @@ Fixpoint find_label (lbl: label) (c: code) {struct c} : option code :=
   | i1 :: il => if is_label lbl i1 then Some il else find_label lbl il
   end.
 
-(** The three stack-related Mach instructions are interpreted as
-  memory accesses relative to the stack pointer.  More precisely:
-- [Mgetstack ofs ty r] is a memory load at offset [ofs * 4] relative
-  to the stack pointer.
-- [Msetstack r ofs ty] is a memory store at offset [ofs * 4] relative
-  to the stack pointer.
-- [Mgetparam ofs ty r] is a memory load at offset [ofs * 4]
-  relative to the pointer found at offset 0 from the stack pointer.
-  The semantics maintain a linked structure of activation records,
-  with the current record containing a pointer to the record of the
-  caller function at offset 0. *)
-
-Definition chunk_of_type (ty: typ) :=
-  match ty with Tint => Mint32 | Tfloat => Mfloat64 end.
-
-Definition load_stack (m: mem) (sp: val) (ty: typ) (ofs: int) :=
-  Mem.loadv (chunk_of_type ty) m (Val.add sp (Vint ofs)).
-
-Definition store_stack (m: mem) (sp: val) (ty: typ) (ofs: int) (v: val) :=
-  Mem.storev (chunk_of_type ty) m (Val.add sp (Vint ofs)) v.
-
-Definition align_16_top (lo hi: Z) :=
-  Zmax 0 (((hi - lo + 15) / 16) * 16 + lo).
-
-Section RELSEM.
-
-Variable ge: genv.
+Lemma find_label_incl:
+  forall lbl c c', find_label lbl c = Some c' -> incl c' c.
+Proof.
+  induction c; simpl; intros. discriminate.
+  destruct (is_label lbl a). inv H. auto with coqlib. eauto with coqlib. 
+Qed.
 
-Definition find_function (ros: mreg + ident) (rs: regset) : option fundef :=
+Definition find_function_ptr
+        (ge: genv) (ros: mreg + ident) (rs: regset) : option block :=
   match ros with
-  | inl r => Genv.find_funct ge (rs r)
-  | inr symb =>
-      match Genv.find_symbol ge symb with
-      | None => None
-      | Some b => Genv.find_funct_ptr ge b
+  | inl r =>
+      match rs r with
+      | Vptr b ofs => if Int.eq ofs Int.zero then Some b else None
+      | _ => None
       end
+  | inr symb =>
+      Genv.find_symbol ge symb
   end.
 
-(** Extract the values of the arguments of an external call. *)
-
-Inductive extcall_arg: regset -> mem -> val -> loc -> val -> Prop :=
-  | extcall_arg_reg: forall rs m sp r,
-      extcall_arg rs m sp (R r) (rs r)
-  | extcall_arg_stack: forall rs m sp ofs ty v,
-      load_stack m sp ty (Int.repr (4 * ofs)) = Some v ->
-      extcall_arg rs m sp (S (Outgoing ofs ty)) v.
-
-Inductive extcall_args: regset -> mem -> val -> list loc -> list val -> Prop :=
-  | extcall_args_nil: forall rs m sp,
-      extcall_args rs m sp nil nil
-  | extcall_args_cons: forall rs m sp l1 ll v1 vl,
-      extcall_arg rs m sp l1 v1 -> extcall_args rs m sp ll vl ->
-      extcall_args rs m sp (l1 :: ll) (v1 :: vl).
-
-Definition extcall_arguments
-   (rs: regset) (m: mem) (sp: val) (sg: signature) (args: list val) : Prop :=
-  extcall_args rs m sp (Conventions.loc_arguments sg) args.
-
-(** [exec_instr ge f sp c rs m c' rs' m'] reflects the execution of
-  the first instruction in the current instruction sequence [c].  
-  [c'] is the current instruction sequence after this execution.
-  [rs] and [rs'] map machine registers to values, respectively
-  before and after instruction execution.  [m] and [m'] are the
-  memory states before and after. *)
-
-Inductive exec_instr:
-      function -> val ->
-      code -> regset -> mem -> trace ->
-      code -> regset -> mem -> Prop :=
-  | exec_Mlabel:
-      forall f sp lbl c rs m,
-      exec_instr f sp
-                 (Mlabel lbl :: c) rs m
-              E0 c rs m
-  | exec_Mgetstack:
-      forall f sp ofs ty dst c rs m v,
-      load_stack m sp ty ofs = Some v ->
-      exec_instr f sp
-                 (Mgetstack ofs ty dst :: c) rs m
-              E0 c (rs#dst <- v) m
-  | exec_Msetstack:
-      forall f sp src ofs ty c rs m m',
-      store_stack m sp ty ofs (rs src) = Some m' ->
-      exec_instr f sp
-                 (Msetstack src ofs ty :: c) rs m
-              E0 c rs m'
-  | exec_Mgetparam:
-      forall f sp parent ofs ty dst c rs m v,
-      load_stack m sp Tint (Int.repr 0) = Some parent ->
-      load_stack m parent ty ofs = Some v ->
-      exec_instr f sp
-                 (Mgetparam ofs ty dst :: c) rs m
-              E0 c (rs#dst <- v) m
-  | exec_Mop:
-      forall f sp op args res c rs m v,
-      eval_operation ge sp op rs##args = Some v ->
-      exec_instr f sp
-                 (Mop op args res :: c) rs m
-              E0 c (rs#res <- v) m
-  | exec_Mload:
-      forall f sp chunk addr args dst c rs m a v,
-      eval_addressing ge sp addr rs##args = Some a ->
-      Mem.loadv chunk m a = Some v ->
-      exec_instr f sp 
-                 (Mload chunk addr args dst :: c) rs m
-              E0 c (rs#dst <- v) m
-  | exec_Mstore:
-      forall f sp chunk addr args src c rs m m' a,
-      eval_addressing ge sp addr rs##args = Some a ->
-      Mem.storev chunk m a (rs src) = Some m' ->
-      exec_instr f sp
-                 (Mstore chunk addr args src :: c) rs m
-              E0 c rs m'
-  | exec_Mcall:
-      forall f sp sig ros c rs m f' t rs' m',
-      find_function ros rs = Some f' ->
-      exec_function f' sp rs m t rs' m' ->
-      exec_instr f sp
-                 (Mcall sig ros :: c) rs m
-               t c rs' m'
-  | exec_Malloc:
-      forall f sp c rs m sz m' blk,
-      rs (Conventions.loc_alloc_argument) = Vint sz ->
-      Mem.alloc m 0 (Int.signed sz) = (m', blk) ->
-      exec_instr f sp 
-                 (Malloc :: c) rs m
-              E0 c (rs#Conventions.loc_alloc_result <- 
-                                              (Vptr blk Int.zero)) m'
-  | exec_Mgoto:
-      forall f sp lbl c rs m c',
-      find_label lbl f.(fn_code) = Some c' ->
-      exec_instr f sp
-                 (Mgoto lbl :: c) rs m
-              E0 c' rs m
-  | exec_Mcond_true:
-      forall f sp cond args lbl c rs m c',
-      eval_condition cond rs##args = Some true ->
-      find_label lbl f.(fn_code) = Some c' ->
-      exec_instr f sp
-                 (Mcond cond args lbl :: c) rs m
-              E0 c' rs m
-  | exec_Mcond_false:
-      forall f sp cond args lbl c rs m,
-      eval_condition cond rs##args = Some false ->
-      exec_instr f sp
-                 (Mcond cond args lbl :: c) rs m
-              E0 c rs m
-
-with exec_instrs:
-      function -> val ->
-      code -> regset -> mem -> trace ->
-      code -> regset -> mem -> Prop :=
-  | exec_refl:
-      forall f sp c rs m,
-      exec_instrs f sp  c rs m E0 c rs m
-  | exec_one:
-      forall f sp c rs m t c' rs' m',
-      exec_instr f sp c rs m t c' rs' m' ->
-      exec_instrs f sp c rs m t c' rs' m'
-  | exec_trans:
-      forall f sp c1 rs1 m1 t1 c2 rs2 m2 t2 c3 rs3 m3 t3,
-      exec_instrs f sp  c1 rs1 m1 t1 c2 rs2 m2 ->
-      exec_instrs f sp  c2 rs2 m2 t2 c3 rs3 m3 ->
-      t3 = t1 ** t2 ->
-      exec_instrs f sp  c1 rs1 m1 t3 c3 rs3 m3
-
-(** In addition to reserving the word at offset 0 in the activation 
-  record for maintaining the linking of activation records,
-  we need to reserve the word at offset 12 to store the return address
-  into the caller.  However, the return address (a pointer within
-  the code of the caller) is not precisely known at this point:
-  it will be determined only after the final translation to PowerPC
-  assembly code.  Therefore, we simply reserve that word in the strongest
-  sense of the word ``reserve'': we make sure that whatever pointer
-  is stored there at function entry keeps the same value until the
-  final return instruction, and that the return value and final memory
-  state are the same regardless of the return address.  
-  This is captured in the evaluation rule [exec_function]
-  that quantifies universally over all possible values of the return
-  address, and pass this value to [exec_function_body].  In other
-  terms, the inference rule [exec_function] has an infinity of
-  premises, one for each possible return address.  Such infinitely
-  branching inference rules are uncommon in operational semantics,
-  but cause no difficulties in Coq. *)
-
-with exec_function_body:
-      function -> val -> val ->
-      regset -> mem -> trace -> regset -> mem -> Prop :=
-  | exec_funct_body:
-      forall f parent ra rs m t rs' m1 m2 m3 m4 stk c,
-      Mem.alloc m (- f.(fn_framesize))
-                  (align_16_top (- f.(fn_framesize)) f.(fn_stacksize))
-                                                        = (m1, stk) ->
-      let sp := Vptr stk (Int.repr (-f.(fn_framesize))) in
-      store_stack m1 sp Tint (Int.repr 0) parent = Some m2 ->
-      store_stack m2 sp Tint (Int.repr 12) ra = Some m3 ->
-      exec_instrs f sp
-                  f.(fn_code) rs m3
-                t (Mreturn :: c) rs' m4 ->
-      load_stack m4 sp Tint (Int.repr 0) = Some parent ->
-      load_stack m4 sp Tint (Int.repr 12) = Some ra ->
-      exec_function_body f parent ra rs m t rs' (Mem.free m4 stk)
-
-with exec_function:
-      fundef -> val -> regset -> mem -> trace -> regset -> mem -> Prop :=
-  | exec_funct_internal:
-      forall f parent rs m t rs' m',
-      (forall ra,
-         Val.has_type ra Tint ->
-         exec_function_body f parent ra rs m t rs' m') ->
-      exec_function (Internal f) parent rs m t rs' m'
-  | exec_funct_external:
-      forall ef parent args res rs1 rs2 m t,
-      event_match ef args t res ->
-      extcall_arguments rs1 m parent ef.(ef_sig) args ->
-      rs2 = (rs1#(Conventions.loc_result ef.(ef_sig)) <- res) ->
-      exec_function (External ef) parent rs1 m t rs2 m.
-
-Scheme exec_instr_ind4 := Minimality for exec_instr Sort Prop
-  with exec_instrs_ind4 := Minimality for exec_instrs Sort Prop
-  with exec_function_body_ind4 := Minimality for exec_function_body Sort Prop
-  with exec_function_ind4 := Minimality for exec_function Sort Prop.
-
-End RELSEM.
-
-Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
-  let ge := Genv.globalenv p in
-  let m0 := Genv.init_mem p in
-  exists b, exists f, exists rs, exists m,
-  Genv.find_symbol ge p.(prog_main) = Some b /\
-  Genv.find_funct_ptr ge b = Some f /\
-  exec_function ge f (Vptr Mem.nullptr Int.zero) (Regmap.init Vundef) m0
-                     t rs m /\
-  rs R3 = r.
+Definition align_16_top (lo hi: Z) :=
+  Zmax 0 (((hi - lo + 15) / 16) * 16 + lo).
 
diff --git a/backend/Machabstr.v b/backend/Machabstr.v
index d83ffa51..ad4e8e1a 100644
--- a/backend/Machabstr.v
+++ b/backend/Machabstr.v
@@ -1,4 +1,4 @@
-(** Alternate semantics for the Mach intermediate language. *)
+(** The Mach intermediate language: abstract semantics. *)
 
 Require Import Coqlib.
 Require Import Maps.
@@ -9,37 +9,53 @@ Require Import Values.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
 Require Conventions.
 Require Import Mach.
 
-(** This file defines an alternate semantics for the Mach intermediate
-  language, which differ from the standard semantics given in file [Mach]
-  as follows: the stack access instructions [Mgetstack], [Msetstack]
-  and [Mgetparam] are interpreted not as memory accesses, but as
-  accesses in a frame environment, not resident in memory.  The evaluation
-  relations take two such frame environments as parameters and results,
-  one for the current function and one for its caller. 
+(** This file defines the "abstract" semantics for the Mach
+  intermediate language, as opposed to the more concrete
+  semantics given in module [Machconcr].
+
+  The only difference with the concrete semantics is the interpretation
+  of the stack access instructions [Mgetstack], [Msetstack] and [Mgetparam].
+  In the concrete semantics, these are interpreted as memory accesses
+  relative to the stack pointer.  In the abstract semantics, these 
+  instructions are interpreted as accesses in a frame environment,
+  not resident in memory.  The frame environment is an additional
+  component of the state.
 
   Not having the frame data in memory facilitates the proof of
   the [Stacking] pass, which shows that the generated code executes
-  correctly with the alternate semantics.  In file [Machabstr2mach],
-  we show an implication from this alternate semantics to
-  the standard semantics, thus establishing that the [Stacking] pass
-  generates code that behaves correctly against the standard [Mach]
-  semantics as well. *)
+  correctly with the abstract semantics.
+*)
 
 (** * Structure of abstract stack frames *)
 
-(** A frame has the same structure as the contents of a memory block. *)
+(** An abstract stack frame comprises a low bound [fr_low] (the high bound
+  is implicitly 0) and a mapping from (type, offset) pairs to values. *)
+
+Record frame : Set := mkframe {
+  fr_low: Z;
+  fr_contents: typ -> Z -> val
+}.
 
-Definition frame := block_contents.
+Definition typ_eq: forall (ty1 ty2: typ), {ty1=ty2} + {ty1<>ty2}.
+Proof. decide equality. Defined.
 
-Definition empty_frame := empty_block 0 0.
+Definition update (ty: typ) (ofs: Z) (v: val) (f: frame) : frame :=
+  mkframe f.(fr_low)
+   (fun ty' ofs' => 
+     if zeq ofs ofs' then
+       if typ_eq ty ty' then v else Vundef
+     else
+       if zle (ofs' + AST.typesize ty') ofs then f.(fr_contents) ty' ofs'
+       else if zle (ofs + AST.typesize ty) ofs' then f.(fr_contents) ty' ofs'
+       else Vundef).
 
-Definition mem_type (ty: typ) :=
-  match ty with Tint => Size32 | Tfloat => Size64 end.
+Definition empty_frame := mkframe 0 (fun ty ofs => Vundef).
 
 (** [get_slot fr ty ofs v] holds if the frame [fr] contains value [v]
   with type [ty] at word offset [ofs]. *)
@@ -47,46 +63,24 @@ Definition mem_type (ty: typ) :=
 Inductive get_slot: frame -> typ -> Z -> val -> Prop :=
   | get_slot_intro:
       forall fr ty ofs v,
-      0 <= ofs ->
-      fr.(low) + ofs + 4 * typesize ty <= 0 ->
-      v = load_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) -> 
+      24 <= ofs ->
+      fr.(fr_low) + ofs + AST.typesize ty <= 0 ->
+      v = fr.(fr_contents) ty (fr.(fr_low) + ofs) -> 
       get_slot fr ty ofs v.
 
-Remark size_mem_type:
-  forall ty, size_mem (mem_type ty) = 4 * typesize ty.
-Proof.
-  destruct ty; reflexivity.
-Qed.
-
-Remark set_slot_undef_outside:
-  forall fr ty ofs v,
-  fr.(high) = 0 ->
-  0 <= ofs ->
-  fr.(low) + ofs + 4 * typesize ty <= 0 ->
-  (forall x, x < fr.(low) \/ x >= fr.(high) -> fr.(contents) x = Undef) ->
-  (forall x, x < fr.(low) \/ x >= fr.(high) ->
-     store_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) v x = Undef).
-Proof.
-  intros. apply store_contents_undef_outside with fr.(low) fr.(high).
-  rewrite <- size_mem_type in H1. omega. assumption. assumption.
-Qed.  
-
 (** [set_slot fr ty ofs v fr'] holds if frame [fr'] is obtained from
   frame [fr] by storing value [v] with type [ty] at word offset [ofs]. *)
 
 Inductive set_slot: frame -> typ -> Z -> val -> frame -> Prop :=
   | set_slot_intro:
-      forall fr ty ofs v
-      (A: fr.(high) = 0)
-      (B: 0 <= ofs)
-      (C: fr.(low) + ofs + 4 * typesize ty <= 0),
-      set_slot fr ty ofs v
-        (mkblock fr.(low) fr.(high)
-          (store_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) v)
-          (set_slot_undef_outside fr ty ofs v A B C fr.(undef_outside))).
+      forall fr ty ofs v fr',
+      24 <= ofs ->
+      fr.(fr_low) + ofs + AST.typesize ty <= 0 ->
+      fr' = update ty (fr.(fr_low) + ofs) v fr ->
+      set_slot fr ty ofs v fr'.
 
 Definition init_frame (f: function) :=
-  empty_block (- f.(fn_framesize)) 0.
+  mkframe (- f.(fn_framesize)) (fun ty ofs => Vundef).
 
 (** Extract the values of the arguments of an external call. *)
 
@@ -108,320 +102,172 @@ Definition extcall_arguments
    (rs: regset) (fr: frame) (sg: signature) (args: list val) : Prop :=
   extcall_args rs fr (Conventions.loc_arguments sg) args.
     
+(** The components of an execution state are:
+
+- [State cs f sp c rs fr m]: [f] is the function currently executing.
+  [sp] is the stack pointer.  [c] is the list of instructions that
+  remain to be executed.  [rs] assigns values to registers. 
+  [fr] is the current frame, as described above.  [m] is the memory state.
+- [Callstate cs f rs m]: [f] is the function definition being called.
+  [rs] is the current values of registers,
+  and [m] the current memory state.
+- [Returnstate cs rs m]: [rs] is the current values of registers,
+  and [m] the current memory state.
+
+[cs] is a list of stack frames [Stackframe f sp c fr],
+where [f] is the block reference for the calling function,
+[c] the code within this function that follows the call instruction,
+[sp] its stack pointer, and [fr] its private frame. *)
+
+Inductive stackframe: Set :=
+  | Stackframe:
+      forall (f: function) (sp: val) (c: code) (fr: frame),
+      stackframe.
+
+Inductive state: Set :=
+  | State:
+      forall (stack: list stackframe) (f: function) (sp: val)
+             (c: code) (rs: regset) (fr: frame) (m: mem),
+      state
+  | Callstate:
+      forall (stack: list stackframe) (f: fundef) (rs: regset) (m: mem),
+      state
+  | Returnstate:
+      forall (stack: list stackframe) (rs: regset) (m: mem),
+      state.
+
+(** [parent_frame s] returns the frame of the calling function.
+  It is used to access function parameters that are passed on the stack
+  (the [Mgetparent] instruction). *)
+
+Definition parent_frame (s: list stackframe) : frame :=
+  match s with
+  | nil => empty_frame
+  | Stackframe f sp c fr :: s => fr
+  end.
+
 Section RELSEM.
 
 Variable ge: genv.
 
-(** Execution of one instruction has the form
-  [exec_instr ge f sp parent c rs fr m c' rs' fr' m'],
-  where [parent] is the caller's frame (read-only)
-  and [fr], [fr'] are the current frame, before and after execution
-  of one instruction.  The other parameters are as in the Mach semantics. *)
+Definition find_function (ros: mreg + ident) (rs: regset) : option fundef :=
+  match ros with
+  | inl r => Genv.find_funct ge (rs r)
+  | inr symb =>
+      match Genv.find_symbol ge symb with
+      | None => None
+      | Some b => Genv.find_funct_ptr ge b
+      end
+  end.
 
-Inductive exec_instr:
-      function -> val -> frame ->
-      code -> regset -> frame -> mem -> trace ->
-      code -> regset -> frame -> mem -> Prop :=
+Inductive step: state -> trace -> state -> Prop :=
   | exec_Mlabel:
-      forall f sp parent lbl c rs fr m,
-      exec_instr f sp parent
-                 (Mlabel lbl :: c) rs fr m
-              E0 c rs fr m
+      forall s f sp lbl c rs fr m,
+      step (State s f sp (Mlabel lbl :: c) rs fr m)
+        E0 (State s f sp c rs fr m)
   | exec_Mgetstack:
-      forall f sp parent ofs ty dst c rs fr m v,
+      forall s f sp ofs ty dst c rs fr m v,
       get_slot fr ty (Int.signed ofs) v ->
-      exec_instr f sp parent
-                 (Mgetstack ofs ty dst :: c) rs fr m
-              E0 c (rs#dst <- v) fr m
+      step (State s f sp (Mgetstack ofs ty dst :: c) rs fr m)
+        E0 (State s f sp c (rs#dst <- v) fr m)
   | exec_Msetstack:
-     forall f sp parent src ofs ty c rs fr m fr',
+     forall s f sp src ofs ty c rs fr m fr',
       set_slot fr ty (Int.signed ofs) (rs src) fr' ->
-      exec_instr f sp parent
-                 (Msetstack src ofs ty :: c) rs fr m
-              E0 c rs fr' m
+      step (State s f sp (Msetstack src ofs ty :: c) rs fr m)
+        E0 (State s f sp c rs fr' m)
   | exec_Mgetparam:
-      forall f sp parent ofs ty dst c rs fr m v,
-      get_slot parent ty (Int.signed ofs) v ->
-      exec_instr f sp parent
-                 (Mgetparam ofs ty dst :: c) rs fr m
-              E0 c (rs#dst <- v) fr m
+      forall s f sp ofs ty dst c rs fr m v,
+      get_slot (parent_frame s) ty (Int.signed ofs) v ->
+      step (State s f sp (Mgetparam ofs ty dst :: c) rs fr m)
+        E0 (State s f sp c (rs#dst <- v) fr m)
   | exec_Mop:
-      forall f sp parent op args res c rs fr m v,
-      eval_operation ge sp op rs##args = Some v ->
-      exec_instr f sp parent
-                 (Mop op args res :: c) rs fr m
-              E0 c (rs#res <- v) fr m
+      forall s f sp op args res c rs fr m v,
+      eval_operation ge sp op rs##args m = Some v ->
+      step (State s f sp (Mop op args res :: c) rs fr m)
+        E0 (State s f sp c (rs#res <- v) fr m)
   | exec_Mload:
-      forall f sp parent chunk addr args dst c rs fr m a v,
+      forall s f sp chunk addr args dst c rs fr m a v,
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
-      exec_instr f sp parent
-                 (Mload chunk addr args dst :: c) rs fr m
-              E0 c (rs#dst <- v) fr m
+      step (State s f sp (Mload chunk addr args dst :: c) rs fr m)
+        E0 (State s f sp c (rs#dst <- v) fr m)
   | exec_Mstore:
-      forall f sp parent chunk addr args src c rs fr m m' a,
+      forall s f sp chunk addr args src c rs fr m m' a,
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.storev chunk m a (rs src) = Some m' ->
-      exec_instr f sp parent
-                 (Mstore chunk addr args src :: c) rs fr m
-              E0 c rs fr m'
+      step (State s f sp (Mstore chunk addr args src :: c) rs fr m)
+        E0 (State s f sp c rs fr m')
   | exec_Mcall:
-      forall f sp parent sig ros c rs fr m t f' rs' m',
-      find_function ge ros rs = Some f' ->
-      exec_function f' fr rs m t rs' m' ->
-      exec_instr f sp parent
-                 (Mcall sig ros :: c) rs fr m
-               t c rs' fr m'
+      forall s f sp sig ros c rs fr m f',
+      find_function ros rs = Some f' ->
+      step (State s f sp (Mcall sig ros :: c) rs fr m)
+        E0 (Callstate (Stackframe f sp c fr :: s) f' rs m)
+  | exec_Mtailcall:
+      forall s f stk soff sig ros c rs fr m f',
+      find_function ros rs = Some f' ->
+      step (State s f (Vptr stk soff) (Mtailcall sig ros :: c) rs fr m)
+        E0 (Callstate s f' rs (Mem.free m stk))
+
   | exec_Malloc:
-      forall f sp parent c rs fr m sz m' blk,
+      forall s f sp c rs fr m sz m' blk,
       rs (Conventions.loc_alloc_argument) = Vint sz ->
       Mem.alloc m 0 (Int.signed sz) = (m', blk) ->
-      exec_instr f sp parent
-                 (Malloc :: c) rs fr m
-              E0 c (rs#Conventions.loc_alloc_result <- 
-                                              (Vptr blk Int.zero)) fr m'
+      step (State s f sp (Malloc :: c) rs fr m)
+        E0 (State s f sp c
+                   (rs#Conventions.loc_alloc_result <- (Vptr blk Int.zero))
+                    fr m')
   | exec_Mgoto:
-      forall f sp parent lbl c rs fr m c',
+      forall s f sp lbl c rs fr m c',
       find_label lbl f.(fn_code) = Some c' ->
-      exec_instr f sp parent
-                 (Mgoto lbl :: c) rs fr m
-              E0 c' rs fr m
+      step (State s f sp (Mgoto lbl :: c) rs fr m)
+        E0 (State s f sp c' rs fr m)
   | exec_Mcond_true:
-      forall f sp parent cond args lbl c rs fr m c',
-      eval_condition cond rs##args = Some true ->
+      forall s f sp cond args lbl c rs fr m c',
+      eval_condition cond rs##args m = Some true ->
       find_label lbl f.(fn_code) = Some c' ->
-      exec_instr f sp parent
-                 (Mcond cond args lbl :: c) rs fr m
-              E0 c' rs fr m
+      step (State s f sp (Mcond cond args lbl :: c) rs fr m)
+        E0 (State s f sp c' rs fr m)
   | exec_Mcond_false:
-      forall f sp parent cond args lbl c rs fr m,
-      eval_condition cond rs##args = Some false ->
-      exec_instr f sp parent
-                 (Mcond cond args lbl :: c) rs fr m
-              E0 c rs fr m
-
-with exec_instrs:
-      function -> val -> frame ->
-      code -> regset -> frame -> mem -> trace ->
-      code -> regset -> frame -> mem -> Prop :=
-  | exec_refl:
-      forall f sp parent c rs fr m,
-      exec_instrs f sp parent  c rs fr m E0 c rs fr m
-  | exec_one:
-      forall f sp parent c rs fr m t c' rs' fr' m',
-      exec_instr f sp parent  c rs fr m t c' rs' fr' m' ->
-      exec_instrs f sp parent  c rs fr m t c' rs' fr' m'
-  | exec_trans:
-      forall f sp parent c1 rs1 fr1 m1 t1 c2 rs2 fr2 m2 t2 c3 rs3 fr3 m3 t3,
-      exec_instrs f sp parent  c1 rs1 fr1 m1 t1 c2 rs2 fr2 m2 ->
-      exec_instrs f sp parent  c2 rs2 fr2 m2 t2 c3 rs3 fr3 m3 ->
-      t3 = t1 ** t2 ->
-      exec_instrs f sp parent  c1 rs1 fr1 m1 t3 c3 rs3 fr3 m3
-
-with exec_function_body:
-      function -> frame -> val -> val ->
-      regset -> mem -> trace -> regset -> mem -> Prop :=
-  | exec_funct_body:
-      forall f parent link ra rs m t rs' m1 m2 stk fr1 fr2 fr3 c,
-      Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) ->
-      set_slot (init_frame f) Tint 0 link fr1 ->
-      set_slot fr1 Tint 12 ra fr2 ->
-      exec_instrs f (Vptr stk (Int.repr (-f.(fn_framesize)))) parent
-                  f.(fn_code) rs fr2 m1
-                t (Mreturn :: c) rs' fr3 m2 ->
-      exec_function_body f parent link ra rs m t rs' (Mem.free m2 stk)
-
-with exec_function:
-      fundef -> frame -> regset -> mem -> trace -> regset -> mem -> Prop :=
-  | exec_funct_internal:
-      forall f parent rs m t rs' m',
-      (forall link ra, 
-         Val.has_type link Tint ->
-         Val.has_type ra Tint ->
-         exec_function_body f parent link ra rs m t rs' m') ->
-      exec_function (Internal f) parent rs m t rs' m'
-  | exec_funct_external:
-      forall ef parent args res rs1 rs2 m t,
+      forall s f sp cond args lbl c rs fr m,
+      eval_condition cond rs##args m = Some false ->
+      step (State s f sp (Mcond cond args lbl :: c) rs fr m)
+        E0 (State s f sp c rs fr m)
+  | exec_Mreturn:
+      forall s f stk soff c rs fr m,
+      step (State s f (Vptr stk soff) (Mreturn :: c) rs fr m)
+        E0 (Returnstate s rs (Mem.free m stk))
+  | exec_function_internal:
+      forall s f rs m m' stk,
+      Mem.alloc m 0 f.(fn_stacksize) = (m', stk) ->
+      step (Callstate s (Internal f) rs m)
+        E0 (State s f (Vptr stk (Int.repr (-f.(fn_framesize))))
+                  f.(fn_code) rs (init_frame f) m')
+  | exec_function_external:
+      forall s ef args res rs1 rs2 m t,
       event_match ef args t res ->
-      extcall_arguments rs1 parent ef.(ef_sig) args ->
+      extcall_arguments rs1 (parent_frame s) ef.(ef_sig) args ->
       rs2 = (rs1#(Conventions.loc_result ef.(ef_sig)) <- res) ->
-      exec_function (External ef) parent rs1 m t rs2 m.
-
-Scheme exec_instr_ind4 := Minimality for exec_instr Sort Prop
-  with exec_instrs_ind4 := Minimality for exec_instrs Sort Prop
-  with exec_function_body_ind4 := Minimality for exec_function_body Sort Prop
-  with exec_function_ind4 := Minimality for exec_function Sort Prop.
-
-(** Ugly mutual induction principle over evaluation derivations.
-  Coq is not able to generate it directly, even though it is
-  an immediate consequence of the 4 induction principles generated
-  by the [Scheme] command above. *)
-
-Lemma exec_mutual_induction:
- forall
-         (P
-          P0 : function ->
-               val ->
-               frame ->
-               code ->
-               regset ->
-               frame ->
-               mem -> trace -> code -> regset -> frame -> mem -> Prop)
-         (P1 : function ->
-               frame ->
-               val -> val -> regset -> mem -> trace -> regset -> mem -> Prop)
-         (P2 : fundef ->
-               frame -> regset -> mem -> trace -> regset -> mem -> Prop),
-       (forall (f : function) (sp : val) (parent : frame) (lbl : label)
-          (c : list instruction) (rs : regset) (fr : frame) (m : mem),
-        P f sp parent (Mlabel lbl :: c) rs fr m E0 c rs fr m) ->
-       (forall (f0 : function) (sp : val) (parent : frame) (ofs : int)
-          (ty : typ) (dst : mreg) (c : list instruction) (rs : regset)
-          (fr : frame) (m : mem) (v : val),
-        get_slot fr ty (Int.signed ofs) v ->
-        P f0 sp parent (Mgetstack ofs ty dst :: c) rs fr m E0 c rs # dst <- v
-          fr m) ->
-       (forall (f1 : function) (sp : val) (parent : frame) (src : mreg)
-          (ofs : int) (ty : typ) (c : list instruction) (rs : mreg -> val)
-          (fr : frame) (m : mem) (fr' : frame),
-        set_slot fr ty (Int.signed ofs) (rs src) fr' ->
-        P f1 sp parent (Msetstack src ofs ty :: c) rs fr m E0 c rs fr' m) ->
-       (forall (f2 : function) (sp : val) (parent : frame) (ofs : int)
-          (ty : typ) (dst : mreg) (c : list instruction) (rs : regset)
-          (fr : frame) (m : mem) (v : val),
-        get_slot parent ty (Int.signed ofs) v ->
-        P f2 sp parent (Mgetparam ofs ty dst :: c) rs fr m E0 c rs # dst <- v
-          fr m) ->
-       (forall (f3 : function) (sp : val) (parent : frame) (op : operation)
-          (args : list mreg) (res : mreg) (c : list instruction)
-          (rs : mreg -> val) (fr : frame) (m : mem) (v : val),
-        eval_operation ge sp op rs ## args = Some v ->
-        P f3 sp parent (Mop op args res :: c) rs fr m E0 c rs # res <- v fr m) ->
-       (forall (f4 : function) (sp : val) (parent : frame)
-          (chunk : memory_chunk) (addr : addressing) (args : list mreg)
-          (dst : mreg) (c : list instruction) (rs : mreg -> val) (fr : frame)
-          (m : mem) (a v : val),
-        eval_addressing ge sp addr rs ## args = Some a ->
-        loadv chunk m a = Some v ->
-        P f4 sp parent (Mload chunk addr args dst :: c) rs fr m E0 c
-          rs # dst <- v fr m) ->
-       (forall (f5 : function) (sp : val) (parent : frame)
-          (chunk : memory_chunk) (addr : addressing) (args : list mreg)
-          (src : mreg) (c : list instruction) (rs : mreg -> val) (fr : frame)
-          (m m' : mem) (a : val),
-        eval_addressing ge sp addr rs ## args = Some a ->
-        storev chunk m a (rs src) = Some m' ->
-        P f5 sp parent (Mstore chunk addr args src :: c) rs fr m E0 c rs fr
-          m') ->
-       (forall (f6 : function) (sp : val) (parent : frame) (sig : signature)
-          (ros : mreg + ident) (c : list instruction) (rs : regset)
-          (fr : frame) (m : mem) (t : trace) (f' : fundef) (rs' : regset)
-          (m' : mem),
-        find_function ge ros rs = Some f' ->
-        exec_function f' fr rs m t rs' m' ->
-        P2 f' fr rs m t rs' m' ->
-        P f6 sp parent (Mcall sig ros :: c) rs fr m t c rs' fr m') ->
-       (forall (f7 : function) (sp : val) (parent : frame)
-          (c : list instruction) (rs : mreg -> val) (fr : frame) (m : mem)
-          (sz : int) (m' : mem) (blk : block),
-        rs Conventions.loc_alloc_argument = Vint sz ->
-        alloc m 0 (Int.signed sz) = (m', blk) ->
-        P f7 sp parent (Malloc :: c) rs fr m E0 c
-          rs # Conventions.loc_alloc_result <- (Vptr blk Int.zero) fr m') ->
-       (forall (f7 : function) (sp : val) (parent : frame) (lbl : label)
-          (c : list instruction) (rs : regset) (fr : frame) (m : mem)
-          (c' : code),
-        find_label lbl (fn_code f7) = Some c' ->
-        P f7 sp parent (Mgoto lbl :: c) rs fr m E0 c' rs fr m) ->
-       (forall (f8 : function) (sp : val) (parent : frame) (cond : condition)
-          (args : list mreg) (lbl : label) (c : list instruction)
-          (rs : mreg -> val) (fr : frame) (m : mem) (c' : code),
-        eval_condition cond rs ## args = Some true ->
-        find_label lbl (fn_code f8) = Some c' ->
-        P f8 sp parent (Mcond cond args lbl :: c) rs fr m E0 c' rs fr m) ->
-       (forall (f9 : function) (sp : val) (parent : frame) (cond : condition)
-          (args : list mreg) (lbl : label) (c : list instruction)
-          (rs : mreg -> val) (fr : frame) (m : mem),
-        eval_condition cond rs ## args = Some false ->
-        P f9 sp parent (Mcond cond args lbl :: c) rs fr m E0 c rs fr m) ->
-       (forall (f10 : function) (sp : val) (parent : frame) (c : code)
-          (rs : regset) (fr : frame) (m : mem),
-        P0 f10 sp parent c rs fr m E0 c rs fr m) ->
-       (forall (f11 : function) (sp : val) (parent : frame) (c : code)
-          (rs : regset) (fr : frame) (m : mem) (t : trace) (c' : code)
-          (rs' : regset) (fr' : frame) (m' : mem),
-        exec_instr f11 sp parent c rs fr m t c' rs' fr' m' ->
-        P f11 sp parent c rs fr m t c' rs' fr' m' ->
-        P0 f11 sp parent c rs fr m t c' rs' fr' m') ->
-       (forall (f12 : function) (sp : val) (parent : frame) (c1 : code)
-          (rs1 : regset) (fr1 : frame) (m1 : mem) (t1 : trace) (c2 : code)
-          (rs2 : regset) (fr2 : frame) (m2 : mem) (t2 : trace) (c3 : code)
-          (rs3 : regset) (fr3 : frame) (m3 : mem) (t3 : trace),
-        exec_instrs f12 sp parent c1 rs1 fr1 m1 t1 c2 rs2 fr2 m2 ->
-        P0 f12 sp parent c1 rs1 fr1 m1 t1 c2 rs2 fr2 m2 ->
-        exec_instrs f12 sp parent c2 rs2 fr2 m2 t2 c3 rs3 fr3 m3 ->
-        P0 f12 sp parent c2 rs2 fr2 m2 t2 c3 rs3 fr3 m3 ->
-        t3 = t1 ** t2 -> P0 f12 sp parent c1 rs1 fr1 m1 t3 c3 rs3 fr3 m3) ->
-       (forall (f13 : function) (parent : frame) (link ra : val)
-          (rs : regset) (m : mem) (t : trace) (rs' : regset) (m1 m2 : mem)
-          (stk : block) (fr1 fr2 fr3 : frame) (c : list instruction),
-        alloc m 0 (fn_stacksize f13) = (m1, stk) ->
-        set_slot (init_frame f13) Tint 0 link fr1 ->
-        set_slot fr1 Tint 12 ra fr2 ->
-        exec_instrs f13 (Vptr stk (Int.repr (- fn_framesize f13))) parent
-          (fn_code f13) rs fr2 m1 t (Mreturn :: c) rs' fr3 m2 ->
-        P0 f13 (Vptr stk (Int.repr (- fn_framesize f13))) parent
-          (fn_code f13) rs fr2 m1 t (Mreturn :: c) rs' fr3 m2 ->
-        P1 f13 parent link ra rs m t rs' (free m2 stk)) ->
-       (forall (f14 : function) (parent : frame) (rs : regset) (m : mem)
-          (t : trace) (rs' : regset) (m' : mem),
-        (forall link ra : val,
-         Val.has_type link Tint ->
-         Val.has_type ra Tint ->
-         exec_function_body f14 parent link ra rs m t rs' m') ->
-        (forall link ra : val,
-         Val.has_type link Tint ->
-         Val.has_type ra Tint -> P1 f14 parent link ra rs m t rs' m') ->
-        P2 (Internal f14) parent rs m t rs' m') ->
-       (forall (ef : external_function) (parent : frame) (args : list val)
-          (res : val) (rs1 : mreg -> val) (rs2 : RegEq.t -> val) (m : mem)
-          (t0 : trace),
-        event_match ef args t0 res ->
-        extcall_arguments rs1 parent ef.(ef_sig) args ->
-        rs2 = rs1 # (Conventions.loc_result (ef_sig ef)) <- res ->
-        P2 (External ef) parent rs1 m t0 rs2 m) ->
-      (forall (f16 : function) (v : val) (f17 : frame) (c : code)
-         (r : regset) (f18 : frame) (m : mem) (t : trace) (c0 : code)
-         (r0 : regset) (f19 : frame) (m0 : mem),
-       exec_instr f16 v f17 c r f18 m t c0 r0 f19 m0 ->
-       P f16 v f17 c r f18 m t c0 r0 f19 m0)
-   /\ (forall (f16 : function) (v : val) (f17 : frame) (c : code)
-         (r : regset) (f18 : frame) (m : mem) (t : trace) (c0 : code)
-         (r0 : regset) (f19 : frame) (m0 : mem),
-       exec_instrs f16 v f17 c r f18 m t c0 r0 f19 m0 ->
-       P0 f16 v f17 c r f18 m t c0 r0 f19 m0)
-   /\ (forall (f16 : function) (f17 : frame) (v v0 : val) (r : regset)
-         (m : mem) (t : trace) (r0 : regset) (m0 : mem),
-       exec_function_body f16 f17 v v0 r m t r0 m0 ->
-       P1 f16 f17 v v0 r m t r0 m0)
-   /\ (forall (f16 : fundef) (f17 : frame) (r : regset) (m : mem) (t : trace)
-         (r0 : regset) (m0 : mem),
-       exec_function f16 f17 r m t r0 m0 -> P2 f16 f17 r m t r0 m0).
-Proof.
-  intros. split. apply (exec_instr_ind4 P P0 P1 P2); assumption.
-  split. apply (exec_instrs_ind4 P P0 P1 P2); assumption.
-  split. apply (exec_function_body_ind4 P P0 P1 P2); assumption.
-  apply (exec_function_ind4 P P0 P1 P2); assumption.
-Qed.
+      step (Callstate s (External ef) rs1 m)
+         t (Returnstate s rs2 m)
+  | exec_return:
+      forall f sp c fr s rs m,
+      step (Returnstate (Stackframe f sp c fr :: s) rs m)
+        E0 (State s f sp c rs fr m).
 
 End RELSEM.
 
-Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
-  let ge := Genv.globalenv p in
-  let m0 := Genv.init_mem p in
-  exists b, exists f, exists rs, exists m,
-  Genv.find_symbol ge p.(prog_main) = Some b /\
-  Genv.find_funct_ptr ge b = Some f /\
-  exec_function ge f empty_frame (Regmap.init Vundef) m0 t rs m /\
-  rs R3 = r.
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall b f,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some b ->
+      Genv.find_funct_ptr ge b = Some f ->
+      initial_state p (Callstate nil f (Regmap.init Vundef) m0).
+
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall rs m r,
+      rs R3 = Vint r ->
+      final_state (Returnstate nil rs m) r.
 
+Definition exec_program (p: program) (beh: program_behavior) : Prop :=
+  program_behaves step (initial_state p) final_state (Genv.globalenv p) beh.
diff --git a/backend/Machabstr2concr.v b/backend/Machabstr2concr.v
new file mode 100644
index 00000000..5349fb50
--- /dev/null
+++ b/backend/Machabstr2concr.v
@@ -0,0 +1,947 @@
+(** Simulation between the two semantics for the Mach language. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Mem.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Op.
+Require Import Locations.
+Require Import Mach.
+Require Import Machtyping.
+Require Import Machabstr.
+Require Import Machconcr.
+Require Import Stackingproof.
+Require Import PPCgenretaddr.
+
+(** Two semantics were defined for the Mach intermediate language:
+- The concrete semantics (file [Mach]), where the whole activation
+  record resides in memory and the [Mgetstack], [Msetstack] and
+  [Mgetparent] are interpreted as [sp]-relative memory accesses.
+- The abstract semantics (file [Machabstr]), where the activation
+  record is split in two parts: the Cminor stack data, resident in
+  memory, and the frame information, residing not in memory but
+  in additional evaluation environments.
+
+  In this file, we show a simulation result between these
+  semantics: if a program executes with some behaviour [beh] in the
+  abstract semantics, it also executes with the same behaviour in
+  the concrete semantics.  This result bridges the correctness proof
+  in file [Stackingproof] (which uses the abstract Mach semantics
+  as output) and that in file [PPCgenproof] (which uses the concrete
+  Mach semantics as input).
+*)
+
+Remark align_16_top_ge:
+  forall lo hi,
+  hi <= align_16_top lo hi.
+Proof.
+  intros. unfold align_16_top. apply Zmax_bound_r. 
+  assert (forall x, x <= (x + 15) / 16 * 16).
+  intro. assert (16 > 0). omega.
+  generalize (Z_div_mod_eq (x + 15) 16 H). intro.
+  replace ((x + 15) / 16 * 16) with ((x + 15) - (x + 15) mod 16).
+  generalize (Z_mod_lt (x + 15) 16 H). omega. 
+  rewrite Zmult_comm. omega.
+  generalize (H (hi - lo)). omega. 
+Qed.
+
+Remark align_16_top_pos:
+  forall lo hi,
+  0 <= align_16_top lo hi.
+Proof.
+  intros. unfold align_16_top. apply Zmax_bound_l. omega.
+Qed.
+
+Remark size_type_chunk:
+  forall ty, size_chunk (chunk_of_type ty) = AST.typesize ty.
+Proof.
+  destruct ty; reflexivity.
+Qed.
+
+(** * Agreement between frames and memory-resident activation records *)
+
+(** ** Agreement for one frame *)
+
+(** The core idea of the simulation proof is that for all active
+  functions, the memory-allocated activation record, in the concrete 
+  semantics, contains the same data as the Cminor stack block
+  (at positive offsets) and the frame of the function (at negative
+  offsets) in the abstract semantics.
+
+  This intuition (activation record = Cminor stack data + frame)
+  is formalized by the following predicate [frame_match fr sp base mm ms].
+  [fr] is a frame and [mm] the current memory state in the abstract
+  semantics. [ms] is the current memory state in the concrete semantics.
+  The stack pointer is [Vptr sp base] in both semantics. *)
+
+Inductive frame_match (fr: frame) (sp: block) (base: int) 
+                      (mm ms: mem) : Prop :=
+  frame_match_intro:
+    valid_block ms sp ->
+    low_bound mm sp = 0 ->
+    low_bound ms sp = fr.(fr_low) ->
+    high_bound ms sp >= 0 ->
+    fr.(fr_low) <= -24 ->
+    Int.min_signed <= fr.(fr_low) ->
+    base = Int.repr fr.(fr_low) ->
+    (forall ty ofs,
+       fr.(fr_low) + 24 <= ofs -> ofs + AST.typesize ty <= 0 ->
+       load (chunk_of_type ty) ms sp ofs = Some(fr.(fr_contents) ty ofs)) ->
+    frame_match fr sp base mm ms.
+
+(** The following two innocuous-looking lemmas are the key results
+  showing that [sp]-relative memory accesses in the concrete
+  semantics behave like the direct frame accesses in the abstract
+  semantics.  First, a value [v] that has type [ty] is preserved
+  when stored in memory with chunk [chunk_of_type ty], then read
+  back with the same chunk.  The typing hypothesis is crucial here:
+  for instance, a float value reads back as [Vundef] when stored
+  and load with chunk [Mint32]. *)
+
+Lemma load_result_ty:
+  forall v ty,
+  Val.has_type v ty -> Val.load_result (chunk_of_type ty) v = v.
+Proof.
+  destruct v; destruct ty; simpl; contradiction || reflexivity.
+Qed.
+
+(** Second, computations of [sp]-relative offsets using machine
+  arithmetic (as done in the concrete semantics) never overflows
+  and behaves identically to the offset computations using exact
+  arithmetic (as done in the abstract semantics). *)
+
+Lemma int_add_no_overflow:
+  forall x y,
+  Int.min_signed <= Int.signed x + Int.signed y <= Int.max_signed ->
+  Int.signed (Int.add x y) = Int.signed x + Int.signed y.
+Proof.
+  intros. rewrite Int.add_signed. rewrite Int.signed_repr. auto. auto.
+Qed.
+
+(** Given matching frames and activation records, loading from the
+  activation record (in the concrete semantics) behaves identically
+  to reading the corresponding slot from the frame
+  (in the abstract semantics).  *)
+
+Lemma frame_match_get_slot:
+  forall fr sp base mm ms ty ofs v,
+  frame_match fr sp base mm ms ->
+  get_slot fr ty (Int.signed ofs) v ->
+  load_stack ms (Vptr sp base) ty ofs = Some v.
+Proof.
+  intros. inv H. inv H0. 
+  unfold load_stack, Val.add, loadv. 
+  assert (Int.signed (Int.repr (fr_low fr)) = fr_low fr).
+    apply Int.signed_repr.
+    split. auto. apply Zle_trans with (-24). auto. compute; congruence.
+  assert (Int.signed (Int.add (Int.repr (fr_low fr)) ofs) = fr_low fr + Int.signed ofs).
+    rewrite int_add_no_overflow. rewrite H0. auto.
+    rewrite H0. split. omega. apply Zle_trans with 0.
+    generalize (AST.typesize_pos ty). omega. compute; congruence.
+  rewrite H9. apply H8. omega. auto.
+Qed.
+
+(** Assigning a value to a frame slot (in the abstract semantics)
+  corresponds to storing this value in the activation record
+  (in the concrete semantics).  Moreover, agreement between frames
+  and activation records is preserved. *)
+
+Lemma frame_match_set_slot:
+  forall fr sp base mm ms ty ofs v fr',
+  frame_match fr sp base mm ms ->
+  set_slot fr ty (Int.signed ofs) v fr' ->
+  Val.has_type v ty ->
+  Mem.extends mm ms ->
+  exists ms',
+    store_stack ms (Vptr sp base) ty ofs v = Some ms' /\
+    frame_match fr' sp base mm ms' /\
+    Mem.extends mm ms' /\
+    Int.signed base + 24 <= Int.signed (Int.add base ofs).
+Proof.
+  intros. inv H. inv H0.
+  unfold store_stack, Val.add, storev.
+  assert (Int.signed (Int.repr (fr_low fr)) = fr_low fr).
+    apply Int.signed_repr.
+    split. auto. apply Zle_trans with (-24). auto. compute; congruence.
+  assert (Int.signed (Int.add (Int.repr (fr_low fr)) ofs) = fr_low fr + Int.signed ofs).
+    rewrite int_add_no_overflow. congruence. 
+    rewrite H0. split. omega. apply Zle_trans with 0.
+    generalize (AST.typesize_pos ty). omega. compute; congruence.
+  rewrite H11.
+  assert (exists ms', store (chunk_of_type ty) ms sp (fr_low fr + Int.signed ofs) v = Some ms').
+    apply valid_access_store. 
+    constructor. auto. omega.
+    rewrite size_type_chunk. omega.
+  destruct H12 as [ms' STORE]. 
+  generalize (low_bound_store _ _ _ _ _ _ STORE sp). intro LB.
+  generalize (high_bound_store _ _ _ _ _ _ STORE sp). intro HB.
+  exists ms'. 
+  split. exact STORE.
+  (* frame match *)
+  split. constructor; simpl fr_low; try congruence.
+    eauto with mem. intros. simpl.
+    destruct (zeq (fr_low fr + Int.signed ofs) ofs0). subst ofs0.
+    destruct (typ_eq ty ty0). subst ty0.
+    (* same *)
+    transitivity (Some (Val.load_result (chunk_of_type ty) v)).
+    eapply load_store_same; eauto.
+    decEq. apply load_result_ty; auto.
+    (* mismatch *)
+    eapply load_store_mismatch'; eauto with mem.
+    destruct ty; destruct ty0; simpl; congruence.
+    destruct (zle (ofs0 + AST.typesize ty0) (fr_low fr + Int.signed ofs)).
+    (* disjoint *)
+    rewrite <- H10; auto. eapply load_store_other; eauto.
+    right; left. rewrite size_type_chunk; auto.
+    destruct (zle (fr_low fr + Int.signed ofs + AST.typesize ty)).
+    rewrite <- H10; auto. eapply load_store_other; eauto.
+    right; right. rewrite size_type_chunk; auto.
+    (* overlap *)
+    eapply load_store_overlap'; eauto with mem.
+    rewrite size_type_chunk; auto. 
+    rewrite size_type_chunk; auto.
+  (* extends *)
+  split. eapply store_outside_extends; eauto.
+  left. rewrite size_type_chunk. omega.
+  (* bound *)
+  omega.
+Qed.
+
+(** Agreement is preserved by stores within blocks other than the
+  one pointed to by [sp], or to the low 24 bytes
+  of the [sp] block. *)
+
+Lemma frame_match_store_other:
+  forall fr sp base mm ms chunk b ofs v ms',
+  frame_match fr sp base mm ms ->
+  store chunk ms b ofs v = Some ms' ->
+  sp <> b \/ ofs + size_chunk chunk <= fr_low fr + 24 ->
+  frame_match fr sp base mm ms'.
+Proof.
+  intros. inv H. 
+  generalize (low_bound_store _ _ _ _ _ _ H0 sp). intro LB.
+  generalize (high_bound_store _ _ _ _ _ _ H0 sp). intro HB.
+  apply frame_match_intro; auto.
+  eauto with mem. 
+  congruence.
+  congruence.
+  intros. rewrite <- H9; auto. 
+  eapply load_store_other; eauto.
+  elim H1; intro. auto. right. rewrite size_type_chunk. omega. 
+Qed.
+
+Lemma frame_match_store:
+  forall fr sp base mm ms chunk b ofs v mm' ms',
+  frame_match fr sp base mm ms ->
+  store chunk mm b ofs v = Some mm' ->
+  store chunk ms b ofs v = Some ms' ->
+  frame_match fr sp base mm' ms'.
+Proof.
+  intros. inv H.  
+  generalize (low_bound_store _ _ _ _ _ _ H0 sp). intro LBm.
+  generalize (low_bound_store _ _ _ _ _ _ H1 sp). intro LBs.
+  generalize (high_bound_store _ _ _ _ _ _ H0 sp). intro HBm.
+  generalize (high_bound_store _ _ _ _ _ _ H1 sp). intro HBs.
+  apply frame_match_intro; auto.
+  eauto with mem.
+  congruence. congruence. congruence.
+  intros. rewrite <- H9; auto. eapply load_store_other; eauto.
+  destruct (zeq sp b). subst b. right. 
+  rewrite size_type_chunk. 
+  assert (valid_access mm chunk sp ofs) by eauto with mem.
+  inv H10. left. omega.
+  auto.
+Qed.
+
+(** The low 24 bytes of a frame are preserved by a parallel
+    store in the two memory states. *)
+
+Lemma frame_match_store_link_invariant:
+  forall fr sp base mm ms chunk b ofs v mm' ms' ofs',
+  frame_match fr sp base mm ms ->
+  store chunk mm b ofs v = Some mm' ->
+  store chunk ms b ofs v = Some ms' ->
+  ofs' <= fr_low fr + 20 -> 
+  load Mint32 ms' sp ofs' = load Mint32 ms sp ofs'.
+Proof.
+  intros. inv H.
+  eapply load_store_other; eauto.
+  destruct (eq_block sp b). subst b.
+  right; left. change (size_chunk Mint32) with 4. 
+  assert (valid_access mm chunk sp ofs) by eauto with mem.
+  inv H. omega. 
+  auto.
+Qed.
+
+(** Memory allocation of the Cminor stack data block (in the abstract
+  semantics) and of the whole activation record (in the concrete
+  semantics) return memory states that agree according to [frame_match].
+  Moreover, [frame_match] relations over already allocated blocks
+  remain true. *)
+
+Lemma frame_match_new:
+  forall mm ms mm' ms' sp sp' f,
+  mm.(nextblock) = ms.(nextblock) ->
+  alloc mm 0 f.(fn_stacksize) = (mm', sp) ->
+  alloc ms (- f.(fn_framesize)) (align_16_top (- f.(fn_framesize)) f.(fn_stacksize)) = (ms', sp') ->
+  f.(fn_framesize) >= 24 ->
+  f.(fn_framesize) <= -Int.min_signed ->
+  sp = sp' /\
+  frame_match (init_frame f) sp (Int.repr (-f.(fn_framesize))) mm' ms'.
+Proof.
+  intros. 
+  assert (sp = sp').
+    exploit alloc_result. eexact H0. exploit alloc_result. eexact H1. 
+    congruence. 
+  subst sp'. split. auto.
+  generalize (low_bound_alloc_same _ _ _ _ _ H0). intro LBm.
+  generalize (low_bound_alloc_same _ _ _ _ _ H1). intro LBs.
+  generalize (high_bound_alloc_same _ _ _ _ _ H0). intro HBm.
+  generalize (high_bound_alloc_same _ _ _ _ _ H1). intro HBs.
+  constructor; simpl; eauto with mem.
+  rewrite HBs. apply Zle_ge. apply align_16_top_pos.
+  omega. omega.
+  intros.
+  eapply load_alloc_same'; eauto. omega. 
+  rewrite size_type_chunk. apply Zle_trans with 0. auto.
+  apply align_16_top_pos.
+Qed.
+
+Lemma frame_match_alloc:
+  forall mm ms fr sp base lom him los his mm' ms' bm bs,
+  mm.(nextblock) = ms.(nextblock) ->
+  frame_match fr sp base mm ms ->
+  alloc mm lom him = (mm', bm) ->
+  alloc ms los his = (ms', bs) ->
+  frame_match fr sp base mm' ms'.
+Proof.
+  intros. inversion H0.
+  assert (valid_block mm sp). red. rewrite H. auto.
+  exploit low_bound_alloc_other. eexact H1. eexact H11. intro LBm.
+  exploit high_bound_alloc_other. eexact H1. eexact H11. intro HBm.
+  exploit low_bound_alloc_other. eexact H2. eexact H3. intro LBs.
+  exploit high_bound_alloc_other. eexact H2. eexact H3. intro HBs.
+  apply frame_match_intro.
+  eapply valid_block_alloc; eauto.
+  congruence. congruence. congruence. auto. auto. auto. 
+  intros. eapply load_alloc_other; eauto. 
+Qed.
+
+(** [frame_match] relations are preserved by freeing a block
+  other than the one pointed to by [sp]. *)
+
+Lemma frame_match_free:
+  forall fr sp base mm ms b,
+  frame_match fr sp base mm ms ->
+  sp <> b ->
+  frame_match fr sp base (free mm b) (free ms b).
+Proof.
+  intros. inversion H.
+  generalize (low_bound_free mm _ _ H0); intro LBm.
+  generalize (low_bound_free ms _ _ H0); intro LBs.
+  generalize (high_bound_free mm _ _ H0); intro HBm.
+  generalize (high_bound_free ms _ _ H0); intro HBs.
+  apply frame_match_intro; auto.
+  congruence. congruence. congruence.
+  intros. rewrite <- H8; auto. apply load_free. auto.
+Qed.
+
+(** ** Invariant for stacks *)
+
+Section SIMULATION.
+
+Variable p: program.
+Let ge := Genv.globalenv p.
+
+(** The [match_stacks] predicate relates a Machabstr call stack
+  with the corresponding Machconcr stack.  In addition to enforcing
+  the [frame_match] predicate for each stack frame, we also enforce:
+- Proper chaining of activation record on the Machconcr side.
+- Preservation of the return address stored at the bottom of the
+  activation record.
+- Separation between the memory blocks holding the activation records:
+  their addresses increase strictly from caller to callee.
+*)
+
+Inductive match_stacks: 
+      list Machabstr.stackframe -> list Machconcr.stackframe ->
+      block -> int -> mem -> mem -> Prop :=
+  | match_stacks_nil: forall sp base mm ms,
+      load Mint32 ms sp (Int.signed base) = Some (Vptr Mem.nullptr Int.zero) ->
+      load Mint32 ms sp (Int.signed base + 12) = Some Vzero ->
+      match_stacks nil nil sp base mm ms
+  | match_stacks_cons: forall f sp' base' c fr s fb ra ts sp base mm ms,
+      frame_match fr sp' base' mm ms ->
+      sp' < sp ->
+      load Mint32 ms sp (Int.signed base) = Some (Vptr sp' base') ->
+      load Mint32 ms sp (Int.signed base + 12) = Some ra ->
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      match_stacks s ts sp' base' mm ms ->
+      match_stacks (Machabstr.Stackframe f (Vptr sp' base') c fr :: s)
+                   (Machconcr.Stackframe fb (Vptr sp' base') ra c :: ts)
+                   sp base mm ms.
+
+Remark frame_match_base_eq:
+  forall fr sp base mm ms,
+  frame_match fr sp base mm ms -> Int.signed base = fr_low fr.
+Proof.
+  intros. inv H. apply Int.signed_repr. split; auto.
+  apply Zle_trans with (-24); auto. compute; congruence.
+Qed.
+
+(** If [match_stacks] holds, a lookup in the parent frame in the
+  Machabstr semantics corresponds to two memory loads in the
+  Machconcr semantics, one to load the pointer to the parent's
+  activation record, the second to read within this record. *)
+
+Lemma match_stacks_get_parent:
+  forall s ts sp base mm ms ty ofs v,
+  match_stacks s ts sp base mm ms ->
+  get_slot (parent_frame s) ty (Int.signed ofs) v ->
+  exists parent,
+     load_stack ms (Vptr sp base) Tint (Int.repr 0) = Some parent
+  /\ load_stack ms parent ty ofs = Some v.
+Proof.
+  intros. inv H; simpl in H0. 
+  inv H0. simpl in H3. elimtype False. generalize (AST.typesize_pos ty). omega.
+  exists (Vptr sp' base'); split.
+  unfold load_stack. simpl. rewrite Int.add_zero. auto.
+  eapply frame_match_get_slot; eauto.
+Qed.
+
+(** If [match_stacks] holds, reading memory at offsets 0 and 12
+  from the stack pointer returns the stack pointer and return
+  address of the caller, respectively. *)
+
+Lemma match_stacks_load_links:
+  forall fr s ts sp base mm ms,
+  frame_match fr sp base mm ms ->
+  match_stacks s ts sp base mm ms ->
+  load_stack ms (Vptr sp base) Tint (Int.repr 0) = Some (parent_sp ts) /\
+  load_stack ms (Vptr sp base) Tint (Int.repr 12) = Some (parent_ra ts).
+Proof.
+  intros. unfold load_stack. simpl. rewrite Int.add_zero.
+  replace (Int.signed (Int.add base (Int.repr 12)))
+     with (Int.signed base + 12).
+  inv H0; simpl; auto.
+  inv H. rewrite Int.add_signed. 
+  change (Int.signed (Int.repr 12)) with 12.
+  repeat rewrite Int.signed_repr. auto.
+  split. omega. apply Zle_trans with (-12). omega. compute; congruence.
+  split. auto. apply Zle_trans with (-24). auto. compute; congruence.
+Qed.
+
+(** The [match_stacks] invariant is preserved by memory stores
+  outside the 24-byte reserved area at the bottom of activation records.
+*)
+
+Lemma match_stacks_store_other:
+  forall s ts sp base ms mm,
+  match_stacks s ts sp base mm ms ->
+  forall chunk b ofs v ms',
+  store chunk ms b ofs v = Some ms' ->
+  sp < b ->
+  match_stacks s ts sp base mm ms'.
+Proof.
+  induction 1; intros.
+  assert (sp <> b). unfold block; omega.
+  constructor.
+  rewrite <- H. eapply load_store_other; eauto.
+  rewrite <- H0. eapply load_store_other; eauto.
+  assert (sp <> b). unfold block; omega.
+  econstructor; eauto.
+  eapply frame_match_store_other; eauto.
+  left. unfold block; omega.
+  rewrite <- H1. eapply load_store_other; eauto.
+  rewrite <- H2. eapply load_store_other; eauto.
+  eapply IHmatch_stacks; eauto. omega.
+Qed.
+
+Lemma match_stacks_store_slot:
+  forall s ts sp base ms mm,
+  match_stacks s ts sp base mm ms ->
+  forall ty ofs v ms',
+  store_stack ms (Vptr sp base) ty ofs v = Some ms' ->
+  Int.signed base + 24 <= Int.signed (Int.add base ofs) ->
+  match_stacks s ts sp base mm ms'.
+Proof.
+  intros.
+  unfold store_stack in H0. simpl in H0. 
+  assert (load Mint32 ms' sp (Int.signed base) = load Mint32 ms sp (Int.signed base)).
+  eapply load_store_other; eauto.
+  right; left. change (size_chunk Mint32) with 4; omega.
+  assert (load Mint32 ms' sp (Int.signed base + 12) = load Mint32 ms sp (Int.signed base + 12)).
+  eapply load_store_other; eauto.
+  right; left. change (size_chunk Mint32) with 4; omega.
+  inv H.
+  constructor; congruence.
+  constructor; auto.
+  eapply frame_match_store_other; eauto.
+  left; unfold block; omega.
+  congruence. congruence.
+  eapply match_stacks_store_other; eauto. 
+Qed.
+
+Lemma match_stacks_store:
+  forall s ts sp base ms mm,
+  match_stacks s ts sp base mm ms ->
+  forall fr chunk b ofs v mm' ms',
+  frame_match fr sp base mm ms ->
+  store chunk mm b ofs v = Some mm' ->
+  store chunk ms b ofs v = Some ms' ->
+  match_stacks s ts sp base mm' ms'.
+Proof.
+  induction 1; intros.
+  assert (Int.signed base = fr_low fr) by (eapply frame_match_base_eq; eauto).
+  constructor.
+  rewrite <- H. eapply frame_match_store_link_invariant; eauto. omega.
+  rewrite <- H0. eapply frame_match_store_link_invariant; eauto. omega.
+  assert (Int.signed base = fr_low fr0) by (eapply frame_match_base_eq; eauto).
+  econstructor; eauto.
+  eapply frame_match_store; eauto. 
+  rewrite <- H1. eapply frame_match_store_link_invariant; eauto. omega.
+  rewrite <- H2. eapply frame_match_store_link_invariant; eauto. omega.
+Qed.
+
+Lemma match_stacks_alloc:
+  forall s ts sp base ms mm,
+  match_stacks s ts sp base mm ms ->
+  forall lom him mm' bm los his ms' bs,
+  mm.(nextblock) = ms.(nextblock) ->
+  alloc mm lom him = (mm', bm) ->
+  alloc ms los his = (ms', bs) ->
+  match_stacks s ts sp base mm' ms'.
+Proof.
+  induction 1; intros.
+  constructor.
+  rewrite <- H; eapply load_alloc_unchanged; eauto with mem.
+  rewrite <- H0; eapply load_alloc_unchanged; eauto with mem.
+  econstructor; eauto.
+  eapply frame_match_alloc; eauto.
+  rewrite <- H1; eapply load_alloc_unchanged; eauto with mem.
+  rewrite <- H2; eapply load_alloc_unchanged; eauto with mem.
+Qed.
+
+Lemma match_stacks_free:
+  forall s ts sp base ms mm,
+  match_stacks s ts sp base mm ms ->
+  forall b,
+  sp < b ->
+  match_stacks s ts sp base (Mem.free mm b) (Mem.free ms b).
+Proof.
+  induction 1; intros.
+  assert (sp <> b). unfold block; omega.
+  constructor.
+  rewrite <- H. apply load_free; auto.
+  rewrite <- H0. apply load_free; auto.
+  assert (sp <> b). unfold block; omega.
+  econstructor; eauto.
+  eapply frame_match_free; eauto. unfold block; omega.
+  rewrite <- H1. apply load_free; auto.
+  rewrite <- H2. apply load_free; auto.
+  eapply IHmatch_stacks; eauto. omega.
+Qed.
+
+(** Invocation of a function temporarily violates the [mach_stacks]
+  invariant: the return address and back link are not yet stored
+  in the appropriate parts of the activation record.  
+  The following [match_stacks_partial] predicate is a weaker version
+  of [match_stacks] that captures this situation. *)
+
+Inductive match_stacks_partial: 
+      list Machabstr.stackframe -> list Machconcr.stackframe ->
+      mem -> mem -> Prop :=
+  | match_stacks_partial_nil: forall mm ms,
+      match_stacks_partial nil nil mm ms
+  | match_stacks_partial_cons: forall f sp base c fr s fb ra ts mm ms,
+      frame_match fr sp base mm ms ->
+      sp < ms.(nextblock) ->
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      match_stacks s ts sp base mm ms ->
+      Val.has_type ra Tint ->
+      match_stacks_partial (Machabstr.Stackframe f (Vptr sp base) c fr :: s)
+                           (Machconcr.Stackframe fb (Vptr sp base) ra c :: ts)
+                           mm ms.
+
+Lemma match_stacks_match_stacks_partial:
+  forall s ts sp base mm ms,
+  match_stacks s ts sp base mm ms ->
+  match_stacks_partial s ts (free mm sp) (free ms sp).
+Proof.
+  intros. inv H. constructor.
+  econstructor. 
+  eapply frame_match_free; eauto. unfold block; omega.
+  simpl. inv H0; auto.
+  auto.
+  apply match_stacks_free; auto.
+  generalize (Mem.load_inv _ _ _ _ _ H3). intros [A B].
+  rewrite B.
+  destruct (getN (pred_size_chunk Mint32) (Int.signed base + 12)
+                 (contents (blocks ms sp))); exact I.
+Qed.
+
+Lemma match_stacks_function_entry:
+  forall s ts mm ms lom him mm' los his ms' stk ms'' ms''' base,
+  match_stacks_partial s ts mm ms ->
+  alloc mm lom him = (mm', stk) ->
+  alloc ms los his = (ms', stk) ->
+  store Mint32 ms' stk (Int.signed base) (parent_sp ts) = Some ms'' ->
+  store Mint32 ms'' stk (Int.signed base + 12) (parent_ra ts) = Some ms''' ->
+  match_stacks s ts stk base mm' ms'''.
+Proof.
+  intros. 
+  assert (WT_SP: Val.has_type (parent_sp ts) Tint).
+    inv H; simpl; auto.
+  assert (WT_RA: Val.has_type (parent_ra ts) Tint).
+    inv H; simpl; auto.
+  assert (load Mint32 ms''' stk (Int.signed base) = Some (parent_sp ts)).
+    transitivity (load Mint32 ms'' stk (Int.signed base)).
+    eapply load_store_other; eauto. right; left. simpl. omega.
+    transitivity (Some (Val.load_result (chunk_of_type Tint) (parent_sp ts))).
+    simpl chunk_of_type. eapply load_store_same; eauto. 
+    decEq. apply load_result_ty. auto.
+  assert (load Mint32 ms''' stk (Int.signed base + 12) = Some (parent_ra ts)).
+    transitivity (Some (Val.load_result (chunk_of_type Tint) (parent_ra ts))).
+    simpl chunk_of_type. eapply load_store_same; eauto. 
+    decEq. apply load_result_ty. auto.
+  inv H; simpl in *.
+  constructor; auto.
+  assert (sp < stk). rewrite (alloc_result _ _ _ _ _ H1). auto.
+  assert (sp <> stk). unfold block; omega.
+  assert (nextblock mm = nextblock ms).
+  rewrite <- (alloc_result _ _ _ _ _ H0). 
+  rewrite <- (alloc_result _ _ _ _ _ H1). auto.
+  econstructor; eauto.
+  eapply frame_match_store_other; eauto. 
+  eapply frame_match_store_other; eauto.
+  eapply frame_match_alloc with (mm := mm) (ms := ms); eauto.
+  eapply match_stacks_store_other; eauto. 
+  eapply match_stacks_store_other; eauto.
+  eapply match_stacks_alloc with (mm := mm) (ms := ms); eauto.
+ Qed.
+
+(** ** Invariant between states. *)
+
+(** The [match_state] predicate relates a Machabstr state with
+  a Machconcr state.  In addition to [match_stacks] between the
+  stacks, we ask that
+- The Machabstr frame is properly stored in the activation record,
+  as characterized by [frame_match].
+- The Machconcr memory state extends the Machabstr memory state,
+  in the sense of the [Mem.extends] relation defined in module [Mem]. *)
+
+Inductive match_states:
+            Machabstr.state -> Machconcr.state -> Prop :=
+  | match_states_intro:
+      forall s f sp base c rs fr mm ts fb ms
+        (STACKS: match_stacks s ts sp base mm ms)
+        (FM: frame_match fr sp base mm ms)
+        (MEXT: Mem.extends mm ms)
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)),
+      match_states (Machabstr.State s f (Vptr sp base) c rs fr mm)
+                   (Machconcr.State ts fb (Vptr sp base) c rs ms)
+  | match_states_call:
+      forall s f rs mm ts fb ms
+        (STACKS: match_stacks_partial s ts mm ms)
+        (MEXT: Mem.extends mm ms)
+        (FIND: Genv.find_funct_ptr ge fb = Some f),
+      match_states (Machabstr.Callstate s f rs mm)
+                   (Machconcr.Callstate ts fb rs ms)
+  | match_states_return:
+      forall s rs mm ts ms
+        (STACKS: match_stacks_partial s ts mm ms)
+        (MEXT: Mem.extends mm ms),
+      match_states (Machabstr.Returnstate s rs mm)
+                   (Machconcr.Returnstate ts rs ms).
+
+
+(** * The proof of simulation *)
+
+(** The proof of simulation relies on diagrams of the following form:
+<<
+           st1 --------------- st2
+            |                   |
+           t|                   |t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  The precondition is matching between the initial states [st1] and
+  [st2], as usual, plus the fact that [st1] is well-typed
+  in the sense of predicate [wt_state] from module [Machtyping].
+  The postcondition is matching between the final states [st1']
+  and [st2'].  The well-typedness of [st2] is not part of the
+  postcondition, since it follows from that of [st1] if the
+  program is well-typed.  (See the subject reduction property
+  in module [Machtyping].)
+*)
+
+Lemma find_function_find_function_ptr:
+  forall ros rs f',
+  find_function ge ros rs = Some f' ->
+  exists fb', Genv.find_funct_ptr ge fb' = Some f' /\ find_function_ptr ge ros rs = Some fb'.
+Proof.
+  intros until f'. destruct ros; simpl.
+  intro. exploit Genv.find_funct_inv; eauto. intros [fb' EQ].
+  rewrite EQ in H. rewrite Genv.find_funct_find_funct_ptr in H.
+  exists fb'; split. auto. rewrite EQ. simpl. auto.
+  destruct (Genv.find_symbol ge i); intro.
+  exists b; auto. congruence.
+Qed.
+
+(** Preservation of arguments to external functions. *)
+
+Lemma transl_extcall_arguments:
+  forall rs s sg args ts m ms,
+  Machabstr.extcall_arguments rs (parent_frame s) sg args ->
+  match_stacks_partial s ts m ms ->
+  extcall_arguments rs ms (parent_sp ts) sg args.
+Proof.
+  unfold Machabstr.extcall_arguments, extcall_arguments; intros.
+  assert (forall ty ofs v,
+    get_slot (parent_frame s) ty (Int.signed ofs) v ->
+    load_stack ms (parent_sp ts) ty ofs = Some v).
+  inv H0; simpl; intros.
+  inv H0. simpl in H2. elimtype False. generalize (AST.typesize_pos ty). omega.
+  eapply frame_match_get_slot; eauto.
+  assert (forall locs vals,
+    Machabstr.extcall_args rs (parent_frame s) locs vals ->
+    extcall_args rs ms (parent_sp ts) locs vals).
+  induction locs; intros; inversion H2; subst; clear H2.
+  constructor.
+  constructor; auto.
+  inversion H7; subst; clear H7. 
+  constructor.
+  constructor. auto.  
+  auto.
+Qed.
+
+Theorem step_equiv:
+  forall s1 t s2, Machabstr.step ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1') (WTS: wt_state s1),
+  exists s2', Machconcr.step ge s1' t s2' /\ match_states s2 s2'.
+Proof.
+  induction 1; intros; inv MS.
+
+  (* Mlabel *)
+  econstructor; split.
+  apply exec_Mlabel; auto.
+  econstructor; eauto with coqlib.
+
+  (* Mgetstack *)
+  exists (State ts fb (Vptr sp0 base) c (rs#dst <- v) ms); split.
+  apply exec_Mgetstack; auto. eapply frame_match_get_slot; eauto.
+  econstructor; eauto with coqlib.
+
+  (* Msetstack *)
+  assert (Val.has_type (rs src) ty).
+    inv WTS. 
+    generalize (wt_function_instrs _ WTF _ (is_tail_in TAIL)); intro WTI.
+    inv WTI. apply WTRS.
+  exploit frame_match_set_slot; eauto. 
+  intros [ms' [STORE [FM' [EXT' BOUND]]]].
+  exists (State ts fb (Vptr sp0 base) c rs ms'); split.
+  apply exec_Msetstack; auto.
+  econstructor; eauto.
+  eapply match_stacks_store_slot; eauto.
+
+  (* Mgetparam *)
+  exploit match_stacks_get_parent; eauto. 
+  intros [parent [LOAD1 LOAD2]].
+  exists (State ts fb (Vptr sp0 base) c (rs#dst <- v) ms); split.
+  eapply exec_Mgetparam; eauto. 
+  econstructor; eauto with coqlib. 
+ 
+  (* Mop *)
+  exists (State ts fb (Vptr sp0 base) c (rs#res <- v) ms); split.
+  apply exec_Mop; auto.
+  eapply eval_operation_change_mem with (m := m); eauto.
+  intros. eapply Mem.valid_pointer_extends; eauto.
+  econstructor; eauto with coqlib.
+
+  (* Mload *)
+  exists (State ts fb (Vptr sp0 base) c (rs#dst <- v) ms); split.
+  eapply exec_Mload; eauto.
+  destruct a; simpl in H0; try discriminate.
+  simpl. eapply Mem.load_extends; eauto.
+  econstructor; eauto with coqlib.
+
+  (* Mstore *)
+  destruct a; simpl in H0; try discriminate.
+  exploit Mem.store_within_extends; eauto. intros [ms' [STORE MEXT']].
+  exists (State ts fb (Vptr sp0 base) c rs ms'); split.
+  eapply exec_Mstore; eauto.
+  econstructor; eauto with coqlib.
+  eapply match_stacks_store; eauto.
+  eapply frame_match_store; eauto.
+
+  (* Mcall *)
+  exploit find_function_find_function_ptr; eauto. 
+  intros [fb' [FIND' FINDFUNCT]].
+  assert (exists ra', return_address_offset f c ra').
+    apply return_address_exists.
+    inv WTS. eapply is_tail_cons_left; eauto.
+  destruct H0 as [ra' RETADDR].
+  econstructor; split.
+  eapply exec_Mcall; eauto. 
+  econstructor; eauto. 
+  econstructor; eauto. inv FM; auto. exact I.  
+
+  (* Mtailcall *)
+  exploit find_function_find_function_ptr; eauto. 
+  intros [fb' [FIND' FINDFUNCT]].
+  exploit match_stacks_load_links; eauto. intros [LOAD1 LOAD2].
+  econstructor; split.
+  eapply exec_Mtailcall; eauto.
+  econstructor; eauto.
+  eapply match_stacks_match_stacks_partial; eauto. 
+  apply free_extends; auto.
+
+  (* Malloc *)
+  caseEq (alloc ms 0 (Int.signed sz)). intros ms' blk' ALLOC.
+  exploit alloc_extends; eauto. omega. omega. intros [EQ MEXT']. subst blk'.
+  exists (State ts fb (Vptr sp0 base) c (rs#Conventions.loc_alloc_result <- (Vptr blk Int.zero)) ms'); split.
+  eapply exec_Malloc; eauto.
+  econstructor; eauto.
+  eapply match_stacks_alloc; eauto. inv MEXT; auto. 
+  eapply frame_match_alloc with (mm := m) (ms := ms); eauto. inv MEXT; auto.
+
+  (* Mgoto *)
+  econstructor; split.
+  eapply exec_Mgoto; eauto.
+  econstructor; eauto.
+
+  (* Mcond *)
+  econstructor; split.
+  eapply exec_Mcond_true; eauto.
+  eapply eval_condition_change_mem with (m := m); eauto.
+  intros. eapply Mem.valid_pointer_extends; eauto.
+  econstructor; eauto.
+  econstructor; split.
+  eapply exec_Mcond_false; eauto.
+  eapply eval_condition_change_mem with (m := m); eauto.
+  intros. eapply Mem.valid_pointer_extends; eauto.
+  econstructor; eauto.
+
+  (* Mreturn *)
+  exploit match_stacks_load_links; eauto. intros [LOAD1 LOAD2].
+  econstructor; split.
+  eapply exec_Mreturn; eauto.
+  econstructor; eauto.  
+  eapply match_stacks_match_stacks_partial; eauto. 
+  apply free_extends; auto.
+
+  (* internal function *)
+  assert (WTF: wt_function f). inv WTS. inv H5. auto. inv WTF.
+  caseEq (alloc ms (- f.(fn_framesize))
+                   (align_16_top (- f.(fn_framesize)) f.(fn_stacksize))).
+  intros ms' stk' ALLOC.
+  exploit (alloc_extends m ms m' ms' 0 (fn_stacksize f)
+               (- fn_framesize f)
+               (align_16_top (- fn_framesize f) (fn_stacksize f))); eauto.
+  omega. apply align_16_top_ge. 
+  intros [EQ EXT']. subst stk'.
+  exploit (frame_match_new m ms); eauto. inv MEXT; auto.
+  intros [EQ FM]. clear EQ.
+  set (sp := Vptr stk (Int.repr (- fn_framesize f))).
+  assert (exists ms'', store Mint32 ms' stk (- fn_framesize f) (parent_sp ts) = Some ms'').
+    apply valid_access_store. constructor. 
+    eauto with mem.
+    rewrite (low_bound_alloc_same _ _ _ _ _ ALLOC). omega.
+    rewrite (high_bound_alloc_same _ _ _ _ _ ALLOC).
+    change (size_chunk Mint32) with 4. 
+    apply Zle_trans with 0. omega. apply align_16_top_pos.
+  destruct H0 as [ms'' STORE1].
+  assert (exists ms''', store Mint32 ms'' stk (- fn_framesize f + 12) (parent_ra ts) = Some ms''').
+    apply valid_access_store. constructor. 
+    eauto with mem.
+    rewrite (low_bound_store _ _ _ _ _ _ STORE1 stk). 
+    rewrite (low_bound_alloc_same _ _ _ _ _ ALLOC). omega.
+    rewrite (high_bound_store _ _ _ _ _ _ STORE1 stk). 
+    rewrite (high_bound_alloc_same _ _ _ _ _ ALLOC).
+    change (size_chunk Mint32) with 4. 
+    apply Zle_trans with 0. omega. apply align_16_top_pos.
+  destruct H0 as [ms''' STORE2].
+  assert (RANGE1: Int.min_signed <= - fn_framesize f <= Int.max_signed).
+  split. omega. apply Zle_trans with (-24). omega. compute;congruence.
+  assert (RANGE2: Int.min_signed <= - fn_framesize f + 12 <= Int.max_signed).
+  split. omega. apply Zle_trans with (-12). omega. compute;congruence.
+  econstructor; split.
+  eapply exec_function_internal; eauto.
+  unfold store_stack. simpl. rewrite Int.add_zero. 
+  rewrite Int.signed_repr. eauto. auto.
+  unfold store_stack. simpl. rewrite Int.add_signed.
+  change (Int.signed (Int.repr 12)) with 12.
+  repeat rewrite Int.signed_repr; eauto.
+  (* match states *)
+  unfold sp; econstructor; eauto.
+  eapply match_stacks_function_entry; eauto.
+  rewrite Int.signed_repr; eauto.
+  rewrite Int.signed_repr; eauto.
+  eapply frame_match_store_other with (ms := ms''); eauto.
+  eapply frame_match_store_other with (ms := ms'); eauto. 
+  simpl. right; omega. simpl. right; omega.
+  eapply store_outside_extends with (m2 := ms''); eauto.
+  eapply store_outside_extends with (m2 := ms'); eauto.
+  rewrite (low_bound_alloc_same _ _ _ _ _ H). simpl; omega.
+  rewrite (low_bound_alloc_same _ _ _ _ _ H). simpl; omega.
+
+  (* external function *)
+  econstructor; split.
+  eapply exec_function_external; eauto. 
+  eapply transl_extcall_arguments; eauto.
+  econstructor; eauto.
+
+  (* return *)
+  inv STACKS.
+  econstructor; split.
+  eapply exec_return. 
+  econstructor; eauto.
+Qed.
+
+Hypothesis wt_prog: wt_program p.
+
+Lemma equiv_initial_states:
+  forall st1, Machabstr.initial_state p st1 ->
+  exists st2, Machconcr.initial_state p st2 /\ match_states st1 st2 /\ wt_state st1.
+Proof.
+  intros. inversion H.
+  econstructor; split.
+  econstructor. eauto. 
+  split. econstructor. constructor. apply Mem.extends_refl. auto.
+  econstructor. simpl; intros; contradiction.
+  eapply Genv.find_funct_ptr_prop; eauto.
+  red; intros; exact I.
+Qed.
+
+Lemma equiv_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 /\ wt_state st1 -> Machabstr.final_state st1 r -> Machconcr.final_state st2 r.
+Proof.
+  intros. inv H0. destruct H. inv H. inv STACKS.
+  constructor; auto.
+Qed.
+
+Theorem exec_program_equiv:
+  forall (beh: program_behavior),
+  Machabstr.exec_program p beh -> Machconcr.exec_program p beh.
+Proof.
+  unfold Machconcr.exec_program, Machabstr.exec_program; intros.
+  eapply simulation_step_preservation with
+    (step1 := Machabstr.step)
+    (step2 := Machconcr.step)
+    (match_states := fun st1 st2 => match_states st1 st2 /\ wt_state st1).
+  eexact equiv_initial_states.
+  eexact equiv_final_states.
+  intros. destruct H1. exploit step_equiv; eauto.
+  intros [st2' [A B]]. exists st2'; split. auto. split. auto.
+  eapply Machtyping.subject_reduction; eauto. 
+  auto.
+Qed.
+
+End SIMULATION.
diff --git a/backend/Machabstr2mach.v b/backend/Machabstr2mach.v
deleted file mode 100644
index 8a2b01dd..00000000
--- a/backend/Machabstr2mach.v
+++ /dev/null
@@ -1,1185 +0,0 @@
-(** Simulation between the two semantics for the Mach language. *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import AST.
-Require Import Integers.
-Require Import Values.
-Require Import Mem.
-Require Import Events.
-Require Import Globalenvs.
-Require Import Op.
-Require Import Locations.
-Require Import Machabstr.
-Require Import Mach.
-Require Import Machtyping.
-Require Import Stackingproof.
-
-(** Two semantics were defined for the Mach intermediate language:
-- The concrete semantics (file [Mach]), where the whole activation
-  record resides in memory and the [Mgetstack], [Msetstack] and
-  [Mgetparent] are interpreted as [sp]-relative memory accesses.
-- The abstract semantics (file [Machabstr]), where the activation
-  record is split in two parts: the Cminor stack data, resident in
-  memory, and the frame information, residing not in memory but
-  in additional evaluation environments.
-
-  In this file, we show a simulation result between these
-  semantics: if a program executes to some result [r] in the
-  abstract semantics, it also executes to the same result in
-  the concrete semantics.  This result bridges the correctness proof
-  in file [Stackingproof] (which uses the abstract Mach semantics
-  as output) and that in file [PPCgenproof] (which uses the concrete
-  Mach semantics as input).
-*)
-
-Remark align_16_top_ge:
-  forall lo hi,
-  hi <= align_16_top lo hi.
-Proof.
-  intros. unfold align_16_top. apply Zmax_bound_r. 
-  assert (forall x, x <= (x + 15) / 16 * 16).
-  intro. assert (16 > 0). omega.
-  generalize (Z_div_mod_eq (x + 15) 16 H). intro.
-  replace ((x + 15) / 16 * 16) with ((x + 15) - (x + 15) mod 16).
-  generalize (Z_mod_lt (x + 15) 16 H). omega. 
-  rewrite Zmult_comm. omega.
-  generalize (H (hi - lo)). omega. 
-Qed.
-
-Remark align_16_top_pos:
-  forall lo hi,
-  0 <= align_16_top lo hi.
-Proof.
-  intros. unfold align_16_top. apply Zmax_bound_l. omega.
-Qed.
-
-Remark size_mem_pos:
-  forall sz, size_mem sz > 0.
-Proof.
-  destruct sz; simpl; compute; auto.
-Qed.
-
-Remark size_type_chunk:
-  forall ty, size_chunk (chunk_of_type ty) = 4 * typesize ty.
-Proof.
-  destruct ty; reflexivity.
-Qed.
-
-Remark mem_chunk_type:
-  forall ty, mem_chunk (chunk_of_type ty) = mem_type ty.
-Proof.
-  destruct ty; reflexivity.
-Qed.
-
-Remark size_mem_type:
-  forall ty, size_mem (mem_type ty) = 4 * typesize ty.
-Proof.
-  destruct ty; reflexivity.
-Qed.
-
-(** * Agreement between frames and memory-resident activation records *)
-
-(** ** Agreement for one frame *)
-
-(** The core idea of the simulation proof is that for all active
-  functions, the memory-allocated activation record, in the concrete 
-  semantics, contains the same data as the Cminor stack block
-  (at positive offsets) and the frame of the function (at negative
-  offsets) in the abstract semantics.
-
-  This intuition (activation record = Cminor stack data + frame)
-  is formalized by the following predicate [frame_match fr sp base mm ms].
-  [fr] is a frame and [mm] the current memory state in the abstract
-  semantics. [ms] is the current memory state in the concrete semantics.
-  The stack pointer is [Vptr sp base] in both semantics. *)
-
-Inductive frame_match: frame -> block -> int -> mem -> mem -> Prop :=
-  frame_match_intro:
-    forall fr sp base mm ms,
-    valid_block ms sp ->
-    low_bound mm sp = 0 ->
-    low_bound ms sp = fr.(low) ->
-    high_bound ms sp = align_16_top fr.(low) (high_bound mm sp) ->
-    fr.(low) <= 0 ->
-    Int.min_signed <= fr.(low) ->
-    base = Int.repr fr.(low) ->
-    block_contents_agree fr.(low) 0 fr (ms.(blocks) sp) ->
-    block_contents_agree 0 (high_bound ms sp)
-                         (mm.(blocks) sp) (ms.(blocks) sp) ->
-    frame_match fr sp base mm ms.
-
-(** [frame_match], while presented as a relation for convenience,
-  is actually a function: it fully determines the contents of the
-  activation record [ms.(blocks) sp]. *)
-
-Lemma frame_match_exten:
-  forall fr sp base mm ms1 ms2,
-  frame_match fr sp base mm ms1 ->
-  frame_match fr sp base mm ms2 ->
-  ms1.(blocks) sp = ms2.(blocks) sp. 
-Proof.
-  intros. inversion H. inversion H0.
-  unfold low_bound, high_bound in *.
-  apply block_contents_exten.
-  congruence.
-  congruence.
-  hnf; intros.
-  elim H29. rewrite H3; rewrite H4; intros.
-  case (zlt ofs 0); intro.
-  assert (low fr <= ofs < 0). tauto.
-  transitivity (contents fr ofs).
-  symmetry. apply H8; auto.
-  apply H22; auto.
-  transitivity (contents (blocks mm sp) ofs).
-  symmetry. apply H9. rewrite H4. omega.
-  apply H23. rewrite H18. omega.
-Qed.
-
-(** The following two innocuous-looking lemmas are the key results
-  showing that [sp]-relative memory accesses in the concrete
-  semantics behave like the direct frame accesses in the abstract
-  semantics.  First, a value [v] that has type [ty] is preserved
-  when stored in memory with chunk [chunk_of_type ty], then read
-  back with the same chunk.  The typing hypothesis is crucial here:
-  for instance, a float value reads back as [Vundef] when stored
-  and load with chunk [Mint32]. *)
-
-Lemma load_result_ty:
-  forall v ty,
-  Val.has_type v ty -> Val.load_result (chunk_of_type ty) v = v.
-Proof.
-  destruct v; destruct ty; simpl; contradiction || reflexivity.
-Qed.
-
-(** Second, computations of [sp]-relative offsets using machine
-  arithmetic (as done in the concrete semantics) never overflows
-  and behaves identically to the offset computations using exact
-  arithmetic (as done in the abstract semantics). *)
-
-Lemma int_add_no_overflow:
-  forall x y,
-  Int.min_signed <= Int.signed x + Int.signed y <= Int.max_signed ->
-  Int.signed (Int.add x y) = Int.signed x + Int.signed y.
-Proof.
-  intros. rewrite Int.add_signed. rewrite Int.signed_repr. auto. auto.
-Qed.
-
-(** Given matching frames and activation records, loading from the
-  activation record (in the concrete semantics) behaves identically
-  to reading the corresponding slot from the frame
-  (in the abstract semantics).  *)
-
-Lemma frame_match_get_slot:
-  forall fr sp base mm ms ty ofs v,
-  frame_match fr sp base mm ms ->
-  get_slot fr ty (Int.signed ofs) v ->
-  Val.has_type v ty ->
-  load_stack ms (Vptr sp base) ty ofs = Some v.
-Proof.
-  intros. inversion H. inversion H0. subst v.
-  unfold load_stack, Val.add, loadv. 
-  assert (Int.signed base = low fr).
-    rewrite H8. apply Int.signed_repr.
-    split. auto. apply Zle_trans with 0. auto. compute; congruence.
-  assert (Int.signed (Int.add base ofs) = low fr + Int.signed ofs).
-    rewrite int_add_no_overflow. rewrite H18. auto.
-    rewrite H18. split. omega. apply Zle_trans with 0.
-    generalize (typesize_pos ty). omega. compute. congruence.
-  rewrite H23. 
-  assert (BND1: low_bound ms sp <= low fr + Int.signed ofs). 
-    omega. 
-  assert (BND2: (low fr + Int.signed ofs) + size_chunk (chunk_of_type ty) <= high_bound ms sp).
-    rewrite size_type_chunk. apply Zle_trans with 0.
-    assumption. rewrite H5. apply align_16_top_pos.
-  generalize (load_in_bounds (chunk_of_type ty) ms sp (low fr + Int.signed ofs) H2 BND1 BND2).
-  intros [v' LOAD].
-  generalize (load_inv _ _ _ _ _ LOAD).
-  intros [A [B [C D]]].
-  rewrite LOAD. rewrite <- D. 
-  decEq. rewrite mem_chunk_type. 
-  rewrite <- size_mem_type in H17.
-  assert (low fr <= low fr + Int.signed ofs). omega.
-  generalize (load_contentmap_agree _ _ _ _ _ _ H9 H24 H17).
-  intro. rewrite H25.
-  apply load_result_ty.
-  assumption.
-Qed.
-
-(** Loads from [sp], corresponding to accesses to Cminor stack data
-  in the abstract semantics, give the same results in the concrete
-  semantics.  This is because the offset from [sp] must be positive or
-  null for the original load to succeed, and because the part
-  of the activation record at positive offsets matches the Cminor
-  stack data block. *)
-
-Lemma frame_match_load:
-  forall fr sp base mm ms chunk ofs v,
-  frame_match fr sp base mm ms ->
-  load chunk mm sp ofs = Some v ->
-  load chunk ms sp ofs = Some v.
-Proof.
-  intros. inversion H. 
-  generalize (load_inv _ _ _ _ _ H0). intros [A [B [C D]]].
-  change (low (blocks mm sp)) with (low_bound mm sp) in B.
-  change (high (blocks mm sp)) with (high_bound mm sp) in C.
-  unfold load. rewrite zlt_true; auto.
-  rewrite in_bounds_holds. 
-  rewrite <- D. decEq. decEq. eapply load_contentmap_agree.
-  red in H9. eexact H9. 
-  omega.
-  unfold size_chunk in C. rewrite H4. 
-  apply Zle_trans with (high_bound mm sp). auto. 
-  apply align_16_top_ge. 
-  change (low (blocks ms sp)) with (low_bound ms sp).
-  rewrite H3. omega.
-  change (high (blocks ms sp)) with (high_bound ms sp).
-  rewrite H4. apply Zle_trans with (high_bound mm sp). auto.
-  apply align_16_top_ge.
-Qed.
-
-(** Assigning a value to a frame slot (in the abstract semantics)
-  corresponds to storing this value in the activation record
-  (in the concrete semantics).  Moreover, agreement between frames
-  and activation records is preserved. *)
-
-Lemma frame_match_set_slot:
-  forall fr sp base mm ms ty ofs v fr',
-  frame_match fr sp base mm ms ->
-  set_slot fr ty (Int.signed ofs) v fr' ->
-  exists ms',
-    store_stack ms (Vptr sp base) ty ofs v = Some ms' /\
-    frame_match fr' sp base mm ms'.
-Proof.
-  intros. inversion H. inversion H0. subst ty0.
-  unfold store_stack, Val.add, storev. 
-  assert (Int.signed base = low fr).
-    rewrite H7. apply Int.signed_repr.
-    split. auto. apply Zle_trans with 0. auto. compute; congruence.
-  assert (Int.signed (Int.add base ofs) = low fr + Int.signed ofs).
-    rewrite int_add_no_overflow. rewrite H16. auto.
-    rewrite H16. split. omega. apply Zle_trans with 0.
-    generalize (typesize_pos ty). omega. compute. congruence.
-  rewrite H20.
-  assert (BND1: low_bound ms sp <= low fr + Int.signed ofs). 
-    omega. 
-  assert (BND2: (low fr + Int.signed ofs) + size_chunk (chunk_of_type ty) <= high_bound ms sp).
-    rewrite size_type_chunk. rewrite H4.
-    apply Zle_trans with 0. subst ofs0. auto. apply align_16_top_pos. 
-  generalize (store_in_bounds _ _ _ _ v H1 BND1 BND2).
-  intros [ms' STORE].
-  generalize (store_inv _ _ _ _ _ _ STORE). intros [P [Q [R [S [x T]]]]].
-  generalize (low_bound_store _ _ _ _ sp _ _ STORE). intro LB.
-  generalize (high_bound_store _ _ _ _ sp _ _ STORE). intro HB.
-  exists ms'. 
-  split. exact STORE.
-  apply frame_match_intro; auto.
-  eapply valid_block_store; eauto.
-  rewrite LB. auto.
-  rewrite HB. auto.
-  red. rewrite T; rewrite update_s; simpl. 
-  rewrite mem_chunk_type. 
-  subst ofs0. eapply store_contentmap_agree; eauto.
-  rewrite HB; rewrite T; rewrite update_s.
-  red. simpl. apply store_contentmap_outside_agree.
-  assumption. left. rewrite mem_chunk_type. 
-  rewrite size_mem_type. subst ofs0. auto.
-Qed.
-
-(** Agreement is preserved by stores within blocks other than the
-  one pointed to by [sp]. *)
-
-Lemma frame_match_store_stack_other:
-  forall fr sp base mm ms sp' base' ty ofs v ms',
-  frame_match fr sp base mm ms ->
-  store_stack ms (Vptr sp' base') ty ofs v = Some ms' ->
-  sp <> sp' ->
-  frame_match fr sp base mm ms'.
-Proof.
-  unfold store_stack, Val.add, storev. intros. inversion H.
-  generalize (store_inv _ _ _ _ _ _ H0). intros [P [Q [R [S [x T]]]]].
-  generalize (low_bound_store _ _ _ _ sp _ _ H0). intro LB.
-  generalize (high_bound_store _ _ _ _ sp _ _ H0). intro HB.
-  apply frame_match_intro; auto.
-  eapply valid_block_store; eauto.
-  rewrite LB; auto.
-  rewrite HB; auto.
-  rewrite T; rewrite update_o; auto.
-  rewrite HB; rewrite T; rewrite update_o; auto.
-Qed.
-
-(** Stores relative to [sp], corresponding to accesses to Cminor stack data
-  in the abstract semantics, give the same results in the concrete
-  semantics.  Moreover, agreement between frames and activation
-  records is preserved. *)
-
-Lemma frame_match_store_ok:
-  forall fr sp base mm ms chunk ofs v mm',
-  frame_match fr sp base mm ms ->
-  store chunk mm sp ofs v = Some mm' ->
-  exists ms', store chunk ms sp ofs v = Some ms'.
-Proof.
-  intros. inversion H. 
-  generalize (store_inv _ _ _ _ _ _ H0). intros [P [Q [R [S [x T]]]]].
-  change (low (blocks mm sp)) with (low_bound mm sp) in Q.
-  change (high (blocks mm sp)) with (high_bound mm sp) in R.
-  apply store_in_bounds.
-  auto.
-  omega. 
-  apply Zle_trans with (high_bound mm sp).
-    auto. rewrite H4. apply align_16_top_ge. 
-Qed.
-
-Lemma frame_match_store:
-  forall fr sp base mm ms chunk b ofs v mm' ms',
-  frame_match fr sp base mm ms ->
-  store chunk mm b ofs v = Some mm' ->
-  store chunk ms b ofs v = Some ms' ->
-  frame_match fr sp base mm' ms'.
-Proof.
-  intros. inversion H. 
-  generalize (store_inv _ _ _ _ _ _ H1). intros [A [B [C [D [x1 E]]]]].
-  generalize (store_inv _ _ _ _ _ _ H0). intros [I [J [K [L [x2 M]]]]].
-  generalize (low_bound_store _ _ _ _ sp _ _ H0). intro LBm.
-  generalize (low_bound_store _ _ _ _ sp _ _ H1). intro LBs.
-  generalize (high_bound_store _ _ _ _ sp _ _ H0). intro HBm.
-  generalize (high_bound_store _ _ _ _ sp _ _ H1). intro HBs.
-  apply frame_match_intro; auto.
-  eapply valid_block_store; eauto.
-  congruence. congruence. congruence. 
-  rewrite E. unfold update. case (zeq sp b); intro.
-  subst b. red; simpl.
-  apply store_contentmap_outside_agree; auto.
-  right. unfold low_bound in H3. omega.
-  assumption.
-  rewrite HBs; rewrite E; rewrite M; unfold update.
-  case (zeq sp b); intro.
-  subst b. red; simpl.
-  apply store_contentmap_agree; auto.
-  assumption.
-Qed.
-
-(** Memory allocation of the Cminor stack data block (in the abstract
-  semantics) and of the whole activation record (in the concrete
-  semantics) return memory states that agree according to [frame_match].
-  Moreover, [frame_match] relations over already allocated blocks
-  remain true. *)
-
-Lemma frame_match_new:
-  forall mm ms mm' ms' sp sp' f,
-  mm.(nextblock) = ms.(nextblock) ->
-  alloc mm 0 f.(fn_stacksize) = (mm', sp) ->
-  alloc ms (- f.(fn_framesize)) (align_16_top (- f.(fn_framesize)) f.(fn_stacksize)) = (ms', sp') ->
-  f.(fn_framesize) >= 0 ->
-  f.(fn_framesize) <= -Int.min_signed ->
-  frame_match (init_frame f) sp (Int.repr (-f.(fn_framesize))) mm' ms'.
-Proof.
-  intros. 
-  injection H0; intros. injection H1; intros.
-  assert (sp = sp'). congruence. rewrite <- H8 in H6. subst sp'.
-  generalize (low_bound_alloc _ _ sp _ _ _ H0). rewrite zeq_true. intro LBm.
-  generalize (low_bound_alloc _ _ sp _ _ _ H1). rewrite zeq_true. intro LBs.
-  generalize (high_bound_alloc _ _ sp _ _ _ H0). rewrite zeq_true. intro HBm.
-  generalize (high_bound_alloc _ _ sp _ _ _ H1). rewrite zeq_true. intro HBs.
-  apply frame_match_intro; auto.
-  eapply valid_new_block; eauto.
-  simpl. congruence.
-  simpl. omega.
-  simpl. omega.
-  rewrite <- H7. simpl. rewrite H6; simpl. rewrite update_s.
-  hnf; simpl; auto.
-  rewrite HBs; rewrite <- H5; simpl; rewrite H4; rewrite <- H7; simpl; rewrite H6; simpl;
-  repeat (rewrite update_s).
-  hnf; simpl; auto.
-Qed.
-
-Lemma frame_match_alloc:
-  forall mm ms fr sp base lom him los his mm' ms' bm bs,
-  mm.(nextblock) = ms.(nextblock) ->
-  frame_match fr sp base mm ms ->
-  alloc mm lom him = (mm', bm) ->
-  alloc ms los his = (ms', bs) ->
-  frame_match fr sp base mm' ms'.
-Proof.
-  intros. inversion H0.
-  assert (sp <> bm). 
-    apply valid_not_valid_diff with mm. 
-    red. rewrite H. exact H3.
-    eapply fresh_block_alloc; eauto.
-  assert (sp <> bs).
-    apply valid_not_valid_diff with ms. auto. 
-    eapply fresh_block_alloc; eauto.
-  generalize (low_bound_alloc _ _ sp _ _ _  H1). 
-    rewrite zeq_false; auto; intro LBm.
-  generalize (low_bound_alloc _ _ sp _ _ _  H2). 
-    rewrite zeq_false; auto; intro LBs.
-  generalize (high_bound_alloc _ _ sp _ _ _  H1). 
-    rewrite zeq_false; auto; intro HBm.
-  generalize (high_bound_alloc _ _ sp _ _ _  H2). 
-    rewrite zeq_false; auto; intro HBs.
-  apply frame_match_intro.
-  eapply valid_block_alloc; eauto.
-  congruence. congruence. congruence. auto. auto. auto. 
-  injection H2; intros. rewrite <- H20; simpl; rewrite H19; simpl.
-    rewrite update_o; auto. 
-  rewrite HBs;
-  injection H2; intros. rewrite <- H20; simpl; rewrite H19; simpl.
-  injection H1; intros. rewrite <- H22; simpl; rewrite H21; simpl.
-  repeat (rewrite update_o; auto).
-Qed.
-
-(** [frame_match] relations are preserved by freeing a block
-  other than the one pointed to by [sp]. *)
-
-Lemma frame_match_free:
-  forall fr sp base mm ms b,
-  frame_match fr sp base mm ms ->
-  sp <> b ->
-  frame_match fr sp base (free mm b) (free ms b).
-Proof.
-  intros. inversion H.
-  generalize (low_bound_free mm _ _ H0); intro LBm.
-  generalize (low_bound_free ms _ _ H0); intro LBs.
-  generalize (high_bound_free mm _ _ H0); intro HBm.
-  generalize (high_bound_free ms _ _ H0); intro HBs.
-  apply frame_match_intro; auto.
-  congruence. congruence. congruence. 
-  unfold free; simpl. rewrite update_o; auto. 
-  rewrite HBs. 
-  unfold free; simpl. repeat (rewrite update_o; auto).
-Qed.
-
-(** ** Agreement for all the frames in a call stack *)
-
-(** We need to reason about all the frames and activation records
-    active at any given time during the executions: not just
-    about those for the currently executing function, but also
-    those for the callers.  These collections of 
-    active frames are called ``call stacks''.  They are lists
-    of triples representing a frame and a stack pointer
-    (reference and offset) in the abstract semantics. *)
-
-Definition callstack : Set := list (frame * block * int).
-
-(** The correct linking of frames (each frame contains a pointer
-  to the frame of its caller at the lowest offset) is captured
-  by the following predicate. *)
-
-Inductive callstack_linked: callstack -> Prop :=
-  | callstack_linked_nil:
-      callstack_linked nil
-  | callstack_linked_one:
-      forall fr1 sp1 base1,
-      callstack_linked ((fr1, sp1, base1) :: nil)
-  | callstack_linked_cons:
-      forall fr1 sp1 base1 fr2 base2 sp2 cs,
-      get_slot fr1 Tint 0 (Vptr sp2 base2) ->
-      callstack_linked ((fr2, sp2, base2) :: cs) ->
-      callstack_linked ((fr1, sp1, base1) :: (fr2, sp2, base2) :: cs).
-
-(** [callstack_dom cs b] (read: call stack [cs] is ``dominated''
-  by block reference [b]) means that the stack pointers in [cs]
-  strictly decrease and are all below [b].  This ensures that
-  the stack block for a function was allocated after that for its
-  callers.  A consequence is that no two active functions share
-  the same stack block. *)
-
-Inductive callstack_dom: callstack -> block -> Prop :=
-  | callstack_dom_nil:
-      forall b, callstack_dom nil b
-  | callstack_dom_cons:
-      forall fr sp base cs b,
-      sp < b ->
-      callstack_dom cs sp ->
-      callstack_dom ((fr, sp, base) :: cs) b.
-
-Lemma callstack_dom_less:
-  forall cs b, callstack_dom cs b ->
-  forall fr sp base, In (fr, sp, base) cs -> sp < b.
-Proof.
-  induction 1. simpl. tauto. 
-  simpl. intros fr0 sp0 base0 [A|B].
-  injection A; intros; subst fr0; subst sp0; subst base0. auto.
-  apply Zlt_trans with sp. eapply IHcallstack_dom; eauto. auto.
-Qed.
-
-Lemma callstack_dom_diff:
-  forall cs b, callstack_dom cs b ->
-  forall fr sp base, In (fr, sp, base) cs -> sp <> b.
-Proof.
-  intros. apply Zlt_not_eq. eapply callstack_dom_less; eauto.
-Qed.
-
-Lemma callstack_dom_incr:
-  forall cs b, callstack_dom cs b ->
-  forall b', b < b' -> callstack_dom cs b'.
-Proof.
-  induction 1; intros.
-  apply callstack_dom_nil.
-  apply callstack_dom_cons. omega. auto.
-Qed.
-
-(** Every block reference is either ``in'' a call stack (that is,
-  refers to the stack block of one of the calls) or  ``not in''
-  a call stack (that is, differs from all the stack blocks). *)
-
-Inductive notin_callstack: block -> callstack -> Prop :=
-  | notin_callstack_nil:
-      forall b, notin_callstack b nil
-  | notin_callstack_cons:
-      forall b fr sp base cs,
-      b <> sp ->
-      notin_callstack b cs ->
-      notin_callstack b ((fr, sp, base) :: cs).
-
-Lemma in_or_notin_callstack:
-  forall b cs,
-  (exists fr, exists base, In (fr, b, base) cs) \/ notin_callstack b cs.
-Proof.
-  induction cs.
-  right; constructor.
-  elim IHcs. 
-  intros [fr [base IN]]. left. exists fr; exists base; auto with coqlib.
-  intro NOTIN. destruct a. destruct p. case (eq_block b b0); intro.
-  left. exists f; exists i. subst b0. auto with coqlib.
-  right. apply notin_callstack_cons; auto.
-Qed. 
-
-(** [callstack_invariant cs mm ms] relates the memory state [mm]
-  from the abstract semantics with the memory state [ms] from the
-  concrete semantics, relative to the current call stack [cs].
-  Five conditions are enforced:
-- All frames in [cs] agree with the corresponding activation records
-  (in the sense of [frame_match]).
-- The frames in the call stack are properly linked.
-- Memory blocks that are not activation records for active function
-  calls are exactly identical in [mm] and [ms].
-- The allocation pointers are the same in [mm] and [ms].
-- The call stack [cs] is ``dominated'' by this allocation pointer,
-  implying that all activation records are valid, allocated blocks,
-  pairwise disjoint, and they were allocated in the order implied
-  by [cs]. *)
-
-Record callstack_invariant (cs: callstack) (mm ms: mem) : Prop :=
-  mk_callstack_invariant {
-    cs_frame:
-      forall fr sp base,
-      In (fr, sp, base) cs -> frame_match fr sp base mm ms;
-    cs_linked:
-      callstack_linked cs;
-    cs_others:
-      forall b, notin_callstack b cs ->
-            mm.(blocks) b = ms.(blocks) b;
-    cs_next:
-      mm.(nextblock) = ms.(nextblock);
-    cs_dom:
-      callstack_dom cs ms.(nextblock)
-  }.
-
-(** Again, while [callstack_invariant] is presented as a relation,
-  it actually fully determines the concrete memory state [ms]
-  from the abstract memory state [mm] and the call stack [cs]. *)
-
-Lemma callstack_exten:
-  forall cs mm ms1 ms2,
-  callstack_invariant cs mm ms1 ->
-  callstack_invariant cs mm ms2 ->
-  ms1 = ms2.
-Proof.
-  intros. inversion H; inversion H0. 
-  apply mem_exten.
-  congruence. 
-  intros. elim (in_or_notin_callstack b cs).
-  intros [fr [base FM]]. apply frame_match_exten with fr base mm; auto.
-  intro. transitivity (blocks mm b).
-  symmetry. auto. auto.
-Qed.
-
-(** The following properties of [callstack_invariant] are the
-  building blocks for the proof of simulation.  First, a [get_slot]
-  operation in the abstract semantics corresponds to a [sp]-relative
-  memory load in the concrete semantics. *)
-
-Lemma callstack_get_slot:
-  forall fr sp base cs mm ms ty ofs v,
-  callstack_invariant ((fr, sp, base) :: cs) mm ms ->
-  get_slot fr ty (Int.signed ofs) v ->
-  Val.has_type v ty ->
-  load_stack ms (Vptr sp base) ty ofs = Some v.
-Proof.
-  intros. inversion H. 
-  apply frame_match_get_slot with fr mm. 
-  apply cs_frame0. apply in_eq.
-  auto. auto.
-Qed.
-
-(** Similarly, a [get_parent] operation corresponds to loading
-  the back-link from the current activation record, then loading
-  from this back-link. *)
-
-Lemma callstack_get_parent:
-  forall fr1 sp1 base1 fr2 sp2 base2 cs mm ms ty ofs v,
-  callstack_invariant ((fr1, sp1, base1) :: (fr2, sp2, base2) :: cs) mm ms ->
-  get_slot fr2 ty (Int.signed ofs) v ->
-  Val.has_type v ty ->
-  load_stack ms (Vptr sp1 base1) Tint (Int.repr 0) = Some (Vptr sp2 base2) /\
-  load_stack ms (Vptr sp2 base2) ty ofs = Some v.
-Proof.
-  intros. inversion H. split.
-  inversion cs_linked0. 
-  apply frame_match_get_slot with fr1 mm.
-  apply cs_frame0. auto with coqlib.
-  rewrite Int.signed_repr. auto. compute. intuition congruence.
-  exact I.
-  apply frame_match_get_slot with fr2 mm. 
-  apply cs_frame0. auto with coqlib.
-  auto. auto.
-Qed.
-
-(** A memory load valid in the abstract semantics can also be performed
-  in the concrete semantics. *)
-
-Lemma callstack_load:
-  forall cs chunk mm ms a v,
-  callstack_invariant cs mm ms ->
-  loadv chunk mm a = Some v ->
-  loadv chunk ms a = Some v.
-Proof.
-  unfold loadv. intros. destruct a; try discriminate.
-  inversion H.
-  elim (in_or_notin_callstack b cs).
-  intros [fr [base IN]]. apply frame_match_load with fr base mm; auto.
-  intro. rewrite <- H0. unfold load. 
-  rewrite (cs_others0 b H1). rewrite cs_next0. reflexivity.
-Qed.
-
-(** A [set_slot] operation in the abstract semantics corresponds
-  to a [sp]-relative memory store in the concrete semantics.
-  Moreover, the property [callstack_invariant] still holds for
-  the final call stacks and memory states. *)
-
-Lemma callstack_set_slot:
-  forall fr sp base cs mm ms ty ofs v fr',
-  callstack_invariant ((fr, sp, base) :: cs) mm ms ->
-  set_slot fr ty (Int.signed ofs) v fr' ->
-  4 <= Int.signed ofs ->
-  exists ms',
-    store_stack ms (Vptr sp base) ty ofs v = Some ms' /\
-    callstack_invariant ((fr', sp, base) :: cs) mm ms'.
-Proof.
-  intros. inversion H.
-  assert (frame_match fr sp base mm ms). apply cs_frame0. apply in_eq.
-  generalize (frame_match_set_slot _ _ _ _ _ _ _ _ _ H2 H0).
-  intros [ms' [SS FM]].
-  generalize (store_inv _ _ _ _ _ _ SS). intros [A [B [C [D [x E]]]]].
-  exists ms'.
-  split. auto.
-  constructor.
-  (* cs_frame *)
-  intros. elim H3; intros. 
-  injection H4; intros; clear H4. 
-  subst fr0; subst sp0; subst base0. auto.
-  apply frame_match_store_stack_other with ms sp base ty ofs v.
-  apply cs_frame0. auto with coqlib. auto. 
-  apply callstack_dom_diff with cs fr0 base0. inversion cs_dom0; auto. auto.
-  (* cs_linked *)
-  inversion cs_linked0. apply callstack_linked_one.
-  apply callstack_linked_cons. 
-  eapply slot_gso; eauto.
-  auto.
-  (* cs_others *)
-  intros. inversion H3. 
-  rewrite E; simpl. rewrite update_o; auto. apply cs_others0.
-  constructor; auto.
-  (* cs_next *)
-  congruence.
-  (* cs_dom *)
-  inversion cs_dom0. constructor. rewrite D; auto. auto.
-Qed.
-
-(** A memory store in the abstract semantics can also be performed
-  in the concrete semantics.  Moreover, the property
-  [callstack_invariant] is preserved. *)
-
-Lemma callstack_store_aux:
-  forall cs mm ms chunk b ofs v mm' ms',
-  callstack_invariant cs mm ms ->
-  store chunk mm b ofs v = Some mm' ->
-  store chunk ms b ofs v = Some ms' ->
-  callstack_invariant cs mm' ms'.
-Proof.
-  intros. inversion H.
-  generalize (store_inv _ _ _ _ _ _ H0). intros [A [B [C [D [x E]]]]].
-  generalize (store_inv _ _ _ _ _ _ H1). intros [P [Q [R [S [y T]]]]].
-  constructor; auto.
-  (* cs_frame *)
-  intros. eapply frame_match_store; eauto.
-  (* cs_others *)
-  intros. generalize (cs_others0 b0 H2); intro.
-  rewrite E; rewrite T; unfold update.
-  case (zeq b0 b); intro.
-  subst b0. 
-  generalize x; generalize y. rewrite H3. 
-  intros. replace y0 with x0. reflexivity. apply proof_irrelevance.
-  auto.
-  (* cs_nextblock *)
-  congruence.
-  (* cs_dom *)
-  rewrite S. auto.
-Qed.
-
-Lemma callstack_store_ok:
-  forall cs mm ms chunk b ofs v mm',
-  callstack_invariant cs mm ms ->
-  store chunk mm b ofs v = Some mm' ->
-  exists ms', store chunk ms b ofs v = Some ms'.
-Proof.
-  intros. inversion H.
-  elim (in_or_notin_callstack b cs).
-  intros [fr [base IN]].
-  apply frame_match_store_ok with fr base mm mm'; auto.
-  intro. generalize (cs_others0 b H1). intro.
-  generalize (store_inv _ _ _ _ _ _ H0). 
-  rewrite cs_next0; rewrite H2. intros [A [B [C [D [x E]]]]].
-  apply store_in_bounds; auto.
-Qed.
-
-Lemma callstack_store:
-  forall cs mm ms chunk a v mm',
-  callstack_invariant cs mm ms ->
-  storev chunk mm a v = Some mm' ->
-  exists ms',
-    storev chunk ms a v = Some ms' /\
-    callstack_invariant cs mm' ms'.
-Proof.
-  unfold storev; intros. destruct a; try discriminate.
-  generalize (callstack_store_ok _ _ _ _ _ _ _ _ H H0).
-  intros [ms' STORE].
-  exists ms'. split. auto. eapply callstack_store_aux; eauto.
-Qed.
-
-(** Allocations of heap blocks can be performed in parallel in
-    both semantics, preserving [callstack_invariant]. *)
-
-Lemma callstack_alloc:
-  forall cs mm ms lo hi mm' blk,
-  callstack_invariant cs mm ms ->
-  Mem.alloc mm lo hi = (mm', blk) ->
-  exists ms',
-    Mem.alloc ms lo hi = (ms', blk) /\
-    callstack_invariant cs mm' ms'.
-Proof.
-  intros. inversion H. 
-  caseEq (alloc ms lo hi). intros ms' blk' ALLOC'.
-  injection H0; intros. injection ALLOC'; intros.
-  assert (blk' = blk). congruence. rewrite H5 in H3. rewrite H5. 
-  exists ms'; split. auto.
-  constructor. 
-  (* frame *)
-  intros; eapply frame_match_alloc; eauto.
-  (* linked *)
-  auto.
-  (* others *)
-  intros. rewrite <- H2; rewrite <- H4; simpl.
-  rewrite H1; rewrite H3. unfold update. case (zeq b blk); auto.
-  (* next *)
-  rewrite <- H2; rewrite <- H4; simpl; congruence.
-  (* dom *)
-  eapply callstack_dom_incr; eauto. rewrite <- H4; simpl. omega.
-Qed.
-
-(** At function entry, a new frame is pushed on the call stack,
-  and memory blocks are allocated in both semantics.  Moreover,
-  the back link to the caller's activation record is set up
-  in the concrete semantics.  All this preserves [callstack_invariant]. *)
-
-Lemma callstack_function_entry:
-  forall fr0 sp0 base0 cs mm ms f fr mm' sp ms' sp',
-  callstack_invariant ((fr0, sp0, base0) :: cs) mm ms ->
-  alloc mm 0 f.(fn_stacksize) = (mm', sp) ->
-  alloc ms (- f.(fn_framesize)) (align_16_top (- f.(fn_framesize)) f.(fn_stacksize)) = (ms', sp') ->
-  f.(fn_framesize) >= 0 ->
-  f.(fn_framesize) <= -Int.min_signed ->
-  set_slot (init_frame f) Tint 0 (Vptr sp0 base0) fr ->
-  let base := Int.repr (-f.(fn_framesize)) in
-  exists ms'',
-     store_stack ms' (Vptr sp base) Tint (Int.repr 0) (Vptr sp0 base0) = Some ms''
-  /\ callstack_invariant ((fr, sp, base) :: (fr0, sp0, base0) :: cs) mm' ms''
-  /\ sp' = sp.
-Proof.
-  assert (ZERO: 0 = Int.signed (Int.repr 0)).
-    rewrite Int.signed_repr. auto. compute; intuition congruence.
-  intros. inversion H.
-  injection H0; intros. injection H1; intros.
-  assert (sp' = sp). congruence. rewrite H9 in H7. subst sp'.
-  assert (frame_match (init_frame f) sp base mm' ms').
-    unfold base. eapply frame_match_new; eauto.
-  rewrite ZERO in H4.
-  generalize (frame_match_set_slot _ _ _ _ _ _ _ _ _ H9 H4).
-  intros [ms'' [SS FM]].
-  generalize (store_inv _ _ _ _ _ _ SS).  
-  intros [A [B [C [D [x E]]]]].
-  exists ms''. split; auto. split.
-  constructor.
-  (* cs_frame *)
-  intros. elim H10; intro.
-  injection H11; intros; clear H11.
-  subst fr1; subst sp1; subst base1. auto.
-  eapply frame_match_store_stack_other; eauto.
-  eapply frame_match_alloc; [idtac|idtac|eexact H0|eexact H1].
-  congruence. eapply cs_frame; eauto with coqlib. 
-  rewrite <- H7. eapply callstack_dom_diff; eauto with coqlib.
-  (* cs_linked *)
-  constructor. rewrite ZERO. eapply slot_gss; eauto. auto.
-  (* cs_others *)
-  intros. inversion H10. 
-  rewrite E; rewrite update_o; auto. 
-  rewrite <- H6; rewrite <- H8; simpl; rewrite H5; rewrite H7; simpl. 
-  repeat (rewrite update_o; auto). 
-  (* cs_next *)
-  rewrite D. rewrite <- H6; rewrite <- H8; simpl. congruence.
-  (* cs_dom *)
-  constructor. rewrite D; auto. rewrite <- H7. auto.
-  auto.
-Qed.
-
-(** At function return, the memory blocks corresponding to Cminor
-  stack data and activation record for the function are freed.
-  This preserves [callstack_invariant]. *)
-
-Lemma callstack_function_return:
-  forall fr sp base cs mm ms,
-  callstack_invariant ((fr, sp, base) :: cs) mm ms ->
-  callstack_invariant cs (free mm sp) (free ms sp).
-Proof.
-  intros. inversion H. inversion cs_dom0.
-  constructor.
-  (* cs_frame *)
-  intros. apply frame_match_free. apply cs_frame0; auto with coqlib.
-  apply callstack_dom_diff with cs fr1 base1. auto. auto.
-  (* cs_linked *)
-  inversion cs_linked0. apply callstack_linked_nil. auto.
-  (* cs_others *)
-  intros. 
-  unfold free; simpl; unfold update.
-  case (zeq b0 sp); intro.
-  auto.
-  apply cs_others0. apply notin_callstack_cons; auto.
-  (* cs_nextblock *)
-  simpl. auto.
-  (* cs_dom *)
-  simpl. apply callstack_dom_incr with sp; auto.
-Qed.
-
-(** Finally, [callstack_invariant] holds for the initial memory states
-  in the two semantics. *)
-
-Lemma callstack_init:
-  forall (p: program),
-  callstack_invariant ((empty_frame, nullptr, Int.zero) :: nil) 
-                      (Genv.init_mem p) (Genv.init_mem p).
-Proof.
-  intros.
-  generalize (Genv.initmem_nullptr p). intros [A B].
-  constructor.
-  (* cs_frame *)
-  intros. elim H; intro.
-  injection H0; intros; subst fr; subst sp; subst base.
-  constructor.
-  assumption.
-  unfold low_bound. rewrite B. reflexivity.
-  unfold low_bound, empty_frame. rewrite B. reflexivity.
-  unfold high_bound. rewrite B. reflexivity.
-  simpl. omega.
-  simpl. compute. intuition congruence.
-  reflexivity.
-  rewrite B. unfold empty_frame. simpl. hnf. auto.
-  rewrite B. hnf. auto.
-  elim H0.
-  (* cs_linked *)
-  apply callstack_linked_one.
-  (* cs_others *)
-  auto.
-  (* cs_nextblock *)
-  reflexivity.
-  (* cs_dom *)
-  constructor. exact A. constructor.
-Qed.
-
-(** Preservation of arguments to external functions. *)
-
-Lemma transl_extcall_arguments:
-  forall rs fr sg args stk base cs m ms,
-  Machabstr.extcall_arguments rs fr sg args ->
-  callstack_invariant ((fr, stk, base) :: cs) m ms ->
-  wt_frame fr ->
-  extcall_arguments rs ms (Vptr stk base) sg args.
-Proof.
-  unfold Machabstr.extcall_arguments, extcall_arguments; intros.
-  assert (forall locs vals,
-    Machabstr.extcall_args rs fr locs vals ->
-    extcall_args rs ms (Vptr stk base) locs vals).
-  induction locs; intros; inversion H2; subst; clear H2.
-  constructor.
-  constructor; auto.
-  inversion H7; subst; clear H7. 
-  constructor.
-  constructor. eapply callstack_get_slot; eauto. 
-  eapply wt_get_slot; eauto. 
-  auto.
-Qed.
-
-(** * The proof of simulation *)
-
-Section SIMULATION.
-
-Variable p: program.
-Hypothesis wt_p: wt_program p.
-Let ge := Genv.globalenv p.
-
-(** The proof of simulation relies on diagrams of the following form:
-<<
-     sp, parent, c, rs, fr, mm ----------- sp, c, rs, ms
-                 |                              |
-                 |                              |
-                 v                              v
-   sp, parent, c', rs', fr', mm' --------  sp, c', rs', ms'
->>
-  The left vertical arrow is a transition in the abstract semantics.
-  The right vertical arrow is a transition in the concrete semantics.
-  The precondition (top horizontal line) is the appropriate
-  [callstack_invariant] property between the initial memory states
-  [mm] and [ms] and any call stack with [fr] as top frame and
-  [parent] as second frame.  In addition, well-typedness conditions
-  over the code [c], the register [rs] and the frame [fr] are demanded.
-  The postcondition (bottom horizontal line) is [callstack_invariant]
-  between the final memory states [mm'] and [ms'] and the final
-  call stack.
-*)
-
-Definition exec_instr_prop
-    (f: function) (sp: val) (parent: frame)
-    (c: code) (rs: regset) (fr: frame) (mm: mem) (t: trace)
-    (c': code) (rs': regset) (fr': frame) (mm': mem) : Prop :=
-  forall ms stk base pstk pbase cs
-         (WTF: wt_function f)
-         (INCL: incl c f.(fn_code))
-         (WTRS: wt_regset rs)
-         (WTFR: wt_frame fr)
-         (WTPA: wt_frame parent)
-         (CSI: callstack_invariant ((fr, stk, base) :: (parent, pstk, pbase) :: cs) mm ms)
-         (SP: sp = Vptr stk base),
-  exists ms',
-    exec_instr ge f sp c rs ms t c' rs' ms' /\
-    callstack_invariant ((fr', stk, base) :: (parent, pstk, pbase) :: cs) mm' ms'.
-
-Definition exec_instrs_prop
-    (f: function) (sp: val) (parent: frame)
-    (c: code) (rs: regset) (fr: frame) (mm: mem) (t: trace)
-    (c': code) (rs': regset) (fr': frame) (mm': mem) : Prop :=
-  forall ms stk base pstk pbase cs
-         (WTF: wt_function f)
-         (INCL: incl c f.(fn_code))
-         (WTRS: wt_regset rs)
-         (WTFR: wt_frame fr)
-         (WTPA: wt_frame parent)
-         (CSI: callstack_invariant ((fr, stk, base) :: (parent, pstk, pbase) :: cs) mm ms)
-         (SP: sp = Vptr stk base),
-  exists ms',
-    exec_instrs ge f sp c rs ms t c' rs' ms' /\
-    callstack_invariant ((fr', stk, base) :: (parent, pstk, pbase) :: cs) mm' ms'.
-
-Definition exec_function_body_prop
-    (f: function) (parent: frame) (link ra: val)
-    (rs: regset) (mm: mem) (t: trace)
-    (rs': regset) (mm': mem) : Prop :=
-  forall ms pstk pbase cs
-         (WTF: wt_function f)
-         (WTRS: wt_regset rs)
-         (WTPA: wt_frame parent)
-         (WTRA: Val.has_type ra Tint)
-         (LINK: link = Vptr pstk pbase)
-         (CSI: callstack_invariant ((parent, pstk, pbase) :: cs) mm ms),
-  exists ms',
-    exec_function_body ge f (Vptr pstk pbase) ra rs ms t rs' ms' /\
-    callstack_invariant ((parent, pstk, pbase) :: cs) mm' ms'.
-
-Definition exec_function_prop
-    (f: fundef) (parent: frame)
-    (rs: regset) (mm: mem) (t: trace)
-    (rs': regset) (mm': mem) : Prop :=
-  forall ms pstk pbase cs
-         (WTF: wt_fundef f)
-         (WTRS: wt_regset rs)
-         (WTPA: wt_frame parent)
-         (CSI: callstack_invariant ((parent, pstk, pbase) :: cs) mm ms),
-  exists ms',
-    exec_function ge f (Vptr pstk pbase) rs ms t rs' ms' /\
-    callstack_invariant ((parent, pstk, pbase) :: cs) mm' ms'.
-
-Lemma exec_function_equiv:
-  forall f parent rs mm t rs' mm',
-  Machabstr.exec_function ge f parent rs mm t rs' mm' ->
-  exec_function_prop f parent rs mm t rs' mm'.
-Proof.
-  apply (Machabstr.exec_function_ind4 ge
-           exec_instr_prop
-           exec_instrs_prop
-           exec_function_body_prop
-           exec_function_prop);
-  intros; red; intros.
-
-  (* Mlabel *)
-  exists ms. split. constructor. auto.
-  (* Mgetstack *)
-  exists ms. split. 
-  constructor. rewrite SP. eapply callstack_get_slot; eauto. 
-  apply wt_get_slot with fr (Int.signed ofs); auto.
-  auto.
-  (* Msetstack *)
-  generalize (wt_function_instrs f WTF _ (INCL _ (in_eq _ _))).
-  intro WTI. inversion WTI.
-  assert (4 <= Int.signed ofs). omega. 
-  generalize (callstack_set_slot _ _ _ _ _ _ _ _ _ _ CSI H H5).
-  intros [ms' [STO CSI']].
-  exists ms'. split. constructor. rewrite SP. auto. auto.
-  (* Mgetparam *)
-  exists ms. split. 
-  assert (WTV: Val.has_type v ty). eapply wt_get_slot; eauto.
-  generalize (callstack_get_parent _ _ _ _ _ _ _ _ _ _ _ _
-                CSI H WTV).
-  intros [L1 L2].
-  eapply exec_Mgetparam. rewrite SP; eexact L1. eexact L2.
-  auto.
-  (* Mop *)
-  exists ms. split. constructor. auto. auto.
-  (* Mload *)
-  exists ms. split. econstructor. eauto. eapply callstack_load; eauto.
-  auto.
-  (* Mstore *)
-  generalize (callstack_store _ _ _ _ _ _ _ CSI H0).
-  intros [ms' [STO CSI']].
-  exists ms'. split. econstructor. eauto. auto.
-  auto. 
-  (* Mcall *)
-  red in H1. 
-  assert (WTF': wt_fundef f').
-    destruct ros; simpl in H.
-    apply (Genv.find_funct_prop wt_fundef wt_p H).
-    destruct (Genv.find_symbol ge i); try discriminate.
-    apply (Genv.find_funct_ptr_prop wt_fundef wt_p H).
-  generalize (H1 _ _ _ _ WTF' WTRS WTFR CSI). 
-  intros [ms' [EXF CSI']].
-  exists ms'. split. apply exec_Mcall with f'; auto. 
-  rewrite SP. auto.
-  auto.
-  (* Malloc *)
-  generalize (callstack_alloc _ _ _ _ _ _ _ CSI H0). 
-  intros [ms' [ALLOC' CSI']].
-  exists ms'; split.
-  eapply exec_Malloc; eauto.
-  auto.
-  (* Mgoto *)
-  exists ms. split. constructor; auto. auto.
-  (* Mcond *)
-  exists ms. split. apply exec_Mcond_true; auto. auto.
-  exists ms. split. apply exec_Mcond_false; auto. auto.
-
-  (* refl *)
-  exists ms. split. apply exec_refl. auto.
-  (* one *)
-  generalize (H0 _ _ _ _ _ _ WTF INCL WTRS WTFR WTPA CSI SP).
-  intros [ms' [EX CSI']].
-  exists ms'. split. apply exec_one; auto. auto.
-  (* trans *)
-  generalize (subject_reduction_instrs p wt_p 
-               _ _ _ _ _ _ _ _ _ _ _ _ H WTF INCL WTRS WTFR WTPA).
-  intros [INCL2 [WTRS2 WTFR2]].
-  generalize (H0 _ _ _ _ _ _ WTF INCL WTRS WTFR WTPA CSI SP).
-  intros [ms1 [EX1 CSI1]].
-  generalize (H2 _ _ _ _ _ _ WTF INCL2 WTRS2 WTFR2 WTPA CSI1 SP).
-  intros [ms2 [EX2 CSI2]].
-  exists ms2. split. eapply exec_trans; eauto. auto.
-
-  (* function body *)
-  caseEq (alloc ms (- f.(fn_framesize))
-                   (align_16_top (- f.(fn_framesize)) f.(fn_stacksize))).
-  intros ms1 stk1 ALL.
-  subst link.
-  assert (FS: f.(fn_framesize) >= 0).
-    generalize (wt_function_framesize f WTF). omega.
-  generalize (callstack_function_entry _ _ _ _ _ _ _ _ _ _ _ _
-               CSI H ALL FS
-               (wt_function_no_overflow f WTF) H0).
-  intros [ms2 [STORELINK [CSI2 EQ]]].
-  subst stk1.
-  assert (ZERO: Int.signed (Int.repr 0) = 0). reflexivity.
-  assert (TWELVE: Int.signed (Int.repr 12) = 12). reflexivity.
-  assert (BND: 4 <= Int.signed (Int.repr 12)).
-    rewrite TWELVE; omega.
-  rewrite <- TWELVE in H1.
-  generalize (callstack_set_slot _ _ _ _ _ _ _ _ _ _
-               CSI2 H1 BND).
-  intros [ms3 [STORERA CSI3]].
-  assert (WTFR2: wt_frame fr2).
-    eapply wt_set_slot; eauto. eapply wt_set_slot; eauto.
-    red. intros. simpl. auto. 
-    exact I. 
-  red in H3.
-  generalize (H3 _ _ _ _ _ _ WTF (incl_refl _) WTRS WTFR2 WTPA
-                CSI3 (refl_equal _)).
-  intros [ms4 [EXEC CSI4]].
-  generalize (exec_instrs_link_invariant _ _ _ _ _ _ _ _ _ _ _ _ _
-                H2 WTF (incl_refl _)).
-  intros [INCL LINKINV].
-  exists (free ms4 stk). split.
-  eapply exec_funct_body; eauto.
-  eapply callstack_get_slot. eexact CSI4.
-  apply LINKINV. rewrite ZERO. omega. 
-  eapply slot_gso; eauto. rewrite ZERO. eapply slot_gss; eauto. 
-  exact I. 
-  eapply callstack_get_slot. eexact CSI4.
-  apply LINKINV. rewrite TWELVE. omega. eapply slot_gss; eauto. auto.
-  eapply callstack_function_return; eauto.
-
-  (* internal function *)
-  inversion WTF. subst f0. 
-  generalize (H0 (Vptr pstk pbase) Vzero I I
-                 ms pstk pbase cs H2 WTRS WTPA I (refl_equal _) CSI).
-  intros [ms' [EXEC CSI']].
-  exists ms'. split. constructor. intros.
-  generalize (H0 (Vptr pstk pbase) ra I H1 
-                ms pstk pbase cs H2 WTRS WTPA H1 (refl_equal _) CSI).
-  intros [ms1 [EXEC1 CSI1]].
-  rewrite (callstack_exten _ _ _ _ CSI' CSI1). auto.
-  auto.
-
-  (* external function *)
-  exists ms; split. econstructor; eauto. 
-  eapply transl_extcall_arguments; eauto.
-  auto.
-Qed.  
-
-End SIMULATION.
-
-Theorem exec_program_equiv:
-  forall p t r, 
-  wt_program p ->
-  Machabstr.exec_program p t r -> 
-  Mach.exec_program p t r.
-Proof.
-  intros p t r WTP [fptr [f [rs [mm [FINDPTR [FINDF [EXEC RES]]]]]]].
-  assert (WTF: wt_fundef f).
-    apply (Genv.find_funct_ptr_prop wt_fundef WTP FINDF).
-  assert (WTRS: wt_regset (Regmap.init Vundef)).
-    red; intros. rewrite Regmap.gi; simpl; auto.
-  assert (WTFR: wt_frame empty_frame).
-    red; intros. simpl. auto.
-  generalize (exec_function_equiv p WTP
-                f empty_frame
-                (Regmap.init Vundef) (Genv.init_mem p) t rs mm
-                EXEC (Genv.init_mem p) nullptr Int.zero nil
-                WTF WTRS WTFR (callstack_init p)).
-  intros [ms' [EXEC' CSI]].
-  red. exists fptr; exists f; exists rs; exists ms'. tauto.
-Qed.
diff --git a/backend/Machconcr.v b/backend/Machconcr.v
new file mode 100644
index 00000000..fe9a7d90
--- /dev/null
+++ b/backend/Machconcr.v
@@ -0,0 +1,250 @@
+(** The Mach intermediate language: concrete semantics. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Mem.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Op.
+Require Import Locations.
+Require Conventions.
+Require Import Mach.
+Require PPCgenretaddr.
+
+(** In the concrete semantics for Mach, the three stack-related Mach
+  instructions are interpreted as memory accesses relative to the
+  stack pointer.  More precisely:
+- [Mgetstack ofs ty r] is a memory load at offset [ofs * 4] relative
+  to the stack pointer.
+- [Msetstack r ofs ty] is a memory store at offset [ofs * 4] relative
+  to the stack pointer.
+- [Mgetparam ofs ty r] is a memory load at offset [ofs * 4]
+  relative to the pointer found at offset 0 from the stack pointer.
+  The semantics maintain a linked structure of activation records,
+  with the current record containing a pointer to the record of the
+  caller function at offset 0.
+
+In addition to this linking of activation records, the concrete
+semantics also make provisions for storing a return address
+at offset 12 from the stack pointer.  This stack location will
+be used by the PPC code generated by [PPCgen] to save the return
+address into the caller at the beginning of a function, then restore
+it and jump to it at the end of a function.  The Mach concrete
+semantics does not attach any particular meaning to the pointer
+stored in this reserved location, but makes sure that it is preserved
+during execution of a function.  The [return_address_offset] predicate
+from module [PPCgenretaddr] is used to guess the value of the return
+address that the PPC code generated later will store in the
+reserved location.
+*)
+
+Definition chunk_of_type (ty: typ) :=
+  match ty with Tint => Mint32 | Tfloat => Mfloat64 end.
+
+Definition load_stack (m: mem) (sp: val) (ty: typ) (ofs: int) :=
+  Mem.loadv (chunk_of_type ty) m (Val.add sp (Vint ofs)).
+
+Definition store_stack (m: mem) (sp: val) (ty: typ) (ofs: int) (v: val) :=
+  Mem.storev (chunk_of_type ty) m (Val.add sp (Vint ofs)) v.
+
+(** Extract the values of the arguments of an external call. *)
+
+Inductive extcall_arg: regset -> mem -> val -> loc -> val -> Prop :=
+  | extcall_arg_reg: forall rs m sp r,
+      extcall_arg rs m sp (R r) (rs r)
+  | extcall_arg_stack: forall rs m sp ofs ty v,
+      load_stack m sp ty (Int.repr (4 * ofs)) = Some v ->
+      extcall_arg rs m sp (S (Outgoing ofs ty)) v.
+
+Inductive extcall_args: regset -> mem -> val -> list loc -> list val -> Prop :=
+  | extcall_args_nil: forall rs m sp,
+      extcall_args rs m sp nil nil
+  | extcall_args_cons: forall rs m sp l1 ll v1 vl,
+      extcall_arg rs m sp l1 v1 -> extcall_args rs m sp ll vl ->
+      extcall_args rs m sp (l1 :: ll) (v1 :: vl).
+
+Definition extcall_arguments
+   (rs: regset) (m: mem) (sp: val) (sg: signature) (args: list val) : Prop :=
+  extcall_args rs m sp (Conventions.loc_arguments sg) args.
+
+(** The components of an execution state are:
+
+- [State cs f sp c rs m]: [f] is the block reference corresponding
+  to the function currently executing.  [sp] is the stack pointer.
+  [c] is the list of instructions that remain to be executed.
+  [rs] assigns values to registers.  [m] is the memory state.
+- [Callstate cs f rs m]: [f] is the block reference corresponding
+  to the function being called. [rs] is the current values of registers,
+  and [m] the current memory state.
+- [Returnstate cs rs m]: [rs] is the current values of registers,
+  and [m] the current memory state.
+
+[cs] is a list of stack frames [Stackframe f sp retaddr c],
+where [f] is the block reference for the calling function,
+[c] the code within this function that follows the call instruction,
+[sp] its stack pointer, and [retaddr] the return address predicted
+by [PPCgenretaddr.return_address_offset].
+*)
+
+Inductive stackframe: Set :=
+  | Stackframe:
+      forall (f: block) (sp retaddr: val) (c: code),
+      stackframe.
+
+Inductive state: Set :=
+  | State:
+      forall (stack: list stackframe) (f: block) (sp: val)
+             (c: code) (rs: regset) (m: mem),
+      state
+  | Callstate:
+      forall (stack: list stackframe) (f: block) (rs: regset) (m: mem),
+      state
+  | Returnstate:
+      forall (stack: list stackframe) (rs: regset) (m: mem),
+      state.
+
+Definition parent_sp (s: list stackframe) : val :=
+  match s with
+  | nil => Vptr Mem.nullptr Int.zero
+  | Stackframe f sp ra c :: s' => sp
+  end.
+
+Definition parent_ra (s: list stackframe) : val :=
+  match s with
+  | nil => Vzero
+  | Stackframe f sp ra c :: s' => ra
+  end.
+
+Section RELSEM.
+
+Variable ge: genv.
+
+Inductive step: state -> trace -> state -> Prop :=
+  | exec_Mlabel:
+      forall s f sp lbl c rs m,
+      step (State s f sp (Mlabel lbl :: c) rs m)
+        E0 (State s f sp c rs m)
+  | exec_Mgetstack:
+      forall s f sp ofs ty dst c rs m v,
+      load_stack m sp ty ofs = Some v ->
+      step (State s f sp (Mgetstack ofs ty dst :: c) rs m)
+        E0 (State s f sp c (rs#dst <- v) m)
+  | exec_Msetstack:
+      forall s f sp src ofs ty c rs m m',
+      store_stack m sp ty ofs (rs src) = Some m' ->
+      step (State s f sp (Msetstack src ofs ty :: c) rs m)
+        E0 (State s f sp c rs m')
+  | exec_Mgetparam:
+      forall s f sp parent ofs ty dst c rs m v,
+      load_stack m sp Tint (Int.repr 0) = Some parent ->
+      load_stack m parent ty ofs = Some v ->
+      step (State s f sp (Mgetparam ofs ty dst :: c) rs m)
+        E0 (State s f sp c (rs#dst <- v) m)
+  | exec_Mop:
+      forall s f sp op args res c rs m v,
+      eval_operation ge sp op rs##args m = Some v ->
+      step (State s f sp (Mop op args res :: c) rs m)
+        E0 (State s f sp c (rs#res <- v) m)
+  | exec_Mload:
+      forall s f sp chunk addr args dst c rs m a v,
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = Some v ->
+      step (State s f sp (Mload chunk addr args dst :: c) rs m)
+        E0 (State s f sp c (rs#dst <- v) m)
+  | exec_Mstore:
+      forall s f sp chunk addr args src c rs m m' a,
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.storev chunk m a (rs src) = Some m' ->
+      step (State s f sp (Mstore chunk addr args src :: c) rs m)
+        E0 (State s f sp c rs m')
+  | exec_Mcall:
+      forall s fb sp sig ros c rs m f f' ra,
+      find_function_ptr ge ros rs = Some f' ->
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      PPCgenretaddr.return_address_offset f c ra ->
+      step (State s fb sp (Mcall sig ros :: c) rs m)
+        E0 (Callstate (Stackframe fb sp (Vptr fb ra) c :: s)
+                       f' rs m)
+  | exec_Mtailcall:
+      forall s fb stk soff sig ros c rs m f',
+      find_function_ptr ge ros rs = Some f' ->
+      load_stack m (Vptr stk soff) Tint (Int.repr 0) = Some (parent_sp s) ->
+      load_stack m (Vptr stk soff) Tint (Int.repr 12) = Some (parent_ra s) ->
+      step (State s fb (Vptr stk soff) (Mtailcall sig ros :: c) rs m)
+        E0 (Callstate s f' rs (Mem.free m stk))
+  | exec_Malloc:
+      forall s f sp c rs m sz m' blk,
+      rs (Conventions.loc_alloc_argument) = Vint sz ->
+      Mem.alloc m 0 (Int.signed sz) = (m', blk) ->
+      step (State s f sp (Malloc :: c) rs m)
+        E0 (State s f sp c
+                  (rs#Conventions.loc_alloc_result <- (Vptr blk Int.zero))
+                   m')
+  | exec_Mgoto:
+      forall s fb f sp lbl c rs m c',
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      find_label lbl f.(fn_code) = Some c' ->
+      step (State s fb sp (Mgoto lbl :: c) rs m)
+        E0 (State s fb sp c' rs m)
+  | exec_Mcond_true:
+      forall s fb f sp cond args lbl c rs m c',
+      eval_condition cond rs##args m = Some true ->
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      find_label lbl f.(fn_code) = Some c' ->
+      step (State s fb sp (Mcond cond args lbl :: c) rs m)
+        E0 (State s fb sp c' rs m)
+  | exec_Mcond_false:
+      forall s f sp cond args lbl c rs m,
+      eval_condition cond rs##args m = Some false ->
+      step (State s f sp (Mcond cond args lbl :: c) rs m)
+        E0 (State s f sp c rs m)
+  | exec_Mreturn:
+      forall s f stk soff c rs m,
+      load_stack m (Vptr stk soff) Tint (Int.repr 0) = Some (parent_sp s) ->
+      load_stack m (Vptr stk soff) Tint (Int.repr 12) = Some (parent_ra s) ->
+      step (State s f (Vptr stk soff) (Mreturn :: c) rs m)
+        E0 (Returnstate s rs (Mem.free m stk))
+  | exec_function_internal:
+      forall s fb rs m f m1 m2 m3 stk,
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      Mem.alloc m (- f.(fn_framesize))
+                  (align_16_top (- f.(fn_framesize)) f.(fn_stacksize))
+                                                        = (m1, stk) ->
+      let sp := Vptr stk (Int.repr (-f.(fn_framesize))) in
+      store_stack m1 sp Tint (Int.repr 0) (parent_sp s) = Some m2 ->
+      store_stack m2 sp Tint (Int.repr 12) (parent_ra s) = Some m3 ->
+      step (Callstate s fb rs m)
+        E0 (State s fb sp f.(fn_code) rs m3)
+  | exec_function_external:
+      forall s fb rs m t rs' ef args res,
+      Genv.find_funct_ptr ge fb = Some (External ef) ->
+      event_match ef args t res ->
+      extcall_arguments rs m (parent_sp s) ef.(ef_sig) args ->
+      rs' = (rs#(Conventions.loc_result ef.(ef_sig)) <- res) ->
+      step (Callstate s fb rs m)
+         t (Returnstate s rs' m)
+  | exec_return:
+      forall s f sp ra c rs m,
+      step (Returnstate (Stackframe f sp ra c :: s) rs m)
+        E0 (State s f sp c rs m).
+
+End RELSEM.
+
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall fb,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some fb ->
+      initial_state p (Callstate nil fb (Regmap.init Vundef) m0).
+
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall rs m r,
+      rs R3 = Vint r ->
+      final_state (Returnstate nil rs m) r.
+
+Definition exec_program (p: program) (beh: program_behavior) : Prop :=
+  program_behaves step (initial_state p) final_state (Genv.globalenv p) beh.
diff --git a/backend/Machtyping.v b/backend/Machtyping.v
index ad3ee886..28037cec 100644
--- a/backend/Machtyping.v
+++ b/backend/Machtyping.v
@@ -26,7 +26,7 @@ Inductive wt_instr : instruction -> Prop :=
      wt_instr (Mgetstack ofs ty r)
   | wt_Msetstack:
      forall ofs ty r,
-     mreg_type r = ty -> 24 <= Int.signed ofs ->
+     mreg_type r = ty ->
      wt_instr (Msetstack r ofs ty)
   | wt_Mgetparam:
      forall ofs ty r,
@@ -36,12 +36,9 @@ Inductive wt_instr : instruction -> Prop :=
      forall r1 r,
      mreg_type r1 = mreg_type r ->
      wt_instr (Mop Omove (r1 :: nil) r)
-  | wt_Mopundef:
-     forall r,
-     wt_instr (Mop Oundef nil r)
   | wt_Mop:
      forall op args res,
-      op <> Omove -> op <> Oundef ->
+      op <> Omove ->
       (List.map mreg_type args, mreg_type res) = type_of_operation op ->
       wt_instr (Mop op args res)
   | wt_Mload:
@@ -58,6 +55,11 @@ Inductive wt_instr : instruction -> Prop :=
       forall sig ros,
       match ros with inl r => mreg_type r = Tint | inr s => True end ->
       wt_instr (Mcall sig ros)
+  | wt_Mtailcall:
+      forall sig ros,
+      Conventions.tailcall_possible sig ->
+      match ros with inl r => mreg_type r = Tint | inr s => True end ->
+      wt_instr (Mtailcall sig ros)
   | wt_Malloc:
       wt_instr Malloc
   | wt_Mgoto:
@@ -95,27 +97,20 @@ Definition wt_program (p: program) : Prop :=
 
 Require Import Machabstr.
 
-(** We show a weak type soundness result for the alternate semantics
+(** We show a weak type soundness result for the abstract semantics
     of Mach: for a well-typed Mach program, if a transition is taken
     from a state where registers hold values of their static types,
     registers in the final state hold values of their static types
     as well.  This is a subject reduction theorem for our type system.
     It is used in the proof of implication from the abstract Mach
-    semantics to the concrete Mach semantics (file [Machabstr2mach]).
+    semantics to the concrete Mach semantics (file [Machabstr2concr]).
 *)
 
 Definition wt_regset (rs: regset) : Prop :=
   forall r, Val.has_type (rs r) (mreg_type r).
 
-Definition wt_content (c: content) : Prop :=
-  match c with
-  | Datum32 v => Val.has_type v Tint
-  | Datum64 v => Val.has_type v Tfloat
-  | _ => True
-  end.
-
 Definition wt_frame (fr: frame) : Prop :=
-  forall ofs, wt_content (fr.(contents) ofs).
+  forall ty ofs, Val.has_type (fr.(fr_contents) ty ofs) ty.
 
 Lemma wt_setreg:
   forall (rs: regset) (r: mreg) (v: val),
@@ -134,17 +129,8 @@ Lemma wt_get_slot:
   wt_frame fr ->
   Val.has_type v ty. 
 Proof.
-  induction 1; intro. subst v. 
-  set (pos := low fr + ofs).
-  case ty; simpl.
-  unfold getN. case (check_cont 3 (pos + 1) (contents fr)).
-  generalize (H2 pos). unfold wt_content.
-  destruct (contents fr pos); simpl; tauto.
-  simpl; auto.
-  unfold getN. case (check_cont 7 (pos + 1) (contents fr)).
-  generalize (H2 pos). unfold wt_content.
-  destruct (contents fr pos); simpl; tauto.
-  simpl; auto.
+  induction 1; intros.
+  subst v. apply H2.
 Qed.
 
 Lemma wt_set_slot:
@@ -154,43 +140,37 @@ Lemma wt_set_slot:
   Val.has_type v ty ->
   wt_frame fr'.
 Proof.
-  intros. induction H. 
-  set (i := low fr + ofs).
-  red; intro j; simpl. 
-  assert (forall n ofs c,
-            let c' := set_cont n ofs c in
-            forall k, c' k = c k \/ c' k = Cont).
-    induction n; simpl; intros.
-    left; auto.
-    unfold update. case (zeq k ofs0); intro.
-    right; auto.
-    apply IHn.
-  destruct ty; simpl; unfold store_contents; unfold setN;
-  unfold update; case (zeq j i); intro.
-  red. assumption.
-  elim (H 3%nat (i + 1) (contents fr) j); intro; rewrite H2.
-  apply H0. red; auto. 
-  red. assumption.
-  elim (H 7%nat (i + 1) (contents fr) j); intro; rewrite H2.
-  apply H0. red; auto. 
+  intros. induction H. subst fr'; red; intros; simpl.
+  destruct (zeq (fr_low fr + ofs) ofs0).
+  destruct (typ_eq ty ty0). congruence. exact I.
+  destruct (zle (ofs0 + AST.typesize ty0) (fr_low fr + ofs)).
+  apply H0.
+  destruct (zle (fr_low fr + ofs + AST.typesize ty) ofs0).
+  apply H0.
+  exact I.
+Qed.
+
+Lemma wt_empty_frame:
+  wt_frame empty_frame.
+Proof.
+  intros; red; intros; exact I.
 Qed.
 
 Lemma wt_init_frame:
   forall f,
   wt_frame (init_frame f).
 Proof.
-  intros. unfold init_frame. 
-  red; intros. simpl. exact I.
+  intros; red; intros; exact I.
 Qed.
 
-Lemma incl_find_label:
-  forall lbl c c', find_label lbl c = Some c' -> incl c' c.
+Lemma is_tail_find_label:
+  forall lbl c c', find_label lbl c = Some c' -> is_tail c' c.
 Proof.
   induction c; simpl.
   intros; discriminate.
   case (is_label lbl a); intros.
-  injection H; intro; subst c'; apply incl_tl; apply incl_refl.
-  apply incl_tl; auto.
+  injection H; intro; subst c'. constructor. constructor.
+  constructor; auto.
 Qed.
 
 Lemma wt_event_match:
@@ -203,193 +183,115 @@ Qed.
 
 Section SUBJECT_REDUCTION.
 
+Inductive wt_stackframe: stackframe -> Prop :=
+  | wt_stackframe_intro: forall f sp c fr,
+      wt_function f ->
+      Val.has_type sp Tint ->
+      is_tail c f.(fn_code) ->
+      wt_frame fr ->
+      wt_stackframe (Stackframe f sp c fr).
+
+Inductive wt_state: state -> Prop :=
+  | wt_state_intro: forall stk f sp c rs fr m
+      (STK: forall s, In s stk -> wt_stackframe s)
+      (WTF: wt_function f)
+      (WTSP: Val.has_type sp Tint)
+      (TAIL: is_tail c f.(fn_code))
+      (WTRS: wt_regset rs)
+      (WTFR: wt_frame fr),
+      wt_state (State stk f sp c rs fr m)
+  | wt_state_call: forall stk f rs m,
+      (forall s, In s stk -> wt_stackframe s) ->
+      wt_fundef f ->
+      wt_regset rs ->
+      wt_state (Callstate stk f rs m)
+  | wt_state_return: forall stk rs m,
+      (forall s, In s stk -> wt_stackframe s) ->
+      wt_regset rs ->
+      wt_state (Returnstate stk rs m).
+
 Variable p: program.
 Hypothesis wt_p: wt_program p.
 Let ge := Genv.globalenv p.
 
-Definition exec_instr_prop 
-  (f: function) (sp: val) (parent: frame)
-  (c1: code) (rs1: regset) (fr1: frame) (m1: mem) (t: trace)
-  (c2: code) (rs2: regset) (fr2: frame) (m2: mem) :=
-    forall (WTF: wt_function f)
-           (INCL: incl c1 f.(fn_code))
-           (WTRS: wt_regset rs1)
-           (WTFR: wt_frame fr1)
-           (WTPA: wt_frame parent),
-    incl c2 f.(fn_code) /\ wt_regset rs2 /\ wt_frame fr2.
-Definition exec_function_body_prop
-  (f: function) (parent: frame) (link ra: val)
-  (rs1: regset) (m1: mem) (t: trace) (rs2: regset) (m2: mem) :=
-    forall (WTF: wt_function f)
-           (WTRS: wt_regset rs1)
-           (WTPA: wt_frame parent)
-           (WTLINK: Val.has_type link Tint)
-           (WTRA: Val.has_type ra Tint),
-    wt_regset rs2.
-Definition exec_function_prop
-  (f: fundef) (parent: frame)
-  (rs1: regset) (m1: mem) (t: trace) (rs2: regset) (m2: mem) :=
-    forall (WTF: wt_fundef f)
-           (WTRS: wt_regset rs1)
-           (WTPA: wt_frame parent),
-    wt_regset rs2.
-
 Lemma subject_reduction:
-   (forall f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2,
-      exec_instr ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2 ->
-      exec_instr_prop f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2)
-/\ (forall f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2,
-      exec_instrs ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2 ->
-      exec_instr_prop f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2)
-/\ (forall f parent link ra rs1 m1 t rs2 m2,
-      exec_function_body ge f parent link ra rs1 m1 t rs2 m2 ->
-      exec_function_body_prop f parent link ra rs1 m1 t rs2 m2)
-/\ (forall f parent rs1 m1 t rs2 m2,
-      exec_function ge f parent rs1 m1 t rs2 m2 ->
-      exec_function_prop f parent rs1 m1 t rs2 m2).
+  forall s1 t s2, step ge s1 t s2 ->
+  forall (WTS: wt_state s1), wt_state s2.
 Proof.
-  apply exec_mutual_induction; red; intros; 
-  try (generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))); intro;
-       intuition eauto with coqlib).
+  induction 1; intros; inv WTS;
+  try (generalize (wt_function_instrs _ WTF _ (is_tail_in TAIL)); intro;
+       eapply wt_state_intro; eauto with coqlib).
 
   apply wt_setreg; auto. 
   inversion H0. rewrite H2. apply wt_get_slot with fr (Int.signed ofs); auto.
 
   inversion H0. eapply wt_set_slot; eauto. 
-  rewrite <- H3. apply WTRS.
+  rewrite <- H2. apply WTRS.
 
+  assert (wt_frame (parent_frame s)).
+    destruct s; simpl. apply wt_empty_frame. 
+    generalize (STK s (in_eq _ _)); intro. inv H1. auto. 
   inversion H0. apply wt_setreg; auto.
-  rewrite H2. apply wt_get_slot with parent (Int.signed ofs); auto.
+  rewrite H3. apply wt_get_slot with (parent_frame s) (Int.signed ofs); auto.
 
-  apply wt_setreg; auto. inversion H0.
-    simpl. subst args; subst op. simpl in H. 
+  apply wt_setreg; auto. inv H0.
+    simpl in H. 
     rewrite <- H2. replace v with (rs r1). apply WTRS. congruence.
-    subst args; subst op. simpl in H. 
-    replace v with Vundef. simpl; auto. congruence.
     replace (mreg_type res) with (snd (type_of_operation op)).
-    apply type_of_operation_sound with fundef ge rs##args sp; auto.
-    rewrite <- H6; reflexivity.
+    apply type_of_operation_sound with fundef ge rs##args sp m; auto.
+    rewrite <- H5; reflexivity.
 
   apply wt_setreg; auto. inversion H1. rewrite H7.
   eapply type_of_chunk_correct; eauto.
 
-  apply H1; auto.
-  destruct ros; simpl in H. 
-  apply (Genv.find_funct_prop wt_fundef wt_p H).
-  destruct (Genv.find_symbol ge i). 
-  apply (Genv.find_funct_ptr_prop wt_fundef wt_p H).
-  discriminate.
+  assert (WTFD: wt_fundef f').
+    destruct ros; simpl in H.
+    apply (Genv.find_funct_prop wt_fundef wt_p H).
+    destruct (Genv.find_symbol ge i); try discriminate.
+    apply (Genv.find_funct_ptr_prop wt_fundef wt_p H).
+  econstructor; eauto.
+  intros. elim H0; intro. subst s0. econstructor; eauto with coqlib.
+  auto.
+
+  assert (WTFD: wt_fundef f').
+    destruct ros; simpl in H.
+    apply (Genv.find_funct_prop wt_fundef wt_p H).
+    destruct (Genv.find_symbol ge i); try discriminate.
+    apply (Genv.find_funct_ptr_prop wt_fundef wt_p H).
+  econstructor; eauto.
 
   apply wt_setreg; auto. exact I. 
 
-  apply incl_find_label with lbl; auto. 
-  apply incl_find_label with lbl; auto. 
+  apply is_tail_find_label with lbl; congruence.
+  apply is_tail_find_label with lbl; congruence. 
 
-  tauto.
-  apply H0; auto.
-  generalize (H0 WTF INCL WTRS WTFR WTPA). intros [A [B C]].
-  apply H2; auto.
+  econstructor; eauto.
 
-  assert (WTFR2: wt_frame fr2).
-    eapply wt_set_slot; eauto. 
-    eapply wt_set_slot; eauto.
-    apply wt_init_frame.
-  generalize (H3 WTF (incl_refl _) WTRS WTFR2 WTPA).
-  tauto.
+  econstructor; eauto with coqlib. inv H5; auto. exact I. 
+  apply wt_init_frame.
 
-  apply (H0 Vzero Vzero). exact I. exact I. 
-  inversion WTF; auto. auto. auto. 
-  exact I. exact I.
-
-  rewrite H1. apply wt_setreg; auto. 
+  econstructor; eauto. apply wt_setreg; auto.
   generalize (wt_event_match _ _ _ _ H). 
   unfold proj_sig_res, Conventions.loc_result.
   destruct (sig_res (ef_sig ef)).
-  destruct t; simpl; auto.
+  destruct t0; simpl; auto.
   simpl; auto.
-Qed.
-
-Lemma subject_reduction_instr:
-   forall f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2,
-      exec_instr ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2 ->
-      exec_instr_prop f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2.
-Proof (proj1 subject_reduction).
-
-Lemma subject_reduction_instrs:
-   forall f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2,
-      exec_instrs ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2 ->
-      exec_instr_prop f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2.
-Proof (proj1 (proj2 subject_reduction)).
 
-Lemma subject_reduction_function:
-  forall f parent rs1 m1 t rs2 m2,
-      exec_function ge f parent rs1 m1 t rs2 m2 ->
-      exec_function_prop f parent rs1 m1 t rs2 m2.
-Proof (proj2 (proj2 (proj2 subject_reduction))).
-
-End SUBJECT_REDUCTION.
-
-(** * Preservation of the reserved frame slots during execution *)
-
-(** We now show another result useful for the proof of implication
-    between the two Mach semantics: well-typed functions do not
-    change the values of the back link and return address fields
-    of the frame slot (at offsets 0 and 12) during their execution.
-    Actually, we show that the whole reserved part of the frame
-    (between offsets 0 and 24) does not change. *)
-
-Definition link_invariant (fr1 fr2: frame) : Prop :=
-  forall pos v,
-  0 <= pos < 20 ->
-  get_slot fr1 Tint pos v -> get_slot fr2 Tint pos v.
-
-Remark link_invariant_refl:
-  forall fr, link_invariant fr fr.
-Proof.
-  intros; red; auto.
+  generalize (H1 _ (in_eq _ _)); intro. inv H.
+  econstructor; eauto. 
+  eauto with coqlib.
 Qed.
-
-Lemma set_slot_link_invariant:
-  forall fr ty ofs v fr',
-  set_slot fr ty ofs v fr' ->
-  24 <= ofs ->
-  link_invariant fr fr'.
-Proof.
-  induction 1. intros; red; intros.
-  inversion H1. constructor. auto. exact H3. 
-  simpl contents. simpl low. symmetry. rewrite H4. 
-  apply load_store_contents_other. simpl. simpl in H3.
-  omega.
-Qed.
-
-Lemma exec_instr_link_invariant:
-  forall ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2,
-  exec_instr ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2 ->
-  wt_function f ->
-  incl c1 f.(fn_code) ->
-  incl c2 f.(fn_code) /\ link_invariant fr1 fr2.
-Proof.
-  induction 1; intros; 
-  (split; [eauto with coqlib | try (apply link_invariant_refl)]).
-  eapply set_slot_link_invariant; eauto.
-  generalize (wt_function_instrs _ H0 _ (H1 _ (in_eq _ _))); intro.
-  inversion H2. auto.
-  eapply incl_find_label; eauto.
-  eapply incl_find_label; eauto.
-Qed.
-
-Lemma exec_instrs_link_invariant:
-  forall ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2,
-  exec_instrs ge f sp parent c1 rs1 fr1 m1 t c2 rs2 fr2 m2 ->
-  wt_function f ->
-  incl c1 f.(fn_code) ->
-  incl c2 f.(fn_code) /\ link_invariant fr1 fr2.
+(*
+Lemma subject_reduction_2:
+  forall s1 t s2, step ge s1 t s2 ->
+  forall (WTS: wt_state s1), wt_state s2.
 Proof.
   induction 1; intros.
-  split. auto. apply link_invariant_refl.
-  eapply exec_instr_link_invariant; eauto.
-  generalize (IHexec_instrs1 H2 H3); intros [A B].
-  generalize (IHexec_instrs2 H2 A); intros [C D].
-  split. auto. red; auto.
+  auto.
+  eapply subject_reduction; eauto.
+  auto.
 Qed.
+*)
+
+End SUBJECT_REDUCTION.
 
diff --git a/backend/Op.v b/backend/Op.v
index efd0d9ce..698b433c 100644
--- a/backend/Op.v
+++ b/backend/Op.v
@@ -1,5 +1,5 @@
 (** Operators and addressing modes.  The abstract syntax and dynamic
-  semantics for the Cminor, RTL, LTL and Mach languages depend on the
+  semantics for the CminorSel, RTL, LTL and Mach languages depend on the
   following types, defined in this library:
 - [condition]:  boolean conditions for conditional branches;
 - [operation]: arithmetic and logical operations;
@@ -9,7 +9,7 @@
   processor can compute in one instruction.  In other terms, these
   types reflect the state of the program after instruction selection.
   For a processor-independent set of operations, see the abstract
-  syntax and dynamic semantics of the Csharpminor input language.
+  syntax and dynamic semantics of the Cminor language.
 *)
 
 Require Import Coqlib.
@@ -43,7 +43,6 @@ Inductive operation : Set :=
   | Ofloatconst: float -> operation     (**r [rd] is set to the given float constant *)
   | Oaddrsymbol: ident -> int -> operation (**r [rd] is set to the the address of the symbol plus the offset *)
   | Oaddrstack: int -> operation        (**r [rd] is set to the stack pointer plus the given offset *)
-  | Oundef: operation                   (**r set [rd] to undefined value *)
 (*c Integer arithmetic: *)
   | Ocast8signed: operation             (**r [rd] is 8-bit sign extension of [r1] *)
   | Ocast8unsigned: operation           (**r [rd] is 8-bit zero extension of [r1] *)
@@ -110,32 +109,28 @@ Definition eval_compare_null (c: comparison) (n: int) : option bool :=
   then match c with Ceq => Some false | Cne => Some true | _ => None end
   else None.
 
-Definition eval_condition (cond: condition) (vl: list val) : option bool :=
+Definition eval_condition (cond: condition) (vl: list val) (m: mem):
+               option bool :=
   match cond, vl with
   | Ccomp c, Vint n1 :: Vint n2 :: nil =>
       Some (Int.cmp c n1 n2)
   | Ccomp c, Vptr b1 n1 :: Vptr b2 n2 :: nil =>
-      if eq_block b1 b2 then Some (Int.cmp c n1 n2) else None
+      if valid_pointer m b1 (Int.signed n1)
+      && valid_pointer m b2 (Int.signed n2) then
+        if eq_block b1 b2 then Some (Int.cmp c n1 n2) else None
+      else None
   | Ccomp c, Vptr b1 n1 :: Vint n2 :: nil =>
       eval_compare_null c n2
   | Ccomp c, Vint n1 :: Vptr b2 n2 :: nil =>
       eval_compare_null c n1
   | Ccompu c, Vint n1 :: Vint n2 :: nil =>
       Some (Int.cmpu c n1 n2)
-  | Ccompu c, Vptr b1 n1 :: Vptr b2 n2 :: nil =>
-      if eq_block b1 b2 then Some (Int.cmpu c n1 n2) else None
-  | Ccompu c, Vptr b1 n1 :: Vint n2 :: nil =>
-      eval_compare_null c n2
-  | Ccompu c, Vint n1 :: Vptr b2 n2 :: nil =>
-      eval_compare_null c n1
   | Ccompimm c n, Vint n1 :: nil =>
       Some (Int.cmp c n1 n)
   | Ccompimm c n, Vptr b1 n1 :: nil =>
       eval_compare_null c n
   | Ccompuimm c n, Vint n1 :: nil =>
       Some (Int.cmpu c n1 n)
-  | Ccompuimm c n, Vptr b1 n1 :: nil =>
-      eval_compare_null c n
   | Ccompf c, Vfloat f1 :: Vfloat f2 :: nil =>
       Some (Float.cmp c f1 f2)
   | Cnotcompf c, Vfloat f1 :: Vfloat f2 :: nil =>
@@ -156,7 +151,7 @@ Definition offset_sp (sp: val) (delta: int) : option val :=
 
 Definition eval_operation
     (F: Set) (genv: Genv.t F) (sp: val)
-    (op: operation) (vl: list val) : option val :=
+    (op: operation) (vl: list val) (m: mem): option val :=
   match op, vl with
   | Omove, v1::nil => Some v1
   | Ointconst n, nil => Some (Vint n)
@@ -167,7 +162,6 @@ Definition eval_operation
       | Some b => Some (Vptr b ofs)
       end
   | Oaddrstack ofs, nil => offset_sp sp ofs
-  | Oundef, nil => Some Vundef
   | Ocast8signed, v1 :: nil => Some (Val.cast8signed v1)
   | Ocast8unsigned, v1 :: nil => Some (Val.cast8unsigned v1)
   | Ocast16signed, v1 :: nil => Some (Val.cast16signed v1)
@@ -228,7 +222,7 @@ Definition eval_operation
   | Ofloatofintu, Vint n1 :: nil => 
       Some (Vfloat (Float.floatofintu n1))
   | Ocmp c, _ =>
-      match eval_condition c vl with
+      match eval_condition c vl m with
       | None => None
       | Some false => Some Vfalse
       | Some true => Some Vtrue
@@ -297,26 +291,23 @@ Proof.
 Qed.
 
 Lemma eval_negate_condition:
-  forall (cond: condition) (vl: list val) (b: bool),
-  eval_condition cond vl = Some b ->
-  eval_condition (negate_condition cond) vl = Some (negb b).
+  forall (cond: condition) (vl: list val) (b: bool) (m: mem),
+  eval_condition cond vl m = Some b ->
+  eval_condition (negate_condition cond) vl m = Some (negb b).
 Proof.
   intros. 
   destruct cond; simpl in H; FuncInv; try subst b; simpl.
   rewrite Int.negate_cmp. auto.
   apply eval_negate_compare_null; auto.
   apply eval_negate_compare_null; auto.
+  destruct (valid_pointer m b0 (Int.signed i) &&
+            valid_pointer m b1 (Int.signed i0)).
   destruct (eq_block b0 b1). rewrite Int.negate_cmp. congruence.
-  discriminate.
+  discriminate. discriminate.
   rewrite Int.negate_cmpu. auto.
-  apply eval_negate_compare_null; auto.
-  apply eval_negate_compare_null; auto.
-  destruct (eq_block b0 b1). rewrite Int.negate_cmpu. congruence.
-  discriminate.
   rewrite Int.negate_cmp. auto.
   apply eval_negate_compare_null; auto.
   rewrite Int.negate_cmpu. auto.
-  apply eval_negate_compare_null; auto.
   auto.
   rewrite negb_elim. auto.
   auto.
@@ -337,8 +328,8 @@ Hypothesis agree_on_symbols:
   forall (s: ident), Genv.find_symbol ge2 s = Genv.find_symbol ge1 s.
 
 Lemma eval_operation_preserved:
-  forall sp op vl,
-  eval_operation ge2 sp op vl = eval_operation ge1 sp op vl.
+  forall sp op vl m,
+  eval_operation ge2 sp op vl m = eval_operation ge1 sp op vl m.
 Proof.
   intros.
   unfold eval_operation; destruct op; try rewrite agree_on_symbols;
@@ -356,6 +347,74 @@ Qed.
 
 End GENV_TRANSF.
 
+(** [eval_condition] and [eval_operation] depend on a memory store
+    (to check pointer validity in pointer comparisons).
+    We show that their results are preserved by a change of
+    memory if this change preserves pointer validity.
+    In particular, this holds in case of a memory allocation
+    or a memory store. *)
+
+Lemma eval_condition_change_mem:
+  forall m m' c args b,
+  (forall b ofs, valid_pointer m b ofs = true -> valid_pointer m' b ofs = true) ->
+  eval_condition c args m = Some b -> eval_condition c args m' = Some b.
+Proof.
+  intros until b. intro INV. destruct c; simpl; auto.
+  destruct args; auto. destruct v; auto. destruct args; auto.
+  destruct v; auto. destruct args; auto. 
+  caseEq (valid_pointer m b0 (Int.signed i)); intro.
+  caseEq (valid_pointer m b1 (Int.signed i0)); intro.
+  simpl. rewrite (INV _ _ H). rewrite (INV _ _ H0). auto.
+  simpl; congruence. simpl; congruence.
+Qed.
+
+Lemma eval_operation_change_mem:
+  forall (F: Set) m m' (ge: Genv.t F) sp op args v,
+  (forall b ofs, valid_pointer m b ofs = true -> valid_pointer m' b ofs = true) ->
+  eval_operation ge sp op args m = Some v -> eval_operation ge sp op args m' = Some v.
+Proof.
+  intros until v; intro INV. destruct op; simpl; auto.
+  caseEq (eval_condition c args m); intros.
+  rewrite (eval_condition_change_mem _ _ _ _ INV H). auto.
+  discriminate.
+Qed.
+
+Lemma eval_condition_alloc:
+  forall m lo hi m' b c args v,
+  Mem.alloc m lo hi = (m', b) ->
+  eval_condition c args m = Some v -> eval_condition c args m' = Some v.
+Proof.
+  intros. apply eval_condition_change_mem with m; auto.
+  intros. eapply valid_pointer_alloc; eauto.
+Qed.
+
+Lemma eval_operation_alloc:
+  forall (F: Set) m lo hi m' b (ge: Genv.t F) sp op args v,
+  Mem.alloc m lo hi = (m', b) ->
+  eval_operation ge sp op args m = Some v -> eval_operation ge sp op args m' = Some v.
+Proof.
+  intros. apply eval_operation_change_mem with m; auto.
+  intros. eapply valid_pointer_alloc; eauto.
+Qed.
+
+Lemma eval_condition_store:
+  forall chunk m b ofs v' m' c args v,
+  Mem.store chunk m b ofs v' = Some m' ->
+  eval_condition c args m = Some v -> eval_condition c args m' = Some v.
+Proof.
+  intros. apply eval_condition_change_mem with m; auto.
+  intros. eapply valid_pointer_store; eauto.
+Qed.
+
+Lemma eval_operation_store:
+  forall (F: Set) chunk m b ofs v' m' (ge: Genv.t F) sp op args v,
+  Mem.store chunk m b ofs v' = Some m' ->
+  eval_operation ge sp op args m = Some v -> eval_operation ge sp op args m' = Some v.
+Proof.
+  intros. apply eval_operation_change_mem with m; auto.
+  intros. eapply valid_pointer_store; eauto.
+Qed.
+
 (** Recognition of move operations. *)
 
 Definition is_move_operation
@@ -398,7 +457,6 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Ofloatconst _ => (nil, Tfloat)
   | Oaddrsymbol _ _ => (nil, Tint)
   | Oaddrstack _ => (nil, Tint)
-  | Oundef => (nil, Tint)  (* treated specially *)
   | Ocast8signed => (Tint :: nil, Tint)
   | Ocast8unsigned => (Tint :: nil, Tint)
   | Ocast16signed => (Tint :: nil, Tint)
@@ -471,40 +529,40 @@ Variable A: Set.
 Variable genv: Genv.t A.
 
 Lemma type_of_operation_sound:
-  forall op vl sp v,
-  op <> Omove -> op <> Oundef ->
-  eval_operation genv sp op vl = Some v ->
+  forall op vl sp v m,
+  op <> Omove ->
+  eval_operation genv sp op vl m = Some v ->
   Val.has_type v (snd (type_of_operation op)).
 Proof.
   intros.
-  destruct op; simpl in H1; FuncInv; try subst v; try exact I.
+  destruct op; simpl in H0; FuncInv; try subst v; try exact I.
   congruence.
-  destruct (Genv.find_symbol genv i); simplify_eq H1; intro; subst v; exact I.
-  simpl. unfold offset_sp in H1. destruct sp; try discriminate.
-  inversion H1. exact I.
+  destruct (Genv.find_symbol genv i); simplify_eq H0; intro; subst v; exact I.
+  simpl. unfold offset_sp in H0. destruct sp; try discriminate.
+  inversion H0. exact I.
   destruct v0; exact I. 
   destruct v0; exact I. 
   destruct v0; exact I. 
   destruct v0; exact I. 
-  destruct (eq_block b b0). injection H1; intro; subst v; exact I.
+  destruct (eq_block b b0). injection H0; intro; subst v; exact I.
   discriminate.
   destruct (Int.eq i0 Int.zero). discriminate. 
-  injection H1; intro; subst v; exact I.
+  injection H0; intro; subst v; exact I.
   destruct (Int.eq i0 Int.zero). discriminate. 
-  injection H1; intro; subst v; exact I.
+  injection H0; intro; subst v; exact I.
   destruct (Int.ltu i0 (Int.repr 32)).
-  injection H1; intro; subst v; exact I. discriminate.
+  injection H0; intro; subst v; exact I. discriminate.
   destruct (Int.ltu i0 (Int.repr 32)).
-  injection H1; intro; subst v; exact I. discriminate.
+  injection H0; intro; subst v; exact I. discriminate.
   destruct (Int.ltu i (Int.repr 32)).
-  injection H1; intro; subst v; exact I. discriminate.
+  injection H0; intro; subst v; exact I. discriminate.
   destruct (Int.ltu i (Int.repr 32)).
-  injection H1; intro; subst v; exact I. discriminate.
+  injection H0; intro; subst v; exact I. discriminate.
   destruct (Int.ltu i0 (Int.repr 32)).
-  injection H1; intro; subst v; exact I. discriminate.
+  injection H0; intro; subst v; exact I. discriminate.
   destruct v0; exact I.
   destruct (eval_condition c vl). 
-  destruct b; injection H1; intro; subst v; exact I.
+  destruct b; injection H0; intro; subst v; exact I.
   discriminate.
 Qed.
 
@@ -519,7 +577,7 @@ Proof.
   intros until v. unfold Mem.loadv. 
   destruct addr; intros; try discriminate.
   generalize (Mem.load_inv _ _ _ _ _ H0).
-  intros [X [Y [Z W]]].  subst v.  apply H.
+  intros [X Y].  subst v.  apply H.
 Qed.
 
 End SOUNDNESS.
@@ -559,7 +617,6 @@ Definition eval_operation_total (sp: val) (op: operation) (vl: list val) : val :
   | Ofloatconst n, nil => Vfloat n
   | Oaddrsymbol s ofs, nil => find_symbol_offset s ofs
   | Oaddrstack ofs, nil => Val.add sp (Vint ofs)
-  | Oundef, nil => Vundef
   | Ocast8signed, v1::nil => Val.cast8signed v1
   | Ocast8unsigned, v1::nil => Val.cast8unsigned v1
   | Ocast16signed, v1::nil => Val.cast16signed v1
@@ -625,23 +682,25 @@ Proof.
 Qed.
 
 Lemma eval_condition_weaken:
-  forall c vl b,
-  eval_condition c vl = Some b ->
+  forall c vl m b,
+  eval_condition c vl m = Some b ->
   eval_condition_total c vl = Val.of_bool b.
 Proof.
   intros. 
   unfold eval_condition in H; destruct c; FuncInv;
   try subst b; try reflexivity; simpl;
   try (apply eval_compare_null_weaken; auto).
+  destruct (valid_pointer m b0 (Int.signed i) &&
+            valid_pointer m b1 (Int.signed i0)).
   unfold eq_block in H. destruct (zeq b0 b1); congruence.
-  unfold eq_block in H. destruct (zeq b0 b1); congruence.
+  discriminate.
   symmetry. apply Val.notbool_negb_1. 
   symmetry. apply Val.notbool_negb_1. 
 Qed.
 
 Lemma eval_operation_weaken:
-  forall sp op vl v,
-  eval_operation genv sp op vl = Some v ->
+  forall sp op vl m v,
+  eval_operation genv sp op vl m = Some v ->
   eval_operation_total sp op vl = v.
 Proof.
   intros.
@@ -660,9 +719,9 @@ Proof.
   destruct (Int.ltu i (Int.repr 32)); congruence.
   destruct (Int.ltu i (Int.repr 32)); congruence.
   destruct (Int.ltu i0 (Int.repr 32)); congruence.
-  caseEq (eval_condition c vl); intros; rewrite H0 in H.
+  caseEq (eval_condition c vl m); intros; rewrite H0 in H.
   replace v with (Val.of_bool b).
-  apply eval_condition_weaken; auto.
+  eapply eval_condition_weaken; eauto.
   destruct b; simpl; congruence.
   discriminate.
 Qed.
@@ -702,3 +761,95 @@ Proof.
 Qed.
 
 End EVAL_OP_TOTAL.
+
+(** Compatibility of the evaluation functions with the
+    ``is less defined'' relation over values and memory states. *)
+
+Section EVAL_LESSDEF.
+
+Variable F: Set.
+Variable genv: Genv.t F.
+Variables m1 m2: mem.
+Hypothesis MEM: Mem.lessdef m1 m2.
+
+Ltac InvLessdef :=
+  match goal with
+  | [ H: Val.lessdef (Vint _) _ |- _ ] =>
+      inv H; InvLessdef
+  | [ H: Val.lessdef (Vfloat _) _ |- _ ] =>
+      inv H; InvLessdef
+  | [ H: Val.lessdef (Vptr _ _) _ |- _ ] =>
+      inv H; InvLessdef
+  | [ H: Val.lessdef_list nil _ |- _ ] =>
+      inv H; InvLessdef
+  | [ H: Val.lessdef_list (_ :: _) _ |- _ ] =>
+      inv H; InvLessdef
+  | _ => idtac
+  end.
+
+Lemma eval_condition_lessdef:
+  forall cond vl1 vl2 b,
+  Val.lessdef_list vl1 vl2 ->
+  eval_condition cond vl1 m1 = Some b ->
+  eval_condition cond vl2 m2 = Some b.
+Proof.
+  intros. destruct cond; simpl in *; FuncInv; InvLessdef; auto.
+  generalize H0.
+  caseEq (valid_pointer m1 b0 (Int.signed i)); intro; simpl; try congruence.
+  caseEq (valid_pointer m1 b1 (Int.signed i0)); intro; simpl; try congruence.
+  destruct (eq_block b0 b1); try congruence.
+  intro. inv H2. 
+  rewrite (Mem.valid_pointer_lessdef _ _ _ _ MEM H1).
+  rewrite (Mem.valid_pointer_lessdef _ _ _ _ MEM H).
+  auto.
+Qed.
+
+Ltac TrivialExists :=
+  match goal with
+  | [ |- exists v2, Some ?v1 = Some v2 /\ Val.lessdef ?v1 v2 ] =>
+      exists v1; split; [auto | constructor]
+  | _ => idtac
+  end.
+
+Lemma eval_operation_lessdef:
+  forall sp op vl1 vl2 v1,
+  Val.lessdef_list vl1 vl2 ->
+  eval_operation genv sp op vl1 m1 = Some v1 ->
+  exists v2, eval_operation genv sp op vl2 m2 = Some v2 /\ Val.lessdef v1 v2.
+Proof.
+  intros. destruct op; simpl in *; FuncInv; InvLessdef; TrivialExists.
+  exists v2; auto.
+  destruct (Genv.find_symbol genv i); inv H0. TrivialExists.
+  exists v1; auto.
+  exists (Val.cast8signed v2); split. auto. apply Val.cast8signed_lessdef; auto.
+  exists (Val.cast8unsigned v2); split. auto. apply Val.cast8unsigned_lessdef; auto.
+  exists (Val.cast16signed v2); split. auto. apply Val.cast16signed_lessdef; auto.
+  exists (Val.cast16unsigned v2); split. auto. apply Val.cast16unsigned_lessdef; auto.
+  destruct (eq_block b b0); inv H0. TrivialExists.
+  destruct (Int.eq i0 Int.zero); inv H0; TrivialExists.
+  destruct (Int.eq i0 Int.zero); inv H0; TrivialExists.
+  destruct (Int.ltu i0 (Int.repr 32)); inv H0; TrivialExists.
+  destruct (Int.ltu i0 (Int.repr 32)); inv H0; TrivialExists.
+  destruct (Int.ltu i (Int.repr 32)); inv H0; TrivialExists.
+  destruct (Int.ltu i (Int.repr 32)); inv H0; TrivialExists.
+  destruct (Int.ltu i0 (Int.repr 32)); inv H0; TrivialExists.
+  exists (Val.singleoffloat v2); split. auto. apply Val.singleoffloat_lessdef; auto.
+  caseEq (eval_condition c vl1 m1); intros. rewrite H1 in H0. 
+  rewrite (eval_condition_lessdef c H H1).
+  destruct b; inv H0; TrivialExists.
+  rewrite H1 in H0. discriminate.
+Qed.
+
+Lemma eval_addressing_lessdef:
+  forall sp addr vl1 vl2 v1,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = Some v1 ->
+  exists v2, eval_addressing genv sp addr vl2 = Some v2 /\ Val.lessdef v1 v2.
+Proof.
+  intros. destruct addr; simpl in *; FuncInv; InvLessdef; TrivialExists.
+  destruct (Genv.find_symbol genv i); inv H0. TrivialExists.
+  destruct (Genv.find_symbol genv i); inv H0. TrivialExists.
+  exists v1; auto.
+Qed.
+
+End EVAL_LESSDEF.
diff --git a/backend/PPC.v b/backend/PPC.v
index ba645d02..66d96c29 100644
--- a/backend/PPC.v
+++ b/backend/PPC.v
@@ -9,6 +9,7 @@ Require Import Values.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 
 (** * Abstract syntax *)
 
@@ -97,7 +98,8 @@ Inductive instruction : Set :=
   | Pbctrl: instruction                                       (**r branch to contents of CTR and link *)
   | Pbf: crbit -> label -> instruction                        (**r branch if false *)
   | Pbl: ident -> instruction                                 (**r branch and link *)
-  | Pblr: instruction                                         (**r branch to contents: register LR *)
+  | Pbs: ident -> instruction                                 (**r branch to symbol *)
+  | Pblr: instruction                                         (**r branch to contents of register LR *)
   | Pbt: crbit -> label -> instruction                        (**r branch if true *)
   | Pcmplw: ireg -> ireg -> instruction                       (**r unsigned integer comparison *)
   | Pcmplwi: ireg -> constant -> instruction                  (**r same, with immediate argument *)
@@ -170,14 +172,11 @@ Inductive instruction : Set :=
   | Pxor: ireg -> ireg -> ireg -> instruction                 (**r bitwise xor *)
   | Pxori: ireg -> ireg -> constant -> instruction            (**r bitwise xor with immediate *)
   | Pxoris: ireg -> ireg -> constant -> instruction           (**r bitwise xor with immediate high *)
-  | Piundef: ireg -> instruction                              (**r set int reg to [Vundef] *)
-  | Pfundef: freg -> instruction                              (**r set float reg to [Vundef] *)
   | Plabel: label -> instruction.                             (**r define a code label *)
 
 (** The pseudo-instructions are the following:
 
 - [Plabel]: define a code label at the current program point
-
 - [Plfi]: load a floating-point constant in a float register.
   Expands to a float load [lfd] from an address in the constant data section
   initialized with the floating-point constant:
@@ -190,7 +189,6 @@ lbl:    .double floatcst
 >>
   Initialized data in the constant data section are not modeled here,
   which is why we use a pseudo-instruction for this purpose.
-
 - [Pfcti]: convert a float to an integer.  This requires a transfer
   via memory of a 32-bit integer from a float register to an int register,
   which our memory model cannot express.  Expands to:
@@ -200,7 +198,6 @@ lbl:    .double floatcst
         lwz     rdst, 4(r1)
         addi    r1, r1, 8
 >>
-
 - [Pictf]: convert a signed integer to a float.  This requires complicated
   bit-level manipulations of IEEE floats through mixed float and integer 
   arithmetic over a memory word, which our memory model and axiomatization
@@ -221,7 +218,6 @@ lbl:    .long   0x43300000, 0x80000000
 >>
   (Don't worry if you do not understand this instruction sequence: intimate
   knowledge of IEEE float arithmetic is necessary.)
-
 - [Piuctf]: convert an unsigned integer to a float.  The expansion is close
   to that [Pictf], and equally obscure.
 <<
@@ -237,7 +233,6 @@ lbl:    .long   0x43300000, 0x80000000
 lbl:    .long   0x43300000, 0x00000000
         .text
 >>
-
 - [Pallocframe lo hi]: in the formal semantics, this pseudo-instruction
   allocates a memory block with bounds [lo] and [hi], stores the value
   of register [r1] (the stack pointer, by convention) at the bottom
@@ -250,7 +245,6 @@ lbl:    .long   0x43300000, 0x00000000
   This cannot be expressed in our memory model, which does not reflect
   the fact that stack frames are adjacent and allocated/freed
   following a stack discipline.
-
 - [Pfreeframe]: in the formal semantics, this pseudo-instruction
   reads the bottom word of the block pointed by [r1] (the stack pointer),
   frees this block, and sets [r1] to the value of the bottom word.
@@ -261,27 +255,11 @@ lbl:    .long   0x43300000, 0x00000000
 >>
   Again, our memory model cannot comprehend that this operation
   frees (logically) the current stack frame.
-
 - [Pallocheap]: in the formal semantics, this pseudo-instruction
   allocates a heap block of size the contents of [GPR3], and leaves
   a pointer to this block in [GPR3].  In the generated assembly code,
   it is turned into a call to the allocation function of the run-time
   system.
-
-- [Piundef] and [Pfundef]: set an integer or float register (respectively)
-  to the [Vundef] value.  Expand to nothing, as the PowerPC processor has
-  no notion of ``undefined value''.  These two pseudo-instructions are used
-  to ensure that the generated PowerPC code computes exactly the same values
-  as predicted by the semantics of Cminor, which explicitly set uninitialized
-  local variables to the [Vundef] value.  A general property of our semantics,
-  not yet formally proved, is that they are monotone with respect to the
-  partial ordering [Vundef <= v] over values.  Consequently, if a program
-  evaluates to a non-[Vundef] result [r] from an initial state that contains
-  [Vundef] values, it will also evaluate to [r] if arbitrary values are put
-  in the initial state instead of the [Vundef] values.  This justifies
-  treating [Piundef] and [Pfundef] as no-operations, leaving in the target
-  register whatever value was already there instead of setting it to [Vundef].
-  The formal proof of this result remains to be done, however.
 *)
 
 Definition code := list instruction.
@@ -588,6 +566,8 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       end
   | Pbl ident =>
       OK (rs#LR <- (Val.add rs#PC Vone) #PC <- (symbol_offset ident Int.zero)) m
+  | Pbs ident =>
+      OK (rs#PC <- (symbol_offset ident Int.zero)) m
   | Pblr =>
       OK (rs#PC <- (rs#LR)) m
   | Pbt bit lbl =>
@@ -744,10 +724,6 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       OK (nextinstr (rs#rd <- (Val.xor rs#r1 (const_low cst)))) m
   | Pxoris rd r1 cst =>
       OK (nextinstr (rs#rd <- (Val.xor rs#r1 (const_high cst)))) m
-  | Piundef rd =>
-      OK (nextinstr (rs#rd <- Vundef)) m
-  | Pfundef rd =>
-      OK (nextinstr (rs#rd <- Vundef)) m
   | Plabel lbl =>
       OK (nextinstr rs) m
   end.
@@ -762,7 +738,7 @@ Inductive extcall_args (rs: regset) (m: mem):
       extcall_args rs m nil irl frl ofs nil
   | extcall_args_int_reg: forall tyl ir1 irl frl ofs v1 vl,
       v1 = rs (IR ir1) ->
-      extcall_args rs m tyl irl frl (ofs + 4) vl ->
+      extcall_args rs m tyl irl frl ofs vl ->
       extcall_args rs m (Tint :: tyl) (ir1 :: irl) frl ofs (v1 :: vl)
   | extcall_args_int_stack: forall tyl frl ofs v1 vl,
       Mem.loadv Mint32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr ofs))) = Some v1 ->
@@ -770,11 +746,11 @@ Inductive extcall_args (rs: regset) (m: mem):
       extcall_args rs m (Tint :: tyl) nil frl ofs (v1 :: vl)
   | extcall_args_float_reg: forall tyl irl fr1 frl ofs v1 vl,
       v1 = rs (FR fr1) ->
-      extcall_args rs m tyl (list_drop2 irl) frl (ofs + 8) vl ->
+      extcall_args rs m tyl (list_drop2 irl) frl ofs vl ->
       extcall_args rs m (Tfloat :: tyl) irl (fr1 :: frl) ofs (v1 :: vl)
   | extcall_args_float_stack: forall tyl irl ofs v1 vl,
       Mem.loadv Mfloat64 m (Val.add (rs (IR GPR1)) (Vint (Int.repr ofs))) = Some v1 ->
-      extcall_args rs m tyl (list_drop2 irl) nil (ofs + 8) vl ->
+      extcall_args rs m tyl irl nil (ofs + 8) vl ->
       extcall_args rs m (Tfloat :: tyl) irl nil ofs (v1 :: vl).
 
 Definition extcall_arguments
@@ -783,7 +759,7 @@ Definition extcall_arguments
     sg.(sig_args)
     (GPR3 :: GPR4 :: GPR5 :: GPR6 :: GPR7 :: GPR8 :: GPR9 :: GPR10 :: nil)
     (FPR1 :: FPR2 :: FPR3 :: FPR4 :: FPR5 :: FPR6 :: FPR7 :: FPR8 :: FPR9 :: FPR10 :: nil)
-    24 args.
+    56 args.
 
 Definition loc_external_result (s: signature) : preg :=
   match s.(sig_res) with
@@ -794,14 +770,17 @@ Definition loc_external_result (s: signature) : preg :=
 
 (** Execution of the instruction at [rs#PC]. *)
 
-Inductive exec_step: regset -> mem -> trace -> regset -> mem -> Prop :=
+Inductive state: Set :=
+  | State: regset -> mem -> state.
+
+Inductive step: state -> trace -> state -> Prop :=
   | exec_step_internal:
       forall b ofs c i rs m rs' m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal c) ->
       find_instr (Int.unsigned ofs) c = Some i ->
       exec_instr c i rs m = OK rs' m' ->
-      exec_step rs m E0 rs' m'
+      step (State rs m) E0 (State rs' m')
   | exec_step_external:
       forall b ef args res rs m t rs',
       rs PC = Vptr b Int.zero ->
@@ -810,33 +789,26 @@ Inductive exec_step: regset -> mem -> trace -> regset -> mem -> Prop :=
       extcall_arguments rs m ef.(ef_sig) args ->
       rs' = (rs#(loc_external_result ef.(ef_sig)) <- res
                #PC <- (rs LR)) ->
-      exec_step rs m t rs' m.
-
-(** Execution of zero, one or several instructions starting at [rs#PC]. *)
-
-Inductive exec_steps: regset -> mem -> trace -> regset -> mem -> Prop :=
-  | exec_refl:
-      forall rs m,
-      exec_steps rs m E0 rs m
-  | exec_one:
-      forall rs m t rs' m',
-      exec_step rs m t rs' m' ->
-      exec_steps rs m t rs' m'
-  | exec_trans:
-      forall rs1 m1 t1 rs2 m2 t2 rs3 m3 t3,
-      exec_steps rs1 m1 t1 rs2 m2 ->
-      exec_steps rs2 m2 t2 rs3 m3 ->
-      t3 = t1 ** t2 ->
-      exec_steps rs1 m1 t3 rs3 m3.
+      step (State rs m) t (State rs' m).
 
 End RELSEM.
 
-Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
-  let ge := Genv.globalenv p in
-  let m0 := Genv.init_mem p in
-  let rs0 :=
-    (Pregmap.init Vundef) # PC <- (symbol_offset ge p.(prog_main) Int.zero)
-                          # LR <- Vzero
-                          # GPR1 <- (Vptr Mem.nullptr Int.zero) in
-  exists rs, exists m,
-  exec_steps ge rs0 m0 t rs m /\ rs#PC = Vzero /\ rs#GPR3 = r.
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro:
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      let rs0 :=
+        (Pregmap.init Vundef)
+        # PC <- (symbol_offset ge p.(prog_main) Int.zero)
+        # LR <- Vzero
+        # GPR1 <- (Vptr Mem.nullptr Int.zero) in
+      initial_state p (State rs0 m0).
+
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall rs m r,
+      rs#PC = Vzero ->
+      rs#GPR3 = Vint r ->
+      final_state (State rs m) r.
+      
+Definition exec_program (p: program) (beh: program_behavior) : Prop :=
+  program_behaves step (initial_state p) final_state (Genv.globalenv p) beh.
diff --git a/backend/PPCgen.v b/backend/PPCgen.v
index ba8ea285..d7a83b0b 100644
--- a/backend/PPCgen.v
+++ b/backend/PPCgen.v
@@ -2,6 +2,7 @@
 
 Require Import Coqlib.
 Require Import Maps.
+Require Import Errors.
 Require Import AST.
 Require Import Integers.
 Require Import Floats.
@@ -268,11 +269,6 @@ Definition transl_op
       Pori (ireg_of r) (ireg_of r) (Csymbol_low_unsigned s ofs) :: k
   | Oaddrstack n, nil =>
       addimm (ireg_of r) GPR1 n k
-  | Oundef, nil =>
-      match mreg_type r with
-      | Tint => Piundef (ireg_of r) :: k
-      | Tfloat => Pfundef (freg_of r) :: k
-      end
   | Ocast8signed, a1 :: nil =>
       Pextsb (ireg_of r) (ireg_of a1) :: k
   | Ocast8unsigned, a1 :: nil =>
@@ -474,6 +470,17 @@ Definition transl_instr (i: Mach.instruction) (k: code) :=
       Pmtctr (ireg_of r) :: Pbctrl :: k
   | Mcall sig (inr symb) =>
       Pbl symb :: k
+  | Mtailcall sig (inl r) =>
+      Pmtctr (ireg_of r) :: 
+      Plwz GPR2 (Cint (Int.repr 12)) GPR1 ::
+      Pmtlr GPR2 ::
+      Pfreeframe :: 
+      Pbctr :: k
+  | Mtailcall sig (inr symb) =>
+      Plwz GPR2 (Cint (Int.repr 12)) GPR1 ::
+      Pmtlr GPR2 ::
+      Pfreeframe :: 
+      Pbs symb :: k
   | Malloc =>
       Pallocblock :: k
   | Mlabel lbl =>
@@ -510,14 +517,17 @@ Fixpoint code_size (c: code) : Z :=
   | instr :: c' => code_size c' + 1
   end.
 
-Definition transf_function (f: Mach.function) : option PPC.code :=
+Open Local Scope string_scope.
+
+Definition transf_function (f: Mach.function) : res PPC.code :=
   let c := transl_function f in
   if zlt Int.max_unsigned (code_size c)
-  then None
-  else Some c.
+  then Errors.Error (msg "code size exceeded")
+  else Errors.OK c.
 
-Definition transf_fundef (f: Mach.fundef) : option PPC.fundef :=
+Definition transf_fundef (f: Mach.fundef) : res PPC.fundef :=
   transf_partial_fundef transf_function f.
 
-Definition transf_program (p: Mach.program) : option PPC.program :=
+Definition transf_program (p: Mach.program) : res PPC.program :=
   transform_partial_program transf_fundef p.
+
diff --git a/backend/PPCgenproof.v b/backend/PPCgenproof.v
index f1ee9f22..8d6d9342 100644
--- a/backend/PPCgenproof.v
+++ b/backend/PPCgenproof.v
@@ -2,6 +2,7 @@
 
 Require Import Coqlib.
 Require Import Maps.
+Require Import Errors.
 Require Import AST.
 Require Import Integers.
 Require Import Floats.
@@ -9,19 +10,22 @@ Require Import Values.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
 Require Import Mach.
+Require Import Machconcr.
 Require Import Machtyping.
 Require Import PPC.
 Require Import PPCgen.
+Require Import PPCgenretaddr.
 Require Import PPCgenproof1.
 
 Section PRESERVATION.
 
 Variable prog: Mach.program.
 Variable tprog: PPC.program.
-Hypothesis TRANSF: transf_program prog = Some tprog.
+Hypothesis TRANSF: transf_program prog = Errors.OK tprog.
 
 Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
@@ -35,16 +39,11 @@ Proof.
 Qed.
 
 Lemma functions_translated:
-  forall f b,
+  forall b f,
   Genv.find_funct_ptr ge b = Some f ->
-  exists tf, Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = Some tf.
-Proof.
-  intros. 
-  generalize (Genv.find_funct_ptr_transf_partial transf_fundef TRANSF H).
-  case (transf_fundef f). 
-  intros f' [A B]. exists f'; split. assumption. auto.
-  tauto.
-Qed.
+  exists tf, Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = Errors.OK tf.
+Proof
+  (Genv.find_funct_ptr_transf_partial transf_fundef TRANSF).
 
 Lemma functions_transl:
   forall f b,
@@ -54,8 +53,8 @@ Proof.
   intros. 
   destruct (functions_translated _ _ H) as [tf [A B]].
   rewrite A. generalize B. unfold transf_fundef, transf_partial_fundef, transf_function.
-  case (zlt Int.max_unsigned (code_size (transl_function f))); intro.
-  congruence. auto.
+  case (zlt Int.max_unsigned (code_size (transl_function f))); simpl; intro.
+  congruence. intro. inv B0. auto.
 Qed.
 
 Lemma functions_transl_no_overflow:
@@ -66,7 +65,7 @@ Proof.
   intros. 
   destruct (functions_translated _ _ H) as [tf [A B]].
   generalize B. unfold transf_fundef, transf_partial_fundef, transf_function.
-  case (zlt Int.max_unsigned (code_size (transl_function f))); intro.
+  case (zlt Int.max_unsigned (code_size (transl_function f))); simpl; intro.
   congruence. intro; omega.
 Qed.
 
@@ -81,23 +80,17 @@ Proof.
   left; congruence. right; eauto.
 Qed.
 
-(** The ``code tail'' of an instruction list [c] is the list of instructions
-  starting at PC [pos]. *)
-
-Fixpoint code_tail (pos: Z) (c: code) {struct c} : code :=
-  match c with
-  | nil => nil
-  | i :: il => if zeq pos 0 then c else code_tail (pos - 1) il
-  end.
-
 Lemma find_instr_tail:
-  forall c pos,
-  find_instr pos c =
-    match code_tail pos c with nil => None | i1 :: il => Some i1 end.
+  forall c1 i c2 pos,
+  code_tail pos c1 (i :: c2) ->
+  find_instr pos c1 = Some i.
 Proof.
-  induction c; simpl; intros.
-  auto.
-  case (zeq pos 0); auto.
+  induction c1; simpl; intros.
+  inv H.
+  destruct (zeq pos 0). subst pos.
+  inv H. auto. generalize (code_tail_pos _ _ _ H4). intro. omegaContradiction.
+  inv H. congruence. replace (pos0 + 1 - 1) with pos0 by omega.
+  eauto.
 Qed.
 
 Remark code_size_pos:
@@ -108,60 +101,39 @@ Qed.
 
 Remark code_tail_bounds:
   forall fn ofs i c,
-  code_tail ofs fn = i :: c -> 0 <= ofs < code_size fn.
-Proof.
-  induction fn; simpl.
-  intros; discriminate.
-  intros until c. case (zeq ofs 0); intros.
-  generalize (code_size_pos fn).  omega.
-  generalize (IHfn _ _ _ H). omega.
-Qed.
-
-Remark code_tail_unfold:
-  forall ofs i c,
-  ofs >= 0 ->
-  code_tail (ofs + 1) (i :: c) = code_tail ofs c.
-Proof.
-  intros. simpl. case (zeq (ofs + 1) 0); intros.
-  omegaContradiction.
-  replace (ofs + 1 - 1) with ofs. auto. omega.
-Qed.
-
-Remark code_tail_zero:
-  forall fn, code_tail 0 fn = fn.
+  code_tail ofs fn (i :: c) -> 0 <= ofs < code_size fn.
 Proof.
-  intros. destruct fn; simpl. auto. auto.
+  assert (forall ofs fn c, code_tail ofs fn c ->
+          forall i c', c = i :: c' -> 0 <= ofs < code_size fn).
+  induction 1; intros; simpl. 
+  rewrite H. simpl. generalize (code_size_pos c'). omega.
+  generalize (IHcode_tail _ _ H0). omega.
+  eauto.
 Qed.
 
 Lemma code_tail_next:
   forall fn ofs i c,
-  code_tail ofs fn = i :: c ->
-  code_tail (ofs + 1) fn = c.
+  code_tail ofs fn (i :: c) ->
+  code_tail (ofs + 1) fn c.
 Proof.
-  induction fn.
-  simpl; intros; discriminate.
-  intros until c. case (zeq ofs 0); intro.
-  subst ofs. intros. rewrite code_tail_zero in H. injection H.
-  intros. subst c. rewrite code_tail_unfold. apply code_tail_zero.
-  omega.
-  intro; generalize (code_tail_bounds _ _ _ _ H); intros [A B].
-  assert (ofs = (ofs - 1) + 1). omega.
-  rewrite H0 in H. rewrite code_tail_unfold in H.
-  rewrite code_tail_unfold. rewrite H0. eauto.
-  omega. omega. 
+  assert (forall ofs fn c, code_tail ofs fn c ->
+          forall i c', c = i :: c' -> code_tail (ofs + 1) fn c').
+  induction 1; intros.
+  subst c. constructor. constructor.
+  constructor. eauto.
+  eauto.
 Qed.
 
 Lemma code_tail_next_int:
   forall fn ofs i c,
   code_size fn <= Int.max_unsigned ->
-  code_tail (Int.unsigned ofs) fn = i :: c ->
-  code_tail (Int.unsigned (Int.add ofs Int.one)) fn = c.
+  code_tail (Int.unsigned ofs) fn (i :: c) ->
+  code_tail (Int.unsigned (Int.add ofs Int.one)) fn c.
 Proof.
-  intros. rewrite Int.add_unsigned. unfold Int.one. 
-  repeat rewrite Int.unsigned_repr. apply code_tail_next with i; auto.
-  compute; intuition congruence.
+  intros. rewrite Int.add_unsigned.
+  change (Int.unsigned Int.one) with 1.
+  rewrite Int.unsigned_repr. apply code_tail_next with i; auto.
   generalize (code_tail_bounds _ _ _ _ H0). omega. 
-  compute; intuition congruence.
 Qed.
 
 (** [transl_code_at_pc pc fn c] holds if the code pointer [pc] points
@@ -169,12 +141,12 @@ Qed.
   and [c] is the tail of the generated code at the position corresponding
   to the code pointer [pc]. *)
 
-Inductive transl_code_at_pc: val -> Mach.function -> Mach.code -> Prop :=
+Inductive transl_code_at_pc: val -> block -> Mach.function -> Mach.code -> Prop :=
   transl_code_at_pc_intro:
     forall b ofs f c,
     Genv.find_funct_ptr ge b = Some (Internal f) ->
-    code_tail (Int.unsigned ofs) (transl_function f) = transl_code c ->
-    transl_code_at_pc (Vptr b ofs) f c.
+    code_tail (Int.unsigned ofs) (transl_function f) (transl_code c) ->
+    transl_code_at_pc (Vptr b ofs) b f c.
 
 (** The following lemmas show that straight-line executions
   (predicate [exec_straight]) correspond to correct PPC executions
@@ -187,14 +159,16 @@ Lemma exec_straight_steps_1:
   forall b ofs,
   rs#PC = Vptr b ofs ->
   Genv.find_funct_ptr tge b = Some (Internal fn) ->
-  code_tail (Int.unsigned ofs) fn = c ->
-  exec_steps tge rs m E0 rs' m'.
+  code_tail (Int.unsigned ofs) fn c ->
+  plus step tge (State rs m) E0 (State rs' m').
 Proof.
-  induction 1.
-  intros. apply exec_refl. 
-  intros. apply exec_trans with E0 rs2 m2 E0.
-  apply exec_one; econstructor; eauto. 
-  rewrite find_instr_tail. rewrite H5. auto.
+  induction 1; intros.
+  apply plus_one.
+  econstructor; eauto. 
+  eapply find_instr_tail. eauto.
+  eapply plus_left'.
+  econstructor; eauto. 
+  eapply find_instr_tail. eauto.
   apply IHexec_straight with b (Int.add ofs Int.one). 
   auto. rewrite H0. rewrite H3. reflexivity.
   auto. 
@@ -209,35 +183,79 @@ Lemma exec_straight_steps_2:
   forall b ofs,
   rs#PC = Vptr b ofs ->
   Genv.find_funct_ptr tge b = Some (Internal fn) ->
-  code_tail (Int.unsigned ofs) fn = c ->
+  code_tail (Int.unsigned ofs) fn c ->
   exists ofs',
      rs'#PC = Vptr b ofs'
-  /\ code_tail (Int.unsigned ofs') fn = c'.
+  /\ code_tail (Int.unsigned ofs') fn c'.
 Proof.
   induction 1; intros.
-  exists ofs. split. auto. auto.
+  exists (Int.add ofs Int.one). split.
+  rewrite H0. rewrite H2. auto.
+  apply code_tail_next_int with i1; auto.
   apply IHexec_straight with (Int.add ofs Int.one).
   auto. rewrite H0. rewrite H3. reflexivity. auto. 
   apply code_tail_next_int with i; auto.
 Qed.
 
-Lemma exec_straight_steps:
-  forall f c c' rs m rs' m',
-  transl_code_at_pc (rs PC) f c ->
+Lemma exec_straight_exec:
+  forall fb f c c' rs m rs' m',
+  transl_code_at_pc (rs PC) fb f c ->
+  exec_straight tge (transl_function f)
+                (transl_code c) rs m c' rs' m' ->
+  plus step tge (State rs m) E0 (State rs' m').
+Proof.
+  intros. inversion H. subst.
+  eapply exec_straight_steps_1; eauto.
+  eapply functions_transl_no_overflow; eauto. 
+  eapply functions_transl; eauto.
+Qed.
+
+Lemma exec_straight_at:
+  forall fb f c c' rs m rs' m',
+  transl_code_at_pc (rs PC) fb f c ->
   exec_straight tge (transl_function f)
                 (transl_code c) rs m (transl_code c') rs' m' ->
-  exec_steps tge rs m E0 rs' m' /\ transl_code_at_pc (rs' PC) f c'.
+  transl_code_at_pc (rs' PC) fb f c'.
 Proof.
-  intros. inversion H.
+  intros. inversion H. subst. 
   generalize (functions_transl_no_overflow _ _ H2). intro.
   generalize (functions_transl _ _ H2). intro.
-  split. eapply exec_straight_steps_1; eauto. 
   generalize (exec_straight_steps_2 _ _ _ _ _ _ _
-                H0 H6 _ _ (sym_equal H1) H7 H3).
+                H0 H4 _ _ (sym_equal H1) H5 H3).
   intros [ofs' [PC' CT']].
   rewrite PC'. constructor; auto.
 Qed.
 
+(** Correctness of the return addresses predicted by
+    [PPCgen.return_address_offset]. *)
+
+Remark code_tail_no_bigger:
+  forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat.
+Proof.
+  induction 1; simpl; omega.
+Qed.
+
+Remark code_tail_unique:
+  forall fn c pos pos',
+  code_tail pos fn c -> code_tail pos' fn c -> pos = pos'.
+Proof.
+  induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto.
+  generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega.
+  generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega.
+  f_equal. eauto.
+Qed.
+
+Lemma return_address_offset_correct:
+  forall b ofs fb f c ofs',
+  transl_code_at_pc (Vptr b ofs) fb f c ->
+  return_address_offset f c ofs' ->
+  ofs' = ofs.
+Proof.
+  intros. inv H0. inv H. 
+  generalize (code_tail_unique _ _ _ _ H1 H7). intro. rewrite H. 
+  apply Int.repr_unsigned.
+Qed.
+
 (** The [find_label] function returns the code tail starting at the
   given label.  A connection with [code_tail] is then established. *)
 
@@ -253,7 +271,7 @@ Lemma label_pos_code_tail:
   find_label lbl c = Some c' ->
   exists pos',
   label_pos lbl pos c = Some pos' 
-  /\ code_tail (pos' - pos) c = c'
+  /\ code_tail (pos' - pos) c c'
   /\ pos < pos' <= pos + code_size c.
 Proof.
   induction c. 
@@ -262,13 +280,13 @@ Proof.
   case (is_label lbl a).
   intro EQ; injection EQ; intro; subst c'.
   exists (pos + 1). split. auto. split.
-  rewrite zeq_false. replace (pos + 1 - pos - 1) with 0.
-  apply code_tail_zero. omega. omega.
+  replace (pos + 1 - pos) with (0 + 1) by omega. constructor. constructor. 
   generalize (code_size_pos c). omega. 
   intros. generalize (IHc (pos + 1) c' H). intros [pos' [A [B C]]].
   exists pos'. split. auto. split.
-  rewrite zeq_false. replace (pos' - pos - 1) with (pos' - (pos + 1)).
-  auto. omega. omega. omega.
+  replace (pos' - pos) with ((pos' - (pos + 1)) + 1) by omega.
+  constructor. auto. 
+  omega.
 Qed.
 
 (** The following lemmas show that the translation from Mach to PPC
@@ -409,7 +427,6 @@ Proof.
   destruct op; destruct args; try (destruct args); try (destruct args); try (destruct args);
   try reflexivity; autorewrite with labels; try reflexivity.
   case (mreg_type m); reflexivity.
-  case (mreg_type r); reflexivity.
   case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity.
   case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity.
   case (snd (crbit_for_cond c)); reflexivity.
@@ -453,6 +470,7 @@ Proof.
   destruct m; rewrite transl_load_store_label; intros; reflexivity. 
   destruct m; rewrite transl_load_store_label; intros; reflexivity. 
   destruct s0; reflexivity.
+  destruct s0; reflexivity.
   rewrite peq_false. auto. congruence.
   case (snd (crbit_for_cond c)); reflexivity.
 Qed.
@@ -488,7 +506,7 @@ Lemma find_label_goto_label:
   Mach.find_label lbl f.(fn_code) = Some c' ->
   exists rs',
     goto_label (transl_function f) lbl rs m = OK rs' m  
-  /\ transl_code_at_pc (rs' PC) f c'
+  /\ transl_code_at_pc (rs' PC) b f c'
   /\ forall r, r <> PC -> rs'#r = rs#r.
 Proof.
   intros. 
@@ -499,13 +517,13 @@ Proof.
   intros [pos' [A [B C]]].
   exists (rs#PC <- (Vptr b (Int.repr pos'))).
   split. unfold goto_label. rewrite A. rewrite H0. auto.
-  split. rewrite Pregmap.gss. constructor. auto. 
+  split. rewrite Pregmap.gss. constructor; auto. 
   rewrite Int.unsigned_repr. replace (pos' - 0) with pos' in B.
   auto. omega. 
   generalize (functions_transl_no_overflow _ _ H). 
   omega.
   intros. apply Pregmap.gso; auto.
-Qed.  
+Qed.
 
 (** * Memory properties *)
 
@@ -513,22 +531,39 @@ Qed.
   We show that it can be synthesized as a ``load 8-bit unsigned integer''
   followed by a sign extension. *)
 
+Remark valid_access_equiv:
+  forall chunk1 chunk2 m b ofs,
+  size_chunk chunk1 = size_chunk chunk2 ->
+  valid_access m chunk1 b ofs ->
+  valid_access m chunk2 b ofs.
+Proof.
+  intros. inv H0. rewrite H in H3. constructor; auto.
+Qed.
+
+Remark in_bounds_equiv:
+  forall chunk1 chunk2 m b ofs (A: Set) (a1 a2: A),
+  size_chunk chunk1 = size_chunk chunk2 ->
+  (if in_bounds m chunk1 b ofs then a1 else a2) =
+  (if in_bounds m chunk2 b ofs then a1 else a2).
+Proof.
+  intros. destruct (in_bounds m chunk1 b ofs).
+  rewrite in_bounds_true. auto. eapply valid_access_equiv; eauto.
+  destruct (in_bounds m chunk2 b ofs); auto.
+  elim n. eapply valid_access_equiv with (chunk1 := chunk2); eauto.
+Qed.
+
 Lemma loadv_8_signed_unsigned:
   forall m a,
   Mem.loadv Mint8signed m a = 
     option_map Val.cast8signed (Mem.loadv Mint8unsigned m a).
 Proof.
   intros. unfold Mem.loadv. destruct a; try reflexivity.
-  unfold load. case (zlt b (nextblock m)); intro.
-  change  (in_bounds Mint8unsigned (Int.signed i) (blocks m b))
-     with (in_bounds Mint8signed (Int.signed i) (blocks m b)).
-  case (in_bounds Mint8signed (Int.signed i) (blocks m b)).
-  change (mem_chunk Mint8unsigned) with (mem_chunk Mint8signed).
-  set (v := (load_contents (mem_chunk Mint8signed)
-             (contents (blocks m b)) (Int.signed i))).
-  unfold Val.load_result. destruct v; try reflexivity. 
+  unfold load. rewrite (in_bounds_equiv Mint8signed Mint8unsigned).
+  destruct (in_bounds m Mint8unsigned b (Int.signed i)); auto.
+  simpl.
+  destruct (getN 0 (Int.signed i) (contents (blocks m b))); auto.
   simpl. rewrite Int.cast8_signed_unsigned. auto.
-  reflexivity. reflexivity.
+  auto.
 Qed.
 
 (** Similarly, we show that signed 8- and 16-bit stores can be performed
@@ -538,155 +573,201 @@ Lemma storev_8_signed_unsigned:
   forall m a v,
   Mem.storev Mint8signed m a v = Mem.storev Mint8unsigned m a v.
 Proof.
-  intros. reflexivity.
+  intros. unfold storev. destruct a; auto.
+  unfold store. rewrite (in_bounds_equiv Mint8signed Mint8unsigned).
+  auto. auto.
 Qed.
 
 Lemma storev_16_signed_unsigned:
   forall m a v,
   Mem.storev Mint16signed m a v = Mem.storev Mint16unsigned m a v.
 Proof.
-  intros. reflexivity.
+  intros. unfold storev. destruct a; auto.
+  unfold store. rewrite (in_bounds_equiv Mint16signed Mint16unsigned).
+  auto. auto.
 Qed.
 
 (** * Proof of semantic preservation *)
 
-(** The invariants for the inductive proof of simulation are as follows.
-  The simulation diagrams are of the form:
+(** Semantic preservation is proved using simulation diagrams
+  of the following form.
 <<
-      c1, ms1, m1 --------------------- rs1, m1
-           |                              |
-           |                              |
-           v                              v
-      c2, ms2, m2 --------------------- rs2, m2
+           st1 --------------- st2
+            |                   |
+           t|                  *|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
 >>
-  Left: execution of one Mach instruction.  Right: execution of zero, one
-  or several instructions.  Precondition (top): agreement between
-  the Mach register set [ms1] and the PPC register set [rs1]; moreover,
-  [rs1 PC] points to the translation of code [c1].  Postcondition (bottom):
-  similar.
+  The invariant is the [match_states] predicate below, which includes:
+- The PPC code pointed by the PC register is the translation of
+  the current Mach code sequence.
+- Mach register values and PPC register values agree.
 *)
 
-Definition exec_instr_prop
-            (f: Mach.function) (sp: val) 
-            (c1: Mach.code) (ms1: Mach.regset) (m1: mem) (t: trace)
-            (c2: Mach.code) (ms2: Mach.regset) (m2: mem) :=
-  forall rs1
-    (WTF: wt_function f)
-    (INCL: incl c1 f.(fn_code))
-    (AT: transl_code_at_pc (rs1 PC) f c1)
-    (AG: agree ms1 sp rs1),
-  exists rs2,
-    agree ms2 sp rs2
-  /\ exec_steps tge rs1 m1 t rs2 m2
-  /\ transl_code_at_pc (rs2 PC) f c2.
-
-Definition exec_function_body_prop
-            (f: Mach.function) (parent: val) (ra: val)
-            (ms1: Mach.regset) (m1: mem) (t: trace)
-            (ms2: Mach.regset) (m2: mem) :=
-  forall rs1
-    (WTRA: Val.has_type ra Tint)
-    (RALR: rs1 LR = ra)
-    (WTF: wt_function f)
-    (AT: Genv.find_funct ge (rs1 PC) = Some (Internal f))
-    (AG: agree ms1 parent rs1),
-  exists rs2,
-     agree ms2 parent rs2
-  /\ exec_steps tge rs1 m1 t rs2 m2
-  /\ rs2 PC = rs1 LR.
-
-Definition exec_function_prop
-            (f: Mach.fundef) (parent: val) 
-            (ms1: Mach.regset) (m1: mem) (t: trace)
-            (ms2: Mach.regset) (m2: mem) :=
-  forall rs1
-    (WTF: wt_fundef f)
-    (AT: Genv.find_funct ge (rs1 PC) = Some f)
-    (AG: agree ms1 parent rs1)
-    (WTRA: Val.has_type (rs1 LR) Tint),
-  exists rs2,
-     agree ms2 parent rs2
-  /\ exec_steps tge rs1 m1 t rs2 m2
-  /\ rs2 PC = rs1 LR.
-           
-(** We show each case of the inductive proof of simulation as a separate
-  lemma. *)
+Inductive match_stack: list Machconcr.stackframe -> Prop :=
+  | match_stack_nil:
+      match_stack nil
+  | match_stack_cons: forall fb sp ra c s f,
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      wt_function f ->
+      incl c f.(fn_code) ->
+      transl_code_at_pc ra fb f c ->
+      match_stack s ->
+      match_stack (Stackframe fb sp ra c :: s).
+
+Inductive match_states: Machconcr.state -> PPC.state -> Prop :=
+  | match_states_intro:
+      forall s fb sp c ms m rs f
+        (STACKS: match_stack s)
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (WTF: wt_function f)
+        (INCL: incl c f.(fn_code))
+        (AT: transl_code_at_pc (rs PC) fb f c)
+        (AG: agree ms sp rs),
+      match_states (Machconcr.State s fb sp c ms m)
+                   (PPC.State rs m)
+  | match_states_call:
+      forall s fb ms m rs
+        (STACKS: match_stack s)
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = Vptr fb Int.zero)
+        (ATLR: rs LR = parent_ra s),
+      match_states (Machconcr.Callstate s fb ms m)
+                   (PPC.State rs m)
+  | match_states_return:
+      forall s ms m rs
+        (STACKS: match_stack s)
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = parent_ra s),
+      match_states (Machconcr.Returnstate s ms m)
+                   (PPC.State rs m).
+
+Lemma exec_straight_steps:
+  forall s fb sp m1 f c1 rs1 c2 m2 ms2,
+  match_stack s ->
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  wt_function f ->
+  incl c2 f.(fn_code) ->
+  transl_code_at_pc (rs1 PC) fb f c1 ->
+  (exists rs2,
+     exec_straight tge (transl_function f) (transl_code c1) rs1 m1 (transl_code c2) rs2 m2
+     /\ agree ms2 sp rs2) ->
+  exists st',
+  plus step tge (State rs1 m1) E0 st' /\
+  match_states (Machconcr.State s fb sp c2 ms2 m2) st'.
+Proof.
+  intros. destruct H4 as [rs2 [A B]].
+  exists (State rs2 m2); split.
+  eapply exec_straight_exec; eauto.
+  econstructor; eauto. eapply exec_straight_at; eauto.
+Qed.
+
+(** We need to show that, in the simulation diagram, we cannot
+  take infinitely many Mach transitions that correspond to zero
+  transitions on the PPC side.  Actually, all Mach transitions
+  correspond to at least one PPC transition, except the
+  transition from [Machconcr.Returnstate] to [Machconcr.State].
+  So, the following integer measure will suffice to rule out
+  the unwanted behaviour. *)
+
+Definition measure (s: Machconcr.state) : nat :=
+  match s with
+  | Machconcr.State _ _ _ _ _ _ => 0%nat
+  | Machconcr.Callstate _ _ _ _ => 0%nat
+  | Machconcr.Returnstate _ _ _ => 1%nat
+  end.
+
+(** We show the simulation diagram by case analysis on the Mach transition
+  on the left.  Since the proof is large, we break it into one lemma
+  per transition. *)
+
+Definition exec_instr_prop (s1: Machconcr.state) (t: trace) (s2: Machconcr.state) : Prop :=
+  forall s1' (MS: match_states s1 s1'),
+  (exists s2', plus step tge s1' t s2' /\ match_states s2 s2')
+  \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat.
+
 
 Lemma exec_Mlabel_prop:
-  forall (f : function) (sp : val) (lbl : Mach.label)
-     (c : list Mach.instruction) (rs : Mach.regset) (m : mem),
-   exec_instr_prop f sp (Mlabel lbl :: c) rs m E0 c rs m.
+  forall (s : list stackframe) (fb : block) (sp : val)
+         (lbl : Mach.label) (c : list Mach.instruction) (ms : Mach.regset)
+         (m : mem),
+  exec_instr_prop (Machconcr.State s fb sp (Mlabel lbl :: c) ms m) E0
+                  (Machconcr.State s fb sp c ms m).
 Proof.
-  intros; red; intros.
-  assert (exec_straight tge (transl_function f)
-                        (transl_code (Mlabel lbl :: c)) rs1 m
-                        (transl_code c) (nextinstr rs1) m).
+  intros; red; intros; inv MS.
+  left; eapply exec_straight_steps; eauto with coqlib.
+  exists (nextinstr rs); split.
   simpl. apply exec_straight_one. reflexivity. reflexivity.
-  exists (nextinstr rs1). split. apply agree_nextinstr; auto.
-  eapply exec_straight_steps; eauto.
+  apply agree_nextinstr; auto.
 Qed.
 
 Lemma exec_Mgetstack_prop:
-  forall (f : function) (sp : val) (ofs : int) (ty : typ) (dst : mreg)
-     (c : list Mach.instruction) (ms : Mach.regset) (m : mem) (v : val),
-   load_stack m sp ty ofs = Some v ->
-   exec_instr_prop f sp (Mgetstack ofs ty dst :: c) ms m E0 c (Regmap.set dst v ms) m.
+  forall (s : list stackframe) (fb : block) (sp : val) (ofs : int)
+         (ty : typ) (dst : mreg) (c : list Mach.instruction)
+         (ms : Mach.regset) (m : mem) (v : val),
+  load_stack m sp ty ofs = Some v ->
+  exec_instr_prop (Machconcr.State s fb sp (Mgetstack ofs ty dst :: c) ms m) E0
+                  (Machconcr.State s fb sp c (Regmap.set dst v ms) m).
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS.
   unfold load_stack in H. 
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI. inversion WTI.
   rewrite (sp_val _ _ _ AG) in H.
   assert (NOTE: GPR1 <> GPR0). congruence.
   generalize (loadind_correct tge (transl_function f) GPR1 ofs ty
-                dst (transl_code c) rs1 m v H H1 NOTE).
+                dst (transl_code c) rs m v H H1 NOTE).
   intros [rs2 [EX [RES OTH]]].
-  exists rs2. split.
-  apply agree_exten_2 with (rs1#(preg_of dst) <- v).
+  left; eapply exec_straight_steps; eauto with coqlib.
+  simpl. exists rs2; split. auto. 
+  apply agree_exten_2 with (rs#(preg_of dst) <- v).
   auto with ppcgen. 
   intros. case (preg_eq r0 (preg_of dst)); intro.
   subst r0. rewrite Pregmap.gss. auto. 
-  rewrite Pregmap.gso; auto. 
-  eapply exec_straight_steps; eauto.
+  rewrite Pregmap.gso; auto.
 Qed.
 
 Lemma exec_Msetstack_prop:
-  forall (f : function) (sp : val) (src : mreg) (ofs : int) (ty : typ)
-     (c : list Mach.instruction) (ms : mreg -> val) (m m' : mem),
-   store_stack m sp ty ofs (ms src) = Some m' ->
-   exec_instr_prop f sp (Msetstack src ofs ty :: c) ms m E0 c ms m'.
+  forall (s : list stackframe) (fb : block) (sp : val) (src : mreg)
+         (ofs : int) (ty : typ) (c : list Mach.instruction)
+         (ms : mreg -> val) (m m' : mem),
+  store_stack m sp ty ofs (ms src) = Some m' ->
+  exec_instr_prop (Machconcr.State s fb sp (Msetstack src ofs ty :: c) ms m) E0
+                  (Machconcr.State s fb sp c ms m').
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS.
   unfold store_stack in H. 
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI. inversion WTI.
   rewrite (sp_val _ _ _ AG) in H.
-  rewrite (preg_val ms sp rs1) in H; auto.
+  rewrite (preg_val ms sp rs) in H; auto.
   assert (NOTE: GPR1 <> GPR0). congruence.
   generalize (storeind_correct tge (transl_function f) GPR1 ofs ty
-                src (transl_code c) rs1 m m' H H2 NOTE).
+                src (transl_code c) rs m m' H H1 NOTE).
   intros [rs2 [EX OTH]].
-  exists rs2. split.
-  apply agree_exten_2 with rs1; auto.
-  eapply exec_straight_steps; eauto.
+  left; eapply exec_straight_steps; eauto with coqlib.
+  exists rs2; split; auto.
+  apply agree_exten_2 with rs; auto.
 Qed.
 
 Lemma exec_Mgetparam_prop:
-  forall (f : function) (sp parent : val) (ofs : int) (ty : typ)
-     (dst : mreg) (c : list Mach.instruction) (ms : Mach.regset)
-     (m : mem) (v : val),
-   load_stack m sp Tint (Int.repr 0) = Some parent ->
-   load_stack m parent ty ofs = Some v ->
-   exec_instr_prop f sp (Mgetparam ofs ty dst :: c) ms m E0 c (Regmap.set dst v ms) m.
+  forall (s : list stackframe) (fb : block) (sp parent : val)
+         (ofs : int) (ty : typ) (dst : mreg) (c : list Mach.instruction)
+         (ms : Mach.regset) (m : mem) (v : val),
+  load_stack m sp Tint (Int.repr 0) = Some parent ->
+  load_stack m parent ty ofs = Some v ->
+  exec_instr_prop (Machconcr.State s fb sp (Mgetparam ofs ty dst :: c) ms m) E0
+                  (Machconcr.State s fb sp c (Regmap.set dst v ms) m).
 Proof.
-  intros; red; intros.
-  set (rs2 := nextinstr (rs1#GPR2 <- parent)).
+  intros; red; intros; inv MS.
+  set (rs2 := nextinstr (rs#GPR2 <- parent)).
   assert (EX1: exec_straight tge (transl_function f)
-                 (transl_code (Mgetparam ofs ty dst :: c)) rs1 m
+                 (transl_code (Mgetparam ofs ty dst :: c)) rs m
                  (loadind GPR2 ofs ty dst (transl_code c)) rs2 m).
   simpl. apply exec_straight_one.
   simpl. unfold load1. rewrite gpr_or_zero_not_zero; auto with ppcgen.
-  unfold const_low. rewrite <- (sp_val ms sp rs1); auto.
+  unfold const_low. rewrite <- (sp_val ms sp rs); auto.
   unfold load_stack in H. simpl chunk_of_type in H. 
   rewrite H. reflexivity. reflexivity.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
@@ -696,64 +777,48 @@ Proof.
   generalize (loadind_correct tge (transl_function f) GPR2 ofs ty
                 dst (transl_code c) rs2 m v H0 H2 NOTE).
   intros [rs3 [EX2 [RES OTH]]].
-  exists rs3. split.
+  left; eapply exec_straight_steps; eauto with coqlib.
+  exists rs3; split; simpl.
+  eapply exec_straight_trans; eauto.
   apply agree_exten_2 with (rs2#(preg_of dst) <- v).
   unfold rs2; auto with ppcgen.
   intros. case (preg_eq r0 (preg_of dst)); intro.
   subst r0. rewrite Pregmap.gss. auto. 
   rewrite Pregmap.gso; auto. 
-  eapply exec_straight_steps; eauto. 
-  eapply exec_straight_trans; eauto.
-Qed.
-
-Lemma exec_straight_exec_prop:
-  forall f sp c1 rs1 m1 c2 m2 ms',
-  transl_code_at_pc (rs1 PC) f c1 ->
-  (exists rs2, 
-      exec_straight tge (transl_function f)
-                    (transl_code c1) rs1 m1
-                    (transl_code c2) rs2 m2 
-   /\ agree ms' sp rs2) ->
-  (exists rs2,
-      agree ms' sp rs2
-   /\ exec_steps tge rs1 m1 E0 rs2 m2
-  /\ transl_code_at_pc (rs2 PC) f c2).
-Proof.
-  intros until ms'. intros TRANS1 [rs2 [EX AG]].
-  exists rs2. split. assumption. 
-  eapply exec_straight_steps; eauto.
 Qed.
 
 Lemma exec_Mop_prop:
-  forall (f : function) (sp : val) (op : operation) (args : list mreg)
-     (res : mreg) (c : list Mach.instruction) (ms: Mach.regset)
-     (m : mem) (v: val),
-   eval_operation ge sp op ms ## args = Some v ->
-   exec_instr_prop f sp (Mop op args res :: c) ms m E0 c (Regmap.set res v ms) m.
+  forall (s : list stackframe) (fb : block) (sp : val) (op : operation)
+         (args : list mreg) (res : mreg) (c : list Mach.instruction)
+         (ms : mreg -> val) (m : mem) (v : val),
+  eval_operation ge sp op ms ## args m = Some v ->
+  exec_instr_prop (Machconcr.State s fb sp (Mop op args res :: c) ms m) E0
+                  (Machconcr.State s fb sp c (Regmap.set res v ms) m).
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
-  intro WTI. 
-  eapply exec_straight_exec_prop; eauto.
+  intro WTI.
+  left; eapply exec_straight_steps; eauto with coqlib.
   simpl. eapply transl_op_correct; auto.
   rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
 Qed.
 
 Lemma exec_Mload_prop:
-   forall (f : function) (sp : val) (chunk : memory_chunk)
-     (addr : addressing) (args : list mreg) (dst : mreg)
-     (c : list Mach.instruction) (ms: Mach.regset) (m : mem) 
-     (a v : val),
-   eval_addressing ge sp addr ms ## args = Some a ->
-   loadv chunk m a = Some v ->
-   exec_instr_prop f sp (Mload chunk addr args dst :: c) ms m E0 c (Regmap.set dst v ms) m.
+  forall (s : list stackframe) (fb : block) (sp : val)
+         (chunk : memory_chunk) (addr : addressing) (args : list mreg)
+         (dst : mreg) (c : list Mach.instruction) (ms : mreg -> val)
+         (m : mem) (a v : val),
+  eval_addressing ge sp addr ms ## args = Some a ->
+  loadv chunk m a = Some v ->
+  exec_instr_prop (Machconcr.State s fb sp (Mload chunk addr args dst :: c) ms m)
+               E0 (Machconcr.State s fb sp c (Regmap.set dst v ms) m).
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI; inversion WTI.
   assert (eval_addressing tge sp addr ms##args = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
-  eapply exec_straight_exec_prop; eauto.
+  left; eapply exec_straight_steps; eauto with coqlib;
   destruct chunk; simpl; simpl in H6;
   (* all cases but Mint8signed *)
   try (eapply transl_load_correct; eauto;
@@ -776,7 +841,7 @@ Proof.
                 (Plbz (ireg_of dst)) (Plbzx (ireg_of dst))
                 Mint8unsigned addr args 
                 (Pextsb (ireg_of dst) (ireg_of dst) :: transl_code c) 
-                ms sp rs1 m dst a v'
+                ms sp rs m dst a v'
                 X1 X2 AG H3 H7 LOAD').
   intros [rs2 [EX1 AG1]].
   exists (nextinstr (rs2#(ireg_of dst) <- v)).
@@ -788,190 +853,298 @@ Proof.
 Qed.
 
 Lemma exec_Mstore_prop:
-   forall (f : function) (sp : val) (chunk : memory_chunk)
-     (addr : addressing) (args : list mreg) (src : mreg)
-     (c : list Mach.instruction) (ms: Mach.regset) (m m' : mem)
-     (a : val),
-   eval_addressing ge sp addr ms ## args = Some a ->
-   storev chunk m a (ms src) = Some m' ->
-   exec_instr_prop f sp (Mstore chunk addr args src :: c) ms m E0 c ms m'.
+  forall (s : list stackframe) (fb : block) (sp : val)
+         (chunk : memory_chunk) (addr : addressing) (args : list mreg)
+         (src : mreg) (c : list Mach.instruction) (ms : mreg -> val)
+         (m m' : mem) (a : val),
+  eval_addressing ge sp addr ms ## args = Some a ->
+  storev chunk m a (ms src) = Some m' ->
+  exec_instr_prop (Machconcr.State s fb sp (Mstore chunk addr args src :: c) ms m) E0
+                  (Machconcr.State s fb sp c ms m').
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI; inversion WTI.
   rewrite <- (eval_addressing_preserved symbols_preserved) in H.
-  eapply exec_straight_exec_prop; eauto.
+  left; eapply exec_straight_steps; eauto with coqlib. 
   destruct chunk; simpl; simpl in H6;
-  try (rewrite storev_8_signed_unsigned in H);
-  try (rewrite storev_16_signed_unsigned in H);
+  try (rewrite storev_8_signed_unsigned in H0);
+  try (rewrite storev_16_signed_unsigned in H0);
   simpl; eapply transl_store_correct; eauto;
   intros; unfold preg_of; rewrite H6; reflexivity.
 Qed.
 
-Hypothesis wt_prog: wt_program prog.
-
 Lemma exec_Mcall_prop:
-   forall (f : function) (sp : val) (sig : signature)
-     (mos : mreg + ident) (c : list Mach.instruction) (ms : Mach.regset)
-     (m : mem) (f' : Mach.fundef) (t: trace) (ms' : Mach.regset) (m' : mem),
-   find_function ge mos ms = Some f' ->
-   exec_function ge f' sp ms m t ms' m' ->
-   exec_function_prop f' sp ms m t ms' m' ->
-   exec_instr_prop f sp (Mcall sig mos :: c) ms m t c ms' m'.
+  forall (s : list stackframe) (fb : block) (sp : val)
+         (sig : signature) (ros : mreg + ident) (c : Mach.code)
+         (ms : Mach.regset) (m : mem) (f : function) (f' : block)
+         (ra : int),
+  find_function_ptr ge ros ms = Some f' ->
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  return_address_offset f c ra ->
+  exec_instr_prop (Machconcr.State s fb sp (Mcall sig ros :: c) ms m) E0
+                  (Callstate (Stackframe fb sp (Vptr fb ra) c :: s) f' ms m).
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS.
+  assert (f0 = f) by congruence. subst f0.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI. inversion WTI.
-  inversion AT.
-  assert (WTF': wt_fundef f').
-    destruct mos; simpl in H.
-    apply (Genv.find_funct_prop wt_fundef wt_prog H).
-    destruct (Genv.find_symbol ge i); try discriminate.
-    apply (Genv.find_funct_ptr_prop wt_fundef wt_prog H).
+  inv AT. 
   assert (NOOV: code_size (transl_function f) <= Int.max_unsigned).
     eapply functions_transl_no_overflow; eauto.
-  destruct mos; simpl in H; simpl transl_code in H7.
+  destruct ros; simpl in H; simpl transl_code in H7.
   (* Indirect call *)
   generalize (code_tail_next_int _ _ _ _ NOOV H7). intro CT1.
   generalize (code_tail_next_int _ _ _ _ NOOV CT1). intro CT2.
-  set (rs2 := nextinstr (rs1#CTR <- (ms m0))).
+  set (rs2 := nextinstr (rs#CTR <- (ms m0))).
   set (rs3 := rs2 #LR <- (Val.add rs2#PC Vone) #PC <- (ms m0)).
-  assert (TFIND: Genv.find_funct ge (rs3#PC) = Some f').
-    unfold rs3. rewrite Pregmap.gss. auto.
+  assert (ATPC: rs3 PC = Vptr f' Int.zero).
+    change (rs3 PC) with (ms m0). 
+    destruct (ms m0); try discriminate.
+    generalize H; predSpec Int.eq Int.eq_spec i Int.zero; intros; congruence.
+  exploit return_address_offset_correct; eauto. constructor; eauto. 
+  intro RA_EQ.
+  assert (ATLR: rs3 LR = Vptr fb ra).
+    rewrite RA_EQ.
+    change (rs3 LR) with (Val.add (Val.add (rs PC) Vone) Vone).
+    rewrite <- H5. reflexivity. 
   assert (AG3: agree ms sp rs3). 
     unfold rs3, rs2; auto 8 with ppcgen.
-  assert (WTRA: Val.has_type rs3#LR Tint).
-    change rs3#LR with (Val.add (Val.add rs1#PC Vone) Vone).
-    rewrite <- H5. exact I.
-  generalize (H1 rs3 WTF' TFIND AG3 WTRA).
-  intros [rs4 [AG4 [EXF' PC4]]].
-  exists rs4. split. auto. split.
-  apply exec_trans with E0 rs2 m t. apply exec_one. econstructor. 
-    eauto. apply functions_transl. eexact H6. 
-    rewrite find_instr_tail. rewrite H7. reflexivity.
-    simpl. rewrite <- (ireg_val ms sp rs1); auto. 
-  apply exec_trans with E0 rs3 m t. apply exec_one. econstructor.
-    unfold rs2, nextinstr. rewrite Pregmap.gss. 
-    rewrite Pregmap.gso. rewrite <- H5. simpl. reflexivity.
-    discriminate. apply functions_transl. eexact H6.
-    rewrite find_instr_tail. rewrite CT1. reflexivity.
-    simpl. replace (rs2 CTR) with (ms m0). reflexivity.
-    unfold rs2. rewrite nextinstr_inv. rewrite Pregmap.gss. 
-    auto. discriminate.
-  exact EXF'. traceEq. traceEq. 
-  rewrite PC4. unfold rs3. rewrite Pregmap.gso. rewrite Pregmap.gss.
-  unfold rs2, nextinstr. rewrite Pregmap.gss. rewrite Pregmap.gso. 
-  rewrite <- H5. simpl. constructor. auto. auto. 
-  discriminate. discriminate.
+  left; exists (State rs3 m); split.
+  apply plus_left with E0 (State rs2 m) E0. 
+    econstructor. eauto. apply functions_transl. eexact H0. 
+    eapply find_instr_tail. eauto.
+    simpl. rewrite <- (ireg_val ms sp rs); auto. 
+  apply star_one. econstructor.
+    change (rs2 PC) with (Val.add (rs PC) Vone). rewrite <- H5.
+    simpl. auto.
+    apply functions_transl. eexact H0.
+    eapply find_instr_tail. eauto.
+    simpl. reflexivity.
+  traceEq.
+  econstructor; eauto.
+  econstructor; eauto with coqlib.
+  rewrite RA_EQ. econstructor; eauto.
   (* Direct call *)
-  caseEq (Genv.find_symbol ge i). intros fblock FINDS.
-  rewrite FINDS in H.
   generalize (code_tail_next_int _ _ _ _ NOOV H7). intro CT1.
-  set (rs2 := rs1 #LR <- (Val.add rs1#PC Vone) #PC <- (symbol_offset tge i Int.zero)).
-  assert (TFIND: Genv.find_funct ge (rs2#PC) = Some f').
-    unfold rs2. rewrite Pregmap.gss. 
-    unfold symbol_offset. rewrite symbols_preserved. 
-    rewrite FINDS.     
-    rewrite Genv.find_funct_find_funct_ptr. assumption.
+  set (rs2 := rs #LR <- (Val.add rs#PC Vone) #PC <- (symbol_offset tge i Int.zero)).
+  assert (ATPC: rs2 PC = Vptr f' Int.zero).
+    change (rs2 PC) with (symbol_offset tge i Int.zero).
+    unfold symbol_offset. rewrite symbols_preserved. rewrite H. auto.
+  exploit return_address_offset_correct; eauto. constructor; eauto. 
+  intro RA_EQ.
+  assert (ATLR: rs2 LR = Vptr fb ra).
+    rewrite RA_EQ.
+    change (rs2 LR) with (Val.add (rs PC) Vone).
+    rewrite <- H5. reflexivity. 
   assert (AG2: agree ms sp rs2). 
     unfold rs2; auto 8 with ppcgen.
-  assert (WTRA: Val.has_type rs2#LR Tint).
-    change rs2#LR with (Val.add rs1#PC Vone).
-    rewrite <- H5. exact I.
-  generalize (H1 rs2 WTF' TFIND AG2 WTRA).
-  intros [rs3 [AG3 [EXF' PC3]]].
-  exists rs3. split. auto. split.
-  apply exec_trans with E0 rs2 m t. apply exec_one. econstructor. 
-    eauto. apply functions_transl. eexact H6. 
-    rewrite find_instr_tail. rewrite H7. reflexivity.
-    simpl. reflexivity. 
-  exact EXF'. traceEq.
-  rewrite PC3. unfold rs2. rewrite Pregmap.gso. rewrite Pregmap.gss.
-  rewrite <- H5. simpl. constructor. auto. auto. 
-  discriminate. 
-  intro FINDS. rewrite FINDS in H. discriminate.
+  left; exists (State rs2 m); split.
+  apply plus_one. econstructor. 
+    eauto.
+    apply functions_transl. eexact H0. 
+    eapply find_instr_tail. eauto.
+    simpl. reflexivity.
+  econstructor; eauto with coqlib.
+  econstructor; eauto with coqlib.
+  rewrite RA_EQ. econstructor; eauto.
+Qed.
+
+Lemma exec_Mtailcall_prop:
+  forall (s : list stackframe) (fb stk : block) (soff : int)
+         (sig : signature) (ros : mreg + ident) (c : list Mach.instruction)
+         (ms : Mach.regset) (m : mem) (f' : block),
+  find_function_ptr ge ros ms = Some f' ->
+  load_stack m (Vptr stk soff) Tint (Int.repr 0) = Some (parent_sp s) ->
+  load_stack m (Vptr stk soff) Tint (Int.repr 12) = Some (parent_ra s) ->
+  exec_instr_prop
+          (Machconcr.State s fb (Vptr stk soff) (Mtailcall sig ros :: c) ms m) E0
+          (Callstate s f' ms (free m stk)).
+Proof.
+  intros; red; intros; inv MS.
+  generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
+  intro WTI. inversion WTI.
+  inversion AT. subst b f0 c0.
+  assert (NOOV: code_size (transl_function f) <= Int.max_unsigned).
+    eapply functions_transl_no_overflow; eauto.
+  destruct ros; simpl in H; simpl in H8.
+  (* Indirect call *)
+  set (rs2 := nextinstr (rs#CTR <- (ms m0))).
+  set (rs3 := nextinstr (rs2#GPR2 <- (parent_ra s))).
+  set (rs4 := nextinstr (rs3#LR <- (parent_ra s))).
+  set (rs5 := nextinstr (rs4#GPR1 <- (parent_sp s))).
+  set (rs6 := rs5#PC <- (rs5 CTR)).
+  assert (exec_straight tge (transl_function f)
+            (transl_code (Mtailcall sig (inl ident m0) :: c)) rs m 
+            (Pbctr :: transl_code c) rs5 (free m stk)).
+  simpl. apply exec_straight_step with rs2 m. 
+  simpl. rewrite <- (ireg_val _ _ _ _ AG H5). reflexivity. reflexivity.
+  apply exec_straight_step with rs3 m.
+  simpl. unfold load1. rewrite gpr_or_zero_not_zero. unfold const_low.
+  change (rs2 GPR1) with (rs GPR1). rewrite <- (sp_val _ _ _ AG). 
+  simpl. unfold load_stack in H1. simpl in H1. rewrite H1.
+  reflexivity. discriminate. reflexivity.
+  apply exec_straight_step with rs4 m.
+  simpl. reflexivity. reflexivity.
+  apply exec_straight_one. 
+  simpl. change (rs4 GPR1) with (rs GPR1). rewrite <- (sp_val _ _ _ AG).
+  unfold load_stack in H0; simpl in H0. rewrite Int.add_zero in H0.
+  simpl. rewrite H0. reflexivity. reflexivity.
+  left; exists (State rs6 (free m stk)); split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor.
+  change (rs5 PC) with (Val.add (Val.add (Val.add (Val.add (rs PC) Vone) Vone) Vone) Vone).
+  rewrite <- H6; simpl. eauto.
+  eapply functions_transl; eauto. 
+  eapply find_instr_tail.
+  repeat (eapply code_tail_next_int; auto). eauto. 
+  simpl. reflexivity. traceEq.
+  (* match states *)
+  econstructor; eauto.
+  assert (AG4: agree ms (Vptr stk soff) rs4).
+    unfold rs4, rs3, rs2; auto 10 with ppcgen.
+  assert (AG5: agree ms (parent_sp s) rs5).
+    unfold rs5. apply agree_nextinstr.
+    split. reflexivity. intros. inv AG4. rewrite H11. 
+    rewrite Pregmap.gso; auto with ppcgen.
+  unfold rs6; auto with ppcgen.
+  change (rs6 PC) with (ms m0). 
+  generalize H. destruct (ms m0); try congruence.
+  predSpec Int.eq Int.eq_spec i Int.zero; intros; congruence.
+  (* direct call *)
+  set (rs2 := nextinstr (rs#GPR2 <- (parent_ra s))).
+  set (rs3 := nextinstr (rs2#LR <- (parent_ra s))).
+  set (rs4 := nextinstr (rs3#GPR1 <- (parent_sp s))).
+  set (rs5 := rs4#PC <- (Vptr f' Int.zero)).
+  assert (exec_straight tge (transl_function f)
+            (transl_code (Mtailcall sig (inr mreg i) :: c)) rs m 
+            (Pbs i :: transl_code c) rs4 (free m stk)).
+  simpl. apply exec_straight_step with rs2 m. 
+  simpl. unfold load1. rewrite gpr_or_zero_not_zero. unfold const_low.
+  rewrite <- (sp_val _ _ _ AG). 
+  simpl. unfold load_stack in H1. simpl in H1. rewrite H1.
+  reflexivity. discriminate. reflexivity.
+  apply exec_straight_step with rs3 m.
+  simpl. reflexivity. reflexivity.
+  apply exec_straight_one. 
+  simpl. change (rs3 GPR1) with (rs GPR1). rewrite <- (sp_val _ _ _ AG).
+  unfold load_stack in H0; simpl in H0. rewrite Int.add_zero in H0.
+  simpl. rewrite H0. reflexivity. reflexivity.
+  left; exists (State rs5 (free m stk)); split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor.
+  change (rs4 PC) with (Val.add (Val.add (Val.add (rs PC) Vone) Vone) Vone).
+  rewrite <- H6; simpl. eauto.
+  eapply functions_transl; eauto. 
+  eapply find_instr_tail.
+  repeat (eapply code_tail_next_int; auto). eauto. 
+  simpl. unfold symbol_offset. rewrite symbols_preserved. rewrite H. 
+  reflexivity. traceEq.
+  (* match states *)
+  econstructor; eauto.
+  assert (AG3: agree ms (Vptr stk soff) rs3).
+    unfold rs3, rs2; auto 10 with ppcgen.
+  assert (AG4: agree ms (parent_sp s) rs4).
+    unfold rs4. apply agree_nextinstr.
+    split. reflexivity. intros. inv AG3. rewrite H11. 
+    rewrite Pregmap.gso; auto with ppcgen.
+  unfold rs5; auto with ppcgen.
 Qed.
 
 Lemma exec_Malloc_prop:
-  forall (f : function) (sp : val) (c : list Mach.instruction)
-         (ms : Mach.regset) (m : mem) (sz : int) (m' : mem) (blk : block),
+  forall (s : list stackframe) (fb : block) (sp : val)
+         (c : list Mach.instruction) (ms : mreg -> val) (m : mem) (sz : int)
+         (m' : mem) (blk : block),
   ms Conventions.loc_alloc_argument = Vint sz ->
-  Mem.alloc m 0 (Int.signed sz) = (m', blk) ->
-  exec_instr_prop f sp (Malloc :: c) ms m E0 c
-    (Regmap.set Conventions.loc_alloc_result (Vptr blk Int.zero) ms) m'.
+  alloc m 0 (Int.signed sz) = (m', blk) ->
+  exec_instr_prop (Machconcr.State s fb sp (Malloc :: c) ms m) E0
+         (Machconcr.State s fb sp c
+             (Regmap.set (Conventions.loc_alloc_result) (Vptr blk Int.zero) ms) m').
 Proof.
-  intros; red; intros.
-  eapply exec_straight_exec_prop; eauto.
+  intros; red; intros; inv MS.
+  left; eapply exec_straight_steps; eauto with coqlib.
   simpl. eapply transl_alloc_correct; eauto. 
 Qed.
 
 Lemma exec_Mgoto_prop:
-   forall (f : function) (sp : val) (lbl : Mach.label)
-     (c : list Mach.instruction) (ms : Mach.regset) (m : mem)
-     (c' : Mach.code),
-   Mach.find_label lbl (fn_code f) = Some c' ->
-   exec_instr_prop f sp (Mgoto lbl :: c) ms m E0 c' ms m.
+  forall (s : list stackframe) (fb : block) (f : function) (sp : val)
+         (lbl : Mach.label) (c : list Mach.instruction) (ms : Mach.regset)
+         (m : mem) (c' : Mach.code),
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  Mach.find_label lbl (fn_code f) = Some c' ->
+  exec_instr_prop (Machconcr.State s fb sp (Mgoto lbl :: c) ms m) E0
+                  (Machconcr.State s fb sp c' ms m).
 Proof.
-  intros; red; intros.
-  inversion AT.
-  generalize (find_label_goto_label f lbl rs1 m _ _ _ H1 (sym_equal H0) H).
+  intros; red; intros; inv MS.
+  assert (f0 = f) by congruence. subst f0.
+  inv AT. simpl in H3.
+  generalize (find_label_goto_label f lbl rs m _ _ _ FIND (sym_equal H1) H0).
   intros [rs2 [GOTO [AT2 INV]]].
-  exists rs2. split. apply agree_exten_2 with rs1; auto. 
-  split. inversion AT. apply exec_one. econstructor; eauto.
-  apply functions_transl; eauto. 
-  rewrite find_instr_tail. rewrite H7. simpl. reflexivity.
-  simpl. rewrite GOTO. auto. auto.
+  left; exists (State rs2 m); split.
+  apply plus_one. econstructor; eauto.
+  apply functions_transl; eauto.
+  eapply find_instr_tail; eauto.
+  simpl; auto.
+  econstructor; eauto.
+  eapply Mach.find_label_incl; eauto.
+  apply agree_exten_2 with rs; auto. 
 Qed.
 
 Lemma exec_Mcond_true_prop:
-   forall (f : function) (sp : val) (cond : condition)
-     (args : list mreg) (lbl : Mach.label) (c : list Mach.instruction)
-     (ms: Mach.regset) (m : mem) (c' : Mach.code),
-   eval_condition cond ms ## args = Some true ->
-   Mach.find_label lbl (fn_code f) = Some c' ->
-   exec_instr_prop f sp (Mcond cond args lbl :: c) ms m E0 c' ms m.
+  forall (s : list stackframe) (fb : block) (f : function) (sp : val)
+         (cond : condition) (args : list mreg) (lbl : Mach.label)
+         (c : list Mach.instruction) (ms : mreg -> val) (m : mem)
+         (c' : Mach.code),
+  eval_condition cond ms ## args m = Some true ->
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  Mach.find_label lbl (fn_code f) = Some c' ->
+  exec_instr_prop (Machconcr.State s fb sp (Mcond cond args lbl :: c) ms m) E0
+                  (Machconcr.State s fb sp c' ms m).
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS. assert (f0 = f) by congruence. subst f0.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
-  intro WTI. inversion WTI.
+  intro WTI. inv WTI.
   pose (k1 := 
          if snd (crbit_for_cond cond)
          then Pbt (fst (crbit_for_cond cond)) lbl :: transl_code c
          else Pbf (fst (crbit_for_cond cond)) lbl :: transl_code c).
   generalize (transl_cond_correct tge (transl_function f)
-                cond args k1 ms sp rs1 m true H2 AG H).
+                cond args k1 ms sp rs m true H3 AG H).
   simpl. intros [rs2 [EX [RES AG2]]].
-  inversion AT.
-  generalize (functions_transl _ _ H6); intro FN.
-  generalize (functions_transl_no_overflow _ _ H6); intro NOOV.
-  simpl in H7.
-  generalize (exec_straight_steps_2 _ _ _ _ _ _ _ EX 
-               NOOV _ _ (sym_equal H5) FN H7).
+  inv AT. simpl in H5.
+  generalize (functions_transl _ _ H4); intro FN.
+  generalize (functions_transl_no_overflow _ _ H4); intro NOOV.
+  exploit exec_straight_steps_2; eauto. 
   intros [ofs' [PC2 CT2]].
-  generalize (find_label_goto_label f lbl rs2 m _ _ _ H6 PC2 H0).
+  generalize (find_label_goto_label f lbl rs2 m _ _ _ FIND PC2 H1).
   intros [rs3 [GOTO [AT3 INV3]]].
-  exists rs3. split.
-  apply agree_exten_2 with rs2; auto.
-  split. eapply exec_trans. 
+  left; exists (State rs3 m); split.
+  eapply plus_right'. 
   eapply exec_straight_steps_1; eauto.
   caseEq (snd (crbit_for_cond cond)); intro ISSET; rewrite ISSET in RES.
-  apply exec_one. econstructor; eauto.
-  rewrite find_instr_tail. rewrite CT2. unfold k1. rewrite ISSET. reflexivity.
+  econstructor; eauto.
+  eapply find_instr_tail. unfold k1 in CT2; rewrite ISSET in CT2. eauto.
   simpl. rewrite RES. simpl. auto.
-  apply exec_one. econstructor; eauto.
-  rewrite find_instr_tail. rewrite CT2. unfold k1. rewrite ISSET. reflexivity.
+  econstructor; eauto.
+  eapply find_instr_tail. unfold k1 in CT2; rewrite ISSET in CT2. eauto.
   simpl. rewrite RES. simpl. auto.
-  traceEq. auto. 
+  traceEq.
+  econstructor; eauto. 
+  eapply Mach.find_label_incl; eauto.
+  apply agree_exten_2 with rs2; auto. 
 Qed.
 
 Lemma exec_Mcond_false_prop:
-   forall (f : function) (sp : val) (cond : condition)
-     (args : list mreg) (lbl : Mach.label) (c : list Mach.instruction)
-     (ms : Mach.regset) (m : mem),
-   eval_condition cond ms ## args = Some false ->
-   exec_instr_prop f sp (Mcond cond args lbl :: c) ms m E0 c ms m.
+  forall (s : list stackframe) (fb : block) (sp : val)
+         (cond : condition) (args : list mreg) (lbl : Mach.label)
+         (c : list Mach.instruction) (ms : mreg -> val) (m : mem),
+  eval_condition cond ms ## args m = Some false ->
+  exec_instr_prop (Machconcr.State s fb sp (Mcond cond args lbl :: c) ms m) E0
+                  (Machconcr.State s fb sp c ms m).
 Proof.
-  intros; red; intros.
+  intros; red; intros; inv MS.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI. inversion WTI.
   pose (k1 := 
@@ -979,12 +1152,11 @@ Proof.
          then Pbt (fst (crbit_for_cond cond)) lbl :: transl_code c
          else Pbf (fst (crbit_for_cond cond)) lbl :: transl_code c).
   generalize (transl_cond_correct tge (transl_function f)
-                cond args k1 ms sp rs1 m false H1 AG H).
+                cond args k1 ms sp rs m false H1 AG H).
   simpl. intros [rs2 [EX [RES AG2]]].
-  exists (nextinstr rs2).
-  split. auto with ppcgen. 
-  eapply exec_straight_steps; eauto.
-  eapply exec_straight_trans. eexact EX. 
+  left; eapply exec_straight_steps; eauto with coqlib.
+  exists (nextinstr rs2); split.
+  simpl. eapply exec_straight_trans. eexact EX. 
   caseEq (snd (crbit_for_cond cond)); intro ISSET; rewrite ISSET in RES.
   unfold k1; rewrite ISSET; apply exec_straight_one. 
   simpl. rewrite RES. reflexivity.
@@ -992,114 +1164,110 @@ Proof.
   unfold k1; rewrite ISSET; apply exec_straight_one. 
   simpl. rewrite RES. reflexivity.
   reflexivity.
+  auto with ppcgen.
 Qed.
 
-Lemma exec_instr_incl:
-  forall f sp c rs m t c' rs' m',
-  Mach.exec_instr ge f sp c rs m t c' rs' m' ->
-  incl c f.(fn_code) -> incl c' f.(fn_code).
+Lemma exec_Mreturn_prop:
+  forall (s : list stackframe) (fb stk : block) (soff : int)
+         (c : list Mach.instruction) (ms : Mach.regset) (m : mem),
+  load_stack m (Vptr stk soff) Tint (Int.repr 0) = Some (parent_sp s) ->
+  load_stack m (Vptr stk soff) Tint (Int.repr 12) = Some (parent_ra s) ->
+  exec_instr_prop (Machconcr.State s fb (Vptr stk soff) (Mreturn :: c) ms m) E0
+                  (Returnstate s ms (free m stk)).
 Proof.
-  induction 1; intros; eauto with coqlib.
-  eapply incl_find_label; eauto.
-  eapply incl_find_label; eauto.
-Qed.
-
-Lemma exec_instrs_incl:
-  forall f sp c rs m t c' rs' m',
-  Mach.exec_instrs ge f sp c rs m t c' rs' m' ->
-  incl c f.(fn_code) -> incl c' f.(fn_code).
-Proof.
-  induction 1; intros. 
-  auto.
-  eapply exec_instr_incl; eauto.
-  eauto.
-Qed.
-
-Lemma exec_refl_prop:
-   forall (f : function) (sp : val) (c : Mach.code) (ms : Mach.regset)
-     (m : mem), exec_instr_prop f sp c ms m E0 c ms m.
-Proof.
-  intros; red; intros.
-  exists rs1. split. auto. split. apply exec_refl. auto.
-Qed.
-
-Lemma exec_one_prop:
-   forall (f : function) (sp : val) (c : Mach.code) (ms : Mach.regset)
-     (m : mem) (t: trace) (c' : Mach.code) (ms' : Mach.regset) (m' : mem),
-   Mach.exec_instr ge f sp c ms m t c' ms' m' ->
-   exec_instr_prop f sp c ms m t c' ms' m' ->
-   exec_instr_prop f sp c ms m t c' ms' m'.
-Proof.
-  auto.
+  intros; red; intros; inv MS.
+  set (rs2 := nextinstr (rs#GPR2 <- (parent_ra s))).
+  set (rs3 := nextinstr (rs2#LR <- (parent_ra s))).
+  set (rs4 := nextinstr (rs3#GPR1 <- (parent_sp s))).
+  set (rs5 := rs4#PC <- (parent_ra s)).
+  assert (exec_straight tge (transl_function f)
+            (transl_code (Mreturn :: c)) rs m 
+            (Pblr :: transl_code c) rs4 (free m stk)).
+  simpl. apply exec_straight_three with rs2 m rs3 m.
+  simpl. unfold load1. rewrite gpr_or_zero_not_zero. unfold const_low.
+  unfold load_stack in H0. simpl in H0. 
+  rewrite <- (sp_val _ _ _ AG). simpl. rewrite H0.
+  reflexivity. discriminate.
+  unfold rs3. change (parent_ra s) with rs2#GPR2. reflexivity.
+  simpl. change (rs3 GPR1) with (rs GPR1). rewrite <- (sp_val _ _ _ AG).
+  simpl. 
+  unfold load_stack in H. simpl in H. rewrite Int.add_zero in H.  
+  rewrite H. reflexivity.
+  reflexivity. reflexivity. reflexivity.
+  left; exists (State rs5 (free m stk)); split.
+  (* execution *)
+  apply plus_right' with E0 (State rs4 (free m stk)) E0.
+  eapply exec_straight_exec; eauto.
+  inv AT. econstructor.
+  change (rs4 PC) with (Val.add (Val.add (Val.add (rs PC) Vone) Vone) Vone). 
+  rewrite <- H2. simpl. eauto.
+  apply functions_transl; eauto.
+  generalize (functions_transl_no_overflow _ _ H3); intro NOOV.
+  simpl in H4. eapply find_instr_tail. 
+  eapply code_tail_next_int; auto.
+  eapply code_tail_next_int; auto.
+  eapply code_tail_next_int; eauto.
+  reflexivity. traceEq.
+  (* match states *)
+  econstructor; eauto. 
+  assert (AG3: agree ms (Vptr stk soff) rs3). 
+    unfold rs3, rs2; auto 10 with ppcgen.
+  assert (AG4: agree ms (parent_sp s) rs4).
+    split. reflexivity. intros. unfold rs4. 
+    rewrite nextinstr_inv. rewrite Pregmap.gso.
+    elim AG3; auto. auto with ppcgen. auto with ppcgen.
+  unfold rs5; auto with ppcgen.
 Qed.
 
-Lemma exec_trans_prop:
-   forall (f : function) (sp : val) (c1 : Mach.code) (ms1 : Mach.regset)
-     (m1 : mem) (t1: trace) (c2 : Mach.code) (ms2 : Mach.regset) (m2 : mem)
-     (t2: trace) (c3 : Mach.code) (ms3 : Mach.regset) (m3 : mem) (t3: trace),
-   exec_instrs ge f sp c1 ms1 m1 t1 c2 ms2 m2 ->
-   exec_instr_prop f sp c1 ms1 m1 t1 c2 ms2 m2 ->
-   exec_instrs ge f sp c2 ms2 m2 t2 c3 ms3 m3 ->
-   exec_instr_prop f sp c2 ms2 m2 t2 c3 ms3 m3 -> 
-   t3 = t1 ** t2 ->
-   exec_instr_prop f sp c1 ms1 m1 t3 c3 ms3 m3.
-Proof.
-  intros; red; intros.
-  generalize (H0 rs1 WTF INCL AT AG).
-  intros [rs2 [AG2 [EX2 AT2]]].
-  generalize (exec_instrs_incl _ _ _ _ _ _ _ _ _ H INCL). intro INCL2.
-  generalize (H2 rs2 WTF INCL2 AT2 AG2).
-  intros [rs3 [AG3 [EX3 AT3]]].
-  exists rs3. split. auto. split. eapply exec_trans; eauto. auto.
-Qed.
+Hypothesis wt_prog: wt_program prog.
 
-Lemma exec_function_body_prop_:
-   forall (f : function) (parent ra : val) (ms : Mach.regset) (m : mem)
-     (t: trace) (ms' : Mach.regset) (m1 m2 m3 m4 : mem) (stk : block)
-     (c : list Mach.instruction),
-   alloc m (- fn_framesize f)
-      (align_16_top (- fn_framesize f) (fn_stacksize f)) = (m1, stk) ->
-   let sp := Vptr stk (Int.repr (- fn_framesize f)) in
-   store_stack m1 sp Tint (Int.repr 0) parent = Some m2 ->
-   store_stack m2 sp Tint (Int.repr 12) ra = Some m3 ->
-   exec_instrs ge f sp (fn_code f) ms m3 t (Mreturn :: c) ms' m4 ->
-   exec_instr_prop f sp (fn_code f) ms m3 t (Mreturn :: c) ms' m4 ->
-   load_stack m4 sp Tint (Int.repr 0) = Some parent ->
-   load_stack m4 sp Tint (Int.repr 12) = Some ra ->
-   exec_function_body_prop f parent ra ms m t ms' (free m4 stk).
+Lemma exec_function_internal_prop:
+  forall (s : list stackframe) (fb : block) (ms : Mach.regset)
+         (m : mem) (f : function) (m1 m2 m3 : mem) (stk : block),
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  alloc m (- fn_framesize f)
+          (align_16_top (- fn_framesize f) (fn_stacksize f)) = (m1, stk) ->
+  let sp := Vptr stk (Int.repr (- fn_framesize f)) in
+  store_stack m1 sp Tint (Int.repr 0) (parent_sp s) = Some m2 ->
+  store_stack m2 sp Tint (Int.repr 12) (parent_ra s) = Some m3 ->
+  exec_instr_prop (Machconcr.Callstate s fb ms m) E0
+                  (Machconcr.State s fb sp (fn_code f) ms m3).
 Proof.
-  intros; red; intros.
-  generalize (Genv.find_funct_inv AT). intros [b EQPC].
-  generalize AT. rewrite EQPC. rewrite Genv.find_funct_find_funct_ptr. intro FN.
-  generalize (functions_transl_no_overflow _ _ FN); intro NOOV.
-  set (rs2 := nextinstr (rs1#GPR1 <- sp #GPR2 <- Vundef)).
-  set (rs3 := nextinstr (rs2#GPR2 <- ra)).
+  intros; red; intros; inv MS.
+  assert (WTF: wt_function f).
+    generalize (Genv.find_funct_ptr_prop wt_fundef wt_prog H); intro TY.
+    inversion TY; auto.
+  exploit functions_transl; eauto. intro TFIND.
+  generalize (functions_transl_no_overflow _ _ H); intro NOOV.
+  set (rs2 := nextinstr (rs#GPR1 <- sp #GPR2 <- Vundef)).
+  set (rs3 := nextinstr (rs2#GPR2 <- (parent_ra s))).
   set (rs4 := nextinstr rs3).
-  assert (exec_straight tge (transl_function f)
-            (transl_function f) rs1 m 
-            (transl_code (fn_code f)) rs4 m3).
+  (* Execution of function prologue *)
+  assert (EXEC_PROLOGUE:
+            exec_straight tge (transl_function f)
+              (transl_function f) rs m
+              (transl_code (fn_code f)) rs4 m3).
   unfold transl_function at 2. 
   apply exec_straight_three with rs2 m2 rs3 m2.
-  unfold exec_instr. rewrite H. fold sp. 
-  generalize H0. unfold store_stack. change (Vint (Int.repr 0)) with Vzero.
+  unfold exec_instr. rewrite H0. fold sp. 
+  generalize H1. unfold store_stack. change (Vint (Int.repr 0)) with Vzero.
   replace (Val.add sp Vzero) with sp. simpl chunk_of_type. 
-  rewrite (sp_val _ _ _ AG). intro. rewrite H6. clear H6.
+  rewrite (sp_val _ _ _ AG). intro EQ; rewrite EQ; clear EQ.
   reflexivity. unfold sp. simpl. rewrite Int.add_zero. reflexivity.
-  simpl. replace (rs2 LR) with ra. reflexivity.
+  simpl. change (rs2 LR) with (rs LR). rewrite ATLR. reflexivity.
   simpl. unfold store1. rewrite gpr_or_zero_not_zero. 
-  unfold const_low. replace (rs3 GPR1) with sp. replace (rs3 GPR2) with ra.
-  unfold store_stack in H1. simpl chunk_of_type in H1. rewrite H1. reflexivity.
-  reflexivity. reflexivity. discriminate. 
-  reflexivity. reflexivity. reflexivity.
-  assert (AT2: transl_code_at_pc rs4#PC f f.(fn_code)).
-    change (rs4 PC) with (Val.add (Val.add (Val.add (rs1 PC) Vone) Vone) Vone).
-    rewrite EQPC. simpl. constructor. auto.
+  unfold const_low. change (rs3 GPR1) with sp. change (rs3 GPR2) with (parent_ra s).
+  unfold store_stack in H2. simpl chunk_of_type in H2. rewrite H2. reflexivity.
+  discriminate. reflexivity. reflexivity. reflexivity.
+  (* Agreement at end of prologue *)
+  assert (AT4: transl_code_at_pc rs4#PC fb f f.(fn_code)).
+    change (rs4 PC) with (Val.add (Val.add (Val.add (rs PC) Vone) Vone) Vone).
+    rewrite ATPC. simpl. constructor. auto.
     eapply code_tail_next_int; auto.
     eapply code_tail_next_int; auto.
     eapply code_tail_next_int; auto.
-    unfold Int.zero. rewrite Int.unsigned_repr. 
-    rewrite code_tail_zero. unfold transl_function. reflexivity.
-    compute. intuition congruence.
+    change (Int.unsigned Int.zero) with 0. 
+    unfold transl_function. constructor.
   assert (AG2: agree ms sp rs2).
     split. reflexivity. 
     intros. unfold rs2. rewrite nextinstr_inv. 
@@ -1107,114 +1275,52 @@ Proof.
     auto with ppcgen. auto with ppcgen. auto with ppcgen.
   assert (AG4: agree ms sp rs4).
     unfold rs4, rs3; auto with ppcgen.
-  generalize (H3 rs4 WTF (incl_refl _) AT2 AG4).
-  intros [rs5 [AG5 [EXB AT5]]].
-  set (rs6 := nextinstr (rs5#GPR2 <- ra)).
-  set (rs7 := nextinstr (rs6#LR <- ra)).
-  set (rs8 := nextinstr (rs7#GPR1 <- parent)).
-  set (rs9 := rs8#PC <- ra).
-  assert (exec_straight tge (transl_function f)
-            (transl_code (Mreturn :: c)) rs5 m4 
-            (Pblr :: transl_code c) rs8 (free m4 stk)).
-  simpl. apply exec_straight_three with rs6 m4 rs7 m4.
-  simpl. unfold load1. rewrite gpr_or_zero_not_zero. unfold const_low.
-  unfold load_stack in H5. simpl in H5. 
-  rewrite <- (sp_val _ _ _ AG5). simpl. rewrite H5.
-  reflexivity. discriminate.
-  unfold rs7. change ra with rs6#GPR2. reflexivity. 
-  unfold exec_instr. generalize H4. unfold load_stack. 
-  replace (Val.add sp (Vint (Int.repr 0))) with sp.
-  simpl chunk_of_type. intro. change rs7#GPR1 with rs5#GPR1.
-  rewrite <- (sp_val _ _ _ AG5). rewrite H7.
-  unfold sp. reflexivity.
-  unfold sp. simpl. rewrite Int.add_zero. reflexivity.
-  reflexivity. reflexivity. reflexivity.
-  exists rs9. split.
-  (* agreement *)
-  assert (AG7: agree ms' sp rs7). 
-    unfold rs7, rs6; auto 10 with ppcgen.
-  assert (AG8: agree ms' parent rs8).
-    split. reflexivity. intros. unfold rs8. 
-    rewrite nextinstr_inv. rewrite Pregmap.gso.
-    elim AG7; auto. auto with ppcgen. auto with ppcgen.
-  unfold rs9; auto with ppcgen.
+  left; exists (State rs4 m3); split.
   (* execution *)
-  split. apply exec_trans with E0 rs4 m3 t.
-  eapply exec_straight_steps_1; eauto. 
-  apply functions_transl; auto. 
-  apply exec_trans with t rs5 m4 E0. assumption.
-  inversion AT5.
-  apply exec_trans with E0 rs8 (free m4 stk) E0.
   eapply exec_straight_steps_1; eauto.
-  apply functions_transl; auto.
-  apply exec_one. econstructor. 
-  change rs8#PC with (Val.add (Val.add (Val.add rs5#PC Vone) Vone) Vone).
-  rewrite <- H8. simpl. reflexivity.
-  apply functions_transl; eauto.
-  assert (code_tail (Int.unsigned (Int.add (Int.add (Int.add ofs Int.one) Int.one) Int.one))
-                    (transl_function f) = Pblr :: transl_code c).
-  eapply code_tail_next_int; auto.
-  eapply code_tail_next_int; auto.
-  eapply code_tail_next_int; auto.
-  rewrite H10. simpl. reflexivity.
-  rewrite find_instr_tail. rewrite H13.
-  reflexivity.
-  reflexivity.
-  traceEq. traceEq. traceEq. 
-  (* LR preservation *)
-  change rs9#PC with ra. auto.
+  change (Int.unsigned Int.zero) with 0. constructor.
+  (* match states *)
+  econstructor; eauto with coqlib.
 Qed.
 
-Lemma exec_function_internal_prop:
-   forall (f : function) (parent : val) (ms : Mach.regset) (m : mem)
-          (t: trace) (ms' : Mach.regset) (m' : mem),
-   (forall ra : val,
-      Val.has_type ra Tint ->
-      exec_function_body ge f parent ra ms m t ms' m') ->
-   (forall ra : val, Val.has_type ra Tint -> 
-      exec_function_body_prop f parent ra ms m t ms' m') ->
-   exec_function_prop (Internal f) parent ms m t ms' m'.
+Lemma exec_function_external_prop:
+  forall (s : list stackframe) (fb : block) (ms : Mach.regset)
+         (m : mem) (t0 : trace) (ms' : RegEq.t -> val)
+         (ef : external_function) (args : list val) (res : val),
+  Genv.find_funct_ptr ge fb = Some (External ef) ->
+  event_match ef args t0 res ->
+  Machconcr.extcall_arguments ms m (parent_sp s) (ef_sig ef) args ->
+  ms' = Regmap.set (Conventions.loc_result (ef_sig ef)) res ms ->
+  exec_instr_prop (Machconcr.Callstate s fb ms m)
+               t0 (Machconcr.Returnstate s ms' m).
 Proof.
-  intros; red; intros.
-  inversion WTF. subst f0. 
-  apply (H0 rs1#LR WTRA rs1 WTRA (refl_equal _) H2 AT AG).
+  intros; red; intros; inv MS.
+  exploit functions_translated; eauto.
+  intros [tf [A B]]. simpl in B. inv B. 
+  left; exists (State (rs#(loc_external_result (ef_sig ef)) <- res #PC <- (rs LR))
+                m); split.
+  apply plus_one. eapply exec_step_external; eauto. 
+  eapply extcall_arguments_match; eauto. 
+  econstructor; eauto. 
+  rewrite loc_external_result_match. auto with ppcgen.
 Qed.
 
-Lemma exec_function_external_prop:
-  forall (ef : external_function) (parent : val) (args : list val)
-         (res : val) (ms1 ms2: Mach.regset) (m : mem)
-         (t : trace),
-  event_match ef args t res ->
-  Mach.extcall_arguments ms1 m parent ef.(ef_sig) args ->
-  ms2 = Regmap.set (Conventions.loc_result (ef_sig ef)) res ms1 ->
-  exec_function_prop (External ef) parent ms1 m t ms2 m.
+Lemma exec_return_prop:
+  forall (s : list stackframe) (fb : block) (sp ra : val)
+         (c : Mach.code) (ms : Mach.regset) (m : mem),
+  exec_instr_prop (Machconcr.Returnstate (Stackframe fb sp ra c :: s) ms m) E0
+                  (Machconcr.State s fb sp c ms m).
 Proof.
-  intros; red; intros.
-  destruct (Genv.find_funct_inv AT) as [b EQ].
-  rewrite EQ in AT. rewrite Genv.find_funct_find_funct_ptr in AT.
-  exists (rs1#(loc_external_result (ef_sig ef)) <- res #PC <- (rs1 LR)).
-  split. rewrite loc_external_result_match. rewrite H1. auto with ppcgen.
-  split. apply exec_one. eapply exec_step_external; eauto.
-  destruct (functions_translated _ _ AT) as [tf [A B]].
-  simpl in B. congruence.
-  eapply extcall_arguments_match; eauto.
-  reflexivity. 
+  intros; red; intros; inv MS. inv STACKS. simpl in *.
+  right. split. omega. split. auto. 
+  econstructor; eauto. rewrite ATPC; auto.  
 Qed.
 
-(** We then conclude by induction on the structure of the Mach
-execution derivation. *)
-
-Theorem transf_function_correct:
-  forall f parent ms m t ms' m',
-  Mach.exec_function ge f parent ms m t ms' m' ->
-  exec_function_prop f parent ms m t ms' m'.
-Proof 
-  (Mach.exec_function_ind4 ge
-           exec_instr_prop
-           exec_instr_prop
-           exec_function_body_prop
-           exec_function_prop
-
+Theorem transf_instr_correct:
+  forall s1 t s2, Machconcr.step ge s1 t s2 ->
+  exec_instr_prop s1 t s2.
+Proof
+  (Machconcr.step_ind ge exec_instr_prop
            exec_Mlabel_prop
            exec_Mgetstack_prop
            exec_Msetstack_prop
@@ -1223,53 +1329,50 @@ Proof
            exec_Mload_prop
            exec_Mstore_prop
            exec_Mcall_prop
+           exec_Mtailcall_prop
            exec_Malloc_prop
            exec_Mgoto_prop
            exec_Mcond_true_prop
            exec_Mcond_false_prop
-           exec_refl_prop
-           exec_one_prop
-           exec_trans_prop
-           exec_function_body_prop_
+           exec_Mreturn_prop
            exec_function_internal_prop
-           exec_function_external_prop).
+           exec_function_external_prop
+           exec_return_prop).
 
-End PRESERVATION.
+Lemma transf_initial_states:
+  forall st1, Machconcr.initial_state prog st1 ->
+  exists st2, PPC.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H. unfold ge0 in *.
+  econstructor; split.
+  econstructor.
+  replace (symbol_offset (Genv.globalenv tprog) (prog_main tprog) Int.zero)
+     with (Vptr fb Int.zero).
+  rewrite (Genv.init_mem_transf_partial _ _ TRANSF).
+  econstructor; eauto. constructor.
+  split. auto. intros. repeat rewrite Pregmap.gso; auto with ppcgen.
+  unfold symbol_offset. 
+  rewrite (transform_partial_program_main _ _ TRANSF). 
+  rewrite symbols_preserved. unfold ge; rewrite H0. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> Machconcr.final_state st1 r -> PPC.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. constructor. auto. 
+  rewrite (ireg_val _ _ _ R3 AG) in H1. auto. auto.
+Qed.
 
 Theorem transf_program_correct:
-  forall (p: Mach.program) (tp: PPC.program) (t: trace) (r: val),
-  wt_program p ->
-  transf_program p = Some tp ->
-  Mach.exec_program p t r ->
-  PPC.exec_program tp t r.
+  forall (beh: program_behavior),
+  Machconcr.exec_program prog beh -> PPC.exec_program tprog beh.
 Proof.
-  intros. 
-  destruct H1 as [fptr [f [ms [m [FINDS [FINDF [EX RES]]]]]]].
-  assert (WTF: wt_fundef f).
-    apply (Genv.find_funct_ptr_prop wt_fundef H FINDF).
-  set (ge := Genv.globalenv p) in *.
-  set (ms0 := Regmap.init Vundef) in *.
-  set (tge := Genv.globalenv tp).
-  set (rs0 := 
-    (Pregmap.init Vundef) # PC <- (symbol_offset tge tp.(prog_main) Int.zero)
-                          # LR <- Vzero
-                          # GPR1 <- (Vptr Mem.nullptr Int.zero)).
-  assert (AT: Genv.find_funct ge (rs0 PC) = Some f).
-    change (rs0 PC) with (symbol_offset tge tp.(prog_main) Int.zero).
-    rewrite (transform_partial_program_main _ _ H0).
-    unfold symbol_offset. rewrite (symbols_preserved p tp H0). 
-    fold ge. rewrite FINDS. 
-    rewrite Genv.find_funct_find_funct_ptr. exact FINDF.
-  assert (AG: agree ms0 (Vptr Mem.nullptr Int.zero) rs0).
-    split. reflexivity. intros. unfold rs0. 
-    repeat (rewrite Pregmap.gso; auto with ppcgen). 
-  assert (WTRA: Val.has_type (rs0 LR) Tint).
-    exact I.
-  generalize (transf_function_correct p tp H0 H
-                 _ _ _ _ _ _ _ EX rs0 WTF AT AG WTRA).
-  intros [rs [AG' [EX' RPC]]].
-  red. exists rs; exists m.
-  split. rewrite (Genv.init_mem_transf_partial _ _ H0). exact EX'.
-  split. rewrite RPC. reflexivity. rewrite <- RES. 
-  change (IR GPR3) with (preg_of R3). elim AG'; auto.
+  unfold Machconcr.exec_program, PPC.exec_program; intros.
+  eapply simulation_star_preservation with (measure := measure); eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact transf_instr_correct. 
 Qed.
+
+End PRESERVATION.
diff --git a/backend/PPCgenproof1.v b/backend/PPCgenproof1.v
index f9af3c30..1b432c72 100644
--- a/backend/PPCgenproof1.v
+++ b/backend/PPCgenproof1.v
@@ -11,6 +11,7 @@ Require Import Globalenvs.
 Require Import Op.
 Require Import Locations.
 Require Import Mach.
+Require Import Machconcr.
 Require Import Machtyping.
 Require Import PPC.
 Require Import PPCgen.
@@ -448,7 +449,7 @@ Lemma extcall_args_match:
   forall tyl iregl fregl ofs args,
   (forall r, In r iregl -> mreg_type r = Tint) ->
   (forall r, In r fregl -> mreg_type r = Tfloat) ->
-  Mach.extcall_args ms m sp (Conventions.loc_arguments_rec tyl iregl fregl ofs) args ->
+  Machconcr.extcall_args ms m sp (Conventions.loc_arguments_rec tyl iregl fregl ofs) args ->
   PPC.extcall_args rs m tyl (List.map ireg_of iregl) (List.map freg_of fregl) (4 * ofs) args.
 Proof.
   induction tyl; intros.
@@ -478,9 +479,7 @@ Proof.
   eapply sp_val; eauto. 
   change (@nil freg) with (freg_of ## nil). 
   replace (4 * ofs + 8) with (4 * (ofs + 2)) by omega. 
-  rewrite list_map_drop2. 
   apply IHtyl; auto. 
-  intros. apply H0. apply list_drop2_incl. auto. 
   (* register *)
   inversion H2; subst; clear H2. inversion H8; subst; clear H8.
   simpl map. econstructor. eapply freg_val; eauto.
@@ -505,13 +504,13 @@ Ltac ElimOrEq :=
 Lemma extcall_arguments_match:
   forall ms m sp rs sg args,
   agree ms sp rs ->
-  Mach.extcall_arguments ms m sp sg args ->
+  Machconcr.extcall_arguments ms m sp sg args ->
   PPC.extcall_arguments rs m sg args.
 Proof.
-  unfold Mach.extcall_arguments, PPC.extcall_arguments; intros.
+  unfold Machconcr.extcall_arguments, PPC.extcall_arguments; intros.
   change (extcall_args rs m sg.(sig_args)
     (List.map ireg_of Conventions.int_param_regs)
-    (List.map freg_of Conventions.float_param_regs) (4 * 6) args).
+    (List.map freg_of Conventions.float_param_regs) (4 * 14) args).
   eapply extcall_args_match; eauto. 
   intro; simpl; ElimOrEq; reflexivity.  
   intro; simpl; ElimOrEq; reflexivity.  
@@ -533,9 +532,11 @@ Variable fn: code.
 
 Inductive exec_straight: code -> regset -> mem -> 
                          code -> regset -> mem -> Prop :=
-  | exec_straight_refl:
-      forall c rs m,
-      exec_straight c rs m c rs m
+  | exec_straight_one:
+      forall i1 c rs1 m1 rs2 m2,
+      exec_instr ge fn i1 rs1 m1 = OK rs2 m2 ->
+      rs2#PC = Val.add rs1#PC Vone ->
+      exec_straight (i1 :: c) rs1 m1 c rs2 m2
   | exec_straight_step:
       forall i c rs1 m1 rs2 m2 c' rs3 m3,
       exec_instr ge fn i rs1 m1 = OK rs2 m2 ->
@@ -549,18 +550,9 @@ Lemma exec_straight_trans:
   exec_straight c2 rs2 m2 c3 rs3 m3 ->
   exec_straight c1 rs1 m1 c3 rs3 m3.
 Proof.
-  induction 1. auto. 
-  intro. apply exec_straight_step with rs2 m2; auto.
-Qed.
-
-Lemma exec_straight_one:
-  forall i1 c rs1 m1 rs2 m2,
-  exec_instr ge fn i1 rs1 m1 = OK rs2 m2 ->
-  rs2#PC = Val.add rs1#PC Vone ->
-  exec_straight (i1 :: c) rs1 m1 c rs2 m2.
-Proof.
-  intros. apply exec_straight_step with rs2 m2. auto. auto. 
-  apply exec_straight_refl.
+  induction 1; intros.
+  apply exec_straight_step with rs2 m2; auto.
+  apply exec_straight_step with rs2 m2; auto.
 Qed.
 
 Lemma exec_straight_two:
@@ -1182,7 +1174,7 @@ Lemma transl_cond_correct:
   forall cond args k ms sp rs m b,
   map mreg_type args = type_of_condition cond ->
   agree ms sp rs ->
-  eval_condition cond (map ms args) = Some b ->
+  eval_condition cond (map ms args) m = Some b ->
   exists rs',
      exec_straight (transl_cond cond args k) rs m k rs' m
   /\ rs'#(reg_of_crbit (fst (crbit_for_cond cond))) = 
@@ -1191,7 +1183,7 @@ Lemma transl_cond_correct:
         else Val.notbool (Val.of_bool b))
   /\ agree ms sp rs'.
 Proof.
-  intros. rewrite <- (eval_condition_weaken _ _ H1). 
+  intros. rewrite <- (eval_condition_weaken _ _ _ H1). 
   apply transl_cond_correct_aux; auto.
 Qed.
 
@@ -1221,12 +1213,12 @@ Lemma transl_op_correct:
   forall op args res k ms sp rs m v,
   wt_instr (Mop op args res) ->
   agree ms sp rs ->
-  eval_operation ge sp op (map ms args) = Some v ->
+  eval_operation ge sp op (map ms args) m = Some v ->
   exists rs',
      exec_straight (transl_op op args res k) rs m k rs' m
   /\ agree (Regmap.set res v ms) sp rs'.
 Proof.
-  intros. rewrite <- (eval_operation_weaken _ _ _ _ H1). clear H1; clear v.
+  intros. rewrite <- (eval_operation_weaken _ _ _ _ _ H1). clear H1; clear v.
   inversion H.
   (* Omove *)
   simpl. exists (nextinstr (rs#(preg_of res) <- (ms r1))).
@@ -1238,19 +1230,9 @@ Proof.
   simpl. unfold preg_of. rewrite <- H2. rewrite H5. reflexivity.
   auto with ppcgen.
   auto with ppcgen.
-  (* Oundef *)
-  simpl. exists (nextinstr (rs#(preg_of res) <- Vundef)).
-  split. caseEq (mreg_type res); intro.
-  apply exec_straight_one. simpl.   
-  unfold preg_of; rewrite H1. reflexivity.
-  auto with ppcgen.
-  apply exec_straight_one. simpl.   
-  unfold preg_of; rewrite H1. reflexivity.
-  auto with ppcgen.
-  auto with ppcgen.
   (* Other instructions *)
-  clear H1; clear H2; clear H3.
-  destruct op; simpl in H6; injection H6; clear H6; intros;
+  clear H1; clear H2; clear H4.
+  destruct op; simpl in H5; injection H5; clear H5; intros;
   TypeInv; simpl; try (TranslOpSimpl).
   (* Omove again *)
   congruence.
@@ -1283,8 +1265,6 @@ Proof.
   exists rs'. split. auto. 
   apply agree_set_mireg_exten with rs; auto.
   rewrite (sp_val ms sp rs). auto. auto. 
-  (* Oundef again *)
-  congruence.
   (* Ocast8unsigned *)
   exists (nextinstr (rs#(ireg_of res) <- (Val.rolm (ms m0) Int.zero (Int.repr 255)))).
   split. apply exec_straight_one. repeat (rewrite (ireg_val ms sp rs)); auto. reflexivity.
diff --git a/backend/PPCgenretaddr.v b/backend/PPCgenretaddr.v
new file mode 100644
index 00000000..f2802ecd
--- /dev/null
+++ b/backend/PPCgenretaddr.v
@@ -0,0 +1,176 @@
+(** Predictor for return addresses in generated PPC code.
+
+    The [return_address_offset] predicate defined here is used in the
+    concrete semantics for Mach (module [Machconcr]) to determine the
+    return addresses that are stored in activation records. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Mem.
+Require Import Globalenvs.
+Require Import Op.
+Require Import Locations.
+Require Import Mach.
+Require Import PPC.
+Require Import PPCgen.
+
+(** The ``code tail'' of an instruction list [c] is the list of instructions
+  starting at PC [pos]. *)
+
+Inductive code_tail: Z -> code -> code -> Prop :=
+  | code_tail_0: forall c,
+      code_tail 0 c c
+  | code_tail_S: forall pos i c1 c2,
+      code_tail pos c1 c2 ->
+      code_tail (pos + 1) (i :: c1) c2.
+
+Lemma code_tail_pos:
+  forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0.
+Proof.
+  induction 1. omega. omega.
+Qed.
+
+(** Consider a Mach function [f] and a sequence [c] of Mach instructions
+  representing the Mach code that remains to be executed after a
+  function call returns.  The predicate [return_address_offset f c ofs]
+  holds if [ofs] is the integer offset of the PPC instruction
+  following the call in the PPC code obtained by translating the
+  code of [f]. Graphically:
+<<
+     Mach function f    |--------- Mcall ---------|
+         Mach code c    |                |--------|
+                        |                 \        \
+                        |                  \        \
+                        |                   \        \
+     PPC code           |                    |--------|
+     PPC function       |--------------- Pbl ---------|
+
+                        <-------- ofs ------->
+>>
+*)
+
+Inductive return_address_offset: Mach.function -> Mach.code -> int -> Prop :=
+  | return_address_offset_intro:
+      forall c f ofs,
+      code_tail ofs (transl_function f) (transl_code c) ->
+      return_address_offset f c (Int.repr ofs).
+
+(** We now show that such an offset always exists if the Mach code [c]
+  is a suffix of [f.(fn_code)].  This holds because the translation
+  from Mach to PPC is compositional: each Mach instruction becomes
+  zero, one or several PPC instructions, but the order of instructions
+  is preserved. *)
+
+Lemma is_tail_code_tail:
+  forall c1 c2, is_tail c1 c2 -> exists ofs, code_tail ofs c2 c1.
+Proof.
+  induction 1. exists 0; constructor.
+  destruct IHis_tail as [ofs CT]. exists (ofs + 1); constructor; auto.
+Qed.
+
+Hint Resolve is_tail_refl: ppcretaddr.
+
+Ltac IsTail :=
+  auto with ppcretaddr;
+  match goal with
+  | [ |- is_tail _ (_ :: _) ] => constructor; IsTail
+  | [ |- is_tail _ (match ?x with true => _ | false => _ end) ] => destruct x; IsTail
+  | [ |- is_tail _ (match ?x with left _ => _ | right _ => _ end) ] => destruct x; IsTail
+  | [ |- is_tail _ (match ?x with nil => _ | _ :: _ => _ end) ] => destruct x; IsTail
+  | [ |- is_tail _ (match ?x with Tint => _ | Tfloat => _ end) ] => destruct x; IsTail
+  | [ |- is_tail _ (?f _ _ _ _ _ _ ?k) ] => apply is_tail_trans with k; IsTail
+  | [ |- is_tail _ (?f _ _ _ _ _ ?k) ] => apply is_tail_trans with k; IsTail
+  | [ |- is_tail _ (?f _ _ _ _ ?k) ] => apply is_tail_trans with k; IsTail
+  | [ |- is_tail _ (?f _ _ _ ?k) ] => apply is_tail_trans with k; IsTail
+  | [ |- is_tail _ (?f _ _ ?k) ] => apply is_tail_trans with k; IsTail
+  | _ => idtac
+  end.
+
+Lemma loadimm_tail:
+  forall r n k, is_tail k (loadimm r n k).
+Proof. unfold loadimm; intros; IsTail. Qed.
+Hint Resolve loadimm_tail: ppcretaddr.
+
+Lemma addimm_tail:
+  forall r1 r2 n k, is_tail k (addimm r1 r2 n k).
+Proof. unfold addimm, addimm_1, addimm_2; intros; IsTail. Qed.
+Hint Resolve addimm_tail: ppcretaddr.
+
+Lemma andimm_tail:
+  forall r1 r2 n k, is_tail k (andimm r1 r2 n k).
+Proof. unfold andimm; intros; IsTail. Qed.
+Hint Resolve andimm_tail: ppcretaddr.
+
+Lemma orimm_tail:
+  forall r1 r2 n k, is_tail k (orimm r1 r2 n k).
+Proof. unfold orimm; intros; IsTail. Qed.
+Hint Resolve orimm_tail: ppcretaddr.
+
+Lemma xorimm_tail:
+  forall r1 r2 n k, is_tail k (xorimm r1 r2 n k).
+Proof. unfold xorimm; intros; IsTail. Qed.
+Hint Resolve xorimm_tail: ppcretaddr.
+
+Lemma loadind_tail:
+  forall base ofs ty dst k, is_tail k (loadind base ofs ty dst k).
+Proof. unfold loadind; intros; IsTail. Qed.
+Hint Resolve loadind_tail: ppcretaddr.
+
+Lemma storeind_tail:
+  forall src base ofs ty k, is_tail k (storeind src base ofs ty k).
+Proof. unfold storeind; intros; IsTail. Qed.
+Hint Resolve storeind_tail: ppcretaddr.
+
+Lemma floatcomp_tail:
+  forall cmp r1 r2 k, is_tail k (floatcomp cmp r1 r2 k).
+Proof. unfold floatcomp; intros; destruct cmp; IsTail. Qed.
+Hint Resolve floatcomp_tail: ppcretaddr.
+
+Lemma transl_cond_tail:
+  forall cond args k, is_tail k (transl_cond cond args k).
+Proof. unfold transl_cond; intros; destruct cond; IsTail. Qed.
+Hint Resolve transl_cond_tail: ppcretaddr.
+
+Lemma transl_op_tail:
+  forall op args r k, is_tail k (transl_op op args r k).
+Proof. unfold transl_op; intros; destruct op; IsTail. Qed.
+Hint Resolve transl_op_tail: ppcretaddr.
+
+Lemma transl_load_store_tail:
+  forall mk1 mk2 addr args k,
+  is_tail k (transl_load_store mk1 mk2 addr args k).
+Proof. unfold transl_load_store; intros; destruct addr; IsTail. Qed.
+Hint Resolve transl_load_store_tail: ppcretaddr.
+
+Lemma transl_instr_tail:
+  forall i k, is_tail k (transl_instr i k).
+Proof.
+  unfold transl_instr; intros; destruct i; IsTail.
+  destruct m; IsTail.
+  destruct m; IsTail.
+  destruct s0; IsTail.
+  destruct s0; IsTail.
+Qed.
+Hint Resolve transl_instr_tail: ppcretaddr.
+
+Lemma transl_code_tail: 
+  forall c1 c2, is_tail c1 c2 -> is_tail (transl_code c1) (transl_code c2).
+Proof.
+  induction 1; simpl. constructor. eapply is_tail_trans; eauto with ppcretaddr.
+Qed.
+
+Lemma return_address_exists:
+  forall f c, is_tail c f.(fn_code) ->
+  exists ra, return_address_offset f c ra.
+Proof.
+  intros. assert (is_tail (transl_code c) (transl_function f)).
+  unfold transl_function. IsTail. apply transl_code_tail; auto.
+  destruct (is_tail_code_tail _ _ H0) as [ofs A].
+  exists (Int.repr ofs). constructor. auto.
+Qed.
+
+ 
diff --git a/backend/Parallelmove.v b/backend/Parallelmove.v
index b2ec930b..3d77b57a 100644
--- a/backend/Parallelmove.v
+++ b/backend/Parallelmove.v
@@ -1,3 +1,14 @@
+(** Translation of parallel moves into sequences of individual moves.
+
+  In this file, we adapt the generic "parallel move" algorithm
+  (developed and proved correct in module [Parmov]) to the idiosyncraties
+  of the [LTLin] and [Linear] intermediate languages.  While the generic
+  algorithm assumes that registers never overlap, the locations
+  used in [LTLin] and [Linear] can overlap, and assigning one location
+  can set the values of other, overlapping locations to [Vundef].
+  We address this issue in the remainder of this file.
+*)
+
 Require Import Coqlib.
 Require Parmov.
 Require Import Values.
@@ -6,16 +17,28 @@ Require Import AST.
 Require Import Locations.
 Require Import Conventions.
 
+(** * Instantiating the generic parallel move algorithm *)
+
+(** The temporary location to use for a move is determined
+  by the type of the data being moved: register [IT2] for an
+  integer datum, and register [FT2] for a floating-point datum. *)
+
 Definition temp_for (l: loc) : loc :=
   match Loc.type l with Tint => R IT2 | Tfloat => R FT2 end.
 
 Definition parmove (srcs dsts: list loc) :=
-  Parmov.parmove2 loc temp_for Loc.eq srcs dsts.
+  Parmov.parmove2 loc Loc.eq temp_for srcs dsts.
 
 Definition moves := (list (loc * loc))%type.
 
+(** [exec_seq m] gives semantics to a sequence of elementary moves.
+  This semantics ignores the possibility of overlap: only the
+  target locations are updated, but the locations they
+  overlap with are not set to [Vundef].  See [effect_seqmove] below
+  for a semantics that accounts for overlaps. *)
+
 Definition exec_seq (m: moves) (e: Locmap.t) : Locmap.t :=
-  Parmov.exec_seq loc val Loc.eq m e.
+  Parmov.exec_seq loc Loc.eq val m e.
   
 Lemma temp_for_charact:
   forall l, temp_for l = R IT2 \/ temp_for l = R FT2.
@@ -52,6 +75,12 @@ Proof.
   apply Loc.notin_not_in; auto. auto.
 Qed.
 
+(** Instantiating the theorems proved in [Parmov], we obtain
+  the following properties of semantic correctness and well-typedness
+  of the generated sequence of moves.  Note that the semantic
+  correctness result is stated in terms of the [exec_seq] semantics,
+  and therefore does not account for overlap between locations. *)
+
 Lemma parmove_prop_1:
   forall srcs dsts,
   List.length srcs = List.length dsts ->
@@ -69,8 +98,8 @@ Proof.
     intros. apply disjoint_temp_not_temp. apply Loc.disjoint_notin with srcs; auto.
   assert (NTD: forall r, In r dsts -> Parmov.is_not_temp loc temp_for r).
     intros. apply disjoint_temp_not_temp. apply Loc.disjoint_notin with dsts; auto.
-  generalize (Parmov.parmove2_correctness loc temp_for val Loc.eq srcs dsts H NR NTS NTD e).
-  change (Parmov.exec_seq loc val Loc.eq (Parmov.parmove2 loc temp_for Loc.eq srcs dsts) e) with e'.
+  generalize (Parmov.parmove2_correctness loc Loc.eq temp_for val srcs dsts H NR NTS NTD e).
+  change (Parmov.exec_seq loc Loc.eq val (Parmov.parmove2 loc Loc.eq temp_for srcs dsts) e) with e'.
   intros [A B].
   split. auto. intros. apply B. auto. rewrite is_not_temp_charact; auto.
 Qed.
@@ -97,7 +126,7 @@ Proof.
     tauto.
     right. apply temp_for_charact.
   intros. apply H. 
-  apply (Parmov.parmove2_wf_moves loc temp_for Loc.eq srcs dsts s d H0). 
+  apply (Parmov.parmove2_wf_moves loc Loc.eq temp_for srcs dsts s d H0). 
 Qed.
 
 Lemma loc_type_temp_for:
@@ -134,11 +163,21 @@ Proof.
   rewrite loc_type_temp_for; auto.
   rewrite loc_type_temp_for; auto.
   intros. apply H. 
-  apply (Parmov.parmove2_wf_moves loc temp_for Loc.eq srcs dsts s d H0). 
+  apply (Parmov.parmove2_wf_moves loc Loc.eq temp_for srcs dsts s d H0). 
 Qed.
 
+(** * Accounting for overlap between locations *)
+
 Section EQUIVALENCE.
 
+(** We now prove the correctness of the generated sequence of elementary
+  moves, accounting for possible overlap between locations.
+  The proof is conducted under the following hypotheses: there must
+  be no partial overlap between
+- two distinct destinations (hypothesis [NOREPET]);
+- a source location and a destination location (hypothesis [NO_OVERLAP]).
+*)
+
 Variables srcs dsts: list loc.
 Hypothesis LENGTH: List.length srcs = List.length dsts.
 Hypothesis NOREPET: Loc.norepet dsts.
@@ -146,9 +185,16 @@ Hypothesis NO_OVERLAP: Loc.no_overlap srcs dsts.
 Hypothesis NO_SRCS_TEMP: Loc.disjoint srcs temporaries.
 Hypothesis NO_DSTS_TEMP: Loc.disjoint dsts temporaries.
 
+(** [no_overlap_dests l] holds if location [l] does not partially overlap
+  a destination location: either it is identical to one of the
+  destinations, or it is disjoint from all destinations. *)
+
 Definition no_overlap_dests (l: loc) : Prop :=
   forall d, In d dsts -> l = d \/ Loc.diff l d.
 
+(** We show that [no_overlap_dests] holds for any destination location
+  and for any source location. *)
+
 Lemma dests_no_overlap_dests:
   forall l, In l dsts -> no_overlap_dests l.
 Proof.
@@ -201,10 +247,19 @@ Proof.
   elim H2; intro; subst d; auto.
 Qed.
 
+(** [locmap_equiv e1 e2] holds if the location maps [e1] and [e2]
+  assign the same values to all locations except temporaries [IT1], [FT1]
+  and except locations that partially overlap a destination. *)
+
 Definition locmap_equiv (e1 e2: Locmap.t): Prop :=
   forall l,
   no_overlap_dests l -> Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> e2 l = e1 l.
 
+(** The following predicates characterize the effect of one move
+  move ([effect_move]) and of a sequence of elementary moves
+  ([effect_seqmove]).  We allow the code generated for one move
+  to use the temporaries [IT1] and [FT1] in any way it needs. *)
+
 Definition effect_move (src dst: loc) (e e': Locmap.t): Prop :=
   e' dst = e src /\
   forall l, Loc.diff l dst -> Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> e' l = e l.
@@ -217,12 +272,18 @@ Inductive effect_seqmove: list (loc * loc) -> Locmap.t -> Locmap.t -> Prop :=
       effect_seqmove m e2 e3 ->
       effect_seqmove ((s, d) :: m) e1 e3.
 
+(** The following crucial lemma shows that [locmap_equiv] is preserved
+  by executing one move [d <- s], once using the [effect_move]
+  predicate that accounts for partial overlap and the use of
+  temporaries [IT1], [FT1], or via the [Parmov.update] function that
+  does not account for any of these. *)
+
 Lemma effect_move_equiv:
   forall s d e1 e2 e1',
   (In s srcs \/ s = R IT2 \/ s = R FT2) ->
   (In d dsts \/ d = R IT2 \/ d = R FT2) ->
   locmap_equiv e1 e2 -> effect_move s d e1 e1' ->
-  locmap_equiv e1' (Parmov.update loc val Loc.eq d (e2 s) e2).
+  locmap_equiv e1' (Parmov.update loc Loc.eq val d (e2 s) e2).
 Proof.
   intros. destruct H2. red; intros. 
   unfold Parmov.update. destruct (Loc.eq l d). 
@@ -231,6 +292,9 @@ Proof.
   rewrite H3; auto. apply dest_noteq_diff; auto. 
 Qed.
 
+(** We then extend the previous lemma to a sequence [mu] of elementary moves.
+*)
+
 Lemma effect_seqmove_equiv:
   forall mu e1 e1',
   effect_seqmove mu e1 e1' ->
@@ -249,6 +313,13 @@ Proof.
   eapply effect_move_equiv; eauto. 
 Qed.
 
+(** Here is the main result in this file: executing the sequence
+  of moves returned by the [parmove] function results in the
+  desired state for locations: the final values of destination locations
+  are the initial values of source locations, and all locations
+  that are disjoint from the temporaries and the destinations
+  keep their initial values. *)
+
 Lemma effect_parmove:
   forall e e',
   effect_seqmove (parmove srcs dsts) e e' ->
diff --git a/backend/RTL.v b/backend/RTL.v
index 8b46a7db..74719977 100644
--- a/backend/RTL.v
+++ b/backend/RTL.v
@@ -1,10 +1,9 @@
 (** The RTL intermediate language: abstract syntax and semantics.
 
-  RTL (``Register Transfer Language'' is the first intermediate language
-  after Cminor.
+  RTL stands for "Register Transfer Language". This is the first
+  intermediate language after Cminor and CminorSel.
 *)
 
-(*Require Import Relations.*)
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
@@ -13,6 +12,7 @@ Require Import Values.
 Require Import Events.
 Require Import Mem.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Registers.
 
@@ -55,6 +55,7 @@ Inductive instruction: Set :=
           function name), giving it the values of registers [args] 
           as arguments.  It stores the return value in [dest] and branches
           to [succ]. *)
+  | Itailcall: signature -> reg + ident -> list reg -> instruction
   | Ialloc: reg -> reg -> node -> instruction
       (** [Ialloc arg dest succ] allocates a fresh block of size
           the contents of register [arg], stores a pointer to this
@@ -111,6 +112,52 @@ Fixpoint init_regs (vl: list val) (rl: list reg) {struct rl} : regset :=
   | _, _ => Regmap.init Vundef
   end.
 
+Inductive stackframe : Set :=
+  | Stackframe:
+      forall (res: reg) (c: code) (sp: val) (pc: node) (rs: regset), 
+      stackframe.
+
+Inductive state : Set :=
+  | State:
+      forall (stack: list stackframe) (c: code) (sp: val) (pc: node)
+             (rs: regset) (m: mem), state
+  | Callstate:
+      forall (stack: list stackframe) (f: fundef) (args: list val) (m: mem),
+      state
+  | Returnstate:
+      forall (stack: list stackframe) (v: val) (m: mem),
+      state.
+
+(** The dynamic semantics of RTL is given in small-step style, as a 
+  set of transitions between states.  A state captures the current
+  point in the execution.  Three kinds of states appear in the transitions:
+
+- [State cs c sp pc rs m] describes an execution point within a function.
+  [c] is the code for the current function (a CFG).
+  [sp] is the pointer to the stack block for its current activation
+     (as in Cminor).
+  [pc] is the current program point (CFG node) within the code [c].
+  [rs] gives the current values for the pseudo-registers.
+  [m] is the current memory state.
+- [Callstate cs f args m] is an intermediate state that appears during
+  function calls.
+  [f] is the function definition that we are calling.
+  [args] (a list of values) are the arguments for this call.
+  [m] is the current memory state.
+- [Returnstate cs v m] is an intermediate state that appears when a
+  function terminates and returns to its caller.
+  [v] is the return value and [m] the current memory state.
+
+In all three kinds of states, the [cs] parameter represents the call stack.
+It is a list of frames [Stackframe res c sp pc rs].  Each frame represents
+a function call in progress.  
+[res] is the pseudo-register that will receive the result of the call.
+[c] is the code of the calling function.
+[sp] is its stack pointer.
+[pc] is the program point for the instruction that follows the call.
+[rs] is the state of registers in the calling function.
+*)
+
 Section RELSEM.
 
 Variable ge: genv.
@@ -126,194 +173,142 @@ Definition find_function
       end
   end.
 
-(** The dynamic semantics of RTL is a combination of small-step (transition)
-  semantics and big-step semantics.  Execution of an instruction performs
-  a single transition to the next instruction (small-step), except if
-  the instruction is a function call.  In this case, the whole body of
-  the called function is executed at once and the transition terminates
-  on the instruction immediately following the call in the caller function.
-  Such ``mixed-step'' semantics is convenient for reasoning over
-  intra-procedural analyses and transformations.  It also dispenses us
-  from making the call stack explicit in the semantics.
-
-  The semantics is organized in three mutually inductive predicates.
-  The first is [exec_instr ge c sp pc rs m pc' rs' m'].  [ge] is the
-  global environment (see module [Genv]), [c] the CFG for the current
-  function, and [sp] the pointer to the stack block for its
-  current activation (as in Cminor).  [pc], [rs] and [m] is the 
-  initial state of the transition: program point (CFG node) [pc],
-  register state (mapping of pseudo-registers to values) [rs],
-  and memory state [m].  The final state is [pc'], [rs'] and [m']. *)
-
-Inductive exec_instr: code -> val ->
-                      node -> regset -> mem -> trace ->
-                      node -> regset -> mem -> Prop :=
+(** The transitions are presented as an inductive predicate
+  [step ge st1 t st2], where [ge] is the global environment,
+  [st1] the initial state, [st2] the final state, and [t] the trace
+  of system calls performed during this transition. *)
+
+Inductive step: state -> trace -> state -> Prop :=
   | exec_Inop:
-      forall c sp pc rs m pc',
+      forall s c sp pc rs m pc',
       c!pc = Some(Inop pc') ->
-      exec_instr c sp  pc rs m  E0 pc' rs m
+      step (State s c sp pc rs m)
+        E0 (State s c sp pc' rs m)
   | exec_Iop:
-      forall c sp pc rs m op args res pc' v,
+      forall s c sp pc rs m op args res pc' v,
       c!pc = Some(Iop op args res pc') ->
-      eval_operation ge sp op rs##args = Some v ->
-      exec_instr c sp  pc rs m  E0 pc' (rs#res <- v) m
+      eval_operation ge sp op rs##args m = Some v ->
+      step (State s c sp pc rs m)
+        E0 (State s c sp pc' (rs#res <- v) m)
   | exec_Iload:
-      forall c sp pc rs m chunk addr args dst pc' a v,
+      forall s c sp pc rs m chunk addr args dst pc' a v,
       c!pc = Some(Iload chunk addr args dst pc') ->
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
-      exec_instr c sp  pc rs m  E0 pc' (rs#dst <- v) m
+      step (State s c sp pc rs m)
+        E0 (State s c sp pc' (rs#dst <- v) m)
   | exec_Istore:
-      forall c sp pc rs m chunk addr args src pc' a m',
+      forall s c sp pc rs m chunk addr args src pc' a m',
       c!pc = Some(Istore chunk addr args src pc') ->
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.storev chunk m a rs#src = Some m' ->
-      exec_instr c sp  pc rs m  E0 pc' rs m'
+      step (State s c sp pc rs m)
+        E0 (State s c sp pc' rs m')
   | exec_Icall:
-      forall c sp pc rs m sig ros args res pc' f vres m' t,
+      forall s c sp pc rs m sig ros args res pc' f,
       c!pc = Some(Icall sig ros args res pc') ->
       find_function ros rs = Some f ->
       funsig f = sig ->
-      exec_function f rs##args m t vres m' ->
-      exec_instr c sp  pc rs m  t pc' (rs#res <- vres) m'
+      step (State s c sp pc rs m)
+        E0 (Callstate (Stackframe res c sp pc' rs :: s) f rs##args m)
+  | exec_Itailcall:
+      forall s c stk pc rs m sig ros args f,
+      c!pc = Some(Itailcall sig ros args) ->
+      find_function ros rs = Some f ->
+      funsig f = sig ->
+      step (State s c (Vptr stk Int.zero) pc rs m)
+        E0 (Callstate s f rs##args (Mem.free m stk))
   | exec_Ialloc:
-      forall c sp pc rs m pc' arg res sz m' b,
+      forall s c sp pc rs m pc' arg res sz m' b,
       c!pc = Some(Ialloc arg res pc') ->
       rs#arg = Vint sz ->
       Mem.alloc m 0 (Int.signed sz) = (m', b) ->
-      exec_instr c sp  pc rs m E0 pc' (rs#res <- (Vptr b Int.zero)) m'
+      step (State s c sp pc rs m)
+        E0 (State s c sp pc' (rs#res <- (Vptr b Int.zero)) m')
   | exec_Icond_true:
-      forall c sp pc rs m cond args ifso ifnot,
+      forall s c sp pc rs m cond args ifso ifnot,
       c!pc = Some(Icond cond args ifso ifnot) ->
-      eval_condition cond rs##args = Some true ->
-      exec_instr c sp  pc rs m  E0 ifso rs m
+      eval_condition cond rs##args m = Some true ->
+      step (State s c sp pc rs m)
+        E0 (State s c sp ifso rs m)
   | exec_Icond_false:
-      forall c sp pc rs m cond args ifso ifnot,
+      forall s c sp pc rs m cond args ifso ifnot,
       c!pc = Some(Icond cond args ifso ifnot) ->
-      eval_condition cond rs##args = Some false ->
-      exec_instr c sp  pc rs m  E0 ifnot rs m
-
-(** [exec_instrs ge c sp pc rs m pc' rs' m'] is the reflexive
-  transitive closure of [exec_instr].  It corresponds to the execution
-  of zero, one or finitely many transitions. *)
-
-with exec_instrs: code -> val ->
-                  node -> regset -> mem -> trace ->
-                  node -> regset -> mem -> Prop :=
-  | exec_refl:
-      forall c sp pc rs m,
-      exec_instrs c sp pc rs m E0 pc rs m
-  | exec_one:
-      forall c sp pc rs m t pc' rs' m',
-      exec_instr c sp pc rs m t pc' rs' m' ->
-      exec_instrs c sp pc rs m t pc' rs' m'
-  | exec_trans:
-      forall c sp pc1 rs1 m1 t1 pc2 rs2 m2 t2 pc3 rs3 m3 t3,
-      exec_instrs c sp pc1 rs1 m1 t1 pc2 rs2 m2 ->
-      exec_instrs c sp pc2 rs2 m2 t2 pc3 rs3 m3 ->
-      t3 = t1 ** t2 ->
-      exec_instrs c sp pc1 rs1 m1 t3 pc3 rs3 m3
-
-(** [exec_function ge f args m res m'] executes a function application.
-  [f] is the called function, [args] the values of its arguments,
-  and [m] the memory state at the beginning of the call.  [res] is
-  the returned value: the value of [r] if the function terminates with 
-  a [Ireturn (Some r)], or [Vundef] if it terminates with [Ireturn None].
-  Evaluation proceeds by executing transitions from the function's entry
-  point to the first [Ireturn] instruction encountered.  It is preceeded
-  by the allocation of the stack activation block and the binding
-  of register parameters to the provided arguments.
-  (Non-parameter registers are initialized to [Vundef].)  Before returning,
-  the stack activation block is freed. *)
-
-with exec_function: fundef -> list val -> mem -> trace ->
-                              val -> mem -> Prop :=
-  | exec_funct_internal:
-      forall f m m1 stk args t pc rs m2 or vres,
-      Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) ->
-      exec_instrs f.(fn_code) (Vptr stk Int.zero) 
-                  f.(fn_entrypoint) (init_regs args f.(fn_params)) m1
-                  t pc rs m2 ->
-      f.(fn_code)!pc = Some(Ireturn or) ->
-      vres = regmap_optget or Vundef rs ->
-      exec_function (Internal f) args m t vres (Mem.free m2 stk)
-  | exec_funct_external:
-      forall ef args m t res,
+      eval_condition cond rs##args m = Some false ->
+      step (State s c sp pc rs m)
+        E0 (State s c sp ifnot rs m)
+  | exec_Ireturn:
+      forall s c stk pc rs m or,
+      c!pc = Some(Ireturn or) ->
+      step (State s c (Vptr stk Int.zero) pc rs m)
+        E0 (Returnstate s (regmap_optget or Vundef rs) (Mem.free m stk))
+  | exec_function_internal:
+      forall s f args m m' stk,
+      Mem.alloc m 0 f.(fn_stacksize) = (m', stk) ->
+      step (Callstate s (Internal f) args m)
+        E0 (State s
+                  f.(fn_code)
+                  (Vptr stk Int.zero)
+                  f.(fn_entrypoint)
+                  (init_regs args f.(fn_params))
+                  m')
+  | exec_function_external:
+      forall s ef args res t m,
       event_match ef args t res ->
-      exec_function (External ef) args m t res m.
-
-Scheme exec_instr_ind_3 := Minimality for exec_instr Sort Prop
-  with exec_instrs_ind_3 := Minimality for exec_instrs Sort Prop
-  with exec_function_ind_3 := Minimality for exec_function Sort Prop.
-
-(** Some derived execution rules. *)
-
-Lemma exec_step:
-  forall c sp pc1 rs1 m1 t1 pc2 rs2 m2 t2 pc3 rs3 m3 t3,
-  exec_instr c sp pc1 rs1 m1 t1 pc2 rs2 m2 ->
-  exec_instrs c sp pc2 rs2 m2 t2 pc3 rs3 m3 ->
-  t3 = t1 ** t2 ->
-  exec_instrs c sp pc1 rs1 m1 t3 pc3 rs3 m3.
-Proof.
-  intros. eapply exec_trans. apply exec_one. eauto. eauto. auto.
-Qed.
+      step (Callstate s (External ef) args m)
+         t (Returnstate s res m)
+  | exec_return:
+      forall res c sp pc rs s vres m,
+      step (Returnstate (Stackframe res c sp pc rs :: s) vres m)
+        E0 (State s c sp pc (rs#res <- vres) m).
 
 Lemma exec_Iop':
-  forall c sp pc rs m op args res pc' rs' v,
+  forall s c sp pc rs m op args res pc' rs' v,
   c!pc = Some(Iop op args res pc') ->
-  eval_operation ge sp op rs##args = Some v ->
+  eval_operation ge sp op rs##args m = Some v ->
   rs' = (rs#res <- v) ->
-  exec_instr c sp  pc rs m  E0 pc' rs' m.
+  step (State s c sp pc rs m)
+    E0 (State s c sp pc' rs' m).
 Proof.
   intros. subst rs'. eapply exec_Iop; eauto.
 Qed.
 
 Lemma exec_Iload':
-  forall c sp pc rs m chunk addr args dst pc' rs' a v,
+  forall s c sp pc rs m chunk addr args dst pc' rs' a v,
   c!pc = Some(Iload chunk addr args dst pc') ->
   eval_addressing ge sp addr rs##args = Some a ->
   Mem.loadv chunk m a = Some v ->
   rs' = (rs#dst <- v) ->
-  exec_instr c sp  pc rs m E0 pc' rs' m.
+  step (State s c sp pc rs m)
+    E0 (State s c sp pc' rs' m).
 Proof.
   intros. subst rs'. eapply exec_Iload; eauto.
 Qed.
 
-(** If a transition can take place from [pc], the instruction at [pc]
-  is defined in the CFG. *)
+End RELSEM.
 
-Lemma exec_instr_present:
-  forall c sp pc rs m t pc' rs' m',
-  exec_instr c sp  pc rs m  t pc' rs' m' ->
-  c!pc <> None.
-Proof.
-  induction 1; congruence.
-Qed.
+(** Execution of whole programs are described as sequences of transitions
+  from an initial state to a final state.  An initial state is a [Callstate]
+  corresponding to the invocation of the ``main'' function of the program
+  without arguments and with an empty call stack. *)
 
-Lemma exec_instrs_present:
-  forall c sp pc rs m t pc' rs' m',
-  exec_instrs c sp  pc rs m  t pc' rs' m' ->
-  c!pc' <> None -> c!pc <> None.
-Proof.
-  induction 1; intros.
-  auto.
-  eapply exec_instr_present; eauto.
-  eauto.
-Qed.
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall b f,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some b ->
+      Genv.find_funct_ptr ge b = Some f ->
+      funsig f = mksignature nil (Some Tint) ->
+      initial_state p (Callstate nil f nil m0).
 
-End RELSEM.
+(** A final state is a [Returnstate] with an empty call stack. *)
 
-(** Execution of whole programs.  As in Cminor, we call the ``main'' function
-  with no arguments and observe its return value. *)
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall r m,
+      final_state (Returnstate nil (Vint r) m) r.
 
-Definition exec_program (p: program) (t: trace) (r: val) : Prop :=
-  let ge := Genv.globalenv p in
-  let m0 := Genv.init_mem p in
-  exists b, exists f, exists m,
-  Genv.find_symbol ge p.(prog_main) = Some b /\
-  Genv.find_funct_ptr ge b = Some f /\
-  funsig f = mksignature nil (Some Tint) /\
-  exec_function ge f nil m0 t r m.
+Definition exec_program (p: program) (beh: program_behavior) : Prop :=
+  program_behaves step (initial_state p) final_state (Genv.globalenv p) beh.
 
 (** * Operations on RTL abstract syntax *)
 
@@ -330,21 +325,13 @@ Definition successors (f: function) (pc: node) : list node :=
       | Iload chunk addr args dst s => s :: nil
       | Istore chunk addr args src s => s :: nil
       | Icall sig ros args res s => s :: nil
+      | Itailcall sig ros args => nil
       | Ialloc arg res s => s :: nil
       | Icond cond args ifso ifnot => ifso :: ifnot :: nil
       | Ireturn optarg => nil
       end
   end.
 
-Lemma successors_correct:
-  forall ge f sp pc rs m t pc' rs' m',
-  exec_instr ge f.(fn_code) sp pc rs m t pc' rs' m' ->
-  In pc' (successors f pc).
-Proof.
-  intros ge f. unfold successors. generalize (fn_code f).
-  induction 1; rewrite H; simpl; tauto.
-Qed.
-
 (** Transformation of a RTL function instruction by instruction.
   This applies a given transformation function to all instructions
   of a function and constructs a transformed function from that. *)
diff --git a/backend/RTLbigstep.v b/backend/RTLbigstep.v
new file mode 100644
index 00000000..0ad6e68a
--- /dev/null
+++ b/backend/RTLbigstep.v
@@ -0,0 +1,400 @@
+(** An alternate, mixed-step semantics for the RTL intermediate language. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Events.
+Require Import Mem.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Op.
+Require Import Registers.
+Require Import RTL.
+
+Section BIGSTEP.
+
+Variable ge: genv.
+
+(** The dynamic semantics of RTL is a combination of small-step (transition)
+  semantics and big-step semantics.  Execution of an instruction performs
+  a single transition to the next instruction (small-step), except if
+  the instruction is a function call.  In this case, the whole body of
+  the called function is executed at once and the transition terminates
+  on the instruction immediately following the call in the caller function.
+  Such ``mixed-step'' semantics is convenient for reasoning over
+  intra-procedural analyses and transformations.  It also dispenses us
+  from making the call stack explicit in the semantics.
+
+  The semantics is organized in three mutually inductive predicates.
+  The first is [exec_instr ge c sp pc rs m pc' rs' m'].  [ge] is the
+  global environment (see module [Genv]), [c] the CFG for the current
+  function, and [sp] the pointer to the stack block for its
+  current activation (as in Cminor).  [pc], [rs] and [m] is the 
+  initial state of the transition: program point (CFG node) [pc],
+  register state (mapping of pseudo-registers to values) [rs],
+  and memory state [m].  The final state is [pc'], [rs'] and [m']. *)
+
+Inductive exec_instr: code -> val ->
+                      node -> regset -> mem -> trace ->
+                      node -> regset -> mem -> Prop :=
+  | exec_Inop:
+      forall c sp pc rs m pc',
+      c!pc = Some(Inop pc') ->
+      exec_instr c sp  pc rs m  E0 pc' rs m
+  | exec_Iop:
+      forall c sp pc rs m op args res pc' v,
+      c!pc = Some(Iop op args res pc') ->
+      eval_operation ge sp op rs##args m = Some v ->
+      exec_instr c sp  pc rs m  E0 pc' (rs#res <- v) m
+  | exec_Iload:
+      forall c sp pc rs m chunk addr args dst pc' a v,
+      c!pc = Some(Iload chunk addr args dst pc') ->
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = Some v ->
+      exec_instr c sp  pc rs m  E0 pc' (rs#dst <- v) m
+  | exec_Istore:
+      forall c sp pc rs m chunk addr args src pc' a m',
+      c!pc = Some(Istore chunk addr args src pc') ->
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.storev chunk m a rs#src = Some m' ->
+      exec_instr c sp  pc rs m  E0 pc' rs m'
+  | exec_Icall:
+      forall c sp pc rs m sig ros args res pc' f vres m' t,
+      c!pc = Some(Icall sig ros args res pc') ->
+      find_function ge ros rs = Some f ->
+      funsig f = sig ->
+      exec_function f rs##args m t vres m' ->
+      exec_instr c sp  pc rs m  t pc' (rs#res <- vres) m'
+  | exec_Ialloc:
+      forall c sp pc rs m pc' arg res sz m' b,
+      c!pc = Some(Ialloc arg res pc') ->
+      rs#arg = Vint sz ->
+      Mem.alloc m 0 (Int.signed sz) = (m', b) ->
+      exec_instr c sp  pc rs m E0 pc' (rs#res <- (Vptr b Int.zero)) m'
+  | exec_Icond_true:
+      forall c sp pc rs m cond args ifso ifnot,
+      c!pc = Some(Icond cond args ifso ifnot) ->
+      eval_condition cond rs##args m = Some true ->
+      exec_instr c sp  pc rs m  E0 ifso rs m
+  | exec_Icond_false:
+      forall c sp pc rs m cond args ifso ifnot,
+      c!pc = Some(Icond cond args ifso ifnot) ->
+      eval_condition cond rs##args m = Some false ->
+      exec_instr c sp  pc rs m  E0 ifnot rs m
+
+(** [exec_body ge c sp pc rs m res m'] repeatedly executes
+   instructions starting at [pc] in [c], until it
+   reaches a return or tailcall instruction.  It performs
+   that instruction and sets [res] to the return value
+   and [m'] to the final memory state. *)
+
+with exec_body: code -> val ->
+                node -> regset -> mem -> trace ->
+                val -> mem -> Prop :=
+  | exec_body_step: forall c sp pc rs m t1 pc1 rs1 m1 t2 t res m2,
+      exec_instr c sp  pc rs m  t1  pc1 rs1 m1 ->
+      exec_body c sp  pc1 rs1 m1  t2  res m2 ->
+      t = t1 ** t2 ->
+      exec_body c sp  pc rs m  t  res m2
+  | exec_Ireturn: forall c stk pc rs m or res,
+      c!pc = Some(Ireturn or) ->
+      res = regmap_optget or Vundef rs ->
+      exec_body c (Vptr stk Int.zero)  pc rs m  E0  res (Mem.free m stk)
+  | exec_Itailcall: forall c stk pc rs m sig ros args f t res m',
+      c!pc = Some(Itailcall sig ros args) ->
+      find_function ge ros rs = Some f ->
+      funsig f = sig ->
+      exec_function f rs##args (Mem.free m stk) t res m' ->
+      exec_body c (Vptr stk Int.zero)  pc rs m  t  res m'
+
+(** [exec_function ge f args m res m'] executes a function application.
+  [f] is the called function, [args] the values of its arguments,
+  and [m] the memory state at the beginning of the call.  [res] is
+  the returned value: the value of [r] if the function terminates with 
+  a [Ireturn (Some r)], or [Vundef] if it terminates with [Ireturn None].
+  Evaluation proceeds by executing transitions from the function's entry
+  point to the first [Ireturn] instruction encountered.  It is preceeded
+  by the allocation of the stack activation block and the binding
+  of register parameters to the provided arguments.
+  (Non-parameter registers are initialized to [Vundef].)  Before returning,
+  the stack activation block is freed. *)
+
+with exec_function: fundef -> list val -> mem -> trace ->
+                              val -> mem -> Prop :=
+  | exec_funct_internal:
+      forall f m m1 stk args t m2 vres,
+      Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) ->
+      exec_body f.(fn_code) (Vptr stk Int.zero) 
+                f.(fn_entrypoint) (init_regs args f.(fn_params)) m1
+                t vres m2 ->
+      exec_function (Internal f) args m t vres m2
+  | exec_funct_external:
+      forall ef args m t res,
+      event_match ef args t res ->
+      exec_function (External ef) args m t res m.
+
+Scheme exec_instr_ind_3 := Minimality for exec_instr Sort Prop
+  with exec_body_ind_3 := Minimality for exec_body Sort Prop
+  with exec_function_ind_3 := Minimality for exec_function Sort Prop.
+
+(** The reflexive transitive closure of [exec_instr]. *)
+
+Inductive exec_instrs: code -> val ->
+                       node -> regset -> mem -> trace ->
+                       node -> regset -> mem -> Prop :=
+  | exec_refl:
+      forall c sp pc rs m,
+      exec_instrs c sp pc rs m E0 pc rs m
+  | exec_one:
+      forall c sp pc rs m t pc' rs' m',
+      exec_instr c sp pc rs m t pc' rs' m' ->
+      exec_instrs c sp pc rs m t pc' rs' m'
+  | exec_trans:
+      forall c sp pc1 rs1 m1 t1 pc2 rs2 m2 t2 pc3 rs3 m3 t3,
+      exec_instrs c sp pc1 rs1 m1 t1 pc2 rs2 m2 ->
+      exec_instrs c sp pc2 rs2 m2 t2 pc3 rs3 m3 ->
+      t3 = t1 ** t2 ->
+      exec_instrs c sp pc1 rs1 m1 t3 pc3 rs3 m3.
+
+(** Some derived execution rules. *)
+
+Lemma exec_instrs_exec_body:
+  forall c sp pc1 rs1 m1 t1 pc2 rs2 m2,
+  exec_instrs c sp pc1 rs1 m1 t1 pc2 rs2 m2 ->
+  forall res t2 m3 t3,
+  exec_body c sp pc2 rs2 m2 t2 res m3 ->
+  t3 = t1 ** t2 ->
+  exec_body c sp pc1 rs1 m1 t3 res m3.
+Proof.
+  induction 1; intros.
+  subst t3. rewrite E0_left. auto.
+  eapply exec_body_step; eauto.
+  eapply IHexec_instrs1. eapply IHexec_instrs2. eauto. 
+  eauto. traceEq. 
+Qed.
+
+Lemma exec_step:
+  forall c sp pc1 rs1 m1 t1 pc2 rs2 m2 t2 pc3 rs3 m3 t3,
+  exec_instr c sp pc1 rs1 m1 t1 pc2 rs2 m2 ->
+  exec_instrs c sp pc2 rs2 m2 t2 pc3 rs3 m3 ->
+  t3 = t1 ** t2 ->
+  exec_instrs c sp pc1 rs1 m1 t3 pc3 rs3 m3.
+Proof.
+  intros. eapply exec_trans. apply exec_one. eauto. eauto. auto.
+Qed.
+
+Lemma exec_Iop':
+  forall c sp pc rs m op args res pc' rs' v,
+  c!pc = Some(Iop op args res pc') ->
+  eval_operation ge sp op rs##args m = Some v ->
+  rs' = (rs#res <- v) ->
+  exec_instr c sp  pc rs m  E0 pc' rs' m.
+Proof.
+  intros. subst rs'. eapply exec_Iop; eauto.
+Qed.
+
+Lemma exec_Iload':
+  forall c sp pc rs m chunk addr args dst pc' rs' a v,
+  c!pc = Some(Iload chunk addr args dst pc') ->
+  eval_addressing ge sp addr rs##args = Some a ->
+  Mem.loadv chunk m a = Some v ->
+  rs' = (rs#dst <- v) ->
+  exec_instr c sp  pc rs m E0 pc' rs' m.
+Proof.
+  intros. subst rs'. eapply exec_Iload; eauto.
+Qed.
+
+(** Experimental: coinductive big-step semantics for divergence. *)
+
+CoInductive diverge_body:
+      code -> val -> node -> regset -> mem -> traceinf -> Prop :=
+  | diverge_step: forall c sp pc rs m t1 pc1 rs1 m1 T2 T,
+      exec_instr c sp  pc rs m  t1  pc1 rs1 m1 ->
+      diverge_body c sp  pc1 rs1 m1  T2 ->
+      T = t1 *** T2 ->
+      diverge_body c sp  pc rs m  T
+  | diverge_Icall:
+      forall c sp pc rs m sig ros args res pc' f T,
+      c!pc = Some(Icall sig ros args res pc') ->
+      find_function ge ros rs = Some f ->
+      funsig f = sig ->
+      diverge_function f rs##args m T ->
+      diverge_body c sp  pc rs m  T
+  | diverge_Itailcall: forall c stk pc rs m sig ros args f T,
+      c!pc = Some(Itailcall sig ros args) ->
+      find_function ge ros rs = Some f ->
+      funsig f = sig ->
+      diverge_function f rs##args (Mem.free m stk) T ->
+      diverge_body c (Vptr stk Int.zero)  pc rs m  T
+
+with diverge_function: fundef -> list val -> mem -> traceinf -> Prop :=
+  | diverge_funct_internal:
+      forall f m m1 stk args T,
+      Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) ->
+      diverge_body f.(fn_code) (Vptr stk Int.zero) 
+                   f.(fn_entrypoint) (init_regs args f.(fn_params)) m1
+                   T ->
+      diverge_function (Internal f) args m T.
+
+End BIGSTEP.
+
+(** Execution of whole programs. *)
+
+Inductive exec_program (p: program): program_behavior -> Prop :=
+  | exec_program_terminates: forall b f t r m,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some b ->
+      Genv.find_funct_ptr ge b = Some f ->
+      funsig f = mksignature nil (Some Tint) ->
+      exec_function ge f nil m0 t (Vint r) m ->
+      exec_program p (Terminates t r)
+  | exec_program_diverges: forall b f T,
+      let ge := Genv.globalenv p in
+      let m0 := Genv.init_mem p in
+      Genv.find_symbol ge p.(prog_main) = Some b ->
+      Genv.find_funct_ptr ge b = Some f ->
+      funsig f = mksignature nil (Some Tint) ->
+      diverge_function ge f nil m0 T ->
+      exec_program p (Diverges T).
+
+(** * Equivalence with the transition semantics. *)
+
+Section EQUIVALENCE.
+
+Variable ge: genv.
+
+Definition exec_instr_prop
+  (c: code) (sp: val) (pc1: node) (rs1: regset) (m1: mem)
+           (t: trace) (pc2: node) (rs2: regset) (m2: mem) : Prop :=
+  forall s,
+  plus step ge (State s c sp pc1 rs1 m1)
+             t (State s c sp pc2 rs2 m2).
+
+Definition exec_body_prop
+  (c: code) (sp: val) (pc1: node) (rs1: regset) (m1: mem)
+           (t: trace) (res: val) (m2: mem) : Prop :=
+  forall s,
+  plus step ge (State s c sp pc1 rs1 m1)
+             t (Returnstate s res m2).
+
+Definition exec_function_prop
+  (f: fundef) (args: list val) (m1: mem)
+  (t: trace) (res: val) (m2: mem) : Prop :=
+  forall s,
+  plus step ge (Callstate s f args m1)
+             t (Returnstate s res m2).
+
+Lemma exec_steps:
+  (forall c sp pc1 rs1 m1 t pc2 rs2 m2,
+   exec_instr ge c sp pc1 rs1 m1 t pc2 rs2 m2 ->
+   exec_instr_prop c sp pc1 rs1 m1 t pc2 rs2 m2) /\
+  (forall c sp pc1 rs1 m1 t res m2,
+   exec_body ge c sp pc1 rs1 m1 t res m2 ->
+   exec_body_prop c sp pc1 rs1 m1 t res m2) /\
+  (forall f args m1 t res m2,
+   exec_function ge f args m1 t res m2 ->
+   exec_function_prop f args m1 t res m2).
+Proof.
+  set (IND := fun a b c d e f g h i j k l m =>
+          conj (exec_instr_ind_3 ge exec_instr_prop exec_body_prop exec_function_prop a b c d e f g h i j k l m)
+               (conj (exec_body_ind_3 ge exec_instr_prop exec_body_prop exec_function_prop a b c d e f g h i j k l m)
+                     (exec_function_ind_3 ge exec_instr_prop exec_body_prop exec_function_prop a b c d e f g h i j k l m))).
+  apply IND; clear IND;
+  intros; red; intros.
+  (* nop *)
+  apply plus_one. eapply RTL.exec_Inop; eauto.
+  (* op *)
+  apply plus_one. eapply RTL.exec_Iop'; eauto. 
+  (* load *)
+  apply plus_one. eapply RTL.exec_Iload'; eauto.
+  (* store *)
+  apply plus_one. eapply RTL.exec_Istore; eauto.
+  (* call *)
+  eapply plus_left'. eapply RTL.exec_Icall; eauto.
+  eapply plus_right'. apply H3. apply RTL.exec_return. 
+  eauto. traceEq.
+  (* alloc *)
+  apply plus_one. eapply RTL.exec_Ialloc; eauto. 
+  (* cond true *)
+  apply plus_one. eapply RTL.exec_Icond_true; eauto.  
+  (* cond false *)
+  apply plus_one. eapply RTL.exec_Icond_false; eauto.
+  (* body step *)
+  eapply plus_trans. apply H0. apply H2. auto.
+  (* body return *)
+  apply plus_one. rewrite H0. eapply RTL.exec_Ireturn; eauto.
+  (* body tailcall *)
+  eapply plus_left'. eapply RTL.exec_Itailcall; eauto. 
+  apply H3. traceEq.
+  (* internal function *)
+  eapply plus_left'. eapply RTL.exec_function_internal; eauto. 
+  apply H1. traceEq.
+  (* external function *)
+  apply plus_one. eapply RTL.exec_function_external; eauto.
+Qed.
+
+Lemma diverge_function_steps:
+  forall fd args m T s,
+  diverge_function ge fd args m T ->
+  forever step ge (Callstate s fd args m) T.
+Proof.
+  assert (diverge_function_steps':
+          forall fd args m T s,
+          diverge_function ge fd args m T ->
+          forever_N step ge O (Callstate s fd args m) T).
+  cofix COINDHYP1.
+  assert (diverge_body_steps: forall c sp pc rs m T s,
+          diverge_body ge c sp pc rs m T ->
+          forever_N step ge O (State s c sp pc rs m) T).
+  cofix COINDHYP2; intros.
+  inv H. 
+  (* step *)
+  apply forever_N_plus with (State s c sp pc1 rs1 m1) O.
+  destruct exec_steps as [E [F G]].
+  apply E. assumption.
+  apply COINDHYP2. assumption.
+  (* call *)
+  change T with (E0 *** T).
+  apply forever_N_plus with (Callstate (Stackframe res c sp pc' rs :: s)
+                                       f rs##args m) O.
+  apply plus_one. eapply RTL.exec_Icall; eauto. 
+  apply COINDHYP1. assumption.
+  (* tailcall *)
+  change T with (E0 *** T).
+  apply forever_N_plus with (Callstate s f rs##args (free m stk)) O.
+  apply plus_one. eapply RTL.exec_Itailcall; eauto. 
+  apply COINDHYP1. assumption.
+  (* internal function *)
+  intros. inv H. 
+  change T with (E0 *** T).
+  apply forever_N_plus with
+    (State s f.(fn_code) (Vptr stk Int.zero)
+             f.(fn_entrypoint) (init_regs args f.(fn_params)) m1) O.
+  apply plus_one. eapply RTL.exec_function_internal; eauto.
+  apply diverge_body_steps. assumption.
+  (* conclusion *)
+  intros. eapply forever_N_forever. eauto.
+Qed.
+
+End EQUIVALENCE.
+
+Theorem exec_program_bigstep_smallstep:
+  forall p beh,
+  exec_program p beh ->
+  RTL.exec_program p beh.
+Proof.
+  intros. unfold RTL.exec_program. inv H. 
+  econstructor. 
+  econstructor; eauto. 
+  apply plus_star.
+  destruct (exec_steps (Genv.globalenv p)) as [E [F G]].
+  apply (G _ _ _ _ _ _ H3).
+  constructor.
+  econstructor.
+  econstructor; eauto.
+  apply diverge_function_steps. auto. 
+Qed.
+
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index b38964d2..117631ef 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -1,21 +1,25 @@
-(** Translation from Cminor to RTL. *)
+(** Translation from CminorSel to RTL. *)
 
 Require Import Coqlib.
+Require Errors.
 Require Import Maps.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
+Require Import Switch.
 Require Import Op.
 Require Import Registers.
-Require Import Cminor.
+Require Import CminorSel.
 Require Import RTL.
 
+Open Local Scope string_scope.
+
 (** * Translation environments and state *)
 
 (** The translation functions are parameterized by the following 
-  compile-time environment, which maps Cminor local variables and
+  compile-time environment, which maps CminorSel local variables and
   let-bound variables to RTL registers.   The mapping for local variables
-  is computed from the Cminor variable declarations at the beginning of
+  is computed from the CminorSel variable declarations at the beginning of
   the translation of a function, and does not change afterwards.
   The mapping for let-bound variables is initially empty and updated
   during translation of expressions, when crossing a [Elet] binding. *)
@@ -46,9 +50,10 @@ Record state: Set := mkstate {
   to modify the global state.  These luxuries are not available in Coq,
   however.  Instead, we use a monadic encoding of the translation:
   translation functions take the current global state as argument,
-  and return either [Error] to denote an error, or [OK r s] to denote
+  and return either [Error msg] to denote an error, or [OK r s] to denote
   success.  [s] is the modified state, and [r] the result value of the
-  translation function. 
+  translation function.  In the error case, [msg] is an error message
+  (see modules [Errors]) describing the problem.
 
   We now define this monadic encoding -- the ``state and error'' monad --
   as well as convenient syntax to express monadic computations. *)
@@ -56,19 +61,19 @@ Record state: Set := mkstate {
 Set Implicit Arguments.
 
 Inductive res (A: Set) : Set :=
-  | Error: res A
+  | Error: Errors.errmsg -> res A
   | OK: A -> state -> res A.
 
 Definition mon (A: Set) : Set := state -> res A.
 
 Definition ret (A: Set) (x: A) : mon A := fun (s: state) => OK x s.
 
-Definition error (A: Set) : mon A := fun (s: state) => Error A.
+Definition error (A: Set) (msg: Errors.errmsg) : mon A := fun (s: state) => Error A msg.
 
 Definition bind (A B: Set) (f: mon A) (g: A -> mon B) : mon B :=
   fun (s: state) =>
     match f s with
-    | Error => Error B
+    | Error msg => Error B msg
     | OK a s' => g a s'
     end.
 
@@ -155,7 +160,7 @@ Definition update_instr (n: node) (i: instruction) : mon unit :=
                        (PTree.set n i s.(st_code))
                        (@update_instr_wf s n i PEQ))
     | right _ =>
-        Error unit
+        Error unit (Errors.msg "RTLgen.update_instr")
     end.
 
 (** Generate a fresh RTL register. *)
@@ -188,7 +193,7 @@ Fixpoint add_vars (map: mapping) (names: list ident)
 
 Definition find_var (map: mapping) (name: ident) : mon reg :=
   match PTree.get name map.(map_vars) with
-  | None => error reg
+  | None => error reg (Errors.MSG "RTLgen: unbound variable " :: Errors.CTX name :: nil)
   | Some r => ret r
   end.
 
@@ -197,7 +202,7 @@ Definition add_letvar (map: mapping) (r: reg) : mapping :=
 
 Definition find_letvar (map: mapping) (idx: nat) : mon reg :=
   match List.nth_error map.(map_letvars) idx with
-  | None => error reg
+  | None => error reg (Errors.msg "RTLgen: unbound let variable")
   | Some r => ret r
   end.
 
@@ -227,8 +232,8 @@ Fixpoint alloc_regs (map: mapping) (al: exprlist)
   | Enil =>
       ret nil
   | Econs a bl =>
-      do rl <- alloc_regs map bl;
       do r  <- alloc_reg map a;
+      do rl <- alloc_regs map bl;
       ret (r :: rl)
   end.
 
@@ -241,9 +246,9 @@ Definition add_move (rs rd: reg) (nd: node) : mon node :=
   then ret nd
   else add_instr (Iop Omove (rs::nil) rd nd).
 
-(** Translation of an expression.  [transl_expr map mut a rd nd]
+(** Translation of an expression.  [transl_expr map a rd nd]
   enriches the current CFG with the RTL instructions necessary
-  to compute the value of Cminor expression [a], leave its result
+  to compute the value of CminorSel expression [a], leave its result
   in register [rd], and branch to node [nd].  It returns the node
   of the first instruction in this sequence.  [map] is the compile-time
   translation environment. *)
@@ -311,7 +316,7 @@ with transl_condition (map: mapping) (a: condexpr) (ntrue nfalse: node)
   end
 
 (** Translation of a list of expressions.  The expressions are evaluated
-  left-to-right, and their values left in the given list of registers. *)
+  left-to-right, and their values stored in the given list of registers. *)
 
 with transl_exprlist (map: mapping) (al: exprlist) (rl: list reg) (nd: node)
                      {struct al} : mon node :=
@@ -321,7 +326,7 @@ with transl_exprlist (map: mapping) (al: exprlist) (rl: list reg) (nd: node)
   | Econs b bs, r :: rs =>
       do no <- transl_exprlist map bs rs nd; transl_expr map b r no
   | _, _ =>
-      error node
+      error node (Errors.msg "RTLgen.transl_exprlist")
   end.
 
 (** Auxiliary for branch prediction.  When compiling an if/then/else
@@ -336,27 +341,32 @@ Parameter more_likely: condexpr -> stmt -> stmt -> bool.
 
 (** Auxiliary for translating [Sswitch] statements. *)
 
+Parameter compile_switch: nat -> table -> comptree.
+
 Definition transl_exit (nexits: list node) (n: nat) : mon node :=
   match nth_error nexits n with
-  | None => error node
+  | None => error node (Errors.msg "RTLgen: wrong exit")
   | Some ne => ret ne
   end.
 
-Fixpoint transl_switch (r: reg) (nexits: list node)
-                       (cases: list (int * nat)) (default: nat)
-                       {struct cases} : mon node :=
-  match cases with
-  | nil =>
-      transl_exit nexits default
-  | (key1, exit1) :: cases' =>
-      do ncont <- transl_switch r nexits cases' default;
-      do nfound <- transl_exit nexits exit1;
-      add_instr (Icond (Ccompimm Ceq key1) (r :: nil) nfound ncont)
+Fixpoint transl_switch (r: reg) (nexits: list node) (t: comptree)
+                       {struct t} : mon node :=
+  match t with
+  | CTaction act =>
+      transl_exit nexits act
+  | CTifeq key act t' =>
+      do ncont <- transl_switch r nexits t';
+      do nfound <- transl_exit nexits act;
+      add_instr (Icond (Ccompimm Ceq key) (r :: nil) nfound ncont)
+  | CTiflt key t1 t2 =>
+      do n2 <- transl_switch r nexits t2;
+      do n1 <- transl_switch r nexits t1;
+      add_instr (Icond (Ccompuimm Clt key) (r :: nil) n1 n2)
   end.
 
 (** Translation of statements.  [transl_stmt map s nd nexits nret rret]
   enriches the current CFG with the RTL instructions necessary to
-  execute the Cminor statement [s], and returns the node of the first
+  execute the CminorSel statement [s], and returns the node of the first
   instruction in this sequence.  The generated instructions continue
   at node [nd] if the statement terminates normally, at node [nret]
   if it terminates by early return, and at the [n]-th node in the list
@@ -398,18 +408,28 @@ Fixpoint transl_stmt (map: mapping) (s: stmt) (nd: node)
   | Sexit n =>
       transl_exit nexits n
   | Sswitch a cases default =>
-      do r <- alloc_reg map a;
-      do ns <- transl_switch r nexits cases default;
-      transl_expr map a r ns
+      let t := compile_switch default cases in
+      if validate_switch default cases t then
+        (do r <- alloc_reg map a;
+         do ns <- transl_switch r nexits t;
+         transl_expr map a r ns)
+      else
+        error node (Errors.msg "RTLgen: wrong switch")
   | Sreturn opt_a =>
       match opt_a, rret with
       | None, None => ret nret
       | Some a, Some r => transl_expr map a r nret
-      | _, _ => error node
+      | _, _ => error node (Errors.msg "RTLgen: type mismatch on return")
       end
+  | Stailcall sig b cl =>
+      do rf <- alloc_reg map b;
+      do rargs <- alloc_regs map cl;
+      do n1 <- add_instr (Itailcall sig (inl _ rf) rargs);
+      do n2 <- transl_exprlist map cl rargs n1;
+         transl_expr map b rf n2
   end.
 
-(** Translation of a Cminor function. *)
+(** Translation of a CminorSel function. *)
 
 Definition ret_reg (sig: signature) (rd: reg) : option reg :=
   match sig.(sig_res) with
@@ -417,32 +437,32 @@ Definition ret_reg (sig: signature) (rd: reg) : option reg :=
   | Some ty => Some rd
   end.
 
-Definition transl_fun (f: Cminor.function) : mon (node * list reg) :=
-  do (rparams, map1) <- add_vars init_mapping f.(Cminor.fn_params);
-  do (rvars, map2) <- add_vars map1 f.(Cminor.fn_vars);
+Definition transl_fun (f: CminorSel.function) : mon (node * list reg) :=
+  do (rparams, map1) <- add_vars init_mapping f.(CminorSel.fn_params);
+  do (rvars, map2) <- add_vars map1 f.(CminorSel.fn_vars);
   do rret <- new_reg;
-  let orret := ret_reg f.(Cminor.fn_sig) rret in
+  let orret := ret_reg f.(CminorSel.fn_sig) rret in
   do nret <- add_instr (Ireturn orret);
-  do nentry <- transl_stmt map2 f.(Cminor.fn_body) nret nil nret orret;
+  do nentry <- transl_stmt map2 f.(CminorSel.fn_body) nret nil nret orret;
   ret (nentry, rparams).
 
-Definition transl_function (f: Cminor.function) : option RTL.function :=
+Definition transl_function (f: CminorSel.function) : Errors.res RTL.function :=
   match transl_fun f init_state with
-  | Error => None
+  | Error msg => Errors.Error msg
   | OK (nentry, rparams) s =>
-      Some (RTL.mkfunction
-             f.(Cminor.fn_sig)
-             rparams
-             f.(Cminor.fn_stackspace)
-             s.(st_code)
-             nentry
-             s.(st_nextnode)
-             s.(st_wf))
+      Errors.OK (RTL.mkfunction
+                   f.(CminorSel.fn_sig)
+                   rparams
+                   f.(CminorSel.fn_stackspace)
+                   s.(st_code)
+                   nentry
+                   s.(st_nextnode)
+                   s.(st_wf))
   end.
 
 Definition transl_fundef := transf_partial_fundef transl_function.
 
 (** Translation of a whole program. *)
 
-Definition transl_program (p: Cminor.program) : option RTL.program :=
+Definition transl_program (p: CminorSel.program) : Errors.res RTL.program :=
   transform_partial_program transl_fundef p.
diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v
index 2ce961bc..15f305a8 100644
--- a/backend/RTLgenproof.v
+++ b/backend/RTLgenproof.v
@@ -1,4 +1,4 @@
-(** Correctness proof for RTL generation: main proof. *)
+(** Correctness proof for RTL generation. *)
 
 Require Import Coqlib.
 Require Import Maps.
@@ -7,144 +7,411 @@ Require Import Integers.
 Require Import Values.
 Require Import Mem.
 Require Import Events.
+Require Import Smallstep.
 Require Import Globalenvs.
-Require Import Op.
+Require Import Switch.
 Require Import Registers.
 Require Import Cminor.
+Require Import Op.
+Require Import CminorSel.
 Require Import RTL.
 Require Import RTLgen.
-Require Import RTLgenproof1.
+Require Import RTLgenspec.
+
+(** * Correspondence between Cminor environments and RTL register sets *)
+
+(** A compilation environment (mapping) is well-formed if
+  the following properties hold:
+- Two distinct Cminor local variables are mapped to distinct pseudo-registers.
+- A Cminor local variable and a let-bound variable are mapped to
+  distinct pseudo-registers.
+*)
+
+Record map_wf (m: mapping) : Prop :=
+  mk_map_wf {
+    map_wf_inj:
+      (forall id1 id2 r,
+         m.(map_vars)!id1 = Some r -> m.(map_vars)!id2 = Some r -> id1 = id2);
+     map_wf_disj:
+      (forall id r,
+         m.(map_vars)!id = Some r -> In r m.(map_letvars) -> False)
+  }.
+
+Lemma init_mapping_wf:
+  map_wf init_mapping.
+Proof.
+  unfold init_mapping; split; simpl.
+  intros until r. rewrite PTree.gempty. congruence.
+  tauto.
+Qed.
+
+Lemma add_var_wf:
+  forall s1 s2 map name r map',
+  add_var map name s1 = OK (r,map') s2 -> 
+  map_wf map -> map_valid map s1 -> map_wf map'.
+Proof.
+  intros. monadInv H. 
+  apply mk_map_wf; simpl.
+  intros until r0. repeat rewrite PTree.gsspec.
+  destruct (peq id1 name); destruct (peq id2 name).
+  congruence.
+  intros. inv H. elimtype False. 
+  apply valid_fresh_absurd with r0 s1. 
+  apply H1. left; exists id2; auto.
+  eauto with rtlg.
+  intros. inv H2. elimtype False. 
+  apply valid_fresh_absurd with r0 s1. 
+  apply H1. left; exists id1; auto.
+  eauto with rtlg.
+  inv H0. eauto.
+  intros until r0. rewrite PTree.gsspec.
+  destruct (peq id name). 
+  intros. inv H.
+  apply valid_fresh_absurd with r0 s1.
+  apply H1. right; auto.
+  eauto with rtlg.
+  inv H0; eauto.
+Qed.
+
+Lemma add_vars_wf:
+  forall names s1 s2 map map' rl,
+  add_vars map names s1 = OK (rl,map') s2 ->
+  map_wf map -> map_valid map s1 -> map_wf map'.
+Proof.
+  induction names; simpl; intros; monadInv H. 
+  auto.
+  exploit add_vars_valid; eauto. intros [A B].
+  eapply add_var_wf; eauto.
+Qed.
+
+Lemma add_letvar_wf:
+  forall map r,
+  map_wf map -> ~reg_in_map map r -> map_wf (add_letvar map r).
+Proof.
+  intros. inv H. unfold add_letvar; constructor; simpl.
+  auto.
+  intros. elim H1; intro. subst r0. elim H0. left; exists id; auto.
+  eauto.
+Qed.
+
+(** An RTL register environment matches a Cminor local environment and
+  let-environment if the value of every local or let-bound variable in
+  the Cminor environments is identical to the value of the
+  corresponding pseudo-register in the RTL register environment. *)
+
+Record match_env
+      (map: mapping) (e: Cminor.env) (le: Cminor.letenv) (rs: regset) : Prop :=
+  mk_match_env {
+    me_vars:
+      (forall id v,
+         e!id = Some v -> exists r, map.(map_vars)!id = Some r /\ rs#r = v);
+    me_letvars:
+      rs##(map.(map_letvars)) = le
+  }.
+
+Lemma match_env_find_var:
+  forall map e le rs id v r,
+  match_env map e le rs ->
+  e!id = Some v ->
+  map.(map_vars)!id = Some r ->
+  rs#r = v.
+Proof.
+  intros. exploit me_vars; eauto. intros [r' [EQ' RS]].
+  replace r with r'. auto. congruence.
+Qed.
+
+Lemma match_env_find_letvar:
+  forall map e le rs idx v r,
+  match_env map e le rs ->
+  List.nth_error le idx = Some v ->
+  List.nth_error map.(map_letvars) idx = Some r ->
+  rs#r = v.
+Proof.
+  intros. exploit me_letvars; eauto. intros.
+  rewrite <- H2 in H0. rewrite list_map_nth in H0. 
+  change reg with positive in H1. rewrite H1 in H0. 
+  simpl in H0. congruence.
+Qed.
+
+Lemma match_env_invariant:
+  forall map e le rs rs',
+  match_env map e le rs ->
+  (forall r, (reg_in_map map r) -> rs'#r = rs#r) ->
+  match_env map e le rs'.
+Proof.
+  intros. inversion H. apply mk_match_env.
+  intros. exploit me_vars0; eauto. intros [r [A B]].
+  exists r; split. auto. rewrite H0; auto. left; exists id; auto.
+  rewrite <- me_letvars0. apply list_map_exten. intros.
+  symmetry. apply H0. right; auto.
+Qed.
+
+(** Matching between environments is preserved when an unmapped register
+  (not corresponding to any Cminor variable) is assigned in the RTL
+  execution. *)
+
+Lemma match_env_update_temp:
+  forall map e le rs r v,
+  match_env map e le rs ->
+  ~(reg_in_map map r) ->
+  match_env map e le (rs#r <- v).
+Proof.
+  intros. apply match_env_invariant with rs; auto.
+  intros. case (Reg.eq r r0); intro. 
+  subst r0; contradiction.
+  apply Regmap.gso; auto.
+Qed.
+Hint Resolve match_env_update_temp: rtlg.
+
+(** Matching between environments is preserved by simultaneous
+  assignment to a Cminor local variable (in the Cminor environments)
+  and to the corresponding RTL pseudo-register (in the RTL register
+  environment). *)
+
+Lemma match_env_update_var:
+  forall map e le rs id r v,
+  map_wf map ->
+  map.(map_vars)!id = Some r ->
+  match_env map e le rs ->
+  match_env map (PTree.set id v e) le (rs#r <- v).
+Proof.
+  intros. inversion H. inversion H1. apply mk_match_env.
+  intros id' v'. rewrite PTree.gsspec. destruct (peq id' id); intros.
+  subst id'. inv H2. exists r; split. auto. apply PMap.gss.
+  exploit me_vars0; eauto. intros [r' [A B]].
+  exists r'; split. auto. rewrite PMap.gso; auto.
+  red; intros. subst r'. elim n. eauto.
+  rewrite <- me_letvars0. apply list_map_exten; intros.
+  symmetry. apply PMap.gso. red; intros. subst x. eauto. 
+Qed.
+
+Lemma match_env_bind_letvar:
+  forall map e le rs r v,
+  match_env map e le rs ->
+  rs#r = v ->
+  match_env (add_letvar map r) e (v :: le) rs.
+Proof.
+  intros. inv H. unfold add_letvar. apply mk_match_env; simpl; auto.
+Qed.
+
+Lemma match_env_unbind_letvar:
+  forall map e le rs r v,
+  match_env (add_letvar map r) e (v :: le) rs ->
+  match_env map e le rs.
+Proof.
+  unfold add_letvar; intros. inv H. simpl in *. 
+  constructor. auto. congruence.
+Qed.
+
+Lemma match_env_empty:
+  forall map,
+  map.(map_letvars) = nil ->
+  match_env map (PTree.empty val) nil (Regmap.init Vundef).
+Proof.
+  intros. apply mk_match_env.
+  intros. rewrite PTree.gempty in H0. discriminate.
+  rewrite H. reflexivity.
+Qed.
+
+(** The assignment of function arguments to local variables (on the Cminor
+  side) and pseudo-registers (on the RTL side) preserves matching
+  between environments. *)
+
+Lemma match_set_params_init_regs:
+  forall il rl s1 map2 s2 vl,
+  add_vars init_mapping il s1 = OK (rl, map2) s2 ->
+  match_env map2 (set_params vl il) nil (init_regs vl rl)
+  /\ (forall r, reg_fresh r s2 -> (init_regs vl rl)#r = Vundef).
+Proof.
+  induction il; intros.
+
+  inv H. split. apply match_env_empty. auto. intros. 
+  simpl. apply Regmap.gi.
+
+  monadInv H. simpl.
+  exploit add_vars_valid; eauto. apply init_mapping_valid. intros [A B].
+  exploit add_var_valid; eauto. intros [A' B']. clear B'.
+  monadInv EQ1. 
+  destruct vl as [ | v1 vs].
+  (* vl = nil *)
+  destruct (IHil _ _ _ _ nil EQ) as [ME UNDEF]. inv ME. split.
+  constructor; simpl.
+  intros id v. repeat rewrite PTree.gsspec. destruct (peq id a); intros.
+  subst a. inv H. exists x1; split. auto. apply Regmap.gi.
+  replace (init_regs nil x) with (Regmap.init Vundef) in me_vars0. eauto.
+  destruct x; reflexivity.
+  destruct (map_letvars x0). auto. simpl in me_letvars0. congruence.
+  intros. apply Regmap.gi.
+  (* vl = v1 :: vs *)
+  destruct (IHil _ _ _ _ vs EQ) as [ME UNDEF]. inv ME. split.
+  constructor; simpl.
+  intros id v. repeat rewrite PTree.gsspec. destruct (peq id a); intros.
+  subst a. inv H. exists x1; split. auto. apply Regmap.gss.
+  exploit me_vars0; eauto. intros [r' [C D]]. 
+  exists r'; split. auto. rewrite Regmap.gso. auto.
+  apply valid_fresh_different with s.
+  apply B. left; exists id; auto.
+  eauto with rtlg. 
+  destruct (map_letvars x0). auto. simpl in me_letvars0. congruence.
+  intros. rewrite Regmap.gso. apply UNDEF. 
+  apply reg_fresh_decr with s2; eauto with rtlg.
+  apply sym_not_equal. apply valid_fresh_different with s2; auto.
+Qed.
+
+Lemma match_set_locals:
+  forall map1 s1,
+  map_wf map1 ->
+  forall il rl map2 s2 e le rs,
+  match_env map1 e le rs ->
+  (forall r, reg_fresh r s1 -> rs#r = Vundef) ->
+  add_vars map1 il s1 = OK (rl, map2) s2 ->
+  match_env map2 (set_locals il e) le rs.
+Proof.
+  induction il; simpl in *; intros.
+
+  inv H2. auto.
+
+  monadInv H2. 
+  exploit IHil; eauto. intro. 
+  monadInv EQ1.
+  constructor.
+  intros id v. simpl. repeat rewrite PTree.gsspec. 
+  destruct (peq id a). subst a. intro. 
+  exists x1. split. auto. inv H3. 
+  apply H1. apply reg_fresh_decr with s. 
+  eapply add_vars_incr; eauto.
+  eauto with rtlg.
+  intros. eapply me_vars; eauto. 
+  simpl. eapply me_letvars; eauto.
+Qed.
+
+Lemma match_init_env_init_reg:
+  forall params s0 rparams map1 s1 vars rvars map2 s2 vparams,
+  add_vars init_mapping params s0 = OK (rparams, map1) s1 ->
+  add_vars map1 vars s1 = OK (rvars, map2) s2 ->
+  match_env map2 (set_locals vars (set_params vparams params))
+            nil (init_regs vparams rparams).
+Proof.
+  intros.
+  exploit match_set_params_init_regs; eauto. intros [A B].
+  eapply match_set_locals; eauto.
+  eapply add_vars_wf; eauto. apply init_mapping_wf.
+  apply init_mapping_valid.
+Qed.
+
+(** * The simulation argument *)
+
+Require Import Errors.
 
 Section CORRECTNESS.
 
-Variable prog: Cminor.program.
+Variable prog: CminorSel.program.
 Variable tprog: RTL.program.
-Hypothesis TRANSL: transl_program prog = Some tprog.
+Hypothesis TRANSL: transl_program prog = OK tprog.
 
-Let ge : Cminor.genv := Genv.globalenv prog.
+Let ge : CminorSel.genv := Genv.globalenv prog.
 Let tge : RTL.genv := Genv.globalenv tprog.
 
 (** Relationship between the global environments for the original
-  Cminor program and the generated RTL program. *)
+  CminorSel program and the generated RTL program. *)
 
 Lemma symbols_preserved:
   forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof.
-  intro. unfold ge, tge. 
-  apply Genv.find_symbol_transf_partial with transl_fundef.
-  exact TRANSL.
-Qed.
+Proof
+  (Genv.find_symbol_transf_partial transl_fundef _ TRANSL).
 
 Lemma function_ptr_translated:
-  forall (b: block) (f: Cminor.fundef),
+  forall (b: block) (f: CminorSel.fundef),
   Genv.find_funct_ptr ge b = Some f ->
   exists tf,
-  Genv.find_funct_ptr tge b = Some tf /\ transl_fundef f = Some tf.
-Proof.
-  intros. 
-  generalize 
-   (Genv.find_funct_ptr_transf_partial transl_fundef TRANSL H).
-  case (transl_fundef f).
-  intros tf [A B]. exists tf. tauto.
-  intros [A B]. elim B. reflexivity.
-Qed.
+  Genv.find_funct_ptr tge b = Some tf /\ transl_fundef f = OK tf.
+Proof
+  (Genv.find_funct_ptr_transf_partial transl_fundef TRANSL).
 
 Lemma functions_translated:
-  forall (v: val) (f: Cminor.fundef),
+  forall (v: val) (f: CminorSel.fundef),
   Genv.find_funct ge v = Some f ->
   exists tf,
-  Genv.find_funct tge v = Some tf /\ transl_fundef f = Some tf.
-Proof.
-  intros. 
-  generalize 
-   (Genv.find_funct_transf_partial transl_fundef TRANSL H).
-  case (transl_fundef f).
-  intros tf [A B]. exists tf. tauto.
-  intros [A B]. elim B. reflexivity.
-Qed.
+  Genv.find_funct tge v = Some tf /\ transl_fundef f = OK tf.
+Proof
+  (Genv.find_funct_transf_partial transl_fundef TRANSL).
 
 Lemma sig_transl_function:
-  forall (f: Cminor.fundef) (tf: RTL.fundef),
-  transl_fundef f = Some tf ->
-  RTL.funsig tf = Cminor.funsig f.
+  forall (f: CminorSel.fundef) (tf: RTL.fundef),
+  transl_fundef f = OK tf ->
+  RTL.funsig tf = CminorSel.funsig f.
 Proof.
   intros until tf. unfold transl_fundef, transf_partial_fundef.
   case f; intro.
   unfold transl_function. 
-  case (transl_fun f0 init_state); intros.
+  case (transl_fun f0 init_state); simpl; intros.
   discriminate.
-  destruct p. inversion H. reflexivity.
+  destruct p. simpl in H. inversion H. reflexivity.
   intro. inversion H. reflexivity.
 Qed.
 
 (** Correctness of the code generated by [add_move]. *)
 
-Lemma add_move_correct:
-  forall r1 r2 sp nd s ns s' rs m,
-  add_move r1 r2 nd s = OK ns s' ->
+Lemma tr_move_correct:
+  forall r1 ns r2 nd cs code sp rs m,
+  tr_move code ns r1 nd r2 ->
   exists rs',
-  exec_instrs tge s'.(st_code) sp ns rs m E0 nd rs' m /\
+  star step tge (State cs code sp ns rs m) E0 (State cs code sp nd rs' m) /\
   rs'#r2 = rs#r1 /\
   (forall r, r <> r2 -> rs'#r = rs#r).
 Proof.
-  intros until m. 
-  unfold add_move. case (Reg.eq r1 r2); intro.
-  monadSimpl. subst s'; subst r2; subst ns. 
-  exists rs. split. apply exec_refl. split. auto. auto.
-  intro. exists (rs#r2 <- (rs#r1)).
-  split. apply exec_one. eapply exec_Iop. eauto with rtlg. 
-  reflexivity. 
-  split. apply Regmap.gss. 
-  intros. apply Regmap.gso; auto.
+  intros. inv H. 
+  exists rs; split. constructor. auto.
+  exists (rs#r2 <- (rs#r1)); split. 
+  apply star_one. eapply exec_Iop. eauto. auto. 
+  split. apply Regmap.gss. intros; apply Regmap.gso; auto.
 Qed.
 
 (** The proof of semantic preservation for the translation of expressions
   is a simulation argument based on diagrams of the following form:
 <<
                     I /\ P
-       e, m, a ------------- ns, rs, m
+    e, le, m, a ------------- State cs code sp ns rs m
          ||                      |
-         ||                      |*
+        t||                     t|*
          ||                      |
          \/                      v
-       e', m', v ----------- nd, rs', m'
+    e, le, m', v ------------ State cs code sp nd rs' m'
                     I /\ Q
 >>
-  where [transl_expr map mut a rd nd s = OK ns s'].
+  where [tr_expr code map pr a ns nd rd] is assumed to hold.
   The left vertical arrow represents an evaluation of the expression [a]
-  (assumption).  The right vertical arrow represents the execution of
-  zero, one or several instructions in the generated RTL flow graph
-  found in the final state [s'] (conclusion).  
+  to value [v].
+  The right vertical arrow represents the execution of zero, one or
+  several instructions in the generated RTL flow graph [code].
 
-  The invariant [I] is the agreement between Cminor environments and
-  RTL register environment, as captured by [match_envs].
+  The invariant [I] is the agreement between CminorSel environments
+  [e], [le] and the RTL register environment [rs],
+  as captured by [match_envs].
 
-  The preconditions [P] include the well-formedness of the compilation
-  environment [mut] and the validity of [rd] as a target register.
+  The precondition [P] is the well-formedness of the compilation
+  environment [mut].
 
   The postconditions [Q] state that in the final register environment
-  [rs'], register [rd] contains value [v], and most other registers
-  valid in [s] are unchanged w.r.t. the initial register environment
-  [rs]. (See below for a precise specification of ``most other
-  registers''.)
+  [rs'], register [rd] contains value [v], and the registers in
+  the set of preserved registers [pr] are unchanged, as are the registers
+  in the codomain of [map].
 
   We formalize this simulation property by the following predicate
-  parameterized by the Cminor evaluation (left arrow).  *)
+  parameterized by the CminorSel evaluation (left arrow).  *)
 
 Definition transl_expr_correct 
   (sp: val) (le: letenv) (e: env) (m: mem) (a: expr)
               (t: trace) (m': mem) (v: val) : Prop :=
-  forall map rd nd s ns s' rs
-    (MWF: map_wf map s)
-    (TE: transl_expr map a rd nd s = OK ns s')
-    (ME: match_env map e le rs)
-    (TRG: target_reg_ok s map a rd),
+  forall cs code map pr ns nd rd rs
+    (MWF: map_wf map)
+    (TE: tr_expr code map pr a ns nd rd)
+    (ME: match_env map e le rs),
   exists rs',
-     exec_instrs tge s'.(st_code) sp ns rs m t nd rs' m'
+     star step tge (State cs code sp ns rs m) t (State cs code sp nd rs' m')
   /\ match_env map e le rs'
   /\ rs'#rd = v
-  /\ (forall r,
-       reg_valid r s -> reg_in_map map r \/ r <> rd -> rs'#r = rs#r).
+  /\ (forall r, reg_in_map map r \/ In r pr -> rs'#r = rs#r).
 
 (** The simulation properties for lists of expressions and for
   conditional expressions are similar. *)
@@ -152,122 +419,113 @@ Definition transl_expr_correct
 Definition transl_exprlist_correct 
   (sp: val) (le: letenv) (e: env) (m: mem) (al: exprlist)
               (t: trace) (m': mem) (vl: list val) : Prop :=
-  forall map rl nd s ns s' rs
-    (MWF: map_wf map s)
-    (TE: transl_exprlist map al rl nd s = OK ns s')
-    (ME: match_env map e le rs)
-    (TRG: target_regs_ok s map al rl),
+  forall cs code map pr ns nd rl rs
+    (MWF: map_wf map)
+    (TE: tr_exprlist code map pr al ns nd rl)
+    (ME: match_env map e le rs),
   exists rs',
-     exec_instrs tge s'.(st_code) sp ns rs m t nd rs' m'
+     star step tge (State cs code sp ns rs m) t (State cs code sp nd rs' m')
   /\ match_env map e le rs'
   /\ rs'##rl = vl
-  /\ (forall r,
-       reg_valid r s -> reg_in_map map r \/ ~(In r rl) -> rs'#r = rs#r).
+  /\ (forall r, reg_in_map map r \/ In r pr -> rs'#r = rs#r).
 
 Definition transl_condition_correct 
   (sp: val) (le: letenv) (e: env) (m: mem) (a: condexpr)
               (t: trace) (m': mem) (vb: bool) : Prop :=
-  forall map ntrue nfalse s ns s' rs
-    (MWF: map_wf map s)
-    (TE: transl_condition map a ntrue nfalse s = OK ns s')
+  forall cs code map pr ns ntrue nfalse rs
+    (MWF: map_wf map)
+    (TE: tr_condition code map pr a ns ntrue nfalse)
     (ME: match_env map e le rs),
   exists rs',
-     exec_instrs tge s'.(st_code) sp ns rs m t (if vb then ntrue else nfalse) rs' m'
+     star step tge (State cs code sp ns rs m) t
+                   (State cs code sp (if vb then ntrue else nfalse) rs' m')
   /\ match_env map e le rs'
-  /\ (forall r, reg_valid r s -> rs'#r = rs#r).
-
-(** For statements, we define the following auxiliary predicates
-  relating the outcome of the Cminor execution with the final node
-  and value of the return register in the RTL execution. *)
-
-Definition outcome_node
-    (out: outcome)
-    (ncont: node) (nexits: list node) (nret: node) (nd: node) : Prop :=
-  match out with
-  | Out_normal => ncont = nd
-  | Out_exit n => nth_error nexits n = Some nd
-  | Out_return _ => nret = nd
-  end.
-
-Definition match_return_reg
-    (rs: regset) (rret: option reg) (v: val) : Prop :=
-  match rret with
-  | None => True
-  | Some r => rs#r = v
-  end.
-
-Definition match_return_outcome
-    (out: outcome) (rret: option reg) (rs: regset) : Prop :=
-  match out with
-  | Out_normal => True
-  | Out_exit n => True
-  | Out_return optv =>
-      match rret, optv with
-      | None, None => True
-      | Some r, Some v => rs#r = v
-      | _, _ => False
-      end
-  end.
+  /\ (forall r, reg_in_map map r \/ In r pr -> rs'#r = rs#r).
+
 
 (** The simulation diagram for the translation of statements
   is of the following form:
 <<
                     I /\ P
-       e, m, a ------------- ns, rs, m
+      e, m, a -------------- State cs code sp ns rs m
          ||                      |
-         ||                      |*
+        t||                     t|*
          ||                      |
          \/                      v
-       e', m', out --------- nd, rs', m'
+     e', m', out -------------- st'
                     I /\ Q
 >>
-  where [transl_stmt map a ncont nexits nret rret s = OK ns s'].
+  where [tr_stmt code map a ns ncont nexits nret rret] holds.
   The left vertical arrow represents an execution of the statement [a]
-  (assumption).  The right vertical arrow represents the execution of
-  zero, one or several instructions in the generated RTL flow graph
-  found in the final state [s'] (conclusion).  
+  with outcome [out].
+  The right vertical arrow represents the execution of
+  zero, one or several instructions in the generated RTL flow graph [code].
 
-  The invariant [I] is the agreement between Cminor environments and
+  The invariant [I] is the agreement between CminorSel environments and
   RTL register environment, as captured by [match_envs].
 
-  The preconditions [P] include the well-formedness of the compilation
-  environment [mut] and the agreement between the outcome [out]
-  and the end node [nd].
-
-  The postcondition [Q] states agreement between the outcome [out]
-  and the value of the return register [rret]. *)
+  The precondition [P] is the well-formedness of the compilation
+  environment [mut].
+
+  The postcondition [Q] characterizes the final RTL state [st'].
+  It must have memory state [m'] and a register state [rs'] that matches
+  [e'].  Moreover, the program point reached must correspond to the outcome
+  [out].  This is captured by the following [state_for_outcome] predicate. *)
+
+Inductive state_for_outcome
+           (ncont: node) (nexits: list node) (nret: node) (rret: option reg)
+           (cs: list stackframe) (c: code) (sp: val) (rs: regset) (m: mem):
+           outcome -> RTL.state -> Prop :=
+  | state_for_normal:
+      state_for_outcome ncont nexits nret rret cs c sp rs m
+                        Out_normal (State cs c sp ncont rs m)
+  | state_for_exit: forall n nexit,
+      nth_error nexits n = Some nexit ->
+      state_for_outcome ncont nexits nret rret cs c sp rs m
+                        (Out_exit n) (State cs c sp nexit rs m)
+  | state_for_return_none:
+      rret = None ->
+      state_for_outcome ncont nexits nret rret cs c sp rs m
+                        (Out_return None) (State cs c sp nret rs m)
+  | state_for_return_some: forall r v,
+      rret = Some r ->
+      rs#r = v ->
+      state_for_outcome ncont nexits nret rret cs c sp rs m
+                        (Out_return (Some v)) (State cs c sp nret rs m)
+  | state_for_return_tail: forall v,
+      state_for_outcome ncont nexits nret rret cs c sp rs m
+                        (Out_tailcall_return v) (Returnstate cs v m).
 
 Definition transl_stmt_correct 
   (sp: val) (e: env) (m: mem) (a: stmt)
   (t: trace) (e': env) (m': mem) (out: outcome) : Prop :=
-  forall map ncont nexits nret rret s ns s' nd rs
-    (MWF: map_wf map s)
-    (TE: transl_stmt map a ncont nexits nret rret s = OK ns s')
-    (ME: match_env map e nil rs)
-    (OUT: outcome_node out ncont nexits nret nd)
-    (RRG: return_reg_ok s map rret),
-  exists rs',
-     exec_instrs tge s'.(st_code) sp ns rs m t nd rs' m'
-  /\ match_env map e' nil rs'
-  /\ match_return_outcome out rret rs'.
+  forall cs code map ns ncont nexits nret rret rs
+    (MWF: map_wf map)
+    (TE: tr_stmt code map a ns ncont nexits nret rret)
+    (ME: match_env map e nil rs),
+  exists rs', exists st,
+     state_for_outcome ncont nexits nret rret cs code sp rs' m' out st
+  /\ star step tge (State cs code sp ns rs m) t st
+  /\ match_env map e' nil rs'.
 
 (** Finally, the correctness condition for the translation of functions
   is that the translated RTL function, when applied to the same arguments
-  as the original Cminor function, returns the same value and performs
-  the same modifications on the memory state. *)
+  as the original CminorSel function, returns the same value, produces
+  the same trace of events, and performs the same modifications on the
+  memory state. *)
 
 Definition transl_function_correct
-    (m: mem) (f: Cminor.fundef) (vargs: list val)
-    (t: trace) (m':mem) (vres: val) : Prop :=
-  forall tf
-    (TE: transl_fundef f = Some tf),
-  exec_function tge tf vargs m t vres m'.
+    (m: mem) (f: CminorSel.fundef) (vargs: list val)
+    (t: trace) (m': mem) (vres: val) : Prop :=
+  forall cs tf
+    (TE: transl_fundef f = OK tf),
+  star step tge (Callstate cs tf vargs m) t (Returnstate cs vres m').
 
 (** The correctness of the translation is a huge induction over
-  the Cminor evaluation derivation for the source program.  To keep
+  the CminorSel evaluation derivation for the source program.  To keep
   the proof manageable, we put each case of the proof in a separate
-  lemma.  There is one lemma for each Cminor evaluation rule.
-  It takes as hypotheses the premises of the Cminor evaluation rule,
+  lemma.  There is one lemma for each CminorSel evaluation rule.
+  It takes as hypotheses the premises of the CminorSel evaluation rule,
   plus the induction hypotheses, that is, the [transl_expr_correct], etc,
   corresponding to the evaluations of sub-expressions or sub-statements. *)
 
@@ -276,35 +534,22 @@ Lemma transl_expr_Evar_correct:
   e!id = Some v ->
   transl_expr_correct sp le e m (Evar id) E0 m v.
 Proof.
-  intros; red; intros. monadInv TE. intro. 
-  generalize EQ; unfold find_var. caseEq (map_vars map)!id.
-  intros r' MV; monadSimpl. subst s0; subst r'. 
-  generalize (add_move_correct _ _ sp _ _ _ _ rs m TE0).
-  intros [rs1 [EX1 [RES1 OTHER1]]].
-  exists rs1.
-(* Exec *)
-  split. assumption.
-(* Match-env *)
-  split. inversion TRG; subst.
-  (* Optimized case rd = r *)
-  rewrite MV in H3; injection H3; intro; subst r.
-  apply match_env_exten with rs.
-  intros. case (Reg.eq r rd); intro.
-  subst r; assumption. apply OTHER1; assumption.
-  assumption. 
-  (* General case rd is temp *)
-  apply match_env_invariant with rs.
-  assumption. intros. apply OTHER1. congruence. 
-(* Result value *)
-  split. rewrite RES1. eauto with rtlg.
-(* Other regs *)
-  intros. destruct (Reg.eq rd r0).
-  subst r0. inversion TRG; subst. 
-  congruence.
-  byContradiction. tauto.
+  intros; red; intros. inv TE.
+  exploit tr_move_correct; eauto. intros [rs' [A [B C]]]. 
+  exists rs'; split. eauto.
+  destruct H2 as [D | [E F]].
+  (* optimized case *)
+  subst r.
+  assert (forall r, rs'#r = rs#r).
+    intros. destruct (Reg.eq r rd). subst r. auto. auto. 
+  split. eapply match_env_invariant; eauto.
+  split. rewrite B. eapply match_env_find_var; eauto.
   auto.
-
-  intro; monadSimpl.
+  (* general case *)
+  split. eapply match_env_invariant; eauto. 
+    intros. apply C. congruence. 
+  split. rewrite B. eapply match_env_find_var; eauto. 
+  intros. apply C. intuition congruence.
 Qed.
 
 Lemma transl_expr_Eop_correct:
@@ -313,36 +558,25 @@ Lemma transl_expr_Eop_correct:
          (v: val),
   eval_exprlist ge sp le e m al t m1 vl ->
   transl_exprlist_correct sp le e m al t m1 vl ->
-  eval_operation ge sp op vl = Some v ->
+  eval_operation ge sp op vl m1 = Some v ->
   transl_expr_correct sp le e m (Eop op al) t m1 v.
 Proof.
-  intros until v. intros EEL TEL EOP. red; intros.
-  simpl in TE. monadInv TE. intro EQ1. 
-  exploit TEL. 2: eauto. eauto with rtlg. eauto. eauto with rtlg.
-  intros [rs1 [EX1 [ME1 [RR1 RO1]]]].
+  intros; red; intros. inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RR1 RO1]]]].
   exists (rs1#rd <- v).
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  apply exec_one; eapply exec_Iop. eauto with rtlg.
+  split. eapply star_right. eexact EX1.
+  eapply exec_Iop; eauto.  
   subst vl. 
-  rewrite (@eval_operation_preserved Cminor.fundef RTL.fundef ge tge). 
-  eexact EOP.
+  rewrite (@eval_operation_preserved CminorSel.fundef RTL.fundef ge tge). 
+  auto. 
   exact symbols_preserved. traceEq.
 (* Match-env *)
-  split. inversion TRG. eauto with rtlg.
+  split. eauto with rtlg.  
 (* Result reg *)
   split. apply Regmap.gss.
 (* Other regs *)
-  intros. rewrite Regmap.gso.
-  apply RO1. eauto with rtlg. 
-  destruct (In_dec Reg.eq r l).
-  left. elim (alloc_regs_fresh_or_in_map _ _ _ _ _ MWF EQ r i); intro.
-  auto. byContradiction; eauto with rtlg.  
-  right; auto.
-  red; intro; subst r. 
-  elim H0; intro. inversion TRG. contradiction.
-  tauto.
+  intros. rewrite Regmap.gso. auto. intuition congruence.
 Qed.
 
 Lemma transl_expr_Eload_correct:
@@ -356,30 +590,19 @@ Lemma transl_expr_Eload_correct:
   Mem.loadv chunk m1 a = Some v ->
   transl_expr_correct sp le e m (Eload chunk addr al) t m1 v.
 Proof.
-  intros; red; intros. simpl in TE. monadInv TE. intro EQ1. clear TE.
-  assert (MWF1: map_wf map s1). eauto with rtlg.
-  assert (TRG1: target_regs_ok s1 map al l). eauto with rtlg. 
-  generalize (H0 _ _ _ _ _ _ _ MWF1 EQ1 ME TRG1).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  intros; red; intros. inv TE.
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
   exists (rs1#rd <- v).
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  apply exec_one. eapply exec_Iload. eauto with rtlg.
+  split. eapply star_right. eexact EX1. eapply exec_Iload; eauto.
   rewrite RES1. rewrite (@eval_addressing_preserved _ _ ge tge).
-  eexact H1. exact symbols_preserved. assumption. traceEq.
+  exact H1. exact symbols_preserved. traceEq.
 (* Match-env *)
-  split. eapply match_env_update_temp. assumption. inversion TRG. assumption.
+  split. eauto with rtlg. 
 (* Result *)
   split. apply Regmap.gss.
 (* Other regs *)
-  intros. rewrite Regmap.gso. apply OTHER1.
-  eauto with rtlg.  
-  case (In_dec Reg.eq r l); intro.
-  elim (alloc_regs_fresh_or_in_map _ _ _ _ _ MWF EQ r i); intro.
-  tauto. byContradiction. eauto with rtlg.
-  tauto.
-  red; intro; subst r. inversion TRG. tauto. 
+  intros. rewrite Regmap.gso. auto. intuition congruence. 
 Qed.
 
 Lemma transl_expr_Estore_correct:
@@ -397,35 +620,18 @@ Lemma transl_expr_Estore_correct:
   t = t1 ** t2 ->
   transl_expr_correct sp le e m (Estore chunk addr al b) t m3 v.
 Proof.
-  intros; red; intros. simpl in TE; monadInv TE. intro EQ2; clear TE.
-  assert (MWF2: map_wf map s2). 
-    apply map_wf_incr with s. 
-    apply state_incr_trans2 with s0 s1; eauto with rtlg.
-    assumption. 
-  assert (TRG2: target_regs_ok s2 map al l). 
-    apply target_regs_ok_incr with s0; eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWF2 EQ2 ME TRG2).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  assert (MWF1: map_wf map s1). eauto with rtlg.
-  assert (TRG1: target_reg_ok s1 map b rd).
-    inversion TRG. apply target_reg_other; eauto with rtlg.
-  generalize (H2 _ _ _ _ _ _ _ MWF1 EQ1 ME1 TRG1).
-  intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit H2; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
   exists rs2.
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s2. eauto with rtlg.
-  eapply exec_trans. eexact EX2. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  apply exec_one. eapply exec_Istore. eauto with rtlg.
-  assert (rs2##l = rs1##l).
+  split. eapply star_trans. eexact EX1. 
+  eapply star_right. eexact EX2. 
+  eapply exec_Istore; eauto.
+  assert (rs2##rl = rs1##rl).
     apply list_map_exten. intros r' IN. symmetry. apply OTHER2.
-    eauto with rtlg. eauto with rtlg. 
-    elim (alloc_regs_fresh_or_in_map _ _ _ _ _ MWF EQ r' IN); intro.
-    tauto. right. apply sym_not_equal. 
-    apply valid_fresh_different with s. inversion TRG; assumption.
-    assumption.
-  rewrite H6; rewrite RES1. 
+    right. apply in_or_app. auto.
+  rewrite H5; rewrite RES1. 
   rewrite (@eval_addressing_preserved _ _ ge tge).
   eexact H3. exact symbols_preserved. 
   rewrite RES2. assumption.
@@ -435,110 +641,52 @@ Proof.
 (* Result *)
   split. assumption.
 (* Other regs *)
-  intro r'; intros. transitivity (rs1#r').
-  apply OTHER2. apply reg_valid_incr with s; eauto with rtlg. 
-  assumption. 
-  apply OTHER1. apply reg_valid_incr with s.
-  apply state_incr_trans2 with s0 s1; eauto with rtlg. assumption.
-  case (In_dec Reg.eq r' l); intro.
-    elim (alloc_regs_fresh_or_in_map _ _ _ _ _ MWF EQ r' i); intro.
-  tauto. byContradiction; eauto with rtlg. tauto.
-Qed.  
+  intro r'; intros. transitivity (rs1#r'). 
+  apply OTHER2. intuition.
+  auto.
+Qed.
 
 Lemma transl_expr_Ecall_correct:
  forall (sp: val) (le : letenv) (e : env) (m : mem) 
     (sig : signature) (a : expr) (bl : exprlist) (t t1: trace)
     (m1: mem) (t2: trace) (m2 : mem) 
     (t3: trace) (m3: mem) (vf : val) 
-    (vargs : list val) (vres : val) (f : Cminor.fundef),
+    (vargs : list val) (vres : val) (f : CminorSel.fundef),
   eval_expr ge sp le e m a t1 m1 vf ->
   transl_expr_correct sp le e m a t1 m1 vf ->
   eval_exprlist ge sp le e m1 bl t2 m2 vargs ->
   transl_exprlist_correct sp le e m1 bl t2 m2 vargs ->
   Genv.find_funct ge vf = Some f ->
-  Cminor.funsig f = sig ->
+  CminorSel.funsig f = sig ->
   eval_funcall ge m2 f vargs t3 m3 vres ->
   transl_function_correct m2 f vargs t3 m3 vres ->
   t = t1 ** t2 ** t3 ->
   transl_expr_correct sp le e m (Ecall sig a bl) t m3 vres.
 Proof.
-  intros. red; simpl; intros.
-  monadInv TE. intro EQ3. clear TE.
-  assert (MWFa: map_wf map s3).
-    apply map_wf_incr with s. 
-    apply state_incr_trans3 with s0 s1 s2; eauto with rtlg.
-    assumption.
-  assert (TRGr: target_reg_ok s3 map a r). 
-    apply target_reg_ok_incr with s0.
-    apply state_incr_trans2 with s1 s2; eauto with rtlg.
-    eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWFa EQ3 ME TRGr).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  clear MWFa TRGr.
-  assert (MWFbl: map_wf map s2).
-    apply map_wf_incr with s. 
-    apply state_incr_trans2 with s0 s1; eauto with rtlg.
-    assumption.
-  assert (TRGl: target_regs_ok s2 map bl l).
-    apply target_regs_ok_incr with s1; eauto with rtlg.
-  generalize (H2 _ _ _ _ _ _ _ MWFbl EQ2 ME1 TRGl).
-  intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
-  clear MWFbl TRGl.
- 
-  generalize (functions_translated vf f H3). intros [tf [TFIND TF]]. 
-  generalize (H6 tf TF). intro EXF.
-
-  assert (EX3: exec_instrs tge (st_code s2) sp n rs2 m2
-                     t3 nd (rs2#rd <- vres) m3).
-  apply exec_one. eapply exec_Icall.
-  eauto with rtlg. simpl. replace (rs2#r) with vf. eexact TFIND.
-  rewrite <- RES1. symmetry. apply OTHER2.
-  apply reg_valid_incr with s0; eauto with rtlg.
-  assert (MWFs0: map_wf map s0). eauto with rtlg.
-  case (In_dec Reg.eq r l); intro.
-  elim (alloc_regs_fresh_or_in_map _ _ _ _ _ MWFs0 EQ0 r i); intro.
-  tauto. byContradiction. apply valid_fresh_absurd with r s0.
-  eauto with rtlg. assumption.
-  tauto.
-  generalize (sig_transl_function _ _ TF). congruence.
-  rewrite RES2. assumption.
-
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit H2; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
+  exploit functions_translated; eauto. intros [tf [TFIND TF]].
+  exploit H6; eauto. intro EXF.
   exists (rs2#rd <- vres).
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s3. eauto with rtlg.
-  eapply exec_trans. eexact EX2. 
-  apply exec_instrs_incr with s2. eauto with rtlg.
-  eexact EX3. reflexivity. traceEq. 
+  split. eapply star_trans. eexact EX1. 
+  eapply star_trans. eexact EX2. 
+  eapply star_left. eapply exec_Icall; eauto. 
+  simpl. rewrite OTHER2. rewrite RES1. eauto. simpl; tauto. 
+  eapply sig_transl_function; eauto.
+  eapply star_right. rewrite RES2. eexact EXF.
+  apply exec_return. reflexivity. reflexivity. reflexivity. traceEq.  
 (* Match env *)
-  split. apply match_env_update_temp. assumption.
-  inversion TRG. assumption.
+  split. eauto with rtlg. 
 (* Result *)
   split. apply Regmap.gss.
 (* Other regs *)
   intros.
-  rewrite Regmap.gso. transitivity (rs1#r0). 
-  apply OTHER2. 
-    apply reg_valid_incr with s. 
-    apply state_incr_trans2 with s0 s1; eauto with rtlg.
-    assumption.
-    assert (MWFs0: map_wf map s0). eauto with rtlg.
-    case (In_dec Reg.eq r0 l); intro.
-    elim (alloc_regs_fresh_or_in_map _ _ _ _ _ MWFs0 EQ0 r0 i); intro.
-    tauto. byContradiction. apply valid_fresh_absurd with r0 s0.
-    eauto with rtlg. assumption.
-    tauto.
-  apply OTHER1.
-    apply reg_valid_incr with s.
-    apply state_incr_trans3 with s0 s1 s2; eauto with rtlg.
-    assumption.
-    case (Reg.eq r0 r); intro.
-    subst r0.
-    elim (alloc_reg_fresh_or_in_map _ _ _ _ _ MWF EQ); intro.
-    tauto. byContradiction; eauto with rtlg.
-    tauto.
-  red; intro; subst r0.
-  inversion TRG. tauto. 
+  rewrite Regmap.gso. transitivity (rs1#r). 
+  apply OTHER2. simpl; tauto.
+  apply OTHER1; auto. 
+  intuition congruence.
 Qed.
 
 Lemma transl_expr_Econdition_correct:
@@ -552,52 +700,20 @@ Lemma transl_expr_Econdition_correct:
   t = t1 ** t2 ->
   transl_expr_correct sp le e m (Econdition a b c) t m2 v2.
 Proof.
-  intros; red; intros. simpl in TE; monadInv TE. intro EQ1; clear TE.
-
-  assert (MWF1: map_wf map s1).
-    apply map_wf_incr with s. eauto with rtlg. assumption.
-  generalize (H0 _ _ _ _ _ _ _ MWF1 EQ1 ME).
-  intros [rs1 [EX1 [ME1 OTHER1]]].
-  destruct v1.
-
-  assert (MWF0: map_wf map s0). eauto with rtlg.
-  assert (TRG0: target_reg_ok s0 map b rd).
-    inversion TRG. apply target_reg_other; eauto with rtlg.
-  generalize (H2 _ _ _ _ _ _ _ MWF0 EQ0 ME1 TRG0).  
-  intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
-  exists rs2.
-(* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  eexact EX2. auto.
-(* Match-env *)
-  split. assumption.
-(* Result value *)
-  split. assumption.
-(* Other regs *)
-  intros. transitivity (rs1#r). 
-  apply OTHER2; auto. eauto with rtlg.
-  apply OTHER1; auto. apply reg_valid_incr with s.
-  apply state_incr_trans with s0; eauto with rtlg. assumption.
-
-  assert (TRGc: target_reg_ok s map c rd).
-    inversion TRG. apply target_reg_other; auto.
-  generalize (H2 _ _ _ _ _ _ _ MWF EQ ME1 TRGc).  
-  intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 OTHER1]]].
+  assert (tr_expr code map pr (if v1 then b else c) (if v1 then ntrue else nfalse) nd rd).
+    destruct v1; auto.
+  exploit H2; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
   exists rs2.
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s0. eauto with rtlg.
-  eexact EX2. auto.
+  split. eapply star_trans. eexact EX1. eexact EX2. auto. 
 (* Match-env *)
   split. assumption.
 (* Result value *)
   split. assumption.
 (* Other regs *)
-  intros. transitivity (rs1#r). 
-  apply OTHER2; auto. eauto with rtlg.
-  apply OTHER1; auto. apply reg_valid_incr with s.
-  apply state_incr_trans2 with s0 s1; eauto with rtlg. assumption.
+  intros. transitivity (rs1#r); auto.
 Qed.
 
 Lemma transl_expr_Elet_correct:
@@ -611,47 +727,25 @@ Lemma transl_expr_Elet_correct:
   t = t1 ** t2 ->
   transl_expr_correct sp le e m (Elet a b) t m2 v2.
 Proof.
-  intros; red; intros.
-  simpl in TE; monadInv TE. intro EQ1.
-  assert (MWF1: map_wf map s1). eauto with rtlg.
-  assert (TRG1: target_reg_ok s1 map a r).
-  eapply target_reg_other; eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWF1 EQ1 ME TRG1).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  assert (MWF2: map_wf (add_letvar map r) s0). 
-  apply add_letvar_wf; eauto with rtlg. 
-  assert (ME2: match_env (add_letvar map r) e (v1 :: le) rs1).
-  apply match_env_letvar; assumption.
-  assert (TRG2: target_reg_ok s0 (add_letvar map r) b rd).
-  inversion TRG. apply target_reg_other. 
-  unfold reg_in_map, add_letvar; simpl. red; intro.
-  elim H10; intro. apply H4. left. assumption.
-  elim H11; intro. apply valid_fresh_absurd with rd s.
-  assumption. rewrite <- H12. eauto with rtlg.
-  apply H4. right. assumption.
-  eauto with rtlg.
-  generalize (H2 _ _ _ _ _ _ _ MWF2 EQ0 ME2 TRG2).
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  assert (map_wf (add_letvar map r)).
+    eapply add_letvar_wf; eauto. 
+  exploit H2; eauto. eapply match_env_bind_letvar; eauto. 
   intros [rs2 [EX2 [ME3 [RES2 OTHER2]]]].
   exists rs2.
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg. eexact EX2. auto.
+  split. eapply star_trans. eexact EX1. eexact EX2. auto.
 (* Match-env *)
-  split. apply mk_match_env. exact (me_vars _ _ _ _ ME3). 
-  generalize (me_letvars _ _ _ _ ME3). 
-  unfold add_letvar; simpl. intro X; injection X; auto.
+  split. eapply match_env_unbind_letvar; eauto.
 (* Result *)
   split. assumption.
 (* Other regs *)
   intros. transitivity (rs1#r0). 
-  apply OTHER2. eauto with rtlg. 
-  elim H5; intro. left. 
+  apply OTHER2. elim H4; intro; auto.
   unfold reg_in_map, add_letvar; simpl.
-  unfold reg_in_map in H6; tauto.
-  tauto.
-  apply OTHER1. eauto with rtlg.
-  right. red; intro. apply valid_fresh_absurd with r0 s.
-  assumption. rewrite H6. eauto with rtlg.
+  unfold reg_in_map in H5; tauto.
+  auto.
 Qed.
 
 Lemma transl_expr_Eletvar_correct:
@@ -660,35 +754,22 @@ Lemma transl_expr_Eletvar_correct:
   nth_error le n = Some v ->
   transl_expr_correct sp le e m (Eletvar n) E0 m v.
 Proof.
-  intros; red; intros. 
-  simpl in TE; monadInv TE. intro EQ1.
-  generalize EQ. unfold find_letvar. 
-  caseEq (nth_error (map_letvars map) n).
-  intros r0 NE; monadSimpl. subst s0; subst r0.
-  generalize (add_move_correct _ _ sp _ _ _ _ rs m EQ1).
-  intros [rs1 [EX1 [RES1 OTHER1]]].
+  intros; red; intros; inv TE.
+  exploit tr_move_correct; eauto. intros [rs1 [EX1 [RES1 OTHER1]]].
   exists rs1.
 (* Exec *)
-  split. exact EX1.
+  split. eexact EX1.
 (* Match-env *)
-  split. inversion TRG.
-  assert (r = rd). congruence.
-  subst r. apply match_env_exten with rs. 
-  intros. case (Reg.eq r rd); intro. subst r; auto. auto. auto.
-  apply match_env_invariant with rs. auto. 
-  intros. apply OTHER1. red;intro;subst r1. contradiction.
+  split. apply match_env_invariant with rs. auto.
+  intros. destruct H2 as [A | [B C]]. 
+  subst r. destruct (Reg.eq r0 rd). subst r0; auto. auto.
+  apply OTHER1. intuition congruence.
 (* Result *)
-  split. rewrite RES1. 
-  generalize H. rewrite <- (me_letvars _ _ _ _ ME). 
-  change positive with reg.
-  rewrite list_map_nth. rewrite NE. simpl. congruence. 
+  split. rewrite RES1. eapply match_env_find_letvar; eauto. 
 (* Other regs *)
-  intros. inversion TRG. 
-  assert (r = rd). congruence. subst r.
-  case (Reg.eq r0 rd); intro. subst r0; auto. auto. 
-  apply OTHER1. red; intro; subst r0. tauto.
-
-  intro; monadSimpl.
+  intros. destruct H2 as [A | [B C]]. 
+  subst r. destruct (Reg.eq r0 rd). subst r0; auto. auto.
+  apply OTHER1. intuition congruence.
 Qed.
 
 Lemma transl_expr_Ealloc_correct:
@@ -700,47 +781,35 @@ Lemma transl_expr_Ealloc_correct:
   Mem.alloc m1 0 (Int.signed n) = (m2, b) ->
   transl_expr_correct sp le e m (Ealloc a) t m2 (Vptr b Int.zero).
 Proof.
-  intros until b; intros EE TEC ALLOC; red; intros.
-  simpl in TE. monadInv TE. intro EQ1.
-  assert (TRG': target_reg_ok s1 map a r); eauto with rtlg.
-  assert (MWF': map_wf map s1). eauto with rtlg.
-  generalize (TEC _ _ _ _ _ _ _ MWF' EQ1 ME TRG').
-  intros [rs1 [EX1 [ME1 [RR1 RO1]]]].
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RR1 RO1]]]].
   exists (rs1#rd <- (Vptr b Int.zero)).
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  apply exec_one; eapply exec_Ialloc. eauto with rtlg.
+  split. eapply star_right. eexact EX1. 
+  eapply exec_Ialloc. eauto with rtlg.
   eexact RR1. assumption. traceEq. 
 (* Match-env *)
-  split. inversion TRG. eauto with rtlg.
+  split. eauto with rtlg. 
 (* Result *)
   split. apply Regmap.gss.
 (* Other regs *)
-  intros. rewrite Regmap.gso.
-  apply RO1. eauto with rtlg. 
-  case (Reg.eq r0 r); intro.
-  subst r0. left. elim (alloc_reg_fresh_or_in_map _ _ _ _ _ MWF EQ); intro. 
-  auto. byContradiction; eauto with rtlg.
-  right; assumption.
-  elim H0; intro. red; intro. subst r0. inversion TRG. contradiction. 
-  auto.
+  intros. rewrite Regmap.gso. auto. intuition congruence.
 Qed.
 
 Lemma transl_condition_CEtrue_correct:
   forall (sp: val) (le : letenv) (e : env) (m : mem),
   transl_condition_correct sp le e m CEtrue E0 m true.
 Proof.
-  intros; red; intros. simpl in TE; monadInv TE. subst ns.
-  exists rs. split. apply exec_refl. split. auto. auto.
+  intros; red; intros; inv TE. 
+  exists rs. split. apply star_refl. split. auto. auto.
 Qed.    
 
 Lemma transl_condition_CEfalse_correct:
   forall (sp: val) (le : letenv) (e : env) (m : mem),
   transl_condition_correct sp le e m CEfalse E0 m false.
 Proof.
-  intros; red; intros. simpl in TE; monadInv TE. subst ns.
-  exists rs. split. apply exec_refl. split. auto. auto.
+  intros; red; intros; inv TE. 
+  exists rs. split. apply star_refl. split. auto. auto.
 Qed.    
 
 Lemma transl_condition_CEcond_correct:
@@ -749,33 +818,24 @@ Lemma transl_condition_CEcond_correct:
     (m1 : mem) (vl : list val) (b : bool),
   eval_exprlist ge sp le e m al t1 m1 vl ->
   transl_exprlist_correct sp le e m al t1 m1 vl ->
-  eval_condition cond vl =  Some b ->
+  eval_condition cond vl m1 = Some b ->
   transl_condition_correct sp le e m (CEcond cond al) t1 m1 b.
 Proof.
-  intros; red; intros. simpl in TE; monadInv TE. intro EQ1; clear TE.
-  assert (MWF1: map_wf map s1). eauto with rtlg.
-  assert (TRG: target_regs_ok s1 map al l).  eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWF1 EQ1 ME TRG).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
   exists rs1.
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  apply exec_one. 
+  split. eapply star_right. eexact EX1. 
   destruct b.
-  eapply exec_Icond_true. eauto with rtlg. 
+  eapply exec_Icond_true; eauto. 
   rewrite RES1. assumption.
-  eapply exec_Icond_false. eauto with rtlg. 
+  eapply exec_Icond_false; eauto. 
   rewrite RES1. assumption.
   traceEq.
 (* Match-env *)
   split. assumption.
 (* Regs *)
-  intros. apply OTHER1. eauto with rtlg. 
-  case (In_dec Reg.eq r l); intro.
-  elim (alloc_regs_fresh_or_in_map _ _ _ _ _ MWF EQ r i); intro.
-  tauto. byContradiction; eauto with rtlg.
-  tauto.
+  auto.
 Qed.
 
 Lemma transl_condition_CEcondition_correct:
@@ -789,47 +849,31 @@ Lemma transl_condition_CEcondition_correct:
   t = t1 ** t2 ->
   transl_condition_correct sp le e m (CEcondition a b c) t m2 vb2.
 Proof.
-  intros; red; intros. simpl in TE; monadInv TE. intro EQ1; clear TE.
-  assert (MWF1: map_wf map s1). eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWF1 EQ1 ME).
-  intros [rs1 [EX1 [ME1 OTHER1]]].
-  destruct vb1.
-
-  assert (MWF0: map_wf map s0). eauto with rtlg.
-  generalize (H2 _ _ _ _ _ _ _ MWF0 EQ0 ME1).
-  intros [rs2 [EX2 [ME2 OTHER2]]].
-  exists rs2.
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  eexact EX2. auto.
-  split. assumption. 
-  intros. transitivity (rs1#r). 
-  apply OTHER2; eauto with rtlg.
-  apply OTHER1; eauto with rtlg.
-
-  generalize (H2 _ _ _ _ _ _ _ MWF EQ ME1).
-  intros [rs2 [EX2 [ME2 OTHER2]]].
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 OTHER1]]].
+  assert (tr_condition code map pr (if vb1 then b else c)
+             (if vb1 then ntrue' else nfalse') ntrue nfalse).
+    destruct vb1; auto.
+  exploit H2; eauto. intros [rs2 [EX2 [ME2 OTHER2]]].
   exists rs2.
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s0. eauto with rtlg.
-  eexact EX2. auto.
-  split. assumption. 
-  intros. transitivity (rs1#r). 
-  apply OTHER2; eauto with rtlg.
-  apply OTHER1; eauto with rtlg.
+(* Execution *)
+  split. eapply star_trans. eexact EX1. eexact EX2. auto. 
+(* Match-env *)
+  split. auto.
+(* Regs *)
+  intros. transitivity (rs1#r); auto.
 Qed.
  
 Lemma transl_exprlist_Enil_correct:
   forall (sp: val) (le : letenv) (e : env) (m : mem),
   transl_exprlist_correct sp le e m Enil E0 m nil.
 Proof.
-  intros; red; intros. 
-  generalize TE. simpl. destruct rl; monadSimpl. 
-  subst s'; subst ns. exists rs.
-  split. apply exec_refl.
+  intros; red; intros; inv TE.
+  exists rs.
+  split. apply star_refl.
   split. assumption.
   split. reflexivity.
-  intros. reflexivity.
+  auto.
 Qed.
 
 Lemma transl_exprlist_Econs_correct:
@@ -843,93 +887,90 @@ Lemma transl_exprlist_Econs_correct:
   t = t1 ** t2 ->
   transl_exprlist_correct sp le e m (Econs a bl) t m2 (v :: vl).
 Proof.
-  intros. red. intros. 
-  inversion TRG.
-  rewrite <- H10 in TE. simpl in TE. monadInv TE. intro EQ1.
-  assert (MWF1: map_wf map s1); eauto with rtlg.
-  assert (TRG1: target_reg_ok s1 map a r); eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWF1 EQ1 ME TRG1).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  generalize (H2 _ _ _ _ _ _ _ MWF EQ ME1 H11).
-  intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit H2; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
   exists rs2.
 (* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s1. eauto with rtlg. 
-  eexact EX2. auto. 
+  split. eapply star_trans. eexact EX1. eexact EX2. auto. 
 (* Match-env *)
   split. assumption.
 (* Results *)
-  split. simpl. rewrite RES2. rewrite OTHER2. rewrite RES1.
-  reflexivity.
-  eauto with rtlg. 
-  eauto with rtlg.
+  split. simpl. rewrite RES2. rewrite OTHER2. rewrite RES1. auto.
+  simpl; tauto. 
 (* Other regs *)
-  simpl. intros.
-  transitivity (rs1#r0).
-  apply OTHER2; auto. tauto. 
-  apply OTHER1; auto. eauto with rtlg. 
-  elim H13; intro. left; assumption. right; apply sym_not_equal; tauto.
+  intros. transitivity (rs1#r).
+  apply OTHER2; auto. simpl; tauto. 
+  apply OTHER1; auto.
 Qed.
 
 Lemma transl_funcall_internal_correct:
-  forall (m : mem) (f : Cminor.function)
+  forall (m : mem) (f : CminorSel.function)
          (vargs : list val) (m1 : mem) (sp : block) (e : env) (t : trace)
-         (e2 : env) (m2 : mem) (out : outcome) (vres : val),
+         (e2 : env) (m2 : mem) (out: outcome) (vres : val),
   Mem.alloc m 0 (fn_stackspace f) = (m1, sp) ->
-  set_locals (fn_vars f) (set_params vargs (Cminor.fn_params f)) = e ->
+  set_locals (fn_vars f) (set_params vargs (CminorSel.fn_params f)) = e ->
   exec_stmt ge (Vptr sp Int.zero) e m1 (fn_body f) t e2 m2 out ->
   transl_stmt_correct (Vptr sp Int.zero) e m1 (fn_body f) t e2 m2 out ->
-  outcome_result_value out f.(Cminor.fn_sig).(sig_res) vres ->
-  transl_function_correct m (Internal f)
-            vargs t (Mem.free m2 sp) vres.
+  outcome_result_value out f.(CminorSel.fn_sig).(sig_res) vres ->
+  transl_function_correct m (Internal f) vargs t 
+                            (outcome_free_mem out m2 sp) vres.
 Proof.
   intros; red; intros.
-  generalize TE. unfold transl_fundef, transl_function; simpl.
-  caseEq (transl_fun f init_state).
-  intros; discriminate.
-  intros ns s. unfold transl_fun. monadSimpl. 
-  subst ns. intros EQ4. injection EQ4; intro TF; clear EQ4.
-  subst s4.
-
-  pose (rs := init_regs vargs x).
-  assert (ME: match_env y0 e nil rs).
+  generalize TE; simpl. caseEq (transl_function f); simpl. 2: congruence.
+  intros tfi EQ1 EQ2. injection EQ2; clear EQ2; intro EQ2.
+  assert (TR: tr_function f tfi). apply transl_function_charact; auto.
+  rewrite <- EQ2. inversion TR. subst f0. 
+
+  pose (rs := init_regs vargs rparams).
+  assert (ME: match_env map2 e nil rs).
   rewrite <- H0. unfold rs. 
-  eapply match_init_env_init_reg; eauto. 
+  eapply match_init_env_init_reg; eauto.
+
+  assert (MWF: map_wf map2).
+    assert (map_valid init_mapping init_state) by apply init_mapping_valid.
+    exploit (add_vars_valid (CminorSel.fn_params f)); eauto. intros [A B].
+    eapply add_vars_wf; eauto. eapply add_vars_wf; eauto. apply init_mapping_wf.
+
+  exploit H2; eauto. intros [rs' [st [OUT [EX ME']]]].
+
+  eapply star_left.
+  eapply exec_function_internal; eauto. simpl.
+  inversion OUT; clear OUT; subst out st; simpl in H3; simpl.
+
+  (* Out_normal *)
+  unfold ret_reg in H6. destruct (sig_res (CminorSel.fn_sig f)). contradiction. 
+  subst vres orret. 
+  eapply star_right. unfold rs in EX. eexact EX.
+  change Vundef with (regmap_optget None Vundef rs').
+  apply exec_Ireturn. auto. reflexivity.
+
+  (* Out_exit *)
+  contradiction.
+
+  (* Out_return None *)
+  subst orret.
+  unfold ret_reg in H8. destruct (sig_res (CminorSel.fn_sig f)). contradiction.
+  subst vres. 
+  eapply star_right. unfold rs in EX. eexact EX. 
+  change Vundef with (regmap_optget None Vundef rs').
+  apply exec_Ireturn. auto.
+  reflexivity. 
 
-  assert (OUT: outcome_node out n nil n n).
-  red. generalize H3. unfold outcome_result_value.
-  destruct out; contradiction || auto. 
- 
-  assert (MWF1: map_wf y0 s1).
-  eapply add_vars_wf. eexact EQ0. 
-  eapply add_vars_wf. eexact EQ.
-  apply init_mapping_wf.
-
-  assert (MWF: map_wf y0 s3).
-  apply map_wf_incr with s1. apply state_incr_trans with s2; eauto with rtlg.
-  assumption.
-
-  set (rr := ret_reg (Cminor.fn_sig f) r) in *.
-  assert (RRG: return_reg_ok s3 y0 rr).
-  apply return_reg_ok_incr with s2. eauto with rtlg. 
-  unfold rr; apply new_reg_return_ok with s1; assumption.
-
-  generalize (H2 _ _ _ _ _ _ _ _ _ rs MWF EQ3 ME OUT RRG).
-  intros [rs1 [EX1 [ME1 MR1]]].
-
-  rewrite <- TF. apply exec_funct_internal with m1 n rs1 rr; simpl.
-  assumption.
-  exact EX1.
-  apply instr_at_incr with s3.
-  eauto with rtlg. discriminate. eauto with rtlg.
-  generalize MR1. unfold match_return_outcome.
-  generalize H3. unfold outcome_result_value.
-  unfold rr, ret_reg; destruct (sig_res (Cminor.fn_sig f)).
-  unfold regmap_optget. destruct out; try contradiction.
-  destruct o; try contradiction. intros; congruence.
-  unfold regmap_optget. destruct out; contradiction||auto.
-  destruct o; contradiction||auto.
+  (* Out_return Some *)
+  subst orret. 
+  unfold ret_reg in H8. unfold ret_reg in H9.
+  destruct (sig_res (CminorSel.fn_sig f)). inversion H9.
+  subst vres.    
+  eapply star_right. unfold rs in EX. eexact EX.
+  replace v with (regmap_optget (Some rret) Vundef rs').
+  apply exec_Ireturn. auto.
+  simpl. congruence. 
+  reflexivity.
+  contradiction.
+
+  (* a tail call *)
+  subst v. rewrite E0_right. auto. traceEq.
 Qed.
 
 Lemma transl_funcall_external_correct:
@@ -939,17 +980,27 @@ Lemma transl_funcall_external_correct:
 Proof.
   intros; red; intros. unfold transl_function in TE; simpl in TE.
   inversion TE; subst tf. 
-  apply exec_funct_external. auto.
+  apply star_one. apply exec_function_external. auto.
 Qed.
 
 Lemma transl_stmt_Sskip_correct:
   forall (sp: val) (e : env) (m : mem),
   transl_stmt_correct sp e m Sskip E0 e m Out_normal.
 Proof.
-  intros; red; intros. simpl in TE. monadInv TE.
-  subst s'; subst ns.
-  simpl in OUT. subst ncont. 
-  exists rs. simpl. intuition. apply exec_refl.
+  intros; red; intros; inv TE.
+  exists rs; econstructor.
+  split. constructor.
+  split. apply star_refl.
+  auto.
+Qed.
+
+Remark state_for_outcome_stop:
+  forall ncont1 ncont2 nexits nret rret cs code sp rs m out st,
+  state_for_outcome ncont1 nexits nret rret cs code sp rs m out st ->
+  out <> Out_normal ->
+  state_for_outcome ncont2 nexits nret rret cs code sp rs m out st.
+Proof.
+  intros. inv H; congruence || econstructor; eauto.
 Qed.
 
 Lemma transl_stmt_Sseq_continue_correct:
@@ -963,22 +1014,13 @@ Lemma transl_stmt_Sseq_continue_correct:
   t = t1 ** t2 ->
   transl_stmt_correct sp e m (Sseq s1 s2) t e2 m2 out.
 Proof.
-  intros; red; intros. simpl in TE; monadInv TE. intro EQ0.
-  assert (MWF1: map_wf map s0). eauto with rtlg.
-  assert (OUTs: outcome_node Out_normal n nexits nret n).
-    simpl. auto.
-  assert (RRG1: return_reg_ok s0 map rret). eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF1 EQ0 ME OUTs RRG1).
-  intros [rs1 [EX1 [ME1 MR1]]].
-  generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF EQ ME1 OUT RRG).
-  intros [rs2 [EX2 [ME2 MR2]]].
-  exists rs2.
-(* Exec *)
-  split. eapply exec_trans. eexact EX1. 
-  apply exec_instrs_incr with s0. eauto with rtlg.
-  eexact EX2. auto.
-(* Match-env + return *)
-  tauto.
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [st1 [OUT1 [EX1 ME1]]]]. inv OUT1. 
+  exploit H2; eauto. intros [rs2 [st2 [OUT2 [EX2 ME2]]]].
+  exists rs2; exists st2.
+  split. eauto.
+  split. eapply star_trans; eauto.
+  auto.
 Qed.
 
 Lemma transl_stmt_Sseq_stop_correct:
@@ -989,13 +1031,11 @@ Lemma transl_stmt_Sseq_stop_correct:
   out <> Out_normal ->
   transl_stmt_correct sp e m (Sseq s1 s2) t e1 m1 out.
 Proof.
-  intros; red; intros. 
-  simpl in TE; monadInv TE. intro EQ0; clear TE.
-  assert (MWF1: map_wf map s0). eauto with rtlg.
-  assert (OUTs: outcome_node out n nexits nret nd).
-    destruct out; simpl; auto. tauto. 
-  assert (RRG1: return_reg_ok s0 map rret). eauto with rtlg.
-  exact (H0 _ _ _ _ _ _ _ _ _ _ MWF1 EQ0 ME OUTs RRG1).
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [st1 [OUT1 [EX1 ME1]]]].
+  exists rs1; exists st1.
+  split. eapply state_for_outcome_stop; eauto. 
+  auto.
 Qed.
 
 Lemma transl_stmt_Sexpr_correct:
@@ -1005,14 +1045,11 @@ Lemma transl_stmt_Sexpr_correct:
   transl_expr_correct sp nil e m a t m1 v ->
   transl_stmt_correct sp e m (Sexpr a) t e m1 Out_normal.
 Proof.
-  intros; red; intros.
-  simpl in OUT. subst nd.
-  unfold transl_stmt in TE. monadInv TE. intro EQ1.
-  assert (MWF0: map_wf map s0). eauto with rtlg.
-  assert (TRG: target_reg_ok s0 map a r). eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWF0 EQ1 ME TRG).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  exists rs1; simpl; tauto.
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exists rs1; econstructor. 
+  split. constructor.
+  eauto.
 Qed.
 
 Lemma transl_stmt_Sassign_correct:
@@ -1022,26 +1059,17 @@ Lemma transl_stmt_Sassign_correct:
   transl_expr_correct sp nil e m a t m1 v ->
   transl_stmt_correct sp e m (Sassign id a) t (PTree.set id v e) m1 Out_normal.
 Proof.
-  intros; red; intros.
-  simpl in TE. monadInv TE. intro EQ2. 
-  assert (MWF0: map_wf map s2). 
-    apply map_wf_incr with s. eauto 6 with rtlg. auto.
-  assert (TRGa: target_reg_ok s2 map a r0). eauto 6 with rtlg.
-  generalize (H0 _ _ _ _ _ _ _ MWF0 EQ2 ME TRGa).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  generalize (add_move_correct _ _ sp _ _ _ _ rs1 m1 EQ1).
-  intros [rs2 [EX2 [RES2 OTHER2]]].
-  exists rs2.
-(* Exec *)
-  split. inversion OUT; subst. eapply exec_trans. eexact EX1.
-  apply exec_instrs_incr with s2. eauto with rtlg.
-  eexact EX2. traceEq.
-(* Match-env *)
-  split. 
-  apply match_env_update_var with rs1 r s s0; auto.
-  congruence.
-(* Outcome *)
-  simpl; auto.
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit tr_move_correct; eauto. intros [rs2 [EX2 [RES2 OTHER2]]].
+  exists rs2; econstructor.
+  split. constructor.
+  split. eapply star_trans. eexact EX1. eexact EX2. traceEq.
+  apply match_env_invariant with (rs1#rv <- v). 
+  apply match_env_update_var; auto. 
+  intros. rewrite Regmap.gsspec. destruct (peq r rv). 
+  subst r. congruence.
+  auto.
 Qed.
 
 Lemma transl_stmt_Sifthenelse_correct:
@@ -1055,45 +1083,15 @@ Lemma transl_stmt_Sifthenelse_correct:
   t = t1 ** t2 ->
   transl_stmt_correct sp e m (Sifthenelse a s1 s2) t e2 m2 out.
 Proof.
-  intros; red; intros until rs; intro MWF.
-  simpl. case (more_likely a s1 s2); monadSimpl; intro EQ2; intros.
-  assert (MWF1: map_wf map s3). eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ rs MWF1 EQ2 ME).
-  intros [rs1 [EX1 [ME1 OTHER1]]].
-  destruct v1.
-  assert (MWF0: map_wf map s0). eauto with rtlg.
-  assert (RRG0: return_reg_ok s0 map rret). eauto with rtlg.
-  generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME1 OUT RRG0).
-  intros [rs2 [EX2 [ME2 MRE2]]].
-  exists rs2. split.
-  eapply exec_trans. eexact EX1. apply exec_instrs_incr with s3.
-  eauto with rtlg. eexact EX2. auto.
-  tauto.
-  generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF EQ ME1 OUT RRG).
-  intros [rs2 [EX2 [ME2 MRE2]]].
-  exists rs2. split.
-  eapply exec_trans. eexact EX1. apply exec_instrs_incr with s0.
-  eauto with rtlg. eexact EX2. auto.
-  tauto.
-
-  assert (MWF1: map_wf map s3). eauto with rtlg.
-  generalize (H0 _ _ _ _ _ _ rs MWF1 EQ2 ME).
-  intros [rs1 [EX1 [ME1 OTHER1]]].
-  destruct v1.
-  generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF EQ ME1 OUT RRG).
-  intros [rs2 [EX2 [ME2 MRE2]]].
-  exists rs2. split.
-  eapply exec_trans. eexact EX1. apply exec_instrs_incr with s0.
-  eauto with rtlg. eexact EX2. auto.
-  tauto.
-  assert (MWF0: map_wf map s0). eauto with rtlg.
-  assert (RRG0: return_reg_ok s0 map rret). eauto with rtlg.
-  generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME1 OUT RRG0).
-  intros [rs2 [EX2 [ME2 MRE2]]].
-  exists rs2. split.
-  eapply exec_trans. eexact EX1. apply exec_instrs_incr with s3.
-  eauto with rtlg. eexact EX2. auto.
-  tauto.
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 OTHER1]]].
+  assert (tr_stmt code map (if v1 then s1 else s2) (if v1 then ntrue else nfalse) ncont nexits nret rret).
+    destruct v1; auto.
+  exploit H2; eauto. intros [rs2 [st2 [OUT2 [EX2 ME2]]]].
+  exists rs2; exists st2.
+  split. eauto.
+  split. eapply star_trans. eexact EX1. eexact EX2. auto.
+  auto.
 Qed.
 
 Lemma transl_stmt_Sloop_loop_correct:
@@ -1107,34 +1105,15 @@ Lemma transl_stmt_Sloop_loop_correct:
   t = t1 ** t2 ->
   transl_stmt_correct sp e m (Sloop sl) t e2 m2 out.
 Proof.
-  intros; red; intros.
-  generalize TE. simpl. monadSimpl. subst s2; subst n0. intros.
-  assert (MWF0: map_wf map s0). apply map_wf_incr with s.
-  eapply reserve_instr_incr; eauto.
-  assumption.
-  assert (OUT0: outcome_node Out_normal n nexits nret n).
-  unfold outcome_node. auto.
-  assert (RRG0: return_reg_ok s0 map rret). 
-  apply return_reg_ok_incr with s.
-  eapply reserve_instr_incr; eauto.
-  assumption.
-  generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME OUT0 RRG0).
-  intros [rs1 [EX1 [ME1 MR1]]].
-  generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF TE ME1 OUT RRG).
-  intros [rs2 [EX2 [ME2 MR2]]].
-  exists rs2.
-  split. eapply exec_trans.
-  apply exec_instrs_extends with s1. 
-  eapply update_instr_extends.
-  eexact EQ. eauto with rtlg. eexact EQ1. eexact EX1.
-  apply exec_trans with E0 ns rs1 m1 t2.
-  apply exec_one. apply exec_Inop. 
-  generalize EQ1. unfold update_instr. 
-  case (plt n (st_nextnode s1)); intro; monadSimpl.
-  rewrite <- H5. simpl. apply PTree.gss.
-  exact EX2.
-  reflexivity. traceEq. 
-  tauto.
+  intros; red; intros; inversion TE. subst. 
+  exploit H0; eauto. intros [rs1 [st1 [OUT1 [EX1 ME1]]]]. inv OUT1.
+  exploit H2; eauto. intros [rs2 [st2 [OUT2 [EX2 ME2]]]].
+  exists rs2; exists st2. 
+  split. eauto.
+  split. eapply star_trans. eexact EX1. 
+  eapply star_left. apply exec_Inop; eauto. eexact EX2. 
+  reflexivity. traceEq.
+  auto.
 Qed.
 
 Lemma transl_stmt_Sloop_stop_correct:
@@ -1145,25 +1124,12 @@ Lemma transl_stmt_Sloop_stop_correct:
   out <> Out_normal ->
   transl_stmt_correct sp e m (Sloop sl) t e1 m1 out.
 Proof.
-  intros; red; intros.
-  simpl in TE. monadInv TE. subst s2; subst n0.
-  assert (MWF0: map_wf map s0). apply map_wf_incr with s.
-  eapply reserve_instr_incr; eauto. assumption.
-  assert (OUT0: outcome_node out n nexits nret nd).
-  generalize OUT. unfold outcome_node. 
-  destruct out; auto. elim H1; auto.
-  assert (RRG0: return_reg_ok s0 map rret). 
-  apply return_reg_ok_incr with s.
-  eapply reserve_instr_incr; eauto.
-  assumption.
-  generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME OUT0 RRG0).
-  intros [rs1 [EX1 [ME1 MR1]]].
-  exists rs1. split.
-  apply exec_instrs_extends with s1. 
-  eapply update_instr_extends.
-  eexact EQ. eauto with rtlg. eexact EQ1. eexact EX1.
-  tauto.
-Qed.  
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [st1 [OUT1 [EX1 ME1]]]].
+  exists rs1; exists st1.
+  split. eapply state_for_outcome_stop; eauto.
+  auto.
+Qed.
 
 Lemma transl_stmt_Sblock_correct:
   forall (sp: val) (e : env) (m : mem) (sl : stmt) (t: trace)
@@ -1172,65 +1138,59 @@ Lemma transl_stmt_Sblock_correct:
   transl_stmt_correct sp e m sl t e1 m1 out ->
   transl_stmt_correct sp e m (Sblock sl) t e1 m1 (outcome_block out).
 Proof.
-  intros; red; intros. simpl in TE. 
-  assert (OUT': outcome_node out ncont (ncont :: nexits) nret nd).
-  generalize OUT. unfold outcome_node, outcome_block.
-  destruct out.
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [st1 [OUT1 [EX1 ME1]]]].
+  exists rs1; exists st1.
+  split. inv OUT1; simpl; try (econstructor; eauto).
+  destruct n; simpl in H1.
+    inv H1. constructor.
+    constructor. auto.
   auto.
-  destruct n. simpl. intro; unfold value; congruence.
-  simpl. auto.
-  auto.
-  generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF TE ME OUT' RRG).
-  intros [rs1 [EX1 [ME1 MR1]]].
-  exists rs1. 
-  split. assumption.
-  split. assumption.
-  generalize MR1. unfold match_return_outcome, outcome_block.
-  destruct out; auto. 
-  destruct n; simpl; auto.
-Qed.
-
-Lemma transl_exit_correct:
-  forall nexits ex s nd s',
-  transl_exit nexits ex s = OK nd s' ->
-  nth_error nexits ex = Some nd.
-Proof.
-  intros until s'. unfold transl_exit. 
-  case (nth_error nexits ex); intros; simplify_eq H; congruence.
 Qed.
 
 Lemma transl_stmt_Sexit_correct:
   forall (sp: val) (e : env) (m : mem) (n : nat),
   transl_stmt_correct sp e m (Sexit n) E0 e m (Out_exit n).
 Proof.
-  intros; red; intros. 
-  simpl in TE. simpl in OUT. 
-  generalize (transl_exit_correct _ _ _ _ _ TE); intro.
-  assert (ns = nd). congruence. subst ns.
-  exists rs. simpl. intuition. apply exec_refl.
+  intros; red; intros; inv TE.
+  exists rs; econstructor.
+  split. econstructor; eauto.
+  split. apply star_refl.
+  auto.
 Qed.
 
 Lemma transl_switch_correct:
-  forall sp rs m r i nexits nd default cases s ns s',
-  transl_switch r nexits cases default s = OK ns s' ->
-  nth_error nexits (switch_target i default cases) = Some nd ->
+  forall cs sp rs m i code r nexits t ns,
+  tr_switch code r nexits t ns ->
   rs#r = Vint i ->
-  exec_instrs tge s'.(st_code) sp ns rs m E0 nd rs m.
+  exists nd,
+  star step tge (State cs code sp ns rs m) E0 (State cs code sp nd rs m) /\
+  nth_error nexits (comptree_match i t) = Some nd.
 Proof.
-  induction cases; simpl; intros.
-  generalize (transl_exit_correct _ _ _ _ _ H). intros.
-  assert (ns = nd). congruence. subst ns. apply exec_refl.
-  destruct a as [key1 exit1]. monadInv H. clear H. intro EQ1.
-  caseEq (Int.eq i key1); intro IEQ; rewrite IEQ in H0.
-  (* i = key1 *)
-  apply exec_trans with E0 n0 rs m E0. apply exec_one. 
-  eapply exec_Icond_true. eauto with rtlg. simpl. rewrite H1. congruence.
-  generalize (transl_exit_correct _ _ _ _ _ EQ0); intro.
-  assert (n0 = nd). congruence. subst n0. apply exec_refl. traceEq.
-  (* i <> key1 *)
-  apply exec_trans with E0 n rs m E0. apply exec_one.
-  eapply exec_Icond_false. eauto with rtlg. simpl. rewrite H1. congruence.
-  apply exec_instrs_incr with s0; eauto with rtlg. traceEq.
+  induction 1; intros; simpl.
+  exists n. split. apply star_refl. auto. 
+
+  caseEq (Int.eq i key); intros.
+  exists nfound; split. 
+  apply star_one. eapply exec_Icond_true; eauto. 
+  simpl. rewrite H2. congruence. auto.
+  exploit IHtr_switch; eauto. intros [nd [EX MATCH]].
+  exists nd; split.
+  eapply star_step. eapply exec_Icond_false; eauto. 
+  simpl. rewrite H2. congruence. eexact EX. traceEq.
+  auto.
+
+  caseEq (Int.ltu i key); intros.
+  exploit IHtr_switch1; eauto. intros [nd [EX MATCH]].
+  exists nd; split. 
+  eapply star_step. eapply exec_Icond_true; eauto. 
+  simpl. rewrite H2. congruence. eexact EX. traceEq.
+  auto.
+  exploit IHtr_switch2; eauto. intros [nd [EX MATCH]].
+  exists nd; split. 
+  eapply star_step. eapply exec_Icond_false; eauto. 
+  simpl. rewrite H2. congruence. eexact EX. traceEq.
+  auto.
 Qed.
 
 Lemma transl_stmt_Sswitch_correct:
@@ -1242,32 +1202,25 @@ Lemma transl_stmt_Sswitch_correct:
   transl_stmt_correct sp e m (Sswitch a cases default) t1 e m1
          (Out_exit (switch_target n default cases)).
 Proof.
-  intros; red; intros. monadInv TE. clear TE; intros EQ1.
-  simpl in OUT. 
-  assert (state_incr s s1). eauto with rtlg.
-
-  red in H0. 
-  assert (MWF1: map_wf map s1). eauto with rtlg.
-  assert (TRG1: target_reg_ok s1 map a r). eauto with rtlg.
-  destruct (H0 _ _ _ _ _ _ _ MWF1 EQ1 ME TRG1)
-        as [rs' [EXEC1 [ME1 [RES1 OTHER1]]]].
-  simpl. exists rs'.
-  (* execution *)
-  split. eapply exec_trans. eexact EXEC1. 
-  apply exec_instrs_incr with s1. eauto with rtlg.
-  eapply transl_switch_correct; eauto. traceEq. 
-  (* match_env & match_reg *)
-  tauto.
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit transl_switch_correct; eauto. intros [nd [EX2 MO2]].
+  exists rs1; econstructor.
+  split. econstructor. 
+  rewrite (validate_switch_correct _ _ _ H4 n). eauto.  
+  split. eapply star_trans. eexact EX1. eexact EX2. traceEq.
+  auto.
 Qed.
 
 Lemma transl_stmt_Sreturn_none_correct:
   forall (sp: val) (e : env) (m : mem),
   transl_stmt_correct sp e m (Sreturn None) E0 e m (Out_return None).
 Proof.
-  intros; red; intros. generalize TE. simpl.
-  destruct rret; monadSimpl. 
-  simpl in OUT. subst ns; subst nret.
-  exists rs. intuition. apply exec_refl.
+  intros; red; intros; inv TE.
+  exists rs; econstructor.
+  split. constructor. auto.
+  split. apply star_refl. 
+  auto.
 Qed.
 
 Lemma transl_stmt_Sreturn_some_correct:
@@ -1277,33 +1230,59 @@ Lemma transl_stmt_Sreturn_some_correct:
   transl_expr_correct sp nil e m a t m1 v ->
   transl_stmt_correct sp e m (Sreturn (Some a)) t e m1 (Out_return (Some v)).
 Proof.
-  intros; red; intros. generalize TE; simpl.
-  destruct rret. intro EQ.
-  assert (TRG: target_reg_ok s map a r).
-  inversion RRG. apply target_reg_other; auto.
-  generalize (H0 _ _ _ _ _ _ _ MWF EQ ME TRG).
-  intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
-  simpl in OUT. subst nd. exists rs1. tauto.
-
-  monadSimpl.
+  intros; red; intros; inv TE. 
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exists rs1; econstructor.
+  split. econstructor. reflexivity. auto.
+  eauto.
+Qed.
+
+Lemma transl_stmt_Stailcall_correct:
+  forall (sp : block) (e : env) (m : mem) (sig : signature) (a : expr)
+         (bl : exprlist) (t t1 : trace) (m1 : mem) (t2 : trace) (m2 : mem)
+         (t3 : trace) (m3 : mem) (vf : val) (vargs : list val) (vres : val)
+         (f : CminorSel.fundef),
+  eval_expr ge (Vptr sp Int.zero) nil e m a t1 m1 vf ->
+  transl_expr_correct (Vptr sp Int.zero) nil e m a t1 m1 vf ->
+  eval_exprlist ge (Vptr sp Int.zero) nil e m1 bl t2 m2 vargs ->
+  transl_exprlist_correct (Vptr sp Int.zero) nil e m1 bl t2 m2 vargs ->
+  Genv.find_funct ge vf = Some f ->
+  CminorSel.funsig f = sig ->
+  eval_funcall ge (free m2 sp) f vargs t3 m3 vres ->
+  transl_function_correct (free m2 sp) f vargs t3 m3 vres ->
+  t = t1 ** t2 ** t3 ->
+  transl_stmt_correct (Vptr sp Int.zero) e m (Stailcall sig a bl)
+                                       t e m3 (Out_tailcall_return vres).
+Proof.
+  intros; red; intros; inv TE.
+  exploit H0; eauto. intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]].
+  exploit H2; eauto. intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]].
+  exploit functions_translated; eauto. intros [tf [TFIND TF]].
+  exploit H6; eauto. intro EXF.
+  exists rs2; econstructor. 
+  split. constructor. 
+  split. 
+  eapply star_trans. eexact EX1. 
+  eapply star_trans. eexact EX2. 
+  eapply star_step. 
+  eapply exec_Itailcall; eauto.
+  simpl. rewrite OTHER2. rewrite RES1. eauto. simpl; tauto.
+  eapply sig_transl_function; eauto.
+  rewrite RES2. eexact EXF. 
+  reflexivity. reflexivity. traceEq.
+  auto.
 Qed.
 
 (** The correctness of the translation then follows by application
-  of the mutual induction principle for Cminor evaluation derivations
+  of the mutual induction principle for CminorSel evaluation derivations
   to the lemmas above. *)
 
-Scheme eval_expr_ind_5 := Minimality for eval_expr Sort Prop
-  with eval_condexpr_ind_5 := Minimality for eval_condexpr Sort Prop
-  with eval_exprlist_ind_5 := Minimality for eval_exprlist Sort Prop
-  with eval_funcall_ind_5 := Minimality for eval_funcall Sort Prop
-  with exec_stmt_ind_5 := Minimality for exec_stmt Sort Prop.
-
 Theorem transl_function_correctness:
   forall m f vargs t m' vres,
   eval_funcall ge m f vargs t m' vres ->
   transl_function_correct m f vargs t m' vres.
 Proof
-  (eval_funcall_ind_5 ge
+  (eval_funcall_ind5 ge
     transl_expr_correct
     transl_condition_correct
     transl_exprlist_correct
@@ -1339,26 +1318,35 @@ Proof
     transl_stmt_Sexit_correct
     transl_stmt_Sswitch_correct
     transl_stmt_Sreturn_none_correct
-    transl_stmt_Sreturn_some_correct).
+    transl_stmt_Sreturn_some_correct
+    transl_stmt_Stailcall_correct).
+
+Require Import Smallstep.
+
+(** The correctness of the translation follows: if the original CminorSel
+  program executes with trace [t] and exit code [r], then the generated
+  RTL program terminates with the same trace and exit code. *)
 
 Theorem transl_program_correct:
-  forall (t: trace) (r: val),
-  Cminor.exec_program prog t r ->
-  RTL.exec_program tprog t r.
+  forall (t: trace) (r: int),
+  CminorSel.exec_program prog t (Vint r) ->
+  RTL.exec_program tprog (Terminates t r).
 Proof.
   intros t r [b [f [m [SYMB [FUNC [SIG EVAL]]]]]].
   generalize (function_ptr_translated _ _ FUNC).
   intros [tf [TFIND TRANSLF]].
-  red; exists b; exists tf; exists m.
-  split. rewrite symbols_preserved. 
-  replace (prog_main tprog) with (prog_main prog).
-  assumption. 
+  exploit transl_function_correctness; eauto. intro EX.
+  econstructor.
+  econstructor. 
+  rewrite symbols_preserved. 
+  replace (prog_main tprog) with (prog_main prog). eauto.
   symmetry; apply transform_partial_program_main with transl_fundef.
   exact TRANSL.
-  split. exact TFIND.
-  split. generalize (sig_transl_function _ _ TRANSLF). congruence.
+  eexact TFIND.
+  generalize (sig_transl_function _ _ TRANSLF). congruence.
   unfold fundef; rewrite (Genv.init_mem_transf_partial transl_fundef prog TRANSL).
-  exact (transl_function_correctness _ _ _ _ _ _ EVAL _ TRANSLF).
+  eexact EX. 
+  constructor.
 Qed.
 
 End CORRECTNESS.
diff --git a/backend/RTLgenproof1.v b/backend/RTLgenproof1.v
deleted file mode 100644
index 8e12e798..00000000
--- a/backend/RTLgenproof1.v
+++ /dev/null
@@ -1,1367 +0,0 @@
-(** Correctness proof for RTL generation: auxiliary results. *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import AST.
-Require Import Integers.
-Require Import Values.
-Require Import Events.
-Require Import Mem.
-Require Import Globalenvs.
-Require Import Op.
-Require Import Registers.
-Require Import Cminor.
-Require Import RTL.
-Require Import RTLgen.
-
-(** * Reasoning about monadic computations *)
-
-(** The tactics below simplify hypotheses of the form [f ... = OK x s]
-  where [f] is a monadic computation.  For instance, the hypothesis
-  [(do x <- a; b) s = OK y s'] will generate the additional witnesses
-  [x], [s1] and the additional hypotheses
-  [a s = OK x s1] and [b x s1 = OK y s'], reflecting the fact that
-  both monadic computations [a] and [b] succeeded.
-*)
-
-Ltac monadSimpl1 :=
-  match goal with
-  | [ |- (bind ?F ?G ?S = OK ?X ?S') -> _ ] =>
-      unfold bind at 1;
-      generalize (refl_equal (F S));
-      pattern (F S) at -1 in |- *;
-      case (F S);
-      [ intro; intro; discriminate
-      | (let s := fresh "s" in
-         (let EQ := fresh "EQ" in
-          (intro; intros s EQ;
-           try monadSimpl1))) ]
-  | [ |- (bind2 ?F ?G ?S = OK ?X ?S') -> _ ] =>
-      unfold bind2 at 1; unfold bind at 1;
-      generalize (refl_equal (F S));
-      pattern (F S) at -1 in |- *;
-      case (F S);
-      [ intro; intro; discriminate
-      | let xy := fresh "xy" in
-        (let x := fresh "x" in
-         (let y := fresh "y" in
-          (let s := fresh "s" in
-           (let EQ := fresh "EQ" in
-           (intros xy s EQ; destruct xy as [x y]; simpl;
-            try monadSimpl1))))) ]
-  | [ |- (error _ _ = OK _ _) -> _ ] =>
-      unfold error; monadSimpl1
-  | [ |- (ret _ _ = OK _ _) -> _ ] =>
-      unfold ret; monadSimpl1
-  | [ |- (Error _ = OK _ _) -> _ ] =>
-      intro; discriminate
-  | [ |- (OK _ _ = OK _ _) -> _ ] =>
-      let h := fresh "H" in
-      (intro h; injection h; intro; intro; clear h)
-  end.
-
-Ltac monadSimpl :=
-  match goal with
-  | [ |- (bind ?F ?G ?S = OK ?X ?S') -> _ ] => monadSimpl1
-  | [ |- (bind2 ?F ?G ?S = OK ?X ?S') -> _ ] => monadSimpl1
-  | [ |- (error _ _ = OK _ _) -> _ ] => monadSimpl1
-  | [ |- (ret _ _ = OK _ _) -> _ ] => monadSimpl1
-  | [ |- (Error _ = OK _ _) -> _ ] => monadSimpl1
-  | [ |- (OK _ _ = OK _ _) -> _ ] => monadSimpl1
-  | [ |- (?F _ _ _ _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1
-  | [ |- (?F _ _ _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1
-  | [ |- (?F _ _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1
-  | [ |- (?F _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1
-  | [ |- (?F _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1
-  | [ |- (?F _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1
-  | [ |- (?F _ = OK _ _) -> _ ] => unfold F; monadSimpl1
-  end.
-
-Ltac monadInv H :=
-  generalize H; monadSimpl.
-
-(** * Monotonicity properties of the state *)
-
-(** Operations over the global state satisfy a crucial monotonicity property:
-  nodes are only added to the CFG, but never removed nor their instructions
-  changed; similarly, fresh nodes and fresh registers are only consumed,
-  but never reused.  This property is captured by the following predicate
-  over states, which we show is a partial order. *)
-
-Inductive state_incr: state -> state -> Prop :=
-  state_incr_intro:
-    forall (s1 s2: state),
-    Ple s1.(st_nextnode) s2.(st_nextnode) ->
-    Ple s1.(st_nextreg) s2.(st_nextreg) ->
-    (forall pc, Plt pc s1.(st_nextnode) -> s2.(st_code)!pc = s1.(st_code)!pc) ->
-    state_incr s1 s2.
-
-Lemma instr_at_incr:
-  forall s1 s2 n i,
-  s1.(st_code)!n = i -> i <> None -> state_incr s1 s2 ->
-  s2.(st_code)!n = i.
-Proof.
-  intros. inversion H1.
-  rewrite <- H. apply H4. elim (st_wf s1 n); intro.
-  assumption. elim H0. congruence.
-Qed.
-
-Lemma state_incr_refl:
-  forall s, state_incr s s.
-Proof.
-  intros. apply state_incr_intro.
-  apply Ple_refl. apply Ple_refl. intros; auto.
-Qed.
-Hint Resolve state_incr_refl: rtlg.
-
-Lemma state_incr_trans:
-  forall s1 s2 s3, state_incr s1 s2 -> state_incr s2 s3 -> state_incr s1 s3.
-Proof.
-  intros. inversion H. inversion H0. apply state_incr_intro.
-  apply Ple_trans with (st_nextnode s2); assumption. 
-  apply Ple_trans with (st_nextreg s2); assumption.
-  intros. transitivity (s2.(st_code)!pc).
-  apply H8. apply Plt_Ple_trans with s1.(st_nextnode); auto.
-  apply H3; auto.
-Qed.
-Hint Resolve state_incr_trans: rtlg.
-
-Lemma state_incr_trans2:
-  forall s1 s2 s3 s4,
-  state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 ->
-  state_incr s1 s4.
-Proof.
-  intros; eauto with rtlg.
-Qed.
-
-Lemma state_incr_trans3:
-  forall s1 s2 s3 s4 s5,
-  state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 -> state_incr s4 s5 ->
-  state_incr s1 s5.
-Proof.
-  intros; eauto with rtlg.
-Qed.
-
-Lemma state_incr_trans4:
-  forall s1 s2 s3 s4 s5 s6,
-  state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 ->
-  state_incr s4 s5 -> state_incr s5 s6 ->
-  state_incr s1 s6.
-Proof.
-  intros; eauto with rtlg.
-Qed.
-
-Lemma state_incr_trans5:
-  forall s1 s2 s3 s4 s5 s6 s7,
-  state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 ->
-  state_incr s4 s5 -> state_incr s5 s6 -> state_incr s6 s7 ->
-  state_incr s1 s7.
-Proof.
-  intros; eauto 6 with rtlg.
-Qed.
-
-Lemma state_incr_trans6:
-  forall s1 s2 s3 s4 s5 s6 s7 s8,
-  state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 ->
-  state_incr s4 s5 -> state_incr s5 s6 -> state_incr s6 s7 ->
-  state_incr s7 s8 -> state_incr s1 s8.
-Proof.
-  intros; eauto 7 with rtlg.
-Qed.
-
-(** The following relation between two states is a weaker version of
-  [state_incr].  It permits changing the contents at an already reserved
-  graph node, but only from [None] to [Some i]. *)
-
-Definition state_extends (s1 s2: state): Prop :=
-  forall pc,
-  s1.(st_code)!pc = None \/ s2.(st_code)!pc = s1.(st_code)!pc.
-
-Lemma state_incr_extends:
-  forall s1 s2,
-  state_incr s1 s2 -> state_extends s1 s2.
-Proof.
-  unfold state_extends; intros. inversion H.   
-  case (plt pc s1.(st_nextnode)); intro.
-  right; apply H2; auto.
-  left. elim (s1.(st_wf) pc); intro.
-  elim (n H5). auto.
-Qed.
-Hint Resolve state_incr_extends.
-
-(** A crucial property of states is the following: if an RTL execution
-  is possible (does not get stuck) in the CFG of a given state [s1]
-  the same execution is possible and leads to the same results in
-  the CFG of any state [s2] that extends [s1] in the sense of the
-  [state_extends] predicate. *)
-
-Section EXEC_INSTR_EXTENDS.
-
-Variable s1 s2: state.
-Hypothesis EXT: state_extends s1 s2.
-
-Lemma exec_instr_not_halt:
-  forall ge c sp pc rs m t pc' rs' m',
-  exec_instr ge c sp pc rs m t pc' rs' m' -> c!pc <> None.
-Proof.
-  induction 1; rewrite H; discriminate.
-Qed.
-
-Lemma exec_instr_in_s2:
-  forall ge sp pc rs m t pc' rs' m',
-  exec_instr ge s1.(st_code) sp pc rs m t pc' rs' m' ->
-  s2.(st_code)!pc = s1.(st_code)!pc.
-Proof.
-  intros.
-  elim (EXT pc); intro.
-  elim (exec_instr_not_halt _ _ _ _ _ _ _ _ _ _ H H0).
-  assumption.
-Qed.
-
-Lemma exec_instr_extends_rec:
-  forall ge c sp pc rs m t pc' rs' m',
-  exec_instr ge c sp  pc rs m t pc' rs' m' ->
-  forall c', c!pc = c'!pc ->
-  exec_instr ge c' sp  pc rs m t pc' rs' m'.
-Proof.
-  induction 1; intros.
-  apply exec_Inop. congruence.
-  apply exec_Iop with op args. congruence. auto.
-  apply exec_Iload with chunk addr args a. congruence. auto. auto.
-  apply exec_Istore with chunk addr args src a.
-    congruence. auto. auto.
-  apply exec_Icall with sig ros args f; auto. congruence.
-  apply exec_Ialloc with arg sz; auto. congruence. 
-  apply exec_Icond_true with cond args ifnot; auto. congruence.
-  apply exec_Icond_false with cond args ifso; auto. congruence.
-Qed.
-
-Lemma exec_instr_extends:
-  forall ge sp pc rs m t pc' rs' m',
-  exec_instr ge s1.(st_code) sp pc rs m t pc' rs' m' ->
-  exec_instr ge s2.(st_code) sp pc rs m t pc' rs' m'.
-Proof.
-  intros.
-  apply exec_instr_extends_rec with (st_code s1).
-  assumption.
-  symmetry. eapply exec_instr_in_s2. eexact H.
-Qed.
-
-Lemma exec_instrs_extends_rec:
-  forall ge c sp pc rs m t pc' rs' m',
-  exec_instrs ge c sp  pc rs m t pc' rs' m' ->
-  c = s1.(st_code) ->
-  exec_instrs ge s2.(st_code) sp  pc rs m t pc' rs' m'.
-Proof.
-  induction 1; intros.
-  apply exec_refl.
-  apply exec_one. apply exec_instr_extends; auto. rewrite <- H0; auto.
-  apply exec_trans with t1 pc2 rs2 m2 t2; auto.
-Qed.
-
-Lemma exec_instrs_extends:
-  forall ge sp pc rs m t pc' rs' m',
-  exec_instrs ge s1.(st_code) sp  pc rs m t pc' rs' m' ->
-  exec_instrs ge s2.(st_code) sp  pc rs m t pc' rs' m'.
-Proof.
-  intros.
-  apply exec_instrs_extends_rec with (st_code s1); auto.
-Qed.
-
-End EXEC_INSTR_EXTENDS.
-
-(** Since [state_incr s1 s2] implies [state_extends s1 s2], we also have
-  that any RTL execution possible in the CFG of [s1] is also possible
-  in the CFG of [s2], provided that [state_incr s1 s2].
-  In particular, any RTL execution that is possible in a partially
-  constructed CFG remains possible in the final CFG obtained at
-  the end of the translation of the current function. *)
-
-Section EXEC_INSTR_INCR.
-
-Variable s1 s2: state.
-Hypothesis INCR: state_incr s1 s2.
-
-Lemma exec_instr_incr:
-  forall ge sp pc rs m t pc' rs' m',
-  exec_instr ge s1.(st_code) sp  pc rs m t pc' rs' m' ->
-  exec_instr ge s2.(st_code) sp  pc rs m t pc' rs' m'.
-Proof.
-  intros.
-  apply exec_instr_extends with s1.
-  apply state_incr_extends; auto.
-  auto.
-Qed.
-
-Lemma exec_instrs_incr:
-  forall ge sp pc rs m t pc' rs' m',
-  exec_instrs ge s1.(st_code) sp  pc rs m t pc' rs' m' ->
-  exec_instrs ge s2.(st_code) sp  pc rs m t pc' rs' m'.
-Proof.
-  intros.
-  apply exec_instrs_extends with s1.
-  apply state_incr_extends; auto.
-  auto.
-Qed.
-
-End EXEC_INSTR_INCR.
-
-(** * Validity and freshness of registers *)
-
-(** An RTL pseudo-register is valid in a given state if it was created
-  earlier, that is, it is less than the next fresh register of the state.
-  Otherwise, the pseudo-register is said to be fresh. *)
-
-Definition reg_valid (r: reg) (s: state) : Prop := 
-  Plt r s.(st_nextreg).
-
-Definition reg_fresh (r: reg) (s: state) : Prop :=
-  ~(Plt r s.(st_nextreg)).
-
-Lemma valid_fresh_absurd:
-  forall r s, reg_valid r s -> reg_fresh r s -> False.
-Proof.
-  intros r s. unfold reg_valid, reg_fresh; case r; tauto.
-Qed.
-Hint Resolve valid_fresh_absurd: rtlg.
-
-Lemma valid_fresh_different:
-  forall r1 r2 s, reg_valid r1 s -> reg_fresh r2 s -> r1 <> r2.
-Proof.
-  unfold not; intros. subst r2. eauto with rtlg.
-Qed.
-Hint Resolve valid_fresh_different: rtlg.
-
-Lemma reg_valid_incr: 
-  forall r s1 s2, state_incr s1 s2 -> reg_valid r s1 -> reg_valid r s2.
-Proof.
-  intros r s1 s2 INCR.
-  inversion INCR.
-  unfold reg_valid. intros; apply Plt_Ple_trans with (st_nextreg s1); auto.
-Qed.
-Hint Resolve reg_valid_incr: rtlg.
-
-Lemma reg_fresh_decr:
-  forall r s1 s2, state_incr s1 s2 -> reg_fresh r s2 -> reg_fresh r s1.
-Proof.
-  intros r s1 s2 INCR. inversion INCR.
-  unfold reg_fresh; unfold not; intros.
-  apply H4. apply Plt_Ple_trans with (st_nextreg s1); auto.
-Qed. 
-Hint Resolve reg_fresh_decr: rtlg.
-
-(** * Well-formedness of compilation environments *)
-
-(** A compilation environment (mapping) is well-formed in a given state if
-  the following properties hold:
-- The registers associated with Cminor local variables and let-bound variables
-  are valid in the state.
-- Two distinct Cminor local variables are mapped to distinct pseudo-registers.
-- A Cminor local variable and a let-bound variable are mapped to
-  distinct pseudo-registers.
-*)
-
-Record map_wf (m: mapping) (s: state) : Prop :=
-  mk_map_wf {
-    map_wf_var_valid:
-      (forall id r, m.(map_vars)!id = Some r -> reg_valid r s);
-    map_wf_letvar_valid:
-      (forall r, In r m.(map_letvars) -> reg_valid r s);
-    map_wf_inj:
-      (forall id1 id2 r,
-         m.(map_vars)!id1 = Some r -> m.(map_vars)!id2 = Some r -> id1 = id2);
-     map_wf_disj:
-      (forall id r,
-         m.(map_vars)!id = Some r -> In r m.(map_letvars) -> False)
-  }.
-Hint Resolve map_wf_var_valid
-             map_wf_letvar_valid
-             map_wf_inj map_wf_disj: rtlg.
-
-Lemma map_wf_incr:
-  forall s1 s2 m,
-  state_incr s1 s2 -> map_wf m s1 -> map_wf m s2.
-Proof.
-  intros. apply mk_map_wf; intros; eauto with rtlg.
-Qed.
-Hint Resolve map_wf_incr: rtlg.
-
-(** A register is ``in'' a mapping if it is associated with a Cminor
-  local or let-bound variable. *)
-
-Definition reg_in_map (m: mapping) (r: reg) : Prop :=
-  (exists id, m.(map_vars)!id = Some r) \/ In r m.(map_letvars).
-
-Lemma reg_in_map_valid:
-  forall m s r,
-  map_wf m s -> reg_in_map m r -> reg_valid r s.
-Proof.
-  intros. elim H0.
-  intros [id EQ]. eauto with rtlg.
-  intro IN. eauto with rtlg.
-Qed.
-Hint Resolve reg_in_map_valid: rtlg.
-
-(** * Properties of basic operations over the state *)
-
-(** Properties of [add_instr]. *)
-
-Lemma add_instr_incr:
-  forall s1 s2 i n,
-  add_instr i s1 = OK n s2 -> state_incr s1 s2.
-Proof.
-  intros until n; monadSimpl.
-  subst s2; apply state_incr_intro; simpl.
-  apply Ple_succ.
-  apply Ple_refl.
-  intros. apply PTree.gso; apply Plt_ne; auto.
-Qed.
-Hint Resolve add_instr_incr: rtlg.
-
-Lemma add_instr_at:
-  forall s1 s2 i n,
-  add_instr i s1 = OK n s2 -> s2.(st_code)!n = Some i.
-Proof.
-  intros until n; monadSimpl.
-  subst n; subst s2; simpl. apply PTree.gss.
-Qed.
-Hint Resolve add_instr_at.
-
-(** Properties of [reserve_instr] and [update_instr]. *)
-
-Lemma reserve_instr_incr:
-  forall s1 s2 n,
-  reserve_instr s1 = OK n s2 -> state_incr s1 s2.
-Proof.
-  intros until n; monadSimpl. subst s2.
-  apply state_incr_intro; simpl.
-  apply Ple_succ.
-  apply Ple_refl.
-  auto.
-Qed.
-
-Lemma update_instr_incr:
-  forall s1 s2 s3 s4 i n t,
-  reserve_instr s1 = OK n s2 ->
-  state_incr s2 s3 ->
-  update_instr n i s3 = OK t s4 ->
-  state_incr s1 s4.
-Proof.
-  intros.
-  monadInv H.
-  generalize H1; unfold update_instr.
-  case (plt n (st_nextnode s3)); intro.
-  monadSimpl. inversion H0.
-  subst s4; apply state_incr_intro; simpl.
-  apply Plt_Ple. apply Plt_Ple_trans with (st_nextnode s2).
-  subst s2; simpl; apply Plt_succ. assumption.
-  rewrite <- H3 in H7; simpl in H7. assumption.
-  intros. rewrite PTree.gso.
-  rewrite <- H3 in H8; simpl in H8. apply H8.
-  apply Plt_trans_succ; assumption.
-  subst n; apply Plt_ne; assumption.
-  intros; discriminate.
-Qed.
-
-Lemma update_instr_extends:
-  forall s1 s2 s3 s4 i n t,
-  reserve_instr s1 = OK n s2 ->
-  state_incr s2 s3 ->
-  update_instr n i s3 = OK t s4 ->
-  state_extends s3 s4.
-Proof.
-  intros.
-  monadInv H.
-  red; intros. 
-  case (peq pc n); intro.
-  subst pc. left. inversion H0. rewrite H6. 
-  rewrite <- H3; simpl.  
-  elim (s1.(st_wf) n); intro.
-  rewrite <- H4 in H9. elim (Plt_strict _ H9).
-  auto.
-  rewrite <- H4. rewrite <- H3; simpl. apply Plt_succ.
-  generalize H1; unfold update_instr.
-  case (plt n s3.(st_nextnode)); intro; monadSimpl.
-  right; rewrite <- H5; simpl. apply PTree.gso; auto.
-Qed.
-
-(** Properties of [new_reg]. *)
-
-Lemma new_reg_incr:
-  forall s1 s2 r, new_reg s1 = OK r s2 -> state_incr s1 s2.
-Proof.
-  intros until r. monadSimpl.
-  subst s2; apply state_incr_intro; simpl.
-  apply Ple_refl. apply Ple_succ. auto.
-Qed.
-Hint Resolve new_reg_incr: rtlg.
-
-Lemma new_reg_valid:
-  forall s1 s2 r,
-  new_reg s1 = OK r s2 -> reg_valid r s2.
-Proof.
-  intros until r. monadSimpl. subst s2; subst r.
-  unfold reg_valid; unfold reg_valid; simpl.
-  apply Plt_succ.
-Qed.
-Hint Resolve new_reg_valid: rtlg.
-
-Lemma new_reg_fresh:
-  forall s1 s2 r,
-  new_reg s1 = OK r s2 -> reg_fresh r s1.
-Proof.
-  intros until r. monadSimpl. subst s2; subst r.
-  unfold reg_fresh; simpl.
-  exact (Plt_strict _).
-Qed.
-Hint Resolve new_reg_fresh: rtlg.
-
-Lemma new_reg_not_in_map:
-  forall s1 s2 m r,
-  new_reg s1 = OK r s2 -> map_wf m s1 -> ~(reg_in_map m r).
-Proof.
-  unfold not; intros; eauto with rtlg.
-Qed. 
-Hint Resolve new_reg_not_in_map: rtlg.
-
-(** * Properties of operations over compilation environments *)
-
-Lemma init_mapping_wf:
-  forall s, map_wf init_mapping s.
-Proof.
-  intro. unfold init_mapping; apply mk_map_wf; simpl; intros.
-  rewrite PTree.gempty in H; discriminate.
-  contradiction.
-  rewrite PTree.gempty in H; discriminate.
-  tauto.
-Qed.  
-
-(** Properties of [find_var]. *)
-
-Lemma find_var_incr:
-  forall s1 s2 map name r,
-  find_var map name s1 = OK r s2 -> state_incr s1 s2.
-Proof.
-  intros until r. unfold find_var.
-  case (map_vars map)!name.
-  intro; monadSimpl. subst s2; auto with rtlg.
-  monadSimpl.
-Qed.
-Hint Resolve find_var_incr: rtlg.
-
-Lemma find_var_in_map:
-  forall s1 s2 map name r,
-  find_var map name s1 = OK r s2 -> map_wf map s1 -> reg_in_map map r.
-Proof.
-  intros until r. unfold find_var; caseEq (map.(map_vars)!name).
-  intros r0 eq. monadSimpl; intros. subst r0. 
-  left. exists name; auto.
-  intro; monadSimpl.
-Qed.
-Hint Resolve find_var_in_map: rtlg.
-
-Lemma find_var_valid:
-  forall s1 s2 map name r,
-  find_var map name s1 = OK r s2 -> map_wf map s1 -> reg_valid r s1.
-Proof.
-  eauto with rtlg.
-Qed.
-Hint Resolve find_var_valid: rtlg.
-
-
-(** Properties of [find_letvar]. *)
-
-Lemma find_letvar_incr:
-  forall s1 s2 map idx r,
-  find_letvar map idx s1 = OK r s2 -> state_incr s1 s2.
-Proof.
-  intros until r. unfold find_letvar.
-  case (nth_error (map_letvars map) idx).
-  intro; monadSimpl. subst s2; auto with rtlg.
-  monadSimpl.
-Qed.
-Hint Resolve find_letvar_incr: rtlg.
-
-Lemma find_letvar_in_map:
-  forall s1 s2 map idx r,
-  find_letvar map idx s1 = OK r s2 -> map_wf map s1 -> reg_in_map map r.
-Proof.
-  intros until r. unfold find_letvar.
-  caseEq (nth_error (map_letvars map) idx).
-  intros r0 EQ; monadSimpl. intros. right. 
-  subst r0; apply nth_error_in with idx; auto.
-  intro; monadSimpl.
-Qed.
-Hint Resolve find_letvar_in_map: rtlg.
-
-Lemma find_letvar_valid:
-  forall s1 s2 map idx r,
-  find_letvar map idx s1 = OK r s2 -> map_wf map s1 -> reg_valid r s1.
-Proof.
-  eauto with rtlg.
-Qed.
-Hint Resolve find_letvar_valid: rtlg.
-
-(** Properties of [add_var]. *)
-
-Lemma add_var_valid:
-  forall s1 s2 map1 map2 name r, 
-  add_var map1 name s1 = OK (r, map2) s2 -> reg_valid r s2.
-Proof.
-  intros until r. monadSimpl. intro. subst r0; subst s.
-  eauto with rtlg.
-Qed.
-
-Lemma add_var_incr:
-  forall s1 s2 map name rm, 
-  add_var map name s1 = OK rm s2 -> state_incr s1 s2.
-Proof.
-  intros until rm; monadSimpl. subst s2. eauto with rtlg.
-Qed.
-Hint Resolve add_var_incr: rtlg.
-
-Lemma add_var_wf:
-  forall s1 s2 map name r map',
-  add_var map name s1 = OK (r,map') s2 -> map_wf map s1 -> map_wf map' s2.
-Proof.
-  intros until map'; monadSimpl; intros.
-  subst r0; subst s; subst map'; apply mk_map_wf; simpl.
-
-  intros id r'. rewrite PTree.gsspec. 
-  case (peq id name); intros.
-  injection H; intros; subst r'. eauto with rtlg.
-  eauto with rtlg.
-  eauto with rtlg.
-
-  intros id1 id2 r'.
-  repeat (rewrite PTree.gsspec). 
-  case (peq id1 name); case (peq id2 name); intros.
-  congruence.
-  rewrite <- H in H0. byContradiction; eauto with rtlg.
-  rewrite <- H0 in H. byContradiction; eauto with rtlg.
-  eauto with rtlg.
-
-  intros id r'. case (peq id name); intro.
-  subst id. rewrite PTree.gss. intro E; injection E; intro; subst r'.
-  intro; eauto with rtlg.
-
-  rewrite PTree.gso; auto. eauto with rtlg.
-Qed.
-Hint Resolve add_var_wf: rtlg.
-
-Lemma add_var_find:
-  forall s1 s2 map name r map',
-  add_var map name s1 = OK (r,map') s2 -> map'.(map_vars)!name = Some r.
-Proof.
-  intros until map'.
-  monadSimpl.
-  intro; subst r0.
-  subst map'; simpl in |- *.
-  apply PTree.gss.
-Qed.
-
-Lemma add_vars_incr:
-  forall names s1 s2 map r,
-  add_vars map names s1 = OK r s2 -> state_incr s1 s2.
-Proof.
-  induction names; simpl. 
-  intros until r; monadSimpl; intros. subst s2; eauto with rtlg.
-  intros until r; monadSimpl; intros.
-  subst s0; eauto with rtlg.
-Qed.
-
-Lemma add_vars_valid:
-  forall namel s1 s2 map1 map2 rl, 
-  add_vars map1 namel s1 = OK (rl, map2) s2 ->
-  forall r, In r rl -> reg_valid r s2.
-Proof.
-  induction namel; simpl; intros.
-  monadInv H. intro. subst rl. elim H0. 
-  monadInv H. intro EQ1. subst rl; subst s0; subst y0. elim H0.
-  intro; subst r. eapply add_var_valid. eexact EQ0. 
-  intro. apply reg_valid_incr with s. eauto with rtlg.
-  eauto.
-Qed.
-
-Lemma add_vars_wf:
-  forall names s1 s2 map map' rl,
-  add_vars map names s1 = OK (rl,map') s2 ->
-  map_wf map s1 -> map_wf map' s2.
-Proof.
-  induction names; simpl.
-  intros until rl; monadSimpl; intros.
-  subst s2; subst map'; assumption.
-  intros until rl; monadSimpl; intros. subst y0; subst s0.
-  eapply add_var_wf. eexact EQ0.
-  eapply IHnames. eexact EQ. auto.
-Qed.
-Hint Resolve add_vars_wf: rtlg.
-
-Lemma add_var_letenv:
-  forall map1 i s1 r map2 s2,
-  add_var map1 i s1 = OK (r, map2) s2 ->
-  map2.(map_letvars) = map1.(map_letvars).
-Proof.
-  intros until s2. monadSimpl. intro. subst map2; reflexivity.
-Qed.
-
-(** Properties of [add_letvar]. *)
-
-Lemma add_letvar_wf:
-  forall map s r,
-  map_wf map s ->
-  reg_valid r s ->
-  ~(reg_in_map map r) ->
-  map_wf (add_letvar map r) s.
-Proof.
-  intros. unfold add_letvar; apply mk_map_wf; simpl.
-  exact (map_wf_var_valid map s H).
-  intros r' [EQ| IN].
-    subst r'; assumption.
-    eapply map_wf_letvar_valid; eauto.
-  exact (map_wf_inj map s H).
-  intros. elim H3; intro.
-   subst r0. apply H1. left. exists id; auto.
-   eapply map_wf_disj; eauto.
-Qed.
-
-(** * Properties of [alloc_reg] and [alloc_regs] *)
-
-Lemma alloc_reg_incr:
-  forall a s1 s2 map r,
-  alloc_reg map a s1 = OK r s2 -> state_incr s1 s2.
-Proof.
-  intros until r.  unfold alloc_reg.
-  case a; eauto with rtlg.
-Qed.
-Hint Resolve alloc_reg_incr: rtlg.
-
-Lemma alloc_reg_valid:
-  forall a s1 s2 map r,
-  map_wf map s1 ->
-  alloc_reg map a s1 = OK r s2 -> reg_valid r s2.
-Proof.
-  intros until r.  unfold alloc_reg.
-  case a; eauto with rtlg.
-Qed.
-Hint Resolve alloc_reg_valid: rtlg.
-
-Lemma alloc_reg_fresh_or_in_map:
-  forall map a s r s',
-  map_wf map s ->
-  alloc_reg map a s = OK r s' ->
-  reg_in_map map r \/ reg_fresh r s.
-Proof.
-  intros until s'. unfold alloc_reg.
-  destruct a; intros; try (right; eauto with rtlg; fail).
-  left; eauto with rtlg.
-  left; eauto with rtlg.
-Qed.
-
-Lemma add_vars_letenv:
-  forall il map1 s1 rl map2 s2,
-  add_vars map1 il s1 = OK (rl, map2) s2 ->
-  map2.(map_letvars) = map1.(map_letvars).
-Proof.
-  induction il; simpl; intros.
-   monadInv H. intro. subst map2; reflexivity.
-   monadInv H. intro EQ1. transitivity (map_letvars y).
-   subst y0. eapply add_var_letenv; eauto.
-   eauto.
-Qed.
-
-(** A register is an adequate target for holding the value of an
-  expression if
-- either the register is associated with a Cminor let-bound variable
-  or a Cminor local variable;
-- or the register is valid and not associated with any Cminor variable. *)
-
-Inductive target_reg_ok: state -> mapping -> expr -> reg -> Prop :=
-  | target_reg_var:
-      forall s map id r,
-      map.(map_vars)!id = Some r ->
-      target_reg_ok s map (Evar id) r
-  | target_reg_letvar:
-      forall s map idx r,
-      nth_error map.(map_letvars) idx = Some r ->
-      target_reg_ok s map (Eletvar idx) r
-  | target_reg_other:
-      forall s map a r,
-      ~(reg_in_map map r) ->
-      reg_valid r s ->
-      target_reg_ok s map a r.
-
-Lemma target_reg_ok_incr:
-  forall s1 s2 map e r,
-  state_incr s1 s2 ->
-  target_reg_ok s1 map e r ->
-  target_reg_ok s2 map e r.
-Proof.
-  intros. inversion H0. 
-  apply target_reg_var; auto.
-  apply target_reg_letvar; auto.
-  apply target_reg_other; eauto with rtlg.
-Qed.
-Hint Resolve target_reg_ok_incr: rtlg.
-
-Lemma target_reg_valid:
-  forall s map e r,
-  map_wf map s ->
-  target_reg_ok s map e r ->
-  reg_valid r s.
-Proof.
-  intros. inversion H0; eauto with rtlg coqlib.
-Qed.
-Hint Resolve target_reg_valid: rtlg.
-
-Lemma alloc_reg_target_ok:
-  forall a s1 s2 map r,
-  map_wf map s1 ->
-  alloc_reg map a s1 = OK r s2 ->
-  target_reg_ok s2 map a r.
-Proof.
-  intros until r; intro MWF. unfold alloc_reg.
-  case a; intros; try (eapply target_reg_other; eauto with rtlg; fail).
-  apply target_reg_var. 
-  generalize H; unfold find_var. 
-  case (map_vars map)!i.
-  intro. monadSimpl. congruence.
-  monadSimpl.
-  apply target_reg_letvar. 
-  generalize H. unfold find_letvar. 
-  case (nth_error (map_letvars map) n).
-  intro; monadSimpl; congruence.
-  monadSimpl.
-Qed.
-Hint Resolve alloc_reg_target_ok: rtlg.
-
-Lemma alloc_regs_incr:
-  forall al s1 s2 map rl,
-  alloc_regs map al s1 = OK rl s2 -> state_incr s1 s2.
-Proof.
-  induction al; simpl; intros.
-  monadInv H. subst s2. eauto with rtlg.
-  monadInv H. subst s2. eauto with rtlg.
-Qed.
-Hint Resolve alloc_regs_incr: rtlg.
-
-Lemma alloc_regs_valid:
-  forall al s1 s2 map rl,
-  map_wf map s1 ->
-  alloc_regs map al s1 = OK rl s2 ->
-  forall r, In r rl -> reg_valid r s2.
-Proof.
-  induction al; simpl; intros.
-   monadInv H0. subst rl. elim H1.
-   monadInv H0. subst rl; subst s0.
-   elim H1; intro.
-     subst r0. eauto with rtlg.
-     eauto with rtlg.
-Qed.
-Hint Resolve alloc_regs_valid: rtlg.
-
-Lemma alloc_regs_fresh_or_in_map:
-  forall map al s rl s',
-  map_wf map s ->
-  alloc_regs map al s = OK rl s' ->
-  forall r, In r rl -> reg_in_map map r \/ reg_fresh r s.
-Proof.
-  induction al; simpl; intros.
-  monadInv H0. subst rl. elim H1.
-  monadInv H0. subst rl. elim (in_inv H1); intro.
-  subst r. 
-  assert (MWF: map_wf map s0). eauto with rtlg.
-  elim (alloc_reg_fresh_or_in_map map e s0 r0 s1 MWF EQ0); intro.
-  left; assumption. right; eauto with rtlg.
-  eauto with rtlg.
-Qed.
-
-Inductive target_regs_ok: state -> mapping -> exprlist -> list reg -> Prop :=
-  | target_regs_nil:
-      forall s map,
-      target_regs_ok s map Enil nil
-  | target_regs_cons:
-      forall s map a r al rl,
-      reg_in_map map r \/ ~(In r rl) ->
-      target_reg_ok s map a r ->
-      target_regs_ok s map al rl ->
-      target_regs_ok s map (Econs a al) (r :: rl).
-
-Lemma target_regs_ok_incr:
-  forall s1 map al rl,
-  target_regs_ok s1 map al rl ->
-  forall s2,
-  state_incr s1 s2 ->
-  target_regs_ok s2 map al rl.
-Proof.
-  induction 1; intros.
-  apply target_regs_nil. 
-  apply target_regs_cons; eauto with rtlg. 
-Qed.
-Hint Resolve target_regs_ok_incr: rtlg.
-
-Lemma target_regs_valid:
-  forall s map al rl,
-  target_regs_ok s map al rl ->
-  map_wf map s ->
-  forall r, In r rl -> reg_valid r s.
-Proof.
-  induction 1; simpl; intros.
-  contradiction.
-  elim H3; intro.
-  subst r0. eauto with rtlg.
-  auto.
-Qed.
-Hint Resolve target_regs_valid: rtlg.
-
-Lemma alloc_regs_target_ok:
-  forall al s1 s2 map rl,
-  map_wf map s1 ->
-  alloc_regs map al s1 = OK rl s2 ->
-  target_regs_ok s2 map al rl.
-Proof.
-  induction al; simpl; intros.
-  monadInv H0. subst rl; apply target_regs_nil.
-  monadInv H0. subst s0; subst rl. 
-  apply target_regs_cons; eauto 6 with rtlg.
-  assert (MWF: map_wf map s). eauto with rtlg.
-  elim (alloc_reg_fresh_or_in_map map e s r s2 MWF EQ0); intro.
-  left; assumption. right; red; intro; eauto with rtlg.
-Qed.
-Hint Resolve alloc_regs_target_ok: rtlg.
-
-(** The following predicate is a variant of [target_reg_ok] used
-  to characterize registers that are adequate for holding the return
-  value of a function. *)
-
-Inductive return_reg_ok: state -> mapping -> option reg -> Prop :=
-  | return_reg_ok_none:
-      forall s map,
-      return_reg_ok s map None
-  | return_reg_ok_some:
-      forall s map r,
-      ~(reg_in_map map r) -> reg_valid r s ->
-      return_reg_ok s map (Some r).
-
-Lemma return_reg_ok_incr:
-  forall s1 s2 map or,
-  state_incr s1 s2 ->
-  return_reg_ok s1 map or ->
-  return_reg_ok s2 map or.
-Proof.
-  intros. inversion H0; constructor. 
-  assumption. eauto with rtlg.
-Qed.
-Hint Resolve return_reg_ok_incr: rtlg.
-
-Lemma new_reg_return_ok:
-  forall s1 r s2 map sig,
-  new_reg s1 = OK r s2 ->
-  map_wf map s1 ->
-  return_reg_ok s2 map (ret_reg sig r).
-Proof.
-  intros. unfold ret_reg. destruct (sig_res sig); constructor.
-  eauto with rtlg. eauto with rtlg.
-Qed.
-
-(** * Correspondence between Cminor environments and RTL register sets *)
-
-(** An RTL register environment matches a Cminor local environment and
-  let-environment if the value of every local or let-bound variable in
-  the Cminor environments is identical to the value of the
-  corresponding pseudo-register in the RTL register environment. *)
-
-Record match_env
-      (map: mapping) (e: Cminor.env) (le: Cminor.letenv) (rs: regset) : Prop :=
-  mk_match_env {
-    me_vars:
-      (forall id v,
-         e!id = Some v -> exists r, map.(map_vars)!id = Some r /\ rs#r = v);
-    me_letvars:
-      rs##(map.(map_letvars)) = le
-  }.
-
-Lemma match_env_find_reg:
-  forall map id s1 s2 r e le rs v,
-  find_var map id s1 = OK r s2 ->
-  match_env map e le rs ->
-  e!id = Some v ->
-  rs#r = v.
-Proof.
-  intros until v.
-  unfold find_var. caseEq (map.(map_vars)!id).
-  intros r' EQ. monadSimpl. subst r'. intros.
-  generalize (me_vars _ _ _ _ H _ _ H1). intros [r' [EQ' RS]].
-  rewrite EQ' in EQ; injection EQ; intro; subst r'.
-  assumption.
-  intro; monadSimpl.
-Qed.
-Hint Resolve match_env_find_reg: rtlg.
-
-Lemma match_env_invariant:
-  forall map e le rs rs',
-  match_env map e le rs ->
-  (forall r, (reg_in_map map r) -> rs'#r = rs#r) ->
-  match_env map e le rs'.
-Proof.
-  intros. apply mk_match_env.
-  intros id v' E.
-  generalize (me_vars _ _ _ _ H _ _ E). intros (r', (M, R)).
-  exists r'. split. auto. rewrite <- R. apply H0. 
-  left. exists id. auto. 
-  transitivity rs ## (map_letvars map).
-  apply list_map_exten. intros. 
-  symmetry. apply H0. right. auto.
-  exact (me_letvars _ _ _ _ H).
-Qed.
-
-(** Matching between environments is preserved when an unmapped register
-  (not corresponding to any Cminor variable) is assigned in the RTL
-  execution. *)
-
-Lemma match_env_update_temp:
-  forall map e le rs r v,
-  match_env map e le rs ->
-  ~(reg_in_map map r) ->
-  match_env map e le (rs#r <- v).
-Proof.
-  intros. apply match_env_invariant with rs; auto.
-  intros. case (Reg.eq r r0); intro. 
-  subst r0; contradiction.
-  apply Regmap.gso; auto.
-Qed.
-Hint Resolve match_env_update_temp: rtlg.
-
-(** Matching between environments is preserved by simultaneous
-  assignment to a Cminor local variable (in the Cminor environments)
-  and to the corresponding RTL pseudo-register (in the RTL register
-  environment). *)
-
-Lemma match_env_update_var:
-  forall map e le rs rs' id r v s s',
-  map_wf map s ->
-  find_var map id s = OK r s' ->
-  match_env map e le rs ->
-  rs'#r = v ->
-  (forall x, x <> r -> rs'#x = rs#x) ->
-  match_env map (PTree.set id v e) le rs'.
-Proof.
-  intros until s'; intro MWF.
-  unfold find_var in |- *. caseEq (map_vars map)!id.
-  intros. monadInv H0. subst r0. apply mk_match_env.
-  intros id' v' E. case (peq id' id); intros.
-  subst id'. rewrite PTree.gss in E. injection E; intro; subst v'.
-  exists r. split. auto. auto.
-  rewrite PTree.gso in E; auto.
-  elim (me_vars _ _ _ _ H1 _ _ E). intros r' (M, R).
-  exists r'. split. assumption. rewrite <- R; apply H3; auto.
-  red in |- *; intro. subst r'. apply n. eauto with rtlg.
-  transitivity rs ## (map_letvars map).
-  apply list_map_exten. intros. symmetry. apply H3.
-  red in |- *; intro. subst x. eauto with rtlg.
-  exact (me_letvars _ _ _ _ H1).
-  intro; monadSimpl.
-Qed.
-
-Lemma match_env_letvar:
-  forall map e le rs r v,
-  match_env map e le rs ->
-  rs#r = v ->
-  match_env (add_letvar map r) e (v :: le) rs.
-Proof.
-  intros.  unfold add_letvar in |- *; apply mk_match_env; simpl in |- *.
-  exact (me_vars _ _ _ _ H).
-  rewrite H0. rewrite (me_letvars _ _ _ _ H). auto.
-Qed.
-
-Lemma match_env_exten:
-  forall map e le rs1 rs2,
-    (forall r, rs2#r = rs1#r) ->
-    match_env map e le rs1 ->
-    match_env map e le rs2.
-Proof.
-  intros. apply mk_match_env.
-  intros. generalize (me_vars _ _ _ _ H0 _ _ H1). intros (r, (M1, M2)).
-  exists r. split. assumption. subst v. apply H. 
-  transitivity rs1 ## (map_letvars map).
-  apply list_map_exten. intros. symmetry  in |- *. apply H. 
-  exact (me_letvars _ _ _ _ H0).
-Qed.
-
-Lemma match_env_empty:
-  forall map,
-  map.(map_letvars) = nil ->
-  match_env map (PTree.empty val) nil (Regmap.init Vundef).
-Proof.
-  intros. apply mk_match_env.
-  intros. rewrite PTree.gempty in H0. discriminate.
-  rewrite H. reflexivity.
-Qed.
-
-(** The assignment of function arguments to local variables (on the Cminor
-  side) and pseudo-registers (on the RTL side) preserves matching
-  between environments. *)
-
-Lemma match_set_params_init_regs:
-  forall il rl s1 map2 s2 vl,
-  add_vars init_mapping il s1 = OK (rl, map2) s2 ->
-  match_env map2 (set_params vl il) nil (init_regs vl rl)
-  /\ (forall r, reg_fresh r s2 -> (init_regs vl rl)#r = Vundef).
-Proof.
-  induction il; simpl in |- *; intros.
-
-  monadInv H. intro; subst rl; simpl in |- *.
-  split. apply match_env_empty. subst map2; auto.
-  intros. apply Regmap.gi. 
-
-  monadInv H. intro EQ1; subst s0; subst y0; subst rl. clear H.
-  monadInv EQ0. intro EQ2. subst x0; subst s0. simpl.
-
-  assert (LV : map_letvars map2 = nil).
-  transitivity (map_letvars y).
-  eapply add_var_letenv; eauto. 
-  transitivity (map_letvars init_mapping).
-  eapply add_vars_letenv; eauto.
-  reflexivity.
-
-  destruct vl.
-  (* vl = nil *)
-  generalize (IHil _ _ _ _ nil EQ). intros [ME UNDEF]. split.
-  constructor. intros id v. subst map2. simpl. 
-  repeat rewrite PTree.gsspec; case (peq id a); intros.
-  exists r; split. auto. rewrite Regmap.gi. congruence.
-  destruct (me_vars _ _ _ _ ME id v H) as (r', (MV, IR)).
-  exists r'. split. auto. 
-  replace (init_regs nil x) with (Regmap.init Vundef) in IR. auto.
-  destruct x; reflexivity.
-  rewrite LV; reflexivity.
-  intros. apply Regmap.gi.
-  (* vl = v :: vl *)
-  generalize (IHil _ _ _ _ vl EQ). intros [ME UNDEF]. split.
-  constructor. intros id v1. subst map2. simpl. 
-  repeat rewrite PTree.gsspec; case (peq id a); intros.
-  exists r; split. auto. rewrite Regmap.gss. congruence.
-  destruct (me_vars _ _ _ _ ME id v1 H) as (r', (MV, IR)).
-  exists r'. split. auto. rewrite Regmap.gso. auto.
-  apply valid_fresh_different with s.
-  assert (MWF : map_wf y s).
-  eapply add_vars_wf; eauto. apply init_mapping_wf.
-  eauto with rtlg. eauto with rtlg.
-  rewrite LV; reflexivity.
-  intros. rewrite Regmap.gso. apply UNDEF. eauto with rtlg. 
-  apply sym_not_equal. eauto with rtlg.
-Qed.
-
-Lemma match_set_locals:
-  forall map1 s1,
-  map_wf map1 s1 ->
-  forall il rl map2 s2 e le rs,
-  match_env map1 e le rs ->
-  (forall r, reg_fresh r s1 -> rs#r = Vundef) ->
-  add_vars map1 il s1 = OK (rl, map2) s2 ->
-  match_env map2 (set_locals il e) le rs.
-Proof.
-  induction il; simpl in *; intros.
-
-  monadInv H2. intros; subst map2; auto.
-
-  monadInv H2. intros. subst s0; subst y0.
-  assert (match_env y (set_locals il e) le rs).
-    eapply IHil; eauto. 
-  monadInv EQ0. intro. subst s0; subst x0.  rewrite <- H7.
-  constructor.
-  intros id v. simpl. repeat rewrite PTree.gsspec. 
-  case (peq id a); intros.
-  exists r. split. auto. injection H5; intro; subst v.
-  apply H1. apply reg_fresh_decr with s. 
-  eapply add_vars_incr; eauto.
-  eauto with rtlg.
-  eapply me_vars; eauto. 
-  simpl. eapply me_letvars; eauto.
-Qed.
-
-Lemma match_init_env_init_reg:
-  forall params s0 rparams map1 s1 vars rvars map2 s2 vparams,
-  add_vars init_mapping params s0 = OK (rparams, map1) s1 ->
-  add_vars map1 vars s1 = OK (rvars, map2) s2 ->
-  match_env map2 (set_locals vars (set_params vparams params))
-            nil (init_regs vparams rparams).
-Proof.
-  intros. 
-  generalize (match_set_params_init_regs _ _ _ _ _ vparams H).
-  intros [A B].
-  eapply match_set_locals; eauto.
-  eapply add_vars_wf; eauto. apply init_mapping_wf.
-Qed.
-
-(** * Monotonicity properties of the state for the translation functions *)
-
-(** We show that the translation functions modify the state monotonically
-  (in the sense of the [state_incr] relation). *)
-
-Lemma add_move_incr:
-  forall r1 r2 nd s ns s',
-  add_move r1 r2 nd s = OK ns s' ->
-  state_incr s s'.
-Proof.
-  intros until s'. unfold add_move. 
-  case (Reg.eq r1 r2); intro.
-  monadSimpl. subst s'; auto with rtlg.
-  intro; eauto with rtlg.
-Qed.
-Hint Resolve add_move_incr: rtlg.
-
-Scheme expr_ind3 := Induction for expr Sort Prop
-  with condexpr_ind3 := Induction for condexpr Sort Prop
-  with exprlist_ind3 := Induction for exprlist Sort Prop.
-
-Lemma expr_condexpr_exprlist_ind:
-forall (P : expr -> Prop) (P0 : condexpr -> Prop)
-         (P1 : exprlist -> Prop),
-       (forall i : ident, P (Evar i)) ->
-       (forall (o : operation) (e : exprlist), P1 e -> P (Eop o e)) ->
-       (forall (m : memory_chunk) (a : addressing) (e : exprlist),
-        P1 e -> P (Eload m a e)) ->
-       (forall (m : memory_chunk) (a : addressing) (e : exprlist),
-        P1 e -> forall e0 : expr, P e0 -> P (Estore m a e e0)) ->
-       (forall (s : signature) (e : expr),
-        P e -> forall e0 : exprlist, P1 e0 -> P (Ecall s e e0)) ->
-       (forall c : condexpr,
-        P0 c ->
-        forall e : expr,
-        P e -> forall e0 : expr, P e0 -> P (Econdition c e e0)) ->
-       (forall e : expr, P e -> forall e0 : expr, P e0 -> P (Elet e e0)) ->
-       (forall n : nat, P (Eletvar n)) ->
-       (forall e : expr, P e -> P (Ealloc e)) ->
-       P0 CEtrue ->
-       P0 CEfalse ->
-       (forall (c : condition) (e : exprlist), P1 e -> P0 (CEcond c e)) ->
-       (forall c : condexpr,
-        P0 c ->
-        forall c0 : condexpr,
-        P0 c0 -> forall c1 : condexpr, P0 c1 -> P0 (CEcondition c c0 c1)) ->
-       P1 Enil ->
-       (forall e : expr,
-        P e -> forall e0 : exprlist, P1 e0 -> P1 (Econs e e0)) ->
-       (forall e : expr, P e) /\
-       (forall ce : condexpr, P0 ce) /\
-       (forall el : exprlist, P1 el).
-Proof.
-  intros. split. apply (expr_ind3 P P0 P1); assumption.
-  split. apply (condexpr_ind3 P P0 P1); assumption.
-  apply (exprlist_ind3 P P0 P1); assumption.
-Qed.
-
-Definition transl_expr_incr_pred (a: expr) : Prop :=
-  forall map rd nd s ns s',
-  transl_expr map a rd nd s = OK ns s' -> state_incr s s'.
-Definition transl_condition_incr_pred (c: condexpr) : Prop :=
-  forall map ntrue nfalse s ns s',
-  transl_condition map c ntrue nfalse s = OK ns s' -> state_incr s s'.
-Definition transl_exprlist_incr_pred (al: exprlist) : Prop :=
-  forall map rl nd s ns s',
-  transl_exprlist map al rl nd s = OK ns s' -> state_incr s s'.
-
-Lemma transl_expr_condition_exprlist_incr:
-  (forall a, transl_expr_incr_pred a) /\
-  (forall c, transl_condition_incr_pred c) /\
-  (forall al, transl_exprlist_incr_pred al).
-Proof.
-  apply expr_condexpr_exprlist_ind;
-  unfold transl_expr_incr_pred,
-         transl_condition_incr_pred,
-         transl_exprlist_incr_pred;
-  simpl; intros; 
-  try (monadInv H); try (monadInv H0); 
-  try (monadInv H1); try (monadInv H2);
-  eauto 6 with rtlg.
-
-  intro EQ2. 
-  apply state_incr_trans3 with s0 s1 s2; eauto with rtlg. 
-
-  intro EQ4.
-  apply state_incr_trans4 with s1 s2 s3 s4; eauto with rtlg.
-
-  subst s'; auto with rtlg.
-  subst s'; auto with rtlg.
-  destruct rl; monadInv H. subst s'; auto with rtlg.
-  destruct rl; monadInv H1. eauto with rtlg.  
-Qed.
-
-Lemma transl_expr_incr:
-  forall a map rd nd s ns s',
-  transl_expr map a rd nd s = OK ns s' -> state_incr s s'.
-Proof (proj1 transl_expr_condition_exprlist_incr).
-
-Lemma transl_condition_incr:
-  forall a map ntrue nfalse s ns s',
-  transl_condition map a ntrue nfalse s = OK ns s' -> state_incr s s'.
-Proof (proj1 (proj2 transl_expr_condition_exprlist_incr)).
-
-Lemma transl_exprlist_incr:
-  forall al map rl nd s ns s',
-  transl_exprlist map al rl nd s = OK ns s' -> state_incr s s'.
-Proof (proj2 (proj2 transl_expr_condition_exprlist_incr)).
-
-Hint Resolve transl_expr_incr transl_condition_incr transl_exprlist_incr: rtlg.
-
-Lemma transl_exit_incr:
-  forall nexits n s ns s',
-  transl_exit nexits n s = OK ns s' ->
-  state_incr s s'.
-Proof.
-  intros until s'. unfold transl_exit. 
-  destruct (nth_error nexits n); intros; simplify_eq H; intros; subst s'.
-  auto with rtlg.
-Qed.
-
-Hint Resolve transl_exit_incr: rtlg.
-
-Lemma transl_switch_incr:
-  forall r nexits default cases s n s',
-  transl_switch r nexits cases default s = OK n s' ->
-  state_incr s s'.
-Proof.
-  induction cases; simpl; intros.
-  eauto with rtlg.
-  destruct a as [key1 exit1].
-  monadInv H. intros EQ2.
-  apply state_incr_trans with s0. eauto.
-  eauto with rtlg. 
-Qed. 
-
-Hint Resolve transl_switch_incr: rtlg.
-
-Lemma transl_stmt_incr:
-  forall a map nd nexits nret rret s ns s',
-  transl_stmt map a nd nexits nret rret s = OK ns s' ->
-  state_incr s s'.
-Proof.
-  induction a; simpl; intros.
-
-  monadInv H. subst s'. auto with rtlg.
-
-  monadInv H. eauto with rtlg.
-
-  monadInv H. intro. apply state_incr_trans3 with s0 s1 s2; eauto with rtlg.
-
-  monadInv H. eauto with rtlg.
-
-  generalize H. case (more_likely c a1 a2); monadSimpl; eauto 6 with rtlg.
-
-  monadInv H. subst s'. 
-  apply update_instr_incr with s0 s1 (Inop n0) n u; eauto with rtlg.
-
-  eauto.
-
-  eauto with rtlg.
-
-  monadInv H. eauto 6 with rtlg.
-
-  generalize H. destruct o; destruct rret; try monadSimpl.
-  eauto with rtlg.
-  subst s'; auto with rtlg.
-Qed.
-
-Hint Resolve transl_stmt_incr: rtlg.
-
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
new file mode 100644
index 00000000..c46bdbba
--- /dev/null
+++ b/backend/RTLgenspec.v
@@ -0,0 +1,1455 @@
+(** Abstract specification of RTL generation *)
+
+(** In this module, we define inductive predicates that specify the 
+  translations from Cminor to RTL performed by the functions in module
+  [RTLgen].  We then show that these functions satisfy these relational
+  specifications.  The relational specifications will then be used
+  instead of the actual functions to show semantic equivalence between
+  the source Cminor code and the the generated RTL code
+  (see module [RTLgenproof]). *)
+
+Require Import Coqlib.
+Require Errors.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Events.
+Require Import Mem.
+Require Import Globalenvs.
+Require Import Switch.
+Require Import Op.
+Require Import Registers.
+Require Import CminorSel.
+Require Import RTL.
+Require Import RTLgen.
+
+(** * Reasoning about monadic computations *)
+
+(** The tactics below simplify hypotheses of the form [f ... = OK x s]
+  where [f] is a monadic computation.  For instance, the hypothesis
+  [(do x <- a; b) s = OK y s'] will generate the additional witnesses
+  [x], [s1] and the additional hypotheses
+  [a s = OK x s1] and [b x s1 = OK y s'], reflecting the fact that
+  both monadic computations [a] and [b] succeeded.
+*)
+
+Remark bind_inversion:
+  forall (A B: Set) (f: mon A) (g: A -> mon B) (y: B) (s1 s3: state),
+  bind f g s1 = OK y s3 ->
+  exists x, exists s2, f s1 = OK x s2 /\ g x s2 = OK y s3.
+Proof.
+  intros until s3. unfold bind. destruct (f s1); intros.
+  discriminate.
+  exists a; exists s; auto.
+Qed.
+
+Remark bind2_inversion:
+  forall (A B C: Set) (f: mon (A*B)) (g: A -> B -> mon C)
+         (z: C) (s1 s3: state),
+  bind2 f g s1 = OK z s3 ->
+  exists x, exists y, exists s2, f s1 = OK (x, y) s2 /\ g x y s2 = OK z s3.
+Proof.
+  intros until s3. unfold bind2, bind. destruct (f s1). congruence.
+  destruct p as [x y]; simpl; intro. 
+  exists x; exists y; exists s; auto. 
+Qed.
+
+Ltac monadInv1 H :=
+  match type of H with
+  | (OK _ _ = OK _ _) =>
+      inversion H; clear H; try subst
+  | (Error _ _ = OK _ _) =>
+      discriminate
+  | (ret _ _ = OK _ _) =>
+      inversion H; clear H; try subst
+  | (error _ _ = OK _ _) =>
+      discriminate
+  | (bind ?F ?G ?S = OK ?X ?S') =>
+      let x := fresh "x" in (
+      let s := fresh "s" in (
+      let EQ1 := fresh "EQ" in (
+      let EQ2 := fresh "EQ" in (
+      destruct (bind_inversion _ _ F G X S S' H) as [x [s [EQ1 EQ2]]];
+      clear H;
+      try (monadInv1 EQ2)))))
+  | (bind2 ?F ?G ?S = OK ?X ?S') =>
+      let x1 := fresh "x" in (
+      let x2 := fresh "x" in (
+      let s := fresh "s" in (
+      let EQ1 := fresh "EQ" in (
+      let EQ2 := fresh "EQ" in (
+      destruct (bind2_inversion _ _ _ F G X S S' H) as [x1 [x2 [s [EQ1 EQ2]]]];
+      clear H;
+      try (monadInv1 EQ2))))))
+  end.
+
+Ltac monadInv H :=
+  match type of H with
+  | (ret _ _ = OK _ _) => monadInv1 H
+  | (error _ _ = OK _ _) => monadInv1 H
+  | (bind ?F ?G ?S = OK ?X ?S') => monadInv1 H
+  | (bind2 ?F ?G ?S = OK ?X ?S') => monadInv1 H
+  | (?F _ _ _ _ _ _ _ _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  | (?F _ _ _ _ _ _ _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  | (?F _ _ _ _ _ _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  | (?F _ _ _ _ _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  | (?F _ _ _ _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  | (?F _ _ _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  | (?F _ _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  | (?F _ = OK _ _) => 
+      ((progress simpl in H) || unfold F in H); monadInv1 H
+  end.
+
+(** * Monotonicity properties of the state *)
+
+(** Operations over the global state satisfy a crucial monotonicity property:
+  nodes are only added to the CFG, but never removed nor their instructions
+  changed; similarly, fresh nodes and fresh registers are only consumed,
+  but never reused.  This property is captured by the following predicate
+  over states, which we show is a partial order. *)
+
+Inductive state_incr: state -> state -> Prop :=
+  state_incr_intro:
+    forall (s1 s2: state),
+    Ple s1.(st_nextnode) s2.(st_nextnode) ->
+    Ple s1.(st_nextreg) s2.(st_nextreg) ->
+    (forall pc, Plt pc s1.(st_nextnode) -> s2.(st_code)!pc = s1.(st_code)!pc) ->
+    state_incr s1 s2.
+
+Lemma instr_at_incr:
+  forall s1 s2 n i,
+  state_incr s1 s2 -> s1.(st_code)!n = Some i -> s2.(st_code)!n = Some i.
+Proof.
+  intros. inversion H.
+  rewrite H3. auto. elim (st_wf s1 n); intro.
+  assumption. congruence.
+Qed.
+
+Lemma state_incr_refl:
+  forall s, state_incr s s.
+Proof.
+  intros. apply state_incr_intro.
+  apply Ple_refl. apply Ple_refl. intros; auto.
+Qed.
+Hint Resolve state_incr_refl: rtlg.
+
+Lemma state_incr_trans:
+  forall s1 s2 s3, state_incr s1 s2 -> state_incr s2 s3 -> state_incr s1 s3.
+Proof.
+  intros. inversion H. inversion H0. apply state_incr_intro.
+  apply Ple_trans with (st_nextnode s2); assumption. 
+  apply Ple_trans with (st_nextreg s2); assumption.
+  intros. transitivity (s2.(st_code)!pc).
+  apply H8. apply Plt_Ple_trans with s1.(st_nextnode); auto.
+  apply H3; auto.
+Qed.
+Hint Resolve state_incr_trans: rtlg.
+
+(** The following relation between two states is a weaker version of
+  [state_incr].  It permits changing the contents at an already reserved
+  graph node, but only from [None] to [Some i]. *)
+
+Definition state_extends (s1 s2: state): Prop :=
+  forall pc,
+  s1.(st_code)!pc = None \/ s2.(st_code)!pc = s1.(st_code)!pc.
+
+Lemma instr_at_extends:
+  forall s1 s2 pc i,
+  state_extends s1 s2 -> 
+  s1.(st_code)!pc = Some i -> s2.(st_code)!pc = Some i.
+Proof.
+  intros. elim (H pc); intro. congruence. congruence.
+Qed.
+
+Lemma state_incr_extends:
+  forall s1 s2,
+  state_incr s1 s2 -> state_extends s1 s2.
+Proof.
+  unfold state_extends; intros. inversion H.   
+  case (plt pc s1.(st_nextnode)); intro.
+  right; apply H2; auto.
+  left. elim (s1.(st_wf) pc); intro.
+  elim (n H5). auto.
+Qed.
+Hint Resolve state_incr_extends.
+
+(** * Validity and freshness of registers *)
+
+(** An RTL pseudo-register is valid in a given state if it was created
+  earlier, that is, it is less than the next fresh register of the state.
+  Otherwise, the pseudo-register is said to be fresh. *)
+
+Definition reg_valid (r: reg) (s: state) : Prop := 
+  Plt r s.(st_nextreg).
+
+Definition reg_fresh (r: reg) (s: state) : Prop :=
+  ~(Plt r s.(st_nextreg)).
+
+Lemma valid_fresh_absurd:
+  forall r s, reg_valid r s -> reg_fresh r s -> False.
+Proof.
+  intros r s. unfold reg_valid, reg_fresh; case r; tauto.
+Qed.
+Hint Resolve valid_fresh_absurd: rtlg.
+
+Lemma valid_fresh_different:
+  forall r1 r2 s, reg_valid r1 s -> reg_fresh r2 s -> r1 <> r2.
+Proof.
+  unfold not; intros. subst r2. eauto with rtlg.
+Qed.
+Hint Resolve valid_fresh_different: rtlg.
+
+Lemma reg_valid_incr: 
+  forall r s1 s2, state_incr s1 s2 -> reg_valid r s1 -> reg_valid r s2.
+Proof.
+  intros r s1 s2 INCR.
+  inversion INCR.
+  unfold reg_valid. intros; apply Plt_Ple_trans with (st_nextreg s1); auto.
+Qed.
+Hint Resolve reg_valid_incr: rtlg.
+
+Lemma reg_fresh_decr:
+  forall r s1 s2, state_incr s1 s2 -> reg_fresh r s2 -> reg_fresh r s1.
+Proof.
+  intros r s1 s2 INCR. inversion INCR.
+  unfold reg_fresh; unfold not; intros.
+  apply H4. apply Plt_Ple_trans with (st_nextreg s1); auto.
+Qed. 
+Hint Resolve reg_fresh_decr: rtlg.
+
+(** Validity of a list of registers. *)
+
+Definition regs_valid (rl: list reg) (s: state) : Prop :=
+  forall r, In r rl -> reg_valid r s.
+
+Lemma regs_valid_nil: 
+  forall s, regs_valid nil s.
+Proof.
+  intros; red; intros. elim H.
+Qed.
+Hint Resolve regs_valid_nil: rtlg.
+
+Lemma regs_valid_cons:
+  forall r1 rl s,
+  reg_valid r1 s -> regs_valid rl s -> regs_valid (r1 :: rl) s.
+Proof.
+  intros; red; intros. elim H1; intro. subst r1; auto. auto.
+Qed.
+
+Lemma regs_valid_incr:
+  forall s1 s2 rl, state_incr s1 s2 -> regs_valid rl s1 -> regs_valid rl s2.
+Proof.
+  unfold regs_valid; intros; eauto with rtlg.
+Qed.
+Hint Resolve regs_valid_incr: rtlg.
+
+(** A register is ``in'' a mapping if it is associated with a Cminor
+  local or let-bound variable. *)
+
+Definition reg_in_map (m: mapping) (r: reg) : Prop :=
+  (exists id, m.(map_vars)!id = Some r) \/ In r m.(map_letvars).
+
+(** A compilation environment (mapping) is valid in a given state if
+  the registers associated with Cminor local variables and let-bound variables
+  are valid in the state. *)
+
+Definition map_valid (m: mapping) (s: state) : Prop :=
+  forall r, reg_in_map m r -> reg_valid r s.
+
+Lemma map_valid_incr:
+  forall s1 s2 m,
+  state_incr s1 s2 -> map_valid m s1 -> map_valid m s2.
+Proof.
+  unfold map_valid; intros; eauto with rtlg.
+Qed.
+Hint Resolve map_valid_incr: rtlg.
+
+(** * Properties of basic operations over the state *)
+
+(** Properties of [add_instr]. *)
+
+Lemma add_instr_incr:
+  forall s1 s2 i n,
+  add_instr i s1 = OK n s2 -> state_incr s1 s2.
+Proof.
+  intros. monadInv H.
+  apply state_incr_intro; simpl.
+  apply Ple_succ.
+  apply Ple_refl.
+  intros. apply PTree.gso; apply Plt_ne; auto.
+Qed.
+Hint Resolve add_instr_incr: rtlg.
+
+Lemma add_instr_at:
+  forall s1 s2 i n,
+  add_instr i s1 = OK n s2 -> s2.(st_code)!n = Some i.
+Proof.
+  intros. monadInv H. simpl. apply PTree.gss.
+Qed.
+Hint Resolve add_instr_at.
+
+(** Properties of [reserve_instr] and [update_instr]. *)
+
+Lemma reserve_instr_incr:
+  forall s1 s2 n,
+  reserve_instr s1 = OK n s2 -> state_incr s1 s2.
+Proof.
+  intros. monadInv H. 
+  apply state_incr_intro; simpl.
+  apply Ple_succ.
+  apply Ple_refl.
+  auto.
+Qed.
+
+Lemma update_instr_incr:
+  forall s1 s2 s3 s4 i n t,
+  reserve_instr s1 = OK n s2 ->
+  state_incr s2 s3 ->
+  update_instr n i s3 = OK t s4 ->
+  state_incr s1 s4.
+Proof.
+  intros.
+  generalize H1; clear H1; unfold update_instr.
+  case (plt n (st_nextnode s3)); intros. 2: discriminate.
+  inv H1. inv H0. monadInv H; simpl in *.
+  apply state_incr_intro; simpl.
+  eapply Ple_trans; eauto. apply Plt_Ple. apply Plt_succ.
+  auto.
+  intros. rewrite PTree.gso.
+  apply H3. apply Plt_trans_succ; auto.
+  apply Plt_ne. auto.
+Qed.
+
+Lemma update_instr_extends:
+  forall s1 s2 s3 s4 i n t,
+  reserve_instr s1 = OK n s2 ->
+  state_incr s2 s3 ->
+  update_instr n i s3 = OK t s4 ->
+  state_extends s3 s4.
+Proof.
+  intros. injection H. intros EQ1 EQ2.  
+  red; intros. 
+  case (peq pc n); intro.
+  subst pc. left. inversion H0. rewrite H4. rewrite <- EQ1; simpl.
+  destruct (s1.(st_wf) n). rewrite <- EQ2 in H7. elim (Plt_strict _ H7).
+  auto.
+  rewrite <- EQ1; rewrite <- EQ2; simpl. apply Plt_succ.
+  generalize H1; unfold update_instr.
+  case (plt n s3.(st_nextnode)); intros; inv H2. 
+  right; simpl. apply PTree.gso; auto.
+Qed.
+
+(** Properties of [new_reg]. *)
+
+Lemma new_reg_incr:
+  forall s1 s2 r, new_reg s1 = OK r s2 -> state_incr s1 s2.
+Proof.
+  intros. monadInv H. 
+  apply state_incr_intro; simpl.
+  apply Ple_refl. apply Ple_succ. auto.
+Qed.
+Hint Resolve new_reg_incr: rtlg.
+
+Lemma new_reg_valid:
+  forall s1 s2 r,
+  new_reg s1 = OK r s2 -> reg_valid r s2.
+Proof.
+  intros. monadInv H.
+  unfold reg_valid; simpl. apply Plt_succ.
+Qed.
+Hint Resolve new_reg_valid: rtlg.
+
+Lemma new_reg_fresh:
+  forall s1 s2 r,
+  new_reg s1 = OK r s2 -> reg_fresh r s1.
+Proof.
+  intros. monadInv H.
+  unfold reg_fresh; simpl.
+  exact (Plt_strict _).
+Qed.
+Hint Resolve new_reg_fresh: rtlg.
+
+Lemma new_reg_not_in_map:
+  forall s1 s2 m r,
+  new_reg s1 = OK r s2 -> map_valid m s1 -> ~(reg_in_map m r).
+Proof.
+  unfold not; intros; eauto with rtlg.
+Qed. 
+Hint Resolve new_reg_not_in_map: rtlg.
+
+(** * Properties of operations over compilation environments *)
+
+Lemma init_mapping_valid:
+  forall s, map_valid init_mapping s.
+Proof.
+  unfold map_valid, init_mapping.
+  intros s r [[id A] | B].  
+  simpl in A. rewrite PTree.gempty in A; discriminate.
+  simpl in B. tauto.
+Qed.  
+
+(** Properties of [find_var]. *)
+
+Lemma find_var_incr:
+  forall s1 s2 map name r,
+  find_var map name s1 = OK r s2 -> state_incr s1 s2.
+Proof.
+  intros until r. unfold find_var.
+  case (map_vars map)!name; intros; monadInv H. 
+  auto with rtlg.
+Qed.
+Hint Resolve find_var_incr: rtlg.
+
+Lemma find_var_in_map:
+  forall s1 s2 map name r,
+  find_var map name s1 = OK r s2 -> reg_in_map map r.
+Proof.
+  intros until r. unfold find_var; caseEq (map.(map_vars)!name).
+  intros. inv H0. left; exists name; auto.
+  intros. inv H0.
+Qed.
+Hint Resolve find_var_in_map: rtlg.
+
+Lemma find_var_valid:
+  forall s1 s2 map name r,
+  find_var map name s1 = OK r s2 -> map_valid map s1 -> reg_valid r s1.
+Proof.
+  eauto with rtlg.
+Qed.
+Hint Resolve find_var_valid: rtlg.
+
+(** Properties of [find_letvar]. *)
+
+Lemma find_letvar_incr:
+  forall s1 s2 map idx r,
+  find_letvar map idx s1 = OK r s2 -> state_incr s1 s2.
+Proof.
+  intros until r. unfold find_letvar.
+  case (nth_error (map_letvars map) idx); intros; monadInv H.
+  auto with rtlg.
+Qed.
+Hint Resolve find_letvar_incr: rtlg.
+
+Lemma find_letvar_in_map:
+  forall s1 s2 map idx r,
+  find_letvar map idx s1 = OK r s2 -> reg_in_map map r.
+Proof.
+  intros until r. unfold find_letvar.
+  caseEq (nth_error (map_letvars map) idx); intros; monadInv H0.
+  right; apply nth_error_in with idx; auto.
+Qed.
+Hint Resolve find_letvar_in_map: rtlg.
+
+Lemma find_letvar_valid:
+  forall s1 s2 map idx r,
+  find_letvar map idx s1 = OK r s2 -> map_valid map s1 -> reg_valid r s1.
+Proof.
+  eauto with rtlg.
+Qed.
+Hint Resolve find_letvar_valid: rtlg.
+
+(** Properties of [add_var]. *)
+
+Lemma add_var_valid:
+  forall s1 s2 map1 map2 name r, 
+  add_var map1 name s1 = OK (r, map2) s2 ->
+  map_valid map1 s1 ->
+  reg_valid r s2 /\ map_valid map2 s2.
+Proof.
+  intros. monadInv H. 
+  split. eauto with rtlg.
+  inversion EQ. subst. red. intros r' [[id A] | B].
+  simpl in A. rewrite PTree.gsspec in A. destruct (peq id name).
+  inv A. eauto with rtlg.
+  apply reg_valid_incr with s1. eauto with rtlg. 
+  apply H0. left; exists id; auto.
+  simpl in B. apply reg_valid_incr with s1. eauto with rtlg.
+  apply H0. right; auto.
+Qed.
+
+Lemma add_var_incr:
+  forall s1 s2 map name rm, 
+  add_var map name s1 = OK rm s2 -> state_incr s1 s2.
+Proof.
+  intros. monadInv H. eauto with rtlg.
+Qed.
+Hint Resolve add_var_incr: rtlg.
+
+Lemma add_var_find:
+  forall s1 s2 map name r map',
+  add_var map name s1 = OK (r,map') s2 -> map'.(map_vars)!name = Some r.
+Proof.
+  intros. monadInv H. simpl. apply PTree.gss. 
+Qed.
+
+Lemma add_vars_incr:
+  forall names s1 s2 map r,
+  add_vars map names s1 = OK r s2 -> state_incr s1 s2.
+Proof.
+  induction names; simpl; intros; monadInv H.
+  auto with rtlg.
+  eauto with rtlg. 
+Qed.
+
+Lemma add_vars_valid:
+  forall namel s1 s2 map1 map2 rl, 
+  add_vars map1 namel s1 = OK (rl, map2) s2 ->
+  map_valid map1 s1 ->
+  regs_valid rl s2 /\ map_valid map2 s2.
+Proof.
+  induction namel; simpl; intros; monadInv H.
+  split. red; simpl; intros; tauto. auto.
+  exploit IHnamel; eauto. intros [A B].
+  exploit add_var_valid; eauto. intros [C D].
+  exploit add_var_incr; eauto. intros INCR.
+  split. apply regs_valid_cons; eauto with rtlg.
+  auto.
+Qed.
+
+Lemma add_var_letenv:
+  forall map1 i s1 r map2 s2,
+  add_var map1 i s1 = OK (r, map2) s2 ->
+  map2.(map_letvars) = map1.(map_letvars).
+Proof.
+  intros; monadInv H. reflexivity.
+Qed.
+
+Lemma add_vars_letenv:
+  forall il map1 s1 rl map2 s2,
+  add_vars map1 il s1 = OK (rl, map2) s2 ->
+  map2.(map_letvars) = map1.(map_letvars).
+Proof.
+  induction il; simpl; intros; monadInv H.
+  reflexivity.
+  transitivity (map_letvars x0).
+  eapply add_var_letenv; eauto.
+  eauto.
+Qed.
+
+(** Properties of [add_letvar]. *)
+
+Lemma add_letvar_valid:
+  forall map s r,
+  map_valid map s ->
+  reg_valid r s ->
+  map_valid (add_letvar map r) s.
+Proof.
+  intros; red; intros. 
+  destruct H1 as [[id A]|B]. 
+  simpl in A. apply H. left; exists id; auto.
+  simpl in B. elim B; intro. 
+  subst r0; auto. apply H. right; auto.
+Qed.
+
+(** * Properties of [alloc_reg] and [alloc_regs] *)
+
+Lemma alloc_reg_incr:
+  forall a s1 s2 map r,
+  alloc_reg map a s1 = OK r s2 -> state_incr s1 s2.
+Proof.
+  intros until r.  unfold alloc_reg.
+  case a; eauto with rtlg.
+Qed.
+Hint Resolve alloc_reg_incr: rtlg.
+
+Lemma alloc_reg_valid:
+  forall a s1 s2 map r,
+  map_valid map s1 ->
+  alloc_reg map a s1 = OK r s2 -> reg_valid r s2.
+Proof.
+  intros until r.  unfold alloc_reg.
+  case a; eauto with rtlg.
+Qed.
+Hint Resolve alloc_reg_valid: rtlg.
+
+Lemma alloc_reg_fresh_or_in_map:
+  forall map a s r s',
+  map_valid map s ->
+  alloc_reg map a s = OK r s' ->
+  reg_in_map map r \/ reg_fresh r s.
+Proof.
+  intros until s'. unfold alloc_reg.
+  destruct a; intros; try (right; eauto with rtlg; fail).
+  left; eauto with rtlg.
+  left; eauto with rtlg.
+Qed.
+
+Lemma alloc_regs_incr:
+  forall al s1 s2 map rl,
+  alloc_regs map al s1 = OK rl s2 -> state_incr s1 s2.
+Proof.
+  induction al; simpl; intros; monadInv H; eauto with rtlg.
+Qed.
+Hint Resolve alloc_regs_incr: rtlg.
+
+Lemma alloc_regs_valid:
+  forall al s1 s2 map rl,
+  map_valid map s1 ->
+  alloc_regs map al s1 = OK rl s2 ->
+  regs_valid rl s2.
+Proof.
+  induction al; simpl; intros; monadInv H0.
+  apply regs_valid_nil.
+  apply regs_valid_cons. eauto with rtlg. eauto with rtlg. 
+Qed.
+Hint Resolve alloc_regs_valid: rtlg.
+
+Lemma alloc_regs_fresh_or_in_map:
+  forall map al s rl s',
+  map_valid map s ->
+  alloc_regs map al s = OK rl s' ->
+  forall r, In r rl -> reg_in_map map r \/ reg_fresh r s.
+Proof.
+  induction al; simpl; intros; monadInv H0.
+  elim H1.
+  elim H1; intro. 
+  subst r.
+  eapply alloc_reg_fresh_or_in_map; eauto.
+  exploit IHal. apply map_valid_incr with s0; eauto with rtlg. eauto. eauto.
+  intros [A|B]. auto. right; eauto with rtlg.
+Qed.
+
+(** A register is an adequate target for holding the value of an
+  expression if
+- either the register is associated with a Cminor let-bound variable
+  or a Cminor local variable;
+- or the register is not associated with any Cminor variable
+  and does not belong to the preserved set [pr]. *)
+
+Inductive target_reg_ok (map: mapping) (pr: list reg): expr -> reg -> Prop :=
+  | target_reg_var:
+      forall id r,
+      map.(map_vars)!id = Some r ->
+      target_reg_ok map pr (Evar id) r
+  | target_reg_letvar:
+      forall idx r,
+      nth_error map.(map_letvars) idx = Some r ->
+      target_reg_ok map pr (Eletvar idx) r
+  | target_reg_other:
+      forall a r,
+      ~(reg_in_map map r) -> ~In r pr ->
+      target_reg_ok map pr a r.
+
+Inductive target_regs_ok (map: mapping) (pr: list reg): exprlist -> list reg -> Prop :=
+  | target_regs_nil:
+      target_regs_ok map pr Enil nil
+  | target_regs_cons: forall a1 al r1 rl,
+      target_reg_ok map pr a1 r1 ->
+      target_regs_ok map (r1 :: pr) al rl ->
+      target_regs_ok map pr (Econs a1 al) (r1 :: rl).
+
+Lemma target_reg_ok_append:
+  forall map pr a r,
+  target_reg_ok map pr a r ->
+  forall pr',
+  (forall r', In r' pr' -> reg_in_map map r' \/ r' <> r) ->
+  target_reg_ok map (pr' ++ pr) a r.
+Proof.
+  induction 1; intros.
+  constructor; auto.
+  constructor; auto.
+  constructor; auto. red; intros. 
+  elim (in_app_or _ _ _ H2); intro. 
+  generalize (H1 _ H3). tauto. tauto.
+Qed.
+
+Lemma target_reg_ok_cons:
+  forall map pr a r,
+  target_reg_ok map pr a r ->
+  forall r',
+  reg_in_map map r' \/ r' <> r ->
+  target_reg_ok map (r' :: pr) a r.
+Proof.
+  intros. change (r' :: pr) with ((r' :: nil) ++ pr).
+  apply target_reg_ok_append; auto. 
+  intros r'' [A|B]. subst r''; auto. contradiction.
+Qed.
+
+Lemma new_reg_target_ok:
+  forall map pr s1 a r s2,
+  map_valid map s1 ->
+  regs_valid pr s1 ->
+  new_reg s1 = OK r s2 ->
+  target_reg_ok map pr a r.
+Proof.
+  intros. constructor. 
+  red; intro. apply valid_fresh_absurd with r s1.
+  eauto with rtlg. eauto with rtlg.
+  red; intro. apply valid_fresh_absurd with r s1.
+  auto. eauto with rtlg.
+Qed.
+
+Lemma alloc_reg_target_ok:
+  forall map pr s1 a r s2,
+  map_valid map s1 ->
+  regs_valid pr s1 ->
+  alloc_reg map a s1 = OK r s2 ->
+  target_reg_ok map pr a r.
+Proof.
+  intros. unfold alloc_reg in H1. destruct a;
+  try (eapply new_reg_target_ok; eauto; fail).
+  (* Evar *)
+  generalize H1; unfold find_var. caseEq (map_vars map)!i; intros.
+  inv H3. constructor. auto. inv H3.
+  (* Elet *)
+  generalize H1; unfold find_letvar. caseEq (nth_error (map_letvars map) n); intros.
+  inv H3. constructor. auto. inv H3.
+Qed.
+
+Lemma alloc_regs_target_ok:
+  forall map al pr s1 rl s2,
+  map_valid map s1 ->
+  regs_valid pr s1 ->
+  alloc_regs map al s1 = OK rl s2 ->
+  target_regs_ok map pr al rl.
+Proof.
+  induction al; intros; monadInv H1.
+  constructor.
+  constructor.
+  eapply alloc_reg_target_ok; eauto. 
+  apply IHal with s s2; eauto with rtlg. 
+  apply regs_valid_cons; eauto with rtlg.
+Qed.
+
+Hint Resolve new_reg_target_ok alloc_reg_target_ok alloc_regs_target_ok: rtlg.  
+
+(** The following predicate is a variant of [target_reg_ok] used
+  to characterize registers that are adequate for holding the return
+  value of a function. *)
+
+Inductive return_reg_ok: state -> mapping -> option reg -> Prop :=
+  | return_reg_ok_none:
+      forall s map,
+      return_reg_ok s map None
+  | return_reg_ok_some:
+      forall s map r,
+      ~(reg_in_map map r) -> reg_valid r s ->
+      return_reg_ok s map (Some r).
+
+Lemma return_reg_ok_incr:
+  forall s map rret, return_reg_ok s map rret ->
+  forall s', state_incr s s' -> return_reg_ok s' map rret.
+Proof.
+  induction 1; intros; econstructor; eauto with rtlg.
+Qed.
+Hint Resolve return_reg_ok_incr: rtlg.
+
+Lemma new_reg_return_ok:
+  forall s1 r s2 map sig,
+  new_reg s1 = OK r s2 ->
+  map_valid map s1 ->
+  return_reg_ok s2 map (ret_reg sig r).
+Proof.
+  intros. unfold ret_reg. destruct (sig_res sig); constructor.
+  eauto with rtlg. eauto with rtlg.  
+Qed.
+
+(** * Relational specification of the translation *)
+
+(** We now define inductive predicates that characterize the fact that
+  the control-flow graph produced by compilation of a Cminor function
+  contains sub-graphs that correspond to the translation of each
+  Cminor expression or statement in the original code. *)
+
+(** [tr_move c ns rs nd rd] holds if the graph [c], between nodes [ns]
+  and [nd], contains instructions that move the value of register [rs]
+  to register [rd]. *)
+
+Inductive tr_move (c: code):
+       node -> reg -> node -> reg -> Prop :=
+  | tr_move_0: forall n r,
+      tr_move c n r n r
+  | tr_move_1: forall ns rs nd rd,
+      c!ns = Some (Iop Omove (rs :: nil) rd nd) ->
+      tr_move c ns rs nd rd.
+
+(** [tr_expr c map pr expr ns nd rd] holds if the graph [c],
+  between nodes [ns] and [nd], contains instructions that compute the
+  value of the Cminor expression [expr] and deposit this value in
+  register [rd].  [map] is a mapping from Cminor variables to the
+  corresponding RTL registers.  [pr] is a list of RTL registers whose
+  values must be preserved during this computation.  All registers
+  mentioned in [map] must also be preserved during this computation.
+  To ensure this, we demand that the result registers of the instructions
+  appearing on the path from [ns] to [nd] are neither in [pr] nor in [map].
+*)
+
+Inductive tr_expr (c: code): 
+       mapping -> list reg -> expr -> node -> node -> reg -> Prop :=
+  | tr_Evar: forall map pr id ns nd r rd,
+      map.(map_vars)!id = Some r ->
+      (rd = r \/ ~reg_in_map map rd /\ ~In rd pr) ->
+      tr_move c ns r nd rd ->
+      tr_expr c map pr (Evar id) ns nd rd
+  | tr_Eop: forall map pr op al ns nd rd n1 rl,
+      tr_exprlist c map pr al ns n1 rl ->
+      c!n1 = Some (Iop op rl rd nd) ->
+      ~reg_in_map map rd -> ~In rd pr ->
+      tr_expr c map pr (Eop op al) ns nd rd
+  | tr_Eload: forall map pr chunk addr al ns nd rd n1 rl,
+      tr_exprlist c map pr al ns n1 rl ->
+      c!n1 = Some (Iload chunk addr rl rd nd) ->
+      ~reg_in_map map rd -> ~In rd pr ->
+      tr_expr c map pr (Eload chunk addr al) ns nd rd
+  | tr_Estore: forall map pr chunk addr al b ns nd rd n1 rl n2,
+      tr_exprlist c map pr al ns n1 rl ->
+      tr_expr c map (rl ++ pr) b n1 n2 rd ->
+      c!n2 = Some (Istore chunk addr rl rd nd) ->
+      tr_expr c map pr (Estore chunk addr al b) ns nd rd
+  | tr_Ecall: forall map pr sig b cl ns nd rd n1 rf n2 rargs,
+      tr_expr c map pr b ns n1 rf ->
+      tr_exprlist c map (rf :: pr) cl n1 n2 rargs ->
+      c!n2 = Some (Icall sig (inl _ rf) rargs rd nd) ->
+      ~reg_in_map map rd -> ~In rd pr ->
+      tr_expr c map pr (Ecall sig b cl) ns nd rd
+  | tr_Econdition: forall map pr b ifso ifnot ns nd rd ntrue nfalse,
+      tr_condition c map pr b ns ntrue nfalse ->
+      tr_expr c map pr ifso ntrue nd rd ->
+      tr_expr c map pr ifnot nfalse nd rd ->
+      tr_expr c map pr (Econdition b ifso ifnot) ns nd rd
+  | tr_Elet: forall map pr b1 b2 ns nd rd n1 r,
+      ~reg_in_map map r ->
+      tr_expr c map pr b1 ns n1 r ->
+      tr_expr c (add_letvar map r) pr b2 n1 nd rd ->
+      tr_expr c map pr (Elet b1 b2) ns nd rd
+  | tr_Eletvar: forall map pr n ns nd rd r,
+      List.nth_error map.(map_letvars) n = Some r ->
+      (rd = r \/ ~reg_in_map map rd /\ ~In rd pr) ->
+      tr_move c ns r nd rd ->
+      tr_expr c map pr (Eletvar n) ns nd rd
+  | tr_Ealloc: forall map pr a ns nd rd n1 r,
+      tr_expr c map pr a ns n1 r ->
+      c!n1 = Some (Ialloc r rd nd) ->
+      ~reg_in_map map rd -> ~In rd pr ->
+      tr_expr c map pr (Ealloc a) ns nd rd
+
+(** [tr_expr c map pr cond ns ntrue nfalse rd] holds if the graph [c],
+  starting at node [ns], contains instructions that compute the truth
+  value of the Cminor conditional expression [cond] and terminate
+  on node [ntrue] if the condition holds and on node [nfalse] otherwise. *)
+
+with tr_condition (c: code): 
+       mapping -> list reg -> condexpr -> node -> node -> node -> Prop :=
+  | tr_CEtrue: forall map pr ntrue nfalse,
+      tr_condition c map pr CEtrue ntrue ntrue nfalse
+  | tr_CEfalse: forall map pr ntrue nfalse,
+      tr_condition c map pr CEfalse nfalse ntrue nfalse
+  | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl,
+      tr_exprlist c map pr bl ns n1 rl ->
+      c!n1 = Some (Icond cond rl ntrue nfalse) ->
+      tr_condition c map pr (CEcond cond bl) ns ntrue nfalse
+  | tr_CEcondition: forall map pr b ifso ifnot ns ntrue nfalse ntrue' nfalse',
+      tr_condition c map pr b ns ntrue' nfalse' ->
+      tr_condition c map pr ifso ntrue' ntrue nfalse ->
+      tr_condition c map pr ifnot nfalse' ntrue nfalse ->
+      tr_condition c map pr (CEcondition b ifso ifnot) ns ntrue nfalse
+
+(** [tr_exprlist c map pr exprs ns nd rds] holds if the graph [c],
+  between nodes [ns] and [nd], contains instructions that compute the values
+  of the list of Cminor expression [exprlist] and deposit these values
+  in registers [rds]. *)
+
+with tr_exprlist (c: code): 
+       mapping -> list reg -> exprlist -> node -> node -> list reg -> Prop :=
+  | tr_Enil: forall map pr n,
+      tr_exprlist c map pr Enil n n nil
+  | tr_Econs: forall map pr a1 al ns nd r1 rl n1,
+      tr_expr c map pr a1 ns n1 r1 ->
+      tr_exprlist c map (r1 :: pr) al n1 nd rl ->
+      tr_exprlist c map pr (Econs a1 al) ns nd (r1 :: rl).
+
+(** Auxiliary for the compilation of [switch] statements. *)
+
+Inductive tr_switch
+     (c: code) (r: reg) (nexits: list node):
+     comptree -> node -> Prop :=
+  | tr_switch_action: forall act n,
+      nth_error nexits act = Some n ->
+      tr_switch c r nexits (CTaction act) n
+  | tr_switch_ifeq: forall key act t' n ncont nfound,
+      tr_switch c r nexits t' ncont ->
+      nth_error nexits act = Some nfound ->
+      c!n = Some(Icond (Ccompimm Ceq key) (r :: nil) nfound ncont) ->
+      tr_switch c r nexits (CTifeq key act t') n
+  | tr_switch_iflt: forall key t1 t2 n n1 n2,
+      tr_switch c r nexits t1 n1 ->
+      tr_switch c r nexits t2 n2 ->
+      c!n = Some(Icond (Ccompuimm Clt key) (r :: nil) n1 n2) ->
+      tr_switch c r nexits (CTiflt key t1 t2) n.
+
+(** [tr_stmt c map stmt ns ncont nexits nret rret] holds if the graph [c],
+  starting at node [ns], contains instructions that perform the Cminor
+  statement [stmt].   These instructions branch to node [ncont] if
+  the statement terminates normally, to the [n]-th node in [nexits]
+  if the statement terminates prematurely on a [exit n] statement,
+  and to [nret] if the statement terminates prematurely on a [return]
+  statement.  Moreover, if the [return] statement has an argument,
+  its value is deposited in register [rret]. *)
+
+Inductive tr_stmt (c: code) (map: mapping):
+     stmt -> node -> node -> list node -> node -> option reg -> Prop :=
+  | tr_Sskip: forall ns nexits nret rret,
+     tr_stmt c map Sskip ns ns nexits nret rret
+  | tr_Sexpr: forall a ns nd nexits nret rret r,
+     tr_expr c map nil a ns nd r ->
+     tr_stmt c map (Sexpr a) ns nd nexits nret rret
+  | tr_Sassign: forall id a ns nd nexits nret rret rv rt n,
+     map.(map_vars)!id = Some rv ->
+     tr_move c n rt nd rv ->
+     tr_expr c map nil a ns n rt ->
+     tr_stmt c map (Sassign id a) ns nd nexits nret rret
+  | tr_Sseq: forall s1 s2 ns nd nexits nret rret n,
+     tr_stmt c map s2 n nd nexits nret rret ->
+     tr_stmt c map s1 ns n nexits nret rret ->
+     tr_stmt c map (Sseq s1 s2) ns nd nexits nret rret
+  | tr_Sifthenelse: forall a strue sfalse ns nd nexits nret rret ntrue nfalse,
+     tr_stmt c map strue ntrue nd nexits nret rret ->
+     tr_stmt c map sfalse nfalse nd nexits nret rret ->
+     tr_condition c map nil a ns ntrue nfalse ->
+     tr_stmt c map (Sifthenelse a strue sfalse) ns nd nexits nret rret
+  | tr_Sloop: forall sbody ns nd nexits nret rret nloop,
+     tr_stmt c map sbody ns nloop nexits nret rret ->
+     c!nloop = Some(Inop ns) ->
+     tr_stmt c map (Sloop sbody) ns nd nexits nret rret
+  | tr_Sblock: forall sbody ns nd nexits nret rret,
+     tr_stmt c map sbody ns nd (nd :: nexits) nret rret ->
+     tr_stmt c map (Sblock sbody) ns nd nexits nret rret
+  | tr_Sexit: forall n ns nd nexits nret rret,
+     nth_error nexits n = Some ns ->
+     tr_stmt c map (Sexit n) ns nd nexits nret rret
+  | tr_Sswitch: forall a cases default ns nd nexits nret rret n r t,
+     validate_switch default cases t = true ->
+     tr_expr c map nil a ns n r ->
+     tr_switch c r nexits t n ->
+     tr_stmt c map (Sswitch a cases default) ns nd nexits nret rret
+  | tr_Sreturn_none: forall nret nd nexits,
+     tr_stmt c map (Sreturn None) nret nd nexits nret None
+  | tr_Sreturn_some: forall a ns nd nexits nret rret,
+     tr_expr c map nil a ns nret rret ->
+     tr_stmt c map (Sreturn (Some a)) ns nd nexits nret (Some rret)
+  | tr_Stailcall: forall sig b cl ns nd nexits nret rret n1 rf n2 rargs,
+     tr_expr c map nil b ns n1 rf ->
+     tr_exprlist c map (rf :: nil) cl n1 n2 rargs ->
+     c!n2 = Some (Itailcall sig (inl _ rf) rargs) ->
+     tr_stmt c map (Stailcall sig b cl) ns nd nexits nret rret.
+
+(** [tr_function f tf] specifies the RTL function [tf] that 
+  [RTLgen.transl_function] returns.  *)
+
+Inductive tr_function: CminorSel.function -> RTL.function -> Prop :=
+  | tr_function_intro:
+      forall f code rparams map1 s1 rvars map2 s2 nentry nret rret orret nextnode wfcode,
+      add_vars init_mapping f.(CminorSel.fn_params) init_state = OK (rparams, map1) s1 ->
+      add_vars map1 f.(CminorSel.fn_vars) s1 = OK (rvars, map2) s2 ->
+      orret = ret_reg f.(CminorSel.fn_sig) rret ->
+      tr_stmt code map2 f.(CminorSel.fn_body) nentry nret nil nret orret ->
+      code!nret = Some(Ireturn orret) -> 
+      tr_function f (RTL.mkfunction
+                       f.(CminorSel.fn_sig)
+                       rparams
+                       f.(CminorSel.fn_stackspace)
+                       code
+                       nentry
+                       nextnode
+                       wfcode).
+
+(** * Correctness proof of the translation functions *)
+
+(** We now show that the translation functions in module [RTLgen]
+  satisfy the specifications given above as inductive predicates. *)
+
+Scheme tr_expr_ind3 := Minimality for tr_expr Sort Prop
+  with tr_condition_ind3 := Minimality for tr_condition Sort Prop
+  with tr_exprlist_ind3 := Minimality for tr_exprlist Sort Prop.
+
+Definition tr_expr_condition_exprlist_ind3
+  (c: code)
+  (P : mapping -> list reg -> expr -> node -> node -> reg -> Prop)
+  (P0 : mapping -> list reg -> condexpr -> node -> node -> node -> Prop)
+  (P1 : mapping -> list reg -> exprlist -> node -> node -> list reg -> Prop) :=
+  fun a b c' d e f g h i j k l m n o =>
+  conj (tr_expr_ind3 c P P0 P1 a b c' d e f g h i j k l m n o)
+       (conj (tr_condition_ind3 c P P0 P1 a b c' d e f g h i j k l m n o)
+             (tr_exprlist_ind3 c P P0 P1 a b c' d e f g h i j k l m n o)).
+
+Lemma tr_move_extends:
+  forall s1 s2, state_extends s1 s2 ->
+  forall ns rs nd rd,
+  tr_move s1.(st_code) ns rs nd rd ->
+  tr_move s2.(st_code) ns rs nd rd.
+Proof.
+  induction 2; econstructor; eauto.
+  eapply instr_at_extends; eauto.
+Qed.
+
+Lemma tr_expr_condition_exprlist_extends:
+  forall s1 s2, state_extends s1 s2 ->
+  (forall map pr a ns nd rd,
+   tr_expr s1.(st_code) map pr a ns nd rd ->
+   tr_expr s2.(st_code) map pr a ns nd rd)
+/\(forall map pr a ns ntrue nfalse,
+   tr_condition s1.(st_code) map pr a ns ntrue nfalse ->
+   tr_condition s2.(st_code) map pr a ns ntrue nfalse)
+/\(forall map pr al ns nd rl,
+   tr_exprlist s1.(st_code) map pr al ns nd rl ->
+   tr_exprlist s2.(st_code) map pr al ns nd rl).
+Proof.
+  intros s1 s2 EXT. 
+  pose (AT := fun pc i => instr_at_extends s1 s2 pc i EXT).
+  apply tr_expr_condition_exprlist_ind3; intros; econstructor; eauto.
+  eapply tr_move_extends; eauto.
+  eapply tr_move_extends; eauto.
+Qed.
+
+Lemma tr_expr_incr:
+  forall s1 s2, state_incr s1 s2 ->
+  forall map pr a ns nd rd,
+  tr_expr s1.(st_code) map pr a ns nd rd ->
+  tr_expr s2.(st_code) map pr a ns nd rd.
+Proof.
+  intros.
+  exploit tr_expr_condition_exprlist_extends. 
+  apply state_incr_extends; eauto. intros [A [B C]]. eauto.
+Qed.
+
+Lemma tr_condition_incr:
+  forall s1 s2, state_incr s1 s2 ->
+  forall map pr a ns ntrue nfalse,
+  tr_condition s1.(st_code) map pr a ns ntrue nfalse ->
+  tr_condition s2.(st_code) map pr a ns ntrue nfalse.
+Proof.
+  intros.
+  exploit tr_expr_condition_exprlist_extends. 
+  apply state_incr_extends; eauto. intros [A [B C]]. eauto.
+Qed.
+
+Lemma tr_exprlist_incr:
+  forall s1 s2, state_incr s1 s2 ->
+  forall map pr al ns nd rl,
+  tr_exprlist s1.(st_code) map pr al ns nd rl ->
+  tr_exprlist s2.(st_code) map pr al ns nd rl.
+Proof.
+  intros.
+  exploit tr_expr_condition_exprlist_extends. 
+  apply state_incr_extends; eauto. intros [A [B C]]. eauto.
+Qed.
+
+Scheme expr_ind3 := Induction for expr Sort Prop
+  with condexpr_ind3 := Induction for condexpr Sort Prop
+  with exprlist_ind3 := Induction for exprlist Sort Prop.
+
+Definition expr_condexpr_exprlist_ind 
+    (P1: expr -> Prop) (P2: condexpr -> Prop) (P3: exprlist -> Prop) :=
+  fun a b c d e f g h i j k l m n o =>
+  conj (expr_ind3 P1 P2 P3 a b c d e f g h i j k l m n o)
+    (conj (condexpr_ind3 P1 P2 P3 a b c d e f g h i j k l m n o)
+          (exprlist_ind3 P1 P2 P3 a b c d e f g h i j k l m n o)).
+
+Lemma add_move_charact:
+  forall s ns rs nd rd s',
+  add_move rs rd nd s = OK ns s' -> 
+  tr_move s'.(st_code) ns rs nd rd /\ state_incr s s'.
+Proof.
+  intros. unfold add_move in H. destruct (Reg.eq rs rd).
+  inv H. split. constructor. auto with rtlg.
+  split. constructor. eauto with rtlg. eauto with rtlg.
+Qed.
+
+Lemma transl_expr_condexpr_list_charact:
+  (forall a map rd nd s ns s' pr
+     (TR: transl_expr map a rd nd s = OK ns s')
+     (WF: map_valid map s)
+     (OK: target_reg_ok map pr a rd)
+     (VALID: regs_valid pr s)
+     (VALID2: reg_valid rd s),
+   tr_expr s'.(st_code) map pr a ns nd rd
+   /\ state_incr s s')
+/\
+  (forall a map ntrue nfalse s ns s' pr
+     (TR: transl_condition map a ntrue nfalse s = OK ns s')
+     (WF: map_valid map s)
+     (VALID: regs_valid pr s),
+   tr_condition s'.(st_code) map pr a ns ntrue nfalse
+   /\ state_incr s s')
+/\
+  (forall al map rl nd s ns s' pr
+     (TR: transl_exprlist map al rl nd s = OK ns s')
+     (WF: map_valid map s)
+     (OK: target_regs_ok map pr al rl)
+     (VALID1: regs_valid pr s)
+     (VALID2: regs_valid rl s),
+   tr_exprlist s'.(st_code) map pr al ns nd rl
+   /\ state_incr s s').
+Proof.
+  apply expr_condexpr_exprlist_ind; intros; try (monadInv TR).
+  (* Evar *)
+  generalize EQ; unfold find_var. caseEq (map_vars map)!i; intros; inv EQ1.
+  exploit add_move_charact; eauto.
+  intros [A B].
+  split. econstructor; eauto.
+    inv OK. left; congruence. right; eauto.
+  auto.
+  (* Eop *)
+  inv OK. 
+  exploit (H _ _ _ _ _ _ pr EQ2); eauto with rtlg. intros [A B].
+  split. econstructor; eauto.
+  eapply instr_at_incr; eauto.
+  apply state_incr_trans with s1; eauto with rtlg.
+  (* Eload *)
+  inv OK.
+  exploit (H _ _ _ _ _ _ pr EQ2); eauto with rtlg. intros [A B].
+  split. econstructor; eauto.
+  eapply instr_at_incr; eauto.
+  apply state_incr_trans with s1; eauto with rtlg.
+  (* Estore *)
+  inv OK.
+  assert (state_incr s s1). eauto with rtlg. 
+  exploit (H0 _ _ _ _ _ _ (x ++ pr) EQ0).
+  eauto with rtlg.
+  apply target_reg_ok_append. constructor; auto. 
+  intros. exploit alloc_regs_fresh_or_in_map; eauto.
+  intros [A|B]. auto. right. apply sym_not_equal.
+  eapply valid_fresh_different; eauto with rtlg. 
+  red; intros. elim (in_app_or _ _ _ H4); intro. 
+  exploit alloc_regs_valid; eauto with rtlg.
+  generalize (VALID _ H5). eauto with rtlg. 
+  eauto with rtlg. 
+  intros [A B].
+  exploit (H _ _ _ _ _ _ pr EQ3); eauto with rtlg. 
+  intros [C D].
+  split. econstructor; eauto.
+  apply tr_expr_incr with s2; eauto with rtlg. 
+  apply instr_at_incr with s1; eauto with rtlg. 
+  eauto with rtlg.
+  (* Ecall *)
+  inv OK.
+  assert (state_incr s0 s3).
+    apply state_incr_trans with s1. eauto with rtlg.
+    apply state_incr_trans with s2; eauto with rtlg.
+  assert (regs_valid (x :: pr) s1).
+    apply regs_valid_cons; eauto with rtlg. 
+  exploit (H0 _ _ _ _ _ _ (x :: pr) EQ2).
+  eauto with rtlg. 
+  apply alloc_regs_target_ok with s1 s2; eauto with rtlg.
+  eauto with rtlg.
+  apply regs_valid_incr with s2; eauto with rtlg.
+  intros [A B].
+  exploit (H _ _ _ _ _ _ pr EQ4).
+  eauto with rtlg.
+  eauto with rtlg.
+  apply regs_valid_incr with s0; eauto with rtlg.
+  apply reg_valid_incr with s1; eauto with rtlg.
+  intros [C D].
+  split. econstructor; eauto.
+  apply tr_exprlist_incr with s4; eauto.
+  apply instr_at_incr with s3; eauto with rtlg. 
+  eauto with rtlg.
+  (* Econdition *)
+  inv OK.
+  exploit (H1 _ _ _ _ _ _ pr EQ); eauto with rtlg.
+  constructor; auto.
+  intros [A B].
+  exploit (H0 _ _ _ _ _ _ pr EQ1); eauto with rtlg.
+  constructor; auto.
+  intros [C D].
+  exploit (H _ _ _ _ _ _ pr EQ2); eauto with rtlg.
+  intros [E F].
+  split. econstructor; eauto. 
+  apply tr_expr_incr with s1; auto.
+  apply tr_expr_incr with s0; eauto with rtlg.
+  apply state_incr_trans with s0; auto.
+  apply state_incr_trans with s1; auto.
+  (* Elet *)
+  inv OK.
+  exploit (H0 _ _ _ _ _ _ pr EQ1). 
+  apply add_letvar_valid; eauto with rtlg. 
+  constructor; auto. 
+  red; unfold reg_in_map. simpl. intros [[id A] | [B | C]].
+  elim H1. left; exists id; auto.
+  subst x. apply valid_fresh_absurd with rd s. auto. eauto with rtlg.
+  elim H1. right; auto.
+  eauto with rtlg. eauto with rtlg. 
+  intros [A B].
+  exploit (H _ _ _ _ _ _ pr EQ2); eauto with rtlg. intros [C D].
+  split. econstructor.
+  eapply new_reg_not_in_map; eauto with rtlg. eauto. 
+  apply tr_expr_incr with s1; auto.
+  eauto with rtlg.
+  (* Eletvar *)
+  generalize EQ; unfold find_letvar. caseEq (nth_error (map_letvars map) n); intros; inv EQ0.
+  monadInv EQ1.
+  exploit add_move_charact; eauto.
+  intros [A B].
+  split. econstructor; eauto. 
+    inv OK. left; congruence. right; eauto.
+  auto.
+  monadInv EQ1.
+  (* Ealloc *)
+  inv OK. 
+  exploit (H _ _ _ _ _ _ pr EQ2); eauto with rtlg.
+  intros [A B].
+  split. econstructor; eauto.
+  eapply instr_at_incr; eauto.
+  apply state_incr_trans with s1; eauto with rtlg.
+
+  (* CEtrue *)
+  split. constructor. auto with rtlg.
+  (* CEfalse *)
+  split. constructor. auto with rtlg.
+  (* CEcond *)
+  exploit (H _ _ _ _ _ _ pr EQ2); eauto with rtlg.
+  intros [A B].
+  split. econstructor; eauto. 
+  apply instr_at_incr with s1; eauto with rtlg. 
+  eauto with rtlg.
+  (* CEcondition *)
+  exploit (H1 _ _ _ _ _ _ pr EQ); eauto with rtlg.
+  intros [A B].
+  exploit (H0 _ _ _ _ _ _ pr EQ1); eauto with rtlg.
+  intros [C D].
+  exploit (H _ _ _ _ _ _ pr EQ2); eauto with rtlg.
+  intros [E F].
+  split. econstructor; eauto. 
+  apply tr_condition_incr with s1; eauto with rtlg.
+  apply tr_condition_incr with s0; eauto with rtlg.
+  eauto with rtlg.
+
+  (* Enil *)
+  destruct rl; inv TR. split. constructor. eauto with rtlg.
+  (* Econs *)
+  destruct rl; simpl in TR; monadInv TR. inv OK. 
+  exploit H0; eauto.
+  apply regs_valid_cons. apply VALID2. auto with coqlib. auto. 
+  red; intros; apply VALID2; auto with coqlib.
+  intros [A B]. 
+  exploit H; eauto.
+  eauto with rtlg.
+  eauto with rtlg.
+  generalize (VALID2 r (in_eq _ _)). eauto with rtlg.
+  intros [C D].
+  split. econstructor; eauto.
+  apply tr_exprlist_incr with s0; auto.
+  apply state_incr_trans with s0; eauto with rtlg.
+Qed.
+
+Lemma transl_expr_charact:
+  forall a map rd nd s ns s'
+     (TR: transl_expr map a rd nd s = OK ns s')
+     (WF: map_valid map s)
+     (OK: target_reg_ok map nil a rd)
+     (VALID2: reg_valid rd s),
+   tr_expr s'.(st_code) map nil a ns nd rd
+   /\ state_incr s s'.
+Proof.
+  destruct transl_expr_condexpr_list_charact as [A [B C]].
+  intros. eapply A; eauto with rtlg.
+Qed.
+
+Lemma transl_condexpr_charact:
+  forall a map ntrue nfalse s ns s'
+     (TR: transl_condition map a ntrue nfalse s = OK ns s')
+     (WF: map_valid map s),
+   tr_condition s'.(st_code) map nil a ns ntrue nfalse
+   /\ state_incr s s'.
+Proof.
+  destruct transl_expr_condexpr_list_charact as [A [B C]].
+  intros. eapply B; eauto with rtlg.
+Qed.
+
+Lemma tr_switch_extends:
+  forall s1 s2, state_extends s1 s2 ->
+  forall r nexits t ns,
+  tr_switch s1.(st_code) r nexits t ns ->
+  tr_switch s2.(st_code) r nexits t ns.
+Proof.
+  induction 2; econstructor; eauto with rtlg.
+  eapply instr_at_extends; eauto.
+  eapply instr_at_extends; eauto.
+Qed.
+
+Lemma tr_stmt_extends:
+  forall s1 s2, state_extends s1 s2 ->
+  forall map s ns nd nexits nret rret,
+  tr_stmt s1.(st_code) map s ns nd nexits nret rret ->
+  tr_stmt s2.(st_code) map s ns nd nexits nret rret.
+Proof.
+  intros s1 s2 EXT.
+  destruct (tr_expr_condition_exprlist_extends s1 s2 EXT) as [A [B C]].
+  pose (AT := fun pc i => instr_at_extends s1 s2 pc i EXT).
+  induction 1; econstructor; eauto.
+  eapply tr_move_extends; eauto.
+  eapply tr_switch_extends; eauto.
+Qed.
+
+Lemma tr_stmt_incr:
+  forall s1 s2, state_incr s1 s2 ->
+  forall map s ns nd nexits nret rret,
+  tr_stmt s1.(st_code) map s ns nd nexits nret rret ->
+  tr_stmt s2.(st_code) map s ns nd nexits nret rret.
+Proof.
+  intros. eapply tr_stmt_extends; eauto with rtlg.
+Qed.
+
+Lemma transl_exit_charact:
+  forall nexits n s ne s',
+  transl_exit nexits n s = OK ne s' ->
+  nth_error nexits n = Some ne /\ s' = s.
+Proof.
+  intros until s'. unfold transl_exit. 
+  destruct (nth_error nexits n); intro; monadInv H. auto.
+Qed.
+
+Lemma transl_switch_charact:
+  forall r nexits t s ns s',
+  transl_switch r nexits t s = OK ns s' ->
+  tr_switch s'.(st_code) r nexits t ns /\ state_incr s s'.
+Proof.
+  induction t; simpl; intros.
+  exploit transl_exit_charact; eauto. intros [A B].
+  split. econstructor; eauto. subst s'; auto with rtlg.
+
+  monadInv H.
+  exploit transl_exit_charact; eauto. intros [A B]. subst s1.
+  exploit IHt; eauto. intros [C D].
+  split. econstructor; eauto with rtlg. 
+  apply tr_switch_extends with s0; eauto with rtlg.
+  eauto with rtlg.
+
+  monadInv H.
+  exploit IHt2; eauto. intros [A B].
+  exploit IHt1; eauto. intros [C D].
+  split. econstructor.
+  apply tr_switch_extends with s1; eauto with rtlg.
+  apply tr_switch_extends with s0; eauto with rtlg.
+  eauto with rtlg.
+  eauto with rtlg.
+Qed.
+ 
+Lemma transl_stmt_charact:
+  forall map stmt nd nexits nret rret s ns s'
+    (TR: transl_stmt map stmt nd nexits nret rret s = OK ns s')
+    (WF: map_valid map s)
+    (OK: return_reg_ok s map rret),
+  tr_stmt s'.(st_code) map stmt ns nd nexits nret rret
+  /\ state_incr s s'.
+Proof.
+  induction stmt; intros; simpl in TR; try (monadInv TR).
+  (* Sskip *)
+  split. constructor. auto with rtlg.
+  (* Sexpr *)
+  exploit transl_expr_charact; eauto with rtlg. intros [A B].
+  split. econstructor; eauto. eauto with rtlg.
+  (* Sassign *)
+  exploit add_move_charact; eauto. intros [A B].
+  exploit transl_expr_charact; eauto with rtlg. 
+    apply map_valid_incr with s; eauto with rtlg. 
+  intros [C D].
+  generalize EQ. unfold find_var. caseEq (map_vars map)!i; intros; inv EQ2.
+  split. econstructor; eauto.
+  apply tr_move_extends with s2; eauto with rtlg.
+  eauto with rtlg. 
+  (* Sseq *)
+  exploit IHstmt2; eauto with rtlg. intros [A B].
+  exploit IHstmt1; eauto with rtlg. intros [C D].
+  split. econstructor; eauto. apply tr_stmt_incr with s0; eauto with rtlg.
+  eauto with rtlg.
+  (* Sifthenelse *)
+  destruct (more_likely c stmt1 stmt2); monadInv TR.
+  exploit IHstmt2; eauto. intros [A B].
+  exploit IHstmt1; eauto with rtlg. intros [C D].
+  exploit transl_condexpr_charact; eauto with rtlg. intros [E F].
+  split. econstructor; eauto. 
+  apply tr_stmt_incr with s1; eauto with rtlg.
+  apply tr_stmt_incr with s0; eauto with rtlg.
+  eauto with rtlg.
+  exploit IHstmt1; eauto. intros [A B].
+  exploit IHstmt2; eauto with rtlg. intros [C D].
+  exploit transl_condexpr_charact; eauto with rtlg. intros [E F].
+  split. econstructor; eauto. 
+  apply tr_stmt_incr with s0; eauto with rtlg.
+  apply tr_stmt_incr with s1; eauto with rtlg.
+  eauto with rtlg.
+  (* Sloop *)
+  assert (state_incr s s0).
+    eapply reserve_instr_incr; eauto.  
+  exploit IHstmt; eauto with rtlg. intros [A B].
+  split. econstructor. 
+    apply tr_stmt_extends with s1; eauto.
+    eapply update_instr_extends; eauto.
+  unfold update_instr in EQ0.
+  destruct (plt x (st_nextnode s1)); inv EQ0.
+  simpl. apply PTree.gss. 
+  eapply update_instr_incr; eauto.
+  (* Sblock *)
+  exploit IHstmt; eauto. intros [A B].
+  split. econstructor; eauto. auto.
+  (* Sexit *)
+  exploit transl_exit_charact; eauto. intros [A B]. subst s'.
+  split. econstructor; eauto. auto with rtlg.
+  (* Sswitch *)
+  generalize TR; clear TR.
+  set (t := compile_switch n l).
+  caseEq (validate_switch n l t); intro VALID; intros.
+  monadInv TR. 
+  exploit transl_switch_charact; eauto with rtlg. intros [A B].
+  exploit transl_expr_charact; eauto with rtlg. intros [C D].
+  split. econstructor; eauto with rtlg. 
+  apply tr_switch_extends with s1; eauto with rtlg.
+  eauto with rtlg.
+  monadInv TR. 
+  (* Sreturn *)
+  destruct o; destruct rret; inv TR.
+  inv OK.  
+  exploit transl_expr_charact; eauto with rtlg.
+  constructor. auto. simpl; tauto. 
+  intros [A B].
+  split. econstructor; eauto. auto.
+  split. constructor. auto with rtlg.
+  (* Stailcall *)
+  assert (state_incr s0 s2) by eauto with rtlg.
+  destruct transl_expr_condexpr_list_charact as [A [B C]].
+  exploit (C _ _ _ _ _ _ _ (x ::nil) EQ2); eauto with rtlg.
+  apply alloc_regs_target_ok with s1 s2; eauto with rtlg. 
+  apply regs_valid_cons. eauto with rtlg. apply regs_valid_nil.
+  apply regs_valid_cons. apply reg_valid_incr with s1; eauto with rtlg.
+  apply regs_valid_nil.
+  apply regs_valid_incr with s2; eauto with rtlg.
+  intros [D E].
+  exploit (A _ _ _ _ _ _ _ nil EQ4); eauto with rtlg.
+  apply reg_valid_incr with s1; eauto with rtlg. 
+  intros [F G].
+  split. econstructor; eauto. 
+  apply tr_exprlist_incr with s4; eauto.
+  apply instr_at_incr with s3; eauto with rtlg.
+  eauto with rtlg.  
+Qed.
+
+Lemma transl_function_charact:
+  forall f tf,
+  transl_function f = Errors.OK tf ->
+  tr_function f tf.
+Proof.
+  intros until tf. unfold transl_function. 
+  caseEq (transl_fun f init_state). congruence. 
+  intros [nentry rparams] sfinal TR E. inv E. 
+  monadInv TR.
+  exploit add_vars_valid. eexact EQ. apply init_mapping_valid.
+  intros [A B]. 
+  exploit add_vars_valid. eexact EQ1. auto. 
+  intros [C D].
+  exploit transl_stmt_charact; eauto with rtlg. 
+  unfold ret_reg. destruct (sig_res (CminorSel.fn_sig f)).
+  constructor. eauto with rtlg. eauto with rtlg.
+  constructor.
+  intros [E F].
+  eapply tr_function_intro; eauto with rtlg.
+  apply instr_at_incr with s2; eauto with rtlg. 
+Qed.
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 97d063ac..40567491 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -1,6 +1,7 @@
 (** Typing rules and a type inference algorithm for RTL. *)
 
 Require Import Coqlib.
+Require Import Errors.
 Require Import Maps.
 Require Import AST.
 Require Import Op.
@@ -22,15 +23,17 @@ Require Conventions.
   enabling each pseudo-register to be mapped to a single hardware register
   or stack location of the correct type.
 
-
-  The typing judgement for instructions is of the form [wt_instr f env instr],
-  where [f] is the current function (used to type-check [Ireturn] 
-  instructions) and [env] is a typing environment associating types to
-  pseudo-registers.  Since pseudo-registers have unique types throughout
-  the function, the typing environment does not change during type-checking
-  of individual instructions.  One point to note is that we have two
-  polymorphic operators, [Omove] and [Oundef], which can work both
-  over integers and floats.
+  Finally, we also check that the successors of instructions
+  are valid, i.e. refer to non-empty nodes in the CFG.
+
+  The typing judgement for instructions is of the form [wt_instr f env
+  instr], where [f] is the current function (used to type-check
+  [Ireturn] instructions) and [env] is a typing environment
+  associating types to pseudo-registers.  Since pseudo-registers have
+  unique types throughout the function, the typing environment does
+  not change during type-checking of individual instructions.  One
+  point to note is that we have one polymorphic operator, [Omove],
+  which can work over both integers and floats.
 *)
 
 Definition regenv := reg -> typ.
@@ -38,51 +41,67 @@ Definition regenv := reg -> typ.
 Section WT_INSTR.
 
 Variable env: regenv.
-Variable funsig: signature.
+Variable funct: function.
+
+Definition valid_successor (s: node) : Prop :=
+  exists i, funct.(fn_code)!s = Some i.
 
 Inductive wt_instr : instruction -> Prop :=
   | wt_Inop:
       forall s,
+      valid_successor s ->
       wt_instr (Inop s)
   | wt_Iopmove:
       forall r1 r s,
       env r1 = env r ->
+      valid_successor s ->
       wt_instr (Iop Omove (r1 :: nil) r s)
-  | wt_Iopundef:
-      forall r s,
-      wt_instr (Iop Oundef nil r s)
   | wt_Iop:
       forall op args res s,
-      op <> Omove -> op <> Oundef ->
+      op <> Omove ->
       (List.map env args, env res) = type_of_operation op ->
+      valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
       forall chunk addr args dst s,
       List.map env args = type_of_addressing addr ->
       env dst = type_of_chunk chunk ->
+      valid_successor s ->
       wt_instr (Iload chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
       List.map env args = type_of_addressing addr ->
       env src = type_of_chunk chunk ->
+      valid_successor s ->
       wt_instr (Istore chunk addr args src s)
   | wt_Icall:
       forall sig ros args res s,
       match ros with inl r => env r = Tint | inr s => True end ->
       List.map env args = sig.(sig_args) ->
-      env res = match sig.(sig_res) with None => Tint | Some ty => ty end ->
+      env res = proj_sig_res sig ->
+      valid_successor s ->
       wt_instr (Icall sig ros args res s)
+  | wt_Itailcall:
+      forall sig ros args,
+      match ros with inl r => env r = Tint | inr s => True end ->
+      sig.(sig_res) = funct.(fn_sig).(sig_res) ->
+      List.map env args = sig.(sig_args) ->
+      Conventions.tailcall_possible sig ->
+      wt_instr (Itailcall sig ros args)
   | wt_Ialloc:
       forall arg res s,
       env arg = Tint -> env res = Tint ->
+      valid_successor s ->
       wt_instr (Ialloc arg res s)
   | wt_Icond:
       forall cond args s1 s2,
       List.map env args = type_of_condition cond ->
+      valid_successor s1 ->
+      valid_successor s2 ->
       wt_instr (Icond cond args s1 s2)
   | wt_Ireturn: 
       forall optres,
-      option_map env optres = funsig.(sig_res) ->
+      option_map env optres = funct.(fn_sig).(sig_res) ->
       wt_instr (Ireturn optres).
 
 End WT_INSTR.
@@ -90,7 +109,7 @@ End WT_INSTR.
 (** A function [f] is well-typed w.r.t. a typing environment [env],
    written [wt_function env f], if all instructions are well-typed,
    parameters agree in types with the function signature, and
-   names of parameters are pairwise distinct. *)
+   parameters are pairwise distinct. *)
 
 Record wt_function (f: function) (env: regenv): Prop :=
   mk_wt_function {
@@ -100,7 +119,9 @@ Record wt_function (f: function) (env: regenv): Prop :=
       list_norepet f.(fn_params);
     wt_instrs:
       forall pc instr, 
-      f.(fn_code)!pc = Some instr -> wt_instr env f.(fn_sig) instr
+      f.(fn_code)!pc = Some instr -> wt_instr env f instr;
+    wt_entrypoint:
+      valid_successor f f.(fn_entrypoint)
 }.
 
 Inductive wt_fundef: fundef -> Prop :=
@@ -142,6 +163,9 @@ Variable env: regenv.
 Lemma typ_eq: forall (t1 t2: typ), {t1=t2} + {t1<>t2}.
 Proof. decide equality. Qed.
 
+Lemma opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}.
+Proof. decide equality. apply typ_eq. Qed.
+
 Definition check_reg (r: reg) (ty: typ): bool :=
   if typ_eq (env r) ty then true else false.
 
@@ -156,34 +180,47 @@ Definition check_op (op: operation) (args: list reg) (res: reg): bool :=
   let (targs, tres) := type_of_operation op in
   check_regs args targs && check_reg res tres.
 
+Definition check_successor (s: node) : bool :=
+  match funct.(fn_code)!s with None => false | Some i => true end.
+
 Definition check_instr (i: instruction) : bool :=
   match i with
-  | Inop _ =>
-      true
-  | Iop Omove (arg::nil) res _ =>
-      if typ_eq (env arg) (env res) then true else false
-  | Iop Omove args res _ =>
-      false
-  | Iop Oundef nil res _ =>
-      true
-  | Iop Oundef args res _ =>
+  | Inop s =>
+      check_successor s
+  | Iop Omove (arg::nil) res s =>
+      if typ_eq (env arg) (env res) 
+      then check_successor s
+      else false
+  | Iop Omove args res s =>
       false
-  | Iop op args res _ =>
-      check_op op args res
-  | Iload chunk addr args dst _ =>
+  | Iop op args res s =>
+      check_op op args res && check_successor s
+  | Iload chunk addr args dst s =>
       check_regs args (type_of_addressing addr)
       && check_reg dst (type_of_chunk chunk)
-  | Istore chunk addr args src _ =>
+      && check_successor s
+  | Istore chunk addr args src s =>
       check_regs args (type_of_addressing addr)
       && check_reg src (type_of_chunk chunk)
-  | Icall sig ros args res _ =>
+      && check_successor s
+  | Icall sig ros args res s =>
+      match ros with inl r => check_reg r Tint | inr s => true end
+      && check_regs args sig.(sig_args)
+      && check_reg res (proj_sig_res sig)
+      && check_successor s
+  | Itailcall sig ros args =>
       match ros with inl r => check_reg r Tint | inr s => true end
       && check_regs args sig.(sig_args)
-      && check_reg res (match sig.(sig_res) with None => Tint | Some ty => ty end)
-  | Ialloc arg res _ =>
-      check_reg arg Tint && check_reg res Tint
-  | Icond cond args _ _ =>
+      && opt_typ_eq sig.(sig_res) funct.(fn_sig).(sig_res)
+      && zeq (Conventions.size_arguments sig) 14
+  | Ialloc arg res s =>
+      check_reg arg Tint 
+      && check_reg res Tint
+      && check_successor s
+  | Icond cond args s1 s2 =>
       check_regs args (type_of_condition cond)
+      && check_successor s1
+      && check_successor s2
   | Ireturn optres =>
       match optres, funct.(fn_sig).(sig_res) with
       | None, None => true
@@ -203,6 +240,14 @@ Fixpoint check_instrs (instrs: list (node * instruction)) : bool :=
 
 (** ** Correctness of the type-checking algorithm *)
 
+Ltac elimAndb :=
+  match goal with
+  | [ H: _ && _ = true |- _ ] =>
+      elim (andb_prop _ _ H); clear H; intros; elimAndb
+  | _ =>
+      idtac
+  end.
+
 Lemma check_reg_correct:
   forall r ty, check_reg r ty = true -> env r = ty.
 Proof.
@@ -215,8 +260,8 @@ Lemma check_regs_correct:
 Proof.
   induction rl; destruct tyl; simpl; intros.
   auto. discriminate. discriminate.
-  elim (andb_prop _ _ H); intros. 
-  rewrite (check_reg_correct _ _ H0). rewrite (IHrl tyl H1). auto.
+  elimAndb.
+  rewrite (check_reg_correct _ _ H). rewrite (IHrl tyl H0). auto.
 Qed.
 
 Lemma check_op_correct:
@@ -225,45 +270,65 @@ Lemma check_op_correct:
   (List.map env args, env res) = type_of_operation op.
 Proof.
   unfold check_op; intros.
-  destruct (type_of_operation op) as [targs tres]. 
-  elim (andb_prop _ _ H); intros.
-  rewrite (check_regs_correct _ _ H0).
-  rewrite (check_reg_correct _ _ H1).
+  destruct (type_of_operation op) as [targs tres].
+  elimAndb. 
+  rewrite (check_regs_correct _ _ H).
+  rewrite (check_reg_correct _ _ H0).
   auto.
 Qed.
 
+Lemma check_successor_correct:
+  forall s,
+  check_successor s = true -> valid_successor funct s.
+Proof.
+  intro; unfold check_successor, valid_successor.
+  destruct (fn_code funct)!s; intro.
+  exists i; auto.
+  discriminate.
+Qed.
+
 Lemma check_instr_correct:
-  forall i, check_instr i = true -> wt_instr env funct.(fn_sig) i.
+  forall i, check_instr i = true -> wt_instr env funct i.
 Proof.
-  unfold check_instr; intros; destruct i.
+  unfold check_instr; intros; destruct i; elimAndb.
   (* nop *)
-  constructor.
+  constructor. apply check_successor_correct; auto.
   (* op *)
-  destruct o;
-  try (apply wt_Iop; [congruence|congruence|apply check_op_correct;auto]).
+  destruct o; elimAndb;
+  try (apply wt_Iop; [ congruence
+                     | apply check_op_correct; auto
+                     | apply check_successor_correct; auto ]).
   destruct l; try discriminate. destruct l; try discriminate.
   destruct (typ_eq (env r0) (env r)); try discriminate.
-  apply wt_Iopmove; auto.
-  destruct l; try discriminate.
-  apply wt_Iopundef.
+  apply wt_Iopmove; auto. apply check_successor_correct; auto.
   (* load *)
-  elim (andb_prop _ _ H); intros.
   constructor. apply check_regs_correct; auto. apply check_reg_correct; auto.
+  apply check_successor_correct; auto.
   (* store *)
-  elim (andb_prop _ _ H); intros.
   constructor. apply check_regs_correct; auto. apply check_reg_correct; auto.
+  apply check_successor_correct; auto.
   (* call *)
-  elim (andb_prop _ _ H); clear H; intros.
-  elim (andb_prop _ _ H); clear H; intros.
   constructor.
   destruct s0; auto. apply check_reg_correct; auto.
   apply check_regs_correct; auto.
   apply check_reg_correct; auto.
+  apply check_successor_correct; auto.
+  (* tailcall *)
+  constructor.
+  destruct s0; auto. apply check_reg_correct; auto.
+  eapply proj_sumbool_true; eauto.
+  apply check_regs_correct; auto.
+  rewrite Conventions.tailcall_possible_size. 
+  eapply proj_sumbool_true; eauto.
   (* alloc *)
-  elim (andb_prop _ _ H); intros.
-  constructor; apply check_reg_correct; auto.
+  constructor. 
+  apply check_reg_correct; auto.
+  apply check_reg_correct; auto.
+  apply check_successor_correct; auto.
   (* cond *)
   constructor. apply check_regs_correct; auto.
+  apply check_successor_correct; auto.
+  apply check_successor_correct; auto.
   (* return *)
   constructor. 
   destruct o; simpl; destruct funct.(fn_sig).(sig_res); try discriminate.
@@ -274,11 +339,11 @@ Qed.
 Lemma check_instrs_correct:
   forall instrs,
   check_instrs instrs = true ->
-  forall pc i, In (pc, i) instrs -> wt_instr env funct.(fn_sig) i.
+  forall pc i, In (pc, i) instrs -> wt_instr env funct i.
 Proof.
   induction instrs; simpl; intros.
   elim H0.
-  destruct a as [pc' i']. elim (andb_prop _ _ H); clear H; intros.
+  destruct a as [pc' i']. elimAndb. 
   elim H0; intro.
   inversion H2; subst pc' i'. apply check_instr_correct; auto.
   eauto.
@@ -288,20 +353,24 @@ End TYPECHECKING.
 
 (** ** The type inference function **)
 
-Definition type_function (f: function): option regenv :=
+Open Scope string_scope.
+
+Definition type_function (f: function): res regenv :=
   let instrs := PTree.elements f.(fn_code) in
   match infer_type_environment f instrs with
-  | None => None
+  | None => Error (msg "RTL type inference error")
   | Some env =>
       if check_regs env f.(fn_params) f.(fn_sig).(sig_args)
       && check_params_norepet f.(fn_params)
       && check_instrs f env instrs
-      then Some env else None
+      && check_successor f f.(fn_entrypoint)
+      then OK env
+      else Error (msg "RTL type checking error")
   end.
 
 Lemma type_function_correct:
   forall f env,
-  type_function f = Some env ->
+  type_function f = OK env ->
   wt_function f env.
 Proof.
   unfold type_function; intros until env.
@@ -311,6 +380,7 @@ Proof.
   caseEq (check_regs env' f.(fn_params) f.(fn_sig).(sig_args)); intro; simpl; try congruence.
   caseEq (check_params_norepet f.(fn_params)); intro; simpl; try congruence.
   caseEq (check_instrs f env' instrs); intro; simpl; try congruence.
+  caseEq (check_successor f (fn_entrypoint f)); intro; simpl; try congruence.
   intro EQ; inversion EQ; subst env'.
   constructor. 
   apply check_regs_correct; auto.
@@ -318,6 +388,7 @@ Proof.
   destruct (list_norepet_dec Reg.eq (fn_params f)). auto. discriminate.
   intros. eapply check_instrs_correct. eauto. 
   unfold instrs. apply PTree.elements_correct. eauto.
+  apply check_successor_correct. auto.
   congruence.
 Qed.
 
@@ -330,7 +401,8 @@ Qed.
   property: if the execution does not fail because of a failed run-time
   test, the result values and register states match the static
   typing assumptions.  This preservation property will be useful
-  later for the proof of semantic equivalence between [Machabstr] and [Mach].
+  later for the proof of semantic equivalence between
+  [Machabstr] and [Machconcr].
   Even though we do not need it for [RTL], we show preservation for [RTL]
   here, as a warm-up exercise and because some of the lemmas will be
   useful later. *)
@@ -340,6 +412,7 @@ Require Import Values.
 Require Import Mem.
 Require Import Integers.
 Require Import Events.
+Require Import Smallstep.
 
 Definition wt_regset (env: regenv) (rs: regset) : Prop :=
   forall r, Val.has_type (rs#r) (env r).
@@ -385,6 +458,36 @@ Proof.
   induction 1. inversion H0; exact I.
 Qed.
 
+Inductive wt_stackframes: list stackframe -> option typ -> Prop :=
+  | wt_stackframes_nil:
+      wt_stackframes nil (Some Tint)
+  | wt_stackframes_cons:
+      forall s res f sp pc rs env tyres,
+      wt_function f env ->
+      wt_regset env rs ->
+      env res = match tyres with None => Tint | Some t => t end ->
+      wt_stackframes s (sig_res (fn_sig f)) ->
+      wt_stackframes (Stackframe res (fn_code f) sp pc rs :: s) tyres.
+
+Inductive wt_state: state -> Prop :=
+  | wt_state_intro:
+      forall s f sp pc rs m env
+        (WT_STK: wt_stackframes s (sig_res (fn_sig f)))
+        (WT_FN: wt_function f env)
+        (WT_RS: wt_regset env rs),
+      wt_state (State s (fn_code f) sp pc rs m)
+  | wt_state_call:
+      forall s f args m,
+      wt_stackframes s (sig_res (funsig f)) ->
+      wt_fundef f ->
+      Val.has_type_list args (sig_args (funsig f)) ->
+      wt_state (Callstate s f args m)
+  | wt_state_return:
+      forall s v m tyres,
+      wt_stackframes s tyres ->
+      Val.has_type v (match tyres with None => Tint | Some t => t end) ->
+      wt_state (Returnstate s v m).
+
 Section SUBJECT_REDUCTION.
 
 Variable p: program.
@@ -393,90 +496,77 @@ Hypothesis wt_p: wt_program p.
 
 Let ge := Genv.globalenv p.
 
-Definition exec_instr_subject_reduction
-    (c: code) (sp: val)
-    (pc: node) (rs: regset) (m: mem) (t: trace)
-    (pc': node) (rs': regset) (m': mem) : Prop :=
-  forall env f
-         (CODE: c = fn_code f)
-         (WT_FN: wt_function f env)
-         (WT_RS: wt_regset env rs),
-  wt_regset env rs'.
-
-Definition exec_function_subject_reduction
-    (f: fundef) (args: list val) (m: mem) (t: trace) (res: val) (m': mem) : Prop :=
-  wt_fundef f ->  
-  Val.has_type_list args (sig_args (funsig f)) ->
-  Val.has_type res (proj_sig_res (funsig f)).
-
 Lemma subject_reduction:
-  forall f args m t res m',
-  exec_function ge f args m t res m' ->
-  exec_function_subject_reduction f args m t res m'.
+  forall st1 t st2, step ge st1 t st2 ->
+  forall (WT: wt_state st1), wt_state st2.
 Proof.
-  apply (exec_function_ind_3 ge
-           exec_instr_subject_reduction
-           exec_instr_subject_reduction
-           exec_function_subject_reduction);
-  intros; red; intros;
-  try (rewrite CODE in H;
-       generalize (wt_instrs _ _ WT_FN pc _ H);
+  induction 1; intros; inv WT;
+  try (generalize (wt_instrs _ _ WT_FN pc _ H);
        intro WT_INSTR;
-       inversion WT_INSTR).
- 
-  assumption.
-
+       inv WT_INSTR).
+  (* Inop *)
+  econstructor; eauto.
+  (* Iop *)
+  econstructor; eauto.
   apply wt_regset_assign. auto. 
-  subst op. subst args. simpl in H0. injection H0; intro. 
-  subst v. rewrite <- H2. apply WT_RS.
-
-  apply wt_regset_assign. auto.
-  subst op; subst args; simpl in H0. injection H0; intro; subst v.
-  simpl; auto.
-
+  simpl in H0. inv H0. rewrite <- H3. apply WT_RS.
+  econstructor; eauto.
   apply wt_regset_assign. auto.
   replace (env res) with (snd (type_of_operation op)).
-  apply type_of_operation_sound with fundef ge rs##args sp; auto.
-  rewrite <- H7. reflexivity.
-
+  apply type_of_operation_sound with fundef ge rs##args sp m; auto.
+  rewrite <- H6. reflexivity.
+  (* Iload *)
+  econstructor; eauto.
   apply wt_regset_assign. auto. rewrite H8. 
   eapply type_of_chunk_correct; eauto.
-
-  assumption.
-
-  apply wt_regset_assign. auto. rewrite H11. rewrite <- H1.
+  (* Istore *)
+  econstructor; eauto.
+  (* Icall *)
   assert (wt_fundef f).
     destruct ros; simpl in H0.
     pattern f. apply Genv.find_funct_prop with fundef unit p (rs#r).
     exact wt_p. exact H0. 
-    caseEq (Genv.find_symbol ge i); intros; rewrite H12 in H0.
+    caseEq (Genv.find_symbol ge i); intros; rewrite H1 in H0.
     pattern f. apply Genv.find_funct_ptr_prop with fundef unit p b.
     exact wt_p. exact H0.
     discriminate.
-  eapply H3. auto. rewrite H1. rewrite <- H10.
-  apply wt_regset_list; auto. 
-
-  apply wt_regset_assign. auto. rewrite H6; exact I. 
-
-  assumption.
-  assumption.
-  assumption.
-  eauto.
-  eauto.
-
-  inversion H4; subst f0.
-  assert (WT_INIT: wt_regset env (init_regs args (fn_params f))).
-    apply wt_init_regs. rewrite (wt_params _ _ H7). assumption.
-  generalize (H1 env f (refl_equal (fn_code f)) H7 WT_INIT). 
-  intro WT_RS.
-  generalize (wt_instrs _ _ H7 pc _ H2).
-  intro WT_INSTR; inversion WT_INSTR.
-  unfold proj_sig_res; simpl.
-  destruct or; simpl in H3; simpl in H8; rewrite <- H8.
-  rewrite H3. apply WT_RS. 
-  rewrite H3. simpl; auto.
-
-  simpl. eapply wt_event_match; eauto.
-Qed. 
+  econstructor; eauto.
+  econstructor; eauto.
+  rewrite <- H7. apply wt_regset_list. auto.
+  (* Itailcall *)
+  assert (wt_fundef f).
+    destruct ros; simpl in H0.
+    pattern f. apply Genv.find_funct_prop with fundef unit p (rs#r).
+    exact wt_p. exact H0. 
+    caseEq (Genv.find_symbol ge i); intros; rewrite H1 in H0.
+    pattern f. apply Genv.find_funct_ptr_prop with fundef unit p b.
+    exact wt_p. exact H0.
+    discriminate.
+  econstructor; eauto.
+  rewrite H5; auto.
+  rewrite <- H6. apply wt_regset_list. auto.
+  (* Ialloc *)  
+  econstructor; eauto.
+  apply wt_regset_assign. auto. rewrite H6; exact I.
+  (* Icond *)
+  econstructor; eauto.
+  econstructor; eauto.
+  (* Ireturn *)
+  econstructor; eauto. 
+  destruct or; simpl in *.
+  rewrite <- H1. apply WT_RS. exact I.
+  (* internal function *)
+  simpl in *. inv H5. inversion H1; subst.  
+  econstructor; eauto.
+  apply wt_init_regs; auto. rewrite wt_params0; auto.
+  (* external function *)
+  simpl in *. inv H5. 
+  econstructor; eauto. 
+  change (Val.has_type res (proj_sig_res (ef_sig ef))).
+  eapply wt_event_match; eauto.
+  (* return *)
+  inv H1. econstructor; eauto. 
+  apply wt_regset_assign; auto. congruence. 
+Qed.
 
 End SUBJECT_REDUCTION.
diff --git a/backend/Registers.v b/backend/Registers.v
index 578e4b87..e4b7b000 100644
--- a/backend/Registers.v
+++ b/backend/Registers.v
@@ -1,8 +1,9 @@
 (** Pseudo-registers and register states.
 
-  This library defines the type of pseudo-registers used in the RTL
-  intermediate language, and of mappings from pseudo-registers to
-  values as used in the dynamic semantics of RTL. *)
+  This library defines the type of pseudo-registers (also known as
+  "temporaries" in compiler literature) used in the RTL
+  intermediate language.  We also define finite sets and finite maps
+  over pseudo-registers. *)
 
 Require Import Coqlib.
 Require Import AST.
diff --git a/backend/Reload.v b/backend/Reload.v
new file mode 100644
index 00000000..58e17ff5
--- /dev/null
+++ b/backend/Reload.v
@@ -0,0 +1,211 @@
+(** Reloading, spilling, and explication of calling conventions. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Globalenvs.
+Require Import Op.
+Require Import Locations.
+Require Import LTLin.
+Require Import Conventions.
+Require Import Parallelmove.
+Require Import Linear.
+
+(** * Spilling and reloading *)
+
+(** Operations in the Linear language, like those of the target processor,
+  operate only over machine registers, but not over stack slots.
+  Consider the LTLin instruction
+<<
+        r1 <- Lop(Oadd, r1 :: r2 :: nil)
+>>
+  and assume that [r1] and [r2] are assigned to stack locations [S s1]
+  and [S s2], respectively.  The translated LTL code must load these
+  stack locations into temporary integer registers (this is called
+  ``reloading''), perform the [Oadd] operation over these temporaries,
+  leave the result in a temporary, then store the temporary back to
+  stack location [S s1] (this is called ``spilling'').  In other term,
+  the generated Linear code has the following shape:
+<<
+        IT1 <- Lgetstack s1;
+        IT2 <- Lgetstack s2;
+        IT1 <- Lop(Oadd, IT1 :: IT2 :: nil);
+        Lsetstack s1 IT1;
+>>
+  This section provides functions that assist in choosing appropriate
+  temporaries and inserting the required spilling and reloading
+  operations. *)
+
+(** ** Allocation of temporary registers for reloading and spilling. *)
+
+(** [reg_for l] returns a machine register appropriate for working
+    over the location [l]: either the machine register [m] if [l = R m],
+    or a temporary register of [l]'s type if [l] is a stack slot. *)
+
+Definition reg_for (l: loc) : mreg :=
+  match l with
+  | R r => r
+  | S s => match slot_type s with Tint => IT1 | Tfloat => FT1 end
+  end.
+
+(** [regs_for ll] is similar, for a list of locations [ll] of length
+    at most 3.  We ensure that distinct temporaries are used for
+    different elements of [ll]. *)
+
+Fixpoint regs_for_rec (locs: list loc) (itmps ftmps: list mreg)
+                      {struct locs} : list mreg :=
+  match locs, itmps, ftmps with
+  | l1 :: ls, it1 :: its, ft1 :: fts =>
+      match l1 with
+      | R r => r
+      | S s => match slot_type s with Tint => it1 | Tfloat => ft1 end
+      end
+      :: regs_for_rec ls its fts
+  | _, _, _ => nil
+  end.
+
+Definition regs_for (locs: list loc) :=
+  regs_for_rec locs (IT1 :: IT2 :: IT3 :: nil) (FT1 :: FT2 :: FT3 :: nil).
+
+(** ** Insertion of Linear reloads, stores and moves *)
+
+(** [add_spill src dst k] prepends to [k] the instructions needed
+  to assign location [dst] the value of machine register [mreg]. *)
+
+Definition add_spill (src: mreg) (dst: loc) (k: code) :=
+  match dst with
+  | R rd => if mreg_eq src rd then k else Lop Omove (src :: nil) rd :: k
+  | S sl => Lsetstack src sl :: k
+  end.
+
+(** [add_reload src dst k] prepends to [k] the instructions needed
+  to assign machine register [mreg] the value of the location [src]. *)
+
+Definition add_reload (src: loc) (dst: mreg) (k: code) :=
+  match src with
+  | R rs => if mreg_eq rs dst then k else Lop Omove (rs :: nil) dst :: k
+  | S sl => Lgetstack sl dst :: k
+  end.
+
+(** [add_reloads] is similar for a list of locations (as sources)
+  and a list of machine registers (as destinations).  *)
+
+Fixpoint add_reloads (srcs: list loc) (dsts: list mreg) (k: code)
+                                            {struct srcs} : code :=
+  match srcs, dsts with
+  | s1 :: sl, t1 :: tl => add_reload s1 t1 (add_reloads sl tl k)
+  | _, _ => k
+  end.
+
+(** [add_move src dst k] prepends to [k] the instructions that copy
+  the value of location [src] into location [dst]. *)
+
+Definition add_move (src dst: loc) (k: code) :=
+  if Loc.eq src dst then k else
+    match src, dst with
+    | R rs, _ =>
+        add_spill rs dst k
+    | _, R rd =>
+        add_reload src rd k
+    | S ss, S sd =>
+        let tmp :=
+          match slot_type ss with Tint => IT1 | Tfloat => FT1 end in
+        add_reload src tmp (add_spill tmp dst k)
+    end.
+
+(** [parallel_move srcs dsts k] is similar, but for a list of
+  source locations and a list of destination locations of the same
+  length.  This is a parallel move, meaning that some of the 
+  destinations can also occur as sources. *)
+
+Definition parallel_move (srcs dsts: list loc) (k: code) : code :=
+  List.fold_right
+    (fun p k => add_move (fst p) (snd p) k)
+     k (parmove srcs dsts).
+
+(** * Code transformation *)
+
+(** We insert appropriate reload, spill and parallel move operations
+  around each of the instructions of the source code. *)
+
+Definition transf_instr 
+     (f: LTLin.function) (instr: LTLin.instruction) (k: code) : code :=
+  match instr with
+  | LTLin.Lop op args res =>
+      match is_move_operation op args with
+      | Some src =>
+          add_move src res k
+      | None =>
+          let rargs := regs_for args in
+          let rres := reg_for res in
+          add_reloads args rargs (Lop op rargs rres :: add_spill rres res k)
+      end
+  | LTLin.Lload chunk addr args dst =>
+      let rargs := regs_for args in
+      let rdst := reg_for dst in
+      add_reloads args rargs
+        (Lload chunk addr rargs rdst :: add_spill rdst dst k)
+  | LTLin.Lstore chunk addr args src =>
+      match regs_for (src :: args) with
+      | nil => nil                       (* never happens *)
+      | rsrc :: rargs =>
+          add_reloads (src :: args) (rsrc :: rargs)
+            (Lstore chunk addr rargs rsrc :: k)
+      end
+  | LTLin.Lcall sig los args res =>
+      let largs := loc_arguments sig in
+      let rres  := loc_result sig in
+      match los with
+      | inl fn =>
+          add_reload fn IT3
+            (parallel_move args largs
+              (Lcall sig (inl _ IT3) :: add_spill rres res k))
+      | inr id =>
+          parallel_move args largs
+            (Lcall sig (inr _ id) :: add_spill rres res k)
+      end
+  | LTLin.Ltailcall sig los args =>
+      let largs := loc_arguments sig in
+      match los with
+      | inl fn =>
+          add_reload fn IT3
+            (parallel_move args largs
+              (Ltailcall sig (inl _ IT3) :: k))
+      | inr id =>
+          parallel_move args largs
+            (Ltailcall sig (inr _ id) :: k)
+      end
+  | LTLin.Lalloc arg res =>
+      add_reload arg loc_alloc_argument
+        (Lalloc :: add_spill loc_alloc_result res k)
+  | LTLin.Llabel lbl =>
+      Llabel lbl :: k
+  | LTLin.Lgoto lbl =>
+      Lgoto lbl :: k
+  | LTLin.Lcond cond args lbl =>
+      let rargs := regs_for args in
+      add_reloads args rargs (Lcond cond rargs lbl :: k)
+  | LTLin.Lreturn None =>
+      Lreturn :: k
+  | LTLin.Lreturn (Some loc) =>
+      add_reload loc (loc_result (LTLin.fn_sig f)) (Lreturn :: k)
+  end.
+
+Definition transf_code (f: LTLin.function) (c: LTLin.code) : code :=
+  List.fold_right (transf_instr f) nil c.
+
+Definition transf_function (f: LTLin.function) : function :=
+  mkfunction
+    (LTLin.fn_sig f)
+    (LTLin.fn_stacksize f)
+    (parallel_move (loc_parameters (LTLin.fn_sig f)) (LTLin.fn_params f)
+       (transf_code f (LTLin.fn_code f))).
+
+Definition transf_fundef (fd: LTLin.fundef) : Linear.fundef :=
+  transf_fundef transf_function fd.
+
+Definition transf_program (p: LTLin.program) : Linear.program :=
+  transform_program transf_fundef p.
+
diff --git a/backend/Reloadproof.v b/backend/Reloadproof.v
new file mode 100644
index 00000000..e9ced51f
--- /dev/null
+++ b/backend/Reloadproof.v
@@ -0,0 +1,1230 @@
+(** Correctness proof for the [Reload] pass. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Mem.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Op.
+Require Import Locations.
+Require Import Conventions.
+Require Import Allocproof.
+Require Import LTLin.
+Require Import LTLintyping.
+Require Import Linear.
+Require Import Parallelmove.
+Require Import Reload.
+
+(** * Exploitation of the typing hypothesis *)
+
+(** Reloading is applied to LTLin code that is well-typed in
+  the sense of [LTLintyping]. We exploit this hypothesis to obtain information on
+  the number of arguments to operations, addressing modes and conditions. *)
+
+Remark length_type_of_condition:
+  forall (c: condition), (List.length (type_of_condition c) <= 3)%nat.
+Proof.
+  destruct c; unfold type_of_condition; simpl; omega.
+Qed.
+
+Remark length_type_of_operation:
+  forall (op: operation), (List.length (fst (type_of_operation op)) <= 3)%nat.
+Proof.
+  destruct op; unfold type_of_operation; simpl; try omega.
+  apply length_type_of_condition.
+Qed.
+
+Remark length_type_of_addressing:
+  forall (addr: addressing), (List.length (type_of_addressing addr) <= 2)%nat.
+Proof.
+  destruct addr; unfold type_of_addressing; simpl; omega.
+Qed.
+
+Lemma length_op_args:
+  forall (op: operation) (args: list loc) (res: loc),
+  (List.map Loc.type args, Loc.type res) = type_of_operation op ->
+  (List.length args <= 3)%nat.
+Proof.
+  intros. rewrite <- (list_length_map Loc.type). 
+  generalize (length_type_of_operation op).
+  rewrite <- H. simpl. auto.
+Qed.
+
+Lemma length_addr_args:
+  forall (addr: addressing) (args: list loc),
+  List.map Loc.type args = type_of_addressing addr ->
+  (List.length args <= 2)%nat.
+Proof.
+  intros. rewrite <- (list_length_map Loc.type). 
+  rewrite H. apply length_type_of_addressing.
+Qed.
+
+Lemma length_cond_args:
+  forall (cond: condition) (args: list loc),
+  List.map Loc.type args = type_of_condition cond ->
+  (List.length args <= 3)%nat.
+Proof.
+  intros. rewrite <- (list_length_map Loc.type). 
+  rewrite H. apply length_type_of_condition.
+Qed.
+
+(** * Correctness of the Linear constructors *)
+
+(** This section proves theorems that establish the correctness of the
+  Linear constructor functions such as [add_move].  The theorems are of
+  the general form ``the generated Linear instructions execute and
+  modify the location set in the expected way: the result location(s)
+  contain the expected values; other, non-temporary locations keep
+  their values''. *)
+
+Section LINEAR_CONSTRUCTORS.
+
+Variable ge: genv.
+Variable stk: list stackframe.
+Variable f: function.
+Variable sp: val.
+
+Lemma reg_for_spec:
+  forall l,
+  R(reg_for l) = l \/ In (R (reg_for l)) temporaries.
+Proof.
+  intros. unfold reg_for. destruct l. tauto.
+  case (slot_type s); simpl; tauto.
+Qed.
+
+Lemma reg_for_diff:
+  forall l l',
+  Loc.diff l l' -> Loc.notin l' temporaries ->
+  Loc.diff (R (reg_for l)) l'.
+Proof.
+  intros. destruct (reg_for_spec l). 
+  rewrite H1; auto.
+  apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto.
+Qed.
+
+Lemma add_reload_correct:
+  forall src dst k rs m,
+  exists rs',
+  star step ge (State stk f sp (add_reload src dst k) rs m)
+            E0 (State stk f sp k rs' m) /\
+  rs' (R dst) = rs src /\
+  forall l, Loc.diff (R dst) l -> rs' l = rs l.
+Proof.
+  intros. unfold add_reload. destruct src.
+  case (mreg_eq m0 dst); intro.
+  subst dst. exists rs. split. apply star_refl. tauto.
+  exists (Locmap.set (R dst) (rs (R m0)) rs).
+  split. apply star_one; apply exec_Lop. reflexivity. 
+  split. apply Locmap.gss. 
+  intros; apply Locmap.gso; auto.
+  exists (Locmap.set (R dst) (rs (S s)) rs).
+  split. apply star_one; apply exec_Lgetstack. 
+  split. apply Locmap.gss. 
+  intros; apply Locmap.gso; auto.
+Qed.
+
+Lemma add_spill_correct:
+  forall src dst k rs m,
+  exists rs',
+  star step ge (State stk f sp (add_spill src dst k) rs m)
+            E0 (State stk f sp k rs' m) /\
+  rs' dst = rs (R src) /\
+  forall l, Loc.diff dst l -> rs' l = rs l.
+Proof.
+  intros. unfold add_spill. destruct dst.
+  case (mreg_eq src m0); intro.
+  subst src. exists rs. split. apply star_refl. tauto.
+  exists (Locmap.set (R m0) (rs (R src)) rs).
+  split. apply star_one. apply exec_Lop. reflexivity.
+  split. apply Locmap.gss.
+  intros; apply Locmap.gso; auto.
+  exists (Locmap.set (S s) (rs (R src)) rs).
+  split. apply star_one. apply exec_Lsetstack. 
+  split. apply Locmap.gss.
+  intros; apply Locmap.gso; auto.
+Qed.
+
+Lemma add_reloads_correct_rec:
+  forall srcs itmps ftmps k rs m,
+  (List.length srcs <= List.length itmps)%nat ->
+  (List.length srcs <= List.length ftmps)%nat ->
+  (forall r, In (R r) srcs -> In r itmps -> False) ->
+  (forall r, In (R r) srcs -> In r ftmps -> False) ->
+  list_disjoint itmps ftmps ->
+  list_norepet itmps ->
+  list_norepet ftmps ->
+  exists rs',
+  star step ge 
+      (State stk f sp (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m)
+   E0 (State stk f sp k rs' m) /\
+  reglist rs' (regs_for_rec srcs itmps ftmps) = map rs srcs /\
+  (forall r, ~(In r itmps) -> ~(In r ftmps) -> rs' (R r) = rs (R r)) /\
+  (forall s, rs' (S s) = rs (S s)).
+Proof.
+  induction srcs; simpl; intros.
+  (* base case *)
+  exists rs. split. apply star_refl. tauto.
+  (* inductive case *)
+  destruct itmps; simpl in H. omegaContradiction.
+  destruct ftmps; simpl in H0. omegaContradiction.
+  assert (R1: (length srcs <= length itmps)%nat). omega.
+  assert (R2: (length srcs <= length ftmps)%nat). omega.
+  assert (R3: forall r, In (R r) srcs -> In r itmps -> False).
+    intros. apply H1 with r. tauto. auto with coqlib. 
+  assert (R4: forall r, In (R r) srcs -> In r ftmps -> False).
+    intros. apply H2 with r. tauto. auto with coqlib.
+  assert (R5: list_disjoint itmps ftmps).
+    eapply list_disjoint_cons_left.
+    eapply list_disjoint_cons_right. eauto.
+  assert (R6: list_norepet itmps).
+    inversion H4; auto.
+  assert (R7: list_norepet ftmps).
+    inversion H5; auto.
+  destruct a.
+  (* a is a register *)
+  generalize (IHsrcs itmps ftmps k rs m R1 R2 R3 R4 R5 R6 R7).
+  intros [rs' [EX [RES [OTH1 OTH2]]]].
+  exists rs'. split.
+  unfold add_reload. case (mreg_eq m2 m2); intro; tauto.
+  split. simpl. apply (f_equal2 (@cons val)). 
+  apply OTH1. 
+  red; intro; apply H1 with m2. tauto. auto with coqlib.
+  red; intro; apply H2 with m2. tauto. auto with coqlib.
+  assumption.
+  split. intros. apply OTH1. simpl in H6; tauto. simpl in H7; tauto.
+  auto.
+  (* a is a stack location *)
+  set (tmp := match slot_type s with Tint => m0 | Tfloat => m1 end).
+  assert (NI: ~(In tmp itmps)).
+    unfold tmp; case (slot_type s).
+    inversion H4; auto. 
+    apply list_disjoint_notin with (m1 :: ftmps). 
+    apply list_disjoint_sym. apply list_disjoint_cons_left with m0.
+    auto. auto with coqlib.
+  assert (NF: ~(In tmp ftmps)).
+    unfold tmp; case (slot_type s).
+    apply list_disjoint_notin with (m0 :: itmps). 
+    apply list_disjoint_cons_right with m1.
+    auto. auto with coqlib.
+    inversion H5; auto. 
+  generalize
+    (add_reload_correct (S s) tmp
+       (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m).
+  intros [rs1 [EX1 [RES1 OTH]]].     
+  generalize (IHsrcs itmps ftmps k rs1 m R1 R2 R3 R4 R5 R6 R7).
+  intros [rs' [EX [RES [OTH1 OTH2]]]].
+  exists rs'.
+  split. eapply star_trans; eauto. traceEq.
+  split. simpl. apply (f_equal2 (@cons val)). 
+  rewrite OTH1; auto.
+  rewrite RES. apply list_map_exten. intros.
+  symmetry. apply OTH. 
+  destruct x; try exact I. simpl. red; intro; subst m2.
+  generalize H6; unfold tmp. case (slot_type s).
+  intro. apply H1 with m0. tauto. auto with coqlib.
+  intro. apply H2 with m1. tauto. auto with coqlib.
+  split. intros. simpl in H6; simpl in H7.
+  rewrite OTH1. apply OTH. 
+  simpl. unfold tmp. case (slot_type s); tauto.
+  tauto. tauto.
+  intros. rewrite OTH2. apply OTH. exact I.
+Qed.
+
+Lemma add_reloads_correct:
+  forall srcs k rs m,
+  (List.length srcs <= 3)%nat ->
+  Loc.disjoint srcs temporaries ->
+  exists rs',
+  star step ge (State stk f sp (add_reloads srcs (regs_for srcs) k) rs m)
+            E0 (State stk f sp k rs' m) /\
+  reglist rs' (regs_for srcs) = List.map rs srcs /\
+  forall l, Loc.notin l temporaries -> rs' l = rs l.
+Proof.
+  intros.
+  pose (itmps := IT1 :: IT2 :: IT3 :: nil).
+  pose (ftmps := FT1 :: FT2 :: FT3 :: nil).
+  assert (R1: (List.length srcs <= List.length itmps)%nat).
+    unfold itmps; simpl; assumption.
+  assert (R2: (List.length srcs <= List.length ftmps)%nat).
+    unfold ftmps; simpl; assumption.
+  assert (R3: forall r, In (R r) srcs -> In r itmps -> False).
+    intros. assert (In (R r) temporaries). 
+    simpl in H2; simpl; intuition congruence.
+    generalize (H0 _ _ H1 H3). simpl. tauto.
+  assert (R4: forall r, In (R r) srcs -> In r ftmps -> False).
+    intros. assert (In (R r) temporaries). 
+    simpl in H2; simpl; intuition congruence.
+    generalize (H0 _ _ H1 H3). simpl. tauto.
+  assert (R5: list_disjoint itmps ftmps).
+    red; intros r1 r2; simpl; intuition congruence.
+  assert (R6: list_norepet itmps).
+    unfold itmps. NoRepet.
+  assert (R7: list_norepet ftmps).
+    unfold ftmps. NoRepet.
+  generalize (add_reloads_correct_rec srcs itmps ftmps k rs m
+                R1 R2 R3 R4 R5 R6 R7).
+  intros [rs' [EX [RES [OTH1 OTH2]]]].
+  exists rs'. split. exact EX. 
+  split. exact RES.
+  intros. destruct l. apply OTH1.
+  generalize (Loc.notin_not_in _ _ H1). simpl. intuition congruence.
+  generalize (Loc.notin_not_in _ _ H1). simpl. intuition congruence.
+  apply OTH2.
+Qed.
+
+Lemma add_move_correct:
+  forall src dst k rs m,
+  exists rs',
+  star step ge (State stk f sp (add_move src dst k) rs m)
+            E0 (State stk f sp k rs' m) /\
+  rs' dst = rs src /\
+  forall l, Loc.diff l dst -> Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> rs' l = rs l.
+Proof.
+  intros; unfold add_move.
+  case (Loc.eq src dst); intro.
+  subst dst. exists rs. split. apply star_refl. tauto.
+  destruct src.
+  (* src is a register *)
+  generalize (add_spill_correct m0 dst k rs m); intros [rs' [EX [RES OTH]]].
+  exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto.
+  destruct dst.
+  (* src is a stack slot, dst a register *)
+  generalize (add_reload_correct (S s) m0 k rs m); intros [rs' [EX [RES OTH]]].
+  exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto.
+  (* src and dst are stack slots *)
+  set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end).
+  generalize (add_reload_correct (S s) tmp (add_spill tmp (S s0) k) rs m);
+  intros [rs1 [EX1 [RES1 OTH1]]].
+  generalize (add_spill_correct tmp (S s0) k rs1 m);
+  intros [rs2 [EX2 [RES2 OTH2]]].
+  exists rs2. split.
+  eapply star_trans; eauto. traceEq.
+  split. congruence.
+  intros. rewrite OTH2. apply OTH1. 
+  apply Loc.diff_sym. unfold tmp; case (slot_type s); auto.
+  apply Loc.diff_sym; auto.
+Qed.
+
+Lemma effect_move_sequence:
+  forall k moves rs m,
+  let k' := List.fold_right (fun p k => add_move (fst p) (snd p) k) k moves in
+  exists rs',
+  star step ge (State stk f sp k' rs m)
+            E0 (State stk f sp k rs' m) /\
+  effect_seqmove moves rs rs'.
+Proof.
+  induction moves; intros until m; simpl.
+  exists rs; split. constructor. constructor.
+  destruct a as [src dst]; simpl.
+  set (k1 := fold_right
+              (fun (p : loc * loc) (k : code) => add_move (fst p) (snd p) k)
+              k moves) in *.
+  destruct (add_move_correct src dst k1 rs m) as [rs1 [A [B C]]].
+  destruct (IHmoves rs1 m) as [rs' [D E]].
+  exists rs'; split. 
+  eapply star_trans; eauto. traceEq.
+  econstructor; eauto. red. tauto. 
+Qed.
+
+Lemma parallel_move_correct:
+  forall srcs dsts k rs m,
+  List.length srcs = List.length dsts ->
+  Loc.no_overlap srcs dsts ->
+  Loc.norepet dsts ->
+  Loc.disjoint srcs temporaries ->
+  Loc.disjoint dsts temporaries ->
+  exists rs',
+  star step ge (State stk f sp (parallel_move srcs dsts k) rs m)
+               E0 (State stk f sp k rs' m) /\
+  List.map rs' dsts = List.map rs srcs /\
+  rs' (R IT3) = rs (R IT3) /\
+  forall l, Loc.notin l dsts -> Loc.notin l temporaries -> rs' l = rs l.
+Proof.
+  intros. 
+  generalize (effect_move_sequence k (parmove srcs dsts) rs m).
+  intros [rs' [EXEC EFFECT]].
+  exists rs'. split. exact EXEC. 
+  apply effect_parmove; auto.
+Qed.
+
+Lemma parallel_move_arguments_correct:
+  forall args sg k rs m,
+  List.map Loc.type args = sg.(sig_args) ->
+  locs_acceptable args ->
+  exists rs',
+  star step ge (State stk f sp (parallel_move args (loc_arguments sg) k) rs m)
+            E0 (State stk f sp k rs' m) /\
+  List.map rs' (loc_arguments sg) = List.map rs args /\
+  rs' (R IT3) = rs (R IT3) /\
+  forall l, Loc.notin l (loc_arguments sg) -> Loc.notin l temporaries -> rs' l = rs l.
+Proof.
+  intros. apply parallel_move_correct.
+  transitivity (length sg.(sig_args)).
+  rewrite <- H. symmetry; apply list_length_map. 
+  symmetry. apply loc_arguments_length.
+  apply no_overlap_arguments; auto.
+  apply loc_arguments_norepet.
+  apply locs_acceptable_disj_temporaries; auto.
+  apply loc_arguments_not_temporaries.
+Qed.
+
+Lemma parallel_move_parameters_correct:
+  forall params sg k rs m,
+  List.map Loc.type params = sg.(sig_args) ->
+  locs_acceptable params ->
+  Loc.norepet params ->
+  exists rs',
+  star step ge (State stk f sp (parallel_move (loc_parameters sg) params k) rs m)
+            E0 (State stk f sp k rs' m) /\
+  List.map rs' params = List.map rs (loc_parameters sg) /\
+  rs' (R IT3) = rs (R IT3) /\
+  forall l, Loc.notin l params -> Loc.notin l temporaries -> rs' l = rs l.
+Proof.
+  intros. apply parallel_move_correct.
+  transitivity (length sg.(sig_args)).
+  apply loc_parameters_length.
+  rewrite <- H. apply list_length_map.
+  apply no_overlap_parameters; auto.
+  auto. apply loc_parameters_not_temporaries. 
+  apply locs_acceptable_disj_temporaries; auto.
+Qed.
+
+End LINEAR_CONSTRUCTORS.
+
+(** * Agreement between values of locations *)
+
+(** The predicate [agree] states that two location maps
+  give the same values to all acceptable locations,
+  that is, non-temporary registers and [Local] stack slots. *)
+
+Definition agree (rs1 rs2: locset) : Prop :=
+  forall l, loc_acceptable l -> rs2 l = rs1 l.
+
+Lemma agree_loc:
+  forall rs1 rs2 l,
+  agree rs1 rs2 -> loc_acceptable l -> rs2 l = rs1 l.
+Proof.
+  auto.
+Qed.
+
+Lemma agree_locs:
+  forall rs1 rs2 ll,
+  agree rs1 rs2 -> locs_acceptable ll -> map rs2 ll = map rs1 ll.
+Proof.
+  induction ll; simpl; intros.
+  auto.
+  f_equal. apply H. apply H0; auto with coqlib.
+  apply IHll; auto. red; intros. apply H0; auto with coqlib.
+Qed.
+
+Lemma agree_exten:
+  forall rs ls1 ls2,
+  agree rs ls1 ->
+  (forall l, Loc.notin l temporaries -> ls2 l = ls1 l) ->
+  agree rs ls2.
+Proof.
+  intros; red; intros. rewrite H0. auto. 
+  apply temporaries_not_acceptable; auto.
+Qed.
+
+Lemma agree_set:
+  forall rs1 rs2 rs2' l v,
+  loc_acceptable l ->
+  rs2' l = v ->
+  (forall l', Loc.diff l l' -> Loc.notin l' temporaries -> rs2' l' = rs2 l') ->
+  agree rs1 rs2 -> agree (Locmap.set l v rs1) rs2'.
+Proof.
+  intros; red; intros.
+  destruct (Loc.eq l l0).
+  subst l0. rewrite Locmap.gss. auto.
+  assert (Loc.diff l l0). eapply loc_acceptable_noteq_diff; eauto.
+  rewrite Locmap.gso; auto. rewrite H1. auto. auto.
+  apply temporaries_not_acceptable; auto. 
+Qed.
+
+Lemma agree_return_regs:
+  forall rs1 ls1 rs2 ls2,
+  agree rs1 ls1 -> agree rs2 ls2 ->
+  agree (LTL.return_regs rs1 rs2) (LTL.return_regs ls1 ls2).
+Proof.
+  intros; red; intros. unfold LTL.return_regs.
+  assert (~In l temporaries). 
+  apply Loc.notin_not_in. apply temporaries_not_acceptable; auto.
+  destruct l.
+  destruct (In_dec Loc.eq (R m) temporaries). contradiction.
+  destruct (In_dec Loc.eq (R m) destroyed_at_call); auto.
+  auto.
+Qed.
+
+Lemma agree_set_result:
+  forall rs1 ls1 rs2 ls2 sig res ls3,
+  loc_acceptable res -> agree rs1 ls1 -> agree rs2 ls2 ->
+  ls3 res = LTL.return_regs ls1 ls2 (R (loc_result sig)) ->
+  (forall l : loc, Loc.diff res l -> ls3 l = LTL.return_regs ls1 ls2 l) ->
+  let rs_merge := LTL.return_regs rs1 rs2 in
+  agree (Locmap.set res (rs_merge (R (loc_result sig))) rs_merge) ls3.
+Proof.
+  intros. apply agree_set with (LTL.return_regs ls1 ls2); auto.
+  rewrite H2; unfold rs_merge. 
+  repeat rewrite return_regs_result. apply H1. apply loc_result_acceptable.
+  unfold rs_merge. apply agree_return_regs; auto.
+Qed.
+
+(** [agree_arguments] and [agree_parameters] are two stronger
+  variants of the predicate [agree].  They additionally demand
+  equality of the values of locations that are arguments or parameters
+  (respectively) for a call to a function of signature [sg]. *)
+
+Definition agree_arguments (sg: signature) (rs1 rs2: locset) : Prop :=
+  forall l, loc_acceptable l \/ In l (loc_arguments sg) -> rs2 l = rs1 l.
+
+Definition agree_parameters (sg: signature) (rs1 rs2: locset) : Prop :=
+  forall l, loc_acceptable l \/ In l (loc_parameters sg) -> rs2 l = rs1 l.
+
+Remark parallel_assignment:
+  forall (P: loc -> Prop) (rs1 rs2 ls1 ls2: locset) srcs dsts,
+  map rs2 dsts = map rs1 srcs ->
+  map ls2 dsts = map ls1 srcs ->
+  (forall l, In l srcs -> P l) ->
+  (forall l, P l -> ls1 l = rs1 l) ->
+  (forall l, In l dsts -> ls2 l = rs2 l).
+Proof.
+  induction srcs; destruct dsts; simpl; intros; try congruence.
+  contradiction.
+  inv H; inv H0. elim H3; intro. subst l0.
+  rewrite H5; rewrite H4. auto with coqlib.
+  eapply IHsrcs; eauto. 
+Qed.
+
+Lemma agree_set_arguments:
+  forall rs1 ls1 ls2 args sg,
+  agree rs1 ls1 ->
+  List.map Loc.type args = sg.(sig_args) ->
+  locs_acceptable args ->
+  List.map ls2 (loc_arguments sg) = map ls1 args ->
+  (forall l : loc,
+    Loc.notin l (loc_arguments sg) ->
+    Loc.notin l temporaries -> ls2 l = ls1 l) ->
+  agree_arguments sg (LTL.parmov args (loc_arguments sg) rs1) ls2.
+Proof.
+  intros.  
+  assert (Loc.norepet (loc_arguments sg)).
+    apply loc_arguments_norepet.
+  assert (List.length args = List.length (loc_arguments sg)).
+    rewrite loc_arguments_length. rewrite <- H0.
+    symmetry. apply list_length_map.
+  destruct (parmov_spec rs1 _ _ H4 H5) as [A B].
+  set (rs2 := LTL.parmov args (loc_arguments sg) rs1) in *.
+  assert (forall l, In l (loc_arguments sg) -> ls2 l = rs2 l).
+    intros.
+    eapply parallel_assignment with (P := loc_acceptable); eauto.
+  red; intros. 
+  destruct (In_dec Loc.eq l (loc_arguments sg)).
+  auto.
+  assert (loc_acceptable l) by tauto.
+  assert (Loc.notin l (loc_arguments sg)).
+    eapply loc_acceptable_notin_notin; eauto.
+  rewrite H3; auto. rewrite B; auto. 
+  apply temporaries_not_acceptable; auto.
+Qed.
+
+Lemma agree_arguments_agree:
+  forall sg rs ls, agree_arguments sg rs ls -> agree rs ls.
+Proof.
+  intros; red; intros; auto.
+Qed.
+
+Lemma agree_arguments_locs:
+  forall sg rs1 rs2,
+  agree_arguments sg rs1 rs2 ->
+  map rs2 (loc_arguments sg) = map rs1 (loc_arguments sg).
+Proof.
+  intros.
+  assert (forall ll, incl ll (loc_arguments sg) -> map rs2 ll = map rs1 ll).
+    induction ll; simpl; intros. auto.
+    f_equal. apply H. right. apply H0. auto with coqlib.
+    apply IHll. eapply incl_cons_inv; eauto.
+  apply H0. apply incl_refl.
+Qed.
+
+Lemma agree_set_parameters:
+  forall rs1 ls1 ls2 sg params,
+  agree_parameters sg rs1 ls1 ->
+  List.map Loc.type params = sg.(sig_args) ->
+  locs_acceptable params ->
+  Loc.norepet params ->
+  List.map ls2 params = List.map ls1 (loc_parameters sg) ->
+  (forall l : loc,
+    Loc.notin l params ->
+    Loc.notin l temporaries -> ls2 l = ls1 l) ->
+  agree (LTL.parmov (loc_parameters sg) params rs1) ls2.
+Proof.
+  intros.
+  assert (List.length (loc_parameters sg) = List.length params).
+    unfold loc_parameters. rewrite list_length_map. 
+    rewrite loc_arguments_length. rewrite <- H0.
+    apply list_length_map.
+  destruct (parmov_spec rs1 _ _ H2 H5) as [A B].
+  set (rs2 := LTL.parmov (loc_parameters sg) params rs1) in *.
+  red; intros.
+  assert (forall l, In l params -> ls2 l = rs2 l).
+    intros.
+    eapply parallel_assignment with (P := fun l => In l (loc_parameters sg)); eauto.
+  destruct (In_dec Loc.eq l params).
+  auto.
+  assert (Loc.notin l params).
+    eapply loc_acceptable_notin_notin; eauto.
+  rewrite B; auto. rewrite H4; auto. 
+  apply temporaries_not_acceptable; auto.
+Qed.
+
+Lemma agree_call_regs:
+  forall sg rs ls,
+  agree_arguments sg rs ls ->
+  agree_parameters sg (LTL.call_regs rs) (LTL.call_regs ls).
+Proof.
+  intros; red; intros. elim H0.
+  destruct l; simpl; auto. destruct s; auto.
+  unfold loc_parameters. intro. 
+  destruct (list_in_map_inv _ _ _ H1) as [r [A B]].
+  subst l. generalize (loc_arguments_acceptable _ _ B). 
+  destruct r; simpl; auto. destruct s; simpl; auto.
+Qed.
+
+Lemma agree_arguments_return_regs:
+  forall sg rs1 rs2 ls1 ls2,
+  tailcall_possible sg ->
+  agree rs1 ls1 ->
+  agree_arguments sg rs2 ls2 ->
+  agree_arguments sg (LTL.return_regs rs1 rs2) (LTL.return_regs ls1 ls2).
+Proof.
+  intros; red; intros. generalize (H1 l H2). intro.
+  elim H2; intro. generalize (H0 l H4); intro. 
+  unfold LTL.return_regs. destruct l; auto.
+  destruct (In_dec Loc.eq (R m) temporaries); auto.
+  destruct (In_dec Loc.eq (R m) destroyed_at_call); auto.
+  generalize (H l H4). unfold LTL.return_regs; destruct l; intro.
+  destruct (In_dec Loc.eq (R m) temporaries); auto.
+  destruct (In_dec Loc.eq (R m) destroyed_at_call); auto.
+  contradiction.
+Qed.
+
+(** * Preservation of labels and gotos *)
+
+Lemma find_label_add_spill:
+  forall lbl src dst k,
+  find_label lbl (add_spill src dst k) = find_label lbl k.
+Proof.
+  intros. destruct dst; simpl; auto.
+  destruct (mreg_eq src m); auto.
+Qed.
+
+Lemma find_label_add_reload:
+  forall lbl src dst k,
+  find_label lbl (add_reload src dst k) = find_label lbl k.
+Proof.
+  intros. destruct src; simpl; auto.
+  destruct (mreg_eq m dst); auto.
+Qed.
+
+Lemma find_label_add_reloads:
+  forall lbl srcs dsts k,
+  find_label lbl (add_reloads srcs dsts k) = find_label lbl k.
+Proof.
+  induction srcs; intros; simpl. auto.
+  destruct dsts; auto. rewrite find_label_add_reload. auto.
+Qed.
+
+Lemma find_label_add_move:
+  forall lbl src dst k,
+  find_label lbl (add_move src dst k) = find_label lbl k.
+Proof.
+  intros; unfold add_move.
+  destruct (Loc.eq src dst); auto.
+  destruct src. apply find_label_add_spill.
+  destruct dst. apply find_label_add_reload.
+  rewrite find_label_add_reload. apply find_label_add_spill.
+Qed.
+
+Lemma find_label_parallel_move:
+  forall lbl srcs dsts k,
+  find_label lbl (parallel_move srcs dsts k) = find_label lbl k.
+Proof.
+  intros. unfold parallel_move. generalize (parmove srcs dsts). 
+  induction m; simpl. auto.
+  rewrite find_label_add_move. auto.
+Qed.
+
+Hint Rewrite find_label_add_spill find_label_add_reload
+             find_label_add_reloads find_label_add_move
+             find_label_parallel_move: labels.
+
+Opaque reg_for.
+
+Ltac FL := simpl; autorewrite with labels; auto.
+
+Lemma find_label_transf_instr:
+  forall lbl sg instr k,
+  find_label lbl (transf_instr sg instr k) =
+  if LTLin.is_label lbl instr then Some k else find_label lbl k.
+Proof.
+  intros. destruct instr; FL.
+  destruct (is_move_operation o l); FL; FL.
+  FL.
+  destruct s0; FL; FL; FL.
+  destruct s0; FL; FL; FL.
+  FL.
+  destruct o; FL.
+Qed.
+
+Lemma find_label_transf_code:
+  forall sg lbl c,
+  find_label lbl (transf_code sg c) =
+  option_map (transf_code sg) (LTLin.find_label lbl c).
+Proof.
+  induction c; simpl.
+  auto.
+  rewrite find_label_transf_instr.
+  destruct (LTLin.is_label lbl a); auto.
+Qed.
+
+Lemma find_label_transf_function:
+  forall lbl f c,
+  LTLin.find_label lbl (LTLin.fn_code f) = Some c ->
+  find_label lbl (Linear.fn_code (transf_function f)) =
+  Some (transf_code f c).
+Proof.
+  intros. destruct f; simpl in *. FL. 
+  rewrite find_label_transf_code. rewrite H; auto.
+Qed.
+
+(** * Semantic preservation *)
+
+Section PRESERVATION.
+
+Variable prog: LTLin.program.
+Let tprog := transf_program prog.
+Hypothesis WT_PROG: LTLintyping.wt_program prog.
+
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (@Genv.find_symbol_transf _ _ _ transf_fundef prog).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = LTLin.funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_wt:
+  forall ros rs f,
+  LTLin.find_function ge ros rs = Some f -> wt_fundef f.
+Proof.
+  intros until f. destruct ros; simpl.
+  intro. eapply Genv.find_funct_prop with (p := prog); eauto. 
+  caseEq (Genv.find_symbol ge i); intros. 
+  eapply Genv.find_funct_ptr_prop with (p := prog); eauto. 
+  congruence.
+Qed.
+
+(** The [match_state] predicate relates states in the original LTLin
+  program and the transformed Linear program.  The main property
+  it enforces are:
+- Agreement between the values of locations in the two programs,
+  according to the [agree] or [agree_arguments] predicates.
+- Lists of LTLin instructions appearing in the source state
+  are always suffixes of the code for the corresponding functions.
+- Well-typedness of the source code, which ensures that
+  only acceptable locations are accessed by this code.
+*)
+
+Inductive match_stackframes: list LTLin.stackframe -> list Linear.stackframe -> signature -> Prop :=
+  | match_stackframes_nil:
+      forall tyargs,
+      match_stackframes nil nil (mksignature tyargs (Some Tint))
+  | match_stackframes_cons:
+      forall res f sp c rs s s' c' ls sig,
+      match_stackframes s s' (LTLin.fn_sig f) ->
+      c' = add_spill (loc_result sig) res (transf_code f c) ->      
+      agree rs ls ->
+      loc_acceptable res ->
+      wt_function f ->
+      is_tail c (LTLin.fn_code f) ->
+      match_stackframes (LTLin.Stackframe res f sp rs c :: s)
+                        (Linear.Stackframe (transf_function f) sp ls c' :: s')
+                        sig.
+
+Inductive match_states: LTLin.state -> Linear.state -> Prop :=
+  | match_states_intro:
+      forall s f sp c rs m s' ls
+        (STACKS: match_stackframes s s' (LTLin.fn_sig f))
+        (AG: agree rs ls)
+        (WT: wt_function f)
+        (TL: is_tail c (LTLin.fn_code f)),
+      match_states (LTLin.State s f sp c rs m)
+                   (Linear.State s' (transf_function f) sp (transf_code f c) ls m)
+  | match_states_call:
+      forall s f rs m s' ls
+        (STACKS: match_stackframes s s' (LTLin.funsig f))
+        (AG: agree_arguments (LTLin.funsig f) rs ls)
+        (WT: wt_fundef f),
+      match_states (LTLin.Callstate s f rs m)
+                   (Linear.Callstate s' (transf_fundef f) ls m)
+  | match_states_return:
+      forall s sig rs m s' ls
+        (STACKS: match_stackframes s s' sig)
+        (AG: agree rs ls),
+      match_states (LTLin.Returnstate s sig rs m)
+                   (Linear.Returnstate s' ls m).
+
+Remark parent_locset_match:
+  forall s s' sig,
+  match_stackframes s s' sig ->
+  agree (LTLin.parent_locset s) (parent_locset s').
+Proof.
+  induction 1; simpl.
+  red; intros; auto.
+  auto.
+Qed.
+
+Remark match_stackframes_inv:
+  forall s ts sig,
+  match_stackframes s ts sig ->
+  forall sig', sig_res sig' = sig_res sig ->
+  match_stackframes s ts sig'.
+Proof.
+  induction 1; intros.
+  destruct sig'. simpl in H; inv H. constructor.
+  assert (loc_result sig' = loc_result sig).
+  unfold loc_result. rewrite H5; auto.
+  econstructor; eauto. rewrite H6; auto.
+Qed. 
+         
+Ltac ExploitWT :=
+  match goal with
+  | [ WT: wt_function _, TL: is_tail _ _ |- _ ] =>
+       generalize (wt_instrs _ WT _ (is_tail_in TL)); intro WTI
+  end.
+
+(** The proof of semantic preservation is a simulation argument
+  based on diagrams of the following form:
+<<
+           st1 --------------- st2
+            |                   |
+           t|                  *|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  It is possible for the transformed code to take no transition,
+  remaining in the same state; for instance, if the source transition
+  corresponds to a move instruction that was eliminated.  
+  To ensure that this cannot occur infinitely often in a row,
+  we use the following [measure] function that just counts the 
+  remaining number of instructions in the source code sequence. *)
+
+Definition measure (st: LTLin.state) : nat :=
+  match st with
+  | LTLin.State s f sp c ls m => List.length c
+  | LTLin.Callstate s f ls m => 0%nat
+  | LTLin.Returnstate s sig ls m => 0%nat
+  end.
+
+Theorem transf_step_correct:
+  forall s1 t s2, LTLin.step ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1'),
+  (exists s2', plus Linear.step tge s1' t s2' /\ match_states s2 s2')
+  \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat.
+Proof.
+  Opaque regs_for. Opaque add_reloads.
+  induction 1; intros; try inv MS; simpl.
+
+  (* Lop *)
+  ExploitWT. inv WTI. 
+  (* move *)
+  simpl.
+  destruct (add_move_correct tge s' (transf_function f) sp r1 res (transf_code f b) ls m)
+        as [ls2 [A [B C]]].
+  inv A. 
+  right. split. omega. split. auto.
+  rewrite H1. rewrite H1. econstructor; eauto with coqlib. 
+  apply agree_set with ls2; auto.
+  rewrite B. simpl in H; inversion H. auto.
+  left; econstructor; split. eapply plus_left; eauto. 
+  econstructor; eauto with coqlib. 
+  apply agree_set with ls; auto.
+  rewrite B. simpl in H; inversion H. auto.
+  intros. simpl in H1. apply C. apply Loc.diff_sym; auto.
+  simpl in H7; tauto. simpl in H7; tauto.
+  (* other ops *)
+  assert (is_move_operation op args = None).
+    caseEq (is_move_operation op args); intros.
+    destruct (is_move_operation_correct _ _ H0). congruence.
+    auto.
+  rewrite H0.
+  exploit add_reloads_correct. 
+    eapply length_op_args; eauto.
+    apply locs_acceptable_disj_temporaries; auto.
+  intros [ls2 [A [B C]]].
+  exploit add_spill_correct.
+  intros [ls3 [D [E F]]].
+  left; econstructor; split.
+  eapply star_plus_trans. eexact A. 
+  eapply plus_left. eapply exec_Lop with (v := v).
+    rewrite <- H. rewrite B. rewrite (agree_locs _ _ _ AG H5). 
+    apply eval_operation_preserved. exact symbols_preserved.
+  eexact D. eauto. traceEq.
+  econstructor; eauto with coqlib.
+  apply agree_set with ls; auto.
+  rewrite E. apply Locmap.gss.
+  intros. rewrite F; auto. rewrite Locmap.gso. auto. 
+  apply reg_for_diff; auto.
+
+  (* Lload *)
+  ExploitWT; inv WTI.
+  exploit add_reloads_correct. 
+    apply le_S. eapply length_addr_args; eauto.
+    apply locs_acceptable_disj_temporaries; auto.
+  intros [ls2 [A [B C]]].
+  exploit add_spill_correct.
+  intros [ls3 [D [E F]]].
+  left; econstructor; split.
+  eapply star_plus_trans. eauto. 
+  eapply plus_left. eapply exec_Lload; eauto.
+    rewrite B. rewrite <- H. rewrite (agree_locs _ _ _ AG H7).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. auto. traceEq.
+  econstructor; eauto with coqlib.
+  apply agree_set with ls; auto.
+  rewrite E. apply Locmap.gss.
+  intros. rewrite F; auto. rewrite Locmap.gso. auto. 
+  apply reg_for_diff; auto.
+
+  (* Lstore *)
+  ExploitWT; inv WTI.
+  assert (exists rsrc, exists rargs, regs_for (src :: args) = rsrc :: rargs).
+    Transparent regs_for. unfold regs_for. simpl.
+    destruct src. econstructor; econstructor; eauto.
+    destruct (slot_type s0);  econstructor; econstructor; eauto.
+  destruct H1 as [rsrc [rargs EQ]]. rewrite EQ. 
+  assert (length (src :: args) <= 3)%nat. 
+    simpl. apply le_n_S.
+    eapply length_addr_args; eauto.
+  exploit add_reloads_correct.
+    eauto. apply locs_acceptable_disj_temporaries; auto.
+    red; intros. elim H2; intro; auto. subst l; auto. 
+  intros [ls2 [A [B C]]]. rewrite EQ in A. rewrite EQ in B.
+  injection B. intros D E. 
+  simpl in B.
+  left; econstructor; split. 
+  eapply plus_right. eauto. 
+  eapply exec_Lstore with (a := a); eauto.
+    rewrite D. rewrite <- H. rewrite (agree_locs _ _ _ AG H7).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  rewrite E. rewrite (agree_loc _ _ _ AG H8). eauto.
+  traceEq.
+  econstructor; eauto with coqlib.
+  apply agree_exten with ls; auto.
+
+  (* Lcall *)
+  inversion MS. subst s0 f0 sp0 c rs0 m0.
+  simpl transf_code.
+  ExploitWT. inversion WTI. subst sig0 ros0 args0 res0.
+  assert (WTF': wt_fundef f'). eapply find_function_wt; eauto.
+  destruct ros as [fn | id].
+  (* indirect call *)
+  destruct (add_reload_correct tge s' (transf_function f) sp fn IT3
+              (parallel_move args (loc_arguments sig)
+                (Lcall sig (inl ident IT3)
+                 :: add_spill (loc_result sig) res (transf_code f b)))
+              ls m)
+  as [ls2 [A [B C]]].
+  destruct (parallel_move_arguments_correct tge s' (transf_function f) sp 
+              args sig
+              (Lcall sig (inl ident IT3)
+                :: add_spill (loc_result sig) res (transf_code f b))
+              ls2 m H7 H10)
+  as [ls3 [D [E [F G]]]].
+  assert (AG_ARGS: agree_arguments (LTLin.funsig f') rs1 ls3).
+    rewrite <- H0.
+    unfold rs1. apply agree_set_arguments with ls; auto.
+    rewrite E. apply list_map_exten; intros. symmetry. apply C.
+    assert (Loc.notin x temporaries). apply temporaries_not_acceptable; auto.
+    simpl in H3. apply Loc.diff_sym. tauto.
+    intros. rewrite G; auto. apply C. 
+    simpl in H3. apply Loc.diff_sym. tauto.
+  left; econstructor; split.
+  eapply star_plus_trans. eauto. eapply plus_right. eauto. 
+  eapply exec_Lcall; eauto.
+    simpl. rewrite F. rewrite B. 
+    rewrite (agree_loc rs ls fn); auto.
+    apply functions_translated. eauto.
+    rewrite H0; symmetry; apply sig_preserved.
+  eauto. traceEq.
+  econstructor; eauto. 
+  econstructor; eauto with coqlib.
+  rewrite H0. auto.
+  eapply agree_arguments_agree; eauto.
+  (* direct call *)
+  destruct (parallel_move_arguments_correct tge s' (transf_function f) sp
+              args sig
+              (Lcall sig (inr mreg id)
+                :: add_spill (loc_result sig) res (transf_code f b))
+              ls m H7 H10)
+  as [ls3 [D [E [F G]]]].
+  assert (AG_ARGS: agree_arguments (LTLin.funsig f') rs1 ls3).
+    rewrite <- H0.
+    unfold rs1. apply agree_set_arguments with ls; auto.
+  left; econstructor; split.
+  eapply plus_right. eauto.
+  eapply exec_Lcall; eauto.
+    simpl. rewrite symbols_preserved. 
+    generalize H; simpl. destruct (Genv.find_symbol ge id).
+    apply function_ptr_translated; auto. congruence.
+    rewrite H0. symmetry; apply sig_preserved.
+  traceEq.
+  econstructor; eauto.
+  econstructor; eauto with coqlib.
+  rewrite H0; auto.
+  eapply agree_arguments_agree; eauto.
+
+  (* Ltailcall *)
+  inversion MS. subst s0 f0 sp c rs0 m0 s1'.
+  simpl transf_code.
+  ExploitWT. inversion WTI. subst sig0 ros0 args0.
+  assert (WTF': wt_fundef f'). eapply find_function_wt; eauto.
+  destruct ros as [fn | id].
+  (* indirect call *)
+  destruct (add_reload_correct tge s' (transf_function f) (Vptr stk Int.zero) fn IT3
+              (parallel_move args (loc_arguments sig)
+                (Ltailcall sig (inl ident IT3) :: transf_code f b))
+              ls m)
+  as [ls2 [A [B C]]].
+  destruct (parallel_move_arguments_correct tge s' (transf_function f) (Vptr stk Int.zero)
+              args sig
+              (Ltailcall sig (inl ident IT3) :: transf_code f b)
+              ls2 m H5 H7)
+  as [ls3 [D [E [F G]]]].
+  assert (AG_ARGS: agree_arguments (LTLin.funsig f') rs2 (LTL.return_regs (parent_locset s') ls3)).
+    rewrite <- H0. unfold rs2. 
+    apply agree_arguments_return_regs; auto.
+    eapply parent_locset_match; eauto.  
+    unfold rs1. apply agree_set_arguments with ls; auto.
+    rewrite E. apply list_map_exten; intros. symmetry. apply C.
+    assert (Loc.notin x temporaries). apply temporaries_not_acceptable; auto.
+    simpl in H2. apply Loc.diff_sym. tauto.
+    intros. rewrite G; auto. apply C. 
+    simpl in H2. apply Loc.diff_sym. tauto.
+  left; econstructor; split.
+  eapply star_plus_trans. eauto. eapply plus_right. eauto.
+  eapply exec_Ltailcall; eauto.
+    simpl. rewrite F. rewrite B. 
+    rewrite (agree_loc rs ls fn); auto.
+    apply functions_translated. eauto.
+    rewrite H0; symmetry; apply sig_preserved.
+  eauto. traceEq.
+  econstructor; eauto.
+  eapply match_stackframes_inv; eauto. congruence.
+ 
+  (* direct call *)
+  destruct (parallel_move_arguments_correct tge s' (transf_function f) (Vptr stk Int.zero)
+              args sig
+              (Ltailcall sig (inr mreg id) :: transf_code f b)
+              ls m H5 H7)
+  as [ls3 [D [E [F G]]]].
+  assert (AG_ARGS: agree_arguments (LTLin.funsig f') rs2 (LTL.return_regs (parent_locset s') ls3)).
+    rewrite <- H0. unfold rs2. 
+    apply agree_arguments_return_regs; auto.
+    eapply parent_locset_match; eauto.  
+    unfold rs1. apply agree_set_arguments with ls; auto.
+  left; econstructor; split.
+  eapply plus_right. eauto.
+  eapply exec_Ltailcall; eauto.
+    simpl. rewrite symbols_preserved. 
+    generalize H; simpl. destruct (Genv.find_symbol ge id).
+    apply function_ptr_translated; auto. congruence.
+    rewrite H0. symmetry; apply sig_preserved.
+  traceEq.
+  econstructor; eauto.
+  eapply match_stackframes_inv; eauto. congruence.
+
+  (* Lalloc *)
+  ExploitWT; inv WTI.
+  exploit add_reload_correct. intros [ls2 [A [B C]]]. 
+  exploit add_spill_correct. intros [ls3 [D [E F]]].
+  left; econstructor; split.
+  eapply star_plus_trans. eauto. 
+  eapply plus_left. eapply exec_Lalloc; eauto.
+    rewrite B. rewrite <- H. apply AG. auto.
+  eauto. eauto. traceEq.
+  econstructor; eauto with coqlib.
+  unfold rs3; apply agree_set with (Locmap.set (R loc_alloc_result) (Vptr blk Int.zero) ls2).
+  auto. rewrite E. rewrite Locmap.gss. 
+  unfold rs2; rewrite Locmap.gss. auto.
+  auto.
+  unfold rs2. apply agree_set with ls2.
+  unfold loc_alloc_result; simpl; intuition congruence.
+  apply Locmap.gss. intros. apply Locmap.gso; auto.
+  unfold rs1. apply agree_set with ls.
+  unfold loc_alloc_argument; simpl; intuition congruence.
+  rewrite B. apply AG; auto. auto. auto.
+
+  (* Llabel *)
+  left; econstructor; split.
+  apply plus_one. apply exec_Llabel.
+  econstructor; eauto with coqlib.
+
+  (* Lgoto *)
+  left; econstructor; split.
+  apply plus_one. apply exec_Lgoto. apply find_label_transf_function; eauto.
+  econstructor; eauto.
+  eapply LTLin.find_label_is_tail; eauto.
+
+  (* Lcond true *)
+  ExploitWT; inv WTI.
+  exploit add_reloads_correct. 
+    eapply length_cond_args; eauto.
+    apply locs_acceptable_disj_temporaries; auto.
+  intros [ls2 [A [B C]]].
+  left; econstructor; split.
+  eapply plus_right. eauto. eapply exec_Lcond_true; eauto.
+  rewrite B. rewrite (agree_locs _ _ _ AG H5). auto.
+  apply find_label_transf_function; eauto.
+  traceEq.
+  econstructor; eauto.
+  apply agree_exten with ls; auto.
+  eapply LTLin.find_label_is_tail; eauto.
+
+  (* Lcond false *)
+  ExploitWT; inv WTI.
+  exploit add_reloads_correct. 
+    eapply length_cond_args; eauto.
+    apply locs_acceptable_disj_temporaries; auto.
+  intros [ls2 [A [B C]]].
+  left; econstructor; split.
+  eapply plus_right. eauto. eapply exec_Lcond_false; eauto.
+  rewrite B. rewrite (agree_locs _ _ _ AG H4). auto.
+  traceEq.
+  econstructor; eauto with coqlib.
+  apply agree_exten with ls; auto.
+
+  (* Lreturn *)
+  ExploitWT; inv WTI. 
+  unfold rs2, rs1; destruct or; simpl.
+  (* with an argument *)
+  exploit add_reload_correct.
+  intros [ls2 [A [B C]]].
+  left; econstructor; split.
+  eapply plus_right. eauto. eapply exec_Lreturn; eauto. 
+  traceEq.
+  econstructor; eauto. 
+  apply agree_return_regs; auto.
+  eapply parent_locset_match; eauto.
+  apply agree_set with ls. 
+  apply loc_result_acceptable.
+  rewrite B. eapply agree_loc; eauto.
+  auto. auto.
+  (* without an argument *)
+  left; econstructor; split.
+  apply plus_one. eapply exec_Lreturn; eauto.  
+  econstructor; eauto.
+  apply agree_return_regs; auto.
+  eapply parent_locset_match; eauto.
+
+  (* internal function *)
+  simpl in WT. inversion_clear WT. inversion H0. simpl in AG.
+  destruct (parallel_move_parameters_correct tge s' (transf_function f)
+               (Vptr stk Int.zero) (LTLin.fn_params f) (LTLin.fn_sig f)
+               (transf_code f (LTLin.fn_code f)) (LTL.call_regs ls) m'
+               wt_params wt_acceptable wt_norepet)
+  as [ls2 [A [B [C D]]]].
+  assert (AG2: agree rs2 ls2).
+    unfold rs2. eapply agree_set_parameters; eauto.
+    unfold rs1. apply agree_call_regs; auto.
+  left; econstructor; split.
+  eapply plus_left.
+  eapply exec_function_internal; eauto.
+  simpl. eauto. traceEq.
+  econstructor; eauto with coqlib.
+
+  (* external function *)
+  left; econstructor; split.
+  apply plus_one. eapply exec_function_external; eauto. 
+  unfold args. symmetry. eapply agree_arguments_locs; auto.
+  econstructor; eauto.
+  unfold rs1. apply agree_set with ls; auto. 
+  apply loc_result_acceptable.
+  apply Locmap.gss.
+  intros. apply Locmap.gso; auto.
+  eapply agree_arguments_agree; eauto.
+
+  (* return *)
+  inv STACKS. 
+  exploit add_spill_correct. intros [ls2 [A [B C]]].
+  left; econstructor; split.
+  eapply plus_left. eapply exec_return; eauto.
+  eauto. traceEq.
+  econstructor; eauto. 
+  unfold rs1. apply agree_set with ls; auto.
+  rewrite B. apply AG. apply loc_result_acceptable. 
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, LTLin.initial_state prog st1 ->
+  exists st2, Linear.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H.
+  econstructor; split.
+  econstructor. 
+  change (prog_main tprog) with (prog_main prog). 
+  rewrite symbols_preserved. eauto.
+  apply function_ptr_translated; eauto. 
+  rewrite sig_preserved. auto.
+  replace (Genv.init_mem tprog) with (Genv.init_mem prog).  
+  econstructor; eauto. rewrite H2. constructor.
+  red; intros. auto.
+  eapply Genv.find_funct_ptr_prop; eauto.
+  symmetry. unfold tprog, transf_program. apply Genv.init_mem_transf; auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> LTLin.final_state st1 r -> Linear.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. econstructor.
+  rewrite (agree_loc _ _ (R R3) AG). auto. 
+  simpl. intuition congruence.
+Qed.
+
+Theorem transf_program_correct:
+  forall (beh: program_behavior),
+  LTLin.exec_program prog beh -> Linear.exec_program tprog beh.
+Proof.
+  unfold LTLin.exec_program, Linear.exec_program; intros.
+  eapply simulation_star_preservation; eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  eexact transf_step_correct. 
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Reloadtyping.v b/backend/Reloadtyping.v
new file mode 100644
index 00000000..155c174d
--- /dev/null
+++ b/backend/Reloadtyping.v
@@ -0,0 +1,309 @@
+(** Proof of type preservation for Reload and of
+    correctness of computation of stack bounds for Linear. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Op.
+Require Import Locations.
+Require Import LTLin.
+Require Import LTLintyping.
+Require Import Linear.
+Require Import Lineartyping.
+Require Import Conventions.
+Require Import Parallelmove.
+Require Import Reload.
+Require Import Reloadproof.
+
+(** * Typing Linear constructors *)
+
+(** We show that the Linear constructor functions defined in [Reload]
+  generate well-typed instruction sequences,
+  given sufficient typing and well-formedness hypotheses over the locations involved. *)
+
+Hint Resolve wt_Lgetstack wt_Lsetstack wt_Lopmove
+             wt_Lop wt_Lload wt_Lstore wt_Lcall wt_Ltailcall wt_Lalloc
+             wt_Llabel wt_Lgoto wt_Lcond wt_Lreturn: reloadty.
+
+Remark wt_code_cons:
+  forall f i c, wt_instr f i -> wt_code f c -> wt_code f (i :: c).
+Proof.
+  intros; red; simpl; intros. elim H1; intro.
+  subst i; auto. auto.
+Qed.
+
+Hint Resolve wt_code_cons: reloadty.
+
+Definition loc_valid (f: function) (l: loc) :=
+  match l with R _ => True | S s => slot_valid f s end.
+
+Lemma loc_acceptable_valid:
+  forall f l, loc_acceptable l -> loc_valid f l.
+Proof.
+  destruct l; simpl; intro. auto.
+  destruct s; simpl. omega. tauto. tauto.
+Qed.
+
+Definition loc_writable (l: loc) :=
+  match l with R _ => True | S s => slot_writable s end.
+
+Lemma loc_acceptable_writable:
+  forall l, loc_acceptable l -> loc_writable l.
+Proof.
+  destruct l; simpl; intro. auto.
+  destruct s; simpl; tauto.
+Qed.
+
+Hint Resolve loc_acceptable_valid loc_acceptable_writable: reloadty.
+
+Definition locs_valid (f: function) (ll: list loc) :=
+  forall l, In l ll -> loc_valid f l.
+Definition locs_writable (ll: list loc) :=
+  forall l, In l ll -> loc_writable l.
+
+Lemma locs_acceptable_valid:
+  forall f ll, locs_acceptable ll -> locs_valid f ll.
+Proof.
+  unfold locs_acceptable, locs_valid. auto with reloadty.
+Qed.
+
+Lemma locs_acceptable_writable:
+  forall ll, locs_acceptable ll -> locs_writable ll.
+Proof.
+  unfold locs_acceptable, locs_writable. auto with reloadty.
+Qed.
+
+Hint Resolve locs_acceptable_valid locs_acceptable_writable: reloadty.
+
+Lemma wt_add_reload:
+  forall f src dst k,
+  loc_valid f src -> Loc.type src = mreg_type dst ->
+  wt_code f k -> wt_code f (add_reload src dst k).
+Proof.
+  intros; unfold add_reload.
+  destruct src; eauto with reloadty.
+  destruct (mreg_eq m dst); eauto with reloadty.
+Qed.
+
+Hint Resolve wt_add_reload: reloadty.
+
+Lemma wt_add_reloads:
+  forall f srcs dsts k,
+  locs_valid f srcs -> map Loc.type srcs = map mreg_type dsts ->
+  wt_code f k -> wt_code f (add_reloads srcs dsts k).
+Proof.
+  induction srcs; destruct dsts; simpl; intros; try congruence.
+  auto. inv H0. apply wt_add_reload; auto with coqlib reloadty.
+  apply IHsrcs; auto. red; intros; auto with coqlib.
+Qed.
+
+Hint Resolve wt_add_reloads: reloadty.
+
+Lemma wt_add_spill:
+  forall f src dst k,
+  loc_valid f dst -> loc_writable dst -> Loc.type dst = mreg_type src ->
+  wt_code f k -> wt_code f (add_spill src dst k).
+Proof.
+  intros; unfold add_spill.
+  destruct dst; eauto with reloadty.
+  destruct (mreg_eq src m); eauto with reloadty.
+Qed.
+
+Hint Resolve wt_add_spill: reloadty.
+
+Lemma wt_add_move:
+  forall f src dst k,
+  loc_valid f src -> loc_valid f dst -> loc_writable dst ->
+  Loc.type dst = Loc.type src ->
+  wt_code f k -> wt_code f (add_move src dst k).
+Proof.
+  intros. unfold add_move.
+  destruct (Loc.eq src dst); auto.
+  destruct src; auto with reloadty.
+  destruct dst; auto with reloadty.
+  set (tmp := match slot_type s with
+                    | Tint => IT1
+                    | Tfloat => FT1
+                    end).
+  assert (mreg_type tmp = Loc.type (S s)).
+    simpl. destruct (slot_type s); reflexivity.
+  apply wt_add_reload; auto with reloadty. 
+  apply wt_add_spill; auto. congruence.
+Qed.
+
+Hint Resolve wt_add_move: reloadty.
+
+Lemma wt_add_moves:
+  forall f b moves,
+  (forall s d, In (s, d) moves ->
+      loc_valid f s /\ loc_valid f d /\ loc_writable d /\ Loc.type s = Loc.type d) ->
+  wt_code f b ->
+  wt_code f 
+    (List.fold_right (fun p k => add_move (fst p) (snd p) k) b moves).
+Proof.
+  induction moves; simpl; intros.
+  auto.
+  destruct a as [s d]. simpl.
+  destruct (H s d) as [A [B [C D]]]. auto.
+  auto with reloadty.
+Qed.
+
+Theorem wt_parallel_move:
+ forall f srcs dsts b,
+ List.map Loc.type srcs = List.map Loc.type dsts ->
+ locs_valid f srcs -> locs_valid f dsts -> locs_writable dsts ->
+ wt_code f b ->  wt_code f (parallel_move srcs dsts b).
+Proof.
+  intros. unfold parallel_move. apply wt_add_moves; auto.
+  intros. 
+  elim (parmove_prop_2 _ _ _ _ H4); intros A B.
+  split. destruct A as [C|[C|C]]; auto; subst s; exact I.
+  split. destruct B as [C|[C|C]]; auto; subst d; exact I.
+  split. destruct B as [C|[C|C]]; auto; subst d; exact I.
+  eapply parmove_prop_3; eauto.
+Qed.
+Hint Resolve wt_parallel_move: reloadty.
+
+Lemma wt_reg_for:
+  forall l, mreg_type (reg_for l) = Loc.type l.
+Proof.
+  intros. destruct l; simpl. auto.
+  case (slot_type s); reflexivity.
+Qed.
+Hint Resolve wt_reg_for: reloadty.
+
+Lemma wt_regs_for_rec:
+  forall locs itmps ftmps,
+  (List.length locs <= List.length itmps)%nat ->
+  (List.length locs <= List.length ftmps)%nat ->
+  (forall r, In r itmps -> mreg_type r = Tint) ->
+  (forall r, In r ftmps -> mreg_type r = Tfloat) ->
+  List.map mreg_type (regs_for_rec locs itmps ftmps) = List.map Loc.type locs.
+Proof.
+  induction locs; intros.
+  simpl. auto.
+  destruct itmps; simpl in H. omegaContradiction.
+  destruct ftmps; simpl in H0. omegaContradiction.
+  simpl. apply (f_equal2 (@cons typ)). 
+  destruct a. reflexivity. simpl. case (slot_type s).
+  apply H1; apply in_eq. apply H2; apply in_eq.
+  apply IHlocs. omega. omega. 
+  intros; apply H1; apply in_cons; auto.
+  intros; apply H2; apply in_cons; auto.
+Qed.
+
+Lemma wt_regs_for:
+  forall locs,
+  (List.length locs <= 3)%nat ->
+  List.map mreg_type (regs_for locs) = List.map Loc.type locs.
+Proof.
+  intros. unfold regs_for. apply wt_regs_for_rec.
+  simpl. auto. simpl. auto. 
+  simpl; intros; intuition; subst r; reflexivity.
+  simpl; intros; intuition; subst r; reflexivity.
+Qed.
+Hint Resolve wt_regs_for: reloadty.
+
+Hint Resolve length_op_args length_addr_args length_cond_args: reloadty.
+
+Hint Extern 4 (_ = _) => congruence : reloadty.
+
+Lemma wt_transf_instr:
+  forall f instr k,
+  LTLintyping.wt_instr (LTLin.fn_sig f) instr ->
+  wt_code (transf_function f) k ->
+  wt_code (transf_function f) (transf_instr f instr k).
+Proof.
+  Opaque reg_for regs_for.
+  intros. inv H; simpl; auto with reloadty.
+  caseEq (is_move_operation op args); intros.
+  destruct (is_move_operation_correct _ _ H). congruence.
+  assert (map mreg_type (regs_for args) = map Loc.type args).
+    eauto with reloadty.
+  assert (mreg_type (reg_for res) = Loc.type res). eauto with reloadty.
+  auto with reloadty.
+
+  assert (map mreg_type (regs_for args) = map Loc.type args).
+    eauto with reloadty.
+  assert (mreg_type (reg_for dst) = Loc.type dst). eauto with reloadty.
+  auto with reloadty.
+
+  caseEq (regs_for (src :: args)); intros.
+  red; simpl; tauto.
+  assert (map mreg_type (regs_for (src :: args)) = map Loc.type (src :: args)).
+    apply wt_regs_for. simpl. apply le_n_S. eauto with reloadty.
+  rewrite H in H5. injection H5; intros.
+  auto with reloadty.
+
+  assert (locs_valid (transf_function f) (loc_arguments sig)).
+    red; intros. generalize (loc_arguments_acceptable sig l H).
+    destruct l; simpl; auto. destruct s; simpl; intuition.
+  assert (locs_writable (loc_arguments sig)).
+    red; intros. generalize (loc_arguments_acceptable sig l H7).
+    destruct l; simpl; auto. destruct s; simpl; intuition.
+  assert (map Loc.type args = map Loc.type (loc_arguments sig)).
+    rewrite loc_arguments_type; auto.
+  assert (Loc.type res = mreg_type (loc_result sig)).
+    rewrite H3. unfold loc_result.
+    destruct (sig_res sig); auto. destruct t; auto.
+  destruct ros; auto 10 with reloadty.
+
+  assert (locs_valid (transf_function f) (loc_arguments sig)).
+    red; intros. generalize (loc_arguments_acceptable sig l H).
+    destruct l; simpl; auto. destruct s; simpl; intuition.
+  assert (locs_writable (loc_arguments sig)).
+    red; intros. generalize (loc_arguments_acceptable sig l H7).
+    destruct l; simpl; auto. destruct s; simpl; intuition.
+  assert (map Loc.type args = map Loc.type (loc_arguments sig)).
+    rewrite loc_arguments_type; auto.
+  destruct ros; auto 10 with reloadty.
+
+  assert (map mreg_type (regs_for args) = map Loc.type args).
+    eauto with reloadty.
+  auto with reloadty.
+
+  destruct optres; simpl in *; auto with reloadty.
+  apply wt_add_reload; auto with reloadty. 
+  unfold loc_result. rewrite <- H1.
+  destruct (Loc.type l); reflexivity.
+Qed.
+
+Lemma wt_transf_code:
+  forall f  c,
+  LTLintyping.wt_code (LTLin.fn_sig f) c ->
+  Lineartyping.wt_code (transf_function f) (transf_code f c).
+Proof.
+  induction c; simpl; intros.
+  red; simpl; tauto.
+  apply wt_transf_instr; auto with coqlib. 
+  apply IHc. red; auto with coqlib. 
+Qed.
+
+Lemma wt_transf_fundef:
+  forall fd,
+  LTLintyping.wt_fundef fd ->
+  Lineartyping.wt_fundef (transf_fundef fd).
+Proof.
+  intros. destruct fd; simpl.
+  inv H. inv H1. constructor. unfold wt_function. simpl. 
+  apply wt_parallel_move; auto with reloadty.
+    rewrite loc_parameters_type. auto.
+    unfold loc_parameters; red; intros.
+    destruct (list_in_map_inv _ _ _ H) as [r [A B]]. rewrite A.
+    generalize (loc_arguments_acceptable _ _ B). 
+    destruct r; simpl; auto. destruct s; try tauto.
+    intros; simpl. split. omega. 
+    apply loc_arguments_bounded; auto.
+  apply wt_transf_code; auto.
+  constructor.
+Qed.
+
+Lemma program_typing_preserved:
+  forall p,
+  LTLintyping.wt_program p ->
+  Lineartyping.wt_program (transf_program p).
+Proof.
+  intros; red; intros. 
+  destruct (transform_program_function _ _ _ _ H0) as [f0 [A B]].
+  subst f; apply wt_transf_fundef. eauto.
+Qed.
diff --git a/backend/Cmconstr.v b/backend/Selection.v
index 2cc947c7..c98e55e4 100644
--- a/backend/Cmconstr.v
+++ b/backend/Selection.v
@@ -1,47 +1,37 @@
-(** Smart constructors for Cminor.  This library provides functions
-  for building Cminor expressions and statements, especially expressions
-  consisting of operator applications.  These functions examine their
-  arguments to choose cheaper forms of operators whenever possible.
+(** Instruction selection *)
 
-  For instance, [add e1 e2] will return a Cminor expression semantically
-  equivalent to [Eop Oadd (e1 ::: e2 ::: Enil)], but will use a
-  [Oaddimm] operator if one of the arguments is an integer constant,
-  or suppress the addition altogether if one of the arguments is the
-  null integer.  In passing, we perform operator reassociation
-  ([(e + c1) * c2] becomes [(e * c2) + (c1 * c2)]) and a small amount
-  of constant propagation.
+(** The instruction selection pass recognizes opportunities for using
+  combined arithmetic and logical operations and addressing modes
+  offered by the target processor.  For instance, the expression [x + 1]
+  can take advantage of the "immediate add" instruction of the processor,
+  and on the PowerPC, the expression [(x >> 6) & 0xFF] can be turned
+  into a "rotate and mask" instruction.
 
-  In more general terms, the purpose of the smart constructors is twofold:
-- Perform instruction selection (for operators, loads, stores and
-  conditional expressions);
-- Abstract over processor dependencies in operators and addressing modes,
-  providing Cminor providers with processor-independent ways of constructing
-  Cminor terms.
-*)
+  Instruction selection proceeds by bottom-up rewriting over expressions.
+  The source language is Cminor and the target language is CminorSel. *)
 
 Require Import Coqlib.
-Require Import Compare_dec.
 Require Import Maps.
 Require Import AST.
 Require Import Integers.
 Require Import Floats.
 Require Import Values.
 Require Import Mem.
-Require Import Op.
 Require Import Globalenvs.
-Require Import Cminor.
+Require Cminor.
+Require Import Op.
+Require Import CminorSel.
 
-Infix ":::" := Econs (at level 60, right associativity) : cminor_scope.
+Infix ":::" := Econs (at level 60, right associativity) : selection_scope.
 
-Open Scope cminor_scope.
+Open Local Scope selection_scope.
 
 (** * Lifting of let-bound variables *)
 
-(** Some of the smart constructors, as well as the Cminor producers,
-  generate [Elet] constructs to share the evaluation of a subexpression.
-  Owing to the use of de Bruijn indices for let-bound variables,
-  we need to shift de Bruijn indices when an expression [b] is put
-  in a [Elet a b] context. *)
+(** Some of the instruction functions generate [Elet] constructs to
+  share the evaluation of a subexpression.  Owing to the use of de
+  Bruijn indices for let-bound variables, we need to shift de Bruijn
+  indices when an expression [b] is put in a [Elet a b] context. *)
 
 Fixpoint lift_expr (p: nat) (a: expr) {struct a}: expr :=
   match a with
@@ -79,12 +69,19 @@ Definition lift (a: expr): expr := lift_expr O a.
 
 (** * Smart constructors for operators *)
 
-Definition negint (e: expr) := Eop (Osubimm Int.zero) (e ::: Enil).
-Definition negfloat (e: expr) := Eop Onegf (e ::: Enil).
-Definition absfloat (e: expr) := Eop Oabsf (e ::: Enil).
-Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
-Definition floatofint (e: expr) := Eop Ofloatofint (e ::: Enil).
-Definition floatofintu (e: expr) := Eop Ofloatofintu (e ::: Enil).
+(** This section defines functions for building CminorSel expressions
+  and statements, especially expressions consisting of operator
+  applications.  These functions examine their arguments to choose
+  cheaper forms of operators whenever possible.
+
+  For instance, [add e1 e2] will return a CminorSel expression semantically
+  equivalent to [Eop Oadd (e1 ::: e2 ::: Enil)], but will use a
+  [Oaddimm] operator if one of the arguments is an integer constant,
+  or suppress the addition altogether if one of the arguments is the
+  null integer.  In passing, we perform operator reassociation
+  ([(e + c1) * c2] becomes [(e * c2) + (c1 * c2)]) and a small amount
+  of constant propagation.
+*)
 
 (** ** Integer logical negation *)
 
@@ -129,7 +126,7 @@ Inductive notint_cases: forall (e: expr), Set :=
       notint_cases e.
 
 (** We then define a classification function that takes an expression
-  and return in which case it falls.  Note that the catch-all case
+  and return the case in which it falls.  Note that the catch-all case
   [notint_default] does not state that it is mutually exclusive with
   the first three, more specific cases.  The classification function
   nonetheless chooses the specific cases in preference to the catch-all
@@ -174,7 +171,7 @@ Definition notint (e: expr) :=
 (** ** Boolean negation *)
 
 Definition notbool_base (e: expr) :=
-  Eop (Ocmp (Ccompuimm Ceq Int.zero)) (e ::: Enil).
+  Eop (Ocmp (Ccompimm Ceq Int.zero)) (e ::: Enil).
 
 Fixpoint notbool (e: expr) {struct e} : expr :=
   match e with
@@ -672,8 +669,6 @@ Definition or (e1: expr) (e2: expr) :=
       Eop Oor (e1:::e2:::Enil)
   end.
 
-Definition xor (e1 e2: expr) := Eop Oxor (e1:::e2:::Enil).
-
 (** ** General shifts *)
 
 Inductive shift_cases: forall (e1: expr), Set :=
@@ -700,9 +695,6 @@ Definition shl (e1: expr) (e2: expr) :=
       Eop Oshl (e1:::e2:::Enil)
   end.
 
-Definition shr (e1 e2: expr) :=
-  Eop Oshr (e1:::e2:::Enil).
-
 Definition shru (e1: expr) (e2: expr) :=
   match shift_match e2 with
   | shift_case1 n2 =>
@@ -791,9 +783,6 @@ Definition subf (e1: expr) (e2: expr) :=
       Eop Osubf (e1:::e2:::Enil)
   end.
 
-Definition mulf (e1 e2: expr) := Eop Omulf (e1:::e2:::Enil).
-Definition divf (e1 e2: expr) := Eop Odivf (e1:::e2:::Enil).
-
 (** ** Truncations and sign extensions *)
 
 Inductive cast8signed_cases: forall (e1: expr), Set :=
@@ -906,14 +895,7 @@ Definition singleoffloat (e: expr) :=
   | singleoffloat_default e1 => Eop Osingleoffloat (e1 ::: Enil)
   end.
 
-(** ** Comparisons and conditional expressions *)
-
-Definition cmp (c: comparison) (e1 e2: expr) :=
-  Eop (Ocmp (Ccomp c)) (e1:::e2:::Enil).
-Definition cmpu (c: comparison) (e1 e2: expr) :=
-  Eop (Ocmp (Ccompu c)) (e1:::e2:::Enil).
-Definition cmpf (c: comparison) (e1 e2: expr) :=
-  Eop (Ocmp (Ccompf c)) (e1:::e2:::Enil).
+(** ** Conditional expressions *)
 
 Fixpoint condexpr_of_expr (e: expr) : condexpr :=
   match e with
@@ -922,12 +904,9 @@ Fixpoint condexpr_of_expr (e: expr) : condexpr :=
   | Eop (Ocmp c) el => CEcond c el
   | Econdition e1 e2 e3 =>
       CEcondition e1 (condexpr_of_expr e2) (condexpr_of_expr e3)
-  | e => CEcond (Ccompuimm Cne Int.zero) (e:::Enil)
+  | e => CEcond (Ccompimm Cne Int.zero) (e:::Enil)
   end.
 
-Definition conditionalexpr (e1 e2 e3: expr) : expr :=
-  Econdition (condexpr_of_expr e1) e2 e3.
-
 (** ** Recognition of addressing modes for load and store operations *)
 
 (*
@@ -1005,7 +984,120 @@ Definition store (chunk: memory_chunk) (e1 e2: expr) :=
   | (mode, args) => Estore chunk mode args e2
   end.
 
-(** ** If-then-else statement *)
+(** * Translation from Cminor to CminorSel *)
+
+(** Instruction selection for operator applications *)
+
+Definition sel_constant (cst: Cminor.constant) : expr :=
+  match cst with
+  | Cminor.Ointconst n => Eop (Ointconst n) Enil
+  | Cminor.Ofloatconst f => Eop (Ofloatconst f) Enil
+  | Cminor.Oaddrsymbol id ofs => Eop (Oaddrsymbol id ofs) Enil
+  | Cminor.Oaddrstack ofs => Eop (Oaddrstack ofs) Enil
+  end.
+
+Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
+  match op with
+  | Cminor.Ocast8unsigned => cast8unsigned arg 
+  | Cminor.Ocast8signed => cast8signed arg 
+  | Cminor.Ocast16unsigned => cast16unsigned arg 
+  | Cminor.Ocast16signed => cast16signed arg 
+  | Cminor.Onegint => Eop (Osubimm Int.zero) (arg ::: Enil)
+  | Cminor.Onotbool => notbool arg
+  | Cminor.Onotint => notint arg
+  | Cminor.Onegf => Eop Onegf (arg ::: Enil)
+  | Cminor.Oabsf => Eop Oabsf (arg ::: Enil)
+  | Cminor.Osingleoffloat => singleoffloat arg
+  | Cminor.Ointoffloat => Eop Ointoffloat (arg ::: Enil)
+  | Cminor.Ofloatofint => Eop Ofloatofint (arg ::: Enil)
+  | Cminor.Ofloatofintu => Eop Ofloatofintu (arg ::: Enil)
+  end.
+
+Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
+  match op with
+  | Cminor.Oadd => add arg1 arg2
+  | Cminor.Osub => sub arg1 arg2
+  | Cminor.Omul => mul arg1 arg2
+  | Cminor.Odiv => divs arg1 arg2
+  | Cminor.Odivu => divu arg1 arg2
+  | Cminor.Omod => mods arg1 arg2
+  | Cminor.Omodu => modu arg1 arg2
+  | Cminor.Oand => and arg1 arg2
+  | Cminor.Oor => or arg1 arg2
+  | Cminor.Oxor => Eop Oxor (arg1 ::: arg2 ::: Enil)
+  | Cminor.Oshl => shl arg1 arg2
+  | Cminor.Oshr => Eop Oshr (arg1 ::: arg2 ::: Enil)
+  | Cminor.Oshru => shru arg1 arg2
+  | Cminor.Oaddf => addf arg1 arg2
+  | Cminor.Osubf => subf arg1 arg2
+  | Cminor.Omulf => Eop Omulf (arg1 ::: arg2 ::: Enil)
+  | Cminor.Odivf => Eop Odivf (arg1 ::: arg2 ::: Enil)
+  | Cminor.Ocmp c => Eop (Ocmp (Ccomp c)) (arg1 ::: arg2 ::: Enil)
+  | Cminor.Ocmpu c => Eop (Ocmp (Ccompu c)) (arg1 ::: arg2 ::: Enil)
+  | Cminor.Ocmpf c => Eop (Ocmp (Ccompf c)) (arg1 ::: arg2 ::: Enil)
+  end.
+
+(** Conversion from Cminor expression to Cminorsel expressions *)
+
+Fixpoint sel_expr (a: Cminor.expr) : expr :=
+  match a with
+  | Cminor.Evar id => Evar id
+  | Cminor.Econst cst => sel_constant cst
+  | Cminor.Eunop op arg => sel_unop op (sel_expr arg)
+  | Cminor.Ebinop op arg1 arg2 => sel_binop op (sel_expr arg1) (sel_expr arg2)
+  | Cminor.Eload chunk addr => load chunk (sel_expr addr)
+  | Cminor.Estore chunk addr rhs => store chunk (sel_expr addr) (sel_expr rhs)
+  | Cminor.Ecall sg fn args => Ecall sg (sel_expr fn) (sel_exprlist args)
+  | Cminor.Econdition cond ifso ifnot =>
+      Econdition (condexpr_of_expr (sel_expr cond))
+                 (sel_expr ifso) (sel_expr ifnot)
+  | Cminor.Elet b c => Elet (sel_expr b) (sel_expr c)
+  | Cminor.Eletvar n => Eletvar n
+  | Cminor.Ealloc b => Ealloc (sel_expr b)
+  end
+
+with sel_exprlist (al: Cminor.exprlist) : exprlist :=
+  match al with
+  | Cminor.Enil => Enil
+  | Cminor.Econs a bl => Econs (sel_expr a) (sel_exprlist bl)
+  end.
+
+(** Conversion from Cminor statements to Cminorsel statements. *)
+
+Fixpoint sel_stmt (s: Cminor.stmt) : stmt :=
+  match s with
+  | Cminor.Sskip => Sskip
+  | Cminor.Sexpr e => Sexpr (sel_expr e)
+  | Cminor.Sassign id e => Sassign id (sel_expr e)
+  | Cminor.Sseq s1 s2 => Sseq (sel_stmt s1) (sel_stmt s2)
+  | Cminor.Sifthenelse e ifso ifnot =>
+      Sifthenelse (condexpr_of_expr (sel_expr e))
+                  (sel_stmt ifso) (sel_stmt ifnot)
+  | Cminor.Sloop body => Sloop (sel_stmt body)
+  | Cminor.Sblock body => Sblock (sel_stmt body)
+  | Cminor.Sexit n => Sexit n
+  | Cminor.Sswitch e cases dfl => Sswitch (sel_expr e) cases dfl
+  | Cminor.Sreturn None => Sreturn None
+  | Cminor.Sreturn (Some e) => Sreturn (Some (sel_expr e))
+  | Cminor.Stailcall sg fn args => 
+      Stailcall sg (sel_expr fn) (sel_exprlist args)
+  end.
+
+(** Conversion of functions and programs. *)
+
+Definition sel_function (f: Cminor.function) : function :=
+  mkfunction
+    f.(Cminor.fn_sig)
+    f.(Cminor.fn_params)
+    f.(Cminor.fn_vars)
+    f.(Cminor.fn_stackspace)
+    (sel_stmt f.(Cminor.fn_body)).
+
+Definition sel_fundef (f: Cminor.fundef) : fundef :=
+  transf_fundef sel_function f.
+
+Definition sel_program (p: Cminor.program) : program :=
+  transform_program sel_fundef p.
+
+
 
-Definition ifthenelse (e: expr) (ifso ifnot: stmt) : stmt :=
-  Sifthenelse (condexpr_of_expr e) ifso ifnot.
diff --git a/backend/Cmconstrproof.v b/backend/Selectionproof.v
index 35b3d8a0..e41765a7 100644
--- a/backend/Cmconstrproof.v
+++ b/backend/Selectionproof.v
@@ -1,15 +1,6 @@
-(** Correctness of the Cminor smart constructors.  This file states
-  evaluation rules for the smart constructors, for instance that [add
-  a b] evaluates to [Vint(Int.add i j)] if [a] evaluates to [Vint i]
-  and [b] to [Vint j].  It then proves that these rules are
-  admissible, that is, satisfied for all possible choices of [a] and
-  [b].  The Cminor producer can then use these evaluation rules
-  (theorems) to reason about the execution of terms produced by the
-  smart constructors.
-*)
+(** Correctness of instruction selection *)
 
 Require Import Coqlib.
-Require Import Compare_dec.
 Require Import Maps.
 Require Import AST.
 Require Import Integers.
@@ -17,14 +8,17 @@ Require Import Floats.
 Require Import Values.
 Require Import Mem.
 Require Import Events.
-Require Import Op.
 Require Import Globalenvs.
 Require Import Cminor.
-Require Import Cmconstr.
+Require Import Op.
+Require Import CminorSel.
+Require Import Selection.
+
+Open Local Scope selection_scope.
 
 Section CMCONSTR.
 
-Variable ge: Cminor.genv.
+Variable ge: genv.
 
 (** * Lifting of let-bound variables *)
 
@@ -150,19 +144,18 @@ Hint Resolve eval_lift: evalexpr.
 
 (** * Useful lemmas and tactics *)
 
-Ltac EvalOp := eapply eval_Eop; eauto with evalexpr.
-
-Ltac TrivialOp cstr :=
-  unfold cstr; intros; EvalOp.
-
 (** The following are trivial lemmas and custom tactics that help
   perform backward (inversion) and forward reasoning over the evaluation
   of operator applications. *)  
 
+Ltac EvalOp := eapply eval_Eop; eauto with evalexpr.
+
+Ltac TrivialOp cstr := unfold cstr; intros; EvalOp.
+
 Lemma inv_eval_Eop_0:
   forall sp le e m1 op t m2 v,
   eval_expr ge sp le e m1 (Eop op Enil) t m2 v ->
-  t = E0 /\ m2 = m1 /\ eval_operation ge sp op nil = Some v.
+  t = E0 /\ m2 = m1 /\ eval_operation ge sp op nil m1 = Some v.
 Proof.
   intros. inversion H. inversion H6. 
   intuition. congruence.
@@ -173,7 +166,7 @@ Lemma inv_eval_Eop_1:
   eval_expr ge sp le e m1 (Eop op (a1 ::: Enil)) t m2 v ->
   exists v1,
   eval_expr ge sp le e m1 a1 t m2 v1 /\
-  eval_operation ge sp op (v1 :: nil) = Some v.
+  eval_operation ge sp op (v1 :: nil) m2 = Some v.
 Proof.
   intros. 
   inversion H. inversion H6. inversion H18. 
@@ -187,7 +180,7 @@ Lemma inv_eval_Eop_2:
   eval_expr ge sp le e m1 a1 t1 m2 v1 /\
   eval_expr ge sp le e m2 a2 t2 m3 v2 /\
   t3 = t1 ** t2 /\
-  eval_operation ge sp op (v1 :: v2 :: nil) = Some v.
+  eval_operation ge sp op (v1 :: v2 :: nil) m3 = Some v.
 Proof.
   intros. 
   inversion H. subst. inversion H6. subst. inversion H8. subst.
@@ -229,9 +222,16 @@ Ltac SimplEval :=
 Ltac InvEval H :=
   generalize H; SimplEval; clear H.
 
-(** ** Admissible evaluation rules for the smart constructors *)
+(** * Correctness of the smart constructors *)
 
-(** All proofs follow a common pattern:
+(** We now show that the code generated by "smart constructor" functions
+  such as [Selection.notint] behaves as expected.  Continuing the
+  [notint] example, we show that if the expression [e]
+  evaluates to some integer value [Vint n], then [Selection.notint e]
+  evaluates to a value [Vint (Int.not n)] which is indeed the integer
+  negation of the value of [e].
+
+  All proofs follow a common pattern:
 - Reasoning by case over the result of the classification functions
   (such as [add_match] for integer addition), gathering additional
   information on the shape of the argument expressions in the non-default
@@ -244,55 +244,7 @@ Ltac InvEval H :=
   by the smart constructor.
 *)
 
-Theorem eval_negint:
-  forall sp le e m1 a t m2 x,
-  eval_expr ge sp le e m1 a t m2 (Vint x) ->
-  eval_expr ge sp le e m1 (negint a) t m2 (Vint (Int.neg x)).
-Proof.
-  TrivialOp negint. 
-Qed.
-
-Theorem eval_negfloat:
-  forall sp le e m1 a t m2 x,
-  eval_expr ge sp le e m1 a t m2 (Vfloat x) ->
-  eval_expr ge sp le e m1 (negfloat a) t m2 (Vfloat (Float.neg x)).
-Proof.
-  TrivialOp negfloat.
-Qed.
-
-Theorem eval_absfloat:
-  forall sp le e m1 a t m2 x,
-  eval_expr ge sp le e m1 a t m2 (Vfloat x) ->
-  eval_expr ge sp le e m1 (absfloat a) t m2 (Vfloat (Float.abs x)).
-Proof.
-  TrivialOp absfloat.
-Qed.
-
-Theorem eval_intoffloat:
-  forall sp le e m1 a t m2 x,
-  eval_expr ge sp le e m1 a t m2 (Vfloat x) ->
-  eval_expr ge sp le e m1 (intoffloat a) t m2 (Vint (Float.intoffloat x)).
-Proof.
-  TrivialOp intoffloat.
-Qed.
-
-Theorem eval_floatofint:
-  forall sp le e m1 a t m2 x,
-  eval_expr ge sp le e m1 a t m2 (Vint x) ->
-  eval_expr ge sp le e m1 (floatofint a) t m2 (Vfloat (Float.floatofint x)).
-Proof.
-  TrivialOp floatofint.
-Qed.
-
-Theorem eval_floatofintu:
-  forall sp le e m1 a t m2 x,
-  eval_expr ge sp le e m1 a t m2 (Vint x) ->
-  eval_expr ge sp le e m1 (floatofintu a) t m2 (Vfloat (Float.floatofintu x)).
-Proof.
-  TrivialOp floatofintu.
-Qed.
-
-Theorem eval_notint:
+Lemma eval_notint:
   forall sp le e m1 a t m2 x,
   eval_expr ge sp le e m1 a t m2 (Vint x) ->
   eval_expr ge sp le e m1 (notint a) t m2 (Vint (Int.not x)).
@@ -325,7 +277,7 @@ Qed.
 Hint Resolve Val.bool_of_true_val Val.bool_of_false_val
              Val.bool_of_true_val_inv Val.bool_of_false_val_inv: valboolof.
 
-Theorem eval_notbool:
+Lemma eval_notbool:
   forall a sp le e m1 t m2 v b,
   eval_expr ge sp le e m1 a t m2 v ->
   Val.bool_of_val v b ->
@@ -349,11 +301,11 @@ Proof.
 
   inversion H. subst. 
   simpl in H11. eapply eval_Eop; eauto.
-  simpl. caseEq (eval_condition c vl); intros.
+  simpl. caseEq (eval_condition c vl m2); intros.
   rewrite H1 in H11. 
   assert (b0 = b). 
   destruct b0; inversion H11; subst v; inversion H0; auto.
-  subst b0. rewrite (Op.eval_negate_condition _ _ H1). 
+  subst b0. rewrite (Op.eval_negate_condition _ _ _ H1). 
   destruct b; reflexivity.
   rewrite H1 in H11; discriminate.
 
@@ -364,7 +316,7 @@ Proof.
   destruct v4; eauto. auto.
 Qed.
 
-Theorem eval_addimm:
+Lemma eval_addimm:
   forall sp le e m1 n a t m2 x,
   eval_expr ge sp le e m1 a t m2 (Vint x) ->
   eval_expr ge sp le e m1 (addimm n a) t m2 (Vint (Int.add x n)).
@@ -382,7 +334,7 @@ Proof.
   EvalOp. 
 Qed. 
 
-Theorem eval_addimm_ptr:
+Lemma eval_addimm_ptr:
   forall sp le e m1 n t a m2 b ofs,
   eval_expr ge sp le e m1 a t m2 (Vptr b ofs) ->
   eval_expr ge sp le e m1 (addimm n a) t m2 (Vptr b (Int.add ofs n)).
@@ -404,7 +356,7 @@ Proof.
   EvalOp. 
 Qed.
 
-Theorem eval_add:
+Lemma eval_add:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -431,7 +383,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_add_ptr:
+Lemma eval_add_ptr:
   forall sp le e m1 a t1 m2 p x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vptr p x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -456,7 +408,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_add_ptr_2:
+Lemma eval_add_ptr_2:
   forall sp le e m1 a t1 m2 p x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vptr p y) ->
@@ -483,7 +435,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_sub:
+Lemma eval_sub:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -511,7 +463,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_sub_ptr_int:
+Lemma eval_sub_ptr_int:
   forall sp le e m1 a t1 m2 p x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vptr p x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -540,7 +492,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_sub_ptr_ptr:
+Lemma eval_sub_ptr_ptr:
   forall sp le e m1 a t1 m2 p x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vptr p x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vptr p y) ->
@@ -589,7 +541,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_shlimm:
+Lemma eval_shlimm:
   forall sp le e m1 a n t m2 x,
   eval_expr ge sp le e m1 a t m2 (Vint x) ->
   Int.ltu n (Int.repr 32) = true ->
@@ -603,7 +555,7 @@ Proof.
   apply eval_rolm. auto. symmetry. apply Int.shl_rolm. exact H0.
 Qed.
 
-Theorem eval_shruimm:
+Lemma eval_shruimm:
   forall sp le e m1 a n t m2 x,
   eval_expr ge sp le e m1 a t m2 (Vint x) ->
   Int.ltu n (Int.repr 32) = true ->
@@ -649,7 +601,7 @@ Proof.
   intros. EvalOp. 
 Qed.
 
-Theorem eval_mulimm:
+Lemma eval_mulimm:
   forall sp le e m1 a n t m2 x,
   eval_expr ge sp le e m1 a t m2 (Vint x) ->
   eval_expr ge sp le e m1 (mulimm n a) t m2 (Vint (Int.mul x n)).
@@ -669,7 +621,7 @@ Proof.
   apply eval_mulimm_base. assumption.
 Qed.
 
-Theorem eval_mul:
+Lemma eval_mul:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -683,7 +635,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_divs:
+Lemma eval_divs:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -696,9 +648,9 @@ Qed.
 
 Lemma eval_mod_aux:
   forall divop semdivop,
-  (forall sp x y,
+  (forall sp x y m,
    y <> Int.zero ->
-   eval_operation ge sp divop (Vint x :: Vint y :: nil) =
+   eval_operation ge sp divop (Vint x :: Vint y :: nil) m =
    Some (Vint (semdivop x y))) ->
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
@@ -725,7 +677,7 @@ Proof.
   reflexivity. traceEq.
 Qed.
 
-Theorem eval_mods:
+Lemma eval_mods:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -750,7 +702,7 @@ Proof.
   predSpec Int.eq Int.eq_spec y Int.zero. contradiction. auto.
 Qed.
 
-Theorem eval_divu:
+Lemma eval_divu:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -767,7 +719,7 @@ Proof.
   eapply eval_divu_base; eauto.
 Qed.
 
-Theorem eval_modu:
+Lemma eval_modu:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -788,7 +740,7 @@ Proof.
   eexact H. eexact H0. auto. auto.
 Qed.
 
-Theorem eval_andimm:
+Lemma eval_andimm:
   forall sp le e m1 n a t m2 x,
   eval_expr ge sp le e m1 a t m2 (Vint x) ->
   eval_expr ge sp le e m1 (andimm n a) t m2 (Vint (Int.and x n)).
@@ -798,7 +750,7 @@ Proof.
   EvalOp. 
 Qed.
 
-Theorem eval_and:
+Lemma eval_and:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -848,14 +800,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_xor:
-  forall sp le e m1 a t1 m2 x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
-  eval_expr ge sp le e m1 (xor a b) (t1**t2) m3 (Vint (Int.xor x y)).
-Proof. TrivialOp xor. Qed.
-
-Theorem eval_shl:
+Lemma eval_shl:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -867,17 +812,7 @@ Proof.
   EvalOp. simpl. rewrite H1. auto.
 Qed.
 
-Theorem eval_shr:
-  forall sp le e m1 a t1 m2 x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
-  Int.ltu y (Int.repr 32) = true ->
-  eval_expr ge sp le e m1 (shr a b) (t1**t2) m3 (Vint (Int.shr x y)).
-Proof.
-  TrivialOp shr. simpl. rewrite H1. auto.
-Qed.
-
-Theorem eval_shru:
+Lemma eval_shru:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
@@ -889,7 +824,7 @@ Proof.
   EvalOp. simpl. rewrite H1. auto.
 Qed.
 
-Theorem eval_addf:
+Lemma eval_addf:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vfloat x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vfloat y) ->
@@ -908,7 +843,7 @@ Proof.
   EvalOp.
 Qed.
  
-Theorem eval_subf:
+Lemma eval_subf:
   forall sp le e m1 a t1 m2 x b t2 m3 y,
   eval_expr ge sp le e m1 a t1 m2 (Vfloat x) ->
   eval_expr ge sp le e m2 b t2 m3 (Vfloat y) ->
@@ -921,21 +856,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_mulf:
-  forall sp le e m1 a t1 m2 x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vfloat x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vfloat y) ->
-  eval_expr ge sp le e m1 (mulf a b) (t1**t2) m3 (Vfloat (Float.mul x y)).
-Proof. TrivialOp mulf. Qed.
-
-Theorem eval_divf:
-  forall sp le e m1 a t1 m2 x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vfloat x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vfloat y) ->
-  eval_expr ge sp le e m1 (divf a b) (t1**t2) m3 (Vfloat (Float.div x y)).
-Proof. TrivialOp divf. Qed.
-
-Theorem eval_cast8signed:
+Lemma eval_cast8signed:
   forall sp le e m1 a t m2 v,
   eval_expr ge sp le e m1 a t m2 v ->
   eval_expr ge sp le e m1 (cast8signed a) t m2 (Val.cast8signed v).
@@ -946,7 +867,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_cast8unsigned:
+Lemma eval_cast8unsigned:
   forall sp le e m1 a t m2 v,
   eval_expr ge sp le e m1 a t m2 v ->
   eval_expr ge sp le e m1 (cast8unsigned a) t m2 (Val.cast8unsigned v).
@@ -957,7 +878,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_cast16signed:
+Lemma eval_cast16signed:
   forall sp le e m1 a t m2 v,
   eval_expr ge sp le e m1 a t m2 v ->
   eval_expr ge sp le e m1 (cast16signed a) t m2 (Val.cast16signed v).
@@ -968,7 +889,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_cast16unsigned:
+Lemma eval_cast16unsigned:
   forall sp le e m1 a t m2 v,
   eval_expr ge sp le e m1 a t m2 v ->
   eval_expr ge sp le e m1 (cast16unsigned a) t m2 (Val.cast16unsigned v).
@@ -979,7 +900,7 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_singleoffloat:
+Lemma eval_singleoffloat:
   forall sp le e m1 a t m2 v,
   eval_expr ge sp le e m1 a t m2 v ->
   eval_expr ge sp le e m1 (singleoffloat a) t m2 (Val.singleoffloat v).
@@ -990,75 +911,12 @@ Proof.
   EvalOp.
 Qed.
 
-Theorem eval_cmp:
-  forall sp le c e m1 a t1 m2 x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
-  eval_expr ge sp le e m1 (cmp c a b) (t1**t2) m3 (Val.of_bool (Int.cmp c x y)).
-Proof. 
-  TrivialOp cmp. 
-  simpl. case (Int.cmp c x y); auto.
-Qed.
-
-Theorem eval_cmp_null_r:
-  forall sp le c e m1 a t1 m2 p x b t2 m3 v,
-  eval_expr ge sp le e m1 a t1 m2 (Vptr p x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vint Int.zero) ->
-  (c = Ceq /\ v = Vfalse) \/ (c = Cne /\ v = Vtrue) ->
-  eval_expr ge sp le e m1 (cmp c a b) (t1**t2) m3 v.
-Proof. 
-  TrivialOp cmp. 
-  simpl. elim H1; intros [EQ1 EQ2]; subst c; subst v; reflexivity.
-Qed.
-
-Theorem eval_cmp_null_l:
-  forall sp le c e m1 a t1 m2 p x b t2 m3 v,
-  eval_expr ge sp le e m1 a t1 m2 (Vint Int.zero) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vptr p x) ->
-  (c = Ceq /\ v = Vfalse) \/ (c = Cne /\ v = Vtrue) ->
-  eval_expr ge sp le e m1 (cmp c a b) (t1**t2) m3 v.
-Proof. 
-  TrivialOp cmp. 
-  simpl. elim H1; intros [EQ1 EQ2]; subst c; subst v; reflexivity.
-Qed.
-
-Theorem eval_cmp_ptr:
-  forall sp le c e m1 a t1 m2 p x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vptr p x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vptr p y) ->
-  eval_expr ge sp le e m1 (cmp c a b) (t1**t2) m3 (Val.of_bool (Int.cmp c x y)).
-Proof. 
-  TrivialOp cmp. 
-  simpl. unfold eq_block. rewrite zeq_true. 
-  case (Int.cmp c x y); auto.
-Qed.
-
-Theorem eval_cmpu:
-  forall sp le c e m1 a t1 m2 x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vint x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vint y) ->
-  eval_expr ge sp le e m1 (cmpu c a b) (t1**t2) m3 (Val.of_bool (Int.cmpu c x y)).
-Proof. 
-  TrivialOp cmpu. 
-  simpl. case (Int.cmpu c x y); auto.
-Qed.
-
-Theorem eval_cmpf:
-  forall sp le c e m1 a t1 m2 x b t2 m3 y,
-  eval_expr ge sp le e m1 a t1 m2 (Vfloat x) ->
-  eval_expr ge sp le e m2 b t2 m3 (Vfloat y) ->
-  eval_expr ge sp le e m1 (cmpf c a b) (t1**t2) m3 (Val.of_bool (Float.cmp c x y)).
-Proof. 
-  TrivialOp cmpf. 
-  simpl. case (Float.cmp c x y); auto.
-Qed.
-
 Lemma eval_base_condition_of_expr:
   forall sp le a e m1 t m2 v (b: bool),
   eval_expr ge sp le e m1 a t m2 v ->
   Val.bool_of_val v b ->
   eval_condexpr ge sp le e m1 
-                (CEcond (Ccompuimm Cne Int.zero) (a ::: Enil))
+                (CEcond (Ccompimm Cne Int.zero) (a ::: Enil))
                 t m2 b.
 Proof.
   intros. 
@@ -1090,30 +948,6 @@ Proof.
   destruct v1; eauto with evalexpr.
 Qed.
 
-Theorem eval_conditionalexpr_true:
-  forall sp le e m1 a1 t1 m2 v1 t2 a2 m3 v2 a3,
-  eval_expr ge sp le e m1 a1 t1 m2 v1 ->
-  Val.is_true v1 ->
-  eval_expr ge sp le e m2 a2 t2 m3 v2 ->
-  eval_expr ge sp le e m1 (conditionalexpr a1 a2 a3) (t1**t2) m3 v2.
-Proof.
-  intros; unfold conditionalexpr.
-  apply eval_Econdition with t1 m2 true t2; auto.
-  eapply eval_condition_of_expr; eauto with valboolof.
-Qed.
-
-Theorem eval_conditionalexpr_false:
-  forall sp le e m1 a1 t1 m2 v1 a2 t2 m3 v2 a3,
-  eval_expr ge sp le e m1 a1 t1 m2 v1 ->
-  Val.is_false v1 ->
-  eval_expr ge sp le e m2 a3 t2 m3 v2 ->
-  eval_expr ge sp le e m1 (conditionalexpr a1 a2 a3) (t1**t2) m3 v2.
-Proof.
-  intros; unfold conditionalexpr.
-  apply eval_Econdition with t1 m2 false t2; auto.
-  eapply eval_condition_of_expr; eauto with valboolof.
-Qed.
-
 Lemma eval_addressing:
   forall sp le e m1 a t m2 v b ofs,
   eval_expr ge sp le e m1 a t m2 v ->
@@ -1150,7 +984,7 @@ Proof.
     subst v. simpl. rewrite Int.add_zero. auto.
 Qed.
 
-Theorem eval_load:
+Lemma eval_load:
   forall sp le e m1 a t m2 v chunk v',
   eval_expr ge sp le e m1 a t m2 v ->
   Mem.loadv chunk m2 v = Some v' ->
@@ -1163,7 +997,7 @@ Proof.
   eapply eval_Eload; eauto. 
 Qed.
 
-Theorem eval_store:
+Lemma eval_store:
   forall sp le e m1 a1 t1 m2 v1 a2 t2 m3 v2 chunk m4,
   eval_expr ge sp le e m1 a1 t1 m2 v1 ->
   eval_expr ge sp le e m2 a2 t2 m3 v2 ->
@@ -1177,31 +1011,230 @@ Proof.
   eapply eval_Estore; eauto. 
 Qed.
 
-Theorem exec_ifthenelse_true:
-  forall sp e m1 a t1 m2 v ifso ifnot t2 e3 m3 out,
-  eval_expr ge sp nil e m1 a t1 m2 v ->
-  Val.is_true v ->
-  exec_stmt ge sp e m2 ifso t2 e3 m3 out ->
-  exec_stmt ge sp e m1 (ifthenelse a ifso ifnot) (t1**t2) e3 m3 out.
+(** * Correctness of instruction selection for operators *)
+
+(** We now prove a semantic preservation result for the [sel_unop]
+  and [sel_binop] selection functions.  The proof exploits
+  the results of the previous section. *)
+
+Lemma eval_sel_unop:
+  forall sp le e m op a1 t m1 v1 v,
+  eval_expr ge sp le e m a1 t m1 v1 ->
+  eval_unop op v1 = Some v ->
+  eval_expr ge sp le e m (sel_unop op a1) t m1 v.
 Proof.
-  intros. unfold ifthenelse.
-  apply exec_Sifthenelse with t1 m2 true t2.
-  eapply eval_condition_of_expr; eauto with valboolof.
-  auto. auto.
+  destruct op; simpl; intros; FuncInv; try subst v.
+  apply eval_cast8unsigned; auto.
+  apply eval_cast8signed; auto.
+  apply eval_cast16unsigned; auto.
+  apply eval_cast16signed; auto.
+  EvalOp. 
+  generalize (Int.eq_spec i Int.zero). destruct (Int.eq i Int.zero); intro.
+  change true with (negb false). eapply eval_notbool; eauto. subst i; constructor.
+  change false with (negb true). eapply eval_notbool; eauto. constructor; auto.
+  change Vfalse with (Val.of_bool (negb true)).
+  eapply eval_notbool; eauto. constructor.
+  apply eval_notint; auto.
+  EvalOp.
+  EvalOp.
+  apply eval_singleoffloat; auto.
+  EvalOp.
+  EvalOp.
+  EvalOp.
 Qed.
 
-Theorem exec_ifthenelse_false:
-  forall sp e m1 a t1 m2 v ifso ifnot t2 e3 m3 out,
-  eval_expr ge sp nil e m1 a t1 m2 v ->
-  Val.is_false v ->
-  exec_stmt ge sp e m2 ifnot t2 e3 m3 out ->
-  exec_stmt ge sp e m1 (ifthenelse a ifso ifnot) (t1**t2) e3 m3 out.
+Lemma eval_sel_binop:
+  forall sp le e m op a1 a2 t1 m1 v1 t2 m2 v2 v,
+  eval_expr ge sp le e m a1 t1 m1 v1 ->
+  eval_expr ge sp le e m1 a2 t2 m2 v2 ->
+  eval_binop op v1 v2 m2 = Some v ->
+  eval_expr ge sp le e m (sel_binop op a1 a2) (t1 ** t2) m2 v.
 Proof.
-  intros. unfold ifthenelse.
-  apply exec_Sifthenelse with t1 m2 false t2.
-  eapply eval_condition_of_expr; eauto with valboolof.
-  auto. auto.
+  destruct op; simpl; intros; FuncInv; try subst v.
+  eapply eval_add; eauto.
+  eapply eval_add_ptr_2; eauto.
+  eapply eval_add_ptr; eauto.
+  eapply eval_sub; eauto.
+  eapply eval_sub_ptr_int; eauto.
+  destruct (eq_block b b0); inv H1. 
+  eapply eval_sub_ptr_ptr; eauto.
+  eapply eval_mul; eauto.
+  generalize (Int.eq_spec i0 Int.zero). intro. destruct (Int.eq i0 Int.zero); inv H1.
+  eapply eval_divs; eauto.
+  generalize (Int.eq_spec i0 Int.zero). intro. destruct (Int.eq i0 Int.zero); inv H1.
+  eapply eval_divu; eauto.
+  generalize (Int.eq_spec i0 Int.zero). intro. destruct (Int.eq i0 Int.zero); inv H1.
+  eapply eval_mods; eauto.
+  generalize (Int.eq_spec i0 Int.zero). intro. destruct (Int.eq i0 Int.zero); inv H1.
+  eapply eval_modu; eauto.
+  eapply eval_and; eauto.
+  eapply eval_or; eauto.
+  EvalOp.
+  caseEq (Int.ltu i0 (Int.repr 32)); intro; rewrite H2 in H1; inv H1.
+  eapply eval_shl; eauto.
+  EvalOp.
+  caseEq (Int.ltu i0 (Int.repr 32)); intro; rewrite H2 in H1; inv H1.
+  eapply eval_shru; eauto.
+  eapply eval_addf; eauto.
+  eapply eval_subf; eauto.
+  EvalOp.
+  EvalOp.
+  EvalOp. simpl. destruct (Int.cmp c i i0); auto. 
+  EvalOp. simpl. generalize H1; unfold eval_compare_null, Cminor.eval_compare_null.
+  destruct (Int.eq i Int.zero). destruct c; intro EQ; inv EQ; auto.
+  auto.
+  EvalOp. simpl. generalize H1; unfold eval_compare_null, Cminor.eval_compare_null.
+  destruct (Int.eq i0 Int.zero). destruct c; intro EQ; inv EQ; auto.
+  auto.
+  EvalOp. simpl. 
+  destruct (valid_pointer m2 b (Int.signed i) && valid_pointer m2 b0 (Int.signed i0)).
+  destruct (eq_block b b0); inv H1. 
+  destruct (Int.cmp c i i0); auto.
+  auto.
+  EvalOp. simpl. destruct (Int.cmpu c i i0); auto.
+  EvalOp. simpl. destruct (Float.cmp c f f0); auto.
 Qed.
 
 End CMCONSTR.
 
+(** * Semantic preservation for instruction selection. *)
+
+Section PRESERVATION.
+
+Variable prog: Cminor.program.
+Let tprog := sel_program prog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+(** Relationship between the global environments for the original
+  CminorSel program and the generated RTL program. *)
+
+Lemma symbols_preserved:
+  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  intros; unfold ge, tge, tprog, sel_program. 
+  apply Genv.find_symbol_transf.
+Qed.
+
+Lemma functions_translated:
+  forall (v: val) (f: Cminor.fundef),
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (sel_fundef f).
+Proof.  
+  intros.
+  exact (Genv.find_funct_transf sel_fundef H).
+Qed.
+
+Lemma function_ptr_translated:
+  forall (b: block) (f: Cminor.fundef),
+  Genv.find_funct_ptr ge b = Some f ->
+  Genv.find_funct_ptr tge b = Some (sel_fundef f).
+Proof.  
+  intros. 
+  exact (Genv.find_funct_ptr_transf sel_fundef H).
+Qed.
+
+Lemma sig_function_translated:
+  forall f,
+  funsig (sel_fundef f) = Cminor.funsig f.
+Proof.
+  intros. destruct f; reflexivity.
+Qed.
+
+(** This is the main semantic preservation theorem:
+  instruction selection preserves the semantics of function invocations.
+  The proof is an induction over the Cminor evaluation derivation. *)
+
+Lemma sel_function_correct:
+  forall m fd vargs t m' vres,
+  Cminor.eval_funcall ge m fd vargs t m' vres ->
+  CminorSel.eval_funcall tge m (sel_fundef fd) vargs t m' vres.
+Proof.
+  apply (Cminor.eval_funcall_ind4 ge
+          (fun sp le e m a t m' v => eval_expr tge sp le e m (sel_expr a) t m' v)
+          (fun sp le e m a t m' v => eval_exprlist tge sp le e m (sel_exprlist a) t m' v)
+          (fun m fd vargs t m' vres => eval_funcall tge m (sel_fundef fd) vargs t m' vres)
+          (fun sp e m s t e' m' out => exec_stmt tge sp e m (sel_stmt s) t e' m' out));
+  intros; simpl.
+  (* Evar *)
+  constructor; auto.
+  (* Econst *)
+  destruct cst; simpl; simpl in H; (econstructor; [constructor|simpl;auto]).
+  rewrite symbols_preserved. auto.
+  (* Eunop *)
+  eapply eval_sel_unop; eauto.
+  (* Ebinop *)
+  subst t. eapply eval_sel_binop; eauto.
+  (* Eload *)
+  eapply eval_load; eauto.
+  (* Estore *)
+  subst t. eapply eval_store; eauto.
+  (* Ecall *)
+  econstructor; eauto. apply functions_translated; auto.
+  rewrite <- H4. apply sig_function_translated.
+  (* Econdition *)
+  econstructor; eauto. eapply eval_condition_of_expr; eauto. 
+  destruct b1; auto.
+  (* Elet *)
+  econstructor; eauto.
+  (* Eletvar *)
+  constructor; auto.
+  (* Ealloc *)
+  econstructor; eauto.
+  (* Enil *)
+  constructor.
+  (* Econs *)
+  econstructor; eauto.
+  (* Internal function *)
+  econstructor; eauto. 
+  (* External function *)
+  econstructor; eauto.
+  (* Sskip *)
+  constructor.
+  (* Sexpr *)
+  econstructor; eauto.
+  (* Sassign *)
+  econstructor; eauto.
+  (* Sifthenelse *)
+  econstructor; eauto. eapply eval_condition_of_expr; eauto.
+  destruct b1; auto.
+  (* Sseq *)
+  eapply exec_Sseq_continue; eauto.
+  eapply exec_Sseq_stop; eauto.
+  (* Sloop *)
+  eapply exec_Sloop_loop; eauto.
+  eapply exec_Sloop_stop; eauto.
+  (* Sblock *)
+  econstructor; eauto.
+  (* Sexit *)
+  constructor.
+  (* Sswitch *)
+  econstructor; eauto.
+  (* Sreturn *)
+  constructor.
+  econstructor; eauto.
+  (* Stailcall *)
+  econstructor; eauto. apply functions_translated; auto.
+  rewrite <- H4. apply sig_function_translated.
+Qed.
+
+End PRESERVATION.
+
+(** As a corollary, instruction selection preserves the observable
+   behaviour of programs. *)
+
+Theorem sel_program_correct:
+  forall prog t r,
+  Cminor.exec_program prog t r ->
+  CminorSel.exec_program (sel_program prog) t r.
+Proof.
+  intros.
+  destruct H as [b [f [m [FINDS [FINDF [SIG EXEC]]]]]].
+  exists b; exists (sel_fundef f); exists m.
+  split. simpl. rewrite <- FINDS. apply symbols_preserved.
+  split. apply function_ptr_translated. auto.
+  split. rewrite <- SIG. apply sig_function_translated. 
+  replace (Genv.init_mem (sel_program prog)) with (Genv.init_mem prog).
+  apply sel_function_correct; auto.
+  symmetry. unfold sel_program. apply Genv.init_mem_transf.
+Qed.
diff --git a/backend/Stacking.v b/backend/Stacking.v
index de350dc1..c19e293f 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -2,13 +2,14 @@
 
 Require Import Coqlib.
 Require Import Maps.
+Require Import Errors.
 Require Import AST.
 Require Import Integers.
 Require Import Op.
 Require Import RTL.
 Require Import Locations.
 Require Import Linear.
-Require Import Lineartyping.
+Require Import Bounds.
 Require Import Mach.
 Require Import Conventions.
 
@@ -87,43 +88,61 @@ Definition offset_of_index (fe: frame_env) (idx: frame_index) :=
   store in the frame the values of callee-save registers used by the
   current function. *)
 
-Definition save_int_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) :=
-  let i := index_int_callee_save cs in
-  if zlt i fe.(fe_num_int_callee_save)
-  then Msetstack cs (Int.repr (offset_of_index fe (FI_saved_int i))) Tint :: k
+Definition save_callee_save_reg
+    (bound: frame_env -> Z) (number: mreg -> Z) (mkindex: Z -> frame_index)
+    (ty: typ) (fe: frame_env) (cs: mreg) (k: Mach.code) :=
+  let i := number cs in
+  if zlt i (bound fe)
+  then Msetstack cs (Int.repr (offset_of_index fe (mkindex i))) ty :: k
   else k.
 
-Definition save_float_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) :=
-  let i := index_float_callee_save cs in
-  if zlt i fe.(fe_num_float_callee_save)
-  then Msetstack cs (Int.repr (offset_of_index fe (FI_saved_float i))) Tfloat :: k
-  else k.
+Definition save_callee_save_regs
+    (bound: frame_env -> Z) (number: mreg -> Z) (mkindex: Z -> frame_index)
+    (ty: typ) (fe: frame_env) (csl: list mreg) (k: Mach.code) :=
+  List.fold_right (save_callee_save_reg bound number mkindex ty fe) k csl.
+
+Definition save_callee_save_int (fe: frame_env) :=
+  save_callee_save_regs 
+    fe_num_int_callee_save index_int_callee_save FI_saved_int
+    Tint fe int_callee_save_regs.
+
+Definition save_callee_save_float (fe: frame_env) :=
+  save_callee_save_regs
+    fe_num_float_callee_save index_float_callee_save FI_saved_float 
+    Tfloat fe float_callee_save_regs.
 
 Definition save_callee_save (fe: frame_env) (k: Mach.code) :=
-  List.fold_right (save_int_callee_save fe)
-    (List.fold_right (save_float_callee_save fe) k float_callee_save_regs)
-    int_callee_save_regs.
+  save_callee_save_int fe (save_callee_save_float fe k).
 
 (** [restore_callee_save fe k] adds before [k] the instructions that
   re-load from the frame the values of callee-save registers used by the
   current function, restoring these registers to their initial values. *)
 
-Definition restore_int_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) :=
-  let i := index_int_callee_save cs in
-  if zlt i fe.(fe_num_int_callee_save)
-  then Mgetstack (Int.repr (offset_of_index fe (FI_saved_int i))) Tint cs :: k
+Definition restore_callee_save_reg
+    (bound: frame_env -> Z) (number: mreg -> Z) (mkindex: Z -> frame_index)
+    (ty: typ) (fe: frame_env) (cs: mreg) (k: Mach.code) :=
+  let i := number cs in
+  if zlt i (bound fe)
+  then Mgetstack (Int.repr (offset_of_index fe (mkindex i))) ty cs :: k
   else k.
 
-Definition restore_float_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) :=
-  let i := index_float_callee_save cs in
-  if zlt i fe.(fe_num_float_callee_save)
-  then Mgetstack (Int.repr (offset_of_index fe (FI_saved_float i))) Tfloat cs :: k
-  else k.
+Definition restore_callee_save_regs
+    (bound: frame_env -> Z) (number: mreg -> Z) (mkindex: Z -> frame_index)
+    (ty: typ) (fe: frame_env) (csl: list mreg) (k: Mach.code) :=
+  List.fold_right (restore_callee_save_reg bound number mkindex ty fe) k csl.
+
+Definition restore_callee_save_int (fe: frame_env) :=
+  restore_callee_save_regs 
+    fe_num_int_callee_save index_int_callee_save FI_saved_int
+    Tint fe int_callee_save_regs.
+
+Definition restore_callee_save_float (fe: frame_env) :=
+  restore_callee_save_regs
+    fe_num_float_callee_save index_float_callee_save FI_saved_float 
+    Tfloat fe float_callee_save_regs.
 
 Definition restore_callee_save (fe: frame_env) (k: Mach.code) :=
-  List.fold_right (restore_int_callee_save fe)
-    (List.fold_right (restore_float_callee_save fe) k float_callee_save_regs)
-    int_callee_save_regs.
+  restore_callee_save_int fe (restore_callee_save_float fe k).
 
 (** * Code transformation. *)
 
@@ -186,6 +205,8 @@ Definition transl_instr
       Mstore chunk (transl_addr fe addr) args src :: k
   | Lcall sig ros =>
       Mcall sig ros :: k
+  | Ltailcall sig ros =>
+      restore_callee_save fe (Mtailcall sig ros :: k)
   | Lalloc =>
       Malloc :: k
   | Llabel lbl =>
@@ -214,18 +235,23 @@ Definition transl_code
 Definition transl_body (f: Linear.function) (fe: frame_env) :=
   save_callee_save fe (transl_code fe f.(Linear.fn_code)).
 
-Definition transf_function (f: Linear.function) : option Mach.function :=
+Open Local Scope string_scope.
+
+Definition transf_function (f: Linear.function) : res Mach.function :=
   let fe := make_env (function_bounds f) in
-  if zlt f.(Linear.fn_stacksize) 0 then None else
-  if zlt (- Int.min_signed) fe.(fe_size) then None else
-  Some (Mach.mkfunction
+  if zlt f.(Linear.fn_stacksize) 0 then
+    Error (msg "Stacking.transf_function") 
+  else if zlt (- Int.min_signed) fe.(fe_size) then
+    Error (msg "Too many spilled variables, stack size exceeded")
+  else
+    OK (Mach.mkfunction
          f.(Linear.fn_sig)
          (transl_body f fe)
          f.(Linear.fn_stacksize)
          fe.(fe_size)).
 
-Definition transf_fundef (f: Linear.fundef) : option Mach.fundef :=
+Definition transf_fundef (f: Linear.fundef) : res Mach.fundef :=
   AST.transf_partial_fundef transf_function f.
 
-Definition transf_program (p: Linear.program) : option Mach.program :=
+Definition transf_program (p: Linear.program) : res Mach.program :=
   transform_partial_program transf_fundef p.
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 3bc4339b..905570ef 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -1,15 +1,16 @@
 (** Correctness proof for the translation from Linear to Mach. *)
 
 (** This file proves semantic preservation for the [Stacking] pass.
-  For the target language Mach, we use the alternate semantics
+  For the target language Mach, we use the abstract semantics
   given in file [Machabstr], where a part of the activation record
   is not resident in memory.  Combined with the semantic equivalence
-  result between the two Mach semantics (see file [Machabstr2mach]),
+  result between the two Mach semantics (see file [Machabstr2concr]),
   the proof in this file also shows semantic preservation with
-  respect to the standard Mach semantics. *)
+  respect to the concrete Mach semantics. *)
 
 Require Import Coqlib.
 Require Import Maps.
+Require Import Errors.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
@@ -17,11 +18,13 @@ Require Import Op.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Locations.
-Require Import Mach.
-Require Import Machabstr.
 Require Import Linear.
 Require Import Lineartyping.
+Require Import Mach.
+Require Import Machabstr.
+Require Import Bounds.
 Require Import Conventions.
 Require Import Stacking.
 
@@ -29,25 +32,28 @@ Require Import Stacking.
 
 (** ``Good variable'' properties for frame accesses. *)
 
+Lemma typesize_typesize:
+  forall ty, AST.typesize ty = 4 * Locations.typesize ty.
+Proof.
+  destruct ty; auto.
+Qed.
+
 Lemma get_slot_ok:
   forall fr ty ofs,
-  0 <= ofs -> fr.(low) + ofs + 4 * typesize ty <= 0 ->
+  24 <= ofs -> fr.(fr_low) + ofs + 4 * typesize ty <= 0 ->
   exists v, get_slot fr ty ofs v.
 Proof.
-  intros. exists (load_contents (mem_type ty) fr.(contents) (fr.(low) + ofs)).
-  constructor; auto. 
+  intros. rewrite <- typesize_typesize in H0.
+  exists (fr.(fr_contents) ty (fr.(fr_low) + ofs)). constructor; auto. 
 Qed.
 
 Lemma set_slot_ok:
   forall fr ty ofs v,
-  fr.(high) = 0 -> 0 <= ofs -> fr.(low) + ofs + 4 * typesize ty <= 0 ->
+  24 <= ofs -> fr.(fr_low) + ofs + 4 * typesize ty <= 0 ->
   exists fr', set_slot fr ty ofs v fr'.
 Proof.
-  intros.
-  exists (mkblock fr.(low) fr.(high)
-           (store_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) v)
-           (set_slot_undef_outside fr ty ofs v H H0 H1 fr.(undef_outside))).
-  constructor; auto. 
+  intros. rewrite <- typesize_typesize in H0.
+  econstructor. constructor; eauto.
 Qed.
 
 Lemma slot_gss: 
@@ -55,10 +61,45 @@ Lemma slot_gss:
   set_slot fr1 ty ofs v fr2 ->
   get_slot fr2 ty ofs v.
 Proof.
-  intros. induction H. 
-  constructor.
-  auto.  simpl.  auto. 
-  simpl. symmetry. apply load_store_contents_same. 
+  intros. inv H. constructor; auto.
+  simpl. destruct (typ_eq ty ty); try congruence.
+  rewrite zeq_true. auto.
+Qed.
+
+Remark frame_update_gso:
+  forall fr ty ofs v ty' ofs',
+  ofs' + 4 * typesize ty' <= ofs \/ ofs + 4 * typesize ty <= ofs' ->
+  fr_contents (update ty ofs v fr) ty' ofs' = fr_contents fr ty' ofs'.
+Proof.
+  intros.
+  generalize (typesize_pos ty); intro.
+  generalize (typesize_pos ty'); intro.
+  simpl. rewrite zeq_false. 2: omega.
+  repeat rewrite <- typesize_typesize in H.
+  destruct (zle (ofs' + AST.typesize ty') ofs). auto.
+  destruct (zle (ofs + AST.typesize ty) ofs'). auto.
+  omegaContradiction.
+Qed.
+
+Remark frame_update_overlap:
+  forall fr ty ofs v ty' ofs',
+  ofs <> ofs' ->
+  ofs' + 4 * typesize ty' > ofs -> ofs + 4 * typesize ty > ofs' ->
+  fr_contents (update ty ofs v fr) ty' ofs' = Vundef.
+Proof.
+  intros. simpl. rewrite zeq_false; auto.
+  rewrite <- typesize_typesize in H0. 
+  rewrite <- typesize_typesize in H1.
+  repeat rewrite zle_false; auto.
+Qed.
+
+Remark frame_update_mismatch:
+  forall fr ty ofs v ty',
+  ty <> ty' ->
+  fr_contents (update ty ofs v fr) ty' ofs = Vundef.
+Proof.
+  intros. simpl. rewrite zeq_true. 
+  destruct (typ_eq ty ty'); congruence.
 Qed.
 
 Lemma slot_gso:
@@ -68,38 +109,36 @@ Lemma slot_gso:
   ofs' + 4 * typesize ty' <= ofs \/ ofs + 4 * typesize ty <= ofs' ->
   get_slot fr2 ty' ofs' v'.
 Proof.
-  intros. induction H; inversion H0.
-  constructor.
-  auto.  simpl low. auto.
-  simpl. rewrite H3. symmetry. apply load_store_contents_other. 
-  repeat (rewrite size_mem_type). omega. 
+  intros. inv H. inv H0.
+  constructor; auto.
+  symmetry. simpl fr_low. apply frame_update_gso. omega.
 Qed.
 
 Lemma slot_gi:
   forall f ofs ty,
-  0 <= ofs -> (init_frame f).(low) + ofs + 4 * typesize ty <= 0 ->
+  24 <= ofs -> fr_low (init_frame f) + ofs + 4 * typesize ty <= 0 ->
   get_slot (init_frame f) ty ofs Vundef.
 Proof.
-  intros. constructor.
-  auto. auto. 
-  symmetry. apply load_contents_init. 
+  intros. rewrite <- typesize_typesize in H0. constructor; auto.
 Qed.
 
 Section PRESERVATION.
 
 Variable prog: Linear.program.
 Variable tprog: Mach.program.
-Hypothesis TRANSF: transf_program prog = Some tprog.
+Hypothesis TRANSF: transf_program prog = OK tprog.
 Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
 
+
 Section FRAME_PROPERTIES.
 
+Variable stack: list Machabstr.stackframe.
 Variable f: Linear.function.
 Let b := function_bounds f.
 Let fe := make_env b.
 Variable tf: Mach.function.
-Hypothesis TRANSF_F: transf_function f = Some tf.
+Hypothesis TRANSF_F: transf_function f = OK tf.
 
 Lemma unfold_transf_function:
   tf = Mach.mkfunction
@@ -109,7 +148,7 @@ Lemma unfold_transf_function:
          fe.(fe_size).
 Proof.
   generalize TRANSF_F. unfold transf_function.
-  case (zlt (fn_stacksize f) 0). intros; discriminate.
+  case (zlt (Linear.fn_stacksize f) 0). intros; discriminate.
   case (zlt (- Int.min_signed) (fe_size (make_env (function_bounds f)))).
   intros; discriminate.
   intros. unfold fe. unfold b. congruence.
@@ -118,7 +157,7 @@ Qed.
 Lemma size_no_overflow: fe.(fe_size) <= -Int.min_signed.
 Proof.
   generalize TRANSF_F. unfold transf_function.
-  case (zlt (fn_stacksize f) 0). intros; discriminate.
+  case (zlt (Linear.fn_stacksize f) 0). intros; discriminate.
   case (zlt (- Int.min_signed) (fe_size (make_env (function_bounds f)))).
   intros; discriminate.
   intros. unfold fe, b. omega.
@@ -131,7 +170,7 @@ Definition index_valid (idx: frame_index) :=
   match idx with
   | FI_local x Tint => 0 <= x < b.(bound_int_local)
   | FI_local x Tfloat => 0 <= x < b.(bound_float_local)
-  | FI_arg x ty => 6 <= x /\ x + typesize ty <= b.(bound_outgoing)
+  | FI_arg x ty => 14 <= x /\ x + typesize ty <= b.(bound_outgoing)
   | FI_saved_int x => 0 <= x < b.(bound_int_callee_save)
   | FI_saved_float x => 0 <= x < b.(bound_float_callee_save)
   end.
@@ -166,17 +205,12 @@ Proof.
 Qed.
 
 Ltac AddPosProps :=
-  assert (bound_int_local b >= 0);
-  [unfold b; apply bound_int_local_pos |
-  assert (bound_float_local b >= 0);
-  [unfold b; apply bound_float_local_pos |
-  assert (bound_int_callee_save b >= 0);
-  [unfold b; apply bound_int_callee_save_pos |
-  assert (bound_float_callee_save b >= 0);
-  [unfold b; apply bound_float_callee_save_pos |
-  assert (bound_outgoing b >= 6); 
-  [unfold b; apply bound_outgoing_pos |
-   generalize align_float_part; intro]]]]].
+  generalize (bound_int_local_pos b); intro;
+  generalize (bound_float_local_pos b); intro;
+  generalize (bound_int_callee_save_pos b); intro;
+  generalize (bound_float_callee_save_pos b); intro;
+  generalize (bound_outgoing_pos b); intro;
+  generalize align_float_part; intro.
 
 Lemma size_pos: fe.(fe_size) >= 24.
 Proof.
@@ -212,18 +246,18 @@ Qed.
 
 Lemma index_local_valid:
   forall ofs ty,
-  slot_bounded f (Local ofs ty) ->
+  slot_within_bounds f b (Local ofs ty) ->
   index_valid (FI_local ofs ty).
 Proof.
-  unfold slot_bounded, index_valid. auto.
+  unfold slot_within_bounds, index_valid. auto.
 Qed.
 
 Lemma index_arg_valid:
   forall ofs ty,
-  slot_bounded f (Outgoing ofs ty) ->
+  slot_within_bounds f b (Outgoing ofs ty) ->
   index_valid (FI_arg ofs ty).
 Proof.
-  unfold slot_bounded, index_valid, b. auto.
+  unfold slot_within_bounds, index_valid. auto.
 Qed.
 
 Lemma index_saved_int_valid:
@@ -280,10 +314,8 @@ Lemma offset_of_index_no_overflow:
 Proof.
   intros.
   generalize (offset_of_index_valid idx H). intros [A B].
-(* omega falls flat on its face... *)
   apply Int.signed_repr.
-  split. apply Zle_trans with 24. compute; intro; discriminate.
-  auto.
+  split. apply Zle_trans with 24; auto. compute; intro; discriminate.
   assert (offset_of_index fe idx < fe_size fe).
     generalize (typesize_pos (type_of_index idx)); intro. omega.
   apply Zlt_succ_le. 
@@ -295,26 +327,30 @@ Qed.
   instructions, in case the offset is computed with [offset_of_index]. *)
 
 Lemma exec_Mgetstack':
-  forall sp parent idx ty c rs fr dst m v,
+  forall sp idx ty c rs fr dst m v,
   index_valid idx ->
   get_slot fr ty (offset_of_index fe idx) v ->
-  Machabstr.exec_instrs tge tf sp parent
-    (Mgetstack (Int.repr (offset_of_index fe idx)) ty dst :: c) rs fr m
-    E0 c (rs#dst <- v) fr m.
+  step tge 
+     (State stack tf sp
+            (Mgetstack (Int.repr (offset_of_index fe idx)) ty dst :: c)
+            rs fr m)
+  E0 (State stack tf sp c (rs#dst <- v) fr m).
 Proof.
-  intros. apply Machabstr.exec_one. apply Machabstr.exec_Mgetstack.
+  intros. apply exec_Mgetstack.
   rewrite offset_of_index_no_overflow. auto. auto.
 Qed.
 
 Lemma exec_Msetstack':
-  forall sp parent idx ty c rs fr src m fr',
+  forall sp idx ty c rs fr src m fr',
   index_valid idx ->
   set_slot fr ty (offset_of_index fe idx) (rs src) fr' ->
-  Machabstr.exec_instrs tge tf sp parent
-    (Msetstack src (Int.repr (offset_of_index fe idx)) ty :: c) rs fr m
-    E0 c rs fr' m.
+  step tge
+     (State stack tf sp
+           (Msetstack src (Int.repr (offset_of_index fe idx)) ty :: c) 
+           rs fr m)
+  E0 (State stack tf sp c rs fr' m).
 Proof.
-  intros. apply Machabstr.exec_one. apply Machabstr.exec_Msetstack.
+  intros. apply exec_Msetstack.
   rewrite offset_of_index_no_overflow. auto. auto.
 Qed.
 
@@ -323,44 +359,42 @@ Qed.
   function. *)
 
 Definition index_val (idx: frame_index) (fr: frame) :=
-  load_contents (mem_type (type_of_index idx))
-                fr.(contents)
-                (fr.(low) + offset_of_index fe idx).
+  fr.(fr_contents) (type_of_index idx) (fr.(fr_low) + offset_of_index fe idx).
 
 Lemma get_slot_index:
   forall fr idx ty v,
   index_valid idx ->
-  fr.(low) = - fe.(fe_size) ->
+  fr.(fr_low) = -fe.(fe_size) ->
   ty = type_of_index idx ->
   v = index_val idx fr ->
   get_slot fr ty (offset_of_index fe idx) v.
 Proof.
   intros. subst v; subst ty.
   generalize (offset_of_index_valid idx H); intros [A B].
-  unfold index_val. apply get_slot_intro. omega. 
-  rewrite H0. omega. auto.
+  rewrite <- typesize_typesize in B. 
+  unfold index_val. apply get_slot_intro; auto.
+  rewrite H0. omega.
 Qed.
 
 Lemma set_slot_index:
   forall fr idx v,
   index_valid idx ->
-  fr.(high) = 0 ->
-  fr.(low) = - fe.(fe_size) ->
+  fr.(fr_low) = -fe.(fe_size) ->
   exists fr', set_slot fr (type_of_index idx) (offset_of_index fe idx) v fr'.
 Proof.
   intros. 
   generalize (offset_of_index_valid idx H); intros [A B].
-  apply set_slot_ok. auto. omega. rewrite H1; omega.
+  apply set_slot_ok; auto. rewrite H0. omega.
 Qed.
 
 (** Alternate ``good variable'' properties for [get_slot] and [set_slot]. *)
+
 Lemma slot_iss:
   forall fr idx v fr',
   set_slot fr (type_of_index idx) (offset_of_index fe idx) v fr' ->
   index_val idx fr' = v.
 Proof.
-  intros. inversion H. subst ofs ty.
-  unfold index_val; simpl. apply load_store_contents_same.
+  intros. exploit slot_gss; eauto. intro. inv H0. auto.
 Qed.
 
 Lemma slot_iso:
@@ -371,19 +405,20 @@ Lemma slot_iso:
   index_val idx' fr' = index_val idx' fr.
 Proof.
   intros. generalize (offset_of_index_disj idx idx' H1 H2 H0). intro.
-  unfold index_val. inversion H. subst ofs ty. simpl. 
-  apply load_store_contents_other. 
-  repeat rewrite size_mem_type. omega.
+  inv H. unfold index_val. simpl fr_low. apply frame_update_gso.
+  omega.
 Qed.
 
 (** * Agreement between location sets and Mach environments *)
 
-(** The following [agree] predicate expresses semantic agreement between
-  a location set on the Linear side and, on the Mach side,
-  a register set, plus the current and parent frames, plus the register
-  set [rs0] at function entry. *)
+(** The following [agree] predicate expresses semantic agreement between:
+- on the Linear side, the current location set [ls] and the location
+  set at function entry [ls0];
+- on the Mach side, a register set [rs] plus the current and parent frames
+  [fr] and [parent]. 
+*)
 
-Record agree (ls: locset) (rs: regset) (fr parent: frame) (rs0: regset) : Prop :=
+Record agree (ls: locset) (rs: regset) (fr parent: frame) (ls0: locset) : Prop :=
   mk_agree {
     (** Machine registers have the same values on the Linear and Mach sides. *)
     agree_reg:
@@ -393,25 +428,22 @@ Record agree (ls: locset) (rs: regset) (fr parent: frame) (rs0: regset) : Prop :
         have the same values they had at function entry.  In other terms,
         these registers are never assigned. *)
     agree_unused_reg:
-      forall r, ~(mreg_bounded f r) -> rs r = rs0 r;
+      forall r, ~(mreg_within_bounds b r) -> rs r = ls0 (R r);
 
-    (** The bounds of the current frame are [[- fe.(fe_size), 0]]. *)
-    agree_high:
-      fr.(high) = 0;
+    (** The low bound of the current frame is [- fe.(fe_size)]. *)
     agree_size:
-      fr.(low) = - fe.(fe_size);
+      fr.(fr_low) = -fe.(fe_size);
 
     (** Local and outgoing stack slots (on the Linear side) have
         the same values as the one loaded from the current Mach frame 
         at the corresponding offsets. *)
-
     agree_locals:
       forall ofs ty, 
-      slot_bounded f (Local ofs ty) ->
+      slot_within_bounds f b (Local ofs ty) ->
       ls (S (Local ofs ty)) = index_val (FI_local ofs ty) fr;
     agree_outgoing:
       forall ofs ty, 
-      slot_bounded f (Outgoing ofs ty) ->
+      slot_within_bounds f b (Outgoing ofs ty) ->
       ls (S (Outgoing ofs ty)) = index_val (FI_arg ofs ty) fr;
 
     (** Incoming stack slots (on the Linear side) have
@@ -419,7 +451,7 @@ Record agree (ls: locset) (rs: regset) (fr parent: frame) (rs0: regset) : Prop :
         at the corresponding offsets. *)
     agree_incoming:
       forall ofs ty,
-      slot_bounded f (Incoming ofs ty) ->
+      slot_within_bounds f b (Incoming ofs ty) ->
       get_slot parent ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Incoming ofs ty)));
 
     (** The areas of the frame reserved for saving used callee-save
@@ -429,35 +461,33 @@ Record agree (ls: locset) (rs: regset) (fr parent: frame) (rs0: regset) : Prop :
       forall r,
       In r int_callee_save_regs ->
       index_int_callee_save r < b.(bound_int_callee_save) ->
-      index_val (FI_saved_int (index_int_callee_save r)) fr = rs0 r;
+      index_val (FI_saved_int (index_int_callee_save r)) fr = ls0 (R r);
     agree_saved_float:
       forall r,
       In r float_callee_save_regs ->
       index_float_callee_save r < b.(bound_float_callee_save) ->
-      index_val (FI_saved_float (index_float_callee_save r)) fr = rs0 r
+      index_val (FI_saved_float (index_float_callee_save r)) fr = ls0 (R r)
   }.
 
-Hint Resolve agree_reg agree_unused_reg agree_size agree_high
+Hint Resolve agree_reg agree_unused_reg agree_size 
              agree_locals agree_outgoing agree_incoming
              agree_saved_int agree_saved_float: stacking.
 
 (** Values of registers and register lists. *)
 
 Lemma agree_eval_reg:
-  forall ls rs fr parent rs0 r,
-  agree ls rs fr parent rs0 -> rs r = ls (R r).
+  forall ls rs fr parent ls0 r,
+  agree ls rs fr parent ls0 -> rs r = ls (R r).
 Proof.
   intros. symmetry. eauto with stacking.
 Qed.
 
 Lemma agree_eval_regs:
-  forall ls rs fr parent rs0 rl,
-  agree ls rs fr parent rs0 -> rs##rl = LTL.reglist rl ls.
+  forall ls rs fr parent ls0 rl,
+  agree ls rs fr parent ls0 -> rs##rl = reglist ls rl.
 Proof.
   induction rl; simpl; intros.
-  auto. apply (f_equal2 (@cons val)).
-  eapply agree_eval_reg; eauto.
-  auto.
+  auto. f_equal. eapply agree_eval_reg; eauto. auto.
 Qed.
 
 Hint Resolve agree_eval_reg agree_eval_regs: stacking.
@@ -466,10 +496,10 @@ Hint Resolve agree_eval_reg agree_eval_regs: stacking.
   of machine registers, of local slots, of outgoing slots. *)
 
 Lemma agree_set_reg:
-  forall ls rs fr parent rs0 r v,
-  agree ls rs fr parent rs0 ->
-  mreg_bounded f r ->
-  agree (Locmap.set (R r) v ls) (Regmap.set r v rs) fr parent rs0.
+  forall ls rs fr parent ls0 r v,
+  agree ls rs fr parent ls0 ->
+  mreg_within_bounds b r ->
+  agree (Locmap.set (R r) v ls) (Regmap.set r v rs) fr parent ls0.
 Proof.
   intros. constructor; eauto with stacking.
   intros. case (mreg_eq r r0); intro.
@@ -484,25 +514,22 @@ Proof.
 Qed.
 
 Lemma agree_set_local:
-  forall ls rs fr parent rs0 v ofs ty,
-  agree ls rs fr parent rs0 ->
-  slot_bounded f (Local ofs ty) ->
+  forall ls rs fr parent ls0 v ofs ty,
+  agree ls rs fr parent ls0 ->
+  slot_within_bounds f b (Local ofs ty) ->
   exists fr',
     set_slot fr ty (offset_of_index fe (FI_local ofs ty)) v fr' /\
-    agree (Locmap.set (S (Local ofs ty)) v ls) rs fr' parent rs0.
+    agree (Locmap.set (S (Local ofs ty)) v ls) rs fr' parent ls0.
 Proof.
   intros.
   generalize (set_slot_index fr _ v (index_local_valid _ _ H0) 
-                (agree_high _ _ _ _ _ H)
                 (agree_size _ _ _ _ _ H)).
   intros [fr' SET].
   exists fr'. split. auto. constructor; eauto with stacking.
   (* agree_reg *)
   intros. rewrite Locmap.gso. eauto with stacking. red; auto.
-  (* agree_high *)
-  inversion SET. simpl high. eauto with stacking.
   (* agree_size *)
-  inversion SET. simpl low. eauto with stacking.
+  inversion SET. rewrite H3; simpl fr_low. eauto with stacking.
   (* agree_local *)
   intros. case (slot_eq (Local ofs ty) (Local ofs0 ty0)); intro.
   rewrite <- e. rewrite Locmap.gss. 
@@ -517,7 +544,7 @@ Proof.
   (* agree_outgoing *)
   intros. rewrite Locmap.gso. transitivity (index_val (FI_arg ofs0 ty0) fr).
   eauto with stacking. symmetry. eapply slot_iso; eauto.
-  red; auto. red; auto.
+  red; auto. red; auto. 
   (* agree_incoming *)
   intros. rewrite Locmap.gso. eauto with stacking. red. auto.
   (* agree_saved_int *)
@@ -529,30 +556,27 @@ Proof.
 Qed.
 
 Lemma agree_set_outgoing:
-  forall ls rs fr parent rs0 v ofs ty,
-  agree ls rs fr parent rs0 ->
-  slot_bounded f (Outgoing ofs ty) ->
+  forall ls rs fr parent ls0 v ofs ty,
+  agree ls rs fr parent ls0 ->
+  slot_within_bounds f b (Outgoing ofs ty) ->
   exists fr',
     set_slot fr ty (offset_of_index fe (FI_arg ofs ty)) v fr' /\
-    agree (Locmap.set (S (Outgoing ofs ty)) v ls) rs fr' parent rs0.
+    agree (Locmap.set (S (Outgoing ofs ty)) v ls) rs fr' parent ls0.
 Proof.
   intros. 
   generalize (set_slot_index fr _ v (index_arg_valid _ _ H0)
-                (agree_high _ _ _ _ _ H)
                 (agree_size _ _ _ _ _ H)).
   intros [fr' SET].
   exists fr'. split. exact SET. constructor; eauto with stacking.
   (* agree_reg *)
   intros. rewrite Locmap.gso. eauto with stacking. red; auto.
-  (* agree_high *)
-  inversion SET. simpl high. eauto with stacking.
   (* agree_size *)
-  inversion SET. simpl low. eauto with stacking.
+  inversion SET. subst fr'; simpl fr_low. eauto with stacking.
   (* agree_local *)
   intros. rewrite Locmap.gso. 
   transitivity (index_val (FI_local ofs0 ty0) fr).
   eauto with stacking. symmetry. eapply slot_iso; eauto.
-  red; auto. red; auto.
+  red; auto. red; auto. 
   (* agree_outgoing *)
   intros. unfold Locmap.set. 
   case (Loc.eq (S (Outgoing ofs ty)) (S (Outgoing ofs0 ty0))); intro.
@@ -562,18 +586,12 @@ Proof.
   congruence. congruence.
   (* overlapping locations *)
   caseEq (Loc.overlap (S (Outgoing ofs ty)) (S (Outgoing ofs0 ty0))); intros.
-  inversion SET. subst ofs1 ty1.
-  unfold index_val, type_of_index, offset_of_index.
-  set (ofs4 := 4 * ofs). set (ofs04 := 4 * ofs0). simpl.
-  unfold ofs4, ofs04. symmetry. 
-  case (zeq ofs ofs0); intro.
-  subst ofs0. apply load_store_contents_mismatch.
+  inv SET. unfold index_val, type_of_index, offset_of_index.
+  destruct (zeq ofs ofs0).
+  subst ofs0. symmetry. apply frame_update_mismatch. 
   destruct ty; destruct ty0; simpl; congruence.
-  generalize (Loc.overlap_not_diff _ _ H2). intro. simpl in H4.
-  apply load_store_contents_overlap. 
-  omega.
-  rewrite size_mem_type. omega.
-  rewrite size_mem_type. omega.
+  generalize (Loc.overlap_not_diff _ _ H2). intro. simpl in H5.
+  simpl fr_low. symmetry. apply frame_update_overlap. omega. omega. omega.
   (* different locations *)
   transitivity (index_val (FI_arg ofs0 ty0) fr).
   eauto with stacking.
@@ -588,17 +606,17 @@ Proof.
   intros. rewrite <- (agree_saved_float _ _ _ _ _ H r H1 H2). 
   eapply slot_iso; eauto with stacking. red; auto.
 Qed.
-
+(*
 Lemma agree_return_regs:
-  forall ls rs fr parent rs0 ls' rs',
-  agree ls rs fr parent rs0 ->
+  forall ls rs fr parent ls0 ls' rs',
+  agree ls rs fr parent ls0 ->
   (forall r,
     In (R r) temporaries \/ In (R r) destroyed_at_call ->
     rs' r = ls' (R r)) ->
   (forall r,
     In r int_callee_save_regs \/ In r float_callee_save_regs ->
     rs' r = rs r) ->
-  agree (LTL.return_regs ls ls') rs' fr parent rs0.
+  agree (LTL.return_regs ls ls') rs' fr parent ls0.
 Proof.
   intros. constructor; unfold LTL.return_regs; eauto with stacking.
   (* agree_reg *)
@@ -610,12 +628,117 @@ Proof.
   generalize (register_classification r); tauto.
   (* agree_unused_reg *)
   intros. rewrite H1. eauto with stacking.
-  generalize H2; unfold mreg_bounded; case (mreg_type r); intro.
+  generalize H2; unfold mreg_within_bounds; case (mreg_type r); intro.
   left. apply index_int_callee_save_pos2. 
-  generalize (bound_int_callee_save_pos f); intro. omega.
+  generalize (bound_int_callee_save_pos b); intro. omega.
   right. apply index_float_callee_save_pos2. 
-  generalize (bound_float_callee_save_pos f); intro. omega.
+  generalize (bound_float_callee_save_pos b); intro. omega.
 Qed. 
+*)
+
+Lemma agree_return_regs:
+  forall ls rs fr parent ls0 rs',
+  agree ls rs fr parent ls0 ->
+  (forall r,
+    ~In r int_callee_save_regs -> ~In r float_callee_save_regs ->
+    rs' r = rs r) ->
+  (forall r,
+    In r int_callee_save_regs \/ In r float_callee_save_regs ->
+    rs' r = ls0 (R r)) ->
+  (forall r, LTL.return_regs ls0 ls (R r) = rs' r).
+Proof.
+  intros; unfold LTL.return_regs.
+  case (In_dec Loc.eq (R r) temporaries); intro.
+  rewrite H0. eapply agree_reg; eauto. 
+    apply int_callee_save_not_destroyed; auto.
+    apply float_callee_save_not_destroyed; auto.
+  case (In_dec Loc.eq (R r) destroyed_at_call); intro.
+  rewrite H0. eapply agree_reg; eauto.
+    apply int_callee_save_not_destroyed; auto.
+    apply float_callee_save_not_destroyed; auto.
+  symmetry; apply H1.
+  generalize (register_classification r); tauto.
+Qed.
+
+(** Agreement over callee-save registers and stack locations *)
+
+Definition agree_callee_save (ls1 ls2: locset) : Prop :=
+  forall l,
+  match l with
+  | R r => In r int_callee_save_regs \/ In r float_callee_save_regs
+  | S s => True
+  end ->
+  ls2 l = ls1 l.
+
+Remark mreg_not_within_bounds:
+  forall r,
+  ~mreg_within_bounds b r -> In r int_callee_save_regs \/ In r float_callee_save_regs.
+Proof.
+  intro r; unfold mreg_within_bounds.
+  destruct (mreg_type r); intro.
+  left. apply index_int_callee_save_pos2. 
+  generalize (bound_int_callee_save_pos b). omega.
+  right. apply index_float_callee_save_pos2. 
+  generalize (bound_float_callee_save_pos b). omega.
+Qed.
+
+Lemma agree_callee_save_agree:
+  forall ls rs fr parent ls1 ls2,
+  agree ls rs fr parent ls1 ->
+  agree_callee_save ls1 ls2 ->
+  agree ls rs fr parent ls2.
+Proof.
+  intros. inv H. constructor; auto.
+  intros. rewrite agree_unused_reg0; auto.
+  symmetry. apply H0. apply mreg_not_within_bounds; auto.
+  intros. rewrite (H0 (R r)); auto. 
+  intros. rewrite (H0 (R r)); auto. 
+Qed.
+
+Lemma agree_callee_save_return_regs:
+  forall ls1 ls2,
+  agree_callee_save (LTL.return_regs ls1 ls2) ls1.
+Proof.
+  intros; red; intros.
+  unfold LTL.return_regs. destruct l; auto.
+  generalize (int_callee_save_not_destroyed m); intro.
+  generalize (float_callee_save_not_destroyed m); intro.
+  destruct (In_dec Loc.eq (R m) temporaries). tauto.
+  destruct (In_dec Loc.eq (R m) destroyed_at_call). tauto.
+  auto.
+Qed.
+
+Lemma agree_callee_save_set_result:
+  forall ls1 ls2 v sg,
+  agree_callee_save ls1 ls2 ->
+  agree_callee_save (Locmap.set (R (Conventions.loc_result sg)) v ls1) ls2.
+Proof.
+  intros; red; intros. rewrite H; auto. 
+  symmetry; apply Locmap.gso. destruct l; simpl; auto.
+  red; intro. subst m. elim (loc_result_not_callee_save _ H0).
+Qed.
+
+(** A variant of [agree] used for return frames. *)
+
+Definition agree_frame (ls: locset) (fr parent: frame) (ls0: locset) : Prop :=
+  exists rs, agree ls rs fr parent ls0.
+
+Lemma agree_frame_agree:
+  forall ls1 ls2 rs fr parent ls0,
+  agree_frame ls1 fr parent ls0 ->
+  agree_callee_save ls2 ls1 ->
+  (forall r, rs r = ls2 (R r)) ->
+  agree ls2 rs fr parent ls0.
+Proof.
+  intros. destruct H as [rs' AG]. inv AG.
+  constructor; auto.
+  intros. rewrite <- agree_unused_reg0; auto.
+  rewrite <- agree_reg0. rewrite H1. symmetry; apply H0.
+  apply mreg_not_within_bounds; auto.
+  intros. rewrite <- H0; auto.
+  intros. rewrite <- H0; auto.
+  intros. rewrite <- H0; auto.
+Qed.
 
 (** * Correctness of saving and restoring of callee-save registers *)
 
@@ -624,87 +747,100 @@ Qed.
   the register save areas of the current frame do contain the
   values of the callee-save registers used by the function. *)
 
-Lemma save_int_callee_save_correct_rec:
-  forall l k sp parent rs fr m,
-  incl l int_callee_save_regs ->
+Section SAVE_CALLEE_SAVE.
+
+Variable bound: frame_env -> Z.
+Variable number: mreg -> Z.
+Variable mkindex: Z -> frame_index.
+Variable ty: typ.
+Variable sp: val.
+Variable csregs: list mreg.
+Hypothesis number_inj: 
+  forall r1 r2, In r1 csregs -> In r2 csregs -> r1 <> r2 -> number r1 <> number r2.
+Hypothesis mkindex_valid:
+  forall r, In r csregs -> number r < bound fe -> index_valid (mkindex (number r)).
+Hypothesis mkindex_typ:
+  forall z, type_of_index (mkindex z) = ty.
+Hypothesis mkindex_inj:
+  forall z1 z2, z1 <> z2 -> mkindex z1 <> mkindex z2.
+Hypothesis mkindex_diff:
+  forall r idx,
+  idx <> mkindex (number r) -> index_diff (mkindex (number r)) idx.
+
+Lemma save_callee_save_regs_correct:
+  forall l k rs fr m,
+  incl l csregs ->
   list_norepet l ->
-  fr.(high) = 0 ->
-  fr.(low) = -fe.(fe_size) ->
+  fr.(fr_low) = -fe.(fe_size) ->
   exists fr',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (save_int_callee_save fe) k l) rs fr m
-       E0 k rs fr' m
-  /\ fr'.(high) = 0
-  /\ fr'.(low) = -fe.(fe_size)
+    star step tge 
+       (State stack tf sp
+         (save_callee_save_regs bound number mkindex ty fe l k) rs fr m)
+    E0 (State stack tf sp k rs fr' m)
+  /\ fr'.(fr_low) = - fe.(fe_size)
   /\ (forall r,
-       In r l -> index_int_callee_save r < bound_int_callee_save b ->
-       index_val (FI_saved_int (index_int_callee_save r)) fr' = rs r)
+       In r l -> number r < bound fe ->
+       index_val (mkindex (number r)) fr' = rs r)
   /\ (forall idx,
        index_valid idx ->
        (forall r,
-         In r l -> index_int_callee_save r < bound_int_callee_save b ->
-         idx <> FI_saved_int (index_int_callee_save r)) ->
+         In r l -> number r < bound fe -> idx <> mkindex (number r)) ->
        index_val idx fr' = index_val idx fr).
 Proof.
-  induction l.
+  induction l; intros; simpl save_callee_save_regs.
   (* base case *)
-  intros. simpl fold_right. exists fr. 
-  split. apply Machabstr.exec_refl. split. auto. split. auto. 
-  split. intros. elim H3. auto.
+  exists fr. split. apply star_refl. split. auto. 
+  split. intros. elim H2. auto.
   (* inductive case *)
-  intros. simpl fold_right. 
-  set (k1 := fold_right (save_int_callee_save fe) k l) in *.
-  assert (R1: incl l int_callee_save_regs). eauto with coqlib.
+  set (k1 := save_callee_save_regs bound number mkindex ty fe l k).
+  assert (R1: incl l csregs). eauto with coqlib.
   assert (R2: list_norepet l). inversion H0; auto.
-  unfold save_int_callee_save. 
-  case (zlt (index_int_callee_save a) (fe_num_int_callee_save fe));
-  intro;
-  unfold fe_num_int_callee_save, fe, make_env in z.
+  unfold save_callee_save_reg.
+  destruct (zlt (number a) (bound fe)).
   (* a store takes place *)
-  assert (IDX: index_valid (FI_saved_int (index_int_callee_save a))).
-    apply index_saved_int_valid. eauto with coqlib. auto.
-  generalize (set_slot_index _ _ (rs a) IDX H1 H2).
+  assert (VALID: index_valid (mkindex (number a))).
+    apply mkindex_valid. auto with coqlib. auto.
+  exploit set_slot_index; eauto.
   intros [fr1 SET].
-  assert (R3: high fr1 = 0). inversion SET. simpl high. auto.
-  assert (R4: low fr1 = -fe_size fe).  inversion SET. simpl low. auto.
-  generalize (IHl k sp parent rs fr1 m R1 R2 R3 R4).
-  intros [fr' [A [B [C [D E]]]]].
-  exists fr'. 
-  split. eapply Machabstr.exec_trans. apply exec_Msetstack'; eauto with stacking.
+  exploit (IHl k rs fr1 m); auto. inv SET; auto. 
+  fold k1. intros [fr' [A [B [C D]]]].
+  exists fr'.
+  split. eapply star_left.
+  apply exec_Msetstack'; eauto with stacking. rewrite <- (mkindex_typ (number a)). eauto.
   eexact A. traceEq.
-  split. auto.
-  split. auto.
-  split. intros. elim H3; intros. subst r. 
-    rewrite E. eapply slot_iss; eauto. auto. 
-    intros. decEq. apply index_int_callee_save_inj; auto with coqlib.
+  split. auto. 
+  split. intros. elim H2; intros. subst r. 
+    rewrite D. eapply slot_iss; eauto.  
+    apply mkindex_valid; auto.
+    intros. apply mkindex_inj. apply number_inj; auto with coqlib.
     inversion H0. red; intro; subst r; contradiction.
-    apply D; auto.
+    apply C; auto.
   intros. transitivity (index_val idx fr1).
-    apply E; auto. 
-    intros. apply H4; eauto with coqlib.
-    eapply slot_iso; eauto. 
-    destruct idx; simpl; auto. 
-    generalize (H4 a (in_eq _ _) z). congruence.
-  (* no store takes place *)
-  generalize (IHl k sp parent rs fr m R1 R2 H1 H2).
-  intros [fr' [A [B [C [D E]]]]].
-  exists fr'. split. exact A. split. exact B. split. exact C.
-  split. intros. elim H3; intros. subst r. omegaContradiction.
     apply D; auto. 
-  intros. apply E; auto.
-    intros. apply H4; auto with coqlib.
-Qed. 
+    intros. apply H3; eauto with coqlib.
+    eapply slot_iso; eauto.
+    apply mkindex_diff. apply H3. auto with coqlib.
+    auto.
+  (* no store takes place *)
+  exploit (IHl k rs fr m); auto. intros [fr' [A [B [C D]]]].
+  exists fr'. split. exact A. split. exact B.
+  split. intros. elim H2; intros. subst r. omegaContradiction.
+    apply C; auto. 
+  intros. apply D; auto.
+    intros. apply H3; auto with coqlib.
+Qed.
 
-Lemma save_int_callee_save_correct:
-  forall k sp parent rs fr m,
-  fr.(high) = 0 ->
-  fr.(low) = -fe.(fe_size) ->
+End SAVE_CALLEE_SAVE. 
+
+Lemma save_callee_save_int_correct:
+  forall k sp rs fr m,
+  fr.(fr_low) = - fe.(fe_size) ->
   exists fr',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (save_int_callee_save fe) k int_callee_save_regs) rs fr m
-       E0 k rs fr' m
-  /\ fr'.(high) = 0
-  /\ fr'.(low) = -fe.(fe_size)
+    star step tge 
+       (State stack tf sp
+         (save_callee_save_int fe k) rs fr m)
+    E0 (State stack tf sp k rs fr' m)
+  /\ fr'.(fr_low) = - fe.(fe_size)
   /\ (forall r,
        In r int_callee_save_regs ->
        index_int_callee_save r < bound_int_callee_save b ->
@@ -714,98 +850,30 @@ Lemma save_int_callee_save_correct:
        match idx with FI_saved_int _ => False | _ => True end ->
        index_val idx fr' = index_val idx fr).
 Proof.
-  intros. 
-  generalize (save_int_callee_save_correct_rec
-                int_callee_save_regs k sp parent rs fr m
-                (incl_refl _) int_callee_save_norepet H H0).
-  intros [fr' [A [B [C [D E]]]]]. 
-  exists fr'. 
-  split. assumption. split. assumption. split. assumption.
-  split. apply D. intros. apply E. auto. 
-  intros. red; intros; subst idx. contradiction.
+  intros.
+  exploit (save_callee_save_regs_correct fe_num_int_callee_save index_int_callee_save FI_saved_int
+                                         Tint sp int_callee_save_regs).
+  exact index_int_callee_save_inj.
+  intros. red. split; auto. generalize (index_int_callee_save_pos r H0). omega.
+  auto.
+  intros; congruence.
+  intros until idx. destruct idx; simpl; auto. congruence.
+  apply incl_refl.
+  apply int_callee_save_norepet. eauto.
+  intros [fr' [A [B [C D]]]]. 
+  exists fr'; intuition. unfold save_callee_save_int; eauto. 
+  apply D. auto. intros; subst idx. auto.
 Qed.
 
-Lemma save_float_callee_save_correct_rec:
-  forall l k sp parent rs fr m,
-  incl l float_callee_save_regs ->
-  list_norepet l ->
-  fr.(high) = 0 ->
-  fr.(low) = -fe.(fe_size) ->
-  exists fr',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (save_float_callee_save fe) k l) rs fr m
-      E0 k rs fr' m
-  /\ fr'.(high) = 0
-  /\ fr'.(low) = -fe.(fe_size)
-  /\ (forall r,
-       In r l -> index_float_callee_save r < bound_float_callee_save b ->
-       index_val (FI_saved_float (index_float_callee_save r)) fr' = rs r)
-  /\ (forall idx,
-       index_valid idx ->
-       (forall r,
-         In r l -> index_float_callee_save r < bound_float_callee_save b ->
-         idx <> FI_saved_float (index_float_callee_save r)) ->
-       index_val idx fr' = index_val idx fr).
-Proof.
-  induction l.
-  (* base case *)
-  intros. simpl fold_right. exists fr. 
-  split. apply Machabstr.exec_refl. split. auto. split. auto.
-  split. intros. elim H3. auto.
-  (* inductive case *)
-  intros. simpl fold_right. 
-  set (k1 := fold_right (save_float_callee_save fe) k l) in *.
-  assert (R1: incl l float_callee_save_regs). eauto with coqlib.
-  assert (R2: list_norepet l). inversion H0; auto.
-  unfold save_float_callee_save. 
-  case (zlt (index_float_callee_save a) (fe_num_float_callee_save fe));
-  intro;
-  unfold fe_num_float_callee_save, fe, make_env in z.
-  (* a store takes place *)
-  assert (IDX: index_valid (FI_saved_float (index_float_callee_save a))).
-    apply index_saved_float_valid. eauto with coqlib. auto.
-  generalize (set_slot_index _ _ (rs a) IDX H1 H2).
-  intros [fr1 SET].
-  assert (R3: high fr1 = 0).  inversion SET. simpl high. auto.
-  assert (R4: low fr1 = -fe_size fe).  inversion SET. simpl low. auto.
-  generalize (IHl k sp parent rs fr1 m R1 R2 R3 R4).
-  intros [fr' [A [B [C [D E]]]]].
-  exists fr'. 
-  split. eapply Machabstr.exec_trans. apply exec_Msetstack'; eauto with stacking.
-  eexact A. traceEq.
-  split. auto.
-  split. auto.
-  split. intros. elim H3; intros. subst r. 
-    rewrite E. eapply slot_iss; eauto. auto. 
-    intros. decEq. apply index_float_callee_save_inj; auto with coqlib.
-    inversion H0. red; intro; subst r; contradiction.
-    apply D; auto.
-  intros. transitivity (index_val idx fr1).
-    apply E; auto. 
-    intros. apply H4; eauto with coqlib.
-    eapply slot_iso; eauto. 
-    destruct idx; simpl; auto. 
-    generalize (H4 a (in_eq _ _) z). congruence.
-  (* no store takes place *)
-  generalize (IHl k sp parent rs fr m R1 R2 H1 H2).
-  intros [fr' [A [B [C [D E]]]]].
-  exists fr'. split. exact A. split. exact B. split. exact C.
-  split. intros. elim H3; intros. subst r. omegaContradiction.
-    apply D; auto. 
-  intros. apply E; auto.
-    intros. apply H4; auto with coqlib.
-Qed. 
-
-Lemma save_float_callee_save_correct:
-  forall k sp parent rs fr m,
-  fr.(high) = 0 ->
-  fr.(low) = -fe.(fe_size) ->
+Lemma save_callee_save_float_correct:
+  forall k sp rs fr m,
+  fr.(fr_low) = - fe.(fe_size) ->
   exists fr',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (save_float_callee_save fe) k float_callee_save_regs) rs fr m
-       E0 k rs fr' m
-  /\ fr'.(high) = 0
-  /\ fr'.(low) = -fe.(fe_size)
+    star step tge 
+       (State stack tf sp
+         (save_callee_save_float fe k) rs fr m)
+    E0 (State stack tf sp k rs fr' m)
+  /\ fr'.(fr_low) = - fe.(fe_size)
   /\ (forall r,
        In r float_callee_save_regs ->
        index_float_callee_save r < bound_float_callee_save b ->
@@ -815,63 +883,59 @@ Lemma save_float_callee_save_correct:
        match idx with FI_saved_float _ => False | _ => True end ->
        index_val idx fr' = index_val idx fr).
 Proof.
-  intros. 
-  generalize (save_float_callee_save_correct_rec
-                float_callee_save_regs k sp parent rs fr m
-                (incl_refl _) float_callee_save_norepet H H0).
-  intros [fr' [A [B [C [D E]]]]]. 
-  exists fr'. split. assumption. split. assumption. split. assumption.
-  split. apply D. intros. apply E. auto. 
-  intros. red; intros; subst idx. contradiction.
-Qed.
-
-Lemma index_val_init_frame:
-  forall idx,
-  index_valid idx ->
-  index_val idx (init_frame tf) = Vundef.
-Proof.
-  intros. unfold index_val, init_frame. simpl contents.
-  apply load_contents_init. 
+  intros.
+  exploit (save_callee_save_regs_correct fe_num_float_callee_save index_float_callee_save FI_saved_float
+                                         Tfloat sp float_callee_save_regs).
+  exact index_float_callee_save_inj.
+  intros. red. split; auto. generalize (index_float_callee_save_pos r H0). omega.
+  auto.
+  intros; congruence.
+  intros until idx. destruct idx; simpl; auto. congruence.
+  apply incl_refl.
+  apply float_callee_save_norepet. eauto.
+  intros [fr' [A [B [C D]]]]. 
+  exists fr'; intuition. unfold save_callee_save_float; eauto. 
+  apply D. auto. intros; subst idx. auto.
 Qed.
 
 Lemma save_callee_save_correct:
-  forall sp parent k rs fr m ls,
+  forall sp k rs fr m ls,
   (forall r, rs r = ls (R r)) ->
   (forall ofs ty, 
-     6 <= ofs -> 
-     ofs + typesize ty <= size_arguments f.(fn_sig) ->
-     get_slot parent ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty)))) ->
-  high fr = 0 ->
-  low fr = -fe.(fe_size) ->
+     14 <= ofs -> 
+     ofs + typesize ty <= size_arguments f.(Linear.fn_sig) ->
+     get_slot (parent_frame stack) ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty)))) ->
+  fr.(fr_low) = - fe.(fe_size) ->
   (forall idx, index_valid idx -> index_val idx fr = Vundef) ->
   exists fr',
-    Machabstr.exec_instrs tge tf sp parent
-      (save_callee_save fe k) rs fr m
-      E0 k rs fr' m
-  /\ agree (LTL.call_regs ls) rs fr' parent rs.
+    star step tge
+       (State stack tf sp (save_callee_save fe k) rs fr m)
+    E0 (State stack tf sp k rs fr' m)
+  /\ agree (LTL.call_regs ls) rs fr' (parent_frame stack) ls.
 Proof.
   intros. unfold save_callee_save.
-  generalize (save_int_callee_save_correct
-     (fold_right (save_float_callee_save fe) k float_callee_save_regs)
-     sp parent rs fr m H1 H2).
-  intros [fr1 [A1 [B1 [C1 [D1 E1]]]]].
-  generalize (save_float_callee_save_correct k sp parent rs fr1 m B1 C1).
-  intros [fr2 [A2 [B2 [C2 [D2 E2]]]]].
+  exploit save_callee_save_int_correct; eauto. 
+  intros [fr1 [A1 [B1 [C1 D1]]]].
+  exploit save_callee_save_float_correct. eexact B1. 
+  intros [fr2 [A2 [B2 [C2 D2]]]].
   exists fr2.
-  split. eapply Machabstr.exec_trans. eexact A1. eexact A2. traceEq.
+  split. eapply star_trans. eexact A1. eexact A2. traceEq.
   constructor; unfold LTL.call_regs; auto.
   (* agree_local *)
-  intros. rewrite E2; auto with stacking. 
-  rewrite E1; auto with stacking. 
+  intros. rewrite D2; auto with stacking. 
+  rewrite D1; auto with stacking. 
   symmetry. auto with stacking.
   (* agree_outgoing *)
-  intros. rewrite E2; auto with stacking. 
-  rewrite E1; auto with stacking. 
+  intros. rewrite D2; auto with stacking. 
+  rewrite D1; auto with stacking. 
   symmetry. auto with stacking. 
   (* agree_incoming *)
-  intros. simpl in H4. apply H0. tauto. tauto.
+  intros. simpl in H3. apply H0. tauto. tauto.
   (* agree_saved_int *)
-  intros. rewrite E2; auto with stacking. 
+  intros. rewrite D2; auto with stacking.
+  rewrite C1; auto with stacking. 
+  (* agree_saved_float *)
+  intros. rewrite C2; auto with stacking. 
 Qed.
 
 (** The following lemmas show the correctness of the register reloading
@@ -879,49 +943,66 @@ Qed.
   all callee-save registers contain the same values they had at
   function entry. *)
 
-Lemma restore_int_callee_save_correct_rec:
-  forall sp parent k fr m rs0 l ls rs,
-  incl l int_callee_save_regs ->
+Section RESTORE_CALLEE_SAVE.
+
+Variable bound: frame_env -> Z.
+Variable number: mreg -> Z.
+Variable mkindex: Z -> frame_index.
+Variable ty: typ.
+Variable sp: val.
+Variable csregs: list mreg.
+Hypothesis mkindex_valid:
+  forall r, In r csregs -> number r < bound fe -> index_valid (mkindex (number r)).
+Hypothesis mkindex_typ:
+  forall z, type_of_index (mkindex z) = ty.
+Hypothesis number_within_bounds:
+  forall r, In r csregs ->
+  (number r < bound fe <-> mreg_within_bounds b r).
+Hypothesis mkindex_val:
+  forall ls rs fr ls0 r, 
+  agree ls rs fr (parent_frame stack) ls0 -> In r csregs -> number r < bound fe ->
+  index_val (mkindex (number r)) fr = ls0 (R r).
+
+Lemma restore_callee_save_regs_correct:
+  forall k fr m ls0 l ls rs,
+  incl l csregs ->
   list_norepet l -> 
-  agree ls rs fr parent rs0 ->
+  agree ls rs fr (parent_frame stack) ls0 ->
   exists ls', exists rs',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (restore_int_callee_save fe) k l) rs fr m
-      E0 k rs' fr m
-  /\ (forall r, In r l -> rs' r = rs0 r)
+    star step tge
+      (State stack tf sp
+        (restore_callee_save_regs bound number mkindex ty fe l k) rs fr m)
+   E0 (State stack tf sp k rs' fr m)
+  /\ (forall r, In r l -> rs' r = ls0 (R r))
   /\ (forall r, ~(In r l) -> rs' r = rs r)
-  /\ agree ls' rs' fr parent rs0.
+  /\ agree ls' rs' fr (parent_frame stack) ls0.
 Proof.
-  induction l.
+  induction l; intros; simpl restore_callee_save_regs.
   (* base case *)
-  intros. simpl fold_right. exists ls. exists rs. 
-  split. apply Machabstr.exec_refl. 
+  exists ls. exists rs. 
+  split. apply star_refl. 
   split. intros. elim H2. 
   split. auto. auto.
   (* inductive case *)
-  intros. simpl fold_right. 
-  set (k1 := fold_right (restore_int_callee_save fe) k l) in *.
-  assert (R0: In a int_callee_save_regs). apply H; auto with coqlib.
-  assert (R1: incl l int_callee_save_regs). eauto with coqlib.
+  set (k1 := restore_callee_save_regs bound number mkindex ty fe l k).
+  assert (R0: In a csregs). apply H; auto with coqlib.
+  assert (R1: incl l csregs). eauto with coqlib.
   assert (R2: list_norepet l). inversion H0; auto.
-  unfold restore_int_callee_save.
-  case (zlt (index_int_callee_save a) (fe_num_int_callee_save fe));
-  intro;
-  unfold fe_num_int_callee_save, fe, make_env in z.
-  set (ls1 := Locmap.set (R a) (rs0 a) ls).
-  set (rs1 := Regmap.set a (rs0 a) rs).
-  assert (R3: agree ls1 rs1 fr parent rs0). 
+  unfold restore_callee_save_reg.
+  destruct (zlt (number a) (bound fe)).
+  set (ls1 := Locmap.set (R a) (ls0 (R a)) ls).
+  set (rs1 := Regmap.set a (ls0 (R a)) rs).
+  assert (R3: agree ls1 rs1 fr (parent_frame stack) ls0). 
     unfold ls1, rs1. apply agree_set_reg. auto. 
-    red. rewrite int_callee_save_type. exact z.
-    apply H. auto with coqlib.
+    rewrite <- number_within_bounds; auto. 
   generalize (IHl ls1 rs1 R1 R2 R3). 
   intros [ls' [rs' [A [B [C D]]]]].
-  exists ls'. exists rs'. 
-  split. apply Machabstr.exec_trans with E0 k1 rs1 fr m E0. 
+  exists ls'. exists rs'. split. 
+  apply star_left with E0 (State stack tf sp k1 rs1 fr m) E0.
   unfold rs1; apply exec_Mgetstack'; eauto with stacking.
   apply get_slot_index; eauto with stacking.
-  symmetry. eauto with stacking. 
-  eauto with stacking. traceEq.
+  symmetry. eapply mkindex_val; eauto.  
+  auto. traceEq.
   split. intros. elim H2; intros.
   subst r. rewrite C. unfold rs1. apply Regmap.gss. inversion H0; auto.
   auto.
@@ -934,127 +1015,82 @@ Proof.
   exists ls'; exists rs'. split. assumption.
   split. intros. elim H2; intros. 
   subst r. apply (agree_unused_reg _ _ _ _ _ D).
-  unfold mreg_bounded. rewrite int_callee_save_type; auto. 
-  auto.
+  rewrite <- number_within_bounds. auto. omega. auto.
   split. intros. simpl in H2. apply C. tauto.
   assumption.
 Qed.
 
+End RESTORE_CALLEE_SAVE.
+
 Lemma restore_int_callee_save_correct:
-  forall sp parent k fr m rs0 ls rs,
-  agree ls rs fr parent rs0 ->
+  forall sp k fr m ls0 ls rs,
+  agree ls rs fr (parent_frame stack) ls0 ->
   exists ls', exists rs',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (restore_int_callee_save fe) k int_callee_save_regs) rs fr m
-      E0 k rs' fr m
-  /\ (forall r, In r int_callee_save_regs -> rs' r = rs0 r)
+    star step tge
+       (State stack tf sp
+         (restore_callee_save_int fe k) rs fr m)
+    E0 (State stack tf sp k rs' fr m)
+  /\ (forall r, In r int_callee_save_regs -> rs' r = ls0 (R r))
   /\ (forall r, ~(In r int_callee_save_regs) -> rs' r = rs r)
-  /\ agree ls' rs' fr parent rs0.
+  /\ agree ls' rs' fr (parent_frame stack) ls0.
 Proof.
-  intros. apply restore_int_callee_save_correct_rec with ls. 
-  apply incl_refl. apply int_callee_save_norepet. auto.
-Qed.
-
-Lemma restore_float_callee_save_correct_rec:
-  forall sp parent k fr m rs0 l ls rs,
-  incl l float_callee_save_regs ->
-  list_norepet l -> 
-  agree ls rs fr parent rs0 ->
-  exists ls', exists rs',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (restore_float_callee_save fe) k l) rs fr m
-      E0 k rs' fr m
-  /\ (forall r, In r l -> rs' r = rs0 r)
-  /\ (forall r, ~(In r l) -> rs' r = rs r)
-  /\ agree ls' rs' fr parent rs0.
-Proof.
-  induction l.
-  (* base case *)
-  intros. simpl fold_right. exists ls. exists rs. 
-  split. apply Machabstr.exec_refl. 
-  split. intros. elim H2. 
-  split. auto. auto.
-  (* inductive case *)
-  intros. simpl fold_right. 
-  set (k1 := fold_right (restore_float_callee_save fe) k l) in *.
-  assert (R0: In a float_callee_save_regs). apply H; auto with coqlib.
-  assert (R1: incl l float_callee_save_regs). eauto with coqlib.
-  assert (R2: list_norepet l). inversion H0; auto.
-  unfold restore_float_callee_save.
-  case (zlt (index_float_callee_save a) (fe_num_float_callee_save fe));
-  intro;
-  unfold fe_num_float_callee_save, fe, make_env in z.
-  set (ls1 := Locmap.set (R a) (rs0 a) ls).
-  set (rs1 := Regmap.set a (rs0 a) rs).
-  assert (R3: agree ls1 rs1 fr parent rs0). 
-    unfold ls1, rs1. apply agree_set_reg. auto. 
-    red. rewrite float_callee_save_type. exact z.
-    apply H. auto with coqlib.
-  generalize (IHl ls1 rs1 R1 R2 R3). 
-  intros [ls' [rs' [A [B [C D]]]]].
-  exists ls'. exists rs'. 
-  split. apply Machabstr.exec_trans with E0 k1 rs1 fr m E0. 
-  unfold rs1; apply exec_Mgetstack'; eauto with stacking.
-  apply get_slot_index; eauto with stacking.
-  symmetry. eauto with stacking. 
-  exact A. traceEq.
-  split. intros. elim H2; intros.
-  subst r. rewrite C. unfold rs1. apply Regmap.gss. inversion H0; auto.
+  intros. unfold restore_callee_save_int.
+  apply restore_callee_save_regs_correct with int_callee_save_regs ls.
+  intros; simpl. split; auto. generalize (index_int_callee_save_pos r H0). omega.
   auto.
-  split. intros. simpl in H2. rewrite C. unfold rs1. apply Regmap.gso.
-  apply sym_not_eq; tauto. tauto.
-  assumption.
-  (* no load takes place *)
-  generalize (IHl ls rs R1 R2 H1).  
-  intros [ls' [rs' [A [B [C D]]]]].
-  exists ls'; exists rs'. split. assumption.
-  split. intros. elim H2; intros. 
-  subst r. apply (agree_unused_reg _ _ _ _ _ D).
-  unfold mreg_bounded. rewrite float_callee_save_type; auto. 
+  intros. unfold mreg_within_bounds. 
+  rewrite (int_callee_save_type r H0). tauto. 
+  eauto with stacking. 
+  apply incl_refl.
+  apply int_callee_save_norepet.
   auto.
-  split. intros. simpl in H2. apply C. tauto.
-  assumption.
 Qed.
 
 Lemma restore_float_callee_save_correct:
-  forall sp parent k fr m rs0 ls rs,
-  agree ls rs fr parent rs0 ->
+  forall sp k fr m ls0 ls rs,
+  agree ls rs fr (parent_frame stack) ls0 ->
   exists ls', exists rs',
-    Machabstr.exec_instrs tge tf sp parent
-      (List.fold_right (restore_float_callee_save fe) k float_callee_save_regs) rs fr m
-      E0 k rs' fr m
-  /\ (forall r, In r float_callee_save_regs -> rs' r = rs0 r)
+    star step tge
+       (State stack tf sp
+          (restore_callee_save_float fe k) rs fr m)
+    E0 (State stack tf sp k rs' fr m)
+  /\ (forall r, In r float_callee_save_regs -> rs' r = ls0 (R r))
   /\ (forall r, ~(In r float_callee_save_regs) -> rs' r = rs r)
-  /\ agree ls' rs' fr parent rs0.
+  /\ agree ls' rs' fr (parent_frame stack) ls0.
 Proof.
-  intros. apply restore_float_callee_save_correct_rec with ls. 
-  apply incl_refl. apply float_callee_save_norepet. auto.
+  intros. unfold restore_callee_save_float.
+  apply restore_callee_save_regs_correct with float_callee_save_regs ls.
+  intros; simpl. split; auto. generalize (index_float_callee_save_pos r H0). omega.
+  auto.
+  intros. unfold mreg_within_bounds. 
+  rewrite (float_callee_save_type r H0). tauto. 
+  eauto with stacking. 
+  apply incl_refl.
+  apply float_callee_save_norepet.
+  auto.
 Qed.
 
 Lemma restore_callee_save_correct:
-  forall sp parent k fr m rs0 ls rs,
-  agree ls rs fr parent rs0 ->
+  forall sp k fr m ls0 ls rs,
+  agree ls rs fr (parent_frame stack) ls0 ->
   exists rs',
-    Machabstr.exec_instrs tge tf sp parent
-      (restore_callee_save fe k) rs fr m
-      E0 k rs' fr m
+    star step tge
+       (State stack tf sp (restore_callee_save fe k) rs fr m)
+    E0 (State stack tf sp k rs' fr m)
   /\ (forall r, 
         In r int_callee_save_regs \/ In r float_callee_save_regs -> 
-        rs' r = rs0 r)
+        rs' r = ls0 (R r))
   /\ (forall r, 
         ~(In r int_callee_save_regs) ->
         ~(In r float_callee_save_regs) ->
         rs' r = rs r).
 Proof.
   intros. unfold restore_callee_save.
-  generalize (restore_int_callee_save_correct sp parent
-               (fold_right (restore_float_callee_save fe) k float_callee_save_regs)
-               fr m rs0 ls rs H).
+  exploit restore_int_callee_save_correct; eauto.
   intros [ls1 [rs1 [A [B [C D]]]]].
-  generalize (restore_float_callee_save_correct sp parent
-                k fr m rs0 ls1 rs1 D).
+  exploit restore_float_callee_save_correct. eexact D.
   intros [ls2 [rs2 [P [Q [R S]]]]].
-  exists rs2. split. eapply Machabstr.exec_trans. eexact A. eexact P. traceEq.
+  exists rs2. split. eapply star_trans. eexact A. eexact P. traceEq.
   split. intros. elim H0; intros.
   rewrite R. apply B. auto. apply list_disjoint_notin with int_callee_save_regs.
   apply int_float_callee_save_disjoint. auto.
@@ -1083,12 +1119,12 @@ Remark find_label_save_callee_save:
   forall fe lbl k,
   Mach.find_label lbl (save_callee_save fe k) = Mach.find_label lbl k.
 Proof.
-  intros. unfold save_callee_save.
+  intros. unfold save_callee_save, save_callee_save_int, save_callee_save_float, save_callee_save_regs.
   repeat rewrite find_label_fold_right. reflexivity.
-  intros. unfold save_float_callee_save. 
+  intros. unfold save_callee_save_reg. 
   case (zlt (index_float_callee_save x) (fe_num_float_callee_save fe));
   intro; reflexivity.
-  intros. unfold save_int_callee_save. 
+  intros. unfold save_callee_save_reg.  
   case (zlt (index_int_callee_save x) (fe_num_int_callee_save fe));
   intro; reflexivity.
 Qed.
@@ -1097,12 +1133,12 @@ Remark find_label_restore_callee_save:
   forall fe lbl k,
   Mach.find_label lbl (restore_callee_save fe k) = Mach.find_label lbl k.
 Proof.
-  intros. unfold restore_callee_save.
+  intros. unfold restore_callee_save, restore_callee_save_int, restore_callee_save_float, restore_callee_save_regs.
   repeat rewrite find_label_fold_right. reflexivity.
-  intros. unfold restore_float_callee_save. 
+  intros. unfold restore_callee_save_reg. 
   case (zlt (index_float_callee_save x) (fe_num_float_callee_save fe));
   intro; reflexivity.
-  intros. unfold restore_int_callee_save. 
+  intros. unfold restore_callee_save_reg. 
   case (zlt (index_int_callee_save x) (fe_num_int_callee_save fe));
   intro; reflexivity.
 Qed.
@@ -1117,13 +1153,14 @@ Proof.
   destruct a; unfold transl_instr; auto.
   destruct s; simpl; auto.
   destruct s; simpl; auto.
+  rewrite find_label_restore_callee_save. auto.
   simpl. case (peq lbl l); intro. reflexivity. auto.
   rewrite find_label_restore_callee_save. auto.
 Qed.
 
 Lemma transl_find_label:
   forall f tf lbl c,
-  transf_function f = Some tf ->
+  transf_function f = OK tf ->
   Linear.find_label lbl f.(Linear.fn_code) = Some c ->
   Mach.find_label lbl tf.(Mach.fn_code) = 
     Some (transl_code (make_env (function_bounds f)) c).
@@ -1143,32 +1180,11 @@ Lemma find_label_incl:
 Proof.
   induction c; simpl.
   intros; discriminate.
-  intro c'. case (is_label lbl a); intros.
+  intro c'. case (Linear.is_label lbl a); intros.
   injection H; intro; subst c'. red; intros; auto with coqlib. 
   apply incl_tl. auto.
 Qed.
 
-Lemma exec_instr_incl:
-  forall f sp c1 ls1 m1 t c2 ls2 m2,
-  Linear.exec_instr ge f sp c1 ls1 m1 t c2 ls2 m2 ->
-  incl c1 f.(fn_code) ->
-  incl c2 f.(fn_code).
-Proof.
-  induction 1; intros; eauto with coqlib.
-  eapply find_label_incl; eauto.
-  eapply find_label_incl; eauto.
-Qed.
-
-Lemma exec_instrs_incl:
-  forall f sp c1 ls1 m1 t c2 ls2 m2,
-  Linear.exec_instrs ge f sp c1 ls1 m1 t c2 ls2 m2 ->
-  incl c1 f.(fn_code) ->
-  incl c2 f.(fn_code).
-Proof.
-  induction 1; intros; auto.
-  eapply exec_instr_incl; eauto.
-Qed.
-
 (** Preservation / translation of global symbols and functions. *)
 
 Lemma symbols_preserved:
@@ -1180,42 +1196,59 @@ Proof.
 Qed.
 
 Lemma functions_translated:
-  forall f v,
+  forall v f,
   Genv.find_funct ge v = Some f ->
   exists tf,
-  Genv.find_funct tge v = Some tf /\ transf_fundef f = Some tf.
-Proof.
-  intros. 
-  generalize (Genv.find_funct_transf_partial transf_fundef TRANSF H).
-  case (transf_fundef f).
-  intros tf [A B]. exists tf. tauto.
-  intros. tauto.
-Qed.
+  Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf.
+Proof
+  (Genv.find_funct_transf_partial transf_fundef TRANSF).
 
 Lemma function_ptr_translated:
-  forall f v,
+  forall v f,
   Genv.find_funct_ptr ge v = Some f ->
   exists tf,
-  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = Some tf.
-Proof.
-  intros. 
-  generalize (Genv.find_funct_ptr_transf_partial transf_fundef TRANSF H).
-  case (transf_fundef f).
-  intros tf [A B]. exists tf. tauto.
-  intros. tauto.
-Qed.
+  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof
+  (Genv.find_funct_ptr_transf_partial transf_fundef TRANSF).
 
 Lemma sig_preserved:
-  forall f tf, transf_fundef f = Some tf -> Mach.funsig tf = Linear.funsig f.
+  forall f tf, transf_fundef f = OK tf -> Mach.funsig tf = Linear.funsig f.
 Proof.
   intros until tf; unfold transf_fundef, transf_partial_fundef.
   destruct f. unfold transf_function. 
-  destruct (zlt (fn_stacksize f) 0). congruence.
-  destruct (zlt (- Int.min_signed) (fe_size (make_env (function_bounds f)))). congruence.
-  intros. inversion H; reflexivity. 
+  destruct (zlt (Linear.fn_stacksize f) 0). simpl; congruence.
+  destruct (zlt (- Int.min_signed) (fe_size (make_env (function_bounds f)))). simpl; congruence.
+  unfold bind. intros. inversion H; reflexivity. 
   intro. inversion H. reflexivity.
 Qed.
 
+Lemma find_function_translated:
+  forall f0 ls rs fr parent ls0 ros f,
+  agree f0 ls rs fr parent ls0 ->
+  Linear.find_function ge ros ls = Some f ->
+  exists tf,
+  find_function tge ros rs = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros until f; intro AG.
+  destruct ros; simpl.
+  rewrite (agree_eval_reg _ _ _ _ _ _ m AG). intro.
+  apply functions_translated; auto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge i); try congruence.
+  intro. apply function_ptr_translated; auto.
+Qed.
+
+Hypothesis wt_prog: wt_program prog.
+
+Lemma find_function_well_typed:
+  forall ros ls f,
+  Linear.find_function ge ros ls = Some f -> wt_fundef f.
+Proof.
+  intros until f; destruct ros; simpl; unfold ge.
+  intro. eapply Genv.find_funct_prop; eauto.
+  destruct (Genv.find_symbol (Genv.globalenv prog) i); try congruence.
+  intro. eapply Genv.find_funct_ptr_prop; eauto. 
+Qed.
+
 (** Correctness of stack pointer relocation in operations and
   addressing modes. *)
 
@@ -1224,7 +1257,7 @@ Definition shift_sp (tf: Mach.function) (sp: val) :=
 
 Remark shift_offset_sp:
   forall f tf sp n v,
-  transf_function f = Some tf ->
+  transf_function f = OK tf ->
   offset_sp sp n = Some v ->
   offset_sp (shift_sp tf sp)
     (Int.add (Int.repr (fe_size (make_env (function_bounds f)))) n) = Some v.
@@ -1243,11 +1276,11 @@ Proof.
 Qed.
 
 Lemma shift_eval_operation:
-  forall f tf sp op args v,
-  transf_function f = Some tf ->
-  eval_operation ge sp op args = Some v ->
+  forall f tf sp op args m v,
+  transf_function f = OK tf ->
+  eval_operation ge sp op args m = Some v ->
   eval_operation tge (shift_sp tf sp) 
-                 (transl_op (make_env (function_bounds f)) op) args =
+                 (transl_op (make_env (function_bounds f)) op) args m =
   Some v.
 Proof.
   intros until v. destruct op; intros; auto.
@@ -1258,7 +1291,7 @@ Qed.
 
 Lemma shift_eval_addressing:
   forall f tf sp addr args v,
-  transf_function f = Some tf ->
+  transf_function f = OK tf ->
   eval_addressing ge sp addr args = Some v ->
   eval_addressing tge (shift_sp tf sp) 
                  (transl_addr (make_env (function_bounds f)) addr) args =
@@ -1282,7 +1315,7 @@ Variable rs: regset.
 Variable sg: signature.
 Hypothesis AG1: forall r, rs r = ls (R r).
 Hypothesis AG2: forall (ofs : Z) (ty : typ),
-      6 <= ofs ->
+      14 <= ofs ->
       ofs + typesize ty <= size_arguments sg ->
       get_slot fr ty (Int.signed (Int.repr (4 * ofs)))
                      (ls (S (Outgoing ofs ty))).
@@ -1319,374 +1352,336 @@ End EXTERNAL_ARGUMENTS.
 (** The proof of semantic preservation relies on simulation diagrams
   of the following form:
 <<
-        c, ls, m ------------------- T(c), rs, fr, m
-            |                             |
-            |                             |
-            v                             v
-       c', ls', m' ---------------- T(c'), rs', fr', m'
+           st1 --------------- st2
+            |                   |
+           t|                  +|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
 >>
-  The left vertical arrow represents a transition in the
-  original Linear code.  The top horizontal bar captures three preconditions:
-- Agreement between [ls] on the Linear side and [rs], [fr], [rs0]
-  on the Mach side.
-- Inclusion between [c] and the code of the function [f] being
-  translated.
+  Matching between source and target states is defined by [match_states]
+  below.  It implies:
+- Agreement between, on the Linear side, the location sets [ls]
+  and [parent_locset s] of the current function and its caller,
+  and on the Mach side the register set [rs], the frame [fr]
+  and the caller's frame [parent_frame ts].
+- Inclusion between the Linear code [c] and the code of the
+  function [f] being executed.
 - Well-typedness of [f].
-
-  In conclusion, we want to prove the existence of [rs'], [fr'], [m']
-  that satisfies the right arrow (zero, one or several transitions in
-  the generated Mach code) and the postcondition (agreement between
-  [ls'] and [rs'], [fr'], [rs0]).
-
-  As usual, we capture these diagrams as predicates parameterized
-  by the transition in the original Linear code. *)
-
-Definition exec_instr_prop
-      (f: function) (sp: val)
-      (c: code) (ls: locset) (m: mem) (t: trace)
-      (c': code) (ls': locset) (m': mem) :=
-  forall tf rs fr parent rs0
-     (TRANSL: transf_function f = Some tf)
-     (WTF: wt_function f)
-     (AG: agree f ls rs fr parent rs0)
-     (INCL: incl c f.(fn_code)),
-  exists rs', exists fr',
-    Machabstr.exec_instrs tge tf (shift_sp tf sp) parent
-       (transl_code (make_env (function_bounds f)) c) rs fr m
-     t (transl_code (make_env (function_bounds f)) c') rs' fr' m'
-  /\ agree f ls' rs' fr' parent rs0.
-
-(** The simulation property for function calls has different preconditions
-  (a slightly weaker notion of agreement between [ls] and the initial
-  register values [rs] and caller's frame [parent]) and different
-  postconditions (preservation of callee-save registers). *)
-
-Definition exec_function_prop
-      (f: fundef) 
-      (ls: locset) (m: mem) (t: trace)
-      (ls': locset) (m': mem) :=
-  forall tf rs parent
-     (TRANSL: transf_fundef f = Some tf)
-     (WTF: wt_fundef f)
-     (AG1: forall r, rs r = ls (R r))
-     (AG2: forall ofs ty, 
-             6 <= ofs -> 
-             ofs + typesize ty <= size_arguments (funsig f) ->
-             get_slot parent ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty)))),
-  exists rs',
-    Machabstr.exec_function tge tf parent rs m t rs' m'
- /\ (forall r,
-        In (R r) temporaries \/ In (R r) destroyed_at_call ->
-        rs' r = ls' (R r))
- /\ (forall r,
-        In r int_callee_save_regs \/ In r float_callee_save_regs ->
-        rs' r = rs r).
-
-Hypothesis wt_prog: wt_program prog.
-
-Lemma transf_function_correct:
-  forall f ls m t ls' m',
-  Linear.exec_function ge f ls m t ls' m' ->
-  exec_function_prop f ls m t ls' m'.
+*)
+
+Inductive match_stacks: list Linear.stackframe -> list Machabstr.stackframe -> Prop :=
+  | match_stacks_nil:
+      match_stacks nil nil
+  | match_stacks_cons:
+      forall f sp c ls tf fr s ts,
+      match_stacks s ts ->
+      transf_function f = OK tf ->
+      wt_function f ->
+      agree_frame f ls fr (parent_frame ts) (parent_locset s) ->
+      incl c (Linear.fn_code f) ->
+      match_stacks
+       (Linear.Stackframe f sp ls c :: s)
+       (Machabstr.Stackframe tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) c) fr :: ts).
+
+Inductive match_states: Linear.state -> Machabstr.state -> Prop :=
+  | match_states_intro:
+      forall s f sp c ls m ts tf rs fr
+        (STACKS: match_stacks s ts)
+        (TRANSL: transf_function f = OK tf)
+        (WTF: wt_function f)
+        (AG: agree f ls rs fr (parent_frame ts) (parent_locset s))
+        (INCL: incl c (Linear.fn_code f)),
+      match_states (Linear.State s f sp c ls m)
+                   (Machabstr.State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) c) rs fr m)
+  | match_states_call:
+      forall s f ls m ts tf rs
+        (STACKS: match_stacks s ts)
+        (TRANSL: transf_fundef f = OK tf)
+        (WTF: wt_fundef f)
+        (AG1: forall r, rs r = ls (R r))
+        (AG2: forall ofs ty, 
+                14 <= ofs -> 
+                ofs + typesize ty <= size_arguments (Linear.funsig f) ->
+                get_slot (parent_frame ts) ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty))))
+        (AG3: agree_callee_save ls (parent_locset s)),
+      match_states (Linear.Callstate s f ls m)
+                   (Machabstr.Callstate ts tf rs m)
+  | match_states_return:
+      forall s ls m ts rs
+        (STACKS: match_stacks s ts)
+        (AG1: forall r, rs r = ls (R r))
+        (AG2: agree_callee_save ls (parent_locset s)),
+      match_states (Linear.Returnstate s ls m)
+                   (Machabstr.Returnstate ts rs m).
+
+Theorem transf_step_correct:
+  forall s1 t s2, Linear.step ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1'),
+  exists s2', plus step tge s1' t s2' /\ match_states s2 s2'.
 Proof.
   assert (RED: forall f i c,
           transl_code (make_env (function_bounds f)) (i :: c) = 
           transl_instr (make_env (function_bounds f)) i
                        (transl_code (make_env (function_bounds f)) c)).
     intros. reflexivity.
-  apply (Linear.exec_function_ind3 ge exec_instr_prop
-            exec_instr_prop exec_function_prop);
-  intros; red; intros; 
-  try rewrite RED; 
+  induction 1; intros;
+  try inv MS;
+  try rewrite RED;
   try (generalize (WTF _ (INCL _ (in_eq _ _))); intro WTI);
+  try (generalize (function_is_within_bounds f WTF _ (INCL _ (in_eq _ _)));
+       intro BOUND; simpl in BOUND);
   unfold transl_instr.
-
   (* Lgetstack *)
-  inversion WTI. exists (rs0#r <- (rs (S sl))); exists fr.
+  inv WTI. destruct BOUND.
+  exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b)
+             (rs0#r <- (rs (S sl))) fr m).
   split. destruct sl. 
   (* Lgetstack, local *)
-  apply exec_Mgetstack'; auto.
+  apply plus_one. eapply exec_Mgetstack'; eauto.
   apply get_slot_index. apply index_local_valid. auto. 
   eapply agree_size; eauto. reflexivity. 
   eapply agree_locals; eauto.
   (* Lgetstack, incoming *)
-  apply Machabstr.exec_one; constructor.
+  apply plus_one; apply exec_Mgetparam.
   unfold offset_of_index. eapply agree_incoming; eauto.
   (* Lgetstack, outgoing *)
-  apply exec_Mgetstack'; auto.
+  apply plus_one; apply exec_Mgetstack'; eauto.
   apply get_slot_index. apply index_arg_valid. auto. 
   eapply agree_size; eauto. reflexivity. 
   eapply agree_outgoing; eauto.
   (* Lgetstack, common *)
+  econstructor; eauto with coqlib. 
   apply agree_set_reg; auto.
 
   (* Lsetstack *)
-  inversion WTI. destruct sl.
+  inv WTI. destruct sl.
+
   (* Lsetstack, local *)
-  generalize (agree_set_local _ _ _ _ _ _ (rs0 r) _ _ AG H3).
+  generalize (agree_set_local _ _ _ _ _ _ (rs0 r) _ _ AG BOUND).
   intros [fr' [SET AG']].
-  exists rs0; exists fr'. split.
-  apply exec_Msetstack'; auto.
+  econstructor; split.
+  apply plus_one. eapply exec_Msetstack'; eauto.
+  econstructor; eauto with coqlib.
   replace (rs (R r)) with (rs0 r). auto.
   symmetry. eapply agree_reg; eauto.
   (* Lsetstack, incoming *)
   contradiction.
   (* Lsetstack, outgoing *)
-  generalize (agree_set_outgoing _ _ _ _ _ _ (rs0 r) _ _ AG H3).
+  generalize (agree_set_outgoing _ _ _ _ _ _ (rs0 r) _ _ AG BOUND).
   intros [fr' [SET AG']].
-  exists rs0; exists fr'. split.
-  apply exec_Msetstack'; auto.
+  econstructor; split.
+  apply plus_one. eapply exec_Msetstack'; eauto.
+  econstructor; eauto with coqlib.
   replace (rs (R r)) with (rs0 r). auto.
   symmetry. eapply agree_reg; eauto.
 
   (* Lop *)
-  assert (mreg_bounded f res). inversion WTI; auto.
-  exists (rs0#res <- v); exists fr. split.
-  apply Machabstr.exec_one. constructor. 
+  exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#res <- v) fr m); split.
+  apply plus_one. apply exec_Mop. 
   apply shift_eval_operation. auto. 
   change mreg with RegEq.t.
   rewrite (agree_eval_regs _ _ _ _ _ _ args AG). auto.
+  econstructor; eauto with coqlib.
   apply agree_set_reg; auto.
 
   (* Lload *)
-  inversion WTI. exists (rs0#dst <- v); exists fr. split.
-  apply Machabstr.exec_one; econstructor.
+  exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#dst <- v) fr m); split.
+  apply plus_one; eapply exec_Mload; eauto.
   apply shift_eval_addressing; auto. 
   change mreg with RegEq.t.
   rewrite (agree_eval_regs _ _ _ _ _ _ args AG). eauto.
-  auto.
+  econstructor; eauto with coqlib.
   apply agree_set_reg; auto.
 
   (* Lstore *)
-  exists rs0; exists fr. split.
-  apply Machabstr.exec_one; econstructor.
-  apply shift_eval_addressing; auto. 
+  econstructor; split.
+  apply plus_one; eapply exec_Mstore; eauto.
+  apply shift_eval_addressing; eauto. 
   change mreg with RegEq.t.
   rewrite (agree_eval_regs _ _ _ _ _ _ args AG). eauto.
-  rewrite (agree_eval_reg _ _ _ _ _ _ src AG). auto.
-  auto.
-
-  (* Lcall *)
-  inversion WTI.
-  assert (WTF': wt_fundef f').
-    destruct ros; simpl in H.
-    apply (Genv.find_funct_prop wt_fundef wt_prog H).
-    destruct (Genv.find_symbol ge i); try discriminate.
-    apply (Genv.find_funct_ptr_prop wt_fundef wt_prog H).
-  assert (TR: exists tf', Mach.find_function tge ros rs0 = Some tf'
-                       /\ transf_fundef f' = Some tf').
-    destruct ros; simpl in H; simpl.
-    eapply functions_translated. 
-    rewrite (agree_eval_reg _ _ _ _ _ _ m0 AG). auto. 
-    rewrite symbols_preserved. 
-    destruct (Genv.find_symbol ge i); try discriminate.
-    apply function_ptr_translated; auto.
-  elim TR;  intros tf' [FIND' TRANSL']; clear TR.
-  assert (AG1: forall r, rs0 r = rs (R r)).
-    intro. symmetry. eapply agree_reg; eauto.
-  assert (AG2: forall ofs ty, 
-             6 <= ofs -> 
-             ofs + typesize ty <= size_arguments (funsig f') ->
-             get_slot fr ty (Int.signed (Int.repr (4 * ofs))) (rs (S (Outgoing ofs ty)))).
-    intros. 
-    assert (slot_bounded f (Outgoing ofs ty)).
-      red. rewrite <- H0 in H8. omega.
-    change (4 * ofs) with (offset_of_index (make_env (function_bounds f)) (FI_arg ofs ty)).
-    rewrite (offset_of_index_no_overflow f tf); auto. 
-    apply get_slot_index. apply index_arg_valid. auto. 
-    eapply agree_size; eauto. reflexivity. 
-    eapply agree_outgoing; eauto. 
-  generalize (H2 tf' rs0 fr TRANSL' WTF' AG1 AG2).
-  intros [rs2 [EXF [REGS1 REGS2]]].
-  exists rs2; exists fr. split.
-  apply Machabstr.exec_one; apply Machabstr.exec_Mcall with tf'; auto.
-  apply agree_return_regs with rs0; auto.
-
+  rewrite (agree_eval_reg _ _ _ _ _ _ src AG). eauto.
+  econstructor; eauto with coqlib.
+
+  (* Lcall *) 
+  assert (WTF': wt_fundef f'). eapply find_function_well_typed; eauto.
+  exploit find_function_translated; eauto. 
+  intros [tf' [FIND' TRANSL']].
+  econstructor; split.
+  apply plus_one; eapply exec_Mcall; eauto.
+  econstructor; eauto. 
+  econstructor; eauto with coqlib.
+  exists rs0; auto.
+  intro. symmetry. eapply agree_reg; eauto.
+  intros. 
+  assert (slot_within_bounds f (function_bounds f) (Outgoing ofs ty)).
+  red. simpl. omega. 
+  change (4 * ofs) with (offset_of_index (make_env (function_bounds f)) (FI_arg ofs ty)).
+  rewrite (offset_of_index_no_overflow f tf); auto. 
+  apply get_slot_index. apply index_arg_valid. auto. 
+  eapply agree_size; eauto. reflexivity. 
+  eapply agree_outgoing; eauto.
+  simpl. red; auto.
+
+  (* Ltailcall *) 
+  assert (WTF': wt_fundef f'). eapply find_function_well_typed; eauto.
+  generalize (find_function_translated _ _ _ _ _ _ _ _ AG H). 
+  intros [tf' [FIND' TRANSL']].
+  generalize (restore_callee_save_correct ts _ _ TRANSL
+               (shift_sp tf (Vptr stk Int.zero)) 
+               (Mtailcall (Linear.funsig f') ros :: transl_code (make_env (function_bounds f)) b)
+               _ m _ _ _ AG).
+  intros [rs2 [A [B C]]].
+  assert (FIND'': find_function tge ros rs2 = Some tf').
+    rewrite <- FIND'. destruct ros; simpl; auto.
+    inv WTI. rewrite C. auto. 
+    simpl. intuition congruence. simpl. intuition congruence. 
+  econstructor; split.
+  eapply plus_right. eexact A. 
+  simpl shift_sp. eapply exec_Mtailcall; eauto. traceEq.
+  econstructor; eauto. 
+  intros; symmetry; eapply agree_return_regs; eauto.
+  intros. inv WTI. rewrite tailcall_possible_size in H4.
+  rewrite H4 in H1. elimtype False. generalize (typesize_pos ty). omega.
+  apply agree_callee_save_return_regs. 
+ 
   (* Lalloc *)
-  exists (rs0#loc_alloc_result <- (Vptr blk Int.zero)); exists fr. split.
-  apply Machabstr.exec_one; eapply Machabstr.exec_Malloc; eauto.
+  exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b)
+                          (rs0#loc_alloc_result <- (Vptr blk Int.zero)) fr m'); split.
+  apply plus_one; eapply exec_Malloc; eauto.
   rewrite (agree_eval_reg _ _ _ _ _ _ loc_alloc_argument AG). auto.
+  econstructor; eauto with coqlib.
   apply agree_set_reg; auto.
   red. simpl. generalize (max_over_regs_of_funct_pos f int_callee_save). omega.
 
   (* Llabel *)
-  exists rs0; exists fr. split.
-  apply Machabstr.exec_one; apply Machabstr.exec_Mlabel.
-  auto.
+  econstructor; split.
+  apply plus_one; apply exec_Mlabel.
+  econstructor; eauto with coqlib.
 
   (* Lgoto *)
-  exists rs0; exists fr. split.
-  apply Machabstr.exec_one; apply Machabstr.exec_Mgoto.
-  apply transl_find_label; auto.
-  auto.
+  econstructor; split.
+  apply plus_one; apply exec_Mgoto.
+  apply transl_find_label; eauto.
+  econstructor; eauto. 
+  eapply find_label_incl; eauto.
 
   (* Lcond, true *)
-  exists rs0; exists fr. split.
-  apply Machabstr.exec_one; apply Machabstr.exec_Mcond_true.
-  rewrite <- (agree_eval_regs _ _ _ _ _ _ args AG) in H; auto.
-  apply transl_find_label; auto.
-  auto.
+  econstructor; split.
+  apply plus_one; apply exec_Mcond_true.
+  rewrite <- (agree_eval_regs _ _ _ _ _ _ args AG) in H; eauto.
+  apply transl_find_label; eauto.
+  econstructor; eauto.
+  eapply find_label_incl; eauto.
 
   (* Lcond, false *)
-  exists rs0; exists fr. split.
-  apply Machabstr.exec_one; apply Machabstr.exec_Mcond_false.
+  econstructor; split.
+  apply plus_one; apply exec_Mcond_false.
   rewrite <- (agree_eval_regs _ _ _ _ _ _ args AG) in H; auto.
-  auto.
-
-  (* refl *)
-  exists rs0; exists fr. split. apply Machabstr.exec_refl. auto.
-
-  (* one *)
-  apply H0; auto.
-
-  (* trans *)
-  generalize (H0 tf rs fr parent rs0 TRANSL WTF AG INCL).
-  intros [rs' [fr' [A B]]].
-  assert (INCL': incl b2 (fn_code f)). eapply exec_instrs_incl; eauto.
-  generalize (H2 tf rs' fr' parent rs0 TRANSL WTF B INCL').
-  intros [rs'' [fr'' [C D]]].
-  exists rs''; exists fr''. split.
-  eapply Machabstr.exec_trans. eexact A. eexact C. auto.
-  auto.
-
-  (* function *)
+  econstructor; eauto with coqlib.
+
+  (* Lreturn *)
+  exploit restore_callee_save_correct; eauto.
+  intros [ls' [A [B C]]].
+  econstructor; split.
+  eapply plus_right. eauto. 
+  simpl shift_sp. econstructor; eauto. traceEq.
+  econstructor; eauto. 
+  intros. symmetry. eapply agree_return_regs; eauto.
+  apply agree_callee_save_return_regs.  
+
+  (* internal function *)
   generalize TRANSL; clear TRANSL. 
   unfold transf_fundef, transf_partial_fundef.
-  caseEq (transf_function f); try congruence.
+  caseEq (transf_function f); simpl; try congruence.
   intros tfn TRANSL EQ. inversion EQ; clear EQ; subst tf.
   inversion WTF as [|f' WTFN]. subst f'.
-  assert (X: forall link ra,
-   exists rs' : regset,
-   Machabstr.exec_function_body tge tfn parent link ra rs0 m t rs' (free m2 stk) /\
-   (forall r : mreg,
-    In (R r) temporaries \/ In (R r) destroyed_at_call -> rs' r = rs2 (R r)) /\
-   (forall r : mreg,
-    In r int_callee_save_regs \/ In r float_callee_save_regs -> rs' r = rs0 r)).
-  intros.
   set (sp := Vptr stk Int.zero) in *.
   set (tsp := shift_sp tfn sp).
   set (fe := make_env (function_bounds f)).
-  assert (low (init_frame tfn) = -fe.(fe_size)).
-    simpl low. rewrite (unfold_transf_function _ _ TRANSL).
+  assert (fr_low (init_frame tfn) = - fe.(fe_size)).
+    simpl fr_low. rewrite (unfold_transf_function _ _ TRANSL).
     reflexivity.
-  assert (exists fr1, set_slot (init_frame tfn) Tint 0 link fr1).
-    apply set_slot_ok. reflexivity. omega. rewrite H2. generalize (size_pos f). 
-    unfold fe. simpl typesize. omega.
-  elim H3; intros fr1 SET1; clear H3.
-  assert (high fr1 = 0).
-    inversion SET1. reflexivity.
-  assert (low fr1 = -fe.(fe_size)).
-    inversion SET1. rewrite <- H2. reflexivity.
-  assert (exists fr2, set_slot fr1 Tint 12 ra fr2).
-    apply set_slot_ok. auto. omega. rewrite H4. generalize (size_pos f). 
-    unfold fe. simpl typesize. omega.
-  elim H5; intros fr2 SET2; clear H5.
-  assert (high fr2 = 0).
-    inversion SET2. simpl. auto.
-  assert (low fr2 = -fe.(fe_size)).
-    inversion SET2. rewrite <- H4. reflexivity.
-  assert (UNDEF: forall idx, index_valid f idx -> index_val f idx fr2 = Vundef).
-    intros. 
-    assert (get_slot fr2 (type_of_index idx) (offset_of_index fe idx) Vundef).
-    generalize (offset_of_index_valid _ _ H7). fold fe. intros [XX YY].
-    eapply slot_gso; eauto. 
-    eapply slot_gso; eauto.
-    apply slot_gi. omega. omega. 
-    simpl typesize. omega. simpl typesize. omega. 
-    inversion H8. symmetry. exact H11.
-  generalize (save_callee_save_correct f tfn TRANSL
-                tsp parent
-                (transl_code (make_env (function_bounds f)) f.(fn_code))
-                rs0 fr2 m1 rs AG1 AG2 H5 H6 UNDEF).
+  assert (UNDEF: forall idx, index_valid f idx -> index_val f idx (init_frame tfn) = Vundef).
+    intros.
+    assert (get_slot (init_frame tfn) (type_of_index idx) (offset_of_index fe idx) Vundef).
+    generalize (offset_of_index_valid _ _ H1). fold fe. intros [XX YY].
+    apply slot_gi; auto. omega. 
+    inv H2; auto.
+  exploit save_callee_save_correct; eauto. 
   intros [fr [EXP AG]].
-  generalize (H1 tfn rs0 fr parent rs0 TRANSL WTFN AG (incl_refl _)).
-  intros [rs' [fr' [EXB AG']]].
-  generalize (restore_callee_save_correct f tfn TRANSL tsp parent
-                (Mreturn :: transl_code (make_env (function_bounds f)) b)
-                fr' m2 rs0 rs2 rs' AG').
-  intros [rs'' [EXX [REGS1 REGS2]]].
-  exists rs''. split. eapply Machabstr.exec_funct_body.
-    rewrite (unfold_transf_function f tfn TRANSL); eexact H. 
-    eexact SET1. eexact SET2. 
-    replace (Mach.fn_code tfn) with
-            (transl_body f (make_env (function_bounds f))).    
-    replace (Vptr stk (Int.repr (- fn_framesize tfn))) with tsp.
-    eapply Machabstr.exec_trans. eexact EXP. 
-    eapply Machabstr.exec_trans. eexact EXB. eexact EXX. 
-    reflexivity. traceEq.
-    unfold tsp, shift_sp, sp. unfold Val.add. 
-    rewrite Int.add_commut. rewrite Int.add_zero. auto.
-    rewrite (unfold_transf_function f tfn TRANSL). simpl. auto.
-  split. intros. rewrite REGS2. symmetry; eapply agree_reg; eauto.
-  apply int_callee_save_not_destroyed; auto.
-  apply float_callee_save_not_destroyed; auto.
-  auto.
-  generalize (X Vzero Vzero). intros [rs' [EX [REGS1 REGS2]]].
-  exists rs'. split.
-  constructor. intros.
-  generalize (X link ra). intros [rs'' [EX' [REGS1' REGS2']]].
-  assert (rs' = rs'').   
-    apply (@Regmap.exten val). intro r. 
-    elim (register_classification r); intro.
-    rewrite REGS1'. apply REGS1. auto. auto.
-    rewrite REGS2'. apply REGS2. auto. auto.
-  rewrite H4. auto.
-  split; auto.
+  econstructor; split.
+  eapply plus_left.
+  eapply exec_function_internal; eauto.
+  rewrite (unfold_transf_function f tfn TRANSL); simpl; eexact H. 
+  replace (Mach.fn_code tfn) with
+          (transl_body f (make_env (function_bounds f))).    
+  replace (Vptr stk (Int.repr (- fn_framesize tfn))) with tsp.
+  unfold transl_body. eexact EXP.
+  unfold tsp, shift_sp, sp. unfold Val.add. 
+  rewrite Int.add_commut. rewrite Int.add_zero. auto.
+  rewrite (unfold_transf_function f tfn TRANSL). simpl. auto.
+  traceEq.
+  unfold tsp. econstructor; eauto with coqlib.
+  eapply agree_callee_save_agree; eauto. 
 
   (* external function *)
   simpl in TRANSL. inversion TRANSL; subst tf.
-  inversion WTF. subst ef0. set (sg := ef_sig ef) in *.
-  exists (rs#(loc_result sg) <- res). 
-  split. econstructor. eauto. 
-  fold sg. rewrite H0. apply transl_external_arguments; assumption.
-  reflexivity.
-  split; intros. rewrite H1. 
-  unfold Regmap.set. case (RegEq.eq r (loc_result sg)); intro.
+  inversion WTF. subst ef0.
+  exploit transl_external_arguments; eauto. intro EXTARGS.
+  econstructor; split.
+  apply plus_one. eapply exec_function_external; eauto.
+  econstructor; eauto.
+  intros. unfold Regmap.set. case (RegEq.eq r (loc_result (ef_sig ef))); intro.
   rewrite e. rewrite Locmap.gss; auto. rewrite Locmap.gso; auto.
   red; auto.
-  apply Regmap.gso. red; intro; subst r.
-  elim (Conventions.loc_result_not_callee_save _ H2).
+  apply agree_callee_save_set_result; auto.
+
+  (* return *)
+  inv STACKS. 
+  econstructor; split.
+  apply plus_one. apply exec_return. 
+  econstructor; eauto. simpl in AG2.  
+  eapply agree_frame_agree; eauto.
 Qed.
 
-End PRESERVATION.
+Lemma transf_initial_states:
+  forall st1, Linear.initial_state prog st1 ->
+  exists st2, Machabstr.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto. intros [tf [FIND TR]].
+  econstructor; split.
+  econstructor. 
+  rewrite (transform_partial_program_main _ _ TRANSF). 
+  rewrite symbols_preserved. eauto.
+  eauto. 
+  rewrite (Genv.init_mem_transf_partial _ _ TRANSF).
+  econstructor; eauto. constructor. 
+  eapply Genv.find_funct_ptr_prop; eauto.
+  intros. 
+  replace (size_arguments (Linear.funsig f)) with 14 in H5.
+  elimtype False. generalize (typesize_pos ty). omega.
+  rewrite H2; auto.
+  simpl; red; auto.
+Qed.
 
-Theorem transl_program_correct:
-  forall (p: Linear.program) (tp: Mach.program) (t: trace) (r: val),
-  wt_program p ->
-  transf_program p = Some tp ->
-  Linear.exec_program p t r ->
-  Machabstr.exec_program tp t r.
-Proof.
-  intros p tp t r WTP TRANSF
-         [fptr [f [ls' [m [FINDSYMB [FINDFUNC [SIG [EXEC RES]]]]]]]].
-  assert (WTF: wt_fundef f).
-    apply (Genv.find_funct_ptr_prop wt_fundef WTP FINDFUNC).
-  set (ls := Locmap.init Vundef) in *.
-  set (rs := Regmap.init Vundef).
-  set (fr := empty_frame).
-  assert (AG1: forall r, rs r = ls (R r)).
-    intros; reflexivity.
-  assert (AG2: forall ofs ty, 
-             6 <= ofs -> 
-             ofs + typesize ty <= size_arguments (funsig f) ->
-             get_slot fr ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty)))).
-    rewrite SIG. unfold size_arguments, sig_args, size_arguments_rec.
-    intros. generalize (typesize_pos ty). 
-    intro. omegaContradiction.
-  generalize (function_ptr_translated p tp TRANSF f _ FINDFUNC).
-  intros [tf [TFIND TRANSL]]. 
-  generalize (transf_function_correct p tp TRANSF WTP _ _ _ _ _ _ EXEC
-                tf rs fr TRANSL WTF AG1 AG2).
-  intros [rs' [A [B C]]].
-  red. exists fptr; exists tf; exists rs'; exists m.
-  split. rewrite (symbols_preserved p tp TRANSF). 
-  rewrite (transform_partial_program_main transf_fundef p TRANSF).
-  assumption.
-  split. assumption.
-  split. replace (Genv.init_mem tp) with (Genv.init_mem p).
-  exact A. symmetry. apply Genv.init_mem_transf_partial with transf_fundef. 
-  exact TRANSF.
-  rewrite <- RES. replace R3 with (loc_result (funsig f)).
-  apply B. right. apply loc_result_acceptable. 
-  rewrite SIG; reflexivity.
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> Linear.final_state st1 r -> Machabstr.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. econstructor. rewrite AG1; auto.
+Qed.
+
+Theorem transf_program_correct:
+  forall (beh: program_behavior),
+  Linear.exec_program prog beh -> Machabstr.exec_program tprog beh.
+Proof.
+  unfold Linear.exec_program, Machabstr.exec_program; intros.
+  eapply simulation_plus_preservation; eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  eexact transf_step_correct. 
 Qed.
+
+End PRESERVATION.
diff --git a/backend/Stackingtyping.v b/backend/Stackingtyping.v
index beb28e29..fa8a3e2e 100644
--- a/backend/Stackingtyping.v
+++ b/backend/Stackingtyping.v
@@ -2,6 +2,7 @@
 
 Require Import Coqlib.
 Require Import Maps.
+Require Import Errors.
 Require Import Integers.
 Require Import AST.
 Require Import Op.
@@ -11,11 +12,12 @@ Require Import Linear.
 Require Import Lineartyping.
 Require Import Mach.
 Require Import Machtyping.
+Require Import Bounds.
 Require Import Stacking.
 Require Import Stackingproof.
 
 (** We show that the Mach code generated by the [Stacking] pass
-  is well-typed if the original Linear code is. *)
+  is well-typed if the original LTLin code is. *)
 
 Definition wt_instrs (k: Mach.code) : Prop :=
   forall i, In i k -> wt_instr i.
@@ -33,17 +35,7 @@ Section TRANSL_FUNCTION.
 Variable f: Linear.function.
 Let fe := make_env (function_bounds f).
 Variable tf: Mach.function.
-Hypothesis TRANSF_F: transf_function f = Some tf.
-
-Lemma wt_Msetstack':
-  forall idx ty r,
-  mreg_type r = ty -> index_valid f idx ->
-  wt_instr (Msetstack r (Int.repr (offset_of_index fe idx)) ty).
-Proof.
-  intros. constructor. auto. 
-  unfold fe. rewrite (offset_of_index_no_overflow f tf TRANSF_F); auto.
-  generalize (offset_of_index_valid f idx H0). tauto.
-Qed.  
+Hypothesis TRANSF_F: transf_function f = OK tf.
 
 Lemma wt_fold_right:
   forall (A: Set) (f: A -> code -> code) (k: code) (l: list A),
@@ -58,51 +50,57 @@ Proof.
   auto. 
 Qed.
 
-Lemma wt_save_int_callee_save:
-  forall cs k,
-  In cs int_callee_save_regs -> wt_instrs k ->
-  wt_instrs (save_int_callee_save fe cs k).
+Lemma wt_save_callee_save_int:
+  forall k,
+  wt_instrs k ->
+  wt_instrs (save_callee_save_int fe k).
 Proof.
-  intros. unfold save_int_callee_save.
-  case (zlt (index_int_callee_save cs) (fe_num_int_callee_save fe)); intro.
+  intros. unfold save_callee_save_int, save_callee_save_regs.
+  apply wt_fold_right; auto.
+  intros. unfold save_callee_save_reg. 
+  case (zlt (index_int_callee_save x) (fe_num_int_callee_save fe)); intro.
   apply wt_instrs_cons; auto.
-  apply wt_Msetstack'. apply int_callee_save_type; auto.
-  apply index_saved_int_valid. auto. exact z.
+  apply wt_Msetstack. apply int_callee_save_type; auto.
   auto.
 Qed.
 
-Lemma wt_save_float_callee_save:
-  forall cs k,
-  In cs float_callee_save_regs -> wt_instrs k ->
-  wt_instrs (save_float_callee_save fe cs k).
+Lemma wt_save_callee_save_float:
+  forall k,
+  wt_instrs k ->
+  wt_instrs (save_callee_save_float fe k).
 Proof.
-  intros. unfold save_float_callee_save.
-  case (zlt (index_float_callee_save cs) (fe_num_float_callee_save fe)); intro.
+  intros. unfold save_callee_save_float, save_callee_save_regs.
+  apply wt_fold_right; auto.
+  intros. unfold save_callee_save_reg. 
+  case (zlt (index_float_callee_save x) (fe_num_float_callee_save fe)); intro.
   apply wt_instrs_cons; auto.
-  apply wt_Msetstack'. apply float_callee_save_type; auto.
-  apply index_saved_float_valid. auto. exact z.
+  apply wt_Msetstack. apply float_callee_save_type; auto.
   auto.
 Qed.
 
-Lemma wt_restore_int_callee_save:
-  forall cs k,
-  In cs int_callee_save_regs -> wt_instrs k ->
-  wt_instrs (restore_int_callee_save fe cs k).
+Lemma wt_restore_callee_save_int:
+  forall k,
+  wt_instrs k ->
+  wt_instrs (restore_callee_save_int fe k).
 Proof.
-  intros. unfold restore_int_callee_save.
-  case (zlt (index_int_callee_save cs) (fe_num_int_callee_save fe)); intro.
+  intros. unfold restore_callee_save_int, restore_callee_save_regs.
+  apply wt_fold_right; auto.
+  intros. unfold restore_callee_save_reg.
+  case (zlt (index_int_callee_save x) (fe_num_int_callee_save fe)); intro.
   apply wt_instrs_cons; auto.
   constructor. apply int_callee_save_type; auto.
   auto.
 Qed.
 
-Lemma wt_restore_float_callee_save:
-  forall cs k,
-  In cs float_callee_save_regs -> wt_instrs k ->
-  wt_instrs (restore_float_callee_save fe cs k).
+Lemma wt_restore_callee_save_float:
+  forall k,
+  wt_instrs k ->
+  wt_instrs (restore_callee_save_float fe k).
 Proof.
-  intros. unfold restore_float_callee_save.
-  case (zlt (index_float_callee_save cs) (fe_num_float_callee_save fe)); intro.
+  intros. unfold restore_callee_save_float, restore_callee_save_regs.
+  apply wt_fold_right; auto.
+  intros. unfold restore_callee_save_reg.
+  case (zlt (index_float_callee_save x) (fe_num_float_callee_save fe)); intro.
   apply wt_instrs_cons; auto.
   constructor. apply float_callee_save_type; auto.
   auto.
@@ -113,9 +111,7 @@ Lemma wt_save_callee_save:
   wt_instrs k -> wt_instrs (save_callee_save fe k).
 Proof.
   intros. unfold save_callee_save.
-  apply wt_fold_right. exact wt_save_int_callee_save.
-  apply wt_fold_right. exact wt_save_float_callee_save.
-  auto.
+  apply wt_save_callee_save_int. apply wt_save_callee_save_float. auto.
 Qed.
 
 Lemma wt_restore_callee_save:
@@ -123,37 +119,34 @@ Lemma wt_restore_callee_save:
   wt_instrs k -> wt_instrs (restore_callee_save fe k).
 Proof.
   intros. unfold restore_callee_save.
-  apply wt_fold_right. exact wt_restore_int_callee_save.
-  apply wt_fold_right. exact wt_restore_float_callee_save.
-  auto.
+  apply wt_restore_callee_save_int. apply wt_restore_callee_save_float. auto.
 Qed.
 
 Lemma wt_transl_instr:
   forall instr k,
+  In instr f.(Linear.fn_code) ->
   Lineartyping.wt_instr f instr ->
   wt_instrs k ->
   wt_instrs (transl_instr fe instr k).
 Proof.
-  intros. destruct instr; unfold transl_instr; inversion H.
+  intros.
+  generalize (instr_is_within_bounds f instr H H0); intro BND.
+  destruct instr; unfold transl_instr; inv H0; simpl in BND.
   (* getstack *)
-  destruct s; simpl in H3; apply wt_instrs_cons; auto;
+  destruct BND.
+  destruct s; simpl in *; apply wt_instrs_cons; auto;
   constructor; auto.
   (* setstack *)
-  destruct s; simpl in H3; simpl in H4.
-  apply wt_instrs_cons; auto. apply wt_Msetstack'. auto. 
-  apply index_local_valid. auto. 
+  destruct s.
+  apply wt_instrs_cons; auto. apply wt_Msetstack. auto. 
   auto.
-  apply wt_instrs_cons; auto. apply wt_Msetstack'. auto. 
-  apply index_arg_valid. auto. 
+  apply wt_instrs_cons; auto. apply wt_Msetstack. auto. 
   (* op, move *)
   simpl. apply wt_instrs_cons. constructor; auto. auto.
-  (* op, undef *)
-  simpl. apply wt_instrs_cons. constructor. auto.
   (* op, others *)
   apply wt_instrs_cons; auto.
   constructor. 
   destruct o; simpl; congruence.
-  destruct o; simpl; congruence.
   rewrite H6. destruct o; reflexivity || congruence.
   (* load *)
   apply wt_instrs_cons; auto.
@@ -162,10 +155,14 @@ Proof.
   (* store *)
   apply wt_instrs_cons; auto.
   constructor; auto.
-  rewrite H3. destruct a; reflexivity.
+  rewrite H4. destruct a; reflexivity.
   (* call *)
   apply wt_instrs_cons; auto.
   constructor; auto.
+  (* tailcall *)
+  apply wt_restore_callee_save. apply wt_instrs_cons; auto.
+  constructor; auto.
+  destruct s0; auto. rewrite H5; auto.
   (* alloc *)
   apply wt_instrs_cons; auto. constructor. 
   (* label *)
@@ -185,7 +182,7 @@ End TRANSL_FUNCTION.
 
 Lemma wt_transf_function:
   forall f tf, 
-  transf_function f = Some tf ->
+  transf_function f = OK tf ->
   Lineartyping.wt_function f ->
   wt_function tf.
 Proof.
@@ -201,7 +198,7 @@ Proof.
   constructor.
   change (wt_instrs (fn_code tf)).
   rewrite H1; simpl; unfold transl_body. 
-  apply wt_save_callee_save with tf; auto. 
+  apply wt_save_callee_save; auto. 
   unfold transl_code. apply wt_fold_right. 
   intros. eapply wt_transl_instr; eauto. 
   red; intros. elim H3.
@@ -213,20 +210,20 @@ Qed.
 Lemma wt_transf_fundef:
   forall f tf, 
   Lineartyping.wt_fundef f ->
-  transf_fundef f = Some tf ->
+  transf_fundef f = OK tf ->
   wt_fundef tf.
 Proof.
   intros f tf WT. inversion WT; subst.
   simpl; intros; inversion H. constructor.
   unfold transf_fundef, transf_partial_fundef.
-  caseEq (transf_function f0); try congruence.
+  caseEq (transf_function f0); simpl; try congruence.
   intros tfn TRANSF EQ. inversion EQ; subst tf.
   constructor; eapply wt_transf_function; eauto. 
 Qed.
 
 Lemma program_typing_preserved:
   forall (p: Linear.program) (tp: Mach.program),
-  transf_program p = Some tp ->
+  transf_program p = OK tp ->
   Lineartyping.wt_program p ->
   Machtyping.wt_program tp.
 Proof.
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index 4fbdc9fd..15f4676d 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -30,8 +30,8 @@ Require Import LTL.
   dead code (as the "goto L3" in the example above).
 *)
 
-Definition is_goto_block (b: option block) : option node :=
-  match b with Some (Bgoto s) => Some s | _ => None end.
+Definition is_goto_instr (b: option instruction) : option node :=
+  match b with Some (Lnop s) => Some s | _ => None end.
 
 (** [branch_target f pc] returns the node of the CFG that is at
   the end of the branch sequence starting at [pc].  If the instruction
@@ -70,7 +70,7 @@ Fixpoint branch_target_rec (f: LTL.function) (pc: node) (count: nat)
   match count with
   | Datatypes.O => None
   | Datatypes.S count' =>
-      match is_goto_block f.(fn_code)!pc with
+      match is_goto_instr f.(fn_code)!pc with
       | Some s => branch_target_rec f s count'
       | None => Some pc
       end
@@ -86,34 +86,32 @@ Definition branch_target (f: LTL.function) (pc: node) :=
   replacing the destinations of the [Bgoto] and [Bcond] instructions
   with their final target, as computed by [branch_target]. *)
 
-Fixpoint tunnel_block (f: LTL.function) (b: block) {struct b} : block :=
+Definition tunnel_instr (f: LTL.function) (b: instruction) : instruction :=
   match b with
-  | Bgetstack s r b =>
-      Bgetstack s r (tunnel_block f b)
-  | Bsetstack r s b =>
-      Bsetstack r s (tunnel_block f b)
-  | Bop op args res b =>
-      Bop op args res (tunnel_block f b)
-  | Bload chunk addr args dst b =>
-      Bload chunk addr args dst (tunnel_block f b)
-  | Bstore chunk addr args src b =>
-      Bstore chunk addr args src (tunnel_block f b)
-  | Bcall sig ros b =>
-      Bcall sig ros (tunnel_block f b)
-  | Balloc b =>
-      Balloc (tunnel_block f b)
-  | Bgoto s =>
-      Bgoto (branch_target f s)
-  | Bcond cond args s1 s2 =>
-      Bcond cond args (branch_target f s1) (branch_target f s2)
-  | Breturn =>
-      Breturn
+  | Lnop s =>
+      Lnop (branch_target f s)
+  | Lop op args res s =>
+      Lop op args res (branch_target f s)
+  | Lload chunk addr args dst s =>
+      Lload chunk addr args dst (branch_target f s)
+  | Lstore chunk addr args src s =>
+      Lstore chunk addr args src (branch_target f s)
+  | Lcall sig ros args res s =>
+      Lcall sig ros args res (branch_target f s)
+  | Ltailcall sig ros args =>
+      Ltailcall sig ros args
+  | Lalloc arg res s =>
+      Lalloc arg res (branch_target f s)
+  | Lcond cond args s1 s2 =>
+      Lcond cond args (branch_target f s1) (branch_target f s2)
+  | Lreturn or =>
+      Lreturn or
   end.
 
 Lemma wf_tunneled_code:
   forall (f: LTL.function),
-  let tc := PTree.map (fun pc b => tunnel_block f b) (fn_code f) in
-  forall (pc: node), Plt pc (Psucc (fn_entrypoint f)) \/ tc!pc = None.
+  let tc := PTree.map (fun pc b => tunnel_instr f b) (fn_code f) in
+  forall (pc: node), Plt pc (fn_nextpc f) \/ tc!pc = None.
 Proof.
   intros. elim (fn_code_wf f pc); intro.
   left; auto. right; unfold tc.
@@ -123,9 +121,11 @@ Qed.
 Definition tunnel_function (f: LTL.function) : LTL.function :=
   mkfunction
     (fn_sig f)
+    (fn_params f)
     (fn_stacksize f)
-    (PTree.map (fun pc b => tunnel_block f b) (fn_code f))
-    (fn_entrypoint f)
+    (PTree.map (fun pc b => tunnel_instr f b) (fn_code f))
+    (branch_target f (fn_entrypoint f))
+    (fn_nextpc f)
     (wf_tunneled_code f).
 
 Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index eae53cac..3777eaa9 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -7,6 +7,7 @@ Require Import Values.
 Require Import Mem.
 Require Import Events.
 Require Import Globalenvs.
+Require Import Smallstep.
 Require Import Op.
 Require Import Locations.
 Require Import LTL.
@@ -14,51 +15,85 @@ Require Import Tunneling.
 
 (** * Properties of branch target computation *)
 
-Lemma is_goto_block_correct:
+Lemma is_goto_instr_correct:
   forall b s,
-  is_goto_block b = Some s -> b = Some (Bgoto s).
+  is_goto_instr b = Some s -> b = Some (Lnop s).
 Proof.
-  unfold is_goto_block; intros.
-  destruct b. destruct b; discriminate || congruence. 
-  discriminate.
+  unfold is_goto_instr; intros.
+  destruct b; try discriminate.
+  destruct i; discriminate || congruence. 
 Qed. 
 
 Lemma branch_target_rec_1:
   forall f pc n,
   branch_target_rec f pc n = Some pc
   \/ branch_target_rec f pc n = None
-  \/ exists pc', f.(fn_code)!pc = Some(Bgoto pc').
+  \/ exists pc', f.(fn_code)!pc = Some(Lnop pc').
 Proof.
   intros. destruct n; simpl.
   right; left; auto.
-  caseEq (is_goto_block f.(fn_code)!pc); intros.
-  right; right. exists n0. apply is_goto_block_correct; auto.
+  caseEq (is_goto_instr f.(fn_code)!pc); intros.
+  right; right. exists n0. apply is_goto_instr_correct; auto.
   left; auto.
 Qed.
 
 Lemma branch_target_rec_2:
   forall f n pc1 pc2 pc3,
-  f.(fn_code)!pc1 = Some (Bgoto pc2) ->
+  f.(fn_code)!pc1 = Some (Lnop pc2) ->
   branch_target_rec f pc1 n = Some pc3 ->
   branch_target_rec f pc2 n = Some pc3.
 Proof.
   induction n. 
   simpl. intros; discriminate.
   intros pc1 pc2 pc3 ATpc1 H. simpl in H. 
-  unfold is_goto_block in H; rewrite ATpc1 in H.
-  simpl. caseEq (is_goto_block f.(fn_code)!pc2); intros.
-  apply IHn with pc2. apply is_goto_block_correct; auto. auto.
+  unfold is_goto_instr in H; rewrite ATpc1 in H.
+  simpl. caseEq (is_goto_instr f.(fn_code)!pc2); intros.
+  apply IHn with pc2. apply is_goto_instr_correct; auto. auto.
   destruct n; simpl in H. discriminate. rewrite H0 in H. auto.
 Qed.
 
+(** Counting the number of consecutive gotos. *)
+
+Fixpoint count_goto_rec (f: LTL.function) (pc: node) (count: nat)
+                        {struct count} : nat :=
+  match count with
+  | Datatypes.O => Datatypes.O
+  | Datatypes.S count' =>
+      match is_goto_instr f.(fn_code)!pc with
+      | Some s => Datatypes.S (count_goto_rec f s count')
+      | None => Datatypes.O
+      end
+    end.
+
+Definition count_goto (f: LTL.function) (pc: node) : nat :=
+  count_goto_rec f pc 10%nat.
+
+Lemma count_goto_rec_prop:
+  forall f n pc1 pc2 pc3,
+  f.(fn_code)!pc1 = Some (Lnop pc2) ->
+  branch_target_rec f pc1 n = Some pc3 ->
+  (count_goto_rec f pc2 n < count_goto_rec f pc1 n)%nat.
+Proof.
+  induction n.
+  simpl; intros. discriminate.
+  intros pc1 pc2 pc3 ATpc1 H. simpl in H. 
+  unfold is_goto_instr in H; rewrite ATpc1 in H.
+  simpl. unfold is_goto_instr at 2. rewrite ATpc1. 
+  caseEq (is_goto_instr f.(fn_code)!pc2); intros.
+  exploit (IHn pc2); eauto. apply is_goto_instr_correct; eauto. 
+  omega.
+  omega.
+Qed.
+
 (** The following lemma captures the property of [branch_target]
   on which the proof of semantic preservation relies. *)
 
 Lemma branch_target_characterization:
   forall f pc,
   branch_target f pc = pc \/
-  (exists pc', f.(fn_code)!pc = Some(Bgoto pc')
-            /\ branch_target f pc' = branch_target f pc).
+  (exists pc', f.(fn_code)!pc = Some(Lnop pc')
+            /\ branch_target f pc' = branch_target f pc
+            /\ count_goto f pc' < count_goto f pc)%nat.
 Proof.
   intros. unfold branch_target. 
   generalize (branch_target_rec_1 f pc 10%nat). 
@@ -67,7 +102,8 @@ Proof.
   rewrite A. left; auto.
   caseEq (branch_target_rec f pc 10%nat). intros pcx BT.
   right. exists pc'. split. auto. 
-  rewrite (branch_target_rec_2 _ _ _ _ _ AT BT). auto.
+  split. rewrite (branch_target_rec_2 _ _ _ _ _ AT BT). auto.
+  unfold count_goto. eapply count_goto_rec_prop; eauto. 
   intro. left; auto.
 Qed.
 
@@ -103,221 +139,233 @@ Proof.
   destruct f; reflexivity.
 Qed.
 
-(** These are inversion lemmas, characterizing what happens in the LTL
-  semantics when executing [Bgoto] instructions or basic blocks. *)
-
-Lemma exec_instrs_Bgoto_inv:
-  forall sp b1 ls1 m1 t b2 ls2 m2,
-  exec_instrs ge sp b1 ls1 m1 t b2 ls2 m2 ->
-  forall s1,
-  b1 = Bgoto s1 -> t = E0 /\ b2 = b1 /\ ls2 = ls1 /\ m2 = m1.
+Lemma find_function_translated:
+  forall ros ls f,
+  find_function ge ros ls = Some f ->
+  find_function tge ros ls = Some (tunnel_fundef f).
 Proof.
-  induction 1. 
-  intros. tauto.
-  intros. subst b1. inversion H. 
-  intros. generalize (IHexec_instrs1 s1 H2). intros [A [B [C D]]].
-  subst t1 b2 rs2 m2. 
-  generalize (IHexec_instrs2 s1 H2). intros [A [B [C D]]].
-  subst t2 b3 rs3 m3. intuition. traceEq. 
+  intros until f. destruct ros; simpl.
+  intro. apply functions_translated; auto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
+  apply function_ptr_translated; auto.
+  congruence.
 Qed.
 
-Lemma exec_block_Bgoto_inv:
-  forall sp s ls1 m1 t out ls2 m2,  
-  exec_block ge sp (Bgoto s) ls1 m1 t out ls2 m2 ->
-  t = E0 /\ out = Cont s /\ ls2 = ls1 /\ m2 = m1.
-Proof.
-  intros. inversion H;
-  generalize (exec_instrs_Bgoto_inv _ _ _ _ _ _ _ _ H0 s (refl_equal _));
-  intros [A [B [C D]]];
-  try discriminate.
-  intuition congruence.
-Qed.
+(** The proof of semantic preservation is a simulation argument
+  based on diagrams of the following form:
+<<
+           st1 --------------- st2
+            |                   |
+           t|                  ?|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  The [match_states] predicate, defined below, captures the precondition
+  between states [st1] and [st2], as well as the postcondition between
+  [st1'] and [st2'].  One transition in the source code (left) can correspond
+  to zero or one transition in the transformed code (right).  The
+  "zero transition" case occurs when executing a [Lgoto] instruction
+  in the source code that has been removed by tunneling.
+
+  In the definition of [match_states], note that only the control-flow
+  (in particular, the current program point [pc]) is changed:
+  the values of locations and the memory states are identical in the
+  original and transformed codes. *)
 
-Lemma exec_blocks_Bgoto_inv:
-  forall c sp pc1 ls1 m1 t out ls2 m2 s,
-  exec_blocks ge c sp pc1 ls1 m1 t out ls2 m2 ->
-  c!pc1 = Some (Bgoto s) ->
-  (t = E0 /\ out = Cont pc1 /\ ls2 = ls1 /\ m2 = m1)
-  \/ exec_blocks ge c sp s ls1 m1 t out ls2 m2.
+Definition tunneled_code (f: function) :=
+  PTree.map (fun pc b => tunnel_instr f b) (fn_code f).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframes_intro:
+      forall res f sp ls0 pc,
+      match_stackframes
+         (Stackframe res f sp ls0 pc)
+         (Stackframe res (tunnel_function f) sp ls0 (branch_target f pc)).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s f sp pc ls m ts,
+      list_forall2 match_stackframes s ts ->
+      match_states (State s f sp pc ls m)
+                   (State ts (tunnel_function f) sp (branch_target f pc) ls m)
+  | match_states_call:
+      forall s f ls m ts,
+      list_forall2 match_stackframes s ts ->
+      match_states (Callstate s f ls m)
+                   (Callstate ts (tunnel_fundef f) ls m)
+  | match_states_return:
+      forall s sig ls m ts,
+      list_forall2 match_stackframes s ts ->
+      match_states (Returnstate s sig ls m)
+                   (Returnstate ts sig ls m).
+
+Lemma parent_locset_match:
+  forall s ts, list_forall2 match_stackframes s ts -> parent_locset ts = parent_locset s.
 Proof.
-  induction 1; intros.
-  left; tauto.
-  assert (b = Bgoto s). congruence. subst b. 
-  generalize (exec_block_Bgoto_inv _ _ _ _ _ _ _ _ H0). 
-  intros [A [B [C D]]]. subst t out rs' m'.
-  right. apply exec_blocks_refl.
-  elim (IHexec_blocks1 H2).
-  intros [A [B [C D]]]. 
-  assert (pc2 = pc1). congruence. subst t1 rs2 m2 pc2.
-  replace t with t2. apply IHexec_blocks2; auto. traceEq.
-  intro. right. 
-  eapply exec_blocks_trans; eauto. 
+  induction 1; simpl; auto. inv H; auto.
 Qed.
 
-(** The following [exec_*_prop] predicates state the correctness
-  of the tunneling transformation: for each LTL execution
-  in the original code (of an instruction, a sequence of instructions,
-  a basic block, a sequence of basic blocks, etc), there exists
-  a similar LTL execution in the tunneled code.  
-
-  Note that only the control-flow is changed: the values of locations
-  and the memory states are identical in the original and transformed 
-  codes. *)
+(** To preserve non-terminating behaviours, we show that the transformed
+  code cannot take an infinity of "zero transition" cases.
+  We use the following [measure] function over source states,
+  which decreases strictly in the "zero transition" case. *)
 
-Definition tunnel_outcome (f: function) (out: outcome) :=
-  match out with
-  | Cont pc => Cont (branch_target f pc)
-  | Return => Return
+Definition measure (st: state) : nat :=
+  match st with
+  | State s f sp pc ls m => count_goto f pc
+  | Callstate s f ls m => 0%nat
+  | Returnstate s sig ls m => 0%nat
   end.
 
-Definition exec_instr_prop
-  (sp: val) (b1: block) (ls1: locset) (m1: mem) (t: trace)
-            (b2: block) (ls2: locset) (m2: mem) : Prop :=
-  forall f,
-  exec_instr tge sp (tunnel_block f b1) ls1 m1
-                  t (tunnel_block f b2) ls2 m2.
-
-Definition exec_instrs_prop
-  (sp: val) (b1: block) (ls1: locset) (m1: mem) (t: trace)
-            (b2: block) (ls2: locset) (m2: mem) : Prop :=
-  forall f,
-  exec_instrs tge sp (tunnel_block f b1) ls1 m1
-                   t (tunnel_block f b2) ls2 m2.
-
-Definition exec_block_prop
-  (sp: val) (b1: block) (ls1: locset) (m1: mem) (t: trace)
-            (out: outcome) (ls2: locset) (m2: mem) : Prop :=
-  forall f,
-  exec_block tge sp (tunnel_block f b1) ls1 m1
-                  t (tunnel_outcome f out) ls2 m2.
+Lemma branch_target_identity:
+  forall f pc,
+  match f.(fn_code)!pc with Some(Lnop _) => False | _ => True end ->
+  branch_target f pc = pc.
+Proof.
+  intros. 
+  destruct (branch_target_characterization f pc) as [A | [pc' [B C]]].
+  auto. rewrite B in H. contradiction.
+Qed.
+
+Lemma tunnel_function_lookup:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (tunnel_function f).(fn_code)!pc = Some (tunnel_instr f i).
+Proof.
+  intros. simpl. rewrite PTree.gmap. rewrite H. auto.
+Qed.
 
-Definition tunneled_code (f: function) :=
-  PTree.map (fun pc b => tunnel_block f b) (fn_code f).
-
-Definition exec_blocks_prop
-     (c: code) (sp: val)
-     (pc: node) (ls1: locset) (m1: mem) (t: trace)
-     (out: outcome) (ls2: locset) (m2: mem) : Prop :=
-  forall f,
-  f.(fn_code) = c ->
-  exec_blocks tge (tunneled_code f) sp
-              (branch_target f pc) ls1 m1
-            t (tunnel_outcome f out) ls2 m2.
-
-Definition exec_function_prop
-    (f: fundef) (ls1: locset) (m1: mem) (t: trace)
-                (ls2: locset) (m2: mem) : Prop :=
-  exec_function tge (tunnel_fundef f) ls1 m1 t ls2 m2.
-
-Scheme exec_instr_ind5 := Minimality for LTL.exec_instr Sort Prop
-  with exec_instrs_ind5 := Minimality for LTL.exec_instrs Sort Prop
-  with exec_block_ind5 := Minimality for LTL.exec_block Sort Prop
-  with exec_blocks_ind5 := Minimality for LTL.exec_blocks Sort Prop
-  with exec_function_ind5 := Minimality for LTL.exec_function Sort Prop.
-
-(** The proof of semantic preservation is a structural induction
-  over the LTL evaluation derivation of the original program,
-  using the [exec_*_prop] predicates above as induction hypotheses. *)
-
-Lemma tunnel_function_correct:
-  forall f ls1 m1 t ls2 m2,
-  exec_function ge f ls1 m1 t ls2 m2 ->
-  exec_function_prop f ls1 m1 t ls2 m2.
+Lemma tunnel_step_correct:
+  forall st1 t st2, step ge st1 t st2 ->
+  forall st1' (MS: match_states st1 st1'),
+  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
+  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
 Proof.
-  apply (exec_function_ind5 ge
-           exec_instr_prop
-           exec_instrs_prop
-           exec_block_prop
-           exec_blocks_prop
-           exec_function_prop);
-  intros; red; intros; simpl.
-  (* getstack *)
-  constructor.
-  (* setstack *)
-  constructor.
-  (* op *)
-  constructor. rewrite <- H. apply eval_operation_preserved.
-  exact symbols_preserved.
-  (* load *)
-  apply exec_Bload with a. rewrite <- H. 
-  apply eval_addressing_preserved. exact symbols_preserved.
-  auto.
-  (* store *)
-  apply exec_Bstore with a. rewrite <- H.
-  apply eval_addressing_preserved. exact symbols_preserved.
-  auto.
-  (* call *)
-  apply exec_Bcall with (tunnel_fundef f). 
-  generalize H; unfold find_function; destruct ros.
-  intro. apply functions_translated; auto.
-  rewrite symbols_preserved. destruct (Genv.find_symbol ge i).
-  intro. apply function_ptr_translated; auto. congruence.
-  generalize (sig_preserved f). congruence. 
-  apply H2. 
-  (* alloc *)
-  eapply exec_Balloc; eauto. 
-  (* instr_refl *)
-  apply exec_refl.
-  (* instr_one *)
-  apply exec_one. apply H0.
-  (* instr_trans *)
-  apply exec_trans with t1 (tunnel_block f b2) rs2 m2 t2; auto.
-  (* goto *)
-  apply exec_Bgoto. red in H0. simpl in H0. apply H0. 
-  (* cond, true *)
-  eapply exec_Bcond_true. red in H0. simpl in H0. apply H0.  auto.
-  (* cond, false *)
-  eapply exec_Bcond_false. red in H0. simpl in H0. apply H0. auto.
+  induction 1; intros; try inv MS.
+  (* Lnop *)
+  destruct (branch_target_characterization f pc) as [A | [pc1 [B [C D]]]].
+  left; econstructor; split.
+  eapply exec_Lnop. rewrite A. 
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
+  econstructor; eauto. 
+  assert (pc1 = pc') by congruence. subst pc1.
+  right. split. simpl. auto. split. auto. 
+  rewrite <- C. econstructor; eauto. 
+  (* Lop *)
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  left; econstructor; split.
+  eapply exec_Lop with (v := v); eauto.
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
+  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  econstructor; eauto.
+  (* Lload *)
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  left; econstructor; split.
+  eapply exec_Lload; eauto.
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  econstructor; eauto.
+  (* Lstore *)
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  left; econstructor; split.
+  eapply exec_Lstore; eauto.
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.  
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  econstructor; eauto.
+  (* Lcall *)
+  unfold rs1. inv MS. 
+  left; econstructor; split. 
+  eapply exec_Lcall with (f' := tunnel_fundef f'); eauto.
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  rewrite (tunnel_function_lookup _ _ _ H); simpl.
+  rewrite sig_preserved. auto.
+  apply find_function_translated; auto.
+  rewrite sig_preserved; auto. fold rs1. 
+  econstructor; eauto.
+  constructor; auto. 
+  constructor; auto.
+  (* Ltailcall *)
+  unfold rs2, rs1 in *. inv MS. fold rs1. fold rs2. 
+  left; econstructor; split. 
+  eapply exec_Ltailcall with (f' := tunnel_fundef f'); eauto.
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  rewrite (tunnel_function_lookup _ _ _ H); simpl.
+  rewrite sig_preserved. auto.
+  apply find_function_translated; auto.
+  rewrite sig_preserved; auto. fold rs1. 
+  rewrite (parent_locset_match _ _ H9).
+  econstructor; eauto.
+  (* Lalloc *)
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  left; exists (State ts (tunnel_function f) sp (branch_target f pc') rs3 m'); split.
+  unfold rs3, rs2, rs1; eapply exec_Lalloc; eauto.
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; auto.
+  econstructor; eauto.
+  (* cond *)
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  left; econstructor; split.
+  eapply exec_Lcond_true; eauto.
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto.
+  econstructor; eauto.
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  left; econstructor; split.
+  eapply exec_Lcond_false; eauto.
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto.
+  econstructor; eauto.
   (* return *)
-  apply exec_Breturn. red in H0. simpl in H0. apply H0. 
-  (* block_refl *)
-  apply exec_blocks_refl.
-  (* block_one *)
-  red in H1. 
-  elim (branch_target_characterization f pc).
-  intro. rewrite H3. apply exec_blocks_one with (tunnel_block f b).
-  unfold tunneled_code. rewrite PTree.gmap. rewrite H2; rewrite H.
-  reflexivity. apply H1.
-  intros [pc' [ATpc BTS]]. 
-  assert (b = Bgoto pc'). congruence. subst b.
-  generalize (exec_block_Bgoto_inv _ _ _ _ _ _ _ _ H0).
-  intros [A [B [C D]]]. subst t out rs' m'.
-  simpl. rewrite BTS. apply exec_blocks_refl.
-  (* blocks_trans *)
-  apply exec_blocks_trans with t1 (branch_target f pc2) rs2 m2 t2.
-  exact (H0 f H4). exact (H2 f H4). auto.
+  rewrite (branch_target_identity f pc); [idtac | rewrite H; auto].
+  left; econstructor; split.
+  eapply exec_Lreturn; eauto.
+  rewrite (tunnel_function_lookup _ _ _ H); simpl; eauto.
+  simpl. rewrite (parent_locset_match _ _ H7). constructor; auto.
   (* internal function *)
-  econstructor. eexact H. 
-  change (fn_code (tunnel_function f)) with (tunneled_code f).
-  simpl.
-  elim (branch_target_characterization f (fn_entrypoint f)).
-  intro BT. rewrite <- BT. exact (H1 f (refl_equal _)).
-  intros [pc [ATpc BT]].
-  apply exec_blocks_trans with 
-     E0 (branch_target f (fn_entrypoint f)) (call_regs rs) m1 t.
-  eapply exec_blocks_one. 
-  unfold tunneled_code. rewrite PTree.gmap. rewrite ATpc.
-  simpl. reflexivity. 
-  apply exec_Bgoto. rewrite BT. apply exec_refl.
-  exact (H1 f (refl_equal _)). traceEq.
+  simpl. left; econstructor; split.
+  eapply exec_function_internal; eauto.
+  simpl. econstructor; eauto. 
   (* external function *)
-  econstructor; eauto. 
+  simpl. left; econstructor; split.
+  eapply exec_function_external; eauto.
+  simpl. econstructor; eauto. 
+  (* return *)
+  inv H4. inv H1.
+  left; econstructor; split.
+  eapply exec_return; eauto.
+  fold rs1. constructor. auto.
 Qed.
 
-End PRESERVATION.
+Lemma transf_initial_states:
+  forall st1, initial_state p st1 ->
+  exists st2, initial_state tp st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H. 
+  exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) (Genv.init_mem tp)); split.
+  econstructor; eauto.
+  change (prog_main tp) with (prog_main p).
+  rewrite symbols_preserved. eauto.
+  apply function_ptr_translated; auto.
+  rewrite <- H2. apply sig_preserved. 
+  replace (Genv.init_mem tp) with (Genv.init_mem p).
+  constructor. constructor. auto.
+  symmetry. unfold tp, tunnel_program. apply Genv.init_mem_transf.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv H6. constructor. auto. 
+Qed.
 
 Theorem transf_program_correct:
-  forall (p: program) (t: trace) (r: val),
-  exec_program p t r ->
-  exec_program (tunnel_program p) t r.
+  forall (beh: program_behavior),
+  exec_program p beh -> exec_program tp beh.
 Proof.
-  intros p t r [b [f [ls [m [FIND1 [FIND2 [SIG [EX RES]]]]]]]].
-  red. exists b; exists (tunnel_fundef f); exists ls; exists m.
-  split. change (prog_main (tunnel_program p)) with (prog_main p).
-  rewrite <- FIND1. apply symbols_preserved.
-  split. apply function_ptr_translated. assumption.
-  split. generalize (sig_preserved f). congruence.
-  split. apply tunnel_function_correct. 
-  unfold tunnel_program. rewrite Genv.init_mem_transf. auto.
-  rewrite sig_preserved. exact RES.
+  unfold exec_program; intros.
+  eapply simulation_opt_preservation; eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  eexact tunnel_step_correct. 
 Qed.
+
+End PRESERVATION.
diff --git a/backend/Tunnelingtyping.v b/backend/Tunnelingtyping.v
index 6281afa1..c611067c 100644
--- a/backend/Tunnelingtyping.v
+++ b/backend/Tunnelingtyping.v
@@ -11,26 +11,65 @@ Require Import Locations.
 Require Import LTL.
 Require Import LTLtyping.
 Require Import Tunneling.
+Require Import Tunnelingproof.
 
 (** Tunneling preserves typing. *)
 
-Lemma wt_tunnel_block:
-  forall f b,
-  wt_block f b ->
-  wt_block (tunnel_function f) (tunnel_block f b).
+Lemma branch_target_rec_valid:
+  forall f, wt_function f ->
+  forall count pc pc',
+  branch_target_rec f pc count = Some pc' ->
+  valid_successor f pc ->
+  valid_successor f pc'.
 Proof.
-  induction 1; simpl; econstructor; eauto.
+  induction count; simpl.
+  intros; discriminate.
+  intros until pc'. caseEq (is_goto_instr (fn_code f)!pc); intros.
+  generalize (is_goto_instr_correct _ _ H0). intro.
+  eapply IHcount; eauto. 
+  generalize (wt_instrs _ H _ _ H3); intro WTI; inv WTI. auto.
+  inv H1; auto.
+Qed.
+
+Lemma tunnel_valid_successor:
+  forall f pc,
+  valid_successor f pc -> valid_successor (tunnel_function f) pc.
+Proof.
+  intros. destruct H as [i AT].
+  unfold valid_successor; simpl. rewrite PTree.gmap. rewrite AT. 
+  simpl. exists (tunnel_instr f i); auto.
+Qed.
+
+Lemma branch_target_valid:
+  forall f pc,
+  wt_function f ->
+  valid_successor f pc ->
+  valid_successor (tunnel_function f) (branch_target f pc).
+Proof.
+  intros. apply tunnel_valid_successor. 
+  unfold branch_target. caseEq (branch_target_rec f pc 10); intros.
+  eapply branch_target_rec_valid; eauto.
+  auto.
+Qed.
+  
+Lemma wt_tunnel_instr:
+  forall f i,
+  wt_function f ->
+  wt_instr f i -> wt_instr (tunnel_function f) (tunnel_instr f i).
+Proof.
+  intros; inv H0; simpl; econstructor; eauto;
+  eapply branch_target_valid; eauto.
 Qed.
 
 Lemma wt_tunnel_function:
   forall f, wt_function f -> wt_function (tunnel_function f).
 Proof.
-  unfold wt_function; intros until b.
-  simpl. rewrite PTree.gmap. unfold option_map. 
-  caseEq (fn_code f)!pc. intros b0 AT EQ. 
-  injection EQ; intros; subst b. 
-  apply wt_tunnel_block. eauto. 
-  intros; discriminate.
+  intros. inversion H. constructor; simpl; auto.
+  intros until instr. rewrite PTree.gmap. unfold option_map. 
+  caseEq (fn_code f)!pc. intros b0 AT EQ. inv EQ. 
+  apply wt_tunnel_instr; eauto.
+  congruence.
+  eapply branch_target_valid; eauto.
 Qed.
 
 Lemma wt_tunnel_fundef: