From 00b605a1d52696b055dd232a05dd54a312680b93 Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Thu, 26 Mar 2009 09:50:18 +0000
Subject: Added tail call optimization pass

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1015 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 backend/Tailcallproof.v | 737 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 737 insertions(+)
 create mode 100644 backend/Tailcallproof.v

(limited to 'backend/Tailcallproof.v')

diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v
new file mode 100644
index 00000000..8e97e1a4
--- /dev/null
+++ b/backend/Tailcallproof.v
@@ -0,0 +1,737 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Recognition of tail calls: correctness proof *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Mem.
+Require Import Op.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Registers.
+Require Import RTL.
+Require Conventions.
+Require Import Tailcall.
+
+(** * Syntactic properties of the code transformation *)
+
+(** ** Measuring the number of instructions eliminated *)
+
+(** The [return_measure c pc] function counts the number of instructions
+  eliminated by the code transformation, where [pc] is the successor
+  of a call turned into a tailcall.  This is the length of the
+  move/nop/return sequence recognized by the [is_return] boolean function.
+*)
+
+Fixpoint return_measure_rec (n: nat) (c: code) (pc: node)
+                            {struct n}: nat :=
+  match n with
+  | O => O
+  | S n' =>
+      match c!pc with
+      | Some(Inop s) => S(return_measure_rec n' c s)
+      | Some(Iop op args dst s) => S(return_measure_rec n' c s)
+      | _ => O
+      end
+  end.
+
+Definition return_measure (c: code) (pc: node) :=
+  return_measure_rec niter c pc.
+
+Lemma return_measure_bounds:
+  forall f pc, (return_measure f pc <= niter)%nat.
+Proof.
+  intro f.
+  assert (forall n pc, (return_measure_rec n f pc <= n)%nat).
+    induction n; intros; simpl.
+    omega.
+    destruct (f!pc); try omega. 
+    destruct i; try omega.
+    generalize (IHn n0). omega.
+    generalize (IHn n0). omega.
+  intros. unfold return_measure. apply H.
+Qed.
+
+Remark return_measure_rec_incr:
+  forall f n1 n2 pc,
+  (n1 <= n2)%nat ->
+  (return_measure_rec n1 f pc <= return_measure_rec n2 f pc)%nat.
+Proof.
+  induction n1; intros; simpl.
+  omega.
+  destruct n2. omegaContradiction. assert (n1 <= n2)%nat by omega.
+  simpl. destruct f!pc; try omega. destruct i; try omega.
+  generalize (IHn1 n2 n H0). omega.
+  generalize (IHn1 n2 n H0). omega.
+Qed.
+
+Lemma is_return_measure_rec:
+  forall f n n' pc r,
+  is_return n f pc r = true -> (n <= n')%nat ->
+  return_measure_rec n f.(fn_code) pc = return_measure_rec n' f.(fn_code) pc.
+Proof.
+  induction n; simpl; intros.
+  congruence.
+  destruct n'. omegaContradiction. simpl.
+  destruct (fn_code f)!pc; try congruence.
+  destruct i; try congruence.
+  decEq. apply IHn with r. auto. omega.
+  destruct (is_move_operation o l); try congruence.
+  destruct (Reg.eq r r1); try congruence.
+  decEq. apply IHn with r0. auto. omega.
+Qed.
+
+(** ** Relational characterization of the code transformation *)
+
+(** The [is_return_spec] characterizes the instruction sequences
+  recognized by the [is_return] boolean function.  *)
+
+Inductive is_return_spec (f:function): node -> reg -> Prop :=
+  | is_return_none: forall pc r,
+      f.(fn_code)!pc = Some(Ireturn None) ->
+      is_return_spec f pc r
+  | is_return_some: forall pc r,
+      f.(fn_code)!pc = Some(Ireturn (Some r)) ->
+      is_return_spec f pc r
+  | is_return_nop: forall pc r s,
+      f.(fn_code)!pc = Some(Inop s) ->
+      is_return_spec f s r ->
+      (return_measure f.(fn_code) s < return_measure f.(fn_code) pc)%nat ->
+      is_return_spec f pc r
+  | is_return_move: forall pc r r' s,
+      f.(fn_code)!pc = Some(Iop Omove (r::nil) r' s) ->
+      is_return_spec f s r' ->
+      (return_measure f.(fn_code) s < return_measure f.(fn_code) pc)%nat ->
+     is_return_spec f pc r.
+
+Lemma is_return_charact:
+  forall f n pc rret,
+  is_return n f pc rret = true -> (n <= niter)%nat ->
+  is_return_spec f pc rret.
+Proof.
+  induction n; intros.
+  simpl in H. congruence.
+  generalize H. simpl. 
+  caseEq ((fn_code f)!pc); try congruence.
+  intro i. caseEq i; try congruence.
+  intros s; intros. eapply is_return_nop; eauto. eapply IHn; eauto. omega.
+  unfold return_measure.
+  rewrite <- (is_return_measure_rec f (S n) niter pc rret); auto.
+  rewrite <- (is_return_measure_rec f n niter s rret); auto. 
+  simpl. rewrite H2. omega. omega.
+
+  intros op args dst s EQ1 EQ2. 
+  caseEq (is_move_operation op args); try congruence.
+  intros src IMO. destruct (Reg.eq rret src); try congruence.
+  subst rret. intro. 
+  exploit is_move_operation_correct; eauto. intros [A B]. subst. 
+  eapply is_return_move; eauto. eapply IHn; eauto. omega.
+  unfold return_measure.
+  rewrite <- (is_return_measure_rec f (S n) niter pc src); auto.
+  rewrite <- (is_return_measure_rec f n niter s dst); auto. 
+  simpl. rewrite EQ2. omega. omega.
+ 
+  intros or EQ1 EQ2. destruct or; intros. 
+  assert (r = rret). eapply proj_sumbool_true; eauto. subst r. 
+  apply is_return_some; auto.
+  apply is_return_none; auto.
+Qed.
+
+(** The [transf_instr_spec] predicate relates one instruction in the
+  initial code with its possible transformations in the optimized code. *)
+
+Inductive transf_instr_spec (f: function): instruction -> instruction -> Prop :=
+  | transf_instr_tailcall: forall sig ros args res s,
+      f.(fn_stacksize) = 0 ->
+      is_return_spec f s res ->
+      transf_instr_spec f (Icall sig ros args res s) (Itailcall sig ros args)
+  | transf_instr_default: forall i,
+      transf_instr_spec f i i.
+
+Lemma transf_instr_charact:
+  forall f pc instr,
+  f.(fn_stacksize) = 0 ->
+  transf_instr_spec f instr (transf_instr f pc instr).
+Proof.
+  intros. unfold transf_instr. destruct instr; try constructor.
+  caseEq (is_return niter f n r && Conventions.tailcall_is_possible s &&
+          opt_typ_eq (sig_res s) (sig_res (fn_sig f))); intros.
+  destruct (andb_prop _ _ H0). destruct (andb_prop _ _ H1).
+  eapply transf_instr_tailcall; eauto.
+  eapply is_return_charact; eauto. 
+  constructor.
+Qed.
+
+Lemma transf_instr_lookup:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  exists i',  (transf_function f).(fn_code)!pc = Some i' /\ transf_instr_spec f i i'.
+Proof.
+  intros. unfold transf_function. destruct (zeq (fn_stacksize f) 0).
+  simpl. rewrite PTree.gmap. rewrite H. simpl. 
+  exists (transf_instr f pc i); split. auto. apply transf_instr_charact; auto. 
+  exists i; split. auto. constructor.
+Qed.
+
+(** * Semantic properties of the code transformation *)
+
+(** ** The ``less defined than'' relation between register states *)
+
+(** A call followed by a return without an argument can be turned
+  into a tail call.  In this case, the original function returns
+  [Vundef], while the transformed function can return any value.
+  We account for this situation by using the ``less defined than''
+  relation between values and between memory states.  We need to
+  extend it pointwise to register states. *)
+
+Definition regset_lessdef (rs rs': regset) : Prop :=
+  forall r, Val.lessdef (rs#r) (rs'#r).
+
+Lemma regset_get_list:
+  forall rs rs' l,
+  regset_lessdef rs rs' -> Val.lessdef_list (rs##l) (rs'##l).
+Proof.
+  induction l; simpl; intros; constructor; auto.
+Qed.
+
+Lemma regset_set:
+  forall rs rs' v v' r,
+  regset_lessdef rs rs' -> Val.lessdef v v' ->
+  regset_lessdef (rs#r <- v) (rs'#r <- v').
+Proof.
+  intros; red; intros. repeat rewrite PMap.gsspec. destruct (peq r0 r); auto. 
+Qed.
+
+Lemma regset_init_regs:
+  forall params vl vl',
+  Val.lessdef_list vl vl' ->
+  regset_lessdef (init_regs vl params) (init_regs vl' params).
+Proof.
+  induction params; intros.
+  simpl. red; intros. rewrite Regmap.gi. constructor.
+  simpl. inv H.   red; intros. rewrite Regmap.gi. constructor.
+  apply regset_set. auto. auto.
+Qed.
+
+(** ** Agreement between the size of a stack block and a function *)
+
+(** To reason over deallocation of empty stack blocks, we need to
+  maintain the invariant that the bounds of a stack block
+  for function [f] are always [0, f.(fn_stacksize)]. *)
+
+Inductive match_stacksize: function -> block -> mem -> Z -> Prop :=
+  | match_stacksize_intro: forall f sp m bound,
+      sp < bound ->
+      low_bound m sp = 0 ->
+      high_bound m sp = f.(fn_stacksize) ->
+      match_stacksize f sp m bound.
+
+Lemma match_stacksize_store:
+  forall m m' chunk  b ofs v f sp bound,
+  store chunk m b ofs v = Some m' ->
+  match_stacksize f sp m bound ->
+  match_stacksize f sp m' bound.
+Proof.
+  intros. inv H0. constructor. auto.
+  rewrite <- H2. eapply Mem.low_bound_store; eauto.
+  rewrite <- H3. eapply Mem.high_bound_store; eauto.
+Qed.
+
+Lemma match_stacksize_alloc_other:
+  forall m m' lo hi b f sp bound,
+  alloc m lo hi = (m', b) ->
+  match_stacksize f sp m bound ->
+  bound <= m.(nextblock) ->
+  match_stacksize f sp m' bound.
+Proof.
+  intros. inv H0.
+  assert (valid_block m sp). red. omega.
+  constructor. auto.
+  rewrite <- H3. eapply low_bound_alloc_other; eauto.
+  rewrite <- H4. eapply high_bound_alloc_other; eauto.
+Qed.
+
+Lemma match_stacksize_alloc_same:
+  forall m f m' sp,
+  alloc m 0 f.(fn_stacksize) = (m', sp) ->
+  match_stacksize f sp m' m'.(nextblock).
+Proof.
+  intros. constructor. 
+  unfold alloc in H. inv H. simpl. omega.
+  eapply low_bound_alloc_same; eauto.
+  eapply high_bound_alloc_same; eauto.
+Qed.
+
+Lemma match_stacksize_free:
+  forall f sp m b bound,
+  match_stacksize f sp m bound ->
+  bound <= b ->
+  match_stacksize f sp (free m b) bound.
+Proof.
+  intros. inv H. constructor. auto.
+  rewrite <- H2. apply low_bound_free. unfold block; omega.
+  rewrite <- H3. apply high_bound_free. unfold block; omega.
+Qed.
+
+(** * Proof of semantic preservation *)
+
+Section PRESERVATION.
+
+Variable prog: program.
+Let tprog := transf_program prog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_transf transf_fundef prog).
+
+Lemma functions_translated:
+  forall (v: val) (f: RTL.fundef),
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog).
+
+Lemma funct_ptr_translated:
+  forall (b: block) (f: RTL.fundef),
+  Genv.find_funct_ptr ge b = Some f ->
+  Genv.find_funct_ptr tge b = Some (transf_fundef f).
+Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; auto. simpl. unfold transf_function. 
+  destruct (zeq (fn_stacksize f) 0); auto. 
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs rs' f,
+  find_function ge ros rs = Some f ->
+  regset_lessdef rs rs' ->
+  find_function tge ros rs' = Some (transf_fundef f).
+Proof.
+  intros until f; destruct ros; simpl.
+  intros.
+  assert (rs'#r = rs#r).
+    exploit Genv.find_funct_inv; eauto. intros [b EQ].
+    generalize (H0 r). rewrite EQ. intro LD. inv LD. auto.
+  rewrite H1. apply functions_translated; auto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge i); intros.
+  apply funct_ptr_translated; auto.
+  discriminate.
+Qed.
+
+(** Consider an execution of a call/move/nop/return sequence in the
+  original code and the corresponding tailcall in the transformed code.
+  The transition sequences are of the following form
+  (left: original code, right: transformed code).
+  [f] is the calling function and [fd] the called function.
+<<
+     State stk f (Icall instruction)       State stk' f' (Itailcall)
+
+     Callstate (frame::stk) fd args        Callstate stk' fd' args'
+            .                                       .
+            .                                       .
+            .                                       .
+     Returnstate (frame::stk) res          Returnstate stk' res'
+
+     State stk f (move/nop/return seq)
+            .
+            .
+            .
+     State stk f (return instr)
+
+     Returnstate stk res
+>>
+The simulation invariant must therefore account for two kinds of
+mismatches between the transition sequences:
+- The call stack of the original program can contain more frames
+  than that of the transformed program (e.g. [frame] in the example above).
+- The regular states corresponding to executing the move/nop/return
+  sequence must all correspond to the single [Returnstate stk' res']
+  state of the transformed program.
+
+We first define the simulation invariant between call stacks.
+The first two cases are standard, but the third case corresponds
+to a frame that was eliminated by the transformation. *)
+
+Inductive match_stackframes: mem -> Z -> list stackframe -> list stackframe -> Prop :=
+  | match_stackframes_nil: forall m bound,
+      match_stackframes m bound nil nil
+  | match_stackframes_normal: forall m bound stk stk' res sp pc rs rs' f,
+      match_stackframes m sp stk stk' ->
+      match_stacksize f sp m bound ->
+      regset_lessdef rs rs' ->
+      match_stackframes m bound
+        (Stackframe res f.(fn_code) (Vptr sp Int.zero) pc rs :: stk)
+        (Stackframe res (transf_function f).(fn_code) (Vptr sp Int.zero) pc rs' :: stk')
+  | match_stackframes_tail: forall m bound stk stk' res sp pc rs f,
+      match_stackframes m sp stk stk' ->
+      match_stacksize f sp m bound ->
+      is_return_spec f pc res ->
+      f.(fn_stacksize) = 0 ->
+      match_stackframes m bound
+        (Stackframe res f.(fn_code) (Vptr sp Int.zero) pc rs :: stk)
+        stk'.
+
+(** In [match_stackframes m bound s s'], the memory state [m] is used
+  to check that the sizes of the stack blocks agree with what was
+  declared by the corresponding functions.  The [bound] parameter
+  is used to enforce separation between the stack blocks. *)
+
+Lemma match_stackframes_incr:
+  forall m bound s s' bound',
+  match_stackframes m bound s s' ->
+  bound <= bound' ->
+  match_stackframes m bound' s s'.
+Proof.
+  intros. inv H; econstructor; eauto. 
+  inv H2. constructor; auto. omega.
+  inv H2. constructor; auto. omega.
+Qed.
+
+Lemma match_stackframes_store:
+  forall m bound s s',
+  match_stackframes m bound s s' -> 
+  forall chunk b ofs v m',
+  store chunk m b ofs v = Some m' ->
+  match_stackframes m' bound s s'.
+Proof.
+  induction 1; intros.
+  constructor.
+  econstructor; eauto. eapply match_stacksize_store; eauto.
+  econstructor; eauto. eapply match_stacksize_store; eauto.
+Qed.
+
+Lemma match_stackframes_alloc:
+  forall m lo hi m' sp s s',
+  match_stackframes m (nextblock m) s s' ->
+  alloc m lo hi = (m', sp) ->
+  match_stackframes m' sp s s'.
+Proof.
+  intros. 
+  assert (forall bound s s',
+    match_stackframes m bound s s' ->
+    bound <= m.(nextblock) ->
+    match_stackframes m' bound s s').
+  induction 1; intros. constructor.
+  constructor; auto. apply IHmatch_stackframes; auto. inv H2. omega. 
+  eapply match_stacksize_alloc_other; eauto.
+  econstructor; eauto.  apply IHmatch_stackframes; auto. inv H2. omega. 
+  eapply match_stacksize_alloc_other; eauto.
+  exploit alloc_result; eauto. intro. rewrite H2.
+  eapply H1; eauto. omega.
+Qed. 
+
+Lemma match_stackframes_free:
+  forall f sp m s s',
+  match_stacksize f sp m (nextblock m) ->
+  match_stackframes m sp s s' ->
+  match_stackframes (free m sp) (nextblock (free m sp)) s s'.
+Proof.
+  intros. simpl. 
+  assert (forall bound s s',
+    match_stackframes m bound s s' ->
+    bound <= sp ->
+    match_stackframes (free m sp) bound s s').
+  induction 1; intros. constructor.
+  constructor; auto. apply IHmatch_stackframes; auto. inv H2; omega. 
+  apply match_stacksize_free; auto.
+  econstructor; eauto. apply IHmatch_stackframes; auto. inv H2; omega. 
+  apply match_stacksize_free; auto.
+
+  apply match_stackframes_incr with sp. apply H1; auto. omega.
+  inv H. omega. 
+Qed.
+
+(** Here is the invariant relating two states.  The first three
+  cases are standard.  Note the ``less defined than'' conditions
+  over values, register states, and memory states. *)
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_normal:
+      forall s sp pc rs m s' rs' m' f
+             (STKSZ: match_stacksize f sp m m.(nextblock))
+             (STACKS: match_stackframes m sp s s')
+             (RLD: regset_lessdef rs rs')
+             (MLD: Mem.lessdef m m'),
+      match_states (State s f.(fn_code) (Vptr sp Int.zero) pc rs m)
+                   (State s' (transf_function f).(fn_code) (Vptr sp Int.zero) pc rs' m')
+  | match_states_call:
+      forall s f args m s' args' m',
+      match_stackframes m m.(nextblock) s s' ->
+      Val.lessdef_list args args' ->
+      Mem.lessdef m m' ->
+      match_states (Callstate s f args m)
+                   (Callstate s' (transf_fundef f) args' m')
+  | match_states_return:
+      forall s v m s' v' m',
+      match_stackframes m m.(nextblock) s s' ->
+      Val.lessdef v v' ->
+      Mem.lessdef m m' ->
+      match_states (Returnstate s v m)
+                   (Returnstate s' v' m')
+  | match_states_interm:
+      forall s sp pc rs m s' m' f r v'
+             (STKSZ: match_stacksize f sp m m.(nextblock))
+             (STACKS: match_stackframes m sp s s')
+             (MLD: Mem.lessdef m m'),
+      is_return_spec f pc r ->
+      f.(fn_stacksize) = 0 ->
+      Val.lessdef (rs#r) v' ->
+      match_states (State s f.(fn_code) (Vptr sp Int.zero) pc rs m)
+                   (Returnstate s' v' m').
+
+(** The last case of [match_states] corresponds to the execution
+  of a move/nop/return sequence in the original code that was
+  eliminated by the transformation:
+<<
+     State stk f (move/nop/return seq)  ~~  Returnstate stk' res'
+            .
+            .
+            .
+     State stk f (return instr)         ~~  Returnstate stk' res'
+>>
+  To preserve non-terminating behaviors, we need to make sure
+  that the execution of this sequence in the original code cannot
+  diverge.  For this, we introduce the following complicated
+  measure over states, which will decrease strictly whenever
+  the original code makes a transition but the transformed code
+  does not. *)
+
+Definition measure (st: state) : nat :=
+  match st with
+  | State s c sp pc rs m => (List.length s * (niter + 2) + return_measure c pc + 1)%nat
+  | Callstate s f args m => 0%nat
+  | Returnstate s v m => (List.length s * (niter + 2))%nat
+  end.
+
+Ltac TransfInstr :=
+  match goal with
+  | H: (PTree.get _ (fn_code _) = _) |- _ =>
+      destruct (transf_instr_lookup _ _ _ H) as [i' [TINSTR TSPEC]]; inv TSPEC
+  end.
+
+Ltac EliminatedInstr :=
+  match goal with
+  | H: (is_return_spec _ _ _) |- _ => inv H; try congruence
+  | _ => idtac
+  end.
+
+(** The proof of semantic preservation, then, is a simulation diagram
+  of the ``option'' kind. *)
+
+Lemma transf_step_correct:
+  forall s1 t s2, step ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1'),
+  (exists s2', step tge s1' t s2' /\ match_states s2 s2')
+  \/ (measure s2 < measure s1 /\ t = E0 /\ match_states s2 s1')%nat.
+Proof.
+  induction 1; intros; inv MS; EliminatedInstr.
+
+(* nop *)
+  TransfInstr. left. econstructor; split. 
+  eapply exec_Inop; eauto. constructor; auto.
+(* eliminated nop *)
+  assert (s0 = pc') by congruence. subst s0.
+  right. split. simpl. omega. split. auto. 
+  econstructor; eauto. 
+
+(* op *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regset_get_list; auto. 
+  exploit eval_operation_lessdef; eauto. 
+  intros [v' [EVAL' VLD]]. 
+  left. exists (State s' (fn_code (transf_function f)) (Vptr sp0 Int.zero) pc' (rs'#res <- v') m'); split.
+  eapply exec_Iop; eauto.  rewrite <- EVAL'.
+  apply eval_operation_preserved. exact symbols_preserved.
+  econstructor; eauto. apply regset_set; auto.
+(* eliminated move *)
+  rewrite H1 in H. clear H1. inv H. 
+  right. split. simpl. omega. split. auto.
+  econstructor; eauto. simpl in H0. rewrite PMap.gss. congruence. 
+
+(* load *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regset_get_list; auto. 
+  exploit eval_addressing_lessdef; eauto. 
+  intros [a' [ADDR' ALD]].
+  exploit loadv_lessdef; eauto. 
+  intros [v' [LOAD' VLD]].
+  left. exists (State s' (fn_code (transf_function f)) (Vptr sp0 Int.zero) pc' (rs'#dst <- v') m'); split.
+  eapply exec_Iload; eauto.  rewrite <- ADDR'.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  econstructor; eauto. apply regset_set; auto.
+
+(* store *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regset_get_list; auto. 
+  exploit eval_addressing_lessdef; eauto. 
+  intros [a' [ADDR' ALD]].
+  exploit storev_lessdef. 4: eexact H1. eauto. eauto. apply RLD.  
+  intros [m'1 [STORE' MLD']].
+  left. exists (State s' (fn_code (transf_function f)) (Vptr sp0 Int.zero) pc' rs' m'1); split.
+  eapply exec_Istore; eauto.  rewrite <- ADDR'.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  destruct a; simpl in H1; try discriminate.
+  econstructor; eauto.
+  eapply match_stacksize_store; eauto. 
+  rewrite (nextblock_store _ _ _ _ _ _ H1). auto.
+  eapply match_stackframes_store; eauto.
+
+(* call *)
+  exploit find_function_translated; eauto. intro FIND'.  
+  TransfInstr.
+(* call turned tailcall *)
+  left. exists (Callstate s' (transf_fundef f) (rs'##args) (Mem.free m' sp0)); split.
+  eapply exec_Itailcall; eauto. apply sig_preserved. 
+  constructor. eapply match_stackframes_tail; eauto. apply regset_get_list; auto.
+  apply Mem.free_right_lessdef; auto. inv STKSZ. omega.  
+(* call that remains a call *)
+  left. exists (Callstate (Stackframe res (fn_code (transf_function f0)) (Vptr sp0 Int.zero) pc' rs' :: s')
+                          (transf_fundef f) (rs'##args) m'); split.
+  eapply exec_Icall; eauto. apply sig_preserved. 
+  constructor. constructor; auto. apply regset_get_list; auto. auto. 
+
+(* tailcall *) 
+  exploit find_function_translated; eauto. intro FIND'.  
+  TransfInstr.
+  left. exists (Callstate s' (transf_fundef f) (rs'##args) (Mem.free m' stk)); split.
+  eapply exec_Itailcall; eauto. apply sig_preserved. 
+  constructor. eapply match_stackframes_free; eauto. 
+  apply regset_get_list; auto. apply Mem.free_lessdef; auto.
+
+(* cond true *)
+  TransfInstr. 
+  left. exists (State s' (fn_code (transf_function f)) (Vptr sp0 Int.zero) ifso rs' m'); split.
+  eapply exec_Icond_true; eauto.
+  apply eval_condition_lessdef with (rs##args); auto. apply regset_get_list; auto.
+  constructor; auto. 
+
+(* cond false *)
+  TransfInstr. 
+  left. exists (State s' (fn_code (transf_function f)) (Vptr sp0 Int.zero) ifnot rs' m'); split.
+  eapply exec_Icond_false; eauto.
+  apply eval_condition_lessdef with (rs##args); auto. apply regset_get_list; auto.
+  constructor; auto. 
+
+(* return *)
+  TransfInstr.
+  left. exists (Returnstate s' (regmap_optget or Vundef rs') (free m' stk)); split.
+  apply exec_Ireturn; auto.
+  constructor.
+  eapply match_stackframes_free; eauto. 
+  destruct or; simpl. apply RLD. constructor.
+  apply Mem.free_lessdef; auto.
+
+(* eliminated return None *)
+  assert (or = None) by congruence. subst or. 
+  right. split. simpl. omega. split. auto. 
+  constructor.
+  eapply match_stackframes_free; eauto.
+  simpl. constructor.
+  apply Mem.free_left_lessdef; auto. 
+
+(* eliminated return Some *)
+  assert (or = Some r) by congruence. subst or.
+  right. split. simpl. omega. split. auto.
+  constructor.
+  eapply match_stackframes_free; eauto.
+  simpl. auto.
+  apply Mem.free_left_lessdef; auto.
+
+(* internal call *)
+  caseEq (alloc m'0 0 (fn_stacksize f)). intros m'1 stk' ALLOC'.
+  exploit alloc_lessdef; eauto. intros [EQ1 LD']. subst stk'. 
+  assert (fn_stacksize (transf_function f) = fn_stacksize f /\
+          fn_entrypoint (transf_function f) = fn_entrypoint f /\
+          fn_params (transf_function f) = fn_params f).
+    unfold transf_function. destruct (zeq (fn_stacksize f) 0); auto. 
+  destruct H0 as [EQ1 [EQ2 EQ3]]. 
+  left. econstructor; split.
+  simpl. eapply exec_function_internal; eauto. rewrite EQ1; eauto.
+  rewrite EQ2. rewrite EQ3. constructor; auto.
+  eapply match_stacksize_alloc_same; eauto.
+  eapply match_stackframes_alloc; eauto. 
+  apply regset_init_regs. auto. 
+
+(* external call *)
+  exploit event_match_lessdef; eauto. intros [res' [EVM' VLD']]. 
+  left. exists (Returnstate s' res' m'); split.
+  simpl. econstructor; eauto.
+  constructor; auto. 
+
+(* returnstate *)
+  inv H2. 
+(* synchronous return in both programs *)
+  left. econstructor; split. 
+  apply exec_return. 
+  constructor; auto. apply regset_set; auto. 
+(* return instr in source program, eliminated because of tailcall *)
+  right. split. unfold measure. simpl length. 
+  change (S (length s) * (niter + 2))%nat
+   with ((niter + 2) + (length s) * (niter + 2))%nat. 
+  generalize (return_measure_bounds (fn_code f) pc). omega.  
+  split. auto. 
+  econstructor; eauto.
+  rewrite Regmap.gss. auto. 
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, initial_state prog st1 ->
+  exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inv H. 
+  exploit funct_ptr_translated; eauto. intro FIND.
+  exists (Callstate nil (transf_fundef f) nil (Genv.init_mem tprog)); split.
+  econstructor; eauto.
+  replace (prog_main tprog) with (prog_main prog).
+  rewrite symbols_preserved. eauto.
+  reflexivity.
+  rewrite <- H2. apply sig_preserved.
+  replace (Genv.init_mem tprog) with (Genv.init_mem prog).
+  constructor. constructor. constructor. apply lessdef_refl. 
+  symmetry. unfold tprog, transf_program. apply Genv.init_mem_transf. 
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r, 
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv H5. inv H3. constructor. 
+Qed.
+
+
+(** The preservation of the observable behavior of the program then
+  follows, using the generic preservation theorem
+  [Smallstep.simulation_opt_preservation]. *)
+
+Theorem transf_program_correct:
+  forall (beh: program_behavior),
+  exec_program prog beh -> exec_program tprog beh.
+Proof.
+  unfold exec_program; intros.
+  eapply simulation_opt_preservation with (measure := measure); eauto.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact transf_step_correct. 
+Qed.
+
+End PRESERVATION.
+
+
-- 
cgit