1 files changed, 304 insertions, 352 deletions
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index 68913fc9..3bc92f75 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -3,6 +3,7 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -12,252 +13,163 @@
 
 (** Correctness proof for the branch tunneling optimization. *)
 
-Require Import FunInd.
-Require Import Coqlib Maps UnionFind.
+Require Import Coqlib Maps Errors.
 Require Import AST Linking.
 Require Import Values Memory Events Globalenvs Smallstep.
 Require Import Op Locations LTL.
 Require Import Tunneling.
 
-Definition match_prog (p tp: program) :=
-  match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp.
+Local Open Scope nat.
 
-Lemma transf_program_match:
-  forall p, match_prog p (tunnel_program p).
-Proof.
-  intros. eapply match_transform_program; eauto.
-Qed.
 
-(** * Properties of the branch map computed using union-find. *)
+(** * Properties of the branch_target, when the verifier succeeds *)
+
+Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
+   ok <> None -> forall pc, c!pc = None -> td!pc = None.
 
-Section BRANCH_MAP_CORRECT.
+Lemma check_included_correct (td: UF) (c: code): 
+  check_included_spec c td (check_included td c).
+Proof. 
+  apply PTree_Properties.fold_rec with (P := check_included_spec c).
+- (* extensionality *)
+  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_included_spec.
+  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
+  intros; eapply IND; try congruence.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
+ | TB_default (TB: target pc = pc) ob
+     : target_bounds target bound pc ob
+ | TB_branch s bb
+     (EQ: target pc = target s)
+     (DECREASE: bound s < bound pc)
+     : target_bounds target bound pc (Some (Lbranch s::bb))
+ | TB_cond cond args s1 s2 info bb
+     (EQ1: target pc = target s1)
+     (EQ2: target pc = target s2)
+     (DEC1: bound s1 < bound pc)
+     (DEC2: bound s2 < bound pc)
+     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
+ .
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
+Proof.
+  unfold target, get. intros H; rewrite H; auto.
+Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
 
-Variable fn: LTL.function.
+Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto.
+Qed.
+Local Hint Resolve get_nonneg: core.
 
-Definition measure_branch (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
-  fun x => if peq (U.repr u s) pc then f x
-           else if peq (U.repr u x) pc then (f x + f s + 1)%nat
-           else f x.
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
 
-Definition measure_cond (u: U.t) (pc s1 s2: node) (f: node -> nat) : node -> nat :=
-  fun x => if peq (U.repr u s1) pc then f x
-           else if peq (U.repr u x) pc then (f x + Nat.max (f s1) (f s2) + 1)%nat
-           else f x.
+Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
+  check_bblock td pc bb = OK tt -> 
+  target_bounds (target td) (bound td) pc (Some bb).
+Proof.
+  unfold check_bblock, bound.
+  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
+  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
+  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
+  destruct bb as [|[ ] bb]; simpl; try congruence.
+  + destruct (get td s) as (ts, ds) eqn:Hs.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_branch.
+    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
+    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
+  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
+    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_cond.
+    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
+    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
+    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
+    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
+Qed.
 
-Definition branch_map_correct_1 (c: code) (u: U.t) (f: node -> nat): Prop :=
-  forall pc,
-  match c!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
-  | _ =>
-      U.repr u pc = pc
-  end.
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
 
-Lemma record_branch_correct:
-  forall c u f pc b,
-  branch_map_correct_1 (PTree.remove pc c) u f ->
-  c!pc = Some b ->
-  { f' | branch_map_correct_1 c (record_branch u pc b) f' }.
-Proof.
-  intros c u f pc b BMC GET1.
-  assert (PC: U.repr u pc = pc).
-  { specialize (BMC pc). rewrite PTree.grs in BMC. auto. }
-  assert (DFL: { f | branch_map_correct_1 c u f }).
-  { exists f. intros p. destruct (peq p pc).
-  - subst p. rewrite GET1. destruct b as [ | [] b ]; auto.
-  - specialize (BMC p). rewrite PTree.gro in BMC by auto. exact BMC.
-  }
-  unfold record_branch. destruct b as [ | [] b ]; auto.
-  exists (measure_branch u pc s f). intros p. destruct (peq p pc).
-+ subst p. rewrite GET1. unfold measure_branch.
-  rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3.
-  destruct (peq (U.repr u s) pc); auto. rewrite PC, peq_true. right; split; auto. lia.
-+ specialize (BMC p). rewrite PTree.gro in BMC by auto.
-  assert (U.repr u p = p -> U.repr (U.union u pc s) p = p).
-  { intro. rewrite <- H at 2. apply U.repr_union_1. congruence. }
-  destruct (c!p) as [ [ | [] _ ] | ]; auto.
-  destruct BMC as [A | [A B]]. auto.
-  right; split. apply U.sameclass_union_2; auto.
-  unfold measure_branch. destruct (peq (U.repr u s) pc). auto.
-  rewrite A. destruct (peq (U.repr u s0) pc); lia.
-Qed.
-
-Lemma record_branches_correct:
-  { f | branch_map_correct_1 fn.(fn_code) (record_branches fn) f }.
-Proof.
-  unfold record_branches. apply PTree_Properties.fold_ind.
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P := check_code_spec td).
+- (* extensionality *)
+  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
 - (* base case *)
-  intros m EMPTY. exists (fun _ => O). 
-  red; intros. rewrite EMPTY. apply U.repr_empty.
+  intros _ pc.  rewrite PTree.gempty; try congruence.
 - (* inductive case *)
-  intros m u pc bb GET1 GET2 [f BMC]. eapply record_branch_correct; eauto.
-Qed.
-
-Definition branch_map_correct_2 (c: code) (u: U.t) (f: node -> nat): Prop :=
-  forall pc,
-  match fn.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
-  | Some(Lcond cond args s1 s2 :: b) =>
-      U.repr u pc = pc \/ (c!pc = None /\ U.repr u pc = U.repr u s1 /\ U.repr u pc = U.repr u s2 /\ f s1 < f pc /\ f s2 < f pc)%nat
-  | _ =>
-      U.repr u pc = pc
-  end.
-
-Lemma record_cond_correct:
-  forall c u changed f pc b,
-  branch_map_correct_2 c u f ->
-  fn.(fn_code)!pc = Some b ->
-  c!pc <> None ->
-  let '(c1, u1, _) := record_cond (c, u, changed) pc b in
-  { f' | branch_map_correct_2 c1 u1 f' }.
-Proof.
-  intros c u changed f pc b BMC GET1 GET2.
-  assert (DFL: { f' | branch_map_correct_2 c u f' }).
-  { exists f; auto. }
-  unfold record_cond. destruct b as [ | [] b ]; auto.
-  destruct (peq (U.repr u s1) (U.repr u s2)); auto.
-  exists (measure_cond u pc s1 s2 f).
-  assert (PC: U.repr u pc = pc).
-  { specialize (BMC pc). rewrite GET1 in BMC. intuition congruence. }
-  intro p. destruct (peq p pc).
-- subst p. rewrite GET1. unfold measure_cond.
-  rewrite U.repr_union_2 by auto. rewrite <- e, PC, peq_true.
-  destruct (peq (U.repr u s1) pc); auto.
-  right; repeat split.
-  + apply PTree.grs.
-  + rewrite U.repr_union_3. auto.
-  + rewrite U.repr_union_1 by congruence. auto.
-  + lia.
-  + lia.
-- assert (P: U.repr u p = p -> U.repr (U.union u pc s1) p = p).
-  { intros. rewrite U.repr_union_1 by congruence. auto. }
-  specialize (BMC p). destruct (fn_code fn)!p as [ [ | [] bb ] | ]; auto.
-  + destruct BMC as [A | (A & B)]; auto. right; split.
-    * apply U.sameclass_union_2; auto.
-    * unfold measure_cond. rewrite <- A.
-      destruct (peq (U.repr u s1) pc). auto.
-      destruct (peq (U.repr u p) pc); lia.
-  + destruct BMC as [A | (A & B & C & D & E)]; auto. right; split; [ | split; [ | split]].
-    * rewrite PTree.gro by auto. auto.
-    * apply U.sameclass_union_2; auto.
-    * apply U.sameclass_union_2; auto.
-    * unfold measure_cond. rewrite <- B, <- C.
-      destruct (peq (U.repr u s1) pc). auto.
-      destruct (peq (U.repr u p) pc); lia.
-Qed.
-
-Definition code_compat (c: code) : Prop :=
-  forall pc b, c!pc = Some b -> fn.(fn_code)!pc = Some b.
-
-Definition code_invariant (c0 c1 c2: code) : Prop :=
-  forall pc, c0!pc = None -> c1!pc = c2!pc.
-
-Lemma record_conds_1_correct:
-  forall c u f,
-  branch_map_correct_2 c u f ->
-  code_compat c ->
-  let '(c', u', _) := record_conds_1 (c, u) in
-  (code_compat c' * { f' | branch_map_correct_2 c' u' f' })%type.
-Proof.
-  intros c0 u0 f0 BMC0 COMPAT0.
-  unfold record_conds_1.
-  set (x := PTree.fold record_cond c0 (c0, u0, false)).
-  set (P := fun (cd: code) (cuc: code * U.t * bool) =>
-            (code_compat (fst (fst cuc)) *
-             code_invariant cd (fst (fst cuc)) c0 *
-             { f | branch_map_correct_2 (fst (fst cuc)) (snd (fst cuc)) f })%type).
-  assert (REC: P c0 x).
-  { unfold x; apply PTree_Properties.fold_ind.
-  - intros cd EMPTY. split; [split|]; simpl.
-    + auto.
-    + red; auto.
-    + exists f0; auto.
-  - intros cd [[c u] changed] pc b GET1 GET2 [[COMPAT INV] [f BMC]]. simpl in *.
-    split; [split|].
-    + unfold record_cond; destruct b as [ | [] b]; simpl; auto.
-      destruct (peq (U.repr u s1) (U.repr u s2)); simpl; auto.
-      red; intros. rewrite PTree.grspec in H. destruct (PTree.elt_eq pc0 pc). discriminate. auto.
-    + assert (DFL: code_invariant cd c c0).
-      { intros p GET. apply INV. rewrite PTree.gro by congruence. auto. }
-      unfold record_cond; destruct b as [ | [] b]; simpl; auto.
-      destruct (peq (U.repr u s1) (U.repr u s2)); simpl; auto.
-      intros p GET. rewrite PTree.gro by congruence. apply INV. rewrite PTree.gro by congruence. auto.
-    + assert (GET3: c!pc = Some b).
-      { rewrite <- GET2. apply INV. apply PTree.grs. }
-      assert (X: fn.(fn_code)!pc = Some b) by auto.
-      assert (Y: c!pc <> None) by congruence.
-      generalize (record_cond_correct c u changed f pc b BMC X Y).
-      destruct (record_cond (c, u, changed) pc b) as [[c1 u1] changed1]; simpl.
-      auto.
-  }
-  destruct x as [[c1 u1] changed1]; destruct REC as [[COMPAT1 INV1] BMC1]; auto.
-Qed.
-
-Definition branch_map_correct (u: U.t) (f: node -> nat): Prop :=
-  forall pc,
-  match fn.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
-  | Some(Lcond cond args s1 s2 :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s1 /\ U.repr u pc = U.repr u s2 /\ f s1 < f pc /\ f s2 < f pc)%nat
-  | _ =>
-      U.repr u pc = pc
-  end.
+  unfold check_code_spec.
+  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
+  intros X; inversion X; subst.
+  apply check_bblock_correct; auto.
+Qed.
 
-Lemma record_conds_correct:
-  forall cu,
-  { f | branch_map_correct_2 (fst cu) (snd cu) f } ->
-  code_compat (fst cu) ->
-  { f | branch_map_correct (record_conds cu) f }.
+Theorem branch_target_bounds:
+  forall f tf pc, 
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
 Proof.
-  intros cu0. functional induction (record_conds cu0); intros.
-- destruct cu as [c u], cu' as [c' u'], H as [f BMC]. 
-  generalize (record_conds_1_correct c u f BMC H0).
-  rewrite e. intros [U V]. apply IHt; auto.
-- destruct cu as [c u], H as [f BMC].
-  exists f. intros pc. specialize (BMC pc); simpl in *.
-  destruct (fn_code fn)!pc as [ [ | [] b ] | ]; tauto.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros _.
+  destruct ((fn_code f)!pc) eqn:X.
+  - exploit check_code_correct; eauto.
+  - exploit check_included_correct; eauto.
+    congruence.
 Qed.
 
-Lemma record_gotos_correct_1:
-  { f | branch_map_correct (record_gotos fn) f }.
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf -> 
+  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
 Proof.
-  apply record_conds_correct; simpl.
-- destruct record_branches_correct as [f BMC].
-  exists f. intros pc. specialize (BMC pc); simpl in *.
-  destruct (fn_code fn)!pc as [ [ | [] b ] | ]; auto.
-- red; auto.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros X; inversion X; clear X; subst.
+  simpl. rewrite PTree.gmap1. auto.
 Qed.
 
-Definition branch_target  (pc: node) : node :=
-  U.repr (record_gotos fn) pc.
-
-Definition count_gotos (pc: node) : nat :=
-  proj1_sig record_gotos_correct_1 pc.
-
-Theorem record_gotos_correct:
-  forall pc,
-  match fn.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-      branch_target pc = pc \/
-      (branch_target pc = branch_target s /\ count_gotos s < count_gotos pc)%nat
-  | Some(Lcond cond args s1 s2 :: b) =>
-      branch_target pc = pc \/
-      (branch_target pc = branch_target s1 /\ branch_target pc = branch_target s2
-       /\ count_gotos s1 < count_gotos pc /\ count_gotos s2 < count_gotos pc)%nat
-  | _ =>
-      branch_target pc = pc
-  end.
+Lemma tunnel_fundef_Internal:
+  forall f tf, tunnel_fundef (Internal f) = OK tf
+  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
 Proof.
-  intros. unfold count_gotos. destruct record_gotos_correct_1 as [f P]; simpl.
-  apply P.
+  intros f tf; simpl.
+  destruct (tunnel_function f) eqn:X; simpl; try congruence.
+  intros EQ; inversion EQ.
+  eexists; split; eauto.
 Qed.
 
-End BRANCH_MAP_CORRECT.
+Lemma tunnel_fundef_External:
+  forall tf ef, tunnel_fundef (External ef) = OK tf
+  -> tf = External ef.
+Proof.
+  intros tf ef; simpl. intros H; inversion H; auto.
+Qed.
 
 (** * Preservation of semantics *)
 
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
 Section PRESERVATION.
 
 Variables prog tprog: program.
@@ -266,32 +178,65 @@ Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
 
 Lemma functions_translated:
-  forall v f,
+  forall (v: val) (f: fundef),
   Genv.find_funct ge v = Some f ->
-  Genv.find_funct tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_transf TRANSL).
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+Qed.
 
 Lemma function_ptr_translated:
   forall v f,
   Genv.find_funct_ptr ge v = Some f ->
-  Genv.find_funct_ptr tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_ptr_transf TRANSL).
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
 
-Lemma symbols_preserved:
-  forall id,
-  Genv.find_symbol tge id = Genv.find_symbol ge id.
-Proof (Genv.find_symbol_transf TRANSL).
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
 
 Lemma senv_preserved:
   Senv.equiv ge tge.
-Proof (Genv.senv_transf TRANSL).
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
 
 Lemma sig_preserved:
-  forall f, funsig (tunnel_fundef f) = funsig f.
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
+    destruct (check_included _ _); try congruence.
+    monadInv EQ. simpl; auto.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Lemma fn_stacksize_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
 Proof.
-  destruct f; reflexivity.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
 Qed.
 
+
 (** The proof of semantic preservation is a simulation argument
   based on diagrams of the following form:
 <<
@@ -306,7 +251,7 @@ Qed.
   between states [st1] and [st2], as well as the postcondition between
   [st1'] and [st2'].  One transition in the source code (left) can correspond
   to zero or one transition in the transformed code (right).  The
-  "zero transition" case occurs when executing a [Lgoto] instruction
+  "zero transition" case occurs when executing a [Lnop] instruction
   in the source code that has been removed by tunneling.
 
   In the definition of [match_states], what changes between the original and
@@ -315,60 +260,52 @@ Qed.
   and memory states, since some [Vundef] values can become more defined
   as a consequence of eliminating useless [Lcond] instructions. *)
 
-Definition tunneled_block (f: function) (b: bblock) :=
-  tunnel_block (record_gotos f) b.
-
-Definition tunneled_code (f: function) :=
-  PTree.map1 (tunneled_block f) (fn_code f).
-
 Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
   forall l, Val.lessdef (ls1 l) (ls2 l).
 
 Inductive match_stackframes: stackframe -> stackframe -> Prop :=
   | match_stackframes_intro:
-      forall f sp ls0 bb tls0,
+      forall f tf sp ls0 bb tls0,
       locmap_lessdef ls0 tls0 ->
+      tunnel_function f = OK tf ->
       match_stackframes
          (Stackframe f sp ls0 bb)
-         (Stackframe (tunnel_function f) sp tls0 (tunneled_block f bb)).
+         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
 
 Inductive match_states: state -> state -> Prop :=
   | match_states_intro:
-      forall s f sp pc ls m ts tls tm
+      forall s f tf sp pc ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
       match_states (State s f sp pc ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+                   (State ts tf sp (branch_target f pc) tls tm)
   | match_states_block:
-      forall s f sp bb ls m ts tls tm
+      forall s f tf sp bb ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
       match_states (Block s f sp bb ls m)
-                   (Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm)
-  | match_states_interm_branch:
-      forall s f sp pc bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Block s f sp (Lbranch pc :: bb) ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
-  | match_states_interm_cond:
-      forall s f sp cond args pc1 pc2 bb ls m ts tls tm
+                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
+  | match_states_interm:
+      forall s f tf sp pc i bb ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
         (MEM: Mem.extends m tm)
-        (SAME: branch_target f pc1 = branch_target f pc2),
-      match_states (Block s f sp (Lcond cond args pc1 pc2 :: bb) ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc1) tls tm)
+        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp (i :: bb) ls m)
+                   (State ts tf sp pc tls tm)
   | match_states_call:
-      forall s f ls m ts tls tm
+      forall s f tf ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_fundef f = OK tf),
       match_states (Callstate s f ls m)
-                   (Callstate ts (tunnel_fundef f) tls tm)
+                   (Callstate ts tf tls tm)
   | match_states_return:
       forall s ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
@@ -418,22 +355,6 @@ Proof.
   induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
 Qed.
 
-(*
-Lemma locmap_undef_lessdef:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) (Locmap.undef ll ls2).
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_lessdef; auto. 
-Qed.
-
-Lemma locmap_undef_lessdef_1:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) ls2.
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_undef_lessdef; auto. 
-Qed.
-*)
-
 Lemma locmap_getpair_lessdef:
   forall p ls1 ls2,
   locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
@@ -477,15 +398,16 @@ Lemma find_function_translated:
   forall ros ls tls fd,
   locmap_lessdef ls tls ->
   find_function ge ros ls = Some fd ->
-  find_function tge ros tls = Some (tunnel_fundef fd).
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
 Proof.
   intros. destruct ros; simpl in *.
 - assert (E: tls (R m) = ls (R m)).
   { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
     generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
-  rewrite E. apply functions_translated; auto.
+  rewrite E. exploit functions_translated; eauto.
 - rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
-  apply function_ptr_translated; auto.
+  exploit function_ptr_translated; eauto.
+  intros (tf & X1 & X2). exists tf; intuition.
 Qed.
 
 Lemma call_regs_lessdef:
@@ -512,12 +434,12 @@ Qed.
 
 Definition measure (st: state) : nat :=
   match st with
-  | State s f sp pc ls m => (count_gotos f pc * 2)%nat
-  | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat
-  | Block s f sp (Lcond _ _ pc1 pc2 :: _) ls m => (Nat.max (count_gotos f pc1) (count_gotos f pc2) * 2 + 1)%nat
-  | Block s f sp bb ls m => 0%nat
-  | Callstate s f ls m => 0%nat
-  | Returnstate s ls m => 0%nat
+  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
+  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
+  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
+  | Block s f sp bb ls m => 0
+  | Callstate s f ls m => 0
+  | Returnstate s ls m => 0
   end.
 
 Lemma match_parent_locset:
@@ -536,30 +458,22 @@ Lemma tunnel_step_correct:
   (exists st2', step tge st1' t st2' /\ match_states st2 st2')
   \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
 Proof.
-  induction 1; intros; try inv MS.
+  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
 
 - (* entering a block *)
-  assert (DEFAULT: branch_target f pc = pc ->
-    (exists st2' : state,
-     step tge (State ts (tunnel_function f) sp (branch_target f pc) tls tm) E0 st2'
-     /\ match_states (Block s f sp bb rs m) st2')).
-  { intros. rewrite H0. econstructor; split.
-    econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto.
-    econstructor; eauto. }
-
-  generalize (record_gotos_correct f pc). rewrite H.
-  destruct bb; auto. destruct i; auto.
-+ (* Lbranch *)
-  intros [A | [B C]]. auto.
-  right. split. simpl. lia.
-  split. auto.
-  rewrite B. econstructor; eauto.
-+ (* Lcond *)
-  intros [A | (B & C & D & E)]. auto.
-  right. split. simpl. lia.
-  split. auto.
-  rewrite B. econstructor; eauto. congruence.
-
+  exploit (branch_target_bounds f tf pc); eauto.
+  rewrite H. intros X; inversion X.
+  + (* TB_default *) 
+    rewrite TB; left. econstructor; split.
+    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
+    * econstructor; eauto.
+  + (* FT_branch *)
+    simpl; right.
+    rewrite EQ; repeat (econstructor; lia || eauto).
+  + (* FT_cond *)
+    simpl; right.
+    repeat (econstructor; lia || eauto); simpl.
+    destruct (peq _ _); try congruence.
 - (* Lop *)
   exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
   intros (tv & EV & LD).
@@ -577,6 +491,31 @@ Proof.
   rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
   eauto. eauto.
   econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap1 *)
+  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap1.
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap2 *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  destruct (Mem.loadv chunk tm ta) eqn:Htload.
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap2.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
 - (* Lgetstack *)
   left; simpl; econstructor; split.
   econstructor; eauto.
@@ -596,20 +535,25 @@ Proof.
   eauto. eauto.
   econstructor; eauto using locmap_undef_regs_lessdef.
 - (* Lcall *)
-  left; simpl; econstructor; split.
-  eapply exec_Lcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto.
-  rewrite sig_preserved. auto.
-  econstructor; eauto.
-  constructor; auto.
-  constructor; auto.
+  left; simpl.
+  exploit find_function_translated; eauto.
+  intros (tfd & Htfd & FIND).
+  econstructor; split.
+  + eapply exec_Lcall; eauto.
+    erewrite sig_preserved; eauto.
+  + econstructor; eauto.
+    constructor; auto.
+    constructor; auto.
 - (* Ltailcall *)
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
+  exploit find_function_translated. 2: eauto.
+  { eauto using return_regs_lessdef, match_parent_locset. }
+  intros (tfd & Htfd & FIND).
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
   left; simpl; econstructor; split.
-  eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto using return_regs_lessdef, match_parent_locset.
-  apply sig_preserved.
-  econstructor; eauto using return_regs_lessdef, match_parent_locset.
+  + eapply exec_Ltailcall; eauto.
+    * eapply sig_preserved; eauto.
+    * erewrite fn_stacksize_preserved; eauto.
+  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
 - (* Lbuiltin *)
   exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
   exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
@@ -624,51 +568,58 @@ Proof.
   fold (branch_target f pc). econstructor; eauto.
 - (* Lbranch (eliminated) *)
   right; split. simpl. lia. split. auto. constructor; auto.
-
 - (* Lcond (preserved) *)
-  simpl tunneled_block.
-  set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2).
-  destruct (peq s1 s2).
-+ left; econstructor; split.
-  eapply exec_Lbranch.
-  set (pc := if b then pc1 else pc2).
-  replace s1 with (branch_target f pc) by (unfold pc; destruct b; auto).
-  constructor; eauto using locmap_undef_regs_lessdef_1.
-+ left; econstructor; split.
-  eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
-  destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+  simpl; left; destruct (peq _ _) eqn: EQ.
+  + econstructor; split.
+    eapply exec_Lbranch. 
+    destruct b.
+    * constructor; eauto using locmap_undef_regs_lessdef_1.
+    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+  + econstructor; split.
+    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
+    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
 - (* Lcond (eliminated) *)
-  right; split. simpl. destruct b; lia.
-  split. auto.
-  set (pc := if b then pc1 else pc2).
-  replace (branch_target f pc1) with (branch_target f pc) by (unfold pc; destruct b; auto).
-  econstructor; eauto.
-
+  destruct (peq _ _) eqn: EQ; try inv H1.
+  right; split; simpl. 
+  + destruct b.
+    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+  + destruct b.
+    -- repeat (constructor; auto).
+    -- rewrite e; repeat (constructor; auto).
 - (* Ljumptable *)
   assert (tls (R arg) = Vint n).
   { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
   left; simpl; econstructor; split.
   eapply exec_Ljumptable.
-  eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto.
+  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
   econstructor; eauto using locmap_undef_regs_lessdef.
 - (* Lreturn *)
   exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
   left; simpl; econstructor; split.
-  eapply exec_Lreturn; eauto.
-  constructor; eauto using return_regs_lessdef, match_parent_locset.
+  + eapply exec_Lreturn; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + constructor; eauto using return_regs_lessdef, match_parent_locset.
 - (* internal function *)
+  exploit tunnel_fundef_Internal; eauto.
+  intros (tf' & TF' & ITF). subst.
   exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros (tm' & ALLOC & MEM'). 
-  left; simpl; econstructor; split.
-  eapply exec_function_internal; eauto.
-  simpl. econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
+  intros (tm' & ALLOC & MEM').
+  left; simpl.
+  econstructor; split.
+  + eapply exec_function_internal; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + simpl.
+    erewrite (fn_entrypoint_preserved f tf'); auto.
+    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
 - (* external function *)
   exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
   intros (tvres & tm' & A & B & C & D).
   left; simpl; econstructor; split.
-  eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
+  + erewrite (tunnel_fundef_External tf ef); eauto.
+    eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
 - (* return *)
   inv STK. inv H1.
   left; econstructor; split.
@@ -681,14 +632,15 @@ Lemma transf_initial_states:
   exists st2, initial_state tprog st2 /\ match_states st1 st2.
 Proof.
   intros. inversion H.
-  exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split.
+  exploit function_ptr_translated; eauto.
+  intros (tf & Htf & Hf).
+  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
   econstructor; eauto.
-  apply (Genv.init_mem_transf TRANSL); auto.
+  apply (Genv.init_mem_transf_partial TRANSL); auto.
   rewrite (match_program_main TRANSL).
   rewrite symbols_preserved. eauto.
-  apply function_ptr_translated; auto.
-  rewrite <- H3. apply sig_preserved.
-  constructor. constructor. red; simpl; auto. apply Mem.extends_refl.
+  rewrite <- H3. apply sig_preserved. auto.
+  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
 Qed.
 
 Lemma transf_final_states: