From 95f918c38b1e59f40ae7af455ec2c6746289375e Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Thu, 17 Jun 2021 17:31:10 +0200
Subject: Change "Tunneling" to "LTLTunneling" everywhere

To respect the symmetry between RTL- and LTL-Tunneling
---
 backend/LTLTunneling.v      | 167 +++++++++++
 backend/LTLTunnelingaux.ml  | 108 +++++++
 backend/LTLTunnelingproof.v | 666 ++++++++++++++++++++++++++++++++++++++++++++
 backend/RTLTunneling.v      |   2 +-
 backend/Tunneling.v         | 167 -----------
 backend/Tunnelingaux.ml     | 108 -------
 backend/Tunnelinglibs.ml    |   2 +-
 backend/Tunnelingproof.v    | 666 --------------------------------------------
 8 files changed, 943 insertions(+), 943 deletions(-)
 create mode 100644 backend/LTLTunneling.v
 create mode 100644 backend/LTLTunnelingaux.ml
 create mode 100644 backend/LTLTunnelingproof.v
 delete mode 100644 backend/Tunneling.v
 delete mode 100644 backend/Tunnelingaux.ml
 delete mode 100644 backend/Tunnelingproof.v

(limited to 'backend')

diff --git a/backend/LTLTunneling.v b/backend/LTLTunneling.v
new file mode 100644
index 00000000..4b404724
--- /dev/null
+++ b/backend/LTLTunneling.v
@@ -0,0 +1,167 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Branch tunneling (optimization of branches to branches). *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST.
+Require Import LTL.
+
+(** Branch tunneling shortens sequences of branches (with no intervening
+  computations) by rewriting the branch and conditional branch instructions
+  so that they jump directly to the end of the branch sequence.
+  For example:
+<<
+     L1: if (cond) nop L2;                L1: nop L3;
+     L2: nop L3;               becomes    L2: nop L3;
+     L3: instr;                           L3: instr;
+     L4: if (cond) goto L1;               L4: if (cond) nop L1;
+>>
+  This optimization can be applied to several of our intermediate
+  languages.  We choose to perform it on the [LTL] language,
+  after register allocation but before code linearization.
+  Register allocation can delete instructions (such as dead
+  computations or useless moves), therefore there are more
+  opportunities for tunneling after allocation than before.
+  Symmetrically, prior tunneling helps linearization to produce
+  better code, e.g. by revealing that some [branch] instructions are
+  dead code (as the "branch L3" in the example above).
+*)
+
+(** The implementation consists in two passes: the first pass
+    records the branch t of each "nop"
+    and the second pass replace any "nop" node to [pc]
+    by a branch to a "nop" at [branch_t f pc]
+
+Naively, we may define [branch_t f pc] as follows:
+<<
+  branch_t f pc = branch_t f pc'  if f(pc) = nop pc'
+                     = pc                   otherwise
+>>
+  However, this definition can fail to terminate if
+  the program can contain loops consisting only of branches, as in
+<<
+     L1: branch L1;
+>>
+  or
+<<
+     L1: nop L2;
+     L2: nop L1;
+>>
+  Coq warns us of this fact by not accepting the definition
+  of [branch_t] above.
+
+  To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
+  for every instruction [pc: nop pc'] in the function.
+
+  Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
+  we need to iterate until we reach a fixpoint.
+
+  Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
+  in order to help the proof.
+
+  A verifier checks that this data-structure is correct.
+*)
+
+Definition UF := PTree.t (node * Z).
+
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: LTL.function -> UF. 
+Extract Constant branch_target => "LTLTunnelingaux.branch_target".
+
+Local Open Scope error_monad_scope.
+
+Definition get (td: UF) pc:node*Z :=
+  match td!pc with
+  | Some (t,d) => (t,Z.abs d)
+  | _ => (pc,0)
+  end.
+
+Definition target (td: UF) (pc:node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+Definition check_included (td: UF) (c: code): option bblock
+  := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
+
+(* we check the validity of targets and their bound:
+   the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
+ := match td!pc with
+    | None => OK tt
+    | Some (tpc, dpc) =>
+       let dpc := Z.abs dpc in
+       match bb with
+       | Lbranch s ::_ =>
+         let (ts, ds) := get td s in 
+         if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Lbranch")
+         else Error (msg "invalid skip of Lbranch")
+       | Lcond _ _ s1 s2 _ :: _ =>
+          let (ts1, ds1) := get td s1 in
+          let (ts2, ds2) := get td s2 in
+          if peq tpc ts1 then
+            if peq tpc ts2 then
+              if zlt ds1 dpc then
+                if zlt ds2 dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this block")
+      end
+   end.
+
+Definition check_code (td: UF) (c:code): res unit
+  := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
+
+(** The second pass rewrites all LTL instructions, replacing every
+  successor [s] of every instruction by [t s], the canonical representative
+  of its equivalence class in the union-find data structure. *)
+
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
+  match i with
+  | Lbranch s => Lbranch (t s)
+  | Lcond cond args s1 s2 info =>
+      let s1' := t s1 in let s2' := t s2 in
+      if peq s1' s2'
+      then Lbranch s1'
+      else Lcond cond args s1' s2' info
+  | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
+  | _ => i
+  end.
+
+Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
+  List.map (tunnel_instr t) b.
+
+Definition tunnel_function (f: LTL.function) : res LTL.function :=
+  let td := branch_target f in
+  let c := (fn_code f) in
+  if check_included td c then 
+    do _ <- check_code td c ; OK
+    (mkfunction
+      (fn_sig f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_block td) c)
+      (td (fn_entrypoint f)))
+  else 
+   Error (msg "Some node of the union-find is not in the CFG")
+   .
+
+Definition tunnel_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef tunnel_function f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program tunnel_fundef p.
diff --git a/backend/LTLTunnelingaux.ml b/backend/LTLTunnelingaux.ml
new file mode 100644
index 00000000..c3b8cf82
--- /dev/null
+++ b/backend/LTLTunnelingaux.ml
@@ -0,0 +1,108 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [LTLTunneling.v]
+
+*)
+
+open Coqlib
+open LTL
+open Maps
+open Camlcoq
+open Tunnelinglibs
+
+module LANG = struct
+  type code_unit = LTL.bblock
+  type funct = LTL.coq_function
+end
+
+module OPT = struct
+  let langname = "LTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
+
+module FUNS = struct
+  let build_simplified_cfg c acc pc bb =
+    match bb with
+    | Lbranch s :: _ ->
+       let ns = get_node c s in
+       set_branch c pc ns;
+       acc
+    | Lcond (_, _, s1, s2, _) :: _ ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, bb) =
+    match bb with
+    | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+    | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+    | _ -> Partial.debug "%d " pc; true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let check_code_unit td pc bb =
+    match PTree.get pc td with
+    | Some p ->
+      let (tpc, dpc) = p in
+      let dpc0 = dpc in
+      (match bb with
+       | [] ->
+         raise (BugOnPC (P.to_int pc))
+       | i :: _ ->
+         (match i with
+          | Lbranch s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | Lcond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | _ ->
+            raise (BugOnPC (P.to_int pc))))
+    | None -> ()
+end
+
+module T = Partial.T(FUNS)
+let branch_target = T.branch_target
+
diff --git a/backend/LTLTunnelingproof.v b/backend/LTLTunnelingproof.v
new file mode 100644
index 00000000..d36d3c76
--- /dev/null
+++ b/backend/LTLTunnelingproof.v
@@ -0,0 +1,666 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for the branch tunneling optimization. *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations LTL.
+Require Import LTLTunneling.
+
+Local Open Scope nat.
+
+
+(** * Properties of the branch_target, when the verifier succeeds *)
+
+Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
+   ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td: UF) (c: code): 
+  check_included_spec c td (check_included td c).
+Proof. 
+  apply PTree_Properties.fold_rec with (P := check_included_spec c).
+- (* extensionality *)
+  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_included_spec.
+  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
+  intros; eapply IND; try congruence.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
+ | TB_default (TB: target pc = pc) ob
+     : target_bounds target bound pc ob
+ | TB_branch s bb
+     (EQ: target pc = target s)
+     (DECREASE: bound s < bound pc)
+     : target_bounds target bound pc (Some (Lbranch s::bb))
+ | TB_cond cond args s1 s2 info bb
+     (EQ1: target pc = target s1)
+     (EQ2: target pc = target s2)
+     (DEC1: bound s1 < bound pc)
+     (DEC2: bound s2 < bound pc)
+     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
+ .
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
+Proof.
+  unfold target, get. intros H; rewrite H; auto.
+Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
+
+Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto.
+Qed.
+Local Hint Resolve get_nonneg: core.
+
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
+
+Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
+  check_bblock td pc bb = OK tt -> 
+  target_bounds (target td) (bound td) pc (Some bb).
+Proof.
+  unfold check_bblock, bound.
+  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
+  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
+  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
+  destruct bb as [|[ ] bb]; simpl; try congruence.
+  + destruct (get td s) as (ts, ds) eqn:Hs.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_branch.
+    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
+    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
+  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
+    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_cond.
+    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
+    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
+    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
+    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
+Qed.
+
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
+
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P := check_code_spec td).
+- (* extensionality *)
+  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_code_spec.
+  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
+  intros X; inversion X; subst.
+  apply check_bblock_correct; auto.
+Qed.
+
+Theorem branch_target_bounds:
+  forall f tf pc, 
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros _.
+  destruct ((fn_code f)!pc) eqn:X.
+  - exploit check_code_correct; eauto.
+  - exploit check_included_correct; eauto.
+    congruence.
+Qed.
+
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf -> 
+  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros X; inversion X; clear X; subst.
+  simpl. rewrite PTree.gmap1. auto.
+Qed.
+
+Lemma tunnel_fundef_Internal:
+  forall f tf, tunnel_fundef (Internal f) = OK tf
+  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
+Proof.
+  intros f tf; simpl.
+  destruct (tunnel_function f) eqn:X; simpl; try congruence.
+  intros EQ; inversion EQ.
+  eexists; split; eauto.
+Qed.
+
+Lemma tunnel_fundef_External:
+  forall tf ef, tunnel_fundef (External ef) = OK tf
+  -> tf = External ef.
+Proof.
+  intros tf ef; simpl. intros H; inversion H; auto.
+Qed.
+
+(** * Preservation of semantics *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma sig_preserved:
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
+    destruct (check_included _ _); try congruence.
+    monadInv EQ. simpl; auto.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Lemma fn_stacksize_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+
+(** The proof of semantic preservation is a simulation argument
+  based on diagrams of the following form:
+<<
+           st1 --------------- st2
+            |                   |
+           t|                  ?|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  The [match_states] predicate, defined below, captures the precondition
+  between states [st1] and [st2], as well as the postcondition between
+  [st1'] and [st2'].  One transition in the source code (left) can correspond
+  to zero or one transition in the transformed code (right).  The
+  "zero transition" case occurs when executing a [Lnop] instruction
+  in the source code that has been removed by tunneling.
+
+  In the definition of [match_states], what changes between the original and
+  transformed codes is mainly the control-flow
+  (in particular, the current program point [pc]), but also some values
+  and memory states, since some [Vundef] values can become more defined
+  as a consequence of eliminating useless [Lcond] instructions. *)
+
+Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
+  forall l, Val.lessdef (ls1 l) (ls2 l).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframes_intro:
+      forall f tf sp ls0 bb tls0,
+      locmap_lessdef ls0 tls0 ->
+      tunnel_function f = OK tf ->
+      match_stackframes
+         (Stackframe f sp ls0 bb)
+         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s f tf sp pc ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
+      match_states (State s f sp pc ls m)
+                   (State ts tf sp (branch_target f pc) tls tm)
+  | match_states_block:
+      forall s f tf sp bb ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp bb ls m)
+                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
+  | match_states_interm:
+      forall s f tf sp pc i bb ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp (i :: bb) ls m)
+                   (State ts tf sp pc tls tm)
+  | match_states_call:
+      forall s f tf ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_fundef f = OK tf),
+      match_states (Callstate s f ls m)
+                   (Callstate ts tf tls tm)
+  | match_states_return:
+      forall s ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm),
+      match_states (Returnstate s ls m)
+                   (Returnstate ts tls tm).
+
+(** Properties of [locmap_lessdef] *)
+
+Lemma reglist_lessdef:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl).
+Proof.
+  induction rl; simpl; intros; auto.
+Qed.
+
+Lemma locmap_set_lessdef:
+  forall ls1 ls2 v1 v2 l,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2).
+Proof.
+  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
+- destruct l; auto using Val.load_result_lessdef.
+- destruct (Loc.diff_dec l l'); auto.
+Qed.
+
+Lemma locmap_set_undef_lessdef:
+  forall ls1 ls2 l,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2.
+Proof.
+  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
+- destruct l; auto. destruct ty; auto. 
+- destruct (Loc.diff_dec l l'); auto.
+Qed.
+
+Lemma locmap_undef_regs_lessdef:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2).
+Proof.
+  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. 
+Qed.
+
+Lemma locmap_undef_regs_lessdef_1:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2.
+Proof.
+  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
+Qed.
+
+Lemma locmap_getpair_lessdef:
+  forall p ls1 ls2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
+Proof.
+  intros; destruct p; simpl; auto using Val.longofwords_lessdef.
+Qed.
+
+Lemma locmap_getpairs_lessdef:
+  forall pl ls1 ls2,
+  locmap_lessdef ls1 ls2 ->
+  Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl).
+Proof.
+  intros. induction pl; simpl; auto using locmap_getpair_lessdef.
+Qed.
+
+Lemma locmap_setpair_lessdef:
+  forall p ls1 ls2 v1 v2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2).
+Proof.
+  intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
+Qed.
+
+Lemma locmap_setres_lessdef:
+  forall res ls1 ls2 v1 v2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2).
+Proof.
+  induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
+Qed.
+
+Lemma locmap_undef_caller_save_regs_lessdef:
+  forall ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2).
+Proof.
+  intros; red; intros. unfold undef_caller_save_regs. 
+  destruct l.
+- destruct (Conventions1.is_callee_save r); auto.
+- destruct sl; auto.
+Qed.
+
+Lemma find_function_translated:
+  forall ros ls tls fd,
+  locmap_lessdef ls tls ->
+  find_function ge ros ls = Some fd ->
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
+Proof.
+  intros. destruct ros; simpl in *.
+- assert (E: tls (R m) = ls (R m)).
+  { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
+    generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
+  rewrite E. exploit functions_translated; eauto.
+- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
+  exploit function_ptr_translated; eauto.
+  intros (tf & X1 & X2). exists tf; intuition.
+Qed.
+
+Lemma call_regs_lessdef:
+  forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2).
+Proof.
+  intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto.
+Qed.
+
+Lemma return_regs_lessdef:
+  forall caller1 callee1 caller2 callee2,
+  locmap_lessdef caller1 caller2 ->
+  locmap_lessdef callee1 callee2 ->
+  locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2).
+Proof.
+  intros; red; intros. destruct l; simpl.
+- destruct (Conventions1.is_callee_save r); auto.
+- destruct sl; auto.
+Qed. 
+
+(** To preserve non-terminating behaviours, we show that the transformed
+  code cannot take an infinity of "zero transition" cases.
+  We use the following [measure] function over source states,
+  which decreases strictly in the "zero transition" case. *)
+
+Definition measure (st: state) : nat :=
+  match st with
+  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
+  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
+  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
+  | Block s f sp bb ls m => 0
+  | Callstate s f ls m => 0
+  | Returnstate s ls m => 0
+  end.
+
+Lemma match_parent_locset:
+  forall s ts,
+  list_forall2 match_stackframes s ts ->
+  locmap_lessdef (parent_locset s) (parent_locset ts).
+Proof.
+  induction 1; simpl.
+- red; auto.
+- inv H; auto.
+Qed.
+
+Lemma tunnel_step_correct:
+  forall st1 t st2, step ge st1 t st2 ->
+  forall st1' (MS: match_states st1 st1'),
+  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
+  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
+Proof.
+  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
+
+- (* entering a block *)
+  exploit (branch_target_bounds f tf pc); eauto.
+  rewrite H. intros X; inversion X.
+  + (* TB_default *) 
+    rewrite TB; left. econstructor; split.
+    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
+    * econstructor; eauto.
+  + (* FT_branch *)
+    simpl; right.
+    rewrite EQ; repeat (econstructor; lia || eauto).
+  + (* FT_cond *)
+    simpl; right.
+    repeat (econstructor; lia || eauto); simpl.
+    destruct (peq _ _); try congruence.
+- (* Lop *)
+  exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
+  intros (tv & EV & LD).
+  left; simpl; econstructor; split.
+  eapply exec_Lop with (v := tv); eauto.
+  rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  exploit Mem.loadv_extends. eauto. eauto. eexact LD. 
+  intros (tv & LOAD & LD').
+  left; simpl; econstructor; split.
+  eapply exec_Lload with (a := ta).
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap1 *)
+  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap1.
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap2 *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  destruct (Mem.loadv chunk tm ta) eqn:Htload.
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap2.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+- (* Lgetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lsetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lstore *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS.  
+  intros (tm' & STORE & MEM').
+  left; simpl; econstructor; split.
+  eapply exec_Lstore with (a := ta).
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
+  econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcall *)
+  left; simpl.
+  exploit find_function_translated; eauto.
+  intros (tfd & Htfd & FIND).
+  econstructor; split.
+  + eapply exec_Lcall; eauto.
+    erewrite sig_preserved; eauto.
+  + econstructor; eauto.
+    constructor; auto.
+    constructor; auto.
+- (* Ltailcall *)
+  exploit find_function_translated. 2: eauto.
+  { eauto using return_regs_lessdef, match_parent_locset. }
+  intros (tfd & Htfd & FIND).
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
+  left; simpl; econstructor; split.
+  + eapply exec_Ltailcall; eauto.
+    * eapply sig_preserved; eauto.
+    * erewrite fn_stacksize_preserved; eauto.
+  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
+- (* Lbuiltin *)
+  exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
+  exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
+  left; simpl; econstructor; split.
+  eapply exec_Lbuiltin; eauto.
+  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. 
+  eapply external_call_symbols_preserved. apply senv_preserved. eauto.
+  econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef.
+- (* Lbranch (preserved) *)
+  left; simpl; econstructor; split.
+  eapply exec_Lbranch; eauto.
+  fold (branch_target f pc). econstructor; eauto.
+- (* Lbranch (eliminated) *)
+  right; split. simpl. lia. split. auto. constructor; auto.
+- (* Lcond (preserved) *)
+  simpl; left; destruct (peq _ _) eqn: EQ.
+  + econstructor; split.
+    eapply exec_Lbranch. 
+    destruct b.
+    * constructor; eauto using locmap_undef_regs_lessdef_1.
+    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+  + econstructor; split.
+    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
+    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcond (eliminated) *)
+  destruct (peq _ _) eqn: EQ; try inv H1.
+  right; split; simpl. 
+  + destruct b.
+    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+  + destruct b.
+    -- repeat (constructor; auto).
+    -- rewrite e; repeat (constructor; auto).
+- (* Ljumptable *)
+  assert (tls (R arg) = Vint n).
+  { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
+  left; simpl; econstructor; split.
+  eapply exec_Ljumptable.
+  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
+  econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lreturn *)
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
+  left; simpl; econstructor; split.
+  + eapply exec_Lreturn; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + constructor; eauto using return_regs_lessdef, match_parent_locset.
+- (* internal function *)
+  exploit tunnel_fundef_Internal; eauto.
+  intros (tf' & TF' & ITF). subst.
+  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+  intros (tm' & ALLOC & MEM').
+  left; simpl.
+  econstructor; split.
+  + eapply exec_function_internal; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + simpl.
+    erewrite (fn_entrypoint_preserved f tf'); auto.
+    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
+- (* external function *)
+  exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
+  intros (tvres & tm' & A & B & C & D).
+  left; simpl; econstructor; split.
+  + erewrite (tunnel_fundef_External tf ef); eauto.
+    eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
+- (* return *)
+  inv STK. inv H1.
+  left; econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, initial_state prog st1 ->
+  exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto.
+  intros (tf & Htf & Hf).
+  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
+  econstructor; eauto.
+  apply (Genv.init_mem_transf_partial TRANSL); auto.
+  rewrite (match_program_main TRANSL).
+  rewrite symbols_preserved. eauto.
+  rewrite <- H3. apply sig_preserved. auto.
+  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r,
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv STK.
+  set (p := map_rpair R (Conventions1.loc_result signature_main)) in *.
+  generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD.
+  econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (LTL.semantics prog) (LTL.semantics tprog).
+Proof.
+  eapply forward_simulation_opt.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  eexact tunnel_step_correct.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
index 049160fd..df663f48 100644
--- a/backend/RTLTunneling.v
+++ b/backend/RTLTunneling.v
@@ -19,7 +19,7 @@ Require Import Coqlib Maps Errors.
 Require Import AST.
 Require Import RTL.
 
-(* This is a port of tunneling for LTL. See Tunneling.v *)
+(* This is a port of tunneling for LTL. See LTLTunneling.v *)
 
 Definition UF := PTree.t (node * Z).
 
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
deleted file mode 100644
index c849ea92..00000000
--- a/backend/Tunneling.v
+++ /dev/null
@@ -1,167 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Branch tunneling (optimization of branches to branches). *)
-
-Require Import Coqlib Maps Errors.
-Require Import AST.
-Require Import LTL.
-
-(** Branch tunneling shortens sequences of branches (with no intervening
-  computations) by rewriting the branch and conditional branch instructions
-  so that they jump directly to the end of the branch sequence.
-  For example:
-<<
-     L1: if (cond) nop L2;                L1: nop L3;
-     L2: nop L3;               becomes    L2: nop L3;
-     L3: instr;                           L3: instr;
-     L4: if (cond) goto L1;               L4: if (cond) nop L1;
->>
-  This optimization can be applied to several of our intermediate
-  languages.  We choose to perform it on the [LTL] language,
-  after register allocation but before code linearization.
-  Register allocation can delete instructions (such as dead
-  computations or useless moves), therefore there are more
-  opportunities for tunneling after allocation than before.
-  Symmetrically, prior tunneling helps linearization to produce
-  better code, e.g. by revealing that some [branch] instructions are
-  dead code (as the "branch L3" in the example above).
-*)
-
-(** The implementation consists in two passes: the first pass
-    records the branch t of each "nop"
-    and the second pass replace any "nop" node to [pc]
-    by a branch to a "nop" at [branch_t f pc]
-
-Naively, we may define [branch_t f pc] as follows:
-<<
-  branch_t f pc = branch_t f pc'  if f(pc) = nop pc'
-                     = pc                   otherwise
->>
-  However, this definition can fail to terminate if
-  the program can contain loops consisting only of branches, as in
-<<
-     L1: branch L1;
->>
-  or
-<<
-     L1: nop L2;
-     L2: nop L1;
->>
-  Coq warns us of this fact by not accepting the definition
-  of [branch_t] above.
-
-  To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
-  for every instruction [pc: nop pc'] in the function.
-
-  Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
-  we need to iterate until we reach a fixpoint.
-
-  Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
-  in order to help the proof.
-
-  A verifier checks that this data-structure is correct.
-*)
-
-Definition UF := PTree.t (node * Z).
-
-(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
-Axiom branch_target: LTL.function -> UF. 
-Extract Constant branch_target => "Tunnelingaux.branch_target".
-
-Local Open Scope error_monad_scope.
-
-Definition get (td: UF) pc:node*Z :=
-  match td!pc with
-  | Some (t,d) => (t,Z.abs d)
-  | _ => (pc,0)
-  end.
-
-Definition target (td: UF) (pc:node): node := fst (get td pc).
-Coercion target: UF >-> Funclass.
-
-(* we check that the domain of [td] is included in the domain of [c] *)
-Definition check_included (td: UF) (c: code): option bblock
-  := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
-
-(* we check the validity of targets and their bound:
-   the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
-*)
-Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
- := match td!pc with
-    | None => OK tt
-    | Some (tpc, dpc) =>
-       let dpc := Z.abs dpc in
-       match bb with
-       | Lbranch s ::_ =>
-         let (ts, ds) := get td s in 
-         if peq tpc ts then
-            if zlt ds dpc then OK tt
-            else Error (msg "bad distance in Lbranch")
-         else Error (msg "invalid skip of Lbranch")
-       | Lcond _ _ s1 s2 _ :: _ =>
-          let (ts1, ds1) := get td s1 in
-          let (ts2, ds2) := get td s2 in
-          if peq tpc ts1 then
-            if peq tpc ts2 then
-              if zlt ds1 dpc then
-                if zlt ds2 dpc then OK tt
-                else Error (msg "bad distance on else branch")
-              else Error (msg "bad distance on then branch")
-            else Error (msg "invalid skip of else branch")
-          else Error (msg "invalid skip of then branch")
-      | _ => Error (msg "cannot skip this block")
-      end
-   end.
-
-Definition check_code (td: UF) (c:code): res unit
-  := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
-
-(** The second pass rewrites all LTL instructions, replacing every
-  successor [s] of every instruction by [t s], the canonical representative
-  of its equivalence class in the union-find data structure. *)
-
-Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
-  match i with
-  | Lbranch s => Lbranch (t s)
-  | Lcond cond args s1 s2 info =>
-      let s1' := t s1 in let s2' := t s2 in
-      if peq s1' s2'
-      then Lbranch s1'
-      else Lcond cond args s1' s2' info
-  | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
-  | _ => i
-  end.
-
-Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
-  List.map (tunnel_instr t) b.
-
-Definition tunnel_function (f: LTL.function) : res LTL.function :=
-  let td := branch_target f in
-  let c := (fn_code f) in
-  if check_included td c then 
-    do _ <- check_code td c ; OK
-    (mkfunction
-      (fn_sig f)
-      (fn_stacksize f)
-      (PTree.map1 (tunnel_block td) c)
-      (td (fn_entrypoint f)))
-  else 
-   Error (msg "Some node of the union-find is not in the CFG")
-   .
-
-Definition tunnel_fundef (f: fundef) : res fundef :=
-  transf_partial_fundef tunnel_function f.
-
-Definition transf_program (p: program) : res program :=
-  transform_partial_program tunnel_fundef p.
diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml
deleted file mode 100644
index d4b88a5d..00000000
--- a/backend/Tunnelingaux.ml
+++ /dev/null
@@ -1,108 +0,0 @@
-(* *************************************************************)
-(*                                                             *)
-(*             The Compcert verified compiler                  *)
-(*                                                             *)
-(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
-(*                                                             *)
-(*  Copyright VERIMAG. All rights reserved.                    *)
-(*  This file is distributed under the terms of the INRIA      *)
-(*  Non-Commercial License Agreement.                          *)
-(*                                                             *)
-(* *************************************************************)
-
-(*
-
-This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
-and computes their target node with the distance (ie the number of cummulated nops) toward this target.
-
-See [Tunneling.v]
-
-*)
-
-open Coqlib
-open LTL
-open Maps
-open Camlcoq
-open Tunnelinglibs
-
-module LANG = struct
-  type code_unit = LTL.bblock
-  type funct = LTL.coq_function
-end
-
-module OPT = struct
-  let langname = "LTL"
-  let limit_tunneling = None
-  let debug_flag = ref false
-  let final_dump = false
-end
-
-module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
-
-module FUNS = struct
-  let build_simplified_cfg c acc pc bb =
-    match bb with
-    | Lbranch s :: _ ->
-       let ns = get_node c s in
-       set_branch c pc ns;
-       acc
-    | Lcond (_, _, s1, s2, _) :: _ ->
-       c.num_rems <- c.num_rems + 1;
-       let ns1 = get_node c s1 in
-       let ns2 = get_node c s2 in
-       let npc = get_node c pc in
-       npc.inst <- COND(ns1, ns2);
-       npc::acc
-    | _ -> acc
-
-  let print_code_unit c println (pc, bb) =
-    match bb with
-    | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
-    | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
-    | _ -> Partial.debug "%d " pc; true
-
-  let fn_code f = f.fn_code
-  let fn_entrypoint f = f.fn_entrypoint
-
-
-  (*************************************************************)
-  (* Copy-paste of the extracted code of the verifier          *)
-  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
-
-  let check_code_unit td pc bb =
-    match PTree.get pc td with
-    | Some p ->
-      let (tpc, dpc) = p in
-      let dpc0 = dpc in
-      (match bb with
-       | [] ->
-         raise (BugOnPC (P.to_int pc))
-       | i :: _ ->
-         (match i with
-          | Lbranch s ->
-            let (ts, ds) = get td s in
-            if peq tpc ts
-            then if zlt ds dpc0
-                 then ()
-                 else raise (BugOnPC (P.to_int pc))
-            else raise (BugOnPC (P.to_int pc))
-          | Lcond (_, _, s1, s2, _) ->
-            let (ts1, ds1) = get td s1 in
-            let (ts2, ds2) = get td s2 in
-            if peq tpc ts1
-            then if peq tpc ts2
-                 then if zlt ds1 dpc0
-                      then if zlt ds2 dpc0
-                           then ()
-                           else raise (BugOnPC (P.to_int pc))
-                      else raise (BugOnPC (P.to_int pc))
-                 else raise (BugOnPC (P.to_int pc))
-            else raise (BugOnPC (P.to_int pc))
-          | _ ->
-            raise (BugOnPC (P.to_int pc))))
-    | None -> ()
-end
-
-module T = Partial.T(FUNS)
-let branch_target = T.branch_target
-
diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml
index 1bb35f7a..7c826ba4 100644
--- a/backend/Tunnelinglibs.ml
+++ b/backend/Tunnelinglibs.ml
@@ -16,7 +16,7 @@
 This file implements the core functions of the tunneling passes, for both RTL
 and LTL, by using a simplified CFG as a transparent interface
 
-See [Tunneling.v] and [RTLTunneling.v]
+See [LTLTunneling.v] and [RTLTunneling.v]
 
 *)
 
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
deleted file mode 100644
index 3bc92f75..00000000
--- a/backend/Tunnelingproof.v
+++ /dev/null
@@ -1,666 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for the branch tunneling optimization. *)
-
-Require Import Coqlib Maps Errors.
-Require Import AST Linking.
-Require Import Values Memory Events Globalenvs Smallstep.
-Require Import Op Locations LTL.
-Require Import Tunneling.
-
-Local Open Scope nat.
-
-
-(** * Properties of the branch_target, when the verifier succeeds *)
-
-Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
-   ok <> None -> forall pc, c!pc = None -> td!pc = None.
-
-Lemma check_included_correct (td: UF) (c: code): 
-  check_included_spec c td (check_included td c).
-Proof. 
-  apply PTree_Properties.fold_rec with (P := check_included_spec c).
-- (* extensionality *)
-  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
-- (* base case *)
-  intros _ pc.  rewrite PTree.gempty; try congruence.
-- (* inductive case *)
-  unfold check_included_spec.
-  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
-  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
-  intros; eapply IND; try congruence.
-Qed.
-
-Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
- | TB_default (TB: target pc = pc) ob
-     : target_bounds target bound pc ob
- | TB_branch s bb
-     (EQ: target pc = target s)
-     (DECREASE: bound s < bound pc)
-     : target_bounds target bound pc (Some (Lbranch s::bb))
- | TB_cond cond args s1 s2 info bb
-     (EQ1: target pc = target s1)
-     (EQ2: target pc = target s2)
-     (DEC1: bound s1 < bound pc)
-     (DEC2: bound s2 < bound pc)
-     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
- .
-Local Hint Resolve TB_default: core.
-
-Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
-Proof.
-  unfold target, get. intros H; rewrite H; auto.
-Qed.
-Local Hint Resolve target_None Z.abs_nonneg: core.
-
-Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
-Proof.
-  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto.
-Qed.
-Local Hint Resolve get_nonneg: core.
-
-Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
-
-Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
-  check_bblock td pc bb = OK tt -> 
-  target_bounds (target td) (bound td) pc (Some bb).
-Proof.
-  unfold check_bblock, bound.
-  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
-  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
-  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
-  destruct bb as [|[ ] bb]; simpl; try congruence.
-  + destruct (get td s) as (ts, ds) eqn:Hs.
-    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
-    intros; apply TB_branch.
-    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
-    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
-  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
-    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
-    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
-    intros; apply TB_cond.
-    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
-    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
-    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
-    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
-Qed.
-
-Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
-   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
-
-Lemma check_code_correct (td:UF) c:
-   check_code_spec td c (check_code td c).
-Proof.
-  apply PTree_Properties.fold_rec with (P := check_code_spec td).
-- (* extensionality *)
-  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
-- (* base case *)
-  intros _ pc.  rewrite PTree.gempty; try congruence.
-- (* inductive case *)
-  unfold check_code_spec.
-  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
-  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
-  intros X; inversion X; subst.
-  apply check_bblock_correct; auto.
-Qed.
-
-Theorem branch_target_bounds:
-  forall f tf pc, 
-  tunnel_function f = OK tf ->
-  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
-Proof.
-  unfold tunnel_function; intros f f' pc.
-  destruct (check_included _ _) eqn:H1; try congruence.
-  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
-  intros _.
-  destruct ((fn_code f)!pc) eqn:X.
-  - exploit check_code_correct; eauto.
-  - exploit check_included_correct; eauto.
-    congruence.
-Qed.
-
-Lemma tunnel_function_unfold:
-  forall f tf pc,
-  tunnel_function f = OK tf -> 
-  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
-Proof.
-  unfold tunnel_function; intros f f' pc.
-  destruct (check_included _ _) eqn:H1; try congruence.
-  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
-  intros X; inversion X; clear X; subst.
-  simpl. rewrite PTree.gmap1. auto.
-Qed.
-
-Lemma tunnel_fundef_Internal:
-  forall f tf, tunnel_fundef (Internal f) = OK tf
-  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
-Proof.
-  intros f tf; simpl.
-  destruct (tunnel_function f) eqn:X; simpl; try congruence.
-  intros EQ; inversion EQ.
-  eexists; split; eauto.
-Qed.
-
-Lemma tunnel_fundef_External:
-  forall tf ef, tunnel_fundef (External ef) = OK tf
-  -> tf = External ef.
-Proof.
-  intros tf ef; simpl. intros H; inversion H; auto.
-Qed.
-
-(** * Preservation of semantics *)
-
-Definition match_prog (p tp: program) :=
-  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
-
-Lemma transf_program_match:
-  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
-Proof.
-  intros. eapply match_transform_partial_program_contextual; eauto.
-Qed.
-
-Section PRESERVATION.
-
-Variables prog tprog: program.
-Hypothesis TRANSL: match_prog prog tprog.
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-Lemma functions_translated:
-  forall (v: val) (f: fundef),
-  Genv.find_funct ge v = Some f ->
-  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
-Proof.
-  intros. exploit (Genv.find_funct_match TRANSL); eauto.
-  intros (cu & tf & A & B & C).
-  repeat eexists; intuition eauto.
-Qed.
-
-Lemma function_ptr_translated:
-  forall v f,
-  Genv.find_funct_ptr ge v = Some f ->
-  exists tf,
-  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
-Proof.
-  intros.
-  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
-Qed.
-
-Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof.
-  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
-Qed.
-
-Lemma senv_preserved:
-  Senv.equiv ge tge.
-Proof.
-  eapply (Genv.senv_match TRANSL).
-Qed.
-
-Lemma sig_preserved:
-  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
-Proof.
-  intros. destruct f.
-  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
-    destruct (check_included _ _); try congruence.
-    monadInv EQ. simpl; auto.
-  - simpl in H. monadInv H. reflexivity.
-Qed.
-
-Lemma fn_stacksize_preserved:
-  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
-Proof.
-  intros f tf; unfold tunnel_function.
-  destruct (check_included _ _); try congruence.
-  destruct (check_code _ _); simpl; try congruence.
-  intros H; inversion H; simpl; auto.
-Qed.
-
-Lemma fn_entrypoint_preserved:
-  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
-Proof.
-  intros f tf; unfold tunnel_function.
-  destruct (check_included _ _); try congruence.
-  destruct (check_code _ _); simpl; try congruence.
-  intros H; inversion H; simpl; auto.
-Qed.
-
-
-(** The proof of semantic preservation is a simulation argument
-  based on diagrams of the following form:
-<<
-           st1 --------------- st2
-            |                   |
-           t|                  ?|t
-            |                   |
-            v                   v
-           st1'--------------- st2'
->>
-  The [match_states] predicate, defined below, captures the precondition
-  between states [st1] and [st2], as well as the postcondition between
-  [st1'] and [st2'].  One transition in the source code (left) can correspond
-  to zero or one transition in the transformed code (right).  The
-  "zero transition" case occurs when executing a [Lnop] instruction
-  in the source code that has been removed by tunneling.
-
-  In the definition of [match_states], what changes between the original and
-  transformed codes is mainly the control-flow
-  (in particular, the current program point [pc]), but also some values
-  and memory states, since some [Vundef] values can become more defined
-  as a consequence of eliminating useless [Lcond] instructions. *)
-
-Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
-  forall l, Val.lessdef (ls1 l) (ls2 l).
-
-Inductive match_stackframes: stackframe -> stackframe -> Prop :=
-  | match_stackframes_intro:
-      forall f tf sp ls0 bb tls0,
-      locmap_lessdef ls0 tls0 ->
-      tunnel_function f = OK tf ->
-      match_stackframes
-         (Stackframe f sp ls0 bb)
-         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
-
-Inductive match_states: state -> state -> Prop :=
-  | match_states_intro:
-      forall s f tf sp pc ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (TF: tunnel_function f = OK tf),
-      match_states (State s f sp pc ls m)
-                   (State ts tf sp (branch_target f pc) tls tm)
-  | match_states_block:
-      forall s f tf sp bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (TF: tunnel_function f = OK tf),
-      match_states (Block s f sp bb ls m)
-                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
-  | match_states_interm:
-      forall s f tf sp pc i bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
-        (TF: tunnel_function f = OK tf),
-      match_states (Block s f sp (i :: bb) ls m)
-                   (State ts tf sp pc tls tm)
-  | match_states_call:
-      forall s f tf ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (TF: tunnel_fundef f = OK tf),
-      match_states (Callstate s f ls m)
-                   (Callstate ts tf tls tm)
-  | match_states_return:
-      forall s ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Returnstate s ls m)
-                   (Returnstate ts tls tm).
-
-(** Properties of [locmap_lessdef] *)
-
-Lemma reglist_lessdef:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl).
-Proof.
-  induction rl; simpl; intros; auto.
-Qed.
-
-Lemma locmap_set_lessdef:
-  forall ls1 ls2 v1 v2 l,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2).
-Proof.
-  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
-- destruct l; auto using Val.load_result_lessdef.
-- destruct (Loc.diff_dec l l'); auto.
-Qed.
-
-Lemma locmap_set_undef_lessdef:
-  forall ls1 ls2 l,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2.
-Proof.
-  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
-- destruct l; auto. destruct ty; auto. 
-- destruct (Loc.diff_dec l l'); auto.
-Qed.
-
-Lemma locmap_undef_regs_lessdef:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2).
-Proof.
-  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. 
-Qed.
-
-Lemma locmap_undef_regs_lessdef_1:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2.
-Proof.
-  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
-Qed.
-
-Lemma locmap_getpair_lessdef:
-  forall p ls1 ls2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
-Proof.
-  intros; destruct p; simpl; auto using Val.longofwords_lessdef.
-Qed.
-
-Lemma locmap_getpairs_lessdef:
-  forall pl ls1 ls2,
-  locmap_lessdef ls1 ls2 ->
-  Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl).
-Proof.
-  intros. induction pl; simpl; auto using locmap_getpair_lessdef.
-Qed.
-
-Lemma locmap_setpair_lessdef:
-  forall p ls1 ls2 v1 v2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2).
-Proof.
-  intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
-Qed.
-
-Lemma locmap_setres_lessdef:
-  forall res ls1 ls2 v1 v2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2).
-Proof.
-  induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
-Qed.
-
-Lemma locmap_undef_caller_save_regs_lessdef:
-  forall ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2).
-Proof.
-  intros; red; intros. unfold undef_caller_save_regs. 
-  destruct l.
-- destruct (Conventions1.is_callee_save r); auto.
-- destruct sl; auto.
-Qed.
-
-Lemma find_function_translated:
-  forall ros ls tls fd,
-  locmap_lessdef ls tls ->
-  find_function ge ros ls = Some fd ->
-  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
-Proof.
-  intros. destruct ros; simpl in *.
-- assert (E: tls (R m) = ls (R m)).
-  { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
-    generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
-  rewrite E. exploit functions_translated; eauto.
-- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
-  exploit function_ptr_translated; eauto.
-  intros (tf & X1 & X2). exists tf; intuition.
-Qed.
-
-Lemma call_regs_lessdef:
-  forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2).
-Proof.
-  intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto.
-Qed.
-
-Lemma return_regs_lessdef:
-  forall caller1 callee1 caller2 callee2,
-  locmap_lessdef caller1 caller2 ->
-  locmap_lessdef callee1 callee2 ->
-  locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2).
-Proof.
-  intros; red; intros. destruct l; simpl.
-- destruct (Conventions1.is_callee_save r); auto.
-- destruct sl; auto.
-Qed. 
-
-(** To preserve non-terminating behaviours, we show that the transformed
-  code cannot take an infinity of "zero transition" cases.
-  We use the following [measure] function over source states,
-  which decreases strictly in the "zero transition" case. *)
-
-Definition measure (st: state) : nat :=
-  match st with
-  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
-  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
-  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
-  | Block s f sp bb ls m => 0
-  | Callstate s f ls m => 0
-  | Returnstate s ls m => 0
-  end.
-
-Lemma match_parent_locset:
-  forall s ts,
-  list_forall2 match_stackframes s ts ->
-  locmap_lessdef (parent_locset s) (parent_locset ts).
-Proof.
-  induction 1; simpl.
-- red; auto.
-- inv H; auto.
-Qed.
-
-Lemma tunnel_step_correct:
-  forall st1 t st2, step ge st1 t st2 ->
-  forall st1' (MS: match_states st1 st1'),
-  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
-  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
-Proof.
-  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
-
-- (* entering a block *)
-  exploit (branch_target_bounds f tf pc); eauto.
-  rewrite H. intros X; inversion X.
-  + (* TB_default *) 
-    rewrite TB; left. econstructor; split.
-    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
-    * econstructor; eauto.
-  + (* FT_branch *)
-    simpl; right.
-    rewrite EQ; repeat (econstructor; lia || eauto).
-  + (* FT_cond *)
-    simpl; right.
-    repeat (econstructor; lia || eauto); simpl.
-    destruct (peq _ _); try congruence.
-- (* Lop *)
-  exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
-  intros (tv & EV & LD).
-  left; simpl; econstructor; split.
-  eapply exec_Lop with (v := tv); eauto.
-  rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lload *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  exploit Mem.loadv_extends. eauto. eauto. eexact LD. 
-  intros (tv & LOAD & LD').
-  left; simpl; econstructor; split.
-  eapply exec_Lload with (a := ta).
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lload notrap1 *)
-  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
-  left; simpl; econstructor; split.
-  eapply exec_Lload_notrap1.
-  rewrite <- H0.
-  apply eval_addressing_preserved. exact symbols_preserved. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lload notrap2 *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  destruct (Mem.loadv chunk tm ta) eqn:Htload.
-  {
-  left; simpl; econstructor; split.
-  eapply exec_Lload.
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  exact Htload. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-  }
-  {
-  left; simpl; econstructor; split.
-  eapply exec_Lload_notrap2.
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  exact Htload. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-  }
-- (* Lgetstack *)
-  left; simpl; econstructor; split.
-  econstructor; eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lsetstack *)
-  left; simpl; econstructor; split.
-  econstructor; eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lstore *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS.  
-  intros (tm' & STORE & MEM').
-  left; simpl; econstructor; split.
-  eapply exec_Lstore with (a := ta).
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto. eauto.
-  econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lcall *)
-  left; simpl.
-  exploit find_function_translated; eauto.
-  intros (tfd & Htfd & FIND).
-  econstructor; split.
-  + eapply exec_Lcall; eauto.
-    erewrite sig_preserved; eauto.
-  + econstructor; eauto.
-    constructor; auto.
-    constructor; auto.
-- (* Ltailcall *)
-  exploit find_function_translated. 2: eauto.
-  { eauto using return_regs_lessdef, match_parent_locset. }
-  intros (tfd & Htfd & FIND).
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
-  left; simpl; econstructor; split.
-  + eapply exec_Ltailcall; eauto.
-    * eapply sig_preserved; eauto.
-    * erewrite fn_stacksize_preserved; eauto.
-  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
-- (* Lbuiltin *)
-  exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
-  exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
-  left; simpl; econstructor; split.
-  eapply exec_Lbuiltin; eauto.
-  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. 
-  eapply external_call_symbols_preserved. apply senv_preserved. eauto.
-  econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef.
-- (* Lbranch (preserved) *)
-  left; simpl; econstructor; split.
-  eapply exec_Lbranch; eauto.
-  fold (branch_target f pc). econstructor; eauto.
-- (* Lbranch (eliminated) *)
-  right; split. simpl. lia. split. auto. constructor; auto.
-- (* Lcond (preserved) *)
-  simpl; left; destruct (peq _ _) eqn: EQ.
-  + econstructor; split.
-    eapply exec_Lbranch. 
-    destruct b.
-    * constructor; eauto using locmap_undef_regs_lessdef_1.
-    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
-  + econstructor; split.
-    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
-    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lcond (eliminated) *)
-  destruct (peq _ _) eqn: EQ; try inv H1.
-  right; split; simpl. 
-  + destruct b.
-    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
-    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
-  + destruct b.
-    -- repeat (constructor; auto).
-    -- rewrite e; repeat (constructor; auto).
-- (* Ljumptable *)
-  assert (tls (R arg) = Vint n).
-  { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
-  left; simpl; econstructor; split.
-  eapply exec_Ljumptable.
-  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
-  econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lreturn *)
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
-  left; simpl; econstructor; split.
-  + eapply exec_Lreturn; eauto.
-    erewrite fn_stacksize_preserved; eauto.
-  + constructor; eauto using return_regs_lessdef, match_parent_locset.
-- (* internal function *)
-  exploit tunnel_fundef_Internal; eauto.
-  intros (tf' & TF' & ITF). subst.
-  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros (tm' & ALLOC & MEM').
-  left; simpl.
-  econstructor; split.
-  + eapply exec_function_internal; eauto.
-    erewrite fn_stacksize_preserved; eauto.
-  + simpl.
-    erewrite (fn_entrypoint_preserved f tf'); auto.
-    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
-- (* external function *)
-  exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
-  intros (tvres & tm' & A & B & C & D).
-  left; simpl; econstructor; split.
-  + erewrite (tunnel_fundef_External tf ef); eauto.
-    eapply exec_function_external; eauto.
-    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
-- (* return *)
-  inv STK. inv H1.
-  left; econstructor; split.
-  eapply exec_return; eauto.
-  constructor; auto.
-Qed.
-
-Lemma transf_initial_states:
-  forall st1, initial_state prog st1 ->
-  exists st2, initial_state tprog st2 /\ match_states st1 st2.
-Proof.
-  intros. inversion H.
-  exploit function_ptr_translated; eauto.
-  intros (tf & Htf & Hf).
-  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
-  econstructor; eauto.
-  apply (Genv.init_mem_transf_partial TRANSL); auto.
-  rewrite (match_program_main TRANSL).
-  rewrite symbols_preserved. eauto.
-  rewrite <- H3. apply sig_preserved. auto.
-  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
-Qed.
-
-Lemma transf_final_states:
-  forall st1 st2 r,
-  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
-Proof.
-  intros. inv H0. inv H. inv STK.
-  set (p := map_rpair R (Conventions1.loc_result signature_main)) in *.
-  generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD.
-  econstructor; eauto.
-Qed.
-
-Theorem transf_program_correct:
-  forward_simulation (LTL.semantics prog) (LTL.semantics tprog).
-Proof.
-  eapply forward_simulation_opt.
-  apply senv_preserved.
-  eexact transf_initial_states.
-  eexact transf_final_states.
-  eexact tunnel_step_correct.
-Qed.
-
-End PRESERVATION.
-- 
cgit