Merge branch 'kvx-work' into weak-software-pipelining

author: Justus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr> 2021-08-17 14:59:49 +0200
committer: Justus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr> 2021-08-17 14:59:49 +0200
commit: d0fd34aa9cc0d3c94a2a3af56fa9d50115a0e37f (patch)
tree: 5964f47dcbe87c243d2b93b735a3f51aab31be39
parent: 8ac2401ef5daea824b58e6e3424dd216a41b2d6d (diff)
parent: 1fbe45e2d1f02ef6e8fb6fe7545728a744e047b8 (diff)
download: compcert-kvx-d0fd34aa9cc0d3c94a2a3af56fa9d50115a0e37f.tar.gz
compcert-kvx-d0fd34aa9cc0d3c94a2a3af56fa9d50115a0e37f.zip
16 files changed, 1353 insertions, 358 deletions
diff --git a/Makefile b/Makefile
index f4fe1441..84c060af 100644
--- a/Makefile
+++ b/Makefile
@@ -143,7 +143,8 @@ BACKEND=\
   FirstNop.v FirstNopproof.v \
   Allnontrap.v Allnontrapproof.v \
   Allocation.v Allocationproof.v \
-  Tunneling.v Tunnelingproof.v \
+  LTLTunneling.v LTLTunnelingproof.v \
+  RTLTunneling.v RTLTunnelingproof.v \
   Linear.v Lineartyping.v \
   Linearize.v Linearizeproof.v \
   CleanupLabels.v CleanupLabelsproof.v \
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index ff7fbf56..9c6c98ac 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -1089,7 +1089,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array)
                    | Icond _ -> true
                    | _ -> false) seqa) with
       | None -> Printf.printf "no solution in prepass scheduling\n";
-                None
+                Stdlib.exit 1
+               (* TODO None *)
       | Some solution ->
          let positions = Array.init nr_instructions (fun i -> i) in
          let final_time = solution.(nr_instructions) in
diff --git a/backend/Tunneling.v b/backend/LTLTunneling.v
index c849ea92..4b404724 100644
--- a/backend/Tunneling.v
+++ b/backend/LTLTunneling.v
@@ -77,7 +77,7 @@ Definition UF := PTree.t (node * Z).
 
 (* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
 Axiom branch_target: LTL.function -> UF. 
-Extract Constant branch_target => "Tunnelingaux.branch_target".
+Extract Constant branch_target => "LTLTunnelingaux.branch_target".
 
 Local Open Scope error_monad_scope.
 
diff --git a/backend/LTLTunnelingaux.ml b/backend/LTLTunnelingaux.ml
new file mode 100644
index 00000000..c3b8cf82
--- /dev/null
+++ b/backend/LTLTunnelingaux.ml
@@ -0,0 +1,108 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [LTLTunneling.v]
+
+*)
+
+open Coqlib
+open LTL
+open Maps
+open Camlcoq
+open Tunnelinglibs
+
+module LANG = struct
+  type code_unit = LTL.bblock
+  type funct = LTL.coq_function
+end
+
+module OPT = struct
+  let langname = "LTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
+
+module FUNS = struct
+  let build_simplified_cfg c acc pc bb =
+    match bb with
+    | Lbranch s :: _ ->
+       let ns = get_node c s in
+       set_branch c pc ns;
+       acc
+    | Lcond (_, _, s1, s2, _) :: _ ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, bb) =
+    match bb with
+    | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+    | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+    | _ -> Partial.debug "%d " pc; true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let check_code_unit td pc bb =
+    match PTree.get pc td with
+    | Some p ->
+      let (tpc, dpc) = p in
+      let dpc0 = dpc in
+      (match bb with
+       | [] ->
+         raise (BugOnPC (P.to_int pc))
+       | i :: _ ->
+         (match i with
+          | Lbranch s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | Lcond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | _ ->
+            raise (BugOnPC (P.to_int pc))))
+    | None -> ()
+end
+
+module T = Partial.T(FUNS)
+let branch_target = T.branch_target
+
diff --git a/backend/Tunnelingproof.v b/backend/LTLTunnelingproof.v
index 3bc92f75..d36d3c76 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/LTLTunnelingproof.v
@@ -17,7 +17,7 @@ Require Import Coqlib Maps Errors.
 Require Import AST Linking.
 Require Import Values Memory Events Globalenvs Smallstep.
 Require Import Op Locations LTL.
-Require Import Tunneling.
+Require Import LTLTunneling.
 
 Local Open Scope nat.
 
diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
new file mode 100644
index 00000000..2bc92dff
--- /dev/null
+++ b/backend/RTLTunneling.v
@@ -0,0 +1,127 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          TODO: Proper author information                            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Branch tunneling for the RTL representation *)
+
+(* rq: ce code est commenté avec des remarques sur des éléments du code pas évidents, sur lequels j'ai buté, ou qui changent du tunneling LTL. Ils pourront être enlevés ultérieurement *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST.
+Require Import RTL.
+
+(* This is a port of tunneling for LTL. See LTLTunneling.v *)
+
+Definition UF := PTree.t (node * Z).
+
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: RTL.function -> UF.
+Extract Constant branch_target => "RTLTunnelingaux.branch_target".
+(* TODO: add an extraction command to link branch_target with its implementation *)
+
+Local Open Scope error_monad_scope.
+
+Definition get (td: UF) (pc: node): node*Z :=
+  match td!pc with (* rq: notation pour `PTree.get pc td` *)
+  | Some (t,d) => (t,Z.abs d)
+  | None       => (pc,0)
+  end.
+
+(* rq: "coerce" la structure d'abre UF en une fonction qui retrouve le représentant canonique de la classe d'un noeud *)
+Definition target (td: UF) (pc: node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+(* rq: pas de basic block en RTL *)
+Definition check_included (td: UF) (c: code): option instruction
+  := PTree.fold (fun (ok:option instruction) pc _ => if ok then c!pc else None) td (Some (Inop xH)).
+(* rq: PTree.fold replie l'arbre avec un parcours préfixe. La fonction qui prend en argument l'accumulateur, le label du noeud et le pointeur (`positive`) vers ce noeud *)
+
+(* we check the validity of targets and their bound:
+*  the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_instr (td: UF) (pc: node) (i: instruction): res unit :=
+  match td!pc with
+  | None => OK tt (* rq: le type `unit` est le singleton {`tt`} *)
+  | Some (tpc, dpc) =>
+      let dpc := Z.abs dpc in
+      match i with
+      | Inop s =>
+          let (ts,ds) := get td s in
+          if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Inop")
+          else Error (msg "invalid skip of Inop")
+      | Icond _ _ ifso ifnot _ =>
+          let (tso,dso) := get td ifso in
+          let (tnot,dnot) := get td ifnot in
+          if peq tpc tso then
+            if peq tpc tnot then
+              if zlt dso dpc then
+                if zlt dnot dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this instruction")
+      end
+  end.
+
+Definition check_code (td: UF) (c: code): res unit :=
+  PTree.fold (fun ok pc i => do _ <- ok; check_instr td pc i) c (OK tt).
+(* rq: `do _ <- ok; ...` exécute le bloc à droite du ';' si `ok` est un `OK _`, sinon il passe simplement l'erreur *)
+
+(* The second pass rewrites all LTL instructions, replacing every
+ * successor [s] of every instruction by [t s], the canonical representative
+ * of its equivalence class in the union-find data structure.
+ *)
+
+(* rq: beaucoup plus de cas que pour LTL, car plein d'instructions ont des successeurs *)
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
+  match i with
+  | Inop s => Inop (t s)
+  | Iop op args res s => Iop op args res (t s)
+  | Iload trap chunk addr args dst s => Iload trap chunk addr args dst (t s)
+  | Istore chunk addr args src s => Istore chunk addr args src (t s)
+  | Icall sig ros args res s => Icall sig ros args res (t s)
+  | Ibuiltin ef args res s => Ibuiltin ef args res (t s)
+  | Icond cond args ifso ifnot info =>
+      let ifso'  := t ifso in
+      let ifnot' := t ifnot in
+      if peq ifso' ifnot'
+      (* rq: si les deux branches se rejoignent, le if ne sert à rien *)
+      then Inop ifso'
+      else Icond cond args ifso' ifnot' info
+  | Ijumptable arg tbl => Ijumptable arg (List.map t tbl)
+  | _ => i
+  end.
+
+Definition tunnel_function (f: RTL.function): res RTL.function :=
+  let td := branch_target f in
+  let c := fn_code f in
+  if check_included td c then
+    do _ <- check_code td c ; OK
+    (mkfunction (* rq: mkfunction construit un "Record" `RTL.function` *)
+      (fn_sig f)
+      (fn_params f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_instr td) c) (* rq: `map1` est un map sans le pointeur *)
+      (td (fn_entrypoint f))
+      (mkuntrustedanalysis None None))
+  else Error (msg "Some node of the union-find is not in the CFG").
+
+Definition tunnel_fundef (f: fundef): res fundef :=
+  transf_partial_fundef tunnel_function f.
+
+Definition transf_program (p: program): res program :=
+  transform_partial_program tunnel_fundef p.
+
diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml
new file mode 100644
index 00000000..9333e357
--- /dev/null
+++ b/backend/RTLTunnelingaux.ml
@@ -0,0 +1,112 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*           TODO: Proper author information                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a RTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [RTLTunneling.v]
+
+*)
+
+open Coqlib
+open RTL
+open Maps
+open Camlcoq
+open Tunnelinglibs
+
+module LANG = struct
+  type code_unit = RTL.instruction
+  type funct = RTL.coq_function
+end
+
+module OPT = struct
+  let langname = "RTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
+
+module FUNS = struct
+  let build_simplified_cfg c acc pc i =
+    match i with
+    | Inop s ->
+       let ns = get_node c s in
+       set_branch c pc ns;
+       incr nopcounter;
+       acc
+    | Icond (_, _, s1, s2, _) ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, i) =
+    match i with
+    | Inop s -> (if println then Partial.debug "\n");
+        Partial.debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc);
+        false
+    | Icond (_, _, s1, s2, _) -> (if println then Partial.debug "\n");
+        Partial.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc);
+        false
+    | _ -> Partial.debug "%d " pc;
+        true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let check_code_unit td pc i =
+    match PTree.get pc td with
+    | Some p ->
+        let (tpc, dpc) = p in
+        let dpc0 = dpc in begin
+          match i with
+          | Inop s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | Icond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | _ ->
+            raise (BugOnPC (P.to_int pc)) end
+    | None -> ()
+
+end
+
+module T = Partial.T(FUNS)
+let branch_target = T.branch_target
+
diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
new file mode 100644
index 00000000..51ef0f7a
--- /dev/null
+++ b/backend/RTLTunnelingproof.v
@@ -0,0 +1,670 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          TODO: Proper author information                            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for the branch tunneling optimization for RTL. *)
+(* This is a port of Tunnelingproof.v, the same optimisation for LTL. *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST Linking.
+Require Import Values Memory Registers Events Globalenvs Smallstep.
+Require Import Op Locations RTL.
+Require Import RTLTunneling.
+Require Import Conventions1.
+
+Local Open Scope nat.
+
+(**) Definition check_included_spec (c:code) (td:UF) (ok: option instruction) :=
+  ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td:UF) (c:code):
+  check_included_spec c td (check_included td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P:=check_included_spec c); unfold check_included_spec.
+  - intros m m' oi EQ IND N pc. rewrite <- EQ. apply IND. apply N.
+  - intros N pc. rewrite PTree.gempty. auto.
+  - intros m oi pc v N S IND. destruct oi.
+    + intros. rewrite PTree.gsspec. destruct (peq _ _); try congruence. apply IND. congruence. apply H0.
+    + contradiction.
+Qed.
+
+(**) Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node) : option instruction -> Prop :=
+  | TB_default (TB: target pc = pc) oi:
+      target_bounds target bound pc oi
+  | TB_nop s
+      (EQ: target pc = target s)
+      (DEC: bound s < bound pc):
+      target_bounds target bound pc (Some (Inop s))
+  | TB_cond cond args ifso ifnot info
+      (EQSO: target pc = target ifso)
+      (EQNOT: target pc = target ifnot)
+      (DECSO: bound ifso < bound pc)
+      (DECNOT: bound ifnot < bound pc):
+      target_bounds target bound pc (Some (Icond cond args ifso ifnot info))
+.
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td: UF) (pc: node): td!pc = None -> td pc = pc.
+Proof.
+  unfold target, get. intro EQ. rewrite EQ. auto.
+Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
+
+Lemma get_nonneg td pc t d: get td pc = (t,d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct td!pc as [(tpc,dpc)|]; intro H; inv H; lia.
+Qed.
+Local Hint Resolve get_nonneg: core.
+
+(**) Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
+
+
+(* TODO: à réécrire proprement *)
+Lemma check_instr_correct (td: UF) (pc: node) (i: instruction):
+  check_instr td pc i = OK tt ->
+  target_bounds (target td) (bound td) pc (Some i).
+Proof.
+  unfold check_instr. destruct (td!pc) as [(tpc,dpc)|] eqn:EQ.
+  assert (DPC: snd (get td pc) = Z.abs dpc). { unfold get. rewrite EQ. auto. }
+  - destruct i; try congruence.
+    + destruct (get td n) as (ts,ds) eqn:EQs.
+      destruct (peq _ _); try congruence.
+      destruct (zlt _ _); try congruence. intros _.
+      apply TB_nop. replace (td pc) with tpc.
+      unfold target. rewrite EQs. auto.
+      unfold target. unfold get. rewrite EQ. auto.
+      unfold bound. rewrite DPC. rewrite EQs; simpl. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n ts. apply EQs.
+    + destruct (get td n) as (tso,dso) eqn:EQSO.
+      destruct (get td n0) as (tnot,dnot) eqn:EQNOT.
+      intro H.
+      repeat ((destruct (peq _ _) in H || destruct (zlt _ _) in H); try congruence).
+      apply TB_cond; subst.
+      * unfold target. replace (fst (get td pc)) with tnot. rewrite EQSO. auto.
+        unfold get. rewrite EQ. auto.
+      * unfold target. replace (fst (get td pc)) with tnot. rewrite EQNOT. auto.
+        unfold get. rewrite EQ. auto.
+      * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n tnot. rewrite EQSO. auto. rewrite EQSO. auto.
+      * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n0 tnot. rewrite EQNOT; auto. rewrite EQNOT; auto.
+  - intros _. apply TB_default. unfold target. unfold get. rewrite EQ. auto.
+Qed.
+
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc i, c!pc = Some i -> target_bounds (target td) (bound td) pc (Some i).
+
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  unfold check_code. apply PTree_Properties.fold_rec; unfold check_code_spec.
+  - intros. rewrite <- H in H2. apply H0; auto.
+  - intros. rewrite PTree.gempty in H0. congruence.
+  - intros m [[]|e] pc i N S IND; simpl; try congruence.
+    intros H pc0 i0. rewrite PTree.gsspec. destruct (peq _ _).
+    subst. intro. inv H0. apply check_instr_correct. apply H.
+    auto.
+Qed.
+
+Theorem branch_target_bounds:
+  forall f tf pc,
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
+Proof.
+(* ~old
+  intros t tf pc. unfold tunnel_function.
+  destruct (check_included _ _) eqn:HI; try congruence.
+  destruct (check_code _ _) eqn: HC.
+  - intros _. destruct ((fn_code t) ! pc) eqn:EQ.
+    + exploit check_code_correct; eauto.
+    replace tt with u. auto. {destruct u; auto. }
+    + exploit check_included_correct; eauto. rewrite HI. congruence.
+    intro. apply TB_default. unfold target. unfold get. rewrite H. debug auto.
+  - simpl. congruence.
+*)
+
+  intros. unfold tunnel_function in H.
+  destruct (check_included _ _) eqn:EQinc; try congruence.
+  monadInv H. rename EQ into EQcode.
+  destruct (_ ! _) eqn:EQ.
+  - exploit check_code_correct. destruct x. apply EQcode. apply EQ. auto.
+  - exploit check_included_correct.
+    rewrite EQinc. congruence.
+    apply EQ.
+    intro. apply TB_default. apply target_None. apply H.
+Qed.
+
+(** Preservation of semantics *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+  (* rq: `(fun _ ...)` est la fonction pour matcher des fonctions
+   *     `eq` la fonction pour matcher les variables ? (`varinfo` dans la def)
+   *     `p` et `tp` sont les programmes donc on doit dire s'ils match
+   *)
+
+ (**) Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+
+Section PRESERVATION.
+
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog. (* rq: on suppose que les programmes match *)
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+(* rq: pour les deux lemmes suivants, j'ai recopié les preuves, mais je ne les comprends pas du tout... D'où vient `exploit` ?? *)
+(* rq: `exploit` vient de Coqlib! Mais je comprends pas encore vraiment ce qui se passe... *)
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_match TRANSL). apply H.
+  intros (cu & tf & A & B & C).
+  (* exists tf. split. apply B. apply A. *)
+  (* rq: on peut remplacer les `split` et `apply` par `eauto`, et ne pas spécifier le
+   * `exists`, avec un `eexists` *)
+  eexists. eauto.
+  (* rq: Je ne comprends pas à quoi sert le `intuition`... *)
+  (* repeat eexists; intuition eauto. *)
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros. exploit (Genv.find_funct_ptr_match TRANSL).
+  - apply H.
+  - intros (cu & tf & A & B & C). exists tf. split. apply A. apply B.
+Qed.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  apply (Genv.find_symbol_match TRANSL).
+  (* rq: ici pas de `exploit` mais un `apply` parce que `Genv.find_symbol_match
+   * prouve vraiment pile ce qu'on veut *)
+Qed.
+
+Lemma sig_preserved:
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f; simpl in H.
+  -
+    (*
+    unfold bind in H.
+    destruct (tunnel_function _) as [x|] eqn:EQ; try congruence.
+    inversion H.
+    unfold tunnel_function in EQ.
+    destruct (check_included _ _) in EQ; try congruence.
+    unfold bind in EQ. destruct (check_code _ _) in EQ; try congruence. inversion EQ. auto.
+    *)
+
+    monadInv H. (* rq: c'est une tactique maison... *)
+    unfold tunnel_function in EQ.
+    destruct (check_included _ _) in EQ; try congruence.
+    monadInv EQ. auto.
+  - monadInv H. auto.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve de cette propriété très exactement, je ne vais pas réinventer la roue ici *)
+Qed.
+
+(* TODO: vérifier s'il faut faire quelque chose avec le `res` ? *)
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframes_intro:
+      forall res f tf sp pc rs trs
+      (TF: tunnel_function f = OK tf)
+      (RS: Registers.regs_lessdef rs trs),
+      match_stackframes
+        (Stackframe res f sp pc rs)
+        (Stackframe res tf sp (branch_target f pc) trs).
+
+(* rq: `match_states s1 s2` correspond à s1 ~ s2 *)
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s ts f tf sp pc rs trs m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (TF: tunnel_function f = OK tf)
+      (RS: Registers.regs_lessdef rs trs)
+      (MEM: Mem.extends m tm),
+      match_states
+        (State s f sp pc rs m)
+        (State ts tf sp (branch_target f pc) trs tm)
+  | match_states_call:
+      forall s ts f tf a ta m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (TF: tunnel_fundef f = OK tf)
+      (ARGS: list_forall2 Val.lessdef a ta)
+      (MEM: Mem.extends m tm),
+      match_states
+        (Callstate s f a m)
+        (Callstate ts tf ta tm)
+  | match_states_return:
+      forall s ts v tv m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (VAL: Val.lessdef v tv)
+      (MEM: Mem.extends m tm),
+      match_states
+        (Returnstate s v m)
+        (Returnstate ts tv tm).
+
+(* TODO: il faut définir une bonne mesure *)
+(* (**) Definition measure (st: state) : nat :=
+  match st with
+  | State s f sp pc rs m =>
+      match (fn_code f)!pc with
+      | Some (Inop pc') => (bound (branch_target f) pc') * 2 + 1
+      | Some (Icond _ _ ifso ifnot _) => (max
+                                    (bound (branch_target f) ifso)
+                                    (bound (branch_target f) ifnot)
+                                  ) * 2 + 1
+      | Some _ => (bound (branch_target f) pc) * 2
+      | None => 0
+      end
+  | Callstate s f v m => 0
+  | Returnstate s v m => 0
+  end.
+*)
+
+Definition measure (st: state): nat :=
+  match st with
+  | State s f sp pc rs m => bound (branch_target f) pc
+  | Callstate s f v m => 0
+  | Returnstate s v m => 0
+  end.
+
+
+(* rq: sans les lemmes définis au-dessus je ne vois pas trop comment j'aurais
+ * fait... ni comment j'aurais eu l'idée d'en faire des lemmes ? *)
+(**) Lemma transf_initial_states:
+  forall s1: state, initial_state prog s1 ->
+  exists s2: state, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inversion H as [b f m0 ge0 MEM SYM PTR SIG CALL].
+  exploit function_ptr_translated.
+  - apply PTR.
+  - intros (tf & TPTR & TUN).
+    exists (Callstate nil tf nil m0). split.
+    + apply initial_state_intro with b.
+      * apply (Genv.init_mem_match TRANSL). apply MEM.
+      * rewrite (match_program_main TRANSL).
+        rewrite symbols_preserved. apply SYM.
+      * apply TPTR.
+      * rewrite <- SIG. apply sig_preserved. apply TUN.
+    + apply match_states_call.
+      * apply list_forall2_nil.
+      * apply TUN.
+      * apply list_forall2_nil.
+      * apply Mem.extends_refl.
+Qed.
+
+(**) Lemma transf_final_states:
+  forall (s1 : state)
+  (s2 : state) (r : Integers.Int.int),
+  match_states s1 s2 ->
+  final_state s1 r ->
+  final_state s2 r.
+Proof.
+  (* rq: `inv` au lieu de `inversion` fait beaucoup de nettoyage dans les
+   * hypothèses, mais je sais pas trop ce que ça fait exactement *)
+  intros. inv H0. inv H. inv VAL. inversion STK. apply final_state_intro.
+Qed.
+
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf ->
+  (fn_code tf) ! pc =
+  option_map (tunnel_instr (branch_target f)) (fn_code f) ! pc.
+Proof.
+  intros f tf pc.
+  unfold tunnel_function.
+  destruct (check_included _ _) eqn:EQinc; try congruence.
+  destruct (check_code _ _) eqn:EQcode; simpl; try congruence.
+  intro. inv H. simpl. rewrite PTree.gmap1. reflexivity.
+Qed.
+
+Lemma reglist_lessdef:
+  forall (rs trs: Registers.Regmap.t val) (args: list Registers.reg),
+  regs_lessdef rs trs -> Val.lessdef_list (rs##args) (trs##args).
+Proof.
+  intros. induction args; simpl; constructor.
+  apply H. apply IHargs.
+Qed.
+
+Lemma instruction_type_preserved:
+  forall (f tf:function) (pc:node) (i ti:instruction)
+  (TF: tunnel_function f = OK tf)
+  (FATPC: (fn_code f) ! pc = Some i)
+  (NOTINOP: forall s, i <> Inop s)
+  (NOTICOND: forall cond args ifso ifnot info, i <> Icond cond args ifso ifnot info)
+  (TI: ti = tunnel_instr (branch_target f) i),
+  (fn_code tf) ! (branch_target f pc) = Some ti.
+Proof.
+  intros.
+  assert ((fn_code tf) ! pc = Some (tunnel_instr (branch_target f) i)) as TFATPC.
+  rewrite (tunnel_function_unfold f tf pc); eauto.
+  rewrite FATPC; eauto.
+  exploit branch_target_bounds; eauto.
+  intro TB. inversion TB as [BT|s|cond args ifso ifnot info]; try (rewrite FATPC in H; congruence).
+Qed.
+
+(* rq: utilisée pour `tunnel_step_correct` mais je ne comprends pas tout *)
+(**) Lemma find_function_translated:
+  forall ros rs trs fd,
+  regs_lessdef rs trs ->
+  find_function ge ros rs = Some fd ->
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros trs = Some tfd.
+Proof.
+  intros. destruct ros; simpl in *.
+  - (* reg *)
+    assert (E: trs # r = rs # r).
+    { exploit Genv.find_funct_inv. apply H0. intros (b & EQ).
+      generalize (H r) . rewrite EQ. intro LD. inv LD. auto. }
+    rewrite E. exploit functions_translated; eauto.
+  - (* ident *)
+    rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0.
+    exploit function_ptr_translated; eauto.
+    intros (tf & X1 & X2). exists tf; intuition.
+Qed.
+
+Lemma list_forall2_lessdef_rs:
+  forall rs trs args,
+  regs_lessdef rs trs ->
+  list_forall2 Val.lessdef rs ## args trs ## args.
+Proof.
+  intros rs trs args LD.
+  exploit (reglist_lessdef rs trs args). apply LD.
+  induction args; simpl; intros H; try constructor; inv H.
+  apply H3. apply IHargs. apply H5.
+Qed.
+
+Lemma fn_stacksize_preserved:
+  forall f tf
+  (TF: tunnel_function f = OK tf),
+  fn_stacksize f = fn_stacksize tf.
+Proof.
+  intros f tf. unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  intro H. monadInv H. simpl. reflexivity.
+Qed.
+
+Lemma regs_setres_lessdef:
+  forall res vres tvres rs trs,
+  regs_lessdef rs trs -> Val.lessdef vres tvres ->
+  regs_lessdef (regmap_setres res vres rs) (regmap_setres res tvres trs).
+Proof.
+  induction res; intros; simpl; try auto using set_reg_lessdef.
+Qed.
+
+Lemma regmap_optget_lessdef:
+  forall or rs trs,
+  regs_lessdef rs trs -> Val.lessdef (regmap_optget or Vundef rs) (regmap_optget or Vundef trs).
+Proof.
+  intros or rs trs RS.
+  induction or; simpl; auto using set_reg_lessdef.
+Qed.
+
+Lemma tunnel_fundef_Internal:
+  forall (f: function) (tf: fundef)
+  (TF: tunnel_fundef (Internal f) = OK tf),
+  exists (tf': function), tf = (Internal tf') /\ tunnel_function f = OK tf'.
+Proof.
+  intros f tf.
+  unfold tunnel_fundef. simpl. intro H. monadInv H. exists x.
+  split. reflexivity. apply EQ.
+Qed.
+
+Lemma tunnel_fundef_External:
+  forall (ef: external_function) (tf: fundef)
+  (TF: tunnel_fundef (External ef) = OK tf),
+  tf = (External ef).
+Proof.
+  intros f tf.
+  unfold tunnel_fundef. simpl. intro H. monadInv H. reflexivity.
+Qed.
+
+
+Lemma fn_entrypoint_preserved:
+  forall f tf
+  (TF: tunnel_function f = OK tf),
+  fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf.
+  unfold tunnel_function. destruct (check_included _ _); try congruence.
+  intro TF. monadInv TF. simpl. reflexivity.
+Qed.
+
+Lemma init_regs_lessdef:
+  forall f tf args targs
+  (ARGS: list_forall2 Val.lessdef args targs)
+  (TF: tunnel_function f = OK tf),
+  regs_lessdef (init_regs args (fn_params f)) (init_regs targs (fn_params tf)).
+Proof.
+  assert (regs_lessdef (Regmap.init Vundef) (Regmap.init Vundef)) as Hundef.
+  { unfold Regmap.init. unfold regs_lessdef. intro. unfold Regmap.get. rewrite PTree.gempty. apply Val.lessdef_undef. }
+
+  intros f tf args targs ARGS.
+
+  unfold tunnel_function. destruct (check_included _ _) eqn:EQinc; try congruence.
+  intro TF. monadInv TF. simpl. 
+  (* 
+  induction ARGS.
+  - induction (fn_params f) eqn:EQP; apply Hundef.
+  - induction (fn_params f) eqn:EQP.
+    * simpl. apply Hundef.
+    * simpl. apply set_reg_lessdef. apply H. 
+    *)
+    
+  generalize (fn_params f) as l. induction ARGS; induction l; try (simpl; apply Hundef).
+    simpl. apply set_reg_lessdef; try assumption. apply IHARGS.
+Qed.
+
+Lemma lessdef_forall2_list:
+  forall args ta,
+  list_forall2 Val.lessdef args ta -> Val.lessdef_list args ta.
+Proof.
+  intros args ta H. induction H. apply Val.lessdef_list_nil. apply Val.lessdef_list_cons. apply H. apply IHlist_forall2.
+Qed.
+
+
+(* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *)
+Lemma tunnel_step_correct:
+  forall st1 t st2, step ge st1 t st2 ->
+  forall st1' (MS: match_states st1 st1'),
+  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
+  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
+Proof.
+  intros st1 t st2 H. induction H; intros; try (inv MS).
+  - (* Inop *)
+    exploit branch_target_bounds. apply TF.
+    rewrite H. intro. inv H0.
+    + (* TB_default *)
+      rewrite TB. left. eexists. split.
+      * apply exec_Inop. rewrite (tunnel_function_unfold f tf pc). rewrite H. simpl. eauto. apply TF.
+      * constructor; try assumption.
+    + (* TB_nop *)
+      simpl. right. repeat split. apply DEC.
+      rewrite EQ. apply match_states_intro; assumption.
+  - (* Iop *)
+    exploit eval_operation_lessdef; try eassumption.
+    apply reglist_lessdef. apply RS.
+    intros (tv & EVAL & LD).
+    left; eexists; split.
+    + eapply exec_Iop with (v:=tv).
+      apply instruction_type_preserved with (Iop op args res pc'); (simpl; auto)||congruence.
+      rewrite <- EVAL. apply eval_operation_preserved. apply symbols_preserved.
+    + apply match_states_intro; eauto. apply set_reg_lessdef. apply LD. apply RS.
+  - (* Iload *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef. apply RS.
+    intros (ta & EVAL & LD).
+    exploit Mem.loadv_extends; try eassumption.
+    intros (tv & LOAD & LD').
+    left. eexists. split.
+    + eapply exec_Iload.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply LOAD.
+    + apply match_states_intro; try assumption. apply set_reg_lessdef. apply LD'. apply RS.
+  - (* Iload NOTRAP1 *)
+    exploit eval_addressing_lessdef_none; try eassumption.
+    apply reglist_lessdef; apply RS.
+    left. eexists. split.
+    + eapply exec_Iload_notrap1.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- H1. apply eval_addressing_preserved. apply symbols_preserved.
+    + apply match_states_intro; try assumption. apply set_reg_lessdef. apply Val.lessdef_undef. apply RS.
+  - (* Iload NOTRAP2 *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef; apply RS.
+    intros (ta & EVAL & LD).
+    (* TODO: on peut sans doute factoriser ça *)
+    destruct (Mem.loadv chunk tm ta) eqn:Htload.
+    + left; eexists; split.
+      eapply exec_Iload.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply Htload.
+      * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto.
+    + left; eexists; split.
+      eapply exec_Iload_notrap2.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply Htload.
+      * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto.
+  - (* Lstore *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef; apply RS.
+    intros (ta & EVAL & LD).
+    exploit Mem.storev_extends; try eassumption. apply RS.
+    intros (tm' & STORE & MEM').
+    left. eexists. split.
+    + eapply exec_Istore.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * rewrite STORE. reflexivity.
+    + apply match_states_intro; try eassumption.
+  - (* Icall *)
+    left.
+    exploit find_function_translated; try eassumption.
+    intros (tfd & TFD & FIND).
+    eexists. split.
+    + eapply exec_Icall.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * apply FIND.
+      * apply sig_preserved. apply TFD.
+    + apply match_states_call; try assumption.
+      * apply list_forall2_cons; try assumption. apply match_stackframes_intro; try assumption.
+      * apply list_forall2_lessdef_rs. apply RS.
+  - (* Itailcall *)
+    exploit find_function_translated; try eassumption.
+    intros (tfd & TFD & FIND).
+    exploit Mem.free_parallel_extends; try eassumption.
+    intros (tm' & FREE & MEM').
+    left. eexists. split.
+    + eapply exec_Itailcall.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * apply FIND.
+      * apply sig_preserved. apply TFD.
+      * erewrite <- fn_stacksize_preserved; try eassumption.
+    + apply match_states_call; try assumption.
+      apply list_forall2_lessdef_rs. apply RS.
+  - (* Ibuiltin *)
+    exploit eval_builtin_args_lessdef; try eassumption. apply RS.
+    intros (vl2 & EVAL & LD).
+    exploit external_call_mem_extends; try eassumption.
+    intros (tvres & tm' & EXT & LDRES & MEM' & UNCHGD).
+    left. eexists. split.
+    + eapply exec_Ibuiltin.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * eapply eval_builtin_args_preserved. eapply symbols_preserved. eapply EVAL.
+      * eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXT.
+    + apply match_states_intro; try assumption. apply regs_setres_lessdef; try assumption.
+  - (* Icond *)
+    simpl. exploit branch_target_bounds. apply TF. rewrite H. intro. inv H1.
+    + (* TB_default *)
+      rewrite TB.
+      destruct (fn_code tf)!pc as [[]|] eqn:EQ;
+      assert (tunnel_function f = OK tf) as TF'; auto;
+      unfold tunnel_function in TF; destruct (check_included _ _) in TF; monadInv TF;
+      simpl in EQ; rewrite PTree.gmap1 in EQ; rewrite H in EQ; simpl in EQ; destruct (peq _ _) eqn: EQpeq in EQ; try congruence.
+      * left. eexists. split. 
+        -- eapply exec_Inop. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity.
+        -- destruct b. apply match_states_intro; eauto. rewrite e. apply match_states_intro; eauto.
+      * left. eexists. split.
+        -- eapply exec_Icond; auto. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity. eapply eval_condition_lessdef; try eassumption. apply reglist_lessdef. apply RS.
+        -- destruct b; apply match_states_intro; auto.
+    + (* TB_cond *) right; repeat split; destruct b; try assumption.
+      * rewrite EQSO. apply match_states_intro; try assumption.
+      * rewrite EQNOT. apply match_states_intro; try assumption.
+  - (* Ijumptable *)
+    left. eexists. split.
+    + eapply exec_Ijumptable.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * generalize (RS arg). rewrite H0. intro. inv H2. reflexivity.
+      * rewrite list_nth_z_map. rewrite H1. simpl. reflexivity.
+    + apply match_states_intro; try eassumption.
+  - (* Ireturn *)
+    exploit Mem.free_parallel_extends; try eassumption.
+    intros (tm' & FREE & MEM').
+    left. eexists. split.
+    + eapply exec_Ireturn.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * erewrite <- fn_stacksize_preserved. eapply FREE. eapply TF.
+    + apply match_states_return; try eassumption.
+      apply regmap_optget_lessdef. apply RS.
+  - (* internal function *)
+    exploit tunnel_fundef_Internal; try eassumption.
+    intros (tf' & EQ & TF'). subst.
+    exploit Mem.alloc_extends; try eassumption. reflexivity. reflexivity.
+    intros (m2' & ALLOC & EXT).
+    left. eexists. split.
+    + eapply exec_function_internal.
+      rewrite <- (fn_stacksize_preserved f tf'). eapply ALLOC. eapply TF'.
+    + rewrite (fn_entrypoint_preserved f tf'); try eassumption. apply match_states_intro; try eassumption.
+      apply init_regs_lessdef. apply ARGS. apply TF'.
+  - (* external function *)
+    exploit external_call_mem_extends. eapply H. eapply MEM.  eapply lessdef_forall2_list. eapply ARGS.
+    intros (tvres & tm' & EXTCALL & LD & EXT & MEMUNCHGD).
+    left. eexists. split.
+    + erewrite (tunnel_fundef_External ef tf); try eassumption.
+      eapply exec_function_external. eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXTCALL.
+    + eapply match_states_return; try assumption.
+  - (* return *)
+    inv STK. inv H1.
+    left. eexists. split.
+    + eapply exec_return.
+    + eapply match_states_intro; try assumption.
+      apply set_reg_lessdef; try assumption.
+Qed.
+
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_opt.
+  apply senv_preserved.
+  apply transf_initial_states.
+  apply transf_final_states.
+  exact tunnel_step_correct.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml
deleted file mode 100644
index 87e6d303..00000000
--- a/backend/Tunnelingaux.ml
+++ /dev/null
@@ -1,283 +0,0 @@
-(* *************************************************************)
-(*                                                             *)
-(*             The Compcert verified compiler                  *)
-(*                                                             *)
-(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
-(*                                                             *)
-(*  Copyright VERIMAG. All rights reserved.                    *)
-(*  This file is distributed under the terms of the INRIA      *)
-(*  Non-Commercial License Agreement.                          *)
-(*                                                             *)
-(* *************************************************************)
-
-(*
-
-This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
-and computes their target node with the distance (ie the number of cummulated nops) toward this target.
-
-See [Tunneling.v]
-
-*)
-
-open Coqlib
-open LTL
-open Maps
-open Camlcoq
-
-let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *)
-let debug_flag = ref false
-let final_dump = false   (* set to true to have a more verbose debugging *)
-
-let debug fmt =
-  if !debug_flag then Printf.eprintf fmt
-  else Printf.ifprintf stderr fmt
-
-exception BugOnPC of int
-
-(* type of labels in the cfg *)
-type label = int * P.t
-
-(* instructions under analyzis *)
-type simple_inst = (* a simplified view of LTL instructions *)
-  LBRANCH of node
-| LCOND of node * node
-| OTHER
-and node = {
-    lab : label;
-    mutable inst: simple_inst;
-    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
-    mutable dist: int;
-    mutable tag: int
-  }
-
-(* type of the (simplified) CFG *)
-type cfg = {
-    nodes: (int, node) Hashtbl.t;
-    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
-    mutable num_rems: int;
-    mutable iter_num: int (* number of iterations in elimination of conditions *)
-  }
-
-let lab_i (n: node): int = fst n.lab
-let lab_p (n: node): P.t = snd n.lab
-
-let rec target c n = (* inspired from the "find" of union-find algorithm *)
-  match n.inst with
-  | LCOND(s1,s2) ->
-     if n.link != n
-     then update c n
-     else if n.tag < c.iter_num then (
-       (* we try to change the condition ... *)
-       n.tag <- c.iter_num; (* ... but at most once by iteration *)
-       let ts1 = target c s1 in
-       let ts2 = target c s2 in
-       if ts1 == ts2 then (n.link <- ts1; ts1) else n
-     ) else n
-  | _ ->
-     if n.link != n
-     then update c n
-     else n
-and update c n =
-  let t = target c n.link in
-  n.link <- t; t
-
-let get_node c p =
-  let li = P.to_int p in
-  try
-    Hashtbl.find c.nodes li
-  with
-    Not_found ->
-      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
-      Hashtbl.add c.nodes li n;
-      n
-
-let set_branch c p s =
-  let li = P.to_int p in
-  try
-    let n = Hashtbl.find c.nodes li in
-    n.inst <- LBRANCH s;
-    n.link <- target c s
-  with
-    Not_found ->
-      let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in
-      Hashtbl.add c.nodes li n
-
-
-(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
-let build_simplified_cfg c acc pc bb =
-  match bb with
-  | Lbranch s :: _ ->
-     let ns = get_node c s in
-     set_branch c pc ns;
-     acc
-  | Lcond (_, _, s1, s2, _) :: _ ->
-     c.num_rems <- c.num_rems + 1;
-     let ns1 = get_node c s1 in
-     let ns2 = get_node c s2 in
-     let npc = get_node c pc in
-     npc.inst <- LCOND(ns1, ns2);
-     npc::acc
-  | _ -> acc
-
-(* try to change a condition into a branch
-[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
-*)
-let try_change_cond c acc pc =
-  match pc.inst with
-  | LCOND(s1,s2) ->
-     let ts1 = target c s1 in
-     let ts2 = target c s2 in
-     if ts1 == ts2 then (
-       pc.link <- ts1;
-       c.num_rems <- c.num_rems - 1;
-       acc
-     ) else
-       pc::acc
-  | _  -> raise (BugOnPC (lab_i pc)) (* LCOND expected *)
-
-(* repeat [try_change_cond] until no condition is changed into a branch *)
-let rec repeat_change_cond c =
-  c.iter_num <- c.iter_num + 1;
-  debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems);
-  let old =  c.num_rems in
-  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
-  let curr = c.num_rems in
-  let continue =
-    match limit_tunneling with
-    | Some n -> curr < old && c.iter_num < n
-    | None -> curr < old
-  in
-  if continue
-  then repeat_change_cond c
-
-
-(* compute the final distance of each nop nodes to its target *)
-let undef_dist = -1
-let self_dist = undef_dist-1
-let rec dist n =
-  if n.dist = undef_dist
-  then (
-    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
-    n.dist <-
-      (match n.inst with
-       | OTHER -> 0
-       | LBRANCH p -> 1 + dist p
-       | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
-    n.dist
-  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
-    else n.dist
-
-let final_export f c =
-  let count = ref 0 in
-  let filter_nops_init_dist _ n acc =
-    let tn = target c n in
-    if tn == n
-    then (
-      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
-      acc
-    ) else (
-      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
-      count := !count+1;
-      n::acc
-    )
-  in
-  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
-  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
-  debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count;
-  res
-
-(*********************************************)
-(*** START: printing and debugging functions *)
-
-let string_of_labeli nodes ipc =
-  try
-    let pc = Hashtbl.find nodes ipc in
-    if pc.link == pc
-    then Printf.sprintf "(Target@%d)" (dist pc)
-    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
-  with
-    Not_found -> ""
-
-let print_bblock c println (pc, bb) =
-  match bb with
-  | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
-  | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
-  | _ -> debug "%d " pc; true
-
-
-let print_cfg f c  =
-  let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in
-  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
-  let ep = P.to_int f.fn_entrypoint in
-  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
-  let println = Array.fold_left (print_bblock c) false a in
-  (if println then debug "\n");debug "remaining cond:";
-  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
-  debug "\n"
-
-(*************************************************************)
-(* Copy-paste of the extracted code of the verifier          *)
-(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
-
-let get td pc =
-  match PTree.get pc td with
-  | Some p -> let (t0, d) = p in (t0, d)
-  | None -> (pc, Z.of_uint 0)
-
-let check_bblock td pc bb =
-  match PTree.get pc td with
-  | Some p ->
-    let (tpc, dpc) = p in
-    let dpc0 = dpc in
-    (match bb with
-     | [] ->
-       raise (BugOnPC (P.to_int pc))
-     | i :: _ ->
-       (match i with
-        | Lbranch s ->
-          let (ts, ds) = get td s in
-          if peq tpc ts
-          then if zlt ds dpc0
-               then ()
-               else raise (BugOnPC (P.to_int pc))
-          else raise (BugOnPC (P.to_int pc))
-        | Lcond (_, _, s1, s2, _) ->
-          let (ts1, ds1) = get td s1 in
-          let (ts2, ds2) = get td s2 in
-          if peq tpc ts1
-          then if peq tpc ts2
-               then if zlt ds1 dpc0
-                    then if zlt ds2 dpc0
-                         then ()
-                         else raise (BugOnPC (P.to_int pc))
-                    else raise (BugOnPC (P.to_int pc))
-               else raise (BugOnPC (P.to_int pc))
-          else raise (BugOnPC (P.to_int pc))
-        | _ ->
-          raise (BugOnPC (P.to_int pc))))
-  | None -> ()
-
-(** val check_code : coq_UF -> code -> unit res **)
-
-let check_code td c =
-  PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (())
-
-(*** END: copy-paste & debugging functions *******)
-
-let branch_target f =
-  debug "* Tunneling.branch_target: starting on a new function\n";
-  if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
-  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
-  c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code [];
-  repeat_change_cond c;
-  let res = final_export f c in
-  if !debug_flag then (
-    try
-      check_code res f.fn_code;
-      if final_dump then print_cfg f c;
-    with e -> (
-      print_cfg f c;
-      check_code res f.fn_code
-    )
-  );
-  res
diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml
new file mode 100644
index 00000000..7c826ba4
--- /dev/null
+++ b/backend/Tunnelinglibs.ml
@@ -0,0 +1,272 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*           TODO: Proper author information                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the core functions of the tunneling passes, for both RTL
+and LTL, by using a simplified CFG as a transparent interface
+
+See [LTLTunneling.v] and [RTLTunneling.v]
+
+*)
+
+open Maps
+open Camlcoq
+
+(* type of labels in the cfg *)
+type label = int * P.t
+
+(* instructions under analyzis *)
+type simple_inst = (* a simplified view of instructions *)
+  BRANCH of node
+| COND of node * node
+| OTHER
+and node = {
+    lab: label;
+    mutable inst: simple_inst;
+    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
+    mutable dist: int;
+    mutable tag: int
+  }
+
+type positive = P.t
+type integer = Z.t
+
+(* type of the (simplified) CFG *)
+type cfg = {
+    nodes: (int, node) Hashtbl.t;
+    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
+    mutable num_rems: int;
+    mutable iter_num: int (* number of iterations in elimination of conditions *)
+  }
+
+exception BugOnPC of int
+
+(* keeps track of the total number of nops seen, for debugging purposes *)
+let nopcounter = ref 0
+
+(* General functions that do not require language-specific context, and that
+ are used for building language-specific functions *)
+
+let rec target c n = (* inspired from the "find" of union-find algorithm *)
+  match n.inst with
+  | COND(s1,s2) ->
+     if n.link != n
+     then update c n
+     else if n.tag < c.iter_num then (
+       (* we try to change the condition ... *)
+       n.tag <- c.iter_num; (* ... but at most once by iteration *)
+       let ts1 = target c s1 in
+       let ts2 = target c s2 in
+       if ts1 == ts2 then (n.link <- ts1; ts1) else n
+     ) else n
+  | _ ->
+     if n.link != n
+     then update c n
+     else n
+and update c n =
+  let t = target c n.link in
+  n.link <- t; t
+
+let get_node c p =
+  let li = P.to_int p in
+  try
+    Hashtbl.find c.nodes li
+  with
+    Not_found ->
+      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
+      Hashtbl.add c.nodes li n;
+      n
+
+let set_branch c p s =
+  let li = P.to_int p in
+  try
+    let n = Hashtbl.find c.nodes li in
+    n.inst <- BRANCH s;
+    n.link <- target c s
+  with
+    Not_found ->
+      let n = { lab = (li,p); inst = BRANCH s; link = target c s; dist = 0; tag = 0 } in
+      Hashtbl.add c.nodes li n
+
+let get td pc =
+  match PTree.get pc td with
+  | Some p -> let (t0, d) = p in (t0, d)
+  | None -> (pc, Z.of_uint 0)
+
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
+
+let undef_dist = -1
+let self_dist = undef_dist-1
+let rec dist n =
+  if n.dist = undef_dist
+  then (
+    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
+    n.dist <-
+      (match n.inst with
+       | OTHER -> 0
+       | BRANCH p -> 1 + dist p
+       | COND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
+    n.dist
+  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
+    else n.dist
+
+let string_of_labeli nodes ipc =
+  try
+    let pc = Hashtbl.find nodes ipc in
+    if pc.link == pc
+    then Printf.sprintf "(Target@%d)" (dist pc)
+    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
+  with
+    Not_found -> ""
+
+(*
+ * When given the necessary types and options as context, and then some
+ * language-specific functions that cannot be factorised between LTL and RTL, the
+ * `Tunneling` functor returns a module containing the corresponding
+ * `branch_target` function.
+ *)
+
+module Tunneling = functor
+  (* Language-specific types *)
+  (LANG: sig
+    type code_unit (* the type of a node of the code cfg (an instruction or a bblock *)
+    type funct (* type of internal functions *)
+  end)
+
+  (* Compilation options for debugging *)
+  (OPT: sig
+    val langname: string
+    val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *)
+    val debug_flag: bool ref
+    val final_dump: bool (* set to true to have a more verbose debugging *)
+  end)
+  -> struct
+
+  (* The `debug` function uses values from `OPT`, and is used in functions passed to `F`
+     so it must be defined between the two *)
+  let debug fmt =
+    if !OPT.debug_flag then Printf.eprintf fmt
+    else Printf.ifprintf stderr fmt
+
+  module T
+    (* Language-specific functions *)
+    (FUNS: sig
+      (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
+      val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list
+      val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool
+      val fn_code: LANG.funct -> LANG.code_unit PTree.t
+      val fn_entrypoint: LANG.funct -> positive
+      val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit
+    end)
+    (* only export what's needed *)
+    : sig val branch_target: LANG.funct -> (positive * integer) PTree.t end
+  = struct
+
+    (* try to change a condition into a branch [acc] is the current accumulator of
+       conditions to consider in the next iteration of repeat_change_cond *)
+    let try_change_cond c acc pc =
+      match pc.inst with
+      | COND(s1,s2) ->
+         let ts1 = target c s1 in
+         let ts2 = target c s2 in
+         if ts1 == ts2 then (
+           pc.link <- ts1;
+           c.num_rems <- c.num_rems - 1;
+           acc
+         ) else
+           pc::acc
+      | _  -> raise (BugOnPC (lab_i pc)) (* COND expected *)
+
+    (* repeat [try_change_cond] until no condition is changed into a branch *)
+    let rec repeat_change_cond c =
+      c.iter_num <- c.iter_num + 1;
+      debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems);
+      let old =  c.num_rems in
+      c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+      let curr = c.num_rems in
+      let continue =
+        match OPT.limit_tunneling with
+        | Some n -> curr < old && c.iter_num < n
+        | None -> curr < old
+      in
+      if continue
+      then repeat_change_cond c
+
+
+    (*********************************************)
+    (*** START: printing and debugging functions *)
+
+    let print_cfg (f: LANG.funct) c  =
+      let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in
+      Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+      let ep = P.to_int (FUNS.fn_entrypoint f) in
+      debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+      let println = Array.fold_left (FUNS.print_code_unit c) false a in
+      (if println then debug "\n");debug "remaining cond:";
+      List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+      debug "\n"
+
+
+    (*************************************************************)
+    (* Copy-paste of the extracted code of the verifier          *)
+    (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+    (** val check_code : coq_UF -> code -> unit res **)
+
+    let check_code td c =
+      PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (())
+
+    (*** END: copy-paste & debugging functions *******)
+
+    (* compute the final distance of each nop nodes to its target *)
+    let final_export f c =
+      let count = ref 0 in
+      let filter_nops_init_dist _ n acc =
+        let tn = target c n in
+        if tn == n
+        then (
+          n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+          acc
+        ) else (
+          n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+          count := !count+1;
+          n::acc
+        )
+      in
+      let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+      let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
+      debug "* %sTunneling.branch_target: initial number of nops = %d\n" OPT.langname !nopcounter;
+      debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n" OPT.langname !count;
+      res
+
+    let branch_target f =
+      debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname;
+      if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+      let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+      c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) [];
+      repeat_change_cond c;
+      let res = final_export f c in
+      if !OPT.debug_flag then (
+        try
+          check_code res (FUNS.fn_code f);
+          if OPT.final_dump then print_cfg f c;
+        with e -> (
+          print_cfg f c;
+          check_code res (FUNS.fn_code f)
+        )
+      );
+      res
+  end
+end
diff --git a/doc/index-kvx.html b/doc/index-kvx.html
index 62afb423..daa4cdc4 100644
--- a/doc/index-kvx.html
+++ b/doc/index-kvx.html
@@ -315,10 +315,10 @@ This IR is generic over the processor, even if currently, only used for KVX.
 </TR>
 
 <TR valign="top">
-  <TD>Branch tunneling</TD>
+  <TD>(LTL) Branch tunneling</TD>
   <TD>LTL to LTL</TD>
-  <TD><A HREF="html/compcert.backend.Tunneling.html">Tunneling</A></TD>
-  <TD><A HREF="html/compcert.backend.Tunnelingproof.html">Tunnelingproof</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunneling.html">LTLTunneling</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunnelingproof.html">LTLTunnelingproof</A></TD>
 </TR>
 
 <TR valign="top">
diff --git a/doc/index.html b/doc/index.html
index c3912cb2..34b87924 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -270,10 +270,10 @@ code.
 </TR>
 
 <TR valign="top">
-  <TD>Branch tunneling</TD>
+  <TD>(LTL) Branch tunneling</TD>
   <TD>LTL to LTL</TD>
-  <TD><A HREF="html/compcert.backend.Tunneling.html">Tunneling</A></TD>
-  <TD><A HREF="html/compcert.backend.Tunnelingproof.html">Tunnelingproof</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunneling.html">LTLTunneling</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunnelingproof.html">LTLTunnelingproof</A></TD>
 </TR>
 
 <TR valign="top">
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 2a680d4d..33c5964a 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -63,7 +63,7 @@ let option_ftargetinnerloops = ref false
 let option_mtune = ref ""
                 
 let option_fprepass = ref true
-let option_fprepass_sched = ref "list"
+let option_fprepass_sched = ref "regpres"
 
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand
index a32900b5..3442502f 100644
--- a/driver/Compiler.vexpand
+++ b/driver/Compiler.vexpand
@@ -346,7 +346,7 @@ EXPAND_RTL_FORWARD_SIMULATIONS
   eapply compose_forward_simulations.
     eapply Allocationproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
-    eapply Tunnelingproof.transf_program_correct; eassumption.
+    eapply LTLTunnelingproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply Linearizeproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 0203d9c8..e3a421a5 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -349,13 +349,13 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
 
 (* A scheduler sensitive to register pressure *)
 let reg_pres_scheduler (problem : problem) : solution option =
-  DebugPrint.debug_flag := true;
-
+  (* DebugPrint.debug_flag := true; *)
   let nr_instructions = get_nr_instructions problem in
 
   if !Clflags.option_debug_compcert > 6 then
-    DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions;
-
+    (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions;
+     flush stderr);
+  
   let successors = get_successors problem
   and predecessors = get_predecessors problem
   and times = Array.make (nr_instructions+1) (-1) in
@@ -495,32 +495,22 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
   let attempt_scheduling ready usages =
     let result = ref (-1) in
+    DebugPrint.debug "\n\nREADY: ";
+    InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready;
+    DebugPrint.debug "\n\n";
     try
       Array.iteri (fun i avlregs ->
-          (* print_string "avlregs: ";
-           * print_int i;
-           * print_string " ";
-           * print_int avlregs;
-           * print_newline ();
-           * print_string "live regs: ";
-           * print_int (Hashtbl.length live_regs);
-           * print_newline ();
-           * flush stdout; *)
-          if avlregs <= regs_thresholds.(i)
+          DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n"
+            i avlregs (Hashtbl.length live_regs); 
+          if !cnt < 5 && avlregs <= regs_thresholds.(i)
           then (
             csr_b := true;
             let maybe = InstrSet.sched_CSR i ready usages in
-            (* print_string "maybe\n";
-             * print_int maybe;
-             * print_newline ();
-             * flush stdout; *)
-            (if maybe > 0 &&
+            DebugPrint.debug "maybe %d\n" maybe;
+            (if maybe >= 0 &&
                   let delta = 
                     List.fold_left (fold_delta i) 0 mentions.(maybe) in
-                  (* print_string "delta ";
-                   * print_int delta;
-                   * print_newline ();
-                   * flush stdout; *)
+                  DebugPrint.debug "delta %d\n" delta;
                   delta > 0
              then
                (vector_subtract usages.(maybe) current_resources;
@@ -534,7 +524,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
                       then result := ins
                     ) ready;
                   if !result <> -1 then
-                    vector_subtract usages.(!result) current_resources)
+                    vector_subtract usages.(!result) current_resources;
+                  incr cnt)
                else
                  (incr cnt)
             );
@@ -558,33 +549,31 @@ let reg_pres_scheduler (problem : problem) : solution option =
               problem.instruction_usages with
       | -1 -> advance_time()
       | i -> (assert(times.(i) < 0);
-            ((* print_string "INSTR ISSUED: ";
-              * print_int i;
-              * print_newline ();
-              * flush stdout; *)
-             if !csr_b && !Clflags.option_debug_compcert > 6 then
-               DebugPrint.debug "REGPRES: high pres class %d\n" i;
-             csr_b := false;
-             if !Clflags.option_regpres_wait_window then
-               cnt := 0;
-             List.iter (fun (r,b) ->
-                 if b then
-                   (match Hashtbl.find_opt counts r with
-                    | None -> assert false
-                    | Some (t, n) ->
-                       Hashtbl.remove counts r;
-                       if n = 1 then
-                         (Hashtbl.remove live_regs r;
-                          available_regs.(t)
-                          <- available_regs.(t) + 1))
-                 else
-                   let t = class_r r in
-                   match Hashtbl.find_opt live_regs r with
-                   | None -> (Hashtbl.add live_regs r t;
-                             available_regs.(t)
-                             <- available_regs.(t) - 1)
-                   | Some i -> ()
-               ) mentions.(i));
+             (DebugPrint.debug "INSTR ISSUED: %d\n" i;
+              if !csr_b && !Clflags.option_debug_compcert > 6 then
+                (Printf.eprintf "REGPRES: high pres class %d\n" i;
+                 flush stderr);
+              csr_b := false;
+              (* if !Clflags.option_regpres_wait_window then *)
+              cnt := 0;
+              List.iter (fun (r,b) ->
+                  if b then
+                    (match Hashtbl.find_opt counts r with
+                     | None -> assert false
+                     | Some (t, n) ->
+                        Hashtbl.remove counts r;
+                        if n = 1 then
+                          (Hashtbl.remove live_regs r;
+                           available_regs.(t)
+                           <- available_regs.(t) + 1))
+                  else
+                    let t = class_r r in
+                    match Hashtbl.find_opt live_regs r with
+                    | None -> (Hashtbl.add live_regs r t;
+                              available_regs.(t)
+                              <- available_regs.(t) - 1)
+                    | Some i -> ()
+                ) mentions.(i));
              times.(i) <- !current_time;
              ready.(!current_time)
              <- InstrSet.remove i (ready.(!current_time));
@@ -595,8 +584,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
                    | to_time ->
                       ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
                         *   (Array.length ready); *)
-                      ready.(to_time)
-                      <- InstrSet.add instr_to ready.(to_time))
+                       ready.(to_time)
+                       <- InstrSet.add instr_to ready.(to_time))
                ) successors.(i);
              successors.(i) <- []
             )
@@ -605,8 +594,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
   try
     let final_time = ref (-1) in
     for i = 0 to nr_instructions - 1 do
-      (* print_int i;
-       * flush stdout; *)
+      DebugPrint.debug "%d " i;
       (if times.(i) < 0 then raise Exit);
       (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
     done;
@@ -615,11 +603,11 @@ let reg_pres_scheduler (problem : problem) : solution option =
         if target_time > !final_time then
           final_time := target_time) predecessors.(nr_instructions);
     times.(nr_instructions) <- !final_time;
-    DebugPrint.debug_flag := false;
+    (* DebugPrint.debug_flag := false; *)
     Some times
   with Exit ->
     DebugPrint.debug "reg_pres_sched failed\n";
-    DebugPrint.debug_flag := false;
+    (* DebugPrint.debug_flag := false; *)
     None
               
 ;;
@@ -628,8 +616,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
 (********************************************************************)
 
 let reg_pres_scheduler_bis (problem : problem) : solution option =
-  DebugPrint.debug_flag := true;
-  Printf.printf "\nNEW\n\n";
+  (* Printf.printf "\nNEW\n\n"; *)
   let nr_instructions = get_nr_instructions problem in
   let successors = get_successors problem
   and predecessors = get_predecessors problem
@@ -814,11 +801,9 @@ let reg_pres_scheduler_bis (problem : problem) : solution option =
         if target_time > !final_time then
           final_time := target_time) predecessors.(nr_instructions);
     times.(nr_instructions) <- !final_time;
-    DebugPrint.debug_flag := false;
     Some times
   with Exit ->
     DebugPrint.debug "reg_pres_sched failed\n";
-    DebugPrint.debug_flag := false;
     None
               
 ;;
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 4c33aa8a..a0809296 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -13,6 +13,8 @@ type print_result = Noprint | Print of string;;
 type when_triggered = Always | Option of string;;
 type needs_require = Require | NoRequire;;
 
+let rtl_tunneling = PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling"
+
 (* FIXME - The gestion of NoRequire is a bit ugly right now. *)
 let rtl_passes =
 [|
@@ -38,6 +40,7 @@ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3";
 TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves";
 TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves";
 PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode";
+rtl_tunneling;
 TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber";
 PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
 TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber";
@@ -47,7 +50,6 @@ PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"
 PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode";
 TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap";
 PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob";
-TOTAL, Always, Require, (Some "Renumbering pre renaming+if-lifting for readability; REMOVEME"), "Renumber"
 |];;
 
 let post_rtl_passes =
@@ -83,7 +85,7 @@ let post_rtl_passes =
   (* PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode", Noprint; *)
 
   PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1");
-  PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2");
+  PARTIAL, Always, Require, (Some "LTL Branch tunneling"), "LTLTunneling", (Print "LTL 2");
   PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint;
   TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint;
   PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint;
author	Justus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr>	2021-08-17 14:59:49 +0200
committer	Justus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr>	2021-08-17 14:59:49 +0200
commit	d0fd34aa9cc0d3c94a2a3af56fa9d50115a0e37f (patch)
tree	5964f47dcbe87c243d2b93b735a3f51aab31be39
parent	8ac2401ef5daea824b58e6e3424dd216a41b2d6d (diff)
parent	1fbe45e2d1f02ef6e8fb6fe7545728a744e047b8 (diff)
download	compcert-kvx-d0fd34aa9cc0d3c94a2a3af56fa9d50115a0e37f.tar.gz compcert-kvx-d0fd34aa9cc0d3c94a2a3af56fa9d50115a0e37f.zip