107 files changed, 17510 insertions, 1314 deletions
diff --git a/backend/Allnontrap.v b/backend/Allnontrap.v
new file mode 100644
index 00000000..fedf14f7
--- /dev/null
+++ b/backend/Allnontrap.v
@@ -0,0 +1,38 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+
+Definition transf_ros (ros: reg + ident) : reg + ident := ros.
+
+Definition transf_instr (pc: node) (instr: instruction) :=
+  match instr with
+  | Iload trap chunk addr args dst s => Iload NOTRAP chunk addr args dst s
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map transf_instr f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
+
diff --git a/backend/Allnontrapproof.v b/backend/Allnontrapproof.v
new file mode 100644
index 00000000..157c5de2
--- /dev/null
+++ b/backend/Allnontrapproof.v
@@ -0,0 +1,227 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import Allnontrap.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some(transf_instr pc i).
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite Hcode.
+  reflexivity.
+Qed.
+
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+  | match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. 
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  constructor; auto. 
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. constructor; auto. constructor.
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. auto.
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  constructor; auto.
+(* return *)
+- econstructor; split.
+  eapply exec_Ireturn; eauto.
+  constructor; auto.
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Allocation.v b/backend/Allocation.v
index 08e0a4f4..2323c050 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -58,7 +58,7 @@ Inductive block_shape: Type :=
          (mv2: moves) (s: node)
   | BSopdead (op: operation) (args: list reg) (res: reg)
          (mv: moves) (s: node)
-  | BSload (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
+  | BSload (trap : trapping_mode) (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
          (mv1: moves) (args': list mreg) (dst': mreg)
          (mv2: moves) (s: node)
   | BSloaddead (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
@@ -226,15 +226,19 @@ Definition pair_instr_block
       | operation_other _ _ =>
           pair_Iop_block op args res s b
       end
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       let (mv1, b1) := extract_moves nil b in
       match b1 with
-      | Lload chunk' addr' args' dst' :: b2 =>
+      | Lload trap' chunk' addr' args' dst' :: b2 =>
+        assertion (trapping_mode_eq trap' trap);
           if chunk_eq chunk Mint64 && Archi.splitlong then
+            (* TODO: do not support non trapping split loads *)
+            assertion (trapping_mode_eq trap TRAP);
             assertion (chunk_eq chunk' Mint32);
             let (mv2, b3) := extract_moves nil b2 in
             match b3 with
-            | Lload chunk'' addr'' args'' dst'' :: b4 =>
+            | Lload trap'' chunk'' addr'' args'' dst'' :: b4 =>
+              assertion (trapping_mode_eq trap'' TRAP);
                 let (mv3, b5) := extract_moves nil b4 in
                 assertion (chunk_eq chunk'' Mint32);
                 assertion (eq_addressing addr addr');
@@ -254,7 +258,7 @@ Definition pair_instr_block
             assertion (chunk_eq chunk chunk');
             assertion (eq_addressing addr addr');
             assertion (check_succ s b3);
-            Some(BSload chunk addr args dst mv1 args' dst' mv2 s))
+            Some(BSload trap chunk addr args dst mv1 args' dst' mv2 s))
       | _ =>
           assertion (check_succ s b1);
           Some(BSloaddead chunk addr args dst mv1 s)
@@ -310,10 +314,10 @@ Definition pair_instr_block
           Some(BSbuiltin ef args res mv1 args' res' mv2 s)
       | _ => None
       end
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 i =>
       let (mv1, b1) := extract_moves nil b in
       match b1 with
-      | Lcond cond' args' s1' s2' :: b2 =>
+      | Lcond cond' args' s1' s2' i' :: b2 =>
           assertion (eq_condition cond cond');
           assertion (peq s1 s1');
           assertion (peq s2 s2');
@@ -1023,7 +1027,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv)
   | BSopdead op args res mv s =>
       assertion (reg_unconstrained res e);
       track_moves env mv e
-  | BSload chunk addr args dst mv1 args' dst' mv2 s =>
+  | BSload trap chunk addr args dst mv1 args' dst' mv2 s =>
       do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
       track_moves env mv1 e2
@@ -1263,7 +1267,7 @@ Definition successors_block_shape (bsh: block_shape) : list node :=
   | BShighlong src dst mv s => s :: nil
   | BSop op args res mv1 args' res' mv2 s => s :: nil
   | BSopdead op args res mv s => s :: nil
-  | BSload chunk addr args dst mv1 args' dst' mv2 s => s :: nil
+  | BSload trap chunk addr args dst mv1 args' dst' mv2 s => s :: nil
   | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s => s :: nil
   | BSload2_1 addr args dst mv1 args' dst' mv2 s => s :: nil
   | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s => s :: nil
diff --git a/backend/Allocproof.v b/backend/Allocationproof.v
index 3fdbacbe..15cbdcdc 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocationproof.v
@@ -96,44 +96,44 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
       expand_block_shape (BSopdead op args res mv s)
                          (Iop op args res s)
                          (expand_moves mv (Lbranch s :: k))
-  | ebs_load: forall chunk addr args dst mv1 args' dst' mv2 s k,
+  | ebs_load: forall trap chunk addr args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
-      expand_block_shape (BSload chunk addr args dst mv1 args' dst' mv2 s)
-                         (Iload chunk addr args dst s)
+      expand_block_shape (BSload trap chunk addr args dst mv1 args' dst' mv2 s)
+                         (Iload trap chunk addr args dst s)
                          (expand_moves mv1
-                           (Lload chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
+                           (Lload trap chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2: forall addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s k,
       wf_moves mv1 -> wf_moves mv2 -> wf_moves mv3 ->
       Archi.splitlong = true ->
       offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2 addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr args1' dst1' ::
+                           (Lload TRAP Mint32 addr args1' dst1' ::
                            expand_moves mv2
-                             (Lload Mint32 addr2 args2' dst2' ::
+                             (Lload TRAP Mint32 addr2 args2' dst2' ::
                               expand_moves mv3 (Lbranch s :: k))))
   | ebs_load2_1: forall addr args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
       Archi.splitlong = true ->
       expand_block_shape (BSload2_1 addr args dst mv1 args' dst' mv2 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr args' dst' ::
+                           (Lload TRAP Mint32 addr args' dst' ::
                             expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2_2: forall addr addr2 args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
       Archi.splitlong = true ->
       offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2_2 addr addr2 args dst mv1 args' dst' mv2 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr2 args' dst' ::
+                           (Lload TRAP Mint32 addr2 args' dst' ::
                             expand_moves mv2 (Lbranch s :: k)))
-  | ebs_load_dead: forall chunk addr args dst mv s k,
+  | ebs_load_dead: forall trap chunk addr args dst mv s k,
       wf_moves mv ->
       expand_block_shape (BSloaddead chunk addr args dst mv s)
-                         (Iload chunk addr args dst s)
+                         (Iload trap chunk addr args dst s)
                          (expand_moves mv (Lbranch s :: k))
   | ebs_store: forall chunk addr args src mv1 args' src' s k,
       wf_moves mv1 ->
@@ -169,11 +169,11 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                          (Ibuiltin ef args res s)
                          (expand_moves mv1
                            (Lbuiltin ef args' res' :: expand_moves mv2 (Lbranch s :: k)))
-  | ebs_cond: forall cond args mv args' s1 s2 k,
+  | ebs_cond: forall cond args mv args' s1 s2 k i i',
       wf_moves mv ->
       expand_block_shape (BScond cond args mv args' s1 s2)
-                         (Icond cond args s1 s2)
-                         (expand_moves mv (Lcond cond args' s1 s2 :: k))
+                         (Icond cond args s1 s2 i)
+                         (expand_moves mv (Lcond cond args' s1 s2 i' :: k))
   | ebs_jumptable: forall arg mv arg' tbl k,
       wf_moves mv ->
       expand_block_shape (BSjumptable arg mv arg' tbl)
@@ -1970,8 +1970,8 @@ Ltac UseShape :=
   end.
 
 Remark addressing_not_long:
-  forall env f addr args dst s r,
-  wt_instr f env (Iload Mint64 addr args dst s) -> Archi.splitlong = true ->
+  forall trap env f addr args dst s r,
+  wt_instr f env (Iload trap Mint64 addr args dst s) -> Archi.splitlong = true ->
   In r args -> r <> dst.
 Proof.
   intros. inv H.
@@ -1981,7 +1981,7 @@ Proof.
   { rewrite <- H5. apply in_map; auto. }
   assert (C: env r = Tint).
   { apply A in B. rewrite B. unfold Tptr. rewrite Archi.splitlong_ptr32 by auto. auto. }
-  red; intros; subst r. rewrite C in H8; discriminate.
+  red; intros; subst r. rewrite C in H9; discriminate.
 Qed.
 
 (** The proof of semantic preservation is a simulation argument of the
@@ -2082,8 +2082,8 @@ Proof.
   econstructor; eauto.
   eapply wt_exec_Iop; eauto.
 
-(* load regular *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+(* load regular TRAP *)
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
@@ -2100,7 +2100,7 @@ Proof.
   econstructor; eauto.
 
 (* load pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2155,7 +2155,7 @@ Proof.
   econstructor; eauto.
 
 (* load first word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2185,7 +2185,7 @@ Proof.
   econstructor; eauto.
 
 (* load second word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2229,6 +2229,79 @@ Proof.
   econstructor; eauto.
   eapply wt_exec_Iload; eauto.
 
+- (* load notrap1 *)
+  generalize (wt_exec_Iload_notrap _ _ _ _ _ _ _ _ WTI WTRS).
+  intro WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_addressing_lessdef_none; eauto. intro Haddr.
+  exploit (exec_moves mv2); eauto.  intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. eapply exec_Lload_notrap1. rewrite <- Haddr.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+
+(* load notrap1 dead *)
+- exploit exec_moves; eauto. intros [ls1 [X Y]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_right. eexact X. econstructor; eauto.
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto.
+  eapply reg_unconstrained_satisf; eauto.
+  intros [enext [U V]].
+  econstructor; eauto.
+  eapply wt_exec_Iload_notrap; eauto.
+
+(* load regular notrap2 *)
+- generalize (wt_exec_Iload_notrap _ _ _ _ _ _ _ _ WTI WTRS).
+  intro WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
+  destruct (Mem.loadv chunk m' a') as [v' |] eqn:Hload.
+  { exploit (exec_moves mv2 env (rs # dst <- Vundef)); eauto.  intros [ls2 [A2 B2]].
+      econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. econstructor. instantiate (1 := a'). rewrite <- F.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto. eauto.
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+  }
+  { exploit (exec_moves mv2 env (rs # dst <- Vundef)); eauto.  intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. eapply exec_Lload_notrap2. rewrite <- F.
+  apply eval_addressing_preserved. exact symbols_preserved. assumption.
+  eauto.
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+  }
+  
+- (* load notrap2 dead *)
+  exploit exec_moves; eauto. intros [ls1 [X Y]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_right. eexact X. econstructor; eauto.
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto.
+  eapply reg_unconstrained_satisf; eauto.
+  intros [enext [U V]].
+  econstructor; eauto.
+  eapply wt_exec_Iload_notrap; eauto.
+  
 (* store *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]].
   exploit add_equations_lessdef; eauto. intros LD. simpl in LD. inv LD.
diff --git a/backend/Asmaux.v b/backend/Asmaux.v
new file mode 100644
index 00000000..1167c34c
--- /dev/null
+++ b/backend/Asmaux.v
@@ -0,0 +1,19 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Asm.
+Require Import AST.
+
+(* Constant only needed by Asmexpandaux.ml *)
+Definition dummy_function := {| fn_code := nil; fn_sig := signature_main |}.
diff --git a/backend/Asmexpandaux.ml b/backend/Asmexpandaux.ml
index f7feb303..1017ce26 100644
--- a/backend/Asmexpandaux.ml
+++ b/backend/Asmexpandaux.ml
@@ -15,6 +15,7 @@
    pseudo-instructions *)
 
 open Asm
+open Asmaux
 open AST
 open Camlcoq
 
@@ -26,7 +27,10 @@ let emit i = current_code := i :: !current_code
 
 (* Generation of fresh labels *)
 
+(* now imported from Asmaux.ml
 let dummy_function = { fn_code = []; fn_sig = signature_main }
+*)
+  
 let current_function = ref dummy_function
 let next_label = ref (None: label option)
 
diff --git a/backend/Bounds.v b/backend/Bounds.v
index 4231d861..d6b67a02 100644
--- a/backend/Bounds.v
+++ b/backend/Bounds.v
@@ -67,7 +67,7 @@ Definition instr_within_bounds (i: instruction) :=
   | Lgetstack sl ofs ty r => slot_within_bounds sl ofs ty /\ mreg_within_bounds r
   | Lsetstack r sl ofs ty => slot_within_bounds sl ofs ty
   | Lop op args res => mreg_within_bounds res
-  | Lload chunk addr args dst => mreg_within_bounds dst
+  | Lload trap chunk addr args dst => mreg_within_bounds dst
   | Lcall sig ros => size_arguments sig <= bound_outgoing b
   | Lbuiltin ef args res =>
        (forall r, In r (params_of_builtin_res res) \/ In r (destroyed_by_builtin ef) -> mreg_within_bounds r)
@@ -104,7 +104,7 @@ Definition record_regs_of_instr (u: RegSet.t) (i: instruction) : RegSet.t :=
   | Lgetstack sl ofs ty r => record_reg u r
   | Lsetstack r sl ofs ty => record_reg u r
   | Lop op args res => record_reg u res
-  | Lload chunk addr args dst => record_reg u dst
+  | Lload trap chunk addr args dst => record_reg u dst
   | Lstore chunk addr args src => u
   | Lcall sig ros => u
   | Ltailcall sig ros => u
@@ -280,7 +280,7 @@ Definition defined_by_instr (r': mreg) (i: instruction) :=
   match i with
   | Lgetstack sl ofs ty r => r' = r
   | Lop op args res => r' = res
-  | Lload chunk addr args dst => r' = dst
+  | Lload trap chunk addr args dst => r' = dst
   | Lbuiltin ef args res => In r' (params_of_builtin_res res) \/ In r' (destroyed_by_builtin ef)
   | _ => False
   end.
diff --git a/backend/CSE.v b/backend/CSE.v
index ecfa1f9e..838d96a6 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -459,8 +459,10 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
           before
       | Iop op args res s =>
           add_op before res op args
-      | Iload chunk addr args dst s =>
-          add_load before dst chunk addr args
+      | Iload TRAP chunk addr args dst s =>
+        add_load before dst chunk addr args
+      | Iload NOTRAP _ _ _ dst _ =>
+        set_unknown before dst
       | Istore chunk addr args src s =>
           let app := approx!!pc in
           let n := kill_loads_after_store app before chunk addr args in
@@ -491,10 +493,10 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
               | _ =>
                   empty_numbering
               end
-          | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ =>
+          | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ | EF_profiling _ _ =>
               set_res_unknown before res
           end
-      | Icond cond args ifso ifnot =>
+      | Icond cond args ifso ifnot _ =>
           before
       | Ijumptable arg tbl =>
           before
@@ -534,23 +536,23 @@ Definition transf_instr (n: numbering) (instr: instruction) :=
             let (op', args') := reduce _ combine_op n1 op args vl in
             Iop op' args' res s
         end
-  | Iload chunk addr args dst s =>
+  | Iload TRAP chunk addr args dst s =>
       let (n1, vl) := valnum_regs n args in
       match find_rhs n1 (Load chunk addr vl) with
       | Some r =>
           Iop Omove (r :: nil) dst s
       | None =>
           let (addr', args') := reduce _ combine_addr n1 addr args vl in
-          Iload chunk addr' args' dst s
+          Iload TRAP chunk addr' args' dst s
       end
   | Istore chunk addr args src s =>
       let (n1, vl) := valnum_regs n args in
       let (addr', args') := reduce _ combine_addr n1 addr args vl in
       Istore chunk addr' args' src s
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 i =>
       let (n1, vl) := valnum_regs n args in
       let (cond', args') := reduce _ combine_cond n1 cond args vl in
-      Icond cond' args' s1 s2
+      Icond cond' args' s1 s2 i
   | _ =>
       instr
   end.
diff --git a/backend/CSE2.v b/backend/CSE2.v
new file mode 100644
index 00000000..3042645e
--- /dev/null
+++ b/backend/CSE2.v
@@ -0,0 +1,413 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+Replace available expressions by the register containing their value.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps CSE2deps.
+
+(* Static analysis *)
+
+Inductive sym_val : Type :=
+| SMove (src : reg)
+| SOp (op : operation) (args : list reg)
+| SLoad (chunk : memory_chunk) (addr : addressing) (args : list reg).
+                                                   
+Definition eq_args (x y : list reg) : { x = y } + { x <> y } :=
+  list_eq_dec peq x y.
+
+Definition eq_sym_val : forall x y : sym_val,
+    {x = y} + { x <> y }.
+Proof.
+  generalize eq_operation.
+  generalize eq_args.
+  generalize peq.
+  generalize eq_addressing.
+  generalize chunk_eq.
+  decide equality.
+Defined.
+
+Module RELATION <: SEMILATTICE_WITHOUT_BOTTOM.
+  
+Definition t := (PTree.t sym_val).
+Definition eq (r1 r2 : t) :=
+  forall x, (PTree.get x r1) = (PTree.get x r2).
+
+Definition top : t := PTree.empty sym_val.
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  unfold eq.
+  intros; reflexivity.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  unfold eq.
+  intros; eauto.
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  unfold eq.
+  intros; congruence.
+Qed.
+
+Definition sym_val_beq (x y : sym_val) :=
+  if eq_sym_val x y then true else false.
+
+Definition beq (r1 r2 : t) := PTree.beq sym_val_beq r1 r2.
+
+Lemma beq_correct: forall r1 r2, beq r1 r2 = true -> eq r1 r2.
+Proof.
+  unfold beq, eq. intros r1 r2 EQ x.
+  pose proof (PTree.beq_correct sym_val_beq r1 r2) as CORRECT.
+  destruct CORRECT as [CORRECTF CORRECTB].
+  pose proof (CORRECTF EQ x) as EQx.
+  clear CORRECTF CORRECTB EQ.
+  unfold sym_val_beq in *.
+  destruct (r1 ! x) as [R1x | ] in *;
+    destruct (r2 ! x) as [R2x | ] in *;
+    trivial; try contradiction.
+  destruct (eq_sym_val R1x R2x) in *; congruence.
+Qed.
+
+Definition ge (r1 r2 : t) :=
+  forall x,
+    match PTree.get x r1 with
+    | None => True
+    | Some v => (PTree.get x r2) = Some v
+    end.
+
+Lemma ge_refl: forall r1 r2, eq r1 r2 -> ge r1 r2.
+Proof.
+  unfold eq, ge.
+  intros r1 r2 EQ x.
+  pose proof (EQ x) as EQx.
+  clear EQ.
+  destruct (r1 ! x).
+  - congruence.
+  - trivial.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+Proof.
+  unfold ge.
+  intros r1 r2 r3 GE12 GE23 x.
+  pose proof (GE12 x) as GE12x; clear GE12.
+  pose proof (GE23 x) as GE23x; clear GE23.
+  destruct (r1 ! x); trivial.
+  destruct (r2 ! x); congruence.
+Qed.
+
+Definition lub (r1 r2 : t) :=
+  PTree.combine
+    (fun ov1 ov2 =>
+       match ov1, ov2 with
+       | (Some v1), (Some v2) =>
+         if eq_sym_val v1 v2
+         then ov1
+         else None
+       | None, _
+       | _, None => None
+       end)
+    r1 r2.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (eq_sym_val _ _); trivial.
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (eq_sym_val _ _); trivial.
+  congruence.
+Qed.
+
+End RELATION.
+
+
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+Definition kill_sym_val (dst : reg) (sv : sym_val) :=
+  match sv with
+  | SMove src => if peq dst src then true else false
+  | SOp op args => List.existsb (peq dst) args
+  | SLoad chunk addr args => List.existsb (peq dst) args
+  end.
+                                                 
+Definition kill_reg (dst : reg) (rel : RELATION.t) :=
+  PTree.filter1 (fun x => negb (kill_sym_val dst x))
+                (PTree.remove dst rel).
+  
+Definition kill_sym_val_mem (sv: sym_val) :=
+  match sv with
+  | SMove _ => false
+  | SOp op _ => op_depends_on_memory op
+  | SLoad _ _ _ => true
+  end.
+
+Definition kill_sym_val_store chunk addr args (sv: sym_val) :=
+  match sv with
+  | SMove _ => false
+  | SOp op _ => op_depends_on_memory op
+  | SLoad chunk' addr' args' => may_overlap chunk addr args chunk' addr' args'
+  end.
+
+Definition kill_mem (rel : RELATION.t) :=
+  PTree.filter1 (fun x => negb (kill_sym_val_mem x)) rel.
+
+Definition forward_move (rel : RELATION.t) (x : reg) : reg :=
+  match rel ! x with
+  | Some (SMove org) => org
+  | _ => x
+  end.
+
+Definition kill_store1 chunk addr args rel :=
+  PTree.filter1 (fun x => negb (kill_sym_val_store chunk addr args x)) rel.
+
+Definition kill_store chunk addr args rel :=
+  kill_store1 chunk addr (List.map (forward_move rel) args) rel.
+
+Definition move (src dst : reg) (rel : RELATION.t) :=
+  PTree.set dst (SMove (forward_move rel src)) (kill_reg dst rel).
+
+Definition find_op_fold op args (already : option reg) x sv :=
+                match already with
+                | Some found => already
+                | None =>
+                  match sv with
+                  | (SOp op' args') =>
+                    if (eq_operation op op') && (eq_args args args')
+                    then Some x
+                    else None
+                  | _ => None
+                  end
+                end.
+
+Definition find_op (rel : RELATION.t) (op : operation) (args : list reg) :=
+  PTree.fold (find_op_fold op args) rel None.
+
+Definition find_load_fold chunk addr args (already : option reg) x sv :=
+                match already with
+                | Some found => already
+                | None =>
+                  match sv with
+                  | (SLoad chunk' addr' args') =>
+                    if (chunk_eq chunk chunk') &&
+                       (eq_addressing addr addr') &&
+                       (eq_args args args')
+                    then Some x
+                    else None
+                  | _ => None
+                  end
+                end.
+
+Definition find_load (rel : RELATION.t) (chunk : memory_chunk) (addr : addressing) (args : list reg) :=
+  PTree.fold (find_load_fold chunk addr args) rel None.
+
+Definition oper2 (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  let rel' := kill_reg dst rel in
+  PTree.set dst (SOp op (List.map (forward_move rel') args)) rel'.
+
+Definition oper1 (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  if List.in_dec peq dst args
+  then kill_reg dst rel
+  else oper2 op dst args rel.
+
+Definition oper (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  match find_op rel op (List.map (forward_move rel) args) with
+  | Some r => move r dst rel
+  | None => oper1 op dst args rel
+  end.
+
+Definition gen_oper (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  match op, args with
+  | Omove, src::nil => move src dst rel
+  | _, _ => oper op dst args rel
+  end.
+
+Definition load2 (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  let rel' := kill_reg dst rel in
+  PTree.set dst (SLoad chunk addr (List.map (forward_move rel') args)) rel'.
+
+Definition load1 (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  if List.in_dec peq dst args
+  then kill_reg dst rel
+  else load2 chunk addr dst args rel.
+
+Definition load (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  match find_load rel chunk addr (List.map (forward_move rel) args) with
+  | Some r => move r dst rel
+  | None => load1 chunk addr dst args rel
+  end.
+
+Definition kill_builtin_res res rel :=
+  match res with
+  | BR r => kill_reg r rel
+  | _ => rel
+  end.
+
+Definition apply_external_call ef (rel : RELATION.t) : RELATION.t :=
+  match ef with
+  | EF_builtin name sg
+  | EF_runtime name sg =>
+    match Builtins.lookup_builtin_function name sg with
+    | Some bf => rel
+    | None => kill_mem rel
+    end
+  | EF_malloc (* would need lessdef *)
+  | EF_external _ _
+  | EF_vstore _ 
+  | EF_free (* would need lessdef? *)
+  | EF_memcpy _ _ (* FIXME *)
+  | EF_inline_asm _ _ _ => kill_mem rel
+  | _ => rel
+  end.
+                              
+Definition apply_instr instr (rel : RELATION.t) : RB.t :=
+  match instr with
+  | Inop _
+  | Icond _ _ _ _ _
+  | Ijumptable _ _ => Some rel
+  | Istore chunk addr args _ _ => Some (kill_store chunk addr args rel)
+  | Iop op args dst _ => Some (gen_oper op dst args rel)
+  | Iload trap chunk addr args dst _ => Some (load chunk addr dst args rel)
+  | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
+  | Ibuiltin ef _ res _ => Some (kill_builtin_res res (apply_external_call ef rel))
+  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  end.
+
+Definition apply_instr' code (pc : node) (ro : RB.t) : RB.t :=
+  match ro with
+  | None => None
+  | Some x =>
+    match code ! pc with
+    | None => RB.bot
+    | Some instr => apply_instr instr x
+    end
+  end.
+
+Definition forward_map (f : RTL.function) := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+
+Definition forward_move_b (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => forward_move rel x
+  end.
+
+Definition subst_arg (fmap : option (PMap.t RB.t)) (pc : node) (x : reg) : reg :=
+  match fmap with
+  | None => x
+  | Some inv => forward_move_b (PMap.get pc inv) x
+  end.
+
+Definition subst_args fmap pc := List.map (subst_arg fmap pc).
+
+(* Transform *)
+Definition find_op_in_fmap fmap pc op args :=
+  match fmap with
+  | None => None
+  | Some map =>
+    match PMap.get pc map with
+    | Some rel => find_op rel op args
+    | None => None
+    end
+  end.
+
+Definition find_load_in_fmap fmap pc chunk addr args :=
+  match fmap with
+  | None => None
+  | Some map =>
+    match PMap.get pc map with
+    | Some rel => find_load rel chunk addr args
+    | None => None
+    end
+  end.
+
+Definition transf_instr (fmap : option (PMap.t RB.t))
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    let args' := subst_args fmap pc args in
+    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
+    | None => Iop op args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Iload trap chunk addr args dst s =>
+    let args' := subst_args fmap pc args in
+    match find_load_in_fmap fmap pc chunk addr args' with
+    | None => Iload trap chunk addr args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) (subst_arg fmap pc src) s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 i =>
+    Icond cond (subst_args fmap pc args) s1 s2 i
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v
new file mode 100644
index 00000000..252240c9
--- /dev/null
+++ b/backend/CSE2proof.v
@@ -0,0 +1,1755 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+Replace available expressions by the register containing their value.
+
+Proofs.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps CSE2depsproof.
+Require Import Lia.
+
+Lemma args_unaffected:
+  forall rs : regset,
+  forall dst : reg,
+  forall v,
+  forall args : list reg,
+    existsb (fun y : reg => peq dst y) args = false ->
+    (rs # dst <- v ## args) = (rs ## args).
+Proof.
+  induction args; simpl; trivial.
+  destruct (peq dst a) as [EQ | NEQ]; simpl.
+  { discriminate.
+  }
+  intro EXIST.
+  f_equal.
+  {
+    apply Regmap.gso.
+    congruence.
+  }
+  apply IHargs.
+  assumption.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section SAME_MEMORY.
+  Variable m : mem.
+
+Definition sem_sym_val sym rs (v : option val) : Prop :=
+  match sym with
+  | SMove src => v = Some (rs # src)
+  | SOp op args =>
+    v = (eval_operation genv sp op (rs ## args) m)
+  | SLoad chunk addr args =>
+    match eval_addressing genv sp addr rs##args with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => v = Some dat
+                | None => v = None \/ v = Some Vundef
+                end
+    | None   => v = None \/ v = Some Vundef
+    end
+  end.
+    
+Definition sem_reg (rel : RELATION.t) (x : reg) (rs : regset) (v : val) : Prop :=
+  match rel ! x with
+  | None => True
+  | Some sym => sem_sym_val sym rs (Some (rs # x)) 
+  end.
+
+Definition sem_rel (rel : RELATION.t) (rs : regset) :=
+  forall x : reg, (sem_reg rel x rs (rs # x)).
+
+Definition sem_rel_b (relb : RB.t) (rs : regset) :=
+  match relb with
+  | Some rel => sem_rel rel rs
+  | None => False
+  end.
+
+Definition fmap_sem (fmap : option (PMap.t RB.t))
+  (pc : node) (rs : regset) :=
+  match fmap with
+  | None => True
+  | Some m => sem_rel_b (PMap.get pc m) rs
+  end.
+
+Lemma subst_arg_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  forall arg,
+    fmap_sem (forward_map f) pc rs ->
+    rs # (subst_arg (forward_map f) pc arg) = rs # arg.
+Proof.
+  intros until arg.
+  intro SEM.
+  unfold fmap_sem in SEM.
+  destruct (forward_map f) as [map |]in *; trivial.
+  simpl.
+  unfold sem_rel_b, sem_rel, sem_reg in *.
+  destruct (map # pc).
+  2: contradiction.
+  pose proof (SEM arg) as SEMarg.
+  simpl. unfold forward_move.
+  unfold sem_sym_val in *.
+  destruct (t ! arg); trivial.
+  destruct s; congruence.
+Qed.
+
+Lemma subst_args_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  fmap_sem (forward_map f) pc rs ->
+  forall args,
+    rs ## (subst_args (forward_map f) pc args) = rs ## args.
+Proof.
+  induction args; trivial.
+  simpl.
+  f_equal.
+  apply subst_arg_ok; assumption.
+  assumption.
+Qed.
+
+Lemma kill_reg_sound :
+  forall rel : RELATION.t,
+  forall dst : reg,
+  forall rs,
+  forall v,
+    sem_rel rel rs ->
+    sem_rel (kill_reg dst rel) (rs # dst <- v).
+Proof.
+  unfold sem_rel, kill_reg, sem_reg, sem_sym_val.
+  intros until v.
+  intros REL x.
+  rewrite PTree.gfilter1.
+  destruct (Pos.eq_dec dst x).
+  {
+    subst x.
+    rewrite PTree.grs.
+    trivial.
+  }
+  rewrite PTree.gro by congruence.
+  rewrite Regmap.gso by congruence.
+  destruct (rel ! x) as [relx | ] eqn:RELx; trivial.
+  unfold kill_sym_val.
+  pose proof (REL x) as RELinstx.
+  rewrite RELx in RELinstx.
+  destruct relx eqn:SYMVAL.
+  {
+    destruct (peq dst src); simpl.
+    { reflexivity. }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  { destruct existsb eqn:EXISTS; simpl.
+    { reflexivity. }
+    rewrite args_unaffected by exact EXISTS.
+    assumption.
+  }
+  { destruct existsb eqn:EXISTS; simpl.
+    { reflexivity. }
+    rewrite args_unaffected by exact EXISTS.
+    assumption.
+  }
+Qed.
+
+Lemma write_same:
+  forall rs : regset,
+  forall src dst : reg,
+    (rs # dst <- (rs # src)) # src = rs # src.
+Proof.
+  intros.
+  destruct (peq src dst).
+  {
+    subst dst.
+    apply Regmap.gss.
+  }
+  rewrite Regmap.gso by congruence.
+  reflexivity.
+Qed.
+
+Lemma move_sound :
+  forall rel : RELATION.t,
+  forall src dst : reg,
+  forall rs,
+    sem_rel rel rs ->
+    sem_rel (move src dst rel) (rs # dst <- (rs # src)).
+Proof.
+  intros until rs. intros REL x.
+  pose proof (kill_reg_sound rel dst rs (rs # src) REL x) as KILL.
+  pose proof (REL src) as RELsrc.
+  unfold move.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    unfold sem_reg in *.
+    simpl.
+    unfold forward_move.
+    destruct (rel ! src) as [ sv |]; simpl.
+    destruct sv eqn:SV; simpl in *.
+    {
+      destruct (peq dst src0).
+      {
+        subst src0.
+        rewrite Regmap.gss.
+        reflexivity.
+      }
+      rewrite Regmap.gso by congruence.
+      assumption.
+    }
+    all: f_equal; symmetry; apply write_same.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma move_cases_neq:
+  forall dst rel a,
+    a <> dst ->
+    (forward_move (kill_reg dst rel) a) <> dst.
+Proof.
+  intros until a. intro NEQ.
+  unfold kill_reg, forward_move.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  destruct (rel ! a); simpl.
+  2: congruence.
+  destruct s.
+  {
+    unfold kill_sym_val.
+    destruct peq; simpl; congruence.
+  }
+  all: simpl;
+    destruct negb; simpl; congruence.
+Qed.
+
+Lemma args_replace_dst :
+  forall rel,
+  forall args : list reg,
+  forall dst : reg,
+  forall rs : regset,
+  forall v,
+    (sem_rel rel rs) ->
+    not (In dst args) ->
+    (rs # dst <- v)
+    ## (map
+          (forward_move (kill_reg dst rel)) args) = rs ## args.
+Proof.
+  induction args; simpl.
+  1: reflexivity.
+  intros until v.
+  intros REL NOT_IN.
+  rewrite IHargs by auto.
+  f_equal.
+  pose proof (REL a) as RELa.
+  rewrite Regmap.gso by (apply move_cases_neq; auto).
+  unfold kill_reg.
+  unfold sem_reg in RELa.
+  unfold forward_move.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by auto.
+  destruct (rel ! a); simpl; trivial.
+  destruct s; simpl in *; destruct negb; simpl; congruence.
+Qed.
+
+Lemma oper2_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper2 op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL NOT_IN EVAL x.
+  pose proof (kill_reg_sound rel dst rs v REL x) as KILL.
+  unfold oper2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    symmetry.
+    assumption.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma oper1_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper1 op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold oper1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply oper2_sound; auto.
+Qed.
+
+Lemma find_op_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_op rel op args = Some src ->
+    (eval_operation genv sp op (rs ## args) m) = Some (rs # src).
+Proof.
+  intros until rs.
+  unfold find_op.
+  rewrite PTree.fold_spec.
+  intro REL.
+  assert (
+     forall start,
+             match start with
+             | None => True
+             | Some src => eval_operation genv sp op rs ## args m = Some rs # src
+             end -> fold_left
+    (fun (a : option reg) (p : positive * sym_val) =>
+     find_op_fold op args a (fst p) (snd p)) (PTree.elements rel) start =
+                    Some src ->
+             eval_operation genv sp op rs ## args m = Some rs # src) as REC.
+  {
+    unfold sem_rel, sem_reg in REL.
+    generalize (PTree.elements_complete rel).
+    generalize (PTree.elements rel).
+    induction l; simpl.
+    {
+      intros.
+      subst start.
+      assumption.
+    }
+    destruct a as [r sv]; simpl.
+    intros COMPLETE start GEN.
+    apply IHl.
+    {
+      intros.
+      apply COMPLETE.
+      right.
+      assumption.
+    }
+    unfold find_op_fold.
+    destruct start.
+    assumption.
+    destruct sv; trivial.
+    destruct eq_operation; trivial.
+    subst op0.
+    destruct eq_args; trivial.
+    subst args0.
+    simpl.
+    assert ((rel ! r) = Some (SOp op args)) as RELatr.
+    {
+      apply COMPLETE.
+      left.
+      reflexivity.
+    }
+    pose proof (REL r) as RELr.
+    rewrite RELatr in RELr.
+    simpl in RELr.
+    symmetry.
+    assumption.
+  }
+  apply REC; auto.
+Qed.
+
+
+Lemma find_load_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    match eval_addressing genv sp addr rs##args with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end.
+Proof.
+  intros until rs.
+  unfold find_load.
+  rewrite PTree.fold_spec.
+  intro REL.
+  assert (
+     forall start,
+             match start with
+             | None => True
+             | Some src =>
+               match eval_addressing genv sp addr rs##args with
+               | Some a => match Mem.loadv chunk m a with
+                           | Some dat => rs#src = dat
+                           | None => rs#src = Vundef
+                           end
+               | None   => rs#src = Vundef
+               end
+             end ->
+    fold_left
+    (fun (a : option reg) (p : positive * sym_val) =>
+     find_load_fold chunk addr args a (fst p) (snd p)) (PTree.elements rel) start =
+    Some src ->
+    match eval_addressing genv sp addr rs##args with
+               | Some a => match Mem.loadv chunk m a with
+                           | Some dat => rs#src = dat
+                           | None => rs#src = Vundef
+                           end
+               | None   => rs#src = Vundef
+               end) as REC.
+  
+  {
+    unfold sem_rel, sem_reg in REL.
+    generalize (PTree.elements_complete rel).
+    generalize (PTree.elements rel).
+    induction l; simpl.
+    {
+      intros.
+      subst start.
+      assumption.
+    }
+    destruct a as [r sv]; simpl.
+    intros COMPLETE start GEN.
+    apply IHl.
+    {
+      intros.
+      apply COMPLETE.
+      right.
+      assumption.
+    }
+    unfold find_load_fold.
+    destruct start.
+    assumption.
+    destruct sv; trivial.
+    destruct chunk_eq; trivial.
+    subst chunk0.
+    destruct eq_addressing; trivial.
+    subst addr0.
+    destruct eq_args; trivial.
+    subst args0.
+    simpl.
+    assert ((rel ! r) = Some (SLoad chunk addr args)) as RELatr.
+    {
+      apply COMPLETE.
+      left.
+      reflexivity.
+    }
+    pose proof (REL r) as RELr.
+    rewrite RELatr in RELr.
+    simpl in RELr.
+    destruct eval_addressing.
+    { destruct Mem.loadv.
+      congruence.
+      destruct RELr; congruence.
+    }
+    destruct RELr; congruence.
+  }
+  apply REC; auto.
+Qed.
+
+
+Lemma find_load_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    v = rs # src.
+Proof.
+  intros until v. intros REL FINDLOAD ADDR LOAD.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  destruct eval_addressing in *.
+  {
+    replace a with v0 in * by congruence.
+    destruct Mem.loadv in * ; congruence.
+  }
+  discriminate.
+Qed.
+
+Lemma find_load_notrap1_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = None ->
+    rs # src = Vundef.
+Proof.
+  intros until rs. intros REL FINDLOAD ADDR.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  rewrite ADDR in Z.
+  assumption.
+Qed.
+
+Lemma find_load_notrap2_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = Some a ->
+    Mem.loadv chunk m a = None ->
+    rs # src = Vundef.
+Proof.
+  intros until a. intros REL FINDLOAD ADDR LOAD.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  rewrite ADDR in Z.
+  destruct Mem.loadv.
+  discriminate.
+  assumption.
+Qed.
+
+Lemma forward_move_map:
+  forall rel args rs,
+    sem_rel rel rs ->
+    rs ## (map (forward_move rel) args) = rs ## args.
+Proof.
+  induction args; simpl; trivial.
+  intros rs REL.
+  f_equal.
+  2: (apply IHargs; assumption).
+  unfold forward_move, sem_rel, sem_reg, sem_sym_val in *.
+  pose proof (REL a) as RELa.
+  destruct (rel ! a); trivial.
+  destruct s; congruence.
+Qed.
+
+
+Lemma forward_move_rs:
+  forall rel arg rs,
+    sem_rel rel rs ->
+    rs # (forward_move rel arg) = rs # arg.
+Proof.
+  unfold forward_move, sem_rel, sem_reg, sem_sym_val in *.
+  intros until rs.
+  intro REL.
+  pose proof (REL arg) as RELarg.
+  destruct (rel ! arg); trivial.
+  destruct s; congruence.
+Qed.
+
+Lemma oper_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold oper.
+  destruct find_op eqn:FIND.
+  {
+    assert (eval_operation genv sp op rs ## (map (forward_move rel) args) m = Some rs # r) as FIND_OP.
+    {
+      apply (find_op_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_OP by assumption.
+    replace v with (rs # r) by congruence.
+    apply move_sound; auto.
+  }
+  apply oper1_sound; trivial.
+Qed.
+
+Lemma gen_oper_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (gen_oper op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold gen_oper.
+  destruct op.
+  { destruct args as [ | h0 t0].
+    apply oper_sound; auto.
+    destruct t0.
+    {
+      simpl in *.
+      replace v with (rs # h0) by congruence.
+      apply move_sound; auto.
+    }
+    apply oper_sound; auto.
+  }
+  all: apply oper_sound; auto.
+Qed.
+
+
+Lemma load2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL NOT_IN ADDR LOAD x.
+  pose proof (kill_reg_sound rel dst rs v REL x) as KILL.
+  unfold load2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    destruct eval_addressing.
+    {
+      replace a with v0 in * by congruence.
+      destruct Mem.loadv; congruence.
+    }
+    discriminate.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma load2_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL NOT_IN ADDR x.
+  pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL.
+  unfold load2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    rewrite ADDR.
+    right.
+    trivial.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma load2_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL NOT_IN ADDR LOAD x.
+  pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL.
+  unfold load2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    rewrite ADDR.
+    rewrite LOAD.
+    right; trivial.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma load1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_sound with (a := a); auto.
+Qed.
+
+Lemma load1_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_notrap1_sound; auto.
+Qed.
+
+Lemma load1_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_notrap2_sound with (a := a); auto.
+Qed.
+
+Lemma load_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL ADDR LOAD.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND.
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    destruct eval_addressing in *.
+    2: discriminate.
+    replace v0 with a in * by congruence.
+    destruct Mem.loadv in *.
+    2: discriminate.
+    replace v with (rs # src) by congruence.
+    apply move_sound; auto.
+  }
+  apply load1_sound with (a := a); trivial.
+Qed.
+
+Lemma load_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL ADDR.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND.
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    destruct eval_addressing in *.
+    discriminate.
+    rewrite <- FIND_LOAD.
+    apply move_sound; auto.
+  }
+  apply load1_notrap1_sound; trivial.
+Qed.
+
+Lemma load_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL ADDR.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND.
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    rewrite ADDR in FIND_LOAD.
+    destruct Mem.loadv; intro.
+    discriminate.
+    rewrite <- FIND_LOAD.
+    apply move_sound; auto.
+  }
+  apply load1_notrap2_sound; trivial.
+Qed.
+
+Lemma kill_reg_weaken:
+  forall res mpc rs,
+    sem_rel mpc rs ->
+    sem_rel (kill_reg res mpc) rs.
+Proof.
+  intros until rs.
+  intros REL x.
+  pose proof (REL x) as RELx.
+  unfold kill_reg, sem_reg in *.
+  rewrite PTree.gfilter1.
+  destruct (peq res x).
+  { subst x.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  rewrite PTree.gro by congruence.
+  destruct (mpc ! x) as [sv | ]; trivial.
+  destruct negb; trivial.
+Qed.
+
+Lemma top_ok:
+  forall rs, sem_rel RELATION.top rs.
+Proof.
+  unfold sem_rel, sem_reg, RELATION.top.
+  intros.
+  rewrite PTree.gempty.
+  reflexivity.
+Qed.
+
+Lemma sem_rel_ge:
+  forall r1 r2 : RELATION.t,
+    (RELATION.ge r1 r2) ->
+    forall rs : regset,
+      (sem_rel r2 rs) -> (sem_rel r1 rs).
+Proof.
+  intros r1 r2 GE rs RE x.
+  pose proof (RE x) as REx.
+  pose proof (GE x) as GEx.
+  unfold sem_reg in *.
+  destruct (r1 ! x) as [r1x | ] in *;
+    destruct (r2 ! x) as [r2x | ] in *;
+    congruence.
+Qed.
+End SAME_MEMORY.
+
+Lemma kill_mem_sound :
+  forall m m' : mem,
+  forall rel : RELATION.t,
+  forall rs,
+    sem_rel m rel rs -> sem_rel m' (kill_mem rel) rs.
+Proof.
+  unfold sem_rel, sem_reg.
+  intros until rs.
+  intros SEM x.
+  pose proof (SEM x) as SEMx.
+  unfold kill_mem.
+  rewrite PTree.gfilter1.
+  unfold kill_sym_val_mem.
+  destruct (rel ! x) as [ sv | ].
+  2: reflexivity.
+  destruct sv; simpl in *; trivial.
+  {
+    destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial.
+    rewrite SEMx.
+    apply op_depends_on_memory_correct; auto.
+  }
+Qed.
+  
+Lemma kill_store_sound :
+  forall m m' : mem,
+  forall rel : RELATION.t,
+  forall chunk addr args a v rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (Mem.storev chunk m a v) = Some m' ->
+    sem_rel m rel rs -> sem_rel m' (kill_store chunk addr args rel) rs.
+Proof.
+  unfold sem_rel, sem_reg.
+  intros until rs.
+  intros ADDR STORE SEM x.
+  pose proof (SEM x) as SEMx.
+  unfold kill_store, kill_store1.
+  rewrite PTree.gfilter1.
+  destruct (rel ! x) as [ sv | ].
+  2: reflexivity.
+  destruct sv; simpl in *; trivial.
+  {
+    destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial.
+    rewrite SEMx.
+    apply op_depends_on_memory_correct; auto.
+  }
+  destruct may_overlap eqn:OVERLAP; simpl; trivial.
+  destruct (eval_addressing genv sp addr0 rs ## args0) eqn:ADDR0.
+  {
+    erewrite may_overlap_sound with (args := (map (forward_move rel) args)).
+    all: try eassumption.
+    
+    erewrite forward_move_map by eassumption.
+    assumption.
+  }
+  intuition congruence.
+Qed.
+
+Lemma kill_builtin_res_sound:
+  forall res (m : mem) (rs : regset) vres (rel : RELATION.t)
+         (REL : sem_rel m rel rs),
+         (sem_rel m (kill_builtin_res res rel) (regmap_setres res vres rs)).
+Proof.
+  destruct res; simpl; intros; trivial.
+  apply kill_reg_sound; trivial.
+Qed.
+End SOUNDNESS.
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun cu f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. apply match_transform_program; auto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; trivial.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall (f : function) (pc : node) (i : instruction),
+  (fn_code f)!pc = Some i ->
+  (fn_code (transf_function f))!pc =
+    Some(transf_instr (forward_map f) pc i).
+Proof.
+  intros until i. intro CODE.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite CODE.
+  reflexivity.
+Qed.
+
+Definition is_killed_in_map (map : PMap.t RB.t) pc res :=
+  match PMap.get pc map with
+  | None => True
+  | Some rel => exists rel', RELATION.ge rel (kill_reg res rel')
+  end.
+
+Definition is_killed_in_fmap fmap pc res :=
+  match fmap with
+  | None => True
+  | Some map => is_killed_in_map map pc res
+  end.
+
+Lemma external_call_sound:
+  forall ef (rel : RELATION.t) sp (m m' : mem) (rs : regset) vargs t vres
+         (REL : sem_rel fundef unit ge sp m rel rs)
+         (CALL : external_call ef ge vargs m t vres m'),
+    sem_rel fundef unit ge sp m' (apply_external_call ef rel) rs.
+Proof.
+  destruct ef; intros; simpl in *.
+  all: eauto using kill_mem_sound.
+  all: unfold builtin_or_external_sem in *.
+  1, 2: destruct (Builtins.lookup_builtin_function name sg);
+    eauto using kill_mem_sound;
+    inv CALL; eauto using kill_mem_sound.
+  all: inv CALL.
+  all: eauto using kill_mem_sound.
+Qed.
+  
+Definition sem_rel_b' := sem_rel_b fundef unit ge.
+Definition fmap_sem' := fmap_sem fundef unit ge.
+Definition subst_arg_ok' := subst_arg_ok fundef unit ge.
+Definition subst_args_ok' := subst_args_ok fundef unit ge.
+Definition kill_mem_sound' := kill_mem_sound fundef unit ge.
+Definition kill_store_sound' := kill_store_sound fundef unit ge.
+
+Lemma sem_rel_b_ge:
+  forall rb1 rb2 : RB.t,
+    (RB.ge rb1 rb2) ->
+    forall sp m,
+    forall rs : regset,
+      (sem_rel_b' sp m rb2 rs) -> (sem_rel_b' sp m rb1 rs).
+Proof.
+  unfold sem_rel_b', sem_rel_b.
+  destruct rb1 as [r1 | ];
+    destruct rb2 as [r2 | ]; simpl;
+      intros GE sp m rs RE; try contradiction.
+  apply sem_rel_ge with (r2 := r2); assumption.
+Qed.
+
+Lemma apply_instr'_bot :
+  forall code,
+  forall pc,
+    RB.eq (apply_instr' code pc RB.bot) RB.bot.
+Proof.
+  reflexivity.
+Qed.
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+    (forall m : mem,
+     forall vres, (fmap_sem' sp m (forward_map f) pc rs # res <- vres)) ->
+    match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+                                 (STACKS: list_forall2 match_frames stk stk'),
+      (fmap_sem' sp m (forward_map f) pc rs) ->
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+  
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+  
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+- (* op *)
+  unfold transf_instr in *.
+  destruct (if is_trivial_op op then None else find_op_in_fmap (forward_map f) pc op
+               (subst_args (forward_map f) pc args)) eqn:FIND_OP.
+  {
+    destruct (is_trivial_op op).
+    discriminate.
+    unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        rewrite MAP in H0.
+        rewrite find_op_sound with (rel := mpc) (src := r) in H0 by assumption.
+        assumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (gen_oper op res args mpc)).
+    {
+      replace (Some (gen_oper op res args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply gen_oper_sound; auto.
+  }
+  {
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      rewrite (subst_args_ok' sp m) by assumption.
+      rewrite <- H0.
+      apply eval_operation_preserved. exact symbols_preserved.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: constructor.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: contradiction.
+
+    apply sem_rel_b_ge with (rb2 := Some (gen_oper op res args mpc)).
+    {
+      replace (Some (gen_oper op res args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply gen_oper_sound; auto.
+  }
+    
+(* load *)
+- unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  {
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        symmetry.
+        rewrite MAP in H0.
+        eapply find_load_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_sound with (a := a); auto.
+  }
+  {  
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_sound with (a := a); assumption.
+  }
+  
+- (* load notrap1 *)
+  unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  {
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := Vundef); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        rewrite MAP in H0.
+        eapply find_load_notrap1_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_notrap1_sound; auto.
+  }
+  {  
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_notrap1_sound; assumption.
+  }
+  
+(* load notrap2 *)
+- unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  {
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := Vundef); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        rewrite MAP in H0.
+        eapply find_load_notrap2_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: try eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_notrap2_sound with (a := a); auto.
+  }
+  {  
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_notrap2_sound with (a := a); assumption.
+  }
+  
+- (* store *)
+  econstructor. split.
+  {
+    assert (eval_addressing tge sp addr rs ## args = Some a).
+    rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+    eapply exec_Istore; eauto.
+    - rewrite (subst_args_ok' sp m) by assumption.
+      eassumption.
+    - rewrite (subst_arg_ok' sp m) by assumption.
+      eassumption.
+  }
+  
+  constructor; auto.
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (kill_store chunk addr args mpc)); trivial.
+  {
+  replace (Some (kill_store chunk addr args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  rewrite MPC.
+  rewrite H.
+  reflexivity.
+  }
+  eapply (kill_store_sound' sp m); eassumption.
+  
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite (subst_args_ok' sp m) by assumption.
+  constructor. constructor; auto.
+
+  constructor.
+  {
+    intros m' vres.
+    unfold fmap_sem', fmap_sem in *.
+    destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+    destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+    apply sem_rel_b_ge with (rb2 := Some (kill_reg res (kill_mem mpc))).
+    {
+      replace (Some (kill_reg res (kill_mem mpc))) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    apply kill_reg_sound.
+    apply (kill_mem_sound' sp m).
+    assumption.
+  }
+  
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite (subst_args_ok' (Vptr stk Ptrofs.zero) m) by assumption.
+  constructor. auto.
+
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  
+  apply sem_rel_b_ge with (rb2 := Some (kill_builtin_res res (apply_external_call ef mpc))).
+  {
+    replace (Some (kill_builtin_res res (apply_external_call ef mpc))) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_builtin_res_sound.
+  eapply external_call_sound with (m := m); eassumption.
+
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite (subst_args_ok' sp m); eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    destruct b; tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite (subst_arg_ok' sp m); eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    apply list_nth_z_in with (n := Int.unsigned n).
+    assumption.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* return *)
+- destruct or as [arg | ].
+  {
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    unfold regmap_optget.
+    rewrite (subst_arg_ok' (Vptr stk Ptrofs.zero) m) by eassumption.
+    constructor; auto.
+  }
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    constructor; auto.
+  
+  
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := Some RELATION.top).
+  {
+    eapply DS.fixpoint_entry with (code := fn_code f) (successors := successors_instr); try eassumption.
+  }
+  apply top_ok.
+  
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    constructor; auto.
+
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/CSE3.v b/backend/CSE3.v
new file mode 100644
index 00000000..2f73a1a7
--- /dev/null
+++ b/backend/CSE3.v
@@ -0,0 +1,151 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps CSE2deps.
+Require Import CSE3analysis HashedSet.
+Require Import RTLtyping.
+Require Compopts.
+
+Local Open Scope error_monad_scope.
+
+Axiom preanalysis : typing_env -> RTL.function -> invariants * analysis_hints.
+
+Record cse3params : Type :=
+  mkcse3params
+    { cse3_conditions : bool;
+      cse3_operations : bool;
+      cse3_trivial_ops: bool;
+    }.
+
+Section PARAMS.
+  Variable params : cse3params.
+
+Section REWRITE.
+  Context {ctx : eq_context}.
+
+Definition find_op_in_fmap fmap pc op args :=
+  match PMap.get pc fmap with
+  | Some rel => rhs_find (ctx:=ctx) pc (SOp op) args rel
+  | None => None
+  end.
+
+Definition find_load_in_fmap fmap pc chunk addr args :=
+  match PMap.get pc fmap with
+  | Some rel => rhs_find (ctx:=ctx) pc (SLoad chunk addr) args rel
+  | None => None
+  end.
+
+Definition forward_move_b (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => forward_move (ctx := ctx) rel x
+  end.
+
+Definition subst_arg (fmap : PMap.t RB.t) (pc : node) (x : reg) : reg :=
+  forward_move_b (PMap.get pc fmap) x.
+
+Definition forward_move_l_b (rb : RB.t) (xl : list reg) :=
+  match rb with
+  | None => xl
+  | Some rel => forward_move_l (ctx := ctx) rel xl
+  end.
+
+Definition subst_args fmap pc xl :=
+  forward_move_l_b (PMap.get pc fmap) xl.
+
+Definition find_cond_in_fmap fmap pc cond args :=
+  if params.(cse3_conditions)
+  then
+    match PMap.get pc fmap with
+    | Some rel =>
+      if is_condition_present (ctx:=ctx) pc rel cond args
+      then Some true
+      else
+        let ncond := negate_condition cond in
+        if is_condition_present (ctx:=ctx) pc rel ncond args
+        then Some false
+        else let args' := subst_args fmap pc args in
+             if is_condition_present (ctx:=ctx) pc rel cond args'
+             then Some true
+             else if is_condition_present (ctx:=ctx) pc rel ncond args'
+                  then Some false
+                  else None
+    | None => None
+    end
+  else None.
+
+Definition param_transf_instr (fmap : PMap.t RB.t)
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    let args' := subst_args fmap pc args in
+    match (if (negb params.(cse3_operations) || ((negb params.(cse3_trivial_ops)) && (is_trivial_op op)))
+           then None else find_op_in_fmap fmap pc op args') with
+    | None => Iop op args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Iload trap chunk addr args dst s =>
+    let args' := subst_args fmap pc args in
+    match find_load_in_fmap fmap pc chunk addr args' with
+    | None => Iload trap chunk addr args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) (subst_arg fmap pc src) s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 expected =>
+    let args' := subst_args fmap pc args in
+    match find_cond_in_fmap fmap pc cond args with
+    | None => Icond cond args' s1 s2 expected
+    | Some b => Inop (if b then s1 else s2)
+    end
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+End REWRITE.
+
+Definition param_transf_function (f: function) : res function :=
+  do tenv <- type_function f;
+  let (invariants, hints) := preanalysis tenv f in
+  let ctx := context_from_hints hints in
+  if check_inductiveness (ctx:=ctx) f tenv invariants
+  then
+    OK {| fn_sig := f.(fn_sig);
+          fn_params := f.(fn_params);
+          fn_stacksize := f.(fn_stacksize);
+          fn_code := PTree.map (param_transf_instr (ctx := ctx) invariants)
+                               f.(fn_code);
+          fn_entrypoint := f.(fn_entrypoint) |}
+  else Error (msg "cse3: not inductive").
+
+Definition param_transf_fundef (fd: fundef) : res fundef :=
+  AST.transf_partial_fundef param_transf_function fd.
+
+Definition param_transf_program (p: program) : res program :=
+  transform_partial_program param_transf_fundef p.
+
+End PARAMS.
+
+Definition cmdline_params (_ : unit) :=
+  {| cse3_conditions := Compopts.optim_CSE3_conditions tt;
+     cse3_operations := true;
+     cse3_trivial_ops:= Compopts.optim_CSE3_trivial_ops tt |}.
+
+Definition transf_program p := param_transf_program (cmdline_params tt) p.
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v
new file mode 100644
index 00000000..9a6c9c0d
--- /dev/null
+++ b/backend/CSE3analysis.v
@@ -0,0 +1,566 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps CSE2deps.
+Require Import HashedSet.
+Require List Compopts.
+
+Definition typing_env := reg -> typ.
+
+Definition loadv_storev_compatible_type
+           (chunk : memory_chunk) (ty : typ) : bool :=
+  match chunk, ty with
+  | Mint32, Tint
+  | Mint64, Tlong
+  | Mfloat32, Tsingle
+  | Mfloat64, Tfloat => true
+  | _, _ => false
+  end.
+
+Module RELATION <: SEMILATTICE_WITHOUT_BOTTOM.
+  Definition t := PSet.t.
+  Definition eq (x : t) (y : t) := x = y.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq. trivial.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq. congruence.
+  Qed.
+
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq. congruence.
+  Qed.
+  
+  Definition beq (x y : t) := if PSet.eq x y then true else false.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq.
+    intros.
+    destruct PSet.eq; congruence.
+  Qed.
+  
+  Definition ge (x y : t) := (PSet.is_subset x y) = true.
+
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    intros.
+    subst y.
+    apply PSet.is_subset_spec.
+    trivial.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    intros.
+    rewrite PSet.is_subset_spec in *.
+    intuition.
+  Qed.
+  
+  Definition lub x y :=
+    if Compopts.optim_CSE3_across_merges tt
+    then PSet.inter x y
+    else
+      if PSet.eq x y
+      then x
+      else PSet.empty.
+  
+  Definition glb := PSet.union.
+  
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    intros.
+    destruct (Compopts.optim_CSE3_across_merges tt).
+    - apply PSet.is_subset_spec.
+      intro.
+      rewrite PSet.ginter.
+      rewrite andb_true_iff.
+      intuition.
+    - apply PSet.is_subset_spec.
+      intro.
+      destruct (PSet.eq x y).
+      + auto.
+      + rewrite PSet.gempty.
+        discriminate.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    intros.
+    destruct (Compopts.optim_CSE3_across_merges tt).
+    - apply PSet.is_subset_spec.
+      intro.
+      rewrite PSet.ginter.
+      rewrite andb_true_iff.
+      intuition.
+    - apply PSet.is_subset_spec.
+      intro.
+      destruct (PSet.eq x y).
+      + subst. auto.
+      + rewrite PSet.gempty.
+        discriminate.
+  Qed.
+
+  Definition top := PSet.empty.
+End RELATION.
+
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+Inductive sym_op : Type :=
+| SOp : operation -> sym_op
+| SLoad : memory_chunk -> addressing -> sym_op.
+
+Definition eq_dec_sym_op : forall s s' : sym_op, {s = s'} + {s <> s'}.
+Proof.
+  generalize eq_operation.
+  generalize eq_addressing.
+  generalize chunk_eq.
+  decide equality.
+Defined.
+
+Definition eq_dec_args : forall l l' : list reg, { l = l' } + { l <> l' }.
+Proof.
+  apply List.list_eq_dec.
+  exact peq.
+Defined.
+
+Inductive equation_or_condition :=
+| Equ : reg -> sym_op -> list reg -> equation_or_condition
+| Cond : condition -> list reg -> equation_or_condition.
+
+Definition eq_dec_equation :
+  forall eq eq' : equation_or_condition, {eq = eq'} + {eq <> eq'}.
+Proof.
+  generalize peq.
+  generalize eq_dec_sym_op.
+  generalize eq_dec_args.
+  generalize eq_condition.
+  decide equality.
+Defined.
+
+Definition eq_id := node.
+
+Definition add_i_j (i : reg) (j : eq_id) (m : Regmap.t PSet.t) :=
+  Regmap.set i (PSet.add j (Regmap.get i m)) m.
+
+Definition add_ilist_j (ilist : list reg) (j : eq_id) (m : Regmap.t PSet.t) :=
+  List.fold_left (fun already i => add_i_j i j already) ilist m.
+
+Definition get_reg_kills (eqs : PTree.t equation_or_condition) :
+  Regmap.t PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq_cond : equation_or_condition) =>
+                match eq_cond with
+                | Equ lhs sop args =>
+                  add_i_j lhs eqno
+                          (add_ilist_j args eqno already)
+                | Cond cond args => add_ilist_j args eqno already
+                end) eqs
+             (PMap.init PSet.empty).
+
+Definition eq_cond_depends_on_mem eq_cond :=
+  match eq_cond with
+  | Equ lhs sop args =>
+    match sop with
+    | SLoad _ _ => true
+    | SOp op => op_depends_on_memory op
+    end
+  | Cond cond args => cond_depends_on_memory cond
+  end.
+
+Definition eq_cond_depends_on_store eq_cond :=
+  match eq_cond with
+  | Equ _ (SLoad _ _) _ => true
+  | _ => false
+  end.
+
+Definition get_mem_kills (eqs : PTree.t equation_or_condition) : PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation_or_condition) =>
+                if eq_cond_depends_on_mem eq
+                then PSet.add eqno already
+                else already) eqs PSet.empty.
+
+Definition get_store_kills (eqs : PTree.t equation_or_condition) : PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation_or_condition) =>
+                if eq_cond_depends_on_store eq
+                then PSet.add eqno already
+                else already) eqs PSet.empty.
+
+Definition is_move (op : operation) :
+  { op = Omove } + { op <> Omove }.
+Proof.
+  destruct op; try (right ; congruence).
+  left; trivial.
+Qed.
+
+Definition is_smove (sop : sym_op) :
+  { sop = SOp Omove } + { sop <> SOp Omove }.
+Proof.
+  destruct sop; try (right ; congruence).
+  destruct (is_move o).
+  - left; congruence.
+  - right; congruence.
+Qed.
+
+Definition get_moves (eqs : PTree.t equation_or_condition) :
+  Regmap.t PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation_or_condition) =>
+                match eq with
+                | Equ lhs sop args =>
+                  if is_smove sop
+                  then add_i_j lhs eqno already
+                  else already
+                | _ => already
+                end) eqs (PMap.init PSet.empty).
+  
+Record eq_context := mkeqcontext
+              { eq_catalog : eq_id -> option equation_or_condition;
+                eq_find_oracle : node -> equation_or_condition -> option eq_id;
+                eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t;
+                eq_kill_reg : reg -> PSet.t;
+                eq_kill_mem : unit -> PSet.t;
+                eq_kill_store : unit -> PSet.t;
+                eq_moves : reg -> PSet.t }.
+
+Section OPERATIONS.
+  Context {ctx : eq_context}.
+  
+  Definition kill_reg (r : reg) (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel (eq_kill_reg ctx r).
+  
+  Definition kill_mem (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel (eq_kill_mem ctx tt).
+
+  Definition pick_source (l : list reg) := (* todo: take min? *)
+    match l with
+    | h::t => Some h
+    | nil => None
+    end.
+  
+  Definition forward_move (rel : RELATION.t)  (x : reg) : reg :=
+    match pick_source (PSet.elements (PSet.inter rel (eq_moves ctx x))) with
+    | None => x
+    | Some eqno =>
+      match eq_catalog ctx eqno with
+      | Some (Equ lhs sop args) =>
+        if is_smove sop && peq x lhs
+        then
+          match args with
+          | src::nil => src
+          | _ => x
+          end
+        else x
+      | _ => x
+      end
+    end.
+
+  Definition forward_move_l (rel : RELATION.t) : list reg -> list reg :=
+    List.map (forward_move rel).
+
+  Section PER_NODE.
+    Variable no : node.
+    
+  Definition eq_find  (eq : equation_or_condition) :=
+    match eq_find_oracle ctx no eq with
+    | Some id =>
+      match eq_catalog ctx id with
+      | Some eq' => if eq_dec_equation eq eq' then Some id else None
+      | None => None
+      end
+    | None => None
+    end.
+
+  Definition is_condition_present
+           (rel : RELATION.t) (cond : condition) (args : list reg) :=
+    match eq_find (Cond cond args) with
+    | Some id => PSet.contains rel id
+    | None => false
+    end.
+
+  Definition rhs_find (sop : sym_op) (args : list reg) (rel : RELATION.t) : option reg :=
+    match pick_source (PSet.elements (PSet.inter (eq_rhs_oracle ctx no sop args) rel)) with
+    | None => None
+    | Some src =>
+      match eq_catalog ctx src with
+      | Some (Equ eq_lhs eq_sop eq_args) =>
+        if eq_dec_sym_op sop eq_sop && eq_dec_args args eq_args
+        then Some eq_lhs
+        else None
+      | _ => None
+      end
+    end.
+
+  Definition oper2 (dst : reg) (op: sym_op)(args : list reg)
+           (rel : RELATION.t) : RELATION.t :=
+    match eq_find (Equ dst op args) with
+    | Some id =>
+      if PSet.contains rel id
+      then rel
+      else PSet.add id (kill_reg dst rel)
+    | None => kill_reg dst rel
+    end.
+
+  Definition oper1 (dst : reg) (op: sym_op) (args : list reg)
+             (rel : RELATION.t) : RELATION.t :=
+    if List.in_dec peq dst args
+    then kill_reg dst rel
+    else oper2 dst op args rel.
+
+  
+  Definition move (src dst : reg) (rel : RELATION.t) : RELATION.t :=
+    if peq src dst
+    then rel
+    else
+      match eq_find (Equ dst (SOp Omove) (src::nil)) with
+      | Some eq_id => PSet.add eq_id (kill_reg dst rel)
+      | None => kill_reg dst rel
+      end.
+
+  Definition is_trivial_sym_op sop :=
+    match sop with
+    | SOp op => is_trivial_op op
+    | SLoad _ _ => false
+    end.
+  
+  Definition oper (dst : reg) (op: sym_op) (args : list reg)
+             (rel : RELATION.t) : RELATION.t :=
+    if is_smove op
+    then
+      match args with
+      | src::nil =>
+        move (forward_move rel src) dst rel
+      | _ => kill_reg dst rel
+      end
+    else
+      let args' := forward_move_l rel args in
+      match rhs_find op args rel with
+      | Some r =>
+        if Compopts.optim_CSE3_glb tt
+        then RELATION.glb (move r dst rel)
+                          (RELATION.glb
+                             (oper1 dst op args rel)
+                             (oper1 dst op args' rel))
+        else RELATION.glb
+               (oper1 dst op args rel)
+               (oper1 dst op args' rel)
+      | None => RELATION.glb
+                  (oper1 dst op args rel)
+                  (oper1 dst op args' rel)
+      end.
+  
+  Definition kill_store (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel (eq_kill_store ctx tt).
+
+  Definition clever_kill_store
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg)
+             (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel
+      (PSet.filter
+         (fun eqno =>
+            match eq_catalog ctx eqno with
+            | Some (Equ eq_lhs eq_sop eq_args) =>
+              match eq_sop with
+              | SOp op => true
+              | SLoad chunk' addr' =>
+                may_overlap chunk addr args chunk' addr' eq_args
+              end
+            | _ => false
+            end)
+         (PSet.inter rel (eq_kill_store ctx tt))).
+
+  Definition store2
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg)
+             (rel : RELATION.t) : RELATION.t :=
+    if Compopts.optim_CSE3_alias_analysis tt
+    then clever_kill_store chunk addr args src rel
+    else kill_store rel.
+
+  Definition store1
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg) (ty: typ)
+             (rel : RELATION.t) : RELATION.t :=
+    let rel' := store2 chunk addr args src rel in
+    if loadv_storev_compatible_type chunk ty
+    then
+      match eq_find (Equ src (SLoad chunk addr) args) with
+      | Some id => PSet.add id rel'
+      | None => rel'
+      end
+    else rel'.
+    
+  Definition store (tenv : typing_env)
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg)
+             (rel : RELATION.t) : RELATION.t :=
+    let args' := forward_move_l rel args in
+    let src' := forward_move rel src in
+    let tsrc := tenv src in
+    let tsrc' := tenv src' in
+    RELATION.glb
+      (RELATION.glb
+         (store1 chunk addr args src tsrc rel)
+         (store1 chunk addr args' src tsrc rel))
+      (RELATION.glb
+         (store1 chunk addr args src' tsrc' rel)
+         (store1 chunk addr args' src' tsrc' rel)).
+
+  Definition kill_builtin_res res rel :=
+    match res with
+    | BR r => kill_reg r rel
+    | _ => rel
+    end.
+
+  Definition apply_external_call ef (rel : RELATION.t) : RELATION.t :=
+    match ef with
+    | EF_builtin name sg =>
+      match Builtins.lookup_builtin_function name sg with
+      | Some bf => rel
+      | None => if Compopts.optim_CSE3_across_calls tt
+                then kill_mem rel
+                else RELATION.top
+      end
+    | EF_runtime name sg =>
+      if Compopts.optim_CSE3_across_calls tt
+      then 
+        match Builtins.lookup_builtin_function name sg with
+        | Some bf => rel
+        | None => kill_mem rel
+        end
+      else RELATION.top
+    | EF_malloc
+    | EF_external _ _
+    | EF_free =>
+      if Compopts.optim_CSE3_across_calls tt
+      then kill_mem rel
+      else RELATION.top
+    | EF_vstore _ 
+    | EF_memcpy _ _ (* FIXME *)
+    | EF_inline_asm _ _ _ => kill_mem rel
+    | _ => rel
+    end.
+
+  Definition apply_cond1 cond args (rel : RELATION.t) : RB.t :=
+    match eq_find (Cond (negate_condition cond) args) with
+    | Some eq_id =>
+      if PSet.contains rel eq_id
+      then RB.bot
+      else Some rel
+    | None => Some rel
+    end.
+
+  Definition apply_cond0 cond args (rel : RELATION.t) : RELATION.t :=
+    match eq_find (Cond cond args) with
+    | Some eq_id => PSet.add eq_id rel
+    | None => rel
+    end.
+
+  Definition apply_cond cond args (rel : RELATION.t) : RB.t :=
+    match apply_cond1 cond args rel with
+      | Some rel => Some (apply_cond0 cond args rel)
+      | None => RB.bot
+    end.
+      
+  Definition apply_instr (tenv : typing_env) (instr : RTL.instruction) (rel : RELATION.t) : list (node * RB.t) :=
+  match instr with
+  | Inop pc' => (pc', (Some rel))::nil
+  | Icond cond args ifso ifnot _ =>
+    (ifso,  (apply_cond cond args rel))::
+    (ifnot, (apply_cond (negate_condition cond) args rel))::nil
+  | Ijumptable _ targets => List.map (fun pc' => (pc', (Some rel))) targets
+  | Istore chunk addr args src pc' =>
+    (pc', (Some (store tenv chunk addr args src rel)))::nil
+  | Iop op args dst pc' => (pc', (Some (oper dst (SOp op) args rel)))::nil
+  | Iload trap chunk addr args dst pc' => (pc', (Some (oper dst (SLoad chunk addr) args rel)))::nil
+  | Icall _ _ _ dst pc' => (pc', (Some (kill_reg dst (kill_mem rel))))::nil
+  | Ibuiltin ef _ res pc' => (pc', (Some (kill_builtin_res res (apply_external_call ef rel))))::nil
+  | Itailcall _ _ _ | Ireturn _ => nil
+  end.
+  End PER_NODE.
+
+Definition apply_instr' (tenv : typing_env) code (pc : node) (ro : RB.t) :
+  list (node * RB.t)  :=
+  match code ! pc with
+  | None => nil
+  | Some instr => 
+    match ro with
+    | None => List.map (fun pc' => (pc', RB.bot)) (successors_instr instr)
+    | Some x => apply_instr pc tenv instr x
+    end
+  end.
+
+Definition invariants := PMap.t RB.t.
+
+Definition rel_leb (x y : RELATION.t) : bool := (PSet.is_subset y x).
+
+Definition relb_leb (x y : RB.t) : bool :=
+  match x, y with
+  | None, _ => true
+  | (Some _), None => false
+  | (Some x), (Some y) => rel_leb x y
+  end.
+
+Definition check_inductiveness (fn : RTL.function) (tenv: typing_env) (inv: invariants) :=
+  (RB.beq (Some RELATION.top) (PMap.get (fn_entrypoint fn) inv)) &&
+  PTree_Properties.for_all (fn_code fn) 
+      (fun pc instr =>
+         match PMap.get pc inv with
+         | None => true
+         | Some rel =>
+           List.forallb
+             (fun szz =>
+                relb_leb (snd szz) (PMap.get (fst szz) inv))
+             (apply_instr pc tenv instr rel)
+         end).
+(* No longer used. Incompatible with transfer functions that yield a different result depending on the successor.
+
+Definition internal_analysis
+  (tenv : typing_env)
+  (f : RTL.function) : option invariants := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' tenv (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+*)
+End OPERATIONS.
+
+Record analysis_hints :=
+  mkanalysis_hints
+    { hint_eq_catalog :  PTree.t equation_or_condition;
+      hint_eq_find_oracle : node -> equation_or_condition -> option eq_id;
+      hint_eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t }.
+
+Definition context_from_hints (hints : analysis_hints) :=
+  let eqs := hint_eq_catalog hints in
+  let reg_kills := get_reg_kills eqs in 
+  let mem_kills := get_mem_kills eqs in
+  let store_kills := get_store_kills eqs in
+  let moves := get_moves eqs in
+  {|
+    eq_catalog := fun eq_id => PTree.get eq_id eqs;
+    eq_find_oracle := hint_eq_find_oracle hints ;
+    eq_rhs_oracle  := hint_eq_rhs_oracle hints;
+    eq_kill_reg := fun reg => PMap.get reg reg_kills;
+    eq_kill_mem := fun _ => mem_kills;
+    eq_kill_store := fun _ => store_kills;
+    eq_moves    := fun reg => PMap.get reg moves
+  |}.
diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml
new file mode 100644
index 00000000..efe6b600
--- /dev/null
+++ b/backend/CSE3analysisaux.ml
@@ -0,0 +1,319 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open CSE3analysis
+open Maps
+open HashedSet
+open Camlcoq
+open Coqlib
+   
+type flattened_equation_or_condition =
+  | Flat_equ of int * sym_op * int list
+  | Flat_cond of Op.condition * int list;;
+
+let flatten_eq = function
+  | Equ(lhs, sop, args) ->
+     Flat_equ((P.to_int lhs), sop, (List.map P.to_int args))
+  | Cond(cond, args) ->
+     Flat_cond(cond, (List.map P.to_int args));;
+
+let imp_add_i_j s i j =
+  s := PMap.set i (PSet.add j (PMap.get i !s)) !s;;
+
+let string_of_chunk = function
+    | AST.Mint8signed -> "int8signed"
+    | AST.Mint8unsigned -> "int8unsigned"
+    | AST.Mint16signed -> "int16signed"
+    | AST.Mint16unsigned -> "int16unsigned"
+    | AST.Mint32 -> "int32"
+    | AST.Mint64 -> "int64"
+    | AST.Mfloat32 -> "float32"
+    | AST.Mfloat64 -> "float64"
+    | AST.Many32 -> "any32"
+    | AST.Many64 -> "any64";;
+
+let print_reg channel i =
+  Printf.fprintf channel "r%d" i;;
+
+let print_eq channel (lhs, sop, args) =
+  match sop with
+  | SOp op ->
+     Printf.printf "%a = %a" print_reg lhs (PrintOp.print_operation print_reg) (op, args)
+  | SLoad(chunk, addr) ->
+     Printf.printf "%a = %s @ %a" print_reg lhs (string_of_chunk chunk)
+       (PrintOp.print_addressing print_reg) (addr, args);;
+
+let print_cond channel (cond, args) =
+  Printf.printf "cond %a" (PrintOp.print_condition print_reg) (cond, args);;
+
+let pp_intset oc s =
+  Printf.fprintf oc "{ ";
+  List.iter (fun i -> Printf.fprintf oc "%d; " (P.to_int i)) (PSet.elements s);
+  Printf.fprintf oc "}";;
+
+let pp_rhs oc (sop, args) =
+  match sop with
+  | SOp op -> PrintOp.print_operation PrintRTL.reg oc (op, args)
+  | SLoad(chunk, addr) ->
+     Printf.fprintf oc "%s[%a]"
+       (PrintAST.name_of_chunk chunk)
+         (PrintOp.print_addressing PrintRTL.reg) (addr, args);;
+
+let pp_eq oc eq_cond =
+  match eq_cond with
+  | Equ(lhs, sop, args) ->
+     Printf.fprintf oc "x%d = %a" (P.to_int lhs)
+       pp_rhs (sop, args)
+  | Cond(cond, args) ->
+     Printf.fprintf oc "cond %a"
+       (PrintOp.print_condition PrintRTL.reg) (cond, args);;
+
+let pp_P oc x = Printf.fprintf oc "%d" (P.to_int x)
+              
+let pp_option pp oc = function
+  | None -> output_string oc "none"
+  | Some x -> pp oc x;;
+
+let is_trivial = function
+  | Equ(lhs, (SOp Op.Omove), [lhs']) -> lhs=lhs'
+  | _ -> false;;
+
+let rec pp_list separator pp_item chan = function
+  | [] -> ()
+  | [h] -> pp_item chan h
+  | h::t ->
+     pp_item chan h;
+     output_string chan separator;
+     pp_list separator pp_item chan t;;
+
+let pp_set separator pp_item chan s =
+  pp_list separator pp_item chan (PSet.elements s);;
+
+let pp_equation hints chan x =
+  match PTree.get x hints.hint_eq_catalog with
+  | None -> output_string chan "???"
+  | Some eq ->
+     match eq with
+     | Equ(lhs, sop, args) ->
+        print_eq chan  (P.to_int lhs, sop, List.map P.to_int args)
+     | Cond(cond, args) ->
+        print_cond chan (cond, List.map P.to_int args);;
+
+let pp_relation hints chan rel =
+  pp_set "; " (pp_equation hints) chan rel;;
+
+let pp_relation_b hints chan = function
+  | None -> output_string chan "bot"
+  | Some rel -> pp_relation hints chan rel;;
+
+let pp_results f (invariants : RB.t PMap.t) hints chan =
+  let max_pc = P.to_int (RTL.max_pc_function f) in
+  for pc=max_pc downto 1
+  do
+    Printf.fprintf chan "%d: %a\n\n" pc
+      (pp_relation_b hints) (PMap.get (P.of_int pc) invariants)
+  done
+
+module IntSet=Set.Make(struct type t=int let compare = ( - ) end);;
+
+let rec union_list prev = function
+  | [] -> prev
+  | h::t -> union_list (RB.lub prev h) t;;
+
+let rb_glb (x : RB.t) (y : RB.t) : RB.t =
+  match x, y with
+  | None, _ | _, None -> None
+  | (Some x'), (Some y') -> Some (RELATION.glb x' y');;
+
+let compute_invariants
+      (nodes : RTL.node list)
+      (entrypoint : RTL.node)
+      (tfr : RTL.node -> RB.t -> (RTL.node * RB.t) list) =
+  let todo = ref IntSet.empty
+  and invariants = ref (PMap.set entrypoint (Some RELATION.top) (PMap.init RB.bot)) in  
+  let add_todo (pc : RTL.node) =
+    todo := IntSet.add (P.to_int pc) !todo in 
+  let update_node (pc : RTL.node) =
+    (if !Clflags.option_debug_compcert > 9
+     then Printf.printf "UP updating node %d\n" (P.to_int pc));
+    let cur = PMap.get pc !invariants in
+    List.iter (fun (next_pc, next_contrib) ->
+        let previous = PMap.get next_pc !invariants in
+        let next = RB.lub previous next_contrib in
+        if not (RB.beq previous next)
+        then (
+          invariants := PMap.set next_pc next !invariants;
+          add_todo next_pc)) (tfr pc cur) in
+  add_todo entrypoint;
+  while not (IntSet.is_empty !todo) do
+    let nxt = IntSet.max_elt !todo in
+    todo := IntSet.remove nxt !todo;
+    update_node (P.of_int nxt)
+  done;
+  !invariants;;
+
+let refine_invariants
+      (nodes : RTL.node list)
+      (entrypoint : RTL.node)
+      (successors : RTL.node -> RTL.node list)
+      (predecessors : RTL.node -> RTL.node list)
+      (tfr : RTL.node -> RB.t -> (RTL.node * RB.t) list)
+      (invariants0 : RB.t PMap.t) =
+  let todo = ref IntSet.empty
+  and invariants = ref invariants0 in  
+  let add_todo (pc : RTL.node) =
+    todo := IntSet.add (P.to_int pc) !todo in 
+  let update_node (pc : RTL.node) =
+    (if !Clflags.option_debug_compcert > 9
+     then Printf.printf "DOWN updating node %d\n" (P.to_int pc));
+    if not (peq pc entrypoint)
+    then
+      let cur = PMap.get pc !invariants in
+      let nxt = union_list RB.bot
+                  (List.map
+                     (fun pred_pc->
+                       rb_glb cur
+                         (List.assoc pc (tfr pred_pc (PMap.get pred_pc !invariants))))
+                     (predecessors pc)) in
+      if not (RB.beq cur nxt)
+      then
+        begin
+          (if !Clflags.option_debug_compcert > 4
+           then Printf.printf "refining CSE3 node %d\n" (P.to_int pc));
+          List.iter add_todo (successors pc)
+        end in
+  (List.iter add_todo nodes);
+  while not (IntSet.is_empty !todo) do
+    let nxt = IntSet.max_elt !todo in
+    todo := IntSet.remove nxt !todo;
+    update_node (P.of_int nxt)
+  done;
+  !invariants;;
+
+let get_default default x ptree =
+  match PTree.get x ptree with
+  | None -> default
+  | Some y -> y;;
+
+let initial_analysis ctx tenv (f : RTL.coq_function) =
+  let tfr = apply_instr' ctx tenv f.RTL.fn_code in
+  compute_invariants
+    (List.map fst (PTree.elements f.RTL.fn_code))
+    f.RTL.fn_entrypoint tfr;;
+
+let refine_analysis ctx tenv
+      (f : RTL.coq_function) (invariants0 : RB.t PMap.t) =
+  let succ_map = RTL.successors_map f in
+  let succ_f x = get_default [] x succ_map in
+  let pred_map = Kildall.make_predecessors f.RTL.fn_code RTL.successors_instr in
+  let pred_f x = get_default [] x pred_map in
+  let tfr = apply_instr' ctx tenv f.RTL.fn_code in
+  refine_invariants
+    (List.map fst (PTree.elements f.RTL.fn_code))
+    f.RTL.fn_entrypoint succ_f pred_f tfr invariants0;;
+
+let add_to_set_in_table table key item =
+  Hashtbl.add table key
+    (PSet.add item
+       (match Hashtbl.find_opt table key with
+        | None -> PSet.empty
+        | Some s -> s));;
+  
+let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
+  let cur_eq_id = ref 0
+  and cur_catalog = ref PTree.empty
+  and eq_table = Hashtbl.create 100
+  and rhs_table = Hashtbl.create 100
+  and cur_kill_reg = ref (PMap.init PSet.empty)
+  and cur_kill_mem = ref PSet.empty
+  and cur_kill_store = ref PSet.empty
+  and cur_moves = ref (PMap.init PSet.empty) in
+  let eq_find_oracle node eq =
+    assert (not (is_trivial eq));
+    let o = Hashtbl.find_opt eq_table (flatten_eq eq) in
+    (* FIXME (if o = None then failwith "eq_find_oracle"); *)
+    (if !Clflags.option_debug_compcert > 5
+     then Printf.printf "@%d: eq_find %a -> %a\n" (P.to_int node)
+            pp_eq eq (pp_option pp_P) o);
+    o
+  and rhs_find_oracle node sop args =
+    let o =
+      match Hashtbl.find_opt rhs_table (sop, List.map P.to_int args) with
+      | None -> PSet.empty
+      | Some s -> s in
+    (if !Clflags.option_debug_compcert > 5
+     then Printf.printf "@%d: rhs_find %a = %a\n"
+            (P.to_int node) pp_rhs (sop, args) pp_intset o);
+    o in
+  let mutating_eq_find_oracle node eq : P.t option =
+    let flat_eq = flatten_eq eq in
+    let o =
+    match Hashtbl.find_opt eq_table flat_eq with
+    | Some x ->
+       Some x
+    | None ->
+       (* TODO print_eq stderr flat_eq; *)
+       incr cur_eq_id;
+       let id = !cur_eq_id in
+       let coq_id = P.of_int id in
+       begin
+         Hashtbl.add eq_table flat_eq coq_id;
+         (cur_catalog := PTree.set coq_id eq !cur_catalog);
+         (match flat_eq with
+          | Flat_equ(flat_eq_lhs, flat_eq_op, flat_eq_args) ->
+             add_to_set_in_table rhs_table
+               (flat_eq_op, flat_eq_args) coq_id
+          | Flat_cond(flat_eq_cond, flat_eq_args) -> ());
+         (match eq with
+          | Equ(lhs, sop, args) ->
+             List.iter
+               (fun reg -> imp_add_i_j cur_kill_reg reg coq_id)
+               (lhs :: args);
+             (match sop, args with
+              | (SOp Op.Omove), [rhs] -> imp_add_i_j cur_moves lhs coq_id
+              | _, _ -> ())
+          | Cond(cond, args) ->
+             List.iter
+               (fun reg -> imp_add_i_j cur_kill_reg reg coq_id) args
+         );
+         (if eq_cond_depends_on_mem eq
+          then cur_kill_mem := PSet.add coq_id !cur_kill_mem);
+         (if eq_cond_depends_on_store eq
+          then cur_kill_store := PSet.add coq_id !cur_kill_store);
+         Some coq_id
+       end
+    in
+    (if !Clflags.option_debug_compcert > 5
+     then Printf.printf "@%d: mutating_eq_find %a -> %a\n" (P.to_int node)
+      pp_eq eq (pp_option pp_P) o);    
+    o
+  in
+  let ctx = { eq_catalog     = (fun eq_id -> PTree.get eq_id !cur_catalog);
+              eq_find_oracle = mutating_eq_find_oracle;
+              eq_rhs_oracle  = rhs_find_oracle ;
+              eq_kill_reg    = (fun reg -> PMap.get reg !cur_kill_reg);
+              eq_kill_mem    = (fun () -> !cur_kill_mem);
+              eq_kill_store  = (fun () -> !cur_kill_store);
+              eq_moves       = (fun reg -> PMap.get reg !cur_moves)
+            } in
+  let invariants = initial_analysis ctx tenv f in
+  let invariants' =
+    if ! Clflags.option_fcse3_refine
+    then refine_analysis ctx tenv f invariants
+    else invariants
+  and hints = { hint_eq_catalog    = !cur_catalog;
+                hint_eq_find_oracle= eq_find_oracle;
+                hint_eq_rhs_oracle = rhs_find_oracle } in
+  (if !Clflags.option_debug_compcert > 1
+   then pp_results f invariants' hints stdout);
+  invariants', hints
+;;
diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v
new file mode 100644
index 00000000..523b52df
--- /dev/null
+++ b/backend/CSE3analysisproof.v
@@ -0,0 +1,1421 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE3analysis CSE2deps CSE2depsproof HashedSet.
+Require Import RTLtyping.
+Require Import Lia.
+
+Lemma rel_leb_correct:
+  forall x x',
+    rel_leb x x' = true <-> RELATION.ge x' x.
+Proof.
+  unfold rel_leb, RELATION.ge.
+  split; auto.
+Qed.
+
+Hint Resolve rel_leb_correct : cse3.
+
+Lemma relb_leb_correct:
+  forall x x',
+    relb_leb x x' = true <-> RB.ge x' x.
+Proof.
+  unfold relb_leb, RB.ge.
+  destruct x; destruct x'; split; trivial; try contradiction; discriminate.
+Qed.
+
+Hint Resolve relb_leb_correct : cse3.
+
+Theorem loadv_storev_really_same:
+  forall chunk: memory_chunk,
+  forall m1: mem,
+  forall addr v: val,
+  forall m2: mem,
+  forall ty : typ,
+  forall TYPE: Val.has_type v ty,
+  forall STORE: Mem.storev chunk m1 addr v = Some m2,
+  forall COMPATIBLE: loadv_storev_compatible_type chunk ty = true,
+    Mem.loadv chunk m2 addr = Some v.
+Proof.
+  intros.
+  rewrite Mem.loadv_storev_same with (m1:=m1) (v:=v) by assumption.
+  f_equal.
+  destruct chunk; destruct ty; try discriminate.
+  all: destruct v; trivial; try contradiction.
+  all: unfold Val.load_result, Val.has_type in *.
+  all: destruct Archi.ptr64; trivial; discriminate.
+Qed.
+
+Lemma subst_args_notin :
+  forall (rs : regset) dst v args,
+    ~ In dst args ->
+    (rs # dst <- v) ## args = rs ## args.
+Proof.
+  induction args; simpl; trivial.
+  intro NOTIN.
+  destruct (peq a dst).
+  {
+    subst a.
+    intuition congruence.
+  }
+  rewrite Regmap.gso by congruence.
+  f_equal.
+  apply IHargs.
+  intuition congruence.
+Qed.
+
+Lemma add_i_j_adds : forall i j m,
+    PSet.contains (Regmap.get i (add_i_j i j m)) j = true.
+Proof.
+  intros.
+  unfold add_i_j.
+  rewrite Regmap.gss.
+  auto with pset.
+Qed.
+Hint Resolve add_i_j_adds: cse3.
+
+Lemma add_i_j_monotone : forall i j i' j' m,
+    PSet.contains (Regmap.get i' m) j' = true ->
+    PSet.contains (Regmap.get i' (add_i_j i j m)) j' = true.
+Proof.
+  intros.
+  unfold add_i_j.
+  destruct (peq i i').
+  - subst i'.
+    rewrite Regmap.gss.
+    destruct (peq j j').
+    + subst j'.
+      apply PSet.gadds.
+    + eauto with pset.
+  - rewrite Regmap.gso.
+    assumption.
+    congruence.
+Qed.
+
+Hint Resolve add_i_j_monotone: cse3.
+
+Lemma add_ilist_j_monotone : forall ilist j i' j' m,
+    PSet.contains (Regmap.get i' m) j' = true ->
+    PSet.contains (Regmap.get i' (add_ilist_j ilist j m)) j' = true.
+Proof.
+  induction ilist; simpl; intros until m; intro CONTAINS; auto with cse3.
+Qed.
+Hint Resolve add_ilist_j_monotone: cse3.
+
+Lemma add_ilist_j_adds : forall ilist j m,
+    forall i, In i ilist ->
+              PSet.contains (Regmap.get i (add_ilist_j ilist j m)) j = true.
+Proof.
+  induction ilist; simpl; intros until i; intro IN.
+  contradiction.
+  destruct IN as [HEAD | TAIL]; subst; auto with cse3.
+Qed.
+Hint Resolve add_ilist_j_adds: cse3.
+
+Definition xlget_kills (eqs : list (eq_id * equation_or_condition))
+                       (m :  Regmap.t PSet.t) :
+  Regmap.t PSet.t :=
+  List.fold_left (fun already (item : eq_id * equation_or_condition) =>
+    match snd item with
+    | Equ lhs sop args =>
+      add_i_j lhs (fst item)
+              (add_ilist_j args (fst item) already)
+    | Cond cond args => add_ilist_j args (fst item) already
+    end) eqs m.
+
+Definition xlget_mem_kills (eqs : list (positive * equation_or_condition))
+           (m : PSet.t) : PSet.t :=
+(fold_left
+       (fun (a : PSet.t) (item : positive * equation_or_condition) =>
+          if eq_cond_depends_on_mem (snd item)
+          then PSet.add (fst item) a
+          else a
+       )
+       eqs m).
+
+Definition xlget_store_kills (eqs : list (positive * equation_or_condition))
+           (m : PSet.t) : PSet.t :=
+(fold_left
+       (fun (a : PSet.t) (item : positive * equation_or_condition) =>
+          if eq_cond_depends_on_store (snd item)
+          then PSet.add (fst item) a
+          else a
+       )
+       eqs m).
+
+Lemma xlget_kills_monotone :
+  forall eqs m i j,
+    PSet.contains (Regmap.get i m) j = true ->
+    PSet.contains (Regmap.get i (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl; trivial.
+  intros.
+  destruct a as [id eq_cond]; cbn.
+  destruct eq_cond as [eq_lhs eq_sop eq_args | eq_cond eq_args]; auto with cse3.
+Qed.
+
+Hint Resolve xlget_kills_monotone : cse3.
+
+Lemma xlget_mem_kills_monotone :
+  forall eqs m j,
+    PSet.contains m j = true ->
+    PSet.contains (xlget_mem_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl; trivial.
+  intros.
+  destruct a as [id eq_cond]; cbn.
+  destruct eq_cond_depends_on_mem.
+  - apply IHeqs.
+    destruct (peq id j).
+    + subst j. apply PSet.gadds.
+    + rewrite PSet.gaddo by congruence.
+      trivial.
+  - auto.
+Qed.
+
+Hint Resolve xlget_mem_kills_monotone : cse3.
+
+Lemma xlget_store_kills_monotone :
+  forall eqs m j,
+    PSet.contains m j = true ->
+    PSet.contains (xlget_store_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl; trivial.
+  intros.
+  destruct a as [id eq_cond]; cbn.
+  destruct eq_cond_depends_on_store.
+  - apply IHeqs.
+    destruct (peq id j).
+    + subst j. apply PSet.gadds.
+    + rewrite PSet.gaddo by congruence.
+      trivial.
+  - auto.
+Qed.
+
+Hint Resolve xlget_store_kills_monotone : cse3.
+
+Lemma xlget_kills_has_lhs :
+  forall eqs m lhs sop args j,
+    In (j, (Equ lhs sop args))  eqs ->
+    PSet.contains (Regmap.get lhs (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros until j.
+  intro HEAD_TAIL.
+  destruct HEAD_TAIL as [HEAD | TAIL]; subst; simpl.
+  - auto with cse3.
+  - eapply IHeqs. eassumption.
+Qed.
+Hint Resolve xlget_kills_has_lhs : cse3.
+
+Lemma xlget_kills_has_arg :
+  forall eqs m lhs sop arg args j,
+    In (j, (Equ lhs sop args)) eqs ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros until j.
+  intros HEAD_TAIL ARG.
+  destruct HEAD_TAIL as [HEAD | TAIL]; subst; simpl.
+  - auto with cse3.
+  - eapply IHeqs; eassumption.
+Qed.
+
+Hint Resolve xlget_kills_has_arg : cse3.
+
+Lemma xlget_cond_kills_has_arg :
+  forall eqs m cond arg args j,
+    In (j, (Cond cond args)) eqs ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros until j.
+  intros HEAD_TAIL ARG.
+  destruct HEAD_TAIL as [HEAD | TAIL]; subst; simpl.
+  - auto with cse3.
+  - eapply IHeqs; eassumption.
+Qed.
+
+Hint Resolve xlget_cond_kills_has_arg : cse3.
+
+Lemma get_kills_has_lhs :
+  forall eqs lhs sop args j,
+    PTree.get j eqs = Some (Equ lhs sop args) ->
+    PSet.contains (Regmap.get lhs (get_reg_kills eqs)) j = true.
+Proof.
+  unfold get_reg_kills.
+  intros.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : Regmap.t PSet.t) (p : positive * equation_or_condition) =>
+        match snd p with
+        | Equ lhs0 _ args0 =>
+          add_i_j lhs0 (fst p) (add_ilist_j args0 (fst p) a)
+        | Cond _ args0 => add_ilist_j args0 (fst p) a
+        end))  with xlget_kills.
+  eapply xlget_kills_has_lhs.
+  apply PTree.elements_correct.
+  eassumption.
+Qed.
+
+Hint Resolve get_kills_has_lhs : cse3.
+
+Lemma context_from_hints_get_kills_has_lhs :
+  forall hints lhs sop args j,
+    PTree.get j (hint_eq_catalog hints) = Some (Equ lhs sop args) ->
+    PSet.contains  (eq_kill_reg (context_from_hints hints) lhs) j = true.
+Proof.
+  intros; simpl.
+  eapply get_kills_has_lhs.
+  eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_lhs : cse3.
+
+Lemma get_kills_has_arg :
+  forall eqs lhs sop arg args j,
+    PTree.get j eqs = Some (Equ lhs sop args) ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (get_reg_kills eqs)) j = true.
+Proof.
+  unfold get_reg_kills.
+  intros.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : Regmap.t PSet.t) (p : positive * equation_or_condition) =>
+        match snd p with
+        | Equ lhs0 _ args0 =>
+          add_i_j lhs0 (fst p) (add_ilist_j args0 (fst p) a)
+        | Cond _ args0 => add_ilist_j args0 (fst p) a
+        end)) with xlget_kills.
+  eapply xlget_kills_has_arg.
+  - apply PTree.elements_correct.
+    eassumption.
+  - assumption.
+Qed.
+
+Hint Resolve get_kills_has_arg : cse3.
+
+Lemma context_from_hints_get_kills_has_arg :
+  forall hints lhs sop arg args j,
+    PTree.get j (hint_eq_catalog hints) = Some (Equ lhs sop args) ->
+    In arg args ->
+    PSet.contains (eq_kill_reg (context_from_hints hints) arg) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_kills_has_arg; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_arg : cse3.
+
+Lemma get_cond_kills_has_arg :
+  forall eqs cond arg args j,
+    PTree.get j eqs = Some (Cond cond args) ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (get_reg_kills eqs)) j = true.
+Proof.
+  unfold get_reg_kills.
+  intros.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : Regmap.t PSet.t) (p : positive * equation_or_condition) =>
+        match snd p with
+        | Equ lhs0 _ args0 =>
+          add_i_j lhs0 (fst p) (add_ilist_j args0 (fst p) a)
+        | Cond _ args0 => add_ilist_j args0 (fst p) a
+        end)) with xlget_kills.
+  eapply xlget_cond_kills_has_arg.
+  - apply PTree.elements_correct.
+    eassumption.
+  - assumption.
+Qed.
+
+Hint Resolve get_cond_kills_has_arg : cse3.
+
+Lemma context_from_hints_get_cond_kills_has_arg :
+  forall hints cond arg args j,
+    PTree.get j (hint_eq_catalog hints) = Some (Cond cond args) ->
+    In arg args ->
+    PSet.contains (eq_kill_reg (context_from_hints hints) arg) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_cond_kills_has_arg; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_cond_kills_has_arg : cse3.
+
+Lemma xlget_kills_has_eq_depends_on_mem :
+  forall eqs eq j m,
+    In (j, eq) eqs ->
+    eq_cond_depends_on_mem eq = true ->
+    PSet.contains (xlget_mem_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros.
+  destruct H.
+  { subst a.
+    simpl.
+    rewrite H0.
+    apply xlget_mem_kills_monotone.
+    apply PSet.gadds.
+  }
+  eauto.
+Qed.
+
+Hint Resolve xlget_kills_has_eq_depends_on_mem : cse3.
+
+Lemma get_kills_has_eq_depends_on_mem :
+  forall eqs eq j,
+    PTree.get j eqs = Some eq ->
+    eq_cond_depends_on_mem eq = true ->
+    PSet.contains (get_mem_kills eqs) j = true.
+Proof.
+  intros.
+  unfold get_mem_kills.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : PSet.t) (p : positive * equation_or_condition) =>
+          if eq_cond_depends_on_mem (snd p) then PSet.add (fst p) a else a))
+       with xlget_mem_kills.
+  eapply xlget_kills_has_eq_depends_on_mem.
+  apply PTree.elements_correct.
+  eassumption.
+  trivial.
+Qed.
+  
+Lemma context_from_hints_get_kills_has_eq_depends_on_mem :
+  forall hints eq j,
+    PTree.get j (hint_eq_catalog hints) = Some eq ->
+    eq_cond_depends_on_mem eq = true ->
+    PSet.contains (eq_kill_mem (context_from_hints hints) tt) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_kills_has_eq_depends_on_mem; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_eq_depends_on_mem : cse3.
+
+Lemma xlget_kills_has_eq_depends_on_store :
+  forall eqs eq j m,
+    In (j, eq) eqs ->
+    eq_cond_depends_on_store eq = true ->
+    PSet.contains (xlget_store_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros.
+  destruct H.
+  { subst a.
+    simpl.
+    rewrite H0.
+    apply xlget_store_kills_monotone.
+    apply PSet.gadds.
+  }
+  eauto.
+Qed.
+
+Hint Resolve xlget_kills_has_eq_depends_on_store : cse3.
+
+Lemma get_kills_has_eq_depends_on_store :
+  forall eqs eq j,
+    PTree.get j eqs = Some eq ->
+    eq_cond_depends_on_store eq = true ->
+    PSet.contains (get_store_kills eqs) j = true.
+Proof.
+  intros.
+  unfold get_store_kills.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : PSet.t) (p : positive * equation_or_condition) =>
+          if eq_cond_depends_on_store (snd p) then PSet.add (fst p) a else a))
+       with xlget_store_kills.
+  eapply xlget_kills_has_eq_depends_on_store.
+  apply PTree.elements_correct.
+  eassumption.
+  trivial.
+Qed.
+  
+Lemma context_from_hints_get_kills_has_eq_depends_on_store :
+  forall hints eq j,
+    PTree.get j (hint_eq_catalog hints) = Some eq ->
+    eq_cond_depends_on_store eq = true ->
+    PSet.contains (eq_kill_store (context_from_hints hints) tt) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_kills_has_eq_depends_on_store; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_eq_depends_on_store : cse3.
+
+Definition eq_involves (eq : equation_or_condition) (i : reg) :=
+  match eq with
+  | Equ lhs sop args =>
+    i = lhs \/ In i args
+  | Cond cond args =>  In i args
+  end.
+
+Section SOUNDNESS.
+  Context {F V : Type}.
+  Context {genv: Genv.t F V}.
+  Context {sp : val}.
+
+  Context {ctx : eq_context}.
+
+  Definition sem_rhs (sop : sym_op) (args : list reg)
+             (rs : regset) (m : mem) (v' : val) :=
+    match sop with
+    | SOp op =>
+      match eval_operation genv sp op (rs ## args) m with
+      | Some v => v' = v
+      | None => False
+      end
+    | SLoad chunk addr =>
+      match
+        match eval_addressing genv sp addr (rs ## args) with
+        | Some a => Mem.loadv chunk m a
+        | None => None
+        end
+      with
+      | Some dat => v' = dat
+      | None => v' = Vundef
+      end
+    end.
+    
+  Definition sem_eq (eq : equation_or_condition) (rs : regset) (m : mem) :=
+    match eq with
+    | Equ lhs sop args => sem_rhs sop args rs m (rs # lhs)
+    | Cond cond args => eval_condition cond (rs ## args) m = Some true
+    end.
+
+  Definition sem_rel (rel : RELATION.t) (rs : regset) (m : mem) :=
+    forall i eq,
+      PSet.contains rel i = true ->
+      eq_catalog ctx i = Some eq ->
+      sem_eq eq rs m.
+
+  Lemma sem_rel_glb:
+    forall rel1 rel2 rs m,
+      (sem_rel (RELATION.glb rel1 rel2) rs m) <->
+      ((sem_rel rel1 rs m) /\
+       (sem_rel rel2 rs m)).
+  Proof.
+    intros.
+    unfold sem_rel, RELATION.glb.
+    split.
+    - intro IMPLIES.
+      split;
+        intros i eq CONTAINS;
+        specialize IMPLIES with (i:=i) (eq0:=eq);
+        rewrite PSet.gunion in IMPLIES;
+        rewrite orb_true_iff in IMPLIES;
+        intuition.
+    - intros (IMPLIES1 & IMPLIES2) i eq.
+      rewrite PSet.gunion.
+      rewrite orb_true_iff.
+      specialize IMPLIES1 with (i:=i) (eq0:=eq).
+      specialize IMPLIES2 with (i:=i) (eq0:=eq).
+      intuition.
+  Qed.
+
+  Hypothesis ctx_kill_reg_has_lhs :
+    forall lhs sop args j,
+      eq_catalog ctx j = Some (Equ lhs sop args) ->
+      PSet.contains (eq_kill_reg ctx lhs) j = true.
+
+  Hypothesis ctx_kill_reg_has_arg :
+    forall lhs sop args j,
+      eq_catalog ctx j = Some (Equ lhs sop args) ->
+      forall arg,
+      In arg args ->
+      PSet.contains (eq_kill_reg ctx arg) j = true.
+
+  Hypothesis ctx_cond_kill_reg_has_arg :
+    forall cond args j,
+      eq_catalog ctx j = Some (Cond cond args) ->
+      forall arg,
+      In arg args ->
+      PSet.contains (eq_kill_reg ctx arg) j = true.
+
+  Hypothesis ctx_kill_mem_has_depends_on_mem :
+    forall eq j,
+      eq_catalog ctx j = Some eq ->
+      eq_cond_depends_on_mem eq = true ->
+      PSet.contains (eq_kill_mem ctx tt) j = true.
+
+  Hypothesis ctx_kill_store_has_depends_on_store :
+    forall eq j,
+      eq_catalog ctx j = Some eq ->
+      eq_cond_depends_on_store eq = true ->
+      PSet.contains (eq_kill_store ctx tt) j = true.
+
+  Theorem kill_reg_sound :
+    forall rel rs m dst v,
+      (sem_rel rel rs m) ->
+      (sem_rel (kill_reg (ctx:=ctx) dst rel) (rs#dst <- v) m).
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_reg.
+    intros until v.
+    intros REL i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    destruct eq as [lhs sop args | cond args]; simpl.
+  * specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
+    specialize ctx_kill_reg_has_arg with (lhs := lhs) (sop := sop) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
+    simpl in *.
+    assert ({In dst args} + {~In dst args}) as IN_ARGS.
+    {
+      apply List.in_dec.
+      apply peq.
+    }
+    destruct IN_ARGS as [IN_ARGS | NOTIN_ARGS].
+    { intuition.
+      congruence.
+    }
+    destruct (peq dst lhs).
+    {
+      congruence.
+    }
+    rewrite subst_args_notin by assumption.
+    destruct sop.
+    - destruct (eval_operation genv sp o rs ## args m) as [ev | ]; trivial.
+      rewrite Regmap.gso by congruence.
+      assumption.
+    - rewrite Regmap.gso by congruence.
+      assumption.
+  * specialize ctx_cond_kill_reg_has_arg with (cond := cond) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
+    simpl in *.
+    assert ({In dst args} + {~In dst args}) as IN_ARGS.
+    {
+      apply List.in_dec.
+      apply peq.
+    }
+    destruct IN_ARGS as [IN_ARGS | NOTIN_ARGS].
+    { intuition.
+      congruence.
+    }
+    rewrite subst_args_notin by assumption.
+    assumption.
+  Qed.
+
+  Hint Resolve kill_reg_sound : cse3.
+
+  Theorem kill_reg_sound2 :
+    forall rel rs m dst,
+      (sem_rel rel rs m) ->
+      (sem_rel (kill_reg (ctx:=ctx) dst rel) rs m).
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_reg.
+    intros until dst.
+    intros REL i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    destruct eq as [lhs sop args | cond args]; simpl.
+  * specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
+    specialize ctx_kill_reg_has_arg with (lhs := lhs) (sop := sop) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
+  * specialize ctx_cond_kill_reg_has_arg with (cond := cond) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
+  Qed.
+    
+  Lemma pick_source_sound :
+    forall (l : list reg),
+      match pick_source l with
+      | Some x => In x l
+      | None => True
+      end.
+  Proof.
+    unfold pick_source.
+    destruct l; simpl; trivial.
+    left; trivial.
+  Qed.
+    
+  Hint Resolve pick_source_sound : cse3.
+
+  Theorem forward_move_sound :
+    forall rel rs m x,
+      (sem_rel rel rs m) ->
+      rs # (forward_move (ctx := ctx) rel x) = rs # x.
+  Proof.
+    unfold sem_rel, forward_move.
+    intros until x.
+    intro REL.
+    pose proof (pick_source_sound (PSet.elements (PSet.inter rel (eq_moves ctx x)))) as ELEMENT.
+    destruct (pick_source (PSet.elements (PSet.inter rel (eq_moves ctx x)))).
+    2: reflexivity.
+    destruct (eq_catalog ctx r) as [eq | ] eqn:CATALOG.
+    2: reflexivity.
+    specialize REL with (i := r) (eq0 := eq).
+    destruct eq as [lhs sop args | cond args]; cbn in *; trivial.
+    destruct (is_smove sop) as [MOVE | ].
+    2: reflexivity.
+    rewrite MOVE in *; cbn in *.
+    destruct (peq x lhs).
+    2: reflexivity.
+    simpl.
+    subst x.
+    rewrite PSet.elements_spec in ELEMENT.
+    rewrite PSet.ginter in ELEMENT.
+    rewrite andb_true_iff in ELEMENT.
+    unfold sem_eq in REL.
+    simpl in REL.
+    destruct args as [ | h t].
+    reflexivity.
+    destruct t.
+    2: reflexivity.
+    simpl in REL.
+    intuition congruence.
+  Qed.
+
+  Hint Resolve forward_move_sound : cse3.
+
+  Theorem forward_move_l_sound :
+    forall rel rs m l,
+      (sem_rel rel rs m) ->
+      rs ## (forward_move_l (ctx := ctx) rel l) = rs ## l.
+  Proof.
+    induction l; simpl; intros; trivial.
+    erewrite forward_move_sound by eassumption.
+    intuition congruence.
+  Qed.
+  
+  Hint Resolve forward_move_l_sound : cse3.
+
+  Theorem kill_mem_sound :
+    forall rel rs m m',
+      (sem_rel rel rs m) ->
+      (sem_rel (kill_mem (ctx:=ctx) rel) rs m').
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_mem.
+    intros until m'.
+    intros REL i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    intros SUBTRACT CATALOG.
+    rewrite PSet.gsubtract in SUBTRACT.
+    rewrite andb_true_iff in SUBTRACT.
+    intuition.
+    destruct eq as [lhs sop args | cond args] eqn:EQ.
+  * destruct sop as [op | chunk addr] eqn:OP.
+    - specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i).
+      rewrite EQ in ctx_kill_mem_has_depends_on_mem.
+      unfold eq_cond_depends_on_mem in ctx_kill_mem_has_depends_on_mem.
+      rewrite (op_depends_on_memory_correct genv sp op) with (m2 := m).
+      assumption.
+      destruct (op_depends_on_memory op) in *; trivial.
+      rewrite ctx_kill_mem_has_depends_on_mem in H0; trivial.
+      discriminate H0.
+    - specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i).
+      rewrite EQ in ctx_kill_mem_has_depends_on_mem.
+      rewrite negb_true_iff in H0.
+      intuition.
+      congruence.
+  * specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i).
+    rewrite EQ in ctx_kill_mem_has_depends_on_mem.
+    unfold eq_cond_depends_on_mem in ctx_kill_mem_has_depends_on_mem.
+    rewrite (cond_depends_on_memory_correct cond) with (m2 := m).
+    assumption.
+    destruct (cond_depends_on_memory cond) in *; trivial.
+    rewrite negb_true_iff in H0.
+    intuition.
+    congruence.
+  Qed.
+
+  Hint Resolve kill_mem_sound : cse3.
+
+  (* TODO: shouldn't this already be proved somewhere else? *)
+  Lemma store_preserves_validity:
+    forall m m' wchunk a v
+      (STORE : Mem.storev wchunk m a v = Some m')
+      (b : block) (z : Z),
+      Mem.valid_pointer m' b z = Mem.valid_pointer m b z.
+  Proof.
+    unfold Mem.storev.
+    intros.
+    destruct a; try discriminate.
+    Local Transparent Mem.store.
+    unfold Mem.store in STORE.
+    destruct Mem.valid_access_dec in STORE.
+    2: discriminate.
+    inv STORE.
+    reflexivity.
+  Qed.
+
+  Hint Resolve store_preserves_validity : cse3.
+  
+  Theorem kill_store_sound :
+    forall rel rs m m' wchunk a v,
+      (sem_rel rel rs m) ->
+      (Mem.storev wchunk m a v = Some m') ->
+      (sem_rel (kill_store (ctx:=ctx) rel) rs m').
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_store.
+    intros until v.
+    intros REL STORE i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    intros SUBTRACT CATALOG.
+    rewrite PSet.gsubtract in SUBTRACT.
+    rewrite andb_true_iff in SUBTRACT.
+    intuition.
+    destruct eq as [lhs sop args | cond args] eqn:EQ.
+  * destruct sop as [op | chunk addr] eqn:OP.
+    - rewrite op_valid_pointer_eq with (m2 := m).
+      assumption.
+      eapply store_preserves_validity; eauto.
+    - specialize ctx_kill_store_has_depends_on_store with (eq0 := eq) (j := i).
+      rewrite EQ in ctx_kill_store_has_depends_on_store.
+      rewrite negb_true_iff in H0.
+      intuition.
+      congruence.
+  * rewrite cond_valid_pointer_eq with (m2 := m).
+    assumption.
+    eapply store_preserves_validity; eauto.
+  Qed.
+ 
+  Hint Resolve kill_store_sound : cse3.
+ 
+  Theorem eq_find_sound:
+    forall no eq id,
+      eq_find (ctx := ctx) no eq = Some id ->
+      eq_catalog ctx id = Some eq.
+  Proof.
+    unfold eq_find.
+    intros.
+    destruct (eq_find_oracle ctx no eq) as [ id' | ].
+    2: discriminate.
+    destruct (eq_catalog ctx id') as [eq' |] eqn:CATALOG.
+    2: discriminate.
+    destruct (eq_dec_equation eq eq').
+    2: discriminate.
+    congruence.
+  Qed.
+
+  Hint Resolve eq_find_sound : cse3.
+
+  Theorem is_condition_present_sound :
+    forall node rel cond args rs m
+      (REL : sem_rel rel rs m)
+      (COND : (is_condition_present (ctx := ctx) node rel cond args) = true),
+      (eval_condition cond (rs ## args) m) = Some true.
+  Proof.
+    unfold sem_rel, is_condition_present.
+    intros.
+    destruct eq_find as [i |] eqn:FIND.
+    2: discriminate.
+    pose proof (eq_find_sound node (Cond cond args) i FIND) as CATALOG.
+    exact (REL i (Cond cond args) COND CATALOG).
+  Qed.
+
+  Hint Resolve is_condition_present_sound : cse3.
+  
+  Theorem rhs_find_sound:
+    forall no sop args rel src rs m,
+      sem_rel rel rs m ->
+      rhs_find (ctx := ctx) no sop args rel = Some src ->
+      sem_rhs sop args rs m (rs # src).
+  Proof.
+    unfold rhs_find, sem_rel, sem_eq.
+    intros until m.
+    intros REL FIND.
+    pose proof (pick_source_sound (PSet.elements (PSet.inter (eq_rhs_oracle ctx no sop args) rel))) as SOURCE.
+    destruct (pick_source (PSet.elements (PSet.inter (eq_rhs_oracle ctx no sop args) rel))) as [ src' | ].
+    2: discriminate.
+    rewrite PSet.elements_spec in SOURCE.
+    rewrite PSet.ginter in SOURCE.
+    rewrite andb_true_iff in SOURCE.
+    destruct (eq_catalog ctx src') as [eq | ] eqn:CATALOG.
+    2: discriminate.
+    specialize REL with (i := src') (eq0 := eq).
+    destruct eq as [eq_lhs eq_sop eq_args | eq_cond eq_args] eqn:EQ.
+    2: discriminate.
+    destruct (eq_dec_sym_op sop eq_sop).
+    2: discriminate.
+    destruct (eq_dec_args args eq_args).
+    2: discriminate.
+    simpl in FIND.
+    intuition congruence.
+  Qed.
+
+  Hint Resolve rhs_find_sound : cse3.
+  
+  Theorem forward_move_rhs_sound :
+    forall sop args rel rs m v,
+      (sem_rel rel rs m) ->
+      (sem_rhs sop args rs m v) ->
+      (sem_rhs sop (forward_move_l (ctx := ctx) rel args) rs m v).
+  Proof.
+    intros until v.
+    intros REL RHS.
+    destruct sop; simpl in *.
+    all: erewrite forward_move_l_sound by eassumption; assumption.
+  Qed.
+
+  Hint Resolve forward_move_rhs_sound : cse3.
+
+  Lemma arg_not_replaced:
+    forall (rs : regset) dst v args,
+      ~ In dst args ->
+      (rs # dst <- v) ## args = rs ## args.
+  Proof.
+    induction args; simpl; trivial.
+    intuition.
+    f_equal; trivial.
+    apply Regmap.gso; congruence.
+  Qed.
+
+  Lemma sem_rhs_depends_on_args_only:
+    forall sop args rs dst m v,
+      sem_rhs sop args rs m v ->
+      ~ In dst args ->
+      sem_rhs sop args (rs # dst <- v) m v.
+  Proof.
+    unfold sem_rhs.
+    intros.
+    rewrite arg_not_replaced by assumption.
+    assumption.
+  Qed.
+  
+  Lemma replace_sound:
+    forall no eqno dst sop args rel rs m v,
+    sem_rel rel rs m ->
+    sem_rhs sop args rs m  v ->
+    ~ In dst args ->
+    eq_find (ctx := ctx) no (Equ dst sop args) = Some eqno ->
+    sem_rel (PSet.add eqno (kill_reg (ctx := ctx) dst rel)) (rs # dst <- v) m.
+  Proof.
+    intros until v.
+    intros REL RHS NOTIN FIND i eq CONTAINS CATALOG.
+    destruct (peq i eqno).
+    - subst i.
+      rewrite eq_find_sound with (no := no) (eq0 := Equ dst sop args) in CATALOG by exact FIND.
+      clear FIND.
+      inv CATALOG.
+      unfold sem_eq.
+      simpl in *.
+      rewrite Regmap.gss.
+      apply sem_rhs_depends_on_args_only; auto.
+    - rewrite PSet.gaddo in CONTAINS by congruence.
+      eapply kill_reg_sound; eauto.
+  Qed.
+
+  Lemma sem_rhs_det:
+    forall {sop} {args} {rs} {m} {v} {v'},
+      sem_rhs sop args rs m v ->
+      sem_rhs sop args rs m v' ->
+      v = v'.
+  Proof.
+    intros until v'. intro SEMv.
+    destruct sop; simpl in *.
+    - destruct eval_operation.
+      congruence.
+      contradiction.
+    - destruct eval_addressing.
+      + destruct Mem.loadv; congruence.
+      + congruence.
+  Qed.
+
+  Lemma arglist_idem_write:
+    forall { A : Type} args (rs : Regmap.t A) dst,
+      (rs # dst <- (rs # dst)) ## args = rs ## args.
+  Proof.
+    induction args; trivial.
+    intros. cbn.
+    f_equal; trivial.
+    apply Regmap.gsident.
+  Qed.
+  
+  Lemma sem_rhs_idem_write:
+    forall sop args rs dst m v,
+      sem_rhs sop args rs m v ->
+      sem_rhs sop args (rs # dst <- (rs # dst)) m v.
+  Proof.
+    intros.
+    unfold sem_rhs in *.
+    rewrite arglist_idem_write.
+    assumption.
+  Qed.
+  
+  Theorem oper2_sound:
+    forall no dst sop args rel rs m v,
+      sem_rel rel rs m ->
+      not (In dst args) ->
+      sem_rhs sop args rs m v ->
+      sem_rel (oper2 (ctx := ctx) no dst sop args rel) (rs # dst <- v) m.
+  Proof.
+    unfold oper2.
+    intros until v.
+    intros REL NOTIN RHS.    
+    pose proof (eq_find_sound no (Equ dst sop args)) as EQ_FIND_SOUND.
+    destruct eq_find.
+    2: auto with cse3; fail.
+    specialize EQ_FIND_SOUND with (id := e).
+    intuition.
+    intros i eq CONTAINS.
+    destruct (peq i e).
+    { subst i.
+      rewrite H.
+      clear H.
+      intro Z.
+      inv Z.
+      unfold sem_eq.
+      simpl.
+      rewrite Regmap.gss.
+      apply sem_rhs_depends_on_args_only; auto.
+    }
+    intros INi.
+    destruct (PSet.contains rel e) eqn:CONTAINSe.
+    { pose proof (REL e (Equ dst sop args) CONTAINSe H) as RELe.
+      pose proof (REL i eq CONTAINS INi) as RELi.
+      destruct eq as [eq_lhs eq_sop eq_args | eq_cond eq_args]; cbn in *.
+      - replace v with (rs # dst) by (eapply sem_rhs_det; eassumption).
+        rewrite Regmap.gsident.
+        apply sem_rhs_idem_write.
+        assumption.
+      - replace v with (rs # dst) by (eapply sem_rhs_det; eassumption).
+        rewrite arglist_idem_write.
+        assumption.
+    }
+    rewrite PSet.gaddo in CONTAINS by congruence.
+    apply (kill_reg_sound rel rs m dst v REL i eq); auto.
+  Qed.
+
+  Hint Resolve oper2_sound : cse3.
+  
+  Theorem oper1_sound:
+    forall no dst sop args rel rs m v,
+      sem_rel rel rs m ->
+      sem_rhs sop args rs m v ->
+      sem_rel (oper1 (ctx := ctx) no dst sop args rel) (rs # dst <- v) m.
+  Proof.
+    intros.
+    unfold oper1.
+    destruct in_dec; auto with cse3.
+  Qed.
+
+  Hint Resolve oper1_sound : cse3.
+
+  Lemma rel_idem_replace:
+    forall rel rs r m,
+      sem_rel rel rs m ->
+      sem_rel rel rs # r <- (rs # r) m.
+  Proof.
+    intros until m.
+    intro REL.
+    unfold sem_rel, sem_eq, sem_rhs in *.
+    intros.
+    specialize REL with (i:=i) (eq0:=eq).
+    destruct eq as [lhs sop args | cond args] eqn:EQ.
+  * rewrite Regmap.gsident.
+    replace ((rs # r <- (rs # r)) ## args) with
+        (rs ## args).
+    { apply REL; auto. }
+    apply list_map_exten.
+    intros.
+    apply Regmap.gsident.
+    (* TODO simplify? *)
+  * rewrite arglist_idem_write.
+    auto.
+  Qed.
+      
+  Lemma move_sound :
+    forall no : node,
+    forall rel : RELATION.t,
+    forall src dst : reg,
+    forall rs m,
+      sem_rel rel rs m ->
+      sem_rel (move (ctx:=ctx) no src dst rel) (rs # dst <- (rs # src)) m.
+  Proof.
+    unfold move.
+    intros until m.
+    intro REL.
+    destruct (peq src dst).
+    { subst dst.
+      apply rel_idem_replace; auto.
+    }
+    pose proof (eq_find_sound no (Equ dst (SOp Omove) (src::nil))) as EQ_FIND_SOUND.
+    destruct eq_find.
+    - intros i eq CONTAINS.
+      destruct (peq i e).
+      + subst i.
+        rewrite (EQ_FIND_SOUND e) by trivial.
+        intro Z.
+        inv Z.
+        unfold sem_eq.
+        simpl.
+        destruct (peq src dst).
+        * subst dst.
+          reflexivity.
+        * rewrite Regmap.gss.
+          rewrite Regmap.gso by congruence.
+          reflexivity.
+      + intros.
+        rewrite PSet.gaddo in CONTAINS by congruence.
+        apply (kill_reg_sound rel rs m dst (rs # src) REL i); auto.
+    - apply kill_reg_sound; auto.
+  Qed.
+
+  Hint Resolve move_sound : cse3.
+  
+  Theorem oper_sound:
+    forall no dst sop args rel rs m v,
+      sem_rel rel rs m ->
+      sem_rhs sop args rs m v ->
+      sem_rel (oper (ctx := ctx) no dst sop args rel) (rs # dst <- v) m.
+  Proof.
+    intros until v.
+    intros REL RHS.
+    unfold oper.
+    destruct (is_smove sop).
+    - subst.
+      simpl in RHS.
+      destruct args. contradiction.
+      destruct args. 2: contradiction.
+      cbn in *.
+      subst.
+      rewrite <- (forward_move_sound rel rs m r) by auto.
+      apply move_sound; auto.
+    - destruct rhs_find as [src |] eqn:RHS_FIND.
+      + destruct (Compopts.optim_CSE3_glb tt).
+        * apply sem_rel_glb; split.
+          ** pose proof (rhs_find_sound no sop args rel src rs m REL RHS_FIND) as SOUND.                                                                    
+             rewrite <- (sem_rhs_det SOUND RHS).
+             apply move_sound; auto.
+          ** apply sem_rel_glb; split.
+             *** apply oper1_sound; auto.
+             *** apply oper1_sound; auto.
+                 apply forward_move_rhs_sound; auto.
+        * apply sem_rel_glb; split.
+          ** apply oper1_sound; auto.
+          ** apply oper1_sound; auto.
+             apply forward_move_rhs_sound; auto.
+      + apply sem_rel_glb; split.
+        * apply oper1_sound; auto.
+        * apply oper1_sound; auto.
+          apply forward_move_rhs_sound; auto.
+  Qed.
+
+  Hint Resolve oper_sound : cse3.
+
+
+  Theorem clever_kill_store_sound:
+    forall chunk addr args a src rel rs m m',
+      sem_rel rel rs m ->
+      eval_addressing genv sp addr (rs ## args) = Some a ->
+      Mem.storev chunk m a (rs # src) = Some m' ->
+      sem_rel (clever_kill_store (ctx:=ctx) chunk addr args src rel) rs m'.
+  Proof.
+    unfold clever_kill_store.
+    intros until m'. intros REL ADDR STORE i eq CONTAINS CATALOG.
+    autorewrite with pset in CONTAINS.
+    destruct (PSet.contains rel i) eqn:RELi; simpl in CONTAINS.
+    2: discriminate.
+    rewrite CATALOG in CONTAINS.
+    unfold sem_rel in REL.
+    specialize REL with (i := i) (eq0 := eq).
+    destruct eq as [eq_lhs eq_sop eq_args | eq_cond eq_args]; simpl in *.
+  * unfold sem_eq in *.
+    simpl in *.
+    destruct eq_sop as [op' | chunk' addr']; simpl.
+    - rewrite op_valid_pointer_eq with (m2 := m).
+      + cbn in *.
+        apply REL; auto.
+      + eapply store_preserves_validity; eauto.
+    - simpl in REL.
+      erewrite ctx_kill_store_has_depends_on_store in CONTAINS by eauto.
+      simpl in CONTAINS.
+      rewrite negb_true_iff in CONTAINS.
+      destruct (eval_addressing genv sp addr' rs ## eq_args) as [a'|] eqn:ADDR'.
+      + erewrite may_overlap_sound with (chunk:=chunk) (addr:=addr) (args:=args) (chunk':=chunk') (addr':=addr') (args':=eq_args); try eassumption.
+        apply REL; auto.
+      + apply REL; auto.
+  * rewrite cond_valid_pointer_eq with (m2 := m).
+    auto.
+    eapply store_preserves_validity; eauto.
+  Qed.
+
+  Hint Resolve clever_kill_store_sound : cse3.
+
+  Theorem store2_sound:
+    forall chunk addr args a src rel rs m m',
+      sem_rel rel rs m ->
+      eval_addressing genv sp addr (rs ## args) = Some a ->
+      Mem.storev chunk m a (rs # src) = Some m' ->
+      sem_rel (store2 (ctx:=ctx) chunk addr args src rel) rs m'.
+  Proof.
+    unfold store2.
+    intros.
+    destruct (Compopts.optim_CSE3_alias_analysis tt); eauto with cse3.
+  Qed.
+  
+  Hint Resolve store2_sound : cse3.
+
+  Theorem store1_sound:
+    forall no chunk addr args a src rel tenv rs m m',
+      sem_rel rel rs m ->
+      wt_regset tenv rs ->
+      eval_addressing genv sp addr (rs ## args) = Some a ->
+      Mem.storev chunk m a (rs#src) = Some m' ->
+      sem_rel (store1 (ctx:=ctx) no chunk addr args src (tenv src) rel) rs m'.
+  Proof.
+    unfold store1.
+    intros until m'.
+    intros REL WT ADDR STORE.
+    assert (sem_rel (store2 (ctx:=ctx) chunk addr args src rel) rs m') as REL' by eauto with cse3.
+    destruct loadv_storev_compatible_type eqn:COMPATIBLE.
+    2: auto; fail.
+    destruct eq_find as [eq_id | ] eqn:FIND.
+    2: auto; fail.
+    intros i eq CONTAINS CATALOG.
+    destruct (peq i eq_id).
+    { subst i.
+      rewrite eq_find_sound with (no:=no) (eq0:=Equ src (SLoad chunk addr) args) in CATALOG; trivial.
+      inv CATALOG.
+      unfold sem_eq.
+      simpl.
+      rewrite ADDR.
+      rewrite loadv_storev_really_same with (m1:=m) (v:=rs#src) (ty:=(tenv src)); trivial.
+    }
+    unfold sem_rel in REL'.
+    rewrite PSet.gaddo in CONTAINS by congruence.
+    eauto.
+  Qed.
+  
+  Hint Resolve store1_sound : cse3.
+    
+    
+  Theorem store_sound:
+    forall no chunk addr args a src rel tenv rs m m',
+      sem_rel rel rs m ->
+      wt_regset tenv rs ->
+      eval_addressing genv sp addr (rs ## args) = Some a ->
+      Mem.storev chunk m a (rs#src) = Some m' ->
+      sem_rel (store (ctx:=ctx) no tenv chunk addr args src rel) rs m'.
+  Proof.
+    unfold store.
+    intros until m'.
+    intros REL WT ADDR STORE.
+    apply sem_rel_glb; split.
+    - apply sem_rel_glb; split.
+      * apply store1_sound with (a := a) (m := m); trivial.
+      * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+        apply store1_sound with (a := a) (m := m); trivial.
+    - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
+      apply sem_rel_glb; split.
+      * apply store1_sound with (a := a) (m := m); trivial.
+      * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+        apply store1_sound with (a := a) (m := m); trivial.
+  Qed.
+
+  Hint Resolve store_sound : cse3.
+
+  Lemma kill_builtin_res_sound:
+    forall res (m : mem) (rs : regset) vres (rel : RELATION.t)
+           (REL : sem_rel rel rs m),
+      (sem_rel (kill_builtin_res (ctx:=ctx) res rel)
+               (regmap_setres res vres rs) m).
+  Proof.
+    destruct res; simpl; intros; trivial.
+    apply kill_reg_sound; trivial.
+  Qed.
+
+  Hint Resolve kill_builtin_res_sound : cse3.
+
+  Lemma top_sound:
+    forall rs m, (sem_rel RELATION.top rs m).
+  Proof.
+    unfold RELATION.top, sem_rel.
+    intros.
+    rewrite PSet.gempty in H.
+    discriminate.
+  Qed.
+
+  Hint Resolve top_sound : cse3.
+
+  Lemma external_call_sound:
+    forall ge ef (rel : RELATION.t) (m m' : mem) (rs : regset) vargs t vres
+           (REL : sem_rel rel rs m)
+           (CALL : external_call ef ge vargs m t vres m'),
+      sem_rel (apply_external_call (ctx:=ctx) ef rel) rs m'.
+  Proof.
+    destruct ef; intros; simpl in *.
+    all: eauto using kill_mem_sound.
+    all: unfold builtin_or_external_sem in *.
+    1, 2, 3, 5, 6: destruct (Compopts.optim_CSE3_across_calls tt).
+    all: eauto using kill_mem_sound, top_sound.
+    1, 2, 3: destruct (Builtins.lookup_builtin_function name sg).
+    all: eauto using kill_mem_sound, top_sound.
+    all: inv CALL; eauto using kill_mem_sound.
+  Qed.
+
+  Hint Resolve external_call_sound : cse3.
+
+
+  Definition sem_rel_b (rel : RB.t) (rs : regset) (m : mem) :=
+    match rel with
+    | None => False
+    | Some rel => sem_rel rel rs m
+    end.
+  
+  Lemma apply_cond1_sound :
+    forall pc cond args rel rs m
+      (COND : (eval_condition cond (rs ## args) m) = Some true)
+      (REL : (sem_rel rel rs m)),
+      (sem_rel_b (apply_cond1 (ctx:=ctx) pc cond args rel) rs m).
+  Proof.
+    intros.
+    unfold apply_cond1.
+    destruct eq_find as [eq_id | ] eqn:FIND; cbn.
+    2: assumption.
+    destruct PSet.contains eqn:CONTAINS.
+    {
+      pose proof (eq_find_sound pc (Cond (negate_condition cond) args) eq_id FIND) as FIND_SOUND.
+      unfold sem_rel in REL.
+      pose proof (REL eq_id (Cond (negate_condition cond) args) CONTAINS FIND_SOUND) as REL_id.
+      cbn in REL_id.
+      rewrite eval_negate_condition in REL_id.
+      rewrite COND in REL_id.
+      discriminate.
+    }
+    exact REL.
+  Qed.
+  
+  Lemma apply_cond0_sound :
+    forall pc cond args rel rs m
+      (COND : (eval_condition cond (rs ## args) m) = Some true)
+      (REL : (sem_rel rel rs m)),
+      (sem_rel (apply_cond0 (ctx:=ctx) pc cond args rel) rs m).
+  Proof.
+    intros.
+    unfold apply_cond0.
+    destruct eq_find as [eq_id | ] eqn:FIND; cbn.
+    2: assumption.
+    pose proof (eq_find_sound pc (Cond cond args) eq_id FIND) as FIND_SOUND.
+    intros eq_id' eq' CONTAINS CATALOG.
+    destruct (peq eq_id eq_id').
+    { subst eq_id'.
+      unfold sem_eq.
+      rewrite FIND_SOUND in CATALOG.
+      inv CATALOG.
+      assumption.
+    }
+    rewrite PSet.gaddo in CONTAINS by assumption.
+    unfold sem_rel in REL.
+    eapply REL; eassumption.
+  Qed.
+
+  Lemma apply_cond_sound :
+    forall pc cond args rel rs m
+      (COND : (eval_condition cond (rs ## args) m) = Some true)
+      (REL : (sem_rel rel rs m)),
+      (sem_rel_b (apply_cond (ctx:=ctx) pc cond args rel) rs m).
+  Proof.
+    unfold apply_cond.
+    intros.
+    pose proof (apply_cond1_sound pc cond args rel rs m COND REL) as SOUND1.
+    destruct apply_cond1 eqn:COND1.
+    { apply apply_cond0_sound; auto. }
+    exact SOUND1.
+  Qed.
+  
+  (*
+  Section INDUCTIVENESS.
+    Variable fn : RTL.function.
+    Variable tenv : typing_env.
+    Variable inv: invariants.
+    
+    Definition is_inductive_step (pc pc' : node) :=
+      forall instr,
+        PTree.get pc (fn_code fn) = Some instr ->
+        In pc' (successors_instr instr) ->
+        RB.ge (PMap.get pc' inv)
+              (match apply_instr' (ctx:=ctx) tenv (fn_code fn) pc
+                                  (PMap.get pc inv) with
+               | Abst_same rel' => rel'
+              end).
+
+    Definition is_inductive_allstep :=
+      forall pc pc', is_inductive_step pc pc'.
+
+    Lemma checked_is_inductive_allstep:
+      (check_inductiveness (ctx:=ctx) fn tenv inv) = true ->
+      is_inductive_allstep.
+    Proof.
+      unfold check_inductiveness, is_inductive_allstep, is_inductive_step.
+      rewrite andb_true_iff.
+      rewrite PTree_Properties.for_all_correct.
+      intros (ENTRYPOINT & ALL).
+      intros until instr.
+      intros INSTR IN_SUCC.
+      specialize ALL with (x := pc) (a := instr).
+      pose proof (ALL INSTR) as AT_PC.
+      destruct (inv # pc).
+      2: apply RB.ge_bot.
+      unfold apply_instr'.
+      rewrite INSTR.
+      destruct apply_instr.
+      { (* same *)
+        rewrite List.forallb_forall in AT_PC.
+        apply relb_leb_correct.
+        auto.
+      }
+    Qed.
+    
+    Lemma checked_is_inductive_entry:
+      (check_inductiveness (ctx:=ctx) fn tenv inv) = true ->
+      inv # (fn_entrypoint fn) = Some RELATION.top.
+    Proof.
+      unfold check_inductiveness, is_inductive_allstep, is_inductive_step.
+      rewrite andb_true_iff.
+      intros (ENTRYPOINT & ALL).
+      apply RB.beq_correct in ENTRYPOINT.
+      unfold RB.eq, RELATION.eq in ENTRYPOINT.
+      destruct (inv # (fn_entrypoint fn)) as [rel | ].
+      2: contradiction.
+      f_equal.
+      symmetry.
+      assumption.
+    Qed.
+  End INDUCTIVENESS.
+
+  Hint Resolve checked_is_inductive_allstep checked_is_inductive_entry : cse3.
+  *)
+End SOUNDNESS.
diff --git a/backend/CSE3proof.v b/backend/CSE3proof.v
new file mode 100644
index 00000000..a601d5d5
--- /dev/null
+++ b/backend/CSE3proof.v
@@ -0,0 +1,1221 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+Replace available expressions by the register containing their value.
+
+Proofs.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE3 CSE3analysis CSE3analysisproof.
+Require Import RTLtyping.
+
+Section PARAMS.
+  Variable params : cse3params.
+  
+Definition param_match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => param_transf_fundef params f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall p tp, param_transf_program params p = OK tp -> param_match_prog p tp.
+Proof.
+  intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSF: param_match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Section SOUNDNESS.
+Variable sp : val.
+Variable ctx : eq_context.
+
+Definition sem_rel_b (rel : RB.t) (rs : regset) (m : mem) :=
+  match rel with
+  | None => False
+  | Some rel => sem_rel (ctx:=ctx) (genv:=ge) (sp:=sp) rel rs m
+  end.
+
+Lemma forward_move_b_sound :
+  forall rel rs m x,
+    (sem_rel_b rel rs m) ->
+    rs # (forward_move_b (ctx := ctx) rel x) = rs # x.
+Proof.
+    destruct rel as [rel | ]; simpl; intros.
+    2: contradiction.
+    eapply forward_move_sound; eauto.
+  Qed.
+
+  Lemma forward_move_l_b_sound :
+    forall rel rs m x,
+      (sem_rel_b rel rs m) ->
+      rs ## (forward_move_l_b (ctx := ctx) rel x) = rs ## x.
+  Proof.
+    destruct rel as [rel | ]; simpl; intros.
+    2: contradiction.
+    eapply forward_move_l_sound; eauto.
+  Qed.
+
+  Definition fmap_sem (fmap : PMap.t RB.t) (pc : node) (rs : regset) (m : mem) :=
+    sem_rel_b (PMap.get pc fmap) rs m.
+  
+  Lemma subst_arg_ok:
+    forall invariants,
+    forall pc,
+    forall rs,
+    forall m,
+    forall arg,
+    forall (SEM : fmap_sem invariants pc rs m),
+      rs # (subst_arg (ctx:=ctx) invariants pc arg) = rs # arg.
+  Proof.
+    intros.
+    apply forward_move_b_sound with (m:=m).
+    assumption.
+  Qed.
+  
+  Lemma subst_args_ok:
+    forall invariants,
+    forall pc,
+    forall rs,
+    forall m,
+    forall args,
+    forall (SEM : fmap_sem invariants pc rs m),
+      rs ## (subst_args (ctx:=ctx) invariants pc args) = rs ## args.
+  Proof.
+    intros.
+    apply forward_move_l_b_sound with (m:=m).
+    assumption.
+  Qed.
+End SOUNDNESS.
+
+Lemma functions_translated:
+  forall (v: val) (f: RTL.fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf,
+    Genv.find_funct tge v = Some tf /\ param_transf_fundef params f = OK tf.
+Proof.
+  apply (Genv.find_funct_transf_partial TRANSF).
+Qed.
+
+Lemma function_ptr_translated:
+  forall (b: block) (f: RTL.fundef),
+  Genv.find_funct_ptr ge b = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge b = Some tf /\ param_transf_fundef params f = OK tf.
+Proof.
+  apply (Genv.find_funct_ptr_transf_partial TRANSF).
+Qed.
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof.
+  apply (Genv.find_symbol_match TRANSF).
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  apply (Genv.senv_match TRANSF).
+Qed.
+
+Lemma sig_preserved:
+  forall f tf, param_transf_fundef params f = OK tf -> funsig tf = funsig f.
+Proof.
+  destruct f; simpl; intros.
+  - monadInv H.
+    monadInv EQ.
+    destruct preanalysis as [invariants hints].
+    destruct check_inductiveness.
+    2: discriminate.
+    inv EQ1.
+    reflexivity.
+  - monadInv H.
+    reflexivity.
+Qed.
+
+Lemma stacksize_preserved:
+  forall f tf, param_transf_function params f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate.
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma params_preserved:
+  forall f tf, param_transf_function params f = OK tf -> fn_params tf = fn_params f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate.
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma entrypoint_preserved:
+  forall f tf, param_transf_function params f = OK tf -> fn_entrypoint tf = fn_entrypoint f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate.
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma sig_preserved2:
+  forall f tf, param_transf_function params f = OK tf -> fn_sig tf = fn_sig f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate.
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma transf_function_is_typable:
+  forall f tf, param_transf_function params f = OK tf ->
+               exists tenv, type_function f = OK tenv.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  exists x.
+  assumption.
+Qed.
+Lemma transf_function_invariants_inductive:
+  forall f tf tenv, param_transf_function params f = OK tf ->
+    type_function f = OK tenv ->
+    check_inductiveness (ctx:=(context_from_hints (snd (preanalysis tenv f))))
+                        f tenv (fst (preanalysis tenv f)) = true.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  replace x with tenv in * by congruence.
+  clear x.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness; trivial; discriminate.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+    find_function ge ros rs = Some fd ->
+    exists tfd,
+      find_function tge ros rs = Some tfd /\ param_transf_fundef params fd = OK tfd.
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Inductive match_stackframes: list stackframe -> list stackframe -> signature -> Prop :=
+  | match_stackframes_nil: forall sg,
+      sg.(sig_res) = Tint ->
+      match_stackframes nil nil sg
+  | match_stackframes_cons:
+      forall res f sp pc rs s tf ts sg tenv
+        (STACKS: match_stackframes s ts (fn_sig tf))
+        (FUN: param_transf_function params f = OK tf)
+        (WTF: type_function f = OK tenv)
+        (WTRS: wt_regset tenv rs)
+        (WTRES: tenv res = proj_sig_res sg)
+        (REL: forall m vres,
+            sem_rel_b sp (context_from_hints (snd (preanalysis tenv f)))
+                      ((fst (preanalysis tenv f))#pc) (rs#res <- vres) m),
+
+      match_stackframes
+        (Stackframe res f sp pc rs :: s)
+        (Stackframe res tf sp pc rs :: ts)
+        sg.
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s f sp pc rs m ts tf tenv
+        (STACKS: match_stackframes s ts (fn_sig tf))
+        (FUN: param_transf_function params f = OK tf)
+        (WTF: type_function f = OK tenv)
+        (WTRS: wt_regset tenv rs)
+        (REL: sem_rel_b sp (context_from_hints (snd (preanalysis tenv f))) ((fst (preanalysis tenv f))#pc) rs m),
+      match_states (State s f sp pc rs m)
+                   (State ts tf sp pc rs m)
+  | match_states_call:
+      forall s f args m ts tf
+        (STACKS: match_stackframes s ts (funsig tf))
+        (FUN: param_transf_fundef params f = OK tf)
+        (WTARGS: Val.has_type_list args (sig_args (funsig tf))),
+      match_states (Callstate s f args m)
+                   (Callstate ts tf args m)
+  | match_states_return:
+      forall s res m ts sg
+        (STACKS: match_stackframes s ts sg)
+        (WTRES: Val.has_type res (proj_sig_res sg)),
+      match_states (Returnstate s res m)
+                   (Returnstate ts res m).
+
+Lemma match_stackframes_change_sig:
+  forall s ts sg sg',
+  match_stackframes s ts sg ->
+  sg'.(sig_res) = sg.(sig_res) ->
+  match_stackframes s ts sg'.
+Proof.
+  intros. inv H.
+  constructor. congruence.
+  econstructor; eauto.
+  unfold proj_sig_res in *. rewrite H0; auto.
+Qed.
+
+Lemma transf_function_at:
+  forall f tf pc tenv instr
+    (TF : param_transf_function params f = OK tf)
+    (TYPE : type_function f = OK tenv)
+    (PC : (fn_code f) ! pc = Some instr),
+    (fn_code tf) ! pc = Some (param_transf_instr
+       (ctx := (context_from_hints (snd (preanalysis tenv f))))
+       params (fst (preanalysis tenv f))
+       pc instr).
+Proof.
+  intros.
+  unfold transf_function in TF.
+  monadInv TF.
+  replace x with tenv in * by congruence.
+  clear EQ.
+  destruct (preanalysis tenv f) as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate.
+  inv EQ0.
+  simpl.
+  rewrite PTree.gmap.
+  rewrite PC.
+  reflexivity.
+Qed.
+
+Ltac TR_AT := erewrite transf_function_at by eauto.
+
+Hint Resolve wt_instrs type_function_correct : wt.
+
+Lemma wt_undef :
+  forall tenv rs dst,
+    wt_regset tenv rs ->
+    wt_regset tenv rs # dst <- Vundef.
+Proof.
+  unfold wt_regset.
+  intros.
+  destruct (peq r dst).
+  { subst dst.
+    rewrite Regmap.gss.
+    constructor.
+  }
+  rewrite Regmap.gso by congruence.
+  auto.
+Qed.
+
+Lemma rel_ge:
+  forall inv inv'
+         (GE : RELATION.ge inv' inv)
+         ctx sp rs m
+         (REL: sem_rel (genv:=ge) (sp:=sp) (ctx:=ctx) inv rs m),
+  sem_rel (genv:=ge) (sp:=sp) (ctx:=ctx) inv' rs m.
+Proof.
+  unfold sem_rel, RELATION.ge.
+  intros.
+  apply (REL i); trivial.
+  eapply HashedSet.PSet.is_subset_spec1; eassumption.
+Qed.
+
+Hint Resolve rel_ge : cse3.
+
+Lemma relb_ge:
+  forall inv inv'
+         (GE : RB.ge inv' inv)
+         ctx sp rs m
+         (REL: sem_rel_b sp ctx inv rs m),
+  sem_rel_b sp ctx inv' rs m.
+Proof.
+  intros.
+  destruct inv; cbn in *.
+  2: contradiction.
+  destruct inv'; cbn in *.
+  2: assumption.
+  eapply rel_ge; eassumption.
+Qed.
+
+Hint Resolve relb_ge : cse3.
+
+Lemma sem_rhs_sop :
+  forall sp op rs args m v,
+  eval_operation ge sp op rs ## args m = Some v ->
+  sem_rhs (genv:=ge) (sp:=sp) (SOp op) args rs m v.
+Proof.
+  intros. simpl.
+  rewrite H.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sop : cse3.
+
+Lemma sem_rhs_sload :
+  forall sp chunk addr rs args m a v,
+  eval_addressing ge sp addr rs ## args = Some a ->
+  Mem.loadv chunk m a = Some v ->
+  sem_rhs (genv:=ge) (sp:=sp) (SLoad chunk addr) args rs m v.
+Proof.
+  intros. simpl.
+  rewrite H. rewrite H0.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sload : cse3.
+
+Lemma sem_rhs_sload_notrap1 :
+  forall sp chunk addr rs args m,
+  eval_addressing ge sp addr rs ## args = None ->
+  sem_rhs (genv:=ge) (sp:=sp) (SLoad chunk addr) args rs m Vundef.
+Proof.
+  intros. simpl.
+  rewrite H.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sload_notrap1 : cse3.
+
+Lemma sem_rhs_sload_notrap2 :
+  forall sp chunk addr rs args m a,
+  eval_addressing ge sp addr rs ## args = Some a ->
+  Mem.loadv chunk m a = None ->
+  sem_rhs (genv:=ge) (sp:=sp) (SLoad chunk addr) args rs m Vundef.
+Proof.
+  intros. simpl.
+  rewrite H. rewrite H0.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sload_notrap2 : cse3.
+
+Lemma sem_rel_top:
+  forall ctx sp rs m, sem_rel (genv:=ge) (sp:=sp) (ctx:=ctx) RELATION.top rs m.
+Proof.
+  unfold sem_rel, RELATION.top.
+  intros.
+  rewrite HashedSet.PSet.gempty in *.
+  discriminate.
+Qed.
+
+Hint Resolve sem_rel_top : cse3.
+
+Lemma sem_rel_b_top:
+  forall ctx sp rs m, sem_rel_b sp ctx (Some RELATION.top) rs m.
+Proof.
+  intros. simpl.
+  apply sem_rel_top.
+Qed.
+
+Hint Resolve sem_rel_b_top : cse3.
+
+(*
+Ltac IND_STEP :=
+        match goal with
+        REW: (fn_code ?fn) ! ?mpc = Some ?minstr
+      |-
+        sem_rel_b ?sp (context_from_hints (snd (preanalysis ?tenv ?fn))) ((fst (preanalysis ?tenv ?fn)) # ?mpc') ?rs ?m =>
+        assert (is_inductive_allstep (ctx:= (context_from_hints (snd (preanalysis tenv fn)))) fn tenv (fst  (preanalysis tenv fn))) as IND by
+        (apply checked_is_inductive_allstep;
+          eapply transf_function_invariants_inductive; eassumption);
+        unfold is_inductive_allstep, is_inductive_step, apply_instr' in IND;
+        specialize IND with (pc:=mpc) (pc':=mpc') (instr:=minstr);
+        simpl in IND;
+        rewrite REW in IND;
+        simpl in IND;
+        destruct ((fst (preanalysis tenv fn)) # mpc') as [zinv' | ];
+        destruct ((fst (preanalysis tenv fn)) # mpc) as [zinv | ];
+        simpl in *;
+        intuition;
+        eapply rel_ge; eauto with cse3 (* ; for printing
+        idtac mpc mpc' fn minstr *)
+      end.
+ *)
+  
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 -> 
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS.
+  all: try set (ctx := (context_from_hints (snd (preanalysis tenv f)))) in *.
+  all: try set (invs := (fst (preanalysis tenv f))) in *.
+  - (* Inop *)
+    exists (State ts tf sp pc' rs m). split.
+    + apply exec_Inop; auto.
+      TR_AT. reflexivity.
+    + econstructor; eauto.
+      
+      (* BEGIN INVARIANT *)
+      fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      unfold sem_rel_b.
+      apply (rel_ge inv_pc inv_pc'); auto.
+      (* END INVARIANT *)
+      
+  - (* Iop *)
+    exists (State ts tf sp pc' (rs # res <- v) m). split.
+    + pose (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iop op args res pc')) as instr'.
+      assert (instr' = (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iop op args res pc'))) by reflexivity.
+      unfold param_transf_instr, find_op_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SOp op)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct (if (negb params.(cse3_operations) || (negb (params.(cse3_trivial_ops))) && (is_trivial_op op))
+               then None
+               else
+                rhs_find pc (SOp op)
+                  (subst_args (fst (preanalysis tenv f)) pc args) t) eqn:FIND.
+        ** destruct (negb params.(cse3_operations) || ((negb (params.(cse3_trivial_ops))) && (is_trivial_op op))). discriminate.
+           apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND.
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iop with (op := op) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)).
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_operation_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iop with (op := op) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)).
+        TR_AT.
+        { subst instr'.
+          rewrite if_same in H1.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_operation_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * eapply wt_exec_Iop with (f:=f); try eassumption.
+        eauto with wt.
+      *
+        (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
+  - (* Iload *)
+    exists (State ts tf sp pc' (rs # dst <- v) m). split.
+    + pose (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iload trap chunk addr args dst pc')) as instr'.
+      assert (instr' = (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iload trap chunk addr args dst pc'))) by reflexivity.
+      unfold param_transf_instr, find_load_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SLoad chunk addr)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct rhs_find eqn:FIND.
+        ** apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND. (* ADDR *)
+           rewrite H1 in FIND_SOUND. (* LOAD *)
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iload with (trap := trap) (chunk := chunk) (a := a) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iload with (chunk := chunk) (trap := trap) (addr := addr) (a := a) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * eapply wt_exec_Iload with (f:=f); try eassumption.
+        eauto with wt.
+      *         (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
+        
+  - (* Iload notrap1 *)
+    exists (State ts tf sp pc' (rs # dst <- Vundef) m). split.
+    + pose (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc')) as instr'.
+      assert (instr' = (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc'))) by reflexivity.
+      unfold param_transf_instr, find_load_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SLoad chunk addr)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct rhs_find eqn:FIND.
+        ** apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND. (* ADDR *)
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * apply wt_undef; assumption.
+      *          (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
+        
+  - (* Iload notrap2 *)
+    exists (State ts tf sp pc' (rs # dst <- Vundef) m). split.
+    + pose (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc')) as instr'.
+      assert (instr' = (param_transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) params (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc'))) by reflexivity.
+      unfold param_transf_instr, find_load_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SLoad chunk addr)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct rhs_find eqn:FIND.
+        ** apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND. (* ADDR *)
+           rewrite H1 in FIND_SOUND. (* LOAD *)
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iload_notrap2 with (chunk := chunk) (a := a) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (a := a) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * apply wt_undef; assumption.
+      *  (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
+
+  - (* Istore *)
+    exists (State ts tf sp pc' rs m'). split.
+    + eapply exec_Istore with (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args))
+      (src := (subst_arg (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc src)) ; try eassumption.
+      * TR_AT. reflexivity.
+      * rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
+        rewrite eval_addressing_preserved with (ge1 := ge) by exact symbols_preserved.
+        eassumption.
+      * rewrite subst_arg_ok with (sp:=sp) (m:=m) by trivial.
+        assumption.
+    + econstructor; eauto.
+  (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply store_sound with (a0:=a) (m0:=m); unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
+      
+  - (* Icall *)
+    destruct (find_function_translated ros rs fd H0) as [tfd [HTFD1 HTFD2]].
+    econstructor. split.
+    + eapply exec_Icall; try eassumption.
+      * TR_AT. reflexivity.
+      * apply sig_preserved; auto.
+    + rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
+      assert (wt_instr f tenv (Icall (funsig fd) ros args res pc')) as WTcall by eauto with wt.
+      inv WTcall.
+      constructor; trivial.
+      * econstructor; eauto.
+        ** rewrite sig_preserved with (f:=fd); assumption.
+        ** intros.
+
+            (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      (* END INVARIANT *)
+      { apply kill_reg_sound; unfold ctx; eauto with cse3.
+           eapply kill_mem_sound; unfold ctx; eauto with cse3. }
+      * rewrite sig_preserved with (f:=fd) by trivial.
+        rewrite <- H7.
+        apply wt_regset_list; auto.
+  - (* Itailcall *)
+    destruct (find_function_translated ros rs fd H0) as [tfd [HTFD1 HTFD2]].
+    econstructor. split.
+    + eapply exec_Itailcall; try eassumption.
+      * TR_AT. reflexivity.
+      * apply sig_preserved; auto.
+      * rewrite stacksize_preserved with (f:=f); eauto.
+    + rewrite subst_args_ok with (m:=m) (sp := (Vptr stk Ptrofs.zero)) by trivial.
+      assert (wt_instr f tenv (Itailcall (funsig fd) ros args)) as WTcall by eauto with wt.
+      inv WTcall.
+      constructor; trivial.
+      * rewrite sig_preserved with (f:=fd) by trivial.
+        inv STACKS.
+        ** econstructor; eauto.
+           rewrite H7.
+           rewrite <- sig_preserved2 with (tf:=tf) by trivial.
+           assumption.
+        ** econstructor; eauto.
+           unfold proj_sig_res in *.
+           rewrite H7.
+           rewrite WTRES.
+           rewrite sig_preserved2 with (f:=f) by trivial.
+           reflexivity.
+      * rewrite sig_preserved with (f:=fd) by trivial.
+        rewrite <- H6.
+        apply wt_regset_list; auto.
+  - (* Ibuiltin *)
+    econstructor. split.
+    + eapply exec_Ibuiltin; try eassumption.
+      * TR_AT. reflexivity.
+      * eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+      * eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + econstructor; eauto.
+      * eapply wt_exec_Ibuiltin with (f:=f); eauto with wt.
+      *   (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      (* END INVARIANT *)
+
+        apply kill_builtin_res_sound; unfold ctx; eauto with cse3.
+        eapply external_call_sound; unfold ctx; eauto with cse3.
+        
+  - (* Icond *)
+    destruct (find_cond_in_fmap (ctx := ctx) params invs pc cond args) as [bfound | ] eqn:FIND_COND.                                                                    
+    + econstructor; split.
+      * eapply exec_Inop; try eassumption.
+        TR_AT. unfold param_transf_instr. fold invs. fold ctx. rewrite FIND_COND. reflexivity.
+      * replace bfound with b.
+        { econstructor; eauto.
+          (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me as [IND_so [IND_not ZOT]].
+      clear ZOT.
+      rewrite relb_leb_correct in IND_so.
+      rewrite relb_leb_correct in IND_not.
+      
+      destruct b.
+      { eapply relb_ge. eassumption. apply apply_cond_sound; auto. }
+      eapply relb_ge. eassumption. apply apply_cond_sound; trivial.
+      rewrite eval_negate_condition.
+      rewrite H0.
+      reflexivity.
+      (* END INVARIANT *)
+        }
+        unfold sem_rel_b in REL.
+        destruct (invs # pc) as [rel | ] eqn:FIND_REL.
+        2: contradiction.
+        pose proof (is_condition_present_sound pc rel cond args rs m REL) as COND_PRESENT_TRUE.
+        pose proof (is_condition_present_sound pc rel (negate_condition cond) args rs m REL) as COND_PRESENT_FALSE.
+        rewrite eval_negate_condition in COND_PRESENT_FALSE.
+        unfold find_cond_in_fmap in FIND_COND.
+        change (@PMap.get (option RELATION.t)) with (@Regmap.get RB.t) in FIND_COND.
+        rewrite FIND_REL in FIND_COND.
+        destruct (params.(cse3_conditions)).
+        2: discriminate.
+        destruct (is_condition_present pc rel cond args).
+        { rewrite COND_PRESENT_TRUE in H0 by trivial.
+          congruence.
+        }
+        destruct (is_condition_present pc rel (negate_condition cond) args).
+        { destruct (eval_condition cond rs ## args m) as [b0 | ].
+          2: discriminate.
+          inv H0.
+          cbn in COND_PRESENT_FALSE.
+          intuition.
+          inv H0.
+          inv FIND_COND.
+          destruct b; trivial; cbn in H2; discriminate.
+        }
+        clear COND_PRESENT_TRUE COND_PRESENT_FALSE.
+        pose proof (is_condition_present_sound pc rel cond (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) rs m REL) as COND_PRESENT_TRUE.
+        pose proof (is_condition_present_sound pc rel (negate_condition cond) (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) rs m REL) as COND_PRESENT_FALSE.
+        rewrite eval_negate_condition in COND_PRESENT_FALSE.
+        
+        destruct is_condition_present.
+        { rewrite subst_args_ok with (sp:=sp) (m:=m) in COND_PRESENT_TRUE.
+          { rewrite COND_PRESENT_TRUE in H0 by trivial.
+            congruence.
+          }
+          unfold fmap_sem.
+          unfold sem_rel_b.
+          fold invs.
+          rewrite FIND_REL.
+          exact REL.
+        }
+        destruct is_condition_present.
+        { rewrite subst_args_ok with (sp:=sp) (m:=m) in COND_PRESENT_FALSE.
+          { destruct (eval_condition cond rs ## args m) as [b0 | ].
+            2: discriminate.
+            inv H0.
+            cbn in COND_PRESENT_FALSE.
+            intuition.
+            inv H0.
+            inv FIND_COND.
+            destruct b; trivial; cbn in H2; discriminate.
+          }
+          unfold fmap_sem.
+          unfold sem_rel_b.
+          fold invs.
+          rewrite FIND_REL.
+          exact REL.          
+        }
+        discriminate.
+   + econstructor; split.        
+      * eapply exec_Icond with (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); try eassumption.
+        ** TR_AT. unfold param_transf_instr. fold invs. fold ctx.
+           rewrite FIND_COND.
+           reflexivity.
+        ** rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
+           eassumption.
+        ** reflexivity.
+     * econstructor; eauto.
+
+          (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me as [IND_so [IND_not ZOT]].
+      clear ZOT.
+      rewrite relb_leb_correct in IND_so.
+      rewrite relb_leb_correct in IND_not.
+      
+      destruct b.
+      { eapply relb_ge. eassumption. apply apply_cond_sound; auto. }
+      eapply relb_ge. eassumption. apply apply_cond_sound; trivial.
+      rewrite eval_negate_condition.
+      rewrite H0.
+      reflexivity.
+      (* END INVARIANT *)
+      
+  - (* Ijumptable *)
+    econstructor. split.
+    + eapply exec_Ijumptable with (arg := (subst_arg (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc arg)); try eassumption.
+      * TR_AT. reflexivity.
+      * rewrite subst_arg_ok with (sp:=sp) (m:=m) by trivial.
+        assumption.
+    + econstructor; eauto.
+      
+      (* BEGIN INVARIANT *)
+      fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      rewrite forallb_forall in IND_step_me.
+      assert (RB.ge (invs # pc') (Some inv_pc)) as GE.
+      {
+        apply relb_leb_correct.
+        specialize IND_step_me with (pc', Some inv_pc).
+        apply IND_step_me.
+        apply (in_map (fun pc'0 : node => (pc'0, Some inv_pc))).
+        eapply list_nth_z_in.
+        eassumption.
+      }
+      destruct (invs # pc'); cbn in *.
+      2: contradiction.
+      eapply rel_ge; eauto.
+      (* END INVARIANT *)
+
+  - (* Ireturn *)
+    destruct or as [arg | ].
+    -- econstructor. split.
+       + eapply exec_Ireturn with (or := Some (subst_arg (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc arg)).
+         * TR_AT. reflexivity.
+         * rewrite stacksize_preserved with (f:=f); eauto.
+       + simpl.
+         rewrite subst_arg_ok with (sp:=(Vptr stk Ptrofs.zero)) (m:=m) by trivial.
+         econstructor; eauto.
+         apply type_function_correct in WTF.
+         apply wt_instrs with (pc:=pc) (instr:=(Ireturn (Some arg))) in WTF.
+         2: assumption.
+         inv WTF.
+         rewrite sig_preserved2 with (f:=f) by assumption.
+         rewrite <- H3.
+         unfold wt_regset in WTRS.
+         apply WTRS.
+    -- econstructor. split.
+       + eapply exec_Ireturn; try eassumption.
+         * TR_AT; reflexivity.
+         * rewrite stacksize_preserved with (f:=f); eauto.
+       + econstructor; eauto.
+         simpl. trivial.
+  - (* Callstate internal *)
+    monadInv FUN.
+    rename x into tf.
+    destruct (transf_function_is_typable f tf EQ) as [tenv TENV].
+    econstructor; split.
+    + apply exec_function_internal.
+      rewrite stacksize_preserved with (f:=f); eauto.
+    + rewrite params_preserved with (tf:=tf) (f:=f) by assumption.
+      rewrite entrypoint_preserved with (tf:=tf) (f:=f) by assumption.
+      econstructor; eauto.
+      * apply type_function_correct in TENV.
+        inv TENV.
+        simpl in WTARGS.
+        rewrite sig_preserved2 with (f:=f) in WTARGS by assumption.
+        apply wt_init_regs.
+        rewrite <- wt_params in WTARGS.
+        assumption.
+      * assert ((check_inductiveness f tenv (fst (preanalysis tenv f)))=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+        unfold check_inductiveness in IND.
+        rewrite andb_true_iff in IND.
+        destruct IND as [IND_entry IND_step].
+        clear IND_step.
+        apply RB.beq_correct in IND_entry.
+        unfold RB.eq in *.
+        destruct ((fst (preanalysis tenv f)) # (fn_entrypoint f)).
+        2: contradiction.
+        cbn.
+        rewrite <- IND_entry.
+        apply sem_rel_top.
+           
+  - (* external *)
+    simpl in FUN.
+    inv FUN.
+    econstructor. split.
+    + eapply exec_function_external.
+      eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + econstructor; eauto.
+      eapply external_call_well_typed; eauto.
+  - (* return *)
+    inv STACKS.
+    econstructor. split.
+    + eapply exec_return.
+    + econstructor; eauto.
+      apply wt_regset_assign; trivial.
+      rewrite WTRES0.
+      exact WTRES.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto.
+  intros (tf & A & B).
+  exists (Callstate nil tf nil m0); split.
+  - econstructor; eauto.
+    + eapply (Genv.init_mem_match TRANSF); eauto.
+    + replace (prog_main tprog) with (prog_main prog).
+      rewrite symbols_preserved. eauto.
+      symmetry. eapply match_program_main; eauto.
+    + rewrite <- H3. eapply sig_preserved; eauto.
+  - constructor; trivial.
+    + constructor. rewrite sig_preserved with (f:=f) by assumption.
+      rewrite H3. reflexivity.
+    + rewrite sig_preserved with (f:=f) by assumption.
+      rewrite H3. reflexivity.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> final_state S1 r -> final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  - apply senv_preserved.
+  - eexact transf_initial_states.
+  - eexact transf_final_states.
+  - intros. eapply step_simulation; eauto.
+Qed.
+
+End PRESERVATION.
+End PARAMS.
+
+Definition match_prog := param_match_prog (cmdline_params tt).
diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v
index e96c4cd4..9641d012 100644
--- a/backend/CSEdomain.v
+++ b/backend/CSEdomain.v
@@ -43,7 +43,7 @@ Definition eq_list_valnum: forall (x y: list valnum), {x=y}+{x<>y} := list_eq_de
 
 Definition eq_rhs (x y: rhs) : {x=y}+{x<>y}.
 Proof.
-  generalize chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum.
+  generalize trapping_mode_eq chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum.
   decide equality.
 Defined.
 
@@ -109,7 +109,16 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem):
   | load_eval_to: forall chunk addr vl a v,
       eval_addressing ge sp addr (map valu vl) = Some a ->
       Mem.loadv chunk m a = Some v ->
-      rhs_eval_to valu ge sp m (Load chunk addr vl) v.
+      rhs_eval_to valu ge sp m (Load chunk addr vl) v
+(*  | load_notrap1_eval_to: forall chunk addr vl,
+      eval_addressing ge sp addr (map valu vl) = None ->
+      rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
+                  Vundef
+  | load_notrap2_eval_to: forall chunk addr vl a,
+      eval_addressing ge sp addr (map valu vl) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
+                  Vundef *).
 
 Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem):
                                                       equation -> Prop :=
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index a2a1b461..556b44b3 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -71,7 +71,11 @@ Lemma rhs_eval_to_exten:
 Proof.
   intros. inv H; simpl in *.
 - constructor. rewrite valnums_val_exten by assumption. auto.
-- econstructor; eauto. rewrite valnums_val_exten by assumption. auto.
+- eapply load_eval_to; eauto. rewrite valnums_val_exten by assumption. auto.
+(*
+- apply load_notrap1_eval_to; auto. rewrite valnums_val_exten by assumption. assumption.
+- eapply load_notrap2_eval_to; eauto. rewrite valnums_val_exten by assumption. assumption.
+*)
 Qed.
 
 Lemma equation_holds_exten:
@@ -393,6 +397,39 @@ Proof.
 + intros. apply Regmap.gso; auto.
 Qed.
 
+(*
+Lemma add_load_holds_none1:
+  forall valu1 ge sp rs m n addr (args: list reg) chunk dst,
+  numbering_holds valu1 ge sp rs m n ->
+  eval_addressing ge sp addr rs##args = None ->
+  exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst chunk addr args).
+Proof.
+  unfold add_load; intros.
+  destruct (valnum_regs n args) as [n1 vl] eqn:VN.
+  exploit valnum_regs_holds; eauto.
+  intros (valu2 & A & B & C & D & E).
+  eapply add_rhs_holds; eauto.
++ rewrite Regmap.gss; auto. eapply load_notrap1_eval_to. rewrite <- B; eauto.
++ intros. apply Regmap.gso; auto.
+Qed.
+
+Lemma add_load_holds_none2:
+  forall valu1 ge sp rs m n addr (args: list reg) a chunk dst,
+  numbering_holds valu1 ge sp rs m n ->
+  eval_addressing ge sp addr rs##args = Some a ->
+  Mem.loadv chunk m a = None ->
+  exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst NOTRAP chunk addr args).
+Proof.
+  unfold add_load; intros.
+  destruct (valnum_regs n args) as [n1 vl] eqn:VN.
+  exploit valnum_regs_holds; eauto.
+  intros (valu2 & A & B & C & D & E).
+  eapply add_rhs_holds; eauto.
++ rewrite Regmap.gss; auto. eapply load_notrap2_eval_to. rewrite <- B; eauto. assumption. 
++ intros. apply Regmap.gso; auto.
+Qed.
+ *)
+
 Lemma set_unknown_holds:
   forall valu ge sp rs m n r v,
   numbering_holds valu ge sp rs m n ->
@@ -456,8 +493,8 @@ Lemma kill_all_loads_hold:
 Proof.
   intros. eapply kill_equations_hold; eauto.
   unfold filter_loads; intros. inv H1.
-  constructor. rewrite <- H2. apply op_depends_on_memory_correct; auto.
-  discriminate.
+  1: constructor; rewrite <- H2; apply op_depends_on_memory_correct; auto.
+  all: discriminate.
 Qed.
 
 Lemma kill_loads_after_store_holds:
@@ -486,6 +523,20 @@ Proof.
   apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
   erewrite <- regs_valnums_sound by eauto. eauto with va.
   apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
+(*
+- eapply load_notrap1_eval_to; assumption.
+- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
+  eapply load_notrap2_eval_to; eauto.
+  rewrite <- H9.
+  destruct a; simpl in H1; try discriminate.
+  destruct a0; simpl in H9; try discriminate; simpl; trivial.
+  rewrite negb_false_iff in H6. unfold aaddressing in H6.
+  eapply Mem.load_store_other. eauto.
+  eapply pdisjoint_sound; eauto.
+  apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
+  erewrite <- regs_valnums_sound by eauto. eauto with va.
+  apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
+*)
 Qed.
 
 Lemma store_normalized_range_sound:
@@ -562,6 +613,19 @@ Proof.
   unfold aaddressing. apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
   erewrite <- regs_valnums_sound by eauto. eauto with va.
   auto.
+(*
+- eapply load_notrap1_eval_to; assumption.
+- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
+  eapply load_notrap2_eval_to; eauto. rewrite <- H11.
+  destruct a; simpl in H10; try discriminate; simpl; trivial.
+  rewrite negb_false_iff in H8.
+  eapply Mem.load_storebytes_other. eauto.
+  rewrite H6. rewrite Z2Nat.id by lia.
+  eapply pdisjoint_sound. eauto.
+  unfold aaddressing. apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
+  erewrite <- regs_valnums_sound by eauto. eauto with va.
+  auto.
+*)
 Qed.
 
 Lemma load_memcpy:
@@ -1034,6 +1098,10 @@ Proof.
   destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
   destruct SAT as [valu1 NH1].
   exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
+  destruct trap.
+
+  (* TRAP *)
+  {
   destruct (find_rhs n1 (Load chunk addr vl)) as [r|] eqn:?.
 + (* replaced by move *)
   exploit find_rhs_sound; eauto. intros (v' & EV & LD).
@@ -1063,7 +1131,102 @@ Proof.
   unfold transfer; rewrite H.
   eapply add_load_holds; eauto.
   apply set_reg_lessdef; auto.
+  }
+
+  (* NOTRAP *)
+  {
+    assert (exists a' : val,
+       eval_addressing ge sp addr rs' ## args = Some a' /\ Val.lessdef a a')
+       as Haa'.    
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Haa' as [a' [Ha'1 Ha'2]].
+
+    assert (
+    exists v' : val,
+        Mem.loadv chunk m' a' = Some v' /\ Val.lessdef v v') as Hload' by
+    (apply Mem.loadv_extends with (m1 := m) (addr1 := a); assumption).
+    destruct Hload' as [v' [Hv'1 Hv'2]].
+
+    econstructor. split.
+    eapply exec_Iload; eauto.
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+
+    econstructor; eauto.
+    eapply analysis_correct_1; eauto. simpl; eauto.
+    unfold transfer. rewrite H.
+    exists valu1.
+    apply set_unknown_holds.
+    assumption.
+    apply set_reg_lessdef; assumption.
+  }
 
+- (* Iload notrap 1*)
+  destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
+  destruct SAT as [valu1 NH1].
+  exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
+
+    econstructor. split.
+    eapply exec_Iload_notrap1; eauto.
+    rewrite eval_addressing_preserved with (ge1 := ge).
+    apply eval_addressing_lessdef_none with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    exact symbols_preserved.
+
+    econstructor; eauto.
+    eapply analysis_correct_1; eauto. simpl; eauto.
+    unfold transfer. rewrite H.
+    exists valu1.
+    apply set_unknown_holds.
+    assumption.
+    apply set_reg_lessdef.
+    constructor. assumption.
+  
+- (* Iload notrap 2*)
+  destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
+  destruct SAT as [valu1 NH1].
+  exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
+
+    assert (exists a' : val,
+       eval_addressing ge sp addr rs' ## args = Some a' /\ Val.lessdef a a')
+       as Haa'.    
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Haa' as [a' [Ha'1 Ha'2]].
+
+    destruct (Mem.loadv chunk m' a') eqn:Hload'.
+
+    {
+      econstructor. split.
+      eapply exec_Iload; eauto.
+      try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+      
+      econstructor; eauto.
+      eapply analysis_correct_1; eauto. simpl; eauto.
+      unfold transfer. rewrite H.
+      exists valu1.
+      apply set_unknown_holds.
+      assumption.
+      apply set_reg_lessdef; eauto.
+    }
+    {
+      econstructor. split.
+      eapply exec_Iload_notrap2; eauto.
+      try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+      
+      econstructor; eauto.
+      eapply analysis_correct_1; eauto. simpl; eauto.
+      unfold transfer. rewrite H.
+      exists valu1.
+      apply set_unknown_holds.
+      assumption.
+      apply set_reg_lessdef.
+      constructor. assumption.
+    }
+    
 - (* Istore *)
   destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
   destruct SAT as [valu1 NH1].
@@ -1154,6 +1317,7 @@ Proof.
   + apply CASE2; inv H1; auto.
   + apply CASE1.
   + apply CASE2; inv H1; auto.
+  + apply CASE2; inv H1; auto.
 * apply set_res_lessdef; auto.
 
 - (* Icond *)
diff --git a/backend/CleanupLabelsproof.v b/backend/CleanupLabelsproof.v
index fb8e57b7..39c3919f 100644
--- a/backend/CleanupLabelsproof.v
+++ b/backend/CleanupLabelsproof.v
@@ -255,6 +255,18 @@ Proof.
   left; econstructor; split.
   econstructor; eauto.
   econstructor; eauto with coqlib.
+(* Lload notrap1 *)
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = None).
+    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  left; econstructor; split.
+  eapply exec_Lload_notrap1; eauto.
+  econstructor; eauto with coqlib.
+(* Lload notrap2 *)
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
+    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  left; econstructor; split.
+  eapply exec_Lload_notrap2; eauto.
+  econstructor; eauto with coqlib.
 (* Lstore *)
   assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
diff --git a/backend/Cminor.v b/backend/Cminor.v
index 1618866e..829adca0 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -78,6 +78,7 @@ Inductive unary_operation : Type :=
   | Osingleoflongu: unary_operation.       (**r unsigned long to float32 *)
 
 Inductive binary_operation : Type :=
+  | Oexpect: typ -> binary_operation       (**r first value, second is expected*)
   | Oadd: binary_operation                 (**r integer addition *)
   | Osub: binary_operation                 (**r integer subtraction *)
   | Omul: binary_operation                 (**r integer multiplication *)
@@ -302,6 +303,7 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val :=
 Definition eval_binop
             (op: binary_operation) (arg1 arg2: val) (m: mem): option val :=
   match op with
+  | Oexpect ty => Some (Val.normalize arg1 ty)
   | Oadd => Some (Val.add arg1 arg2)
   | Osub => Some (Val.sub arg1 arg2)
   | Omul => Some (Val.mul arg1 arg2)
diff --git a/backend/CminorSel.v b/backend/CminorSel.v
index f6f6e34d..cedd2bed 100644
--- a/backend/CminorSel.v
+++ b/backend/CminorSel.v
@@ -50,7 +50,7 @@ with exprlist : Type :=
   | Econs: expr -> exprlist -> exprlist
 
 with condexpr : Type :=
-  | CEcond : condition -> exprlist -> condexpr
+  | CEcond : condition -> option bool -> exprlist -> condexpr
   | CEcondition : condexpr -> condexpr -> condexpr -> condexpr
   | CElet: expr -> condexpr -> condexpr.
 
@@ -207,10 +207,10 @@ with eval_exprlist: letenv -> exprlist -> list val -> Prop :=
       eval_exprlist le (Econs a1 al) (v1 :: vl)
 
 with eval_condexpr: letenv -> condexpr -> bool -> Prop :=
-  | eval_CEcond: forall le cond al vl vb,
+  | eval_CEcond: forall le cond expected al vl vb,
       eval_exprlist le al vl ->
       eval_condition cond vl m = Some vb ->
-      eval_condexpr le (CEcond cond al) vb
+      eval_condexpr le (CEcond cond expected al) vb
   | eval_CEcondition: forall le a b c va v,
       eval_condexpr le a va ->
       eval_condexpr le (if va then b else c) v ->
@@ -495,7 +495,7 @@ with lift_exprlist (p: nat) (a: exprlist) {struct a}: exprlist :=
 
 with lift_condexpr (p: nat) (a: condexpr) {struct a}: condexpr :=
   match a with
-  | CEcond c al => CEcond c (lift_exprlist p al)
+  | CEcond c expected al => CEcond c expected (lift_exprlist p al)
   | CEcondition a b c => CEcondition (lift_condexpr p a) (lift_condexpr p b) (lift_condexpr p c)
   | CElet a b => CElet (lift_expr p a) (lift_condexpr (S p) b)
   end.
diff --git a/backend/Cminortyping.v b/backend/Cminortyping.v
index 9f35fe35..d9e99122 100644
--- a/backend/Cminortyping.v
+++ b/backend/Cminortyping.v
@@ -64,6 +64,7 @@ Definition type_binop (op: binary_operation) : typ * typ * typ :=
   | Ocmpf _ => (Tfloat, Tfloat, Tint)
   | Ocmpfs _ => (Tsingle, Tsingle, Tint)
   | Ocmpl _ | Ocmplu _ => (Tlong, Tlong, Tint)
+  | Oexpect ty => (ty, ty, ty)
   end.
 
 Module RTLtypes <: TYPE_ALGEBRA.
diff --git a/backend/Constprop.v b/backend/Constprop.v
index 4aab7677..0be9438c 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -69,7 +69,7 @@ Fixpoint successor_rec (n: nat) (f: function) (ae: AE.t) (pc: node) : node :=
       match f.(fn_code)!pc with
       | Some (Inop s) =>
           successor_rec n' f ae s
-      | Some (Icond cond args s1 s2) =>
+      | Some (Icond cond args s1 s2 _) =>
           match resolve_branch (eval_static_condition cond (aregs ae args)) with
           | Some b => successor_rec n' f ae (if b then s1 else s2)
           | None => pc
@@ -181,7 +181,7 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
               let (op', args') := op_strength_reduction op args aargs in
               Iop op' args' res s'
           end
-      | Iload chunk addr args dst s =>
+      | Iload TRAP chunk addr args dst s =>
           let aargs := aregs ae args in
           let a := ValueDomain.loadv chunk rm am (eval_static_addressing addr aargs) in
           match const_for_result a with
@@ -189,7 +189,7 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
               Iop cop nil dst s
           | None =>
               let (addr', args') := addr_strength_reduction addr args aargs in
-              Iload chunk addr' args' dst s
+              Iload TRAP chunk addr' args' dst s
           end
       | Istore chunk addr args src s =>
           let aargs := aregs ae args in
@@ -217,14 +217,14 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
              end
           | _, _ => dfl
           end
-      | Icond cond args s1 s2 =>
+      | Icond cond args s1 s2 i =>
           let aargs := aregs ae args in
           match resolve_branch (eval_static_condition cond aargs) with
           | Some b =>
               if b then Inop s1 else Inop s2
           | None =>
               let (cond', args') := cond_strength_reduction cond args aargs in
-              Icond cond' args' s1 s2
+              Icond cond' args' s1 s2 i
           end
       | Ijumptable arg tbl =>
           match areg ae arg with
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index a3592c4d..b59ee8b4 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -142,8 +142,8 @@ Inductive match_pc (f: function) (rs: regset) (m: mem): nat -> node -> node -> P
       f.(fn_code)!pc = Some (Inop s) ->
       match_pc f rs m n s pcx ->
       match_pc f rs m (S n) pc pcx
-  | match_pc_cond: forall n pc cond args s1 s2 pcx,
-      f.(fn_code)!pc = Some (Icond cond args s1 s2) ->
+  | match_pc_cond: forall n pc cond args s1 s2 pcx i,
+      f.(fn_code)!pc = Some (Icond cond args s1 s2 i) ->
       (forall b,
         eval_condition cond rs##args m = Some b ->
         match_pc f rs m n (if b then s1 else s2) pcx) ->
@@ -406,6 +406,8 @@ Proof.
   assert (VM1: vmatch bc a aa) by (eapply eval_static_addressing_sound; eauto with va).
   set (av := loadv chunk (romem_for cu) am aa).
   assert (VM2: vmatch bc v av) by (eapply loadv_sound; eauto).
+  destruct trap.
+  {
   destruct (const_for_result av) as [cop|] eqn:?; intros.
 + (* constant-propagated *)
   exploit const_for_result_correct; eauto. intros (v' & A & B).
@@ -431,6 +433,59 @@ Proof.
   left; econstructor; econstructor; split.
   eapply exec_Iload; eauto.
   eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+  }
+  {
+    assert (exists v2 : val,
+         eval_addressing ge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) = Some v2 /\ Val.lessdef a v2) as Hexist2.
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Hexist2 as [v2 [Heval2 Hlessdef2]].
+    destruct (Mem.loadv_extends chunk m m' a v2 v MEM H1 Hlessdef2) as [vX [Hvx1 Hvx2]].
+    left; econstructor; econstructor; split.
+    eapply exec_Iload with (a := v2); eauto.
+    try (erewrite eval_addressing_preserved with (ge1:=ge); auto;
+    exact symbols_preserved).
+  eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    
+  }
+
+- (* Iload notrap1 *)
+  rename pc'0 into pc. TransfInstr.
+  assert (eval_addressing tge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) =  None).
+  rewrite eval_addressing_preserved with (ge1 := ge); eauto.
+  apply eval_addressing_lessdef_none with (vl1 := rs ## args).
+  apply regs_lessdef_regs; assumption.
+  assumption.
+  exact symbols_preserved.
+  
+  left; econstructor; econstructor; split.
+  eapply exec_Iload_notrap1; eauto.
+  eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+
+- (* Iload notrap2 *)
+  rename pc'0 into pc. TransfInstr.
+    assert (exists v2 : val,
+         eval_addressing ge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) = Some v2 /\ Val.lessdef a v2) as Hexist2.
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Hexist2 as [a' [Heval' Hlessdef']].
+    destruct (Mem.loadv chunk m' a') eqn:Hload'.
+    {
+    left; econstructor; econstructor; split.
+    eapply exec_Iload; eauto.
+   
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+    eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    }
+    {
+      left; econstructor; econstructor; split.
+      eapply exec_Iload_notrap2; eauto.
+   
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+    eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    }
 
 - (* Istore *)
   rename pc'0 into pc. TransfInstr.
diff --git a/backend/Deadcode.v b/backend/Deadcode.v
index 2286876e..3412a6fa 100644
--- a/backend/Deadcode.v
+++ b/backend/Deadcode.v
@@ -123,7 +123,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t)
       if is_dead nres then after
       else if is_int_zero nres then (kill res ne, nm)
       else (add_needs args (needs_of_operation op nres) (kill res ne), nm)
-  | Some (Iload chunk addr args dst s) =>
+  | Some (Iload trap chunk addr args dst s) =>
       let ndst := nreg ne dst in
       if is_dead ndst then after
       else if is_int_zero ndst then (kill dst ne, nm)
@@ -142,7 +142,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t)
        nmem_dead_stack f.(fn_stacksize))
   | Some(Ibuiltin ef args res s) =>
       transfer_builtin approx!!pc ef args res ne nm
-  | Some(Icond cond args s1 s2) =>
+  | Some(Icond cond args s1 s2 _) =>
       if peq s1 s2 then after else 
         (add_needs args (needs_of_condition cond) ne, nm)
   | Some(Ijumptable arg tbl) =>
@@ -175,7 +175,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t)
         end
       else
         instr
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       let ndst := nreg (fst an!!pc) dst in
       if is_dead ndst then
         Inop s
@@ -192,7 +192,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t)
       if nmem_contains (snd an!!pc) (aaddr_arg approx!!pc dst) sz
       then instr
       else Inop s
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 _ =>
       if peq s1 s2 then Inop s1 else instr
   | _ =>
       instr
diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v
index 7be12c69..be20af0b 100644
--- a/backend/Deadcodeproof.v
+++ b/backend/Deadcodeproof.v
@@ -829,6 +829,81 @@ Ltac UseTransfer :=
   apply eagree_update; eauto 2 with na.
   eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
 
+- (* load notrap1 *)
+  TransfInstr; UseTransfer.
+  destruct (is_dead (nreg ne dst)) eqn:DEAD;
+  [idtac|destruct (is_int_zero (nreg ne dst)) eqn:INTZERO];
+  simpl in *.
++ (* dead instruction, turned into a nop *)
+  econstructor; split.
+  eapply exec_Inop; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update_dead; auto with na.
++ (* instruction with needs = [I Int.zero], turned into a load immediate of zero. *)
+  econstructor; split.
+  eapply exec_Iop with (v := Vint Int.zero); eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; auto.
+  rewrite is_int_zero_sound by auto.
+  constructor.
++ (* preserved *)
+  exploit eval_addressing_lessdef_none. eapply add_needs_all_lessdef; eauto. eassumption.
+  intro Hnone'.
+  assert (eval_addressing tge (Vptr sp0 Ptrofs.zero) addr te ## args = None) as Hnone2'.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  assumption.
+  exact symbols_preserved.
+  
+  econstructor; split.
+  eapply exec_Iload_notrap1; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+
+- (* load notrap2 *)
+  TransfInstr; UseTransfer.
+  
+  destruct (is_dead (nreg ne dst)) eqn:DEAD;
+  [idtac|destruct (is_int_zero (nreg ne dst)) eqn:INTZERO];
+  simpl in *.
++ (* dead instruction, turned into a nop *)
+  econstructor; split.
+  eapply exec_Inop; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update_dead; auto with na.
++ (* instruction with needs = [I Int.zero], turned into a load immediate of zero. *)
+  econstructor; split.
+  eapply exec_Iop with (v := Vint Int.zero); eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; auto.
+  rewrite is_int_zero_sound by auto.
+  constructor.
++ (* preserved *)
+  exploit eval_addressing_lessdef. eapply add_needs_all_lessdef; eauto. eauto.
+  intros (ta & U & V).
+  destruct (Mem.loadv chunk tm ta) eqn:Hchunk2.
+  {
+  econstructor; split.
+  eapply exec_Iload. eauto.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  eassumption.
+  exact symbols_preserved.
+  eassumption.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+  }
+  {
+  econstructor; split.
+  eapply exec_Iload_notrap2. eauto.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  eassumption.
+  exact symbols_preserved.
+  eassumption.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+  }
 - (* store *)
   TransfInstr; UseTransfer.
   destruct (nmem_contains nm (aaddressing (vanalyze cu f) # pc addr args)
diff --git a/backend/Debugvar.v b/backend/Debugvar.v
index 3204dae5..7806984a 100644
--- a/backend/Debugvar.v
+++ b/backend/Debugvar.v
@@ -233,7 +233,7 @@ Definition transfer (lm: labelmap) (before: option avail) (i: instruction):
           (lm, Some (kill (S sl ofs ty) s))
       | Lop op args dst =>
           (lm, Some (kill (R dst) s))
-      | Lload chunk addr args dst =>
+      | Lload trap chunk addr args dst =>
           (lm, Some (kill (R dst) s))
       | Lstore chunk addr args src =>
           (lm, before)
diff --git a/backend/Debugvarproof.v b/backend/Debugvarproof.v
index d31c63ec..95020637 100644
--- a/backend/Debugvarproof.v
+++ b/backend/Debugvarproof.v
@@ -449,6 +449,22 @@ Proof.
   eauto. eauto.
   apply eval_add_delta_ranges. traceEq.
   constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  eapply plus_left.
+  eapply exec_Lload_notrap1.
+  rewrite <- H; apply eval_addressing_preserved; exact symbols_preserved.
+  eauto. eauto.
+  apply eval_add_delta_ranges. traceEq.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  eapply plus_left.
+  eapply exec_Lload_notrap2.
+  rewrite <- H; apply eval_addressing_preserved; exact symbols_preserved.
+  eauto. eauto.
+  apply eval_add_delta_ranges. traceEq.
+  constructor; auto.
 - (* store *)
   econstructor; split.
   eapply plus_left.
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
new file mode 100644
index 00000000..3fd86728
--- /dev/null
+++ b/backend/Duplicate.v
@@ -0,0 +1,232 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** RTL node duplication using external oracle. Used to form superblock
+  structures *)
+
+Require Import AST RTL Maps Globalenvs.
+Require Import Coqlib Errors Op.
+
+
+
+Module Type DuplicateOracle.
+
+  (** External oracle returning the new RTL code (entry point unchanged),
+    along with the new entrypoint, and a mapping of new nodes to old nodes *)
+  Parameter duplicate_aux: function -> code * node * (PTree.t node).
+
+End DuplicateOracle.
+
+
+
+Module Duplicate (D: DuplicateOracle).
+
+Export D.
+
+Definition duplicate_aux := duplicate_aux.
+
+(* Extract Constant duplicate_aux => "Duplicateaux.duplicate_aux". *)
+
+Local Open Scope error_monad_scope.
+Local Open Scope positive_scope.
+
+(** * Verification of node duplications *)
+
+Definition verify_is_copy dupmap n n' :=
+  match dupmap!n' with
+  | None => Error(msg "verify_is_copy None")
+  | Some revn => match (Pos.compare n revn) with Eq => OK tt | _ => Error(msg "verify_is_copy invalid map") end
+  end.
+
+Fixpoint verify_is_copy_list dupmap ln ln' :=
+  match ln with
+  | n::ln => match ln' with
+             | n'::ln' => do u <- verify_is_copy dupmap n n';
+                          verify_is_copy_list dupmap ln ln'
+             | nil => Error (msg "verify_is_copy_list: ln' bigger than ln") end
+  | nil => match ln' with
+          | n :: ln' => Error (msg "verify_is_copy_list: ln bigger than ln'")
+          | nil => OK tt end
+  end.
+
+Definition verify_mapping_entrypoint dupmap (f f': function): res unit :=
+  verify_is_copy dupmap (fn_entrypoint f) (fn_entrypoint f').
+
+Lemma product_eq {A B: Type} :
+  (forall (a b: A), {a=b} + {a<>b}) ->
+  (forall (c d: B), {c=d} + {c<>d}) ->
+  forall (x y: A+B), {x=y} + {x<>y}.
+Proof.
+  intros H H'. intros. decide equality.
+Qed.
+
+(** FIXME Ideally i would like to put this in AST.v but i get an "illegal application"
+ * error when doing so *)
+Remark builtin_arg_eq_pos: forall (a b: builtin_arg positive), {a=b} + {a<>b}.
+Proof.
+  intros.
+  apply (builtin_arg_eq Pos.eq_dec).
+Defined.
+Global Opaque builtin_arg_eq_pos.
+
+Remark builtin_res_eq_pos: forall (a b: builtin_res positive), {a=b} + {a<>b}.
+Proof. intros. apply (builtin_res_eq Pos.eq_dec). Qed.
+Global Opaque builtin_res_eq_pos.
+
+Definition verify_match_inst dupmap inst tinst :=
+  match inst with
+  | Inop n => match tinst with Inop n' => verify_is_copy dupmap n n' | _ => Error(msg "verify_match_inst Inop") end
+
+  | Iop op lr r n => match tinst with
+      Iop op' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (eq_operation op op') then
+            if (list_eq_dec Pos.eq_dec lr lr') then
+              if (Pos.eq_dec r r') then
+                OK tt
+              else Error (msg "Different r in Iop")
+            else Error (msg "Different lr in Iop")
+          else Error(msg "Different operations in Iop")
+      | _ => Error(msg "verify_match_inst Inop") end
+
+  | Iload tm m a lr r n => match tinst with
+      | Iload tm' m' a' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (trapping_mode_eq tm tm') then
+            if (chunk_eq m m') then
+              if (eq_addressing a a') then
+                if (list_eq_dec Pos.eq_dec lr lr') then
+                  if (Pos.eq_dec r r') then OK tt
+                  else Error (msg "Different r in Iload")
+                else Error (msg "Different lr in Iload")
+              else Error (msg "Different addressing in Iload")
+            else Error (msg "Different mchunk in Iload")
+          else Error (msg "Different trapping_mode in Iload")
+      | _ => Error (msg "verify_match_inst Iload") end
+
+  | Istore m a lr r n => match tinst with
+      | Istore m' a' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (chunk_eq m m') then
+            if (eq_addressing a a') then
+              if (list_eq_dec Pos.eq_dec lr lr') then
+                if (Pos.eq_dec r r') then OK tt
+                else Error (msg "Different r in Istore")
+              else Error (msg "Different lr in Istore")
+            else Error (msg "Different addressing in Istore")
+          else Error (msg "Different mchunk in Istore")
+      | _ => Error (msg "verify_match_inst Istore") end
+
+  | Icall s ri lr r n => match tinst with
+      | Icall s' ri' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (signature_eq s s') then
+            if (product_eq Pos.eq_dec ident_eq ri ri') then
+              if (list_eq_dec Pos.eq_dec lr lr') then
+                if (Pos.eq_dec r r') then OK tt
+                else Error (msg "Different r r' in Icall")
+              else Error (msg "Different lr in Icall")
+            else Error (msg "Different ri in Icall")
+          else Error (msg "Different signatures in Icall")
+      | _ => Error (msg "verify_match_inst Icall") end
+
+  | Itailcall s ri lr => match tinst with
+      | Itailcall s' ri' lr' =>
+          if (signature_eq s s') then
+            if (product_eq Pos.eq_dec ident_eq ri ri') then
+              if (list_eq_dec Pos.eq_dec lr lr') then OK tt
+              else Error (msg "Different lr in Itailcall")
+            else Error (msg "Different ri in Itailcall")
+          else Error (msg "Different signatures in Itailcall")
+      | _ => Error (msg "verify_match_inst Itailcall") end
+
+  | Ibuiltin ef lbar brr n => match tinst with
+      | Ibuiltin ef' lbar' brr' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (external_function_eq ef ef') then
+            if (list_eq_dec builtin_arg_eq_pos lbar lbar') then
+              if (builtin_res_eq_pos brr brr') then OK tt
+              else Error (msg "Different brr in Ibuiltin")
+            else Error (msg "Different lbar in Ibuiltin")
+          else Error (msg "Different ef in Ibuiltin")
+      | _ => Error (msg "verify_match_inst Ibuiltin") end
+
+  | Icond cond lr n1 n2 i => match tinst with
+      | Icond cond' lr' n1' n2' i' =>
+          if (list_eq_dec Pos.eq_dec lr lr') then
+            if (eq_condition cond cond') then
+              do u1 <- verify_is_copy dupmap n1 n1';
+              verify_is_copy dupmap n2 n2'
+            else if (eq_condition (negate_condition cond) cond') then
+              do u1 <- verify_is_copy dupmap n1 n2';
+              verify_is_copy dupmap n2 n1'
+            else Error (msg "Incompatible conditions in Icond")
+          else Error (msg "Different lr in Icond")
+      | _ => Error (msg "verify_match_inst Icond") end
+
+  | Ijumptable r ln => match tinst with
+      | Ijumptable r' ln' =>
+          do u <- verify_is_copy_list dupmap ln ln';
+          if (Pos.eq_dec r r') then OK tt
+          else Error (msg "Different r in Ijumptable")
+      | _ => Error (msg "verify_match_inst Ijumptable") end
+
+  | Ireturn or => match tinst with
+      | Ireturn or' =>
+          if (option_eq Pos.eq_dec or or') then OK tt
+          else Error (msg "Different or in Ireturn")
+      | _ => Error (msg "verify_match_inst Ireturn") end
+  end.
+
+Definition verify_mapping_mn dupmap f f' (m: positive*positive) :=
+  let (tn, n) := m in
+  match (fn_code f)!n with
+  | None => Error (msg "verify_mapping_mn: Could not get an instruction at (fn_code f)!n")
+  | Some inst => match (fn_code f')!tn with
+                 | None => Error (msg "verify_mapping_mn: Could not get an instruction at (fn_code xf)!tn")
+                 | Some tinst => verify_match_inst dupmap inst tinst
+                 end
+  end.
+
+Fixpoint verify_mapping_mn_rec dupmap f f' lm :=
+  match lm with
+  | nil => OK tt
+  | m :: lm => do u <- verify_mapping_mn dupmap f f' m;
+               verify_mapping_mn_rec dupmap f f' lm
+  end.
+
+Definition verify_mapping_match_nodes dupmap (f f': function): res unit :=
+  verify_mapping_mn_rec dupmap f f' (PTree.elements dupmap).
+
+(** Verifies that the [dupmap] of the translated function [f'] is giving correct information in regards to [f] *)
+Definition verify_mapping dupmap (f f': function) : res unit :=
+  do u <- verify_mapping_entrypoint dupmap f f';
+  verify_mapping_match_nodes dupmap f f'.
+
+(** * Entry points *)
+
+Definition transf_function (f: function) : res function :=
+  let (tcte, dupmap) := duplicate_aux f in
+  let (tc, te) := tcte in
+  let f' := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
+  do u <- verify_mapping dupmap f f';
+  OK f'.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef transf_function f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
+
+End Duplicate.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
new file mode 100644
index 00000000..22bee067
--- /dev/null
+++ b/backend/Duplicateaux.ml
@@ -0,0 +1,1137 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(* Oracle for Duplicate pass.
+ * - Add static prediction information to Icond nodes
+ * - Performs tail duplication on interesting traces to form superblocks
+ * - Unrolls a single iteration of innermost loops
+ * - (TODO: perform partial loop unrolling inside innermost loops)
+ *)
+
+open RTL
+open Maps
+open Camlcoq
+open DebugPrint
+open RTLcommonaux
+
+let stats_oc = ref None
+
+let set_stats_oc () =
+  try
+    let name = Sys.getenv "COMPCERT_PREDICT_STATS" in
+    let oc = open_out_gen [Open_append; Open_creat; Open_text] 0o666 name in
+    stats_oc := Some oc
+  with Not_found -> ()
+
+(* number of total CBs *)
+let stats_nb_total = ref 0
+(* we predicted the same thing as the profiling *)
+let stats_nb_correct_predicts = ref 0
+(* we predicted something (say Some true), but the profiling predicted the opposite (say Some false) *)
+let stats_nb_mispredicts = ref 0
+(* we did not predict anything (None) even though the profiling did predict something *)
+let stats_nb_missed_opportunities = ref 0
+(* we predicted something (say Some true) but the profiling preferred not to predict anything (None) *)
+let stats_nb_overpredict = ref 0
+
+(* heuristic specific counters *)
+let wrong_opcode = ref 0
+let wrong_return = ref 0
+let wrong_loop2 = ref 0
+let wrong_call = ref 0
+
+let right_opcode = ref 0
+let right_return = ref 0
+let right_loop2 = ref 0
+let right_call = ref 0
+
+let reset_stats () = begin
+  stats_nb_total := 0;
+  stats_nb_correct_predicts := 0;
+  stats_nb_mispredicts := 0;
+  stats_nb_missed_opportunities := 0;
+  stats_nb_overpredict := 0;
+  wrong_opcode := 0;
+  wrong_return := 0;
+  wrong_loop2 := 0;
+  wrong_call := 0;
+  right_opcode := 0;
+  right_return := 0;
+  right_loop2 := 0;
+  right_call := 0;
+end
+
+let incr theref = theref := !theref + 1
+
+let has_some o = match o with Some _ -> true | None -> false
+
+let stats_oc_recording () = has_some !stats_oc
+
+let write_stats_oc () =
+  match !stats_oc with
+  | None -> ()
+  | Some oc -> begin
+      Printf.fprintf oc "%d %d %d %d %d %d %d %d %d %d %d %d %d\n" !stats_nb_total
+        !stats_nb_correct_predicts !stats_nb_mispredicts !stats_nb_missed_opportunities
+        !stats_nb_overpredict
+        !wrong_opcode !wrong_return !wrong_loop2 !wrong_call
+        !right_opcode !right_return !right_loop2 !right_call
+        ;
+      close_out oc
+    end
+
+let get_loop_headers = LICMaux.get_loop_headers
+let rtl_successors = LICMaux.rtl_successors
+
+(* Get list of nodes following a BFS of the code *)
+(* Stops when predicate is reached
+ * Excludes any node given in excluded function *)
+let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin
+  debug "bfs\n";
+  let visited = ref (PTree.map (fun n i -> false) code)
+  and bfs_list = ref []
+  and to_visit = Queue.create ()
+  and node = ref entrypoint
+  in begin
+    Queue.add entrypoint to_visit;
+    while not (Queue.is_empty to_visit) do
+      node := Queue.pop to_visit;
+      if (not (get_some @@ PTree.get !node !visited)) then begin
+        visited := PTree.set !node true !visited;
+        if not (excluded !node) then begin
+          match PTree.get !node code with
+          | None -> failwith "No such node"
+          | Some i ->
+              bfs_list := !node :: !bfs_list;
+              if not (predicate !node) then
+                let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ
+        end
+      end
+    done;
+    List.rev !bfs_list
+  end
+end
+
+let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false)
+
+let optbool o = match o with Some _ -> true | None -> false
+
+let ptree_get_some n ptree = get_some @@ PTree.get n ptree
+
+(* Returns a PTree: node -> list of the predecessors of that node *)
+let get_predecessors_rtl code = begin
+  debug "get_predecessors_rtl\n";
+  let preds = ref (PTree.map (fun n i -> []) code) in
+  let process_inst (node, i) =
+    let succ = rtl_successors i
+    in List.iter (fun s ->
+      let previous_preds = ptree_get_some s !preds in
+      if optbool @@ List.find_opt (fun e -> e == node) previous_preds then ()
+      else preds := PTree.set s (node::previous_preds) !preds) succ
+  in begin
+    List.iter process_inst (PTree.elements code);
+    !preds
+  end
+end
+
+module PInt = struct
+  type t = P.t
+  let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+let print_intset s =
+  let seq = PSet.to_seq s
+  in begin
+    if !debug_flag then begin
+      Printf.printf "{";
+      Seq.iter (fun n ->
+        Printf.printf "%d " (P.to_int n)
+      ) seq;
+      Printf.printf "}"
+    end
+  end
+
+(* Looks ahead (until a branch) to see if a node further down verifies
+ * the given predicate *)
+let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
+  if (predicate node) then true
+  else match (successors @@ get_some @@ PTree.get node code) with
+    | [n] -> if (predicate n) then true
+        else (
+          if (get_some @@ PTree.get n is_loop_header) then false
+          else look_ahead_gen successors code n is_loop_header predicate
+        )
+    | _ -> false
+
+let look_ahead = look_ahead_gen rtl_successors
+
+(** 
+ * Heuristics mostly based on the paper Branch Prediction for Free 
+ *)
+
+let do_call_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    debug "\tCall heuristic..\n";
+    let predicate n = (function
+    | Icall _ -> true
+    | _ -> false) @@ get_some @@ PTree.get n code
+    in let ifso_call = look_ahead code ifso is_loop_header predicate
+    in let ifnot_call = look_ahead code ifnot is_loop_header predicate
+    in if ifso_call && ifnot_call then None
+    else if ifso_call then Some false
+    else if ifnot_call then Some true
+    else None
+  end
+
+let do_opcode_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    debug "\tOpcode heuristic..\n";
+    DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header
+  end
+
+let do_return_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    debug "\tReturn heuristic..\n";
+    let predicate n = (function
+    | Ireturn _ -> true
+    | _ -> false) @@ get_some @@ PTree.get n code
+    in let ifso_return = look_ahead code ifso is_loop_header predicate
+    in let ifnot_return = look_ahead code ifnot is_loop_header predicate
+    in if ifso_return && ifnot_return then None
+    else if ifso_return then Some false
+    else if ifnot_return then Some true
+    else None
+  end
+
+let do_store_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    debug "\tStore heuristic..\n";
+    let predicate n = (function
+    | Istore _ -> true
+    | _ -> false) @@ get_some @@ PTree.get n code
+    in let ifso_store = look_ahead code ifso is_loop_header predicate
+    in let ifnot_store = look_ahead code ifnot is_loop_header predicate
+    in if ifso_store && ifnot_store then None
+    else if ifso_store then Some false
+    else if ifnot_store then Some true
+    else None
+  end
+
+let do_loop_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    debug "\tLoop heuristic..\n";
+    let predicate n = get_some @@ PTree.get n is_loop_header in
+    let ifso_loop = look_ahead code ifso is_loop_header predicate in
+    let ifnot_loop = look_ahead code ifnot is_loop_header predicate in
+    if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *)
+    else if ifso_loop then Some true
+    else if ifnot_loop then Some false
+    else None
+  end
+
+let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
+  begin
+    debug "\tLoop2 heuristic..\n";
+    match get_some @@ PTree.get n loop_info with
+    | None -> None
+    | Some b -> Some b
+  end
+
+(** Innermost loop detection *)
+
+type innerLoop = {
+  preds: P.t list;
+  body: P.t list;
+  head: P.t; (* head of the loop *)
+  finals: P.t list; (* the final instructions, which loops back to the head *)
+  (* There may be more than one ; for instance if there is an if inside the loop with both
+   * branches leading to a goto backedge
+   * Such cases usually happen after a tail-duplication *)
+  sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+    (* may be None if we predict that we do not loop *)
+}
+
+let print_pset = LICMaux.pp_pset
+
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+  | Some true -> [n1]
+  | Some false -> [n2]
+  | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+  let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+  let headers = List.filter (fun n -> 
+    get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in 
+  match headers with
+  | [] -> (
+      let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in
+      match next_nodes with
+      | [n] -> (
+          (* To prevent getting out of the superblock and loop infinitely when the prediction is false *)
+          if HashedSet.PSet.contains trace n then
+            find_last_node_before_loop code n trace is_loop_header
+          else None
+        )
+      | _ -> None (* May happen when we predict that a loop is not taken *)
+    )
+  | [h] -> Some node
+  | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
+let get_inner_loops f code is_loop_header =
+  let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; 
+    fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+  let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
+  begin
+    debug "PREDMAP: "; print_ptree print_intlist predmap;
+    debug "LOOPMAP: "; print_ptree print_pset loopmap;
+    List.map (fun (n, body) ->
+      let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) 
+        @@ get_some @@ PTree.get n predmap in
+      let head = (* the instruction from body which is a loop header *)
+        let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter 
+          (fun n -> ptree_get_some n is_loop_header) body in
+        begin
+          assert (List.length heads == 1);
+          List.hd heads
+        end in
+      let finals = (* the predecessors from head that are in the body *)
+        let head_preds = ptree_get_some head predmap in
+        let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+        begin
+          debug "HEAD: %d\n" (P.to_int head);
+          debug "BODY: %a\n" print_pset body;
+          debug "HEADPREDS: %a\n" print_intlist head_preds;
+          filtered
+        end in
+      let sb_final = find_last_node_before_loop code head body is_loop_header in
+      let body = HashedSet.PSet.elements body in
+      { preds = preds; body = body; head = head; finals = finals;
+        sb_final = sb_final; }
+    ) 
+    (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
+     * We remove those to get just the inner loops *)
+    @@ List.filter (fun (n, body) ->
+      let count = List.length @@ HashedSet.PSet.elements body in count != 1
+    ) (PTree.elements loopmap)
+  end
+
+let get_loop_bodies code entrypoint =
+  let predecessors = get_predecessors_rtl code in
+  (* Algorithm from Muchnik, Compiler Design & Implementation, Figure 7.21 page 192 *)
+  let natural_loop n m =
+    debug "Natural Loop from %d to %d\n" (P.to_int n) (P.to_int m);
+    let in_body = ref (PTree.map (fun n b -> false) code) in
+    let body = ref [] in
+    let add_to_body n = begin
+      in_body := PTree.set n true !in_body;
+      body := n :: !body
+    end
+    in let rec process_node p =
+      debug "    Processing node %d\n" (P.to_int p);
+      List.iter (fun pred ->
+        debug "        Looking at predecessor of %d: %d\n" (P.to_int p) (P.to_int pred);
+        let is_in_body = get_some @@ PTree.get pred !in_body in
+        if (not @@ is_in_body) then begin
+          debug "        --> adding to body\n";
+          add_to_body pred;
+          process_node pred
+        end
+      ) (get_some @@ PTree.get p predecessors)
+    in begin
+      add_to_body m;
+      add_to_body n;
+      (if (m != n) then process_node m);
+      !body
+    end
+  in let option_natural_loop n = function
+    | None -> None
+    | Some m -> Some (natural_loop n m)
+  in PTree.map option_natural_loop (LICMaux.get_loop_backedges code entrypoint)
+
+(* Returns a PTree of either None or Some b where b determines the node in the loop body, for a cb instruction *)
+let get_loop_info f is_loop_header bfs_order code =
+  let loop_info = ref (PTree.map (fun n i -> None) code) in
+  let mark_body body =
+    List.iter (fun n ->
+      match get_some @@ PTree.get n code with
+      | Icond (_, _, ifso, ifnot, _) -> begin
+          match PTree.get n !loop_info with
+          | None -> ()
+          | Some _ ->
+              let b1 = List.mem ifso body in
+              let b2 = List.mem ifnot body in
+              if (b1 && b2) then ()
+              else if (b1 || b2) then begin
+                if b1 then loop_info := PTree.set n (Some true) !loop_info
+                else if b2 then loop_info := PTree.set n (Some false) !loop_info
+              end
+        end
+      | _ -> ()
+    ) body
+  in let bodymap = get_loop_bodies code f.fn_entrypoint in
+  List.iter (fun (_,obody) ->
+    match obody with
+    | None -> ()
+    | Some body -> mark_body body
+    ) (PTree.elements bodymap);
+  !loop_info
+
+(* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *)
+let get_directions f code entrypoint = begin
+  debug "get_directions\n";
+  let bfs_order = bfs code entrypoint in
+  let is_loop_header = get_loop_headers code entrypoint in
+  let loop_info = get_loop_info f is_loop_header bfs_order code in
+  let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
+  begin
+    (* ptree_printbool is_loop_header; *)
+    (* debug "\n"; *)
+    List.iter (fun n ->
+      match (get_some @@ PTree.get n code) with
+      | Icond (cond, lr, ifso, ifnot, pred) -> begin
+          if stats_oc_recording () || not @@ has_some pred then
+            (* debug "Analyzing %d.." (P.to_int n); *)
+            let heuristics = [ do_opcode_heuristic;
+              do_return_heuristic; do_loop2_heuristic loop_info n; (* do_loop_heuristic; *) do_call_heuristic;
+               (* do_store_heuristic *) ] in
+            let preferred = ref None in
+            let current_heuristic = ref 0 in
+            begin
+              debug "Deciding condition for RTL node %d\n" (P.to_int n);
+              List.iter (fun do_heur ->
+                match !preferred with
+                | None -> begin
+                  preferred := do_heur code cond ifso ifnot is_loop_header;
+                  if stats_oc_recording () then begin
+                    (* Getting stats about mispredictions from each heuristic *)
+                    (match !preferred, pred with
+                      | Some false, Some true
+                      | Some true, Some false
+                      (* | Some _, None  *) (* Uncomment for overpredicts *)
+                          -> begin
+                          match !current_heuristic with
+                          | 0 -> incr wrong_opcode
+                          | 1 -> incr wrong_return
+                          | 2 -> incr wrong_loop2
+                          | 3 -> incr wrong_call
+                          | _ -> failwith "Shouldn't happen"
+                          end
+                      | Some false, Some false
+                      | Some true, Some true -> begin
+                          match !current_heuristic with
+                          | 0 -> incr right_opcode
+                          | 1 -> incr right_return
+                          | 2 -> incr right_loop2
+                          | 3 -> incr right_call
+                          | _ -> failwith "Shouldn't happen"
+                          end
+                      | _ -> ()
+                    );
+                    incr current_heuristic
+                    end
+                  end 
+                | Some _ -> ()
+              ) heuristics;
+              directions := PTree.set n !preferred !directions;
+              (match !preferred with | Some false -> debug "\tFALLTHROUGH\n"
+                                     | Some true -> debug "\tBRANCH\n"
+                                     | None -> debug "\tUNSURE\n");
+              debug "---------------------------------------\n"
+            end
+        end
+      | _ -> ()
+    ) bfs_order;
+    !directions
+  end
+end
+
+let update_direction direction = function
+| Icond (cond, lr, n, n', pred) -> begin
+    (* Counting stats from profiling *)
+    if stats_oc_recording () then begin
+      incr stats_nb_total;
+      match pred, direction with
+      | None, None -> incr stats_nb_correct_predicts
+      | None, Some _ -> incr stats_nb_overpredict
+      | Some _, None -> incr stats_nb_missed_opportunities
+      | Some false, Some false -> incr stats_nb_correct_predicts
+      | Some false, Some true -> incr stats_nb_mispredicts
+      | Some true, Some false -> incr stats_nb_mispredicts
+      | Some true, Some true -> incr stats_nb_correct_predicts
+    end;
+
+    (* only update if there is no prior existing branch prediction *)
+    (match pred with
+    | None -> Icond (cond, lr, n, n', direction)
+    | Some _ -> begin
+        Icond (cond, lr, n, n', pred) 
+      end
+    )
+    end
+| i -> i
+
+(* Uses branch prediction to write prediction annotations in Icond *)
+let update_directions f code entrypoint = begin
+  debug "Update_directions\n";
+  let directions = get_directions f code entrypoint in
+  let code' = ref code in
+  begin
+    debug "Get Directions done, now proceeding to update all direction information..\n";
+    (* debug "Ifso directions: ";
+    ptree_printbool directions;
+    debug "\n"; *)
+    List.iter (fun (n, i) ->
+      let direction = get_some @@ PTree.get n directions in
+      code' := PTree.set n (update_direction direction i) !code'
+    ) (PTree.elements code);
+    !code'
+  end
+end
+
+(** Trace selection *)
+
+let rec exists_false_rec = function
+  | [] -> false
+  | m::lm -> let (_, b) = m in if b then exists_false_rec lm else true
+
+let exists_false boolmap = exists_false_rec (PTree.elements boolmap)
+
+(* DFS using prediction info to guide the exploration *)
+let dfs code entrypoint = begin
+  debug "dfs\n";
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let rec dfs_list code = function
+  | [] -> []
+  | node :: ln ->
+      if get_some @@ PTree.get node !visited then dfs_list code ln
+      else begin
+        visited := PTree.set node true !visited;
+        let next_nodes = (match get_some @@ PTree.get node code with
+        | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+        | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n]
+        | Ijumptable (_, ln) -> ln
+        | Itailcall _ | Ireturn _ -> []
+        | Icond (_, _, n1, n2, info) -> (match info with
+          | Some false -> [n2; n1]
+          | _ -> [n1; n2]
+          )
+        ) in node :: dfs_list code (next_nodes @ ln)
+      end
+  in dfs_list code [entrypoint]
+end
+
+let rec select_unvisited_node is_visited = function
+| [] -> failwith "Empty list"
+| n :: ln -> if not (ptree_get_some n is_visited) then n else select_unvisited_node is_visited ln
+
+let best_successor_of node code is_visited =
+  match (PTree.get node code) with
+  | None -> failwith "No such node in the code"
+  | Some i ->
+      let next_node = match i with
+      | Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore(_,_,_,_,n)
+      | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> Some n
+      | Icond (_, _, n1, n2, ob) -> (match ob with None -> None | Some false -> Some n2 | Some true -> Some n1)
+      | _ -> None
+      in match next_node with
+      | None -> None
+      | Some n -> if not (ptree_get_some n is_visited) then Some n else None
+
+(* FIXME - could be improved by selecting in priority the predicted paths *)
+let best_predecessor_of node predecessors code order is_visited =
+  match (PTree.get node predecessors) with
+  | None -> failwith "No predecessor list found"
+  | Some lp ->
+      try Some (List.find (fun n ->
+          if (List.mem n lp) && (not (ptree_get_some n is_visited)) then
+            match ptree_get_some n code with
+            | Icond (_, _, n1, n2, ob) -> (match ob with
+              | None -> false
+              | Some false -> n == n2
+              | Some true -> n == n1
+              )
+            | _ -> true
+          else false
+        ) order)
+      with Not_found -> None
+
+let print_trace = print_intlist
+
+let print_traces oc traces =
+  let rec f oc = function
+  | [] -> ()
+  | t::lt -> Printf.fprintf oc "\n\t%a,\n%a" print_trace t f lt
+  in begin
+    if !debug_flag then
+      Printf.fprintf oc "Traces: {%a}\n" f traces
+  end
+
+(* Dumb (but linear) trace selection *)
+let select_traces_linear code entrypoint =
+  let is_visited = ref (PTree.map (fun n i -> false) code) in
+  let bfs_order = bfs code entrypoint in
+  let rec go_through node = begin
+    is_visited := PTree.set node true !is_visited;
+    let next_node = match (get_some @@ PTree.get node code) with
+      | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+      | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n
+      | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+      | Icond (_, _, n1, n2, info) -> (match info with
+        | Some false -> Some n2
+        | Some true -> Some n1
+        | None -> None
+        )
+    in match next_node with
+    | None -> [node]
+    | Some n ->
+        if not (get_some @@ PTree.get n !is_visited) then node :: go_through n
+        else [node]
+    end
+  in let traces = ref [] in begin
+    List.iter (fun n ->
+      if not (get_some @@ PTree.get n !is_visited) then
+        traces := (go_through n) :: !traces
+    ) bfs_order;
+    !traces
+  end
+
+
+(* Algorithm mostly inspired from Chang and Hwu 1988
+ * "Trace Selection for Compiling Large C Application Programs to Microcode" *)
+let select_traces_chang code entrypoint = begin
+  debug "select_traces\n";
+  let order = dfs code entrypoint in
+  let predecessors = get_predecessors_rtl code in
+  let traces = ref [] in
+  let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *)
+    debug "Length: %d\n" (List.length order);
+    while exists_false !is_visited do (* while (there are unvisited nodes) *)
+      let seed = select_unvisited_node !is_visited order in
+      let trace = ref [seed] in
+      let current = ref seed in begin
+        is_visited := PTree.set seed true !is_visited; (* mark seed visited *)
+        let quit_loop = ref false in begin
+          while not !quit_loop do
+            let s = best_successor_of !current code !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some succ -> begin
+                trace := !trace @ [succ];
+                is_visited := PTree.set succ true !is_visited; (* mark s visited *)
+                current := succ
+                end
+          done;
+          current := seed;
+          quit_loop := false;
+          while not !quit_loop do
+            let s = best_predecessor_of !current predecessors code order !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some pred -> begin
+                trace := pred :: !trace;
+                is_visited := PTree.set pred true !is_visited; (* mark s visited *)
+                current := pred
+                end
+          done;
+          traces := !trace :: !traces;
+        end
+      end
+    done;
+    (* debug "DFS: \t"; print_intlist order; debug "\n"; *)
+    debug "Traces: %a" print_traces !traces;
+    !traces
+  end
+end
+
+let select_traces code entrypoint =
+  let length = List.length @@ PTree.elements code in
+  if (length < 5000) then select_traces_chang code entrypoint
+  else select_traces_linear code entrypoint
+
+let rec make_identity_ptree_rec = function
+| [] -> PTree.empty
+| m::lm -> let (n, _) = m in PTree.set n n (make_identity_ptree_rec lm)
+
+let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code)
+
+(* Change the pointers of nodes to point to n' instead of n *)
+let rec change_pointers code n n' = function
+  | [] -> code
+  | node :: nodes ->
+      let new_pred_inst = match ptree_get_some node code with
+        | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n')
+        | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n')
+        | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln);
+                               Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln)
+        | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n);
+                                 let n1' = if (n1 = n) then n' else n1
+                                 in let n2' = if (n2 = n) then n' else n2
+                                 in Icond(a, b, n1', n2', i)
+        | Inop n0 -> assert (n0 = n); Inop n'
+        | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n')
+        | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n')
+        | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n')
+        | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
+      in let new_code = PTree.set node new_pred_inst code
+      in change_pointers new_code n n' nodes
+
+(* parent: parent of n to keep as parent
+ * preds: all the other parents of n
+ * n': the integer which should contain the duplicate of n
+ * returns: new code, new ptree *)
+let duplicate code ptree parent n preds n' =
+  debug "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
+  match PTree.get n' code with
+  | Some _ -> failwith "The PTree already has a node n'"
+  | None ->
+      let c' = change_pointers code n n' preds
+      in let new_code = PTree.set n' (ptree_get_some n code) c'
+      and new_ptree = PTree.set n' n ptree
+      in (new_code, new_ptree)
+
+let rec maxint = function
+  | [] -> 0
+  | i :: l -> assert (i >= 0); let m = maxint l in if i > m then i else m
+
+let is_empty = function
+  | [] -> true
+  | _ -> false
+
+let next_free_pc code = maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1
+
+let is_a_nop code n =
+  match get_some @@ PTree.get n code with
+  | Inop _ -> true
+  | _ -> false
+
+(* code: RTL code
+ * preds: mapping node -> predecessors
+ * ptree: the revmap
+ * trace: the trace to follow tail duplication on *)
+let tail_duplicate code preds is_loop_header ptree trace =
+  debug "Tail_duplicate on that trace: %a\n" print_trace trace;
+  (* next_int: unused integer that can be used for the next duplication *)
+  let next_int = ref (next_free_pc code)
+  (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
+  in let last_node = ref None
+  in let last_duplicate = ref None
+  in let nb_duplicated = ref 0
+  (* recursive function on a trace *)
+  in let rec f code ptree is_first = function
+    | [] -> (code, ptree)
+    | n :: t ->
+        let (new_code, new_ptree) =
+          if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
+          else
+            let node_preds = ptree_get_some n preds
+            in let node_preds_nolast = 
+              (* We traverse loop headers without initiating tail duplication 
+               * (see case of two imbricated loops) *)
+              if (get_some @@ PTree.get n is_loop_header) then []
+              else List.filter (fun e -> e <> get_some !last_node) node_preds
+            (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *)
+            in let final_node_preds = match !last_duplicate with
+              | None -> node_preds_nolast
+              | Some n' -> n' :: node_preds_nolast
+            in if not (is_empty final_node_preds) then
+              let n' = !next_int
+              in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n')
+              in begin
+                next_int := !next_int + 1;
+                (if not @@ is_a_nop code n then nb_duplicated := !nb_duplicated + 1);
+                last_duplicate := Some (P.of_int n');
+                (newc, newp)
+              end
+            else (code, ptree)
+        in begin
+          last_node := Some n;
+          f new_code new_ptree false t
+        end
+  in let new_code, new_ptree = f code ptree true trace
+  in (new_code, new_ptree, !nb_duplicated)
+
+let superblockify_traces code preds is_loop_header traces ptree =
+  let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
+  in let rec f code ptree = function
+    | [] -> (code, ptree, 0)
+    | trace :: traces ->
+        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace
+        in if (nb_duplicated < max_nb_duplicated)
+          then (debug "End duplication\n"; f new_code new_ptree traces)
+          else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
+  in let new_code, new_ptree, _ = f code ptree traces
+  in (new_code, new_ptree)
+
+let invert_iconds code =
+  PTree.map1 (fun i -> match i with
+    | Icond (c, lr, ifso, ifnot, info) -> (match info with
+        | Some true -> begin
+            (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
+            Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)
+          end
+        | _ -> i)
+    | _ -> i
+  ) code
+
+(** Partial loop unrolling
+ *
+ * The following code seeks innermost loops, and unfolds the first iteration
+ * Most of the code has been moved from LICMaux.ml to Duplicateaux.ml to solve
+ * cyclic dependencies between LICMaux and Duplicateaux
+ *)
+
+let print_inner_loop iloop =
+  debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
+    print_intlist iloop.preds
+    print_intlist iloop.body
+    (P.to_int iloop.head)
+    print_intlist iloop.finals
+    print_option_pint iloop.sb_final
+
+let rec print_inner_loops = function
+| [] -> ()
+| iloop :: iloops -> begin
+    print_inner_loop iloop;
+    debug "\n";
+    print_inner_loops iloops
+  end
+
+let cb_exit_node = function
+  | Icond (_,_,n1,n2,p) -> begin match p with
+      | Some true -> Some n2
+      | Some false -> Some n1
+      | None -> None
+    end
+  | _ -> None
+
+      (*
+(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *)
+let get_natural_loop code predmap n =
+  let is_final_node m =
+    let successors = rtl_successors @@ get_some @@ PTree.get m code in
+    List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors
+  in 
+  let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in
+  let is_excluded m = match excluded_node with
+    | None -> false
+    | Some ex -> P.to_int ex == P.to_int m
+  in
+  debug "get_natural_loop for %d\n" (P.to_int n);
+  let body = bfs_until code n is_final_node is_excluded in
+  debug "BODY: %a\n" print_intlist body;
+  let final = List.find is_final_node body in
+  debug "FINAL: %d\n" (P.to_int final);
+  let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in
+  debug "PREDS: %a\n" print_intlist preds;
+  { preds = preds; body = body; head = n; final = final }
+
+let rec count_loop_headers is_loop_header = function
+  | [] -> 0
+  | n :: ln ->
+      let rem = count_loop_headers is_loop_header ln in
+      if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem
+
+let get_inner_loops f code is_loop_header =
+  let predmap = get_predecessors_rtl code in
+  let iloops = ref [] in
+  List.iter (fun (n, ilh) -> if ilh then begin
+    let iloop = get_natural_loop code predmap n in
+    let nb_headers = count_loop_headers is_loop_header iloop.body in
+    if nb_headers == 1 then (* innermost loop *)
+      iloops := iloop :: !iloops end
+  ) (PTree.elements is_loop_header);
+  !iloops
+  *)
+
+let rec generate_fwmap ln ln' fwmap =
+  match ln with
+  | [] -> begin
+      match ln' with
+      | [] -> fwmap
+      | _ -> failwith "ln and ln' have different lengths"
+    end
+  | n :: ln -> begin
+      match ln' with
+      | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap)
+      | _ -> failwith "ln and ln' have different lengths"
+    end
+
+let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap
+
+let apply_map fw n = P.of_int @@ ptree_get_some n fw
+
+let apply_map_list fw ln = List.map (apply_map fw) ln
+
+let apply_map_opt fw n =
+  match PTree.get n fw with
+  | Some n' -> P.of_int n'
+  | None -> n
+
+let change_nexts fwmap = function
+  | Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n)
+  | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n)
+  | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln)
+  | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i)
+  | Inop n -> Inop (apply_map fwmap n)
+  | Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n)
+  | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n)
+  | Istore (a, b, c, d, n) -> Istore (a, b, c, d, apply_map fwmap n)
+  | Itailcall (a, b, c) -> Itailcall (a, b, c)
+  | Ireturn o -> Ireturn o
+
+(** Clone a list of instructions into free pc indexes
+ *
+ * The list of instructions should be contiguous, and not include any loop.
+ * It is assumed that the first instruction of the list is the head.
+ * Also, the last instruction of the list should be the loop backedge.
+ *
+ * Returns: (code', revmap', ln', fwmap)
+ *  code' is the updated code, after cloning
+ *  revmap' is the updated revmap
+ *  ln' is the list of the new indexes used to reference the cloned instructions
+ *  fwmap is a map from ln to ln'
+ *)
+let clone code revmap ln = begin
+  assert (List.length ln > 0);
+  let head' = next_free_pc code in
+  (* +head' to ensure we never overlap with the existing code *)
+  let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in
+  let fwmap = generate_fwmap ln ln' PTree.empty in
+  let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in
+  let code' = ref code in
+  List.iter (fun n ->
+    let instr = get_some @@ PTree.get n code in
+    let instr' = change_nexts fwmap instr in
+    code' := PTree.set (apply_map fwmap n) instr' !code'
+  ) ln;
+  (!code', revmap', ln', fwmap)
+end
+
+let rec count_ignore_nops code = function
+  | [] -> 0
+  | n::ln ->
+      let inst = get_some @@ PTree.get n code in
+      match inst with
+      | Inop _ -> count_ignore_nops code ln
+      | _ -> 1 + count_ignore_nops code ln
+
+(* Unrolls a single interation of the inner loop
+ * 1) Clones the body into body'
+ * 2) Links the preds to the first instruction of body'
+ * 3) Links the last instruction of body' into the first instruction of body
+ *)
+let unroll_inner_loop_single code revmap iloop =
+  let body = iloop.body in
+  if count_ignore_nops code body > !Clflags.option_funrollsingle then begin
+    debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle;
+    (code, revmap)
+  end else
+    let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
+    let code' = ref code2 in
+    let head' = apply_map fwmap (iloop.head) in
+    let finals' = apply_map_list fwmap (iloop.finals) in
+    begin
+      debug "PREDS: %a\n" print_intlist iloop.preds;
+      debug "IHEAD: %d\n" (P.to_int iloop.head);
+      code' := change_pointers !code' (iloop.head) head' (iloop.preds);
+      code' := change_pointers !code' head' (iloop.head) finals';
+      (!code', revmap2)
+    end
+
+let unroll_inner_loops_single f code revmap =
+  let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+  let inner_loops = get_inner_loops f code is_loop_header in
+  let code' = ref code in
+  let revmap' = ref revmap in
+  begin
+    print_inner_loops inner_loops;
+    List.iter (fun iloop ->
+      let (new_code, new_revmap) = unroll_inner_loop_single !code' !revmap' iloop in
+      code' := new_code; revmap' := new_revmap
+    ) inner_loops;
+    (!code', !revmap')
+  end
+
+let is_some o = match o with Some _ -> true | None -> false
+
+let rec go_through_predicted code start final =
+  if start == final then
+    Some [start]
+  else
+    match rtl_successors_pref @@ get_some @@ PTree.get start code with
+    | [n] -> (
+        match go_through_predicted code n final with
+        | Some ln -> Some (start :: ln)
+        | None -> None
+      )
+    | _ -> None
+
+(* Unrolls the body of the inner loop once - duplicating the exit condition as well 
+ * 1) Clones body into body'
+ * 2) Links the last instruction of body (sb_final) into the first of body' 
+ * 3) Links the last instruction of body' into the first of body
+ *)
+let unroll_inner_loop_body code revmap iloop =
+  debug "iloop = "; print_inner_loop iloop;
+  let body = iloop.body in
+  let limit = !Clflags.option_funrollbody in
+  if count_ignore_nops code body > limit then begin
+    debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit;
+    (code, revmap)
+  end else if not @@ is_some iloop.sb_final then begin
+    debug "The loop body does not form a superblock OR we have predicted that we do not loop\n";
+    (code, revmap)
+  end else
+    let sb_final = get_some @@ iloop.sb_final in
+    let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in
+    let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in
+    let code' = ref code2 in
+    let head' = apply_map fwmap (iloop.head) in
+    let sb_final' = apply_map fwmap sb_final in
+    begin
+      code' := change_pointers !code' iloop.head head' [sb_final];
+      code' := change_pointers !code' head' iloop.head [sb_final']; 
+      (!code', revmap2)
+    end
+
+let unroll_inner_loops_body f code revmap =
+  let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+  let inner_loops = get_inner_loops f code is_loop_header in
+  debug "Number of loops found: %d\n" (List.length inner_loops);
+  let code' = ref code in
+  let revmap' = ref revmap in
+  begin
+    print_inner_loops inner_loops;
+    List.iter (fun iloop ->
+      let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
+      code' := new_code; revmap' := new_revmap
+    ) inner_loops;
+    (!code', !revmap')
+  end
+
+let extract_upto_icond f code head =
+  let rec extract h =
+    let inst = get_some @@ PTree.get h code in
+    match inst with
+    | Icond _ -> [h]
+    | _ -> ( match rtl_successors inst with
+        | [n] -> h :: (extract n)
+        | _ -> failwith "Found a node with more than one successor??"
+      )
+  in List.rev @@ extract head
+
+let rotate_inner_loop f code revmap iloop =
+  let header = extract_upto_icond f code iloop.head in
+  let limit = !Clflags.option_flooprotate in
+  let nb_duplicated = count_ignore_nops code header in
+  if nb_duplicated > limit then begin
+    debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+    (code, revmap)
+  end else if nb_duplicated == count_ignore_nops code iloop.body then begin
+    debug "The conditional branch is already at the end! No need to rotate.";
+    (code, revmap)
+  end else
+    let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+    let code' = ref code2 in
+    let head' = apply_map fwmap iloop.head in
+    begin
+      code' := change_pointers !code' iloop.head head' iloop.preds;
+      (!code', revmap2)
+    end
+
+let rotate_inner_loops f code revmap =
+  let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+  let inner_loops = get_inner_loops f code is_loop_header in
+  let code' = ref code in
+  let revmap' = ref revmap in
+  begin
+    print_inner_loops inner_loops;
+    List.iter (fun iloop ->
+      let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in
+      code' := new_code; revmap' := new_revmap
+    ) inner_loops;
+    (!code', !revmap')
+  end
+
+let loop_rotate f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_flooprotate > 0 then
+      rotate_inner_loops f code revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
+
+let static_predict f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  begin
+    reset_stats ();
+    set_stats_oc ();
+    let code =
+      if !Clflags.option_fpredict then
+        update_directions f code entrypoint
+      else code in
+    write_stats_oc ();
+    let code =
+      if !Clflags.option_fpredict then
+        invert_iconds code
+      else code in
+    ((code, entrypoint), revmap)
+  end
+
+let unroll_single f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_funrollsingle > 0 then
+      unroll_inner_loops_single f code revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
+
+let unroll_body f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_funrollbody > 0 then
+      unroll_inner_loops_body f code revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
+
+let tail_duplicate f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_ftailduplicate > 0 then
+      let traces = select_traces code entrypoint in
+      let preds = get_predecessors_rtl code in
+      let is_loop_header = get_loop_headers code entrypoint in
+      superblockify_traces code preds is_loop_header traces revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v
new file mode 100644
index 00000000..7e58eedf
--- /dev/null
+++ b/backend/Duplicatepasses.v
@@ -0,0 +1,58 @@
+Require Import RTL.
+Require Import Maps.
+Require Import Duplicate.
+Require Import Duplicateproof.
+
+(** Static Prediction *)
+
+Module StaticPredictOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux => "Duplicateaux.static_predict".
+End StaticPredictOracle.
+
+Module Staticpredictproof := DuplicateProof StaticPredictOracle.
+
+Module Staticpredict := Staticpredictproof.
+
+(** Unrolling one iteration out of the body *)
+
+Module UnrollSingleOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux => "Duplicateaux.unroll_single".
+End UnrollSingleOracle.
+
+Module Unrollsingleproof := DuplicateProof UnrollSingleOracle.
+
+Module Unrollsingle := Unrollsingleproof.
+
+(** Unrolling the body of innermost loops *)
+
+Module UnrollBodyOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux => "Duplicateaux.unroll_body".
+End UnrollBodyOracle.
+
+Module Unrollbodyproof := DuplicateProof UnrollBodyOracle.
+
+Module Unrollbody := Unrollbodyproof.
+
+(** Tail Duplication *)
+
+Module TailDuplicateOracle <: DuplicateOracle.
+ Axiom duplicate_aux : function -> code * node * (PTree.t node).
+ Extract Constant duplicate_aux =>  "Duplicateaux.tail_duplicate".
+End TailDuplicateOracle.
+
+Module Tailduplicateproof := DuplicateProof TailDuplicateOracle.
+
+Module Tailduplicate := Tailduplicateproof.
+
+(** Loop Rotate *)
+
+Module LoopRotateOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux =>  "Duplicateaux.loop_rotate".
+End LoopRotateOracle.
+
+Module Looprotateproof := DuplicateProof LoopRotateOracle.
+Module Looprotate := Looprotateproof.
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
new file mode 100644
index 00000000..2f3bad2f
--- /dev/null
+++ b/backend/Duplicateproof.v
@@ -0,0 +1,542 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** Correctness proof for code duplication *)
+Require Import AST Linking Errors Globalenvs Smallstep.
+Require Import Coqlib Maps Events Values.
+Require Import Op RTL Duplicate.
+
+Module DuplicateProof (D: DuplicateOracle).
+Include Duplicate D.
+
+Local Open Scope positive_scope.
+
+(** * Definition of [match_states] (independently of the translation) *)
+
+(* est-ce plus simple de prendre dupmap: node -> node, avec un noeud hors CFG à la place de None ? *)
+Inductive match_inst (dupmap: PTree.t node): instruction -> instruction -> Prop :=
+  | match_inst_nop: forall n n',
+      dupmap!n' = (Some n) -> match_inst dupmap (Inop n) (Inop n')
+  | match_inst_op: forall n n' op lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Iop op lr r n) (Iop op lr r n')
+  | match_inst_load: forall n n' tm m a lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Iload tm m a lr r n) (Iload tm m a lr r n')
+  | match_inst_store: forall n n' m a lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Istore m a lr r n) (Istore m a lr r n')
+  | match_inst_call: forall n n' s ri lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Icall s ri lr r n) (Icall s ri lr r n')
+  | match_inst_tailcall: forall s ri lr,
+      match_inst dupmap (Itailcall s ri lr) (Itailcall s ri lr)
+  | match_inst_builtin: forall n n' ef la br,
+      dupmap!n' = (Some n) -> match_inst dupmap (Ibuiltin ef la br n) (Ibuiltin ef la br n')
+  | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr i i',
+      dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
+      match_inst dupmap (Icond c lr ifso ifnot i) (Icond c lr ifso' ifnot' i')
+  | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr i i',
+      dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
+      match_inst dupmap (Icond c lr ifso ifnot i) (Icond (negate_condition c) lr ifnot' ifso' i')
+  | match_inst_jumptable: forall ln ln' r,
+      list_forall2 (fun n n' => (dupmap!n' = (Some n))) ln ln' ->
+      match_inst dupmap (Ijumptable r ln) (Ijumptable r ln')
+  | match_inst_return: forall or, match_inst dupmap (Ireturn or) (Ireturn or).
+
+Record match_function dupmap f f': Prop := {
+  dupmap_correct: forall n n', dupmap!n' = Some n ->
+    (forall i, (fn_code f)!n = Some i -> exists i', (fn_code f')!n' = Some i' /\ match_inst dupmap i i');
+  dupmap_entrypoint: dupmap!(fn_entrypoint f') = Some (fn_entrypoint f);
+  preserv_fnsig: fn_sig f = fn_sig f';
+  preserv_fnparams: fn_params f = fn_params f';
+  preserv_fnstacksize: fn_stacksize f = fn_stacksize f'
+}.
+
+Inductive match_fundef: RTL.fundef -> RTL.fundef -> Prop :=
+  | match_Internal dupmap f f': match_function dupmap f f' -> match_fundef (Internal f) (Internal f')
+  | match_External ef: match_fundef (External ef) (External ef).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframe_intro 
+      dupmap res f sp pc rs f' pc'
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc):
+      match_stackframes (Stackframe res f sp pc rs) (Stackframe res f' sp pc' rs).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro 
+      dupmap st f sp pc rs m st' f' pc'
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc):
+      match_states (State st f sp pc rs m) (State st' f' sp pc' rs m)
+  | match_states_call:
+    forall st st' f f' args m
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_fundef f f'),
+      match_states (Callstate st f args m) (Callstate st' f' args m)
+  | match_states_return:
+    forall st st' v m
+      (STACKS: list_forall2 match_stackframes st st'),
+      match_states (Returnstate st v m) (Returnstate st' v m).
+
+(** * Auxiliary properties *)
+
+
+Theorem transf_function_preserves:
+  forall f f',
+  transf_function f = OK f' ->
+     fn_sig f = fn_sig f' /\ fn_params f = fn_params f' /\ fn_stacksize f = fn_stacksize f'.
+Proof.
+  intros. unfold transf_function in H. destruct (duplicate_aux _) as (tcte & mp). destruct tcte as (tc & te). monadInv H.
+  repeat (split; try reflexivity).
+Qed.
+
+
+Lemma verify_mapping_mn_rec_step:
+  forall dupmap lb b f f',
+  In b lb ->
+  verify_mapping_mn_rec dupmap f f' lb = OK tt ->
+  verify_mapping_mn dupmap f f' b = OK tt.
+Proof.
+  induction lb; intros.
+  - monadInv H0. inversion H.
+  - inversion H.
+    + subst. monadInv H0. destruct x. assumption.
+    + monadInv H0. destruct x. eapply IHlb; assumption.
+Qed.
+
+Lemma verify_is_copy_correct:
+  forall dupmap n n',
+  verify_is_copy dupmap n n' = OK tt ->
+  dupmap ! n' = Some n.
+Proof.
+  intros. unfold verify_is_copy in H. destruct (_ ! n') eqn:REVM; [|inversion H].
+  destruct (n ?= p) eqn:NP; try (inversion H; fail).
+  eapply Pos.compare_eq in NP. subst.
+  reflexivity.
+Qed.
+
+Lemma verify_is_copy_list_correct:
+  forall dupmap ln ln',
+  verify_is_copy_list dupmap ln ln' = OK tt ->
+  list_forall2 (fun n n' => dupmap ! n' = Some n) ln ln'.
+Proof.
+  induction ln.
+  - intros. destruct ln'; monadInv H. constructor.
+  - intros. destruct ln'; monadInv H. destruct x. apply verify_is_copy_correct in EQ.
+    eapply IHln in EQ0. constructor; assumption.
+Qed.
+
+Lemma verify_match_inst_correct:
+  forall dupmap i i',
+  verify_match_inst dupmap i i' = OK tt ->
+  match_inst dupmap i i'.
+Proof.
+  intros. unfold verify_match_inst in H.
+  destruct i; try (inversion H; fail).
+(* Inop *)
+  - destruct i'; try (inversion H; fail).
+    eapply verify_is_copy_correct in H.
+    constructor; eauto.
+(* Iop *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (eq_operation _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Iload *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (trapping_mode_eq _ _); try discriminate.
+    destruct (chunk_eq _ _); try discriminate.
+    destruct (eq_addressing _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Istore *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (chunk_eq _ _); try discriminate.
+    destruct (eq_addressing _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Icall *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (signature_eq _ _); try discriminate.
+    destruct (product_eq _ _ _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. subst.
+    constructor. assumption.
+(* Itailcall *)
+  - destruct i'; try (inversion H; fail).
+    destruct (signature_eq _ _); try discriminate.
+    destruct (product_eq _ _ _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate. subst. clear H.
+    constructor.
+(* Ibuiltin *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (external_function_eq _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (builtin_res_eq_pos _ _); try discriminate. subst.
+    constructor. assumption.
+(* Icond *)
+  - destruct i'; try (inversion H; fail).
+    destruct (list_eq_dec _ _ _); try discriminate. subst.
+    destruct (eq_condition _ _); try discriminate.
+    + monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      eapply verify_is_copy_correct in EQ0.
+      subst; constructor; assumption.
+    + destruct (eq_condition _ _); try discriminate.
+      monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      eapply verify_is_copy_correct in EQ0.
+      subst; constructor; assumption.
+(* Ijumptable *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_list_correct in EQ.
+    destruct (Pos.eq_dec _ _); try discriminate. subst.
+    constructor. assumption.
+(* Ireturn *)
+  - destruct i'; try (inversion H; fail).
+    destruct (option_eq _ _ _); try discriminate. subst. clear H.
+    constructor.
+Qed.
+
+
+Lemma verify_mapping_mn_correct mp n n' i f f' tc:
+  mp ! n' = Some n ->
+  (fn_code f) ! n = Some i ->
+  (fn_code f') = tc ->
+  verify_mapping_mn mp f f' (n', n) = OK tt ->
+  exists i',
+     tc ! n' = Some i'
+  /\ match_inst mp i i'.
+Proof.
+  unfold verify_mapping_mn; intros H H0 H1 H2. rewrite H0 in H2. clear H0. rewrite H1 in H2. clear H1.
+  destruct (tc ! n') eqn:TCN; [| inversion H2].
+  exists i0. split; auto.
+  eapply verify_match_inst_correct. assumption.
+Qed.
+
+
+Lemma verify_mapping_mn_rec_correct:
+  forall mp n n' i f f' tc,
+  mp ! n' = Some n ->
+  (fn_code f) ! n = Some i ->
+  (fn_code f') = tc ->
+  verify_mapping_mn_rec mp f f' (PTree.elements mp) = OK tt ->
+  exists i',
+     tc ! n' = Some i'
+  /\ match_inst mp i i'.
+Proof.
+  intros. exploit PTree.elements_correct. eapply H. intros IN.
+  eapply verify_mapping_mn_rec_step in H2; eauto.
+  eapply verify_mapping_mn_correct; eauto.
+Qed.
+
+Theorem transf_function_correct f f':
+    transf_function f = OK f' -> exists dupmap, match_function dupmap f f'.
+Proof.
+  unfold transf_function.
+  intros TRANSF.
+  destruct (duplicate_aux _) as (tcte & mp). destruct tcte as (tc & te).
+  monadInv TRANSF.
+  unfold verify_mapping in EQ. monadInv EQ.
+  exists mp; constructor 1; simpl; auto.
+  + (* correct *) 
+  intros until n'. intros REVM i FNC.
+  unfold verify_mapping_match_nodes in EQ1. simpl in EQ1. destruct x.
+  eapply verify_mapping_mn_rec_correct; eauto.
+  simpl; eauto.
+  + (* entrypoint *)
+  intros. unfold verify_mapping_entrypoint in EQ0. simpl in EQ0.
+  eapply verify_is_copy_correct; eauto.
+  destruct x0; auto.
+Qed.
+
+Lemma transf_fundef_correct f f':
+  transf_fundef f = OK f' -> match_fundef f f'.
+Proof.
+  intros TRANSF; destruct f; simpl; monadInv TRANSF.
+  + exploit transf_function_correct; eauto.
+    intros (dupmap & MATCH_F).
+    eapply match_Internal; eauto.
+  + eapply match_External.
+Qed.
+
+(** * Preservation proof *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: program.
+Variable tprog: program.
+
+Hypothesis TRANSL: match_prog prog tprog.
+
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+(* UNUSED LEMMA ?
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+*)
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf cunit, transf_fundef f = OK tf /\ Genv.find_funct tge v = Some tf /\ linkorder cunit prog.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+  + unfold incl; auto.
+  + eapply linkorder_refl.
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma function_sig_translated:
+  forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. simpl. symmetry. apply transf_function_preserves. assumption.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Lemma list_nth_z_dupmap:
+  forall dupmap ln ln' (pc pc': node) val,
+  list_nth_z ln val = Some pc ->
+  list_forall2 (fun n n' => dupmap!n' = Some n) ln ln' ->
+  exists pc',
+     list_nth_z ln' val = Some pc'
+  /\ dupmap!pc' = Some pc.
+Proof.
+  induction ln; intros until val; intros LNZ LFA.
+  - inv LNZ.
+  - inv LNZ. destruct (zeq val 0) eqn:ZEQ.
+    + inv H0. destruct ln'; inv LFA.
+      simpl. exists p. split; auto.
+    + inv LFA. simpl. rewrite ZEQ. exploit IHln. 2: eapply H0. all: eauto.
+      intros (pc'1 & LNZ & REV). exists pc'1. split; auto. congruence.
+Qed.
+
+Theorem transf_initial_states:
+  forall s1, initial_state prog s1 ->
+  exists s2, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inv H.
+  exploit function_ptr_translated; eauto. intros (tf & FIND & TRANSF).
+  eexists. split.
+  - econstructor; eauto.
+    + eapply (Genv.init_mem_transf_partial TRANSL); eauto.
+    + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto.
+      symmetry. eapply match_program_main. eauto.
+    + destruct f.
+      * monadInv TRANSF. rewrite <- H3. symmetry; eapply transf_function_preserves. assumption.
+      * monadInv TRANSF. assumption.
+  - constructor; eauto.
+    + constructor. 
+    + apply transf_fundef_correct; auto.
+Qed.
+
+Theorem transf_final_states:
+  forall s1 s2 r,
+  match_states s1 s2 -> final_state s1 r -> final_state s2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem step_simulation:
+  forall s1 t s1', step ge s1 t s1' ->
+  forall s2 (MS: match_states s1 s2),
+  exists s2',
+     step tge s2 t s2'
+  /\ match_states s1' s2'.
+Proof.
+  Local Hint Resolve transf_fundef_correct: core.
+  induction 1; intros; inv MS.
+(* Inop *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3).
+    inv H3.
+    eexists. split.
+    + eapply exec_Inop; eauto.
+    + econstructor; eauto.
+(* Iop *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iop; eauto. erewrite eval_operation_preserved; eauto.
+    + econstructor; eauto.
+(* Iload *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload; eauto; (* is the follow still needed?*) erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+(* Iload notrap1 *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload_notrap1; eauto; erewrite eval_addressing_preserved; eauto. 
+    + econstructor; eauto.
+(* Iload notrap2 *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload_notrap2; eauto; erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+      
+(* Istore *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Istore; eauto; erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+(* Icall *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    destruct ros.
+    * simpl in H0. apply functions_translated in H0.
+      destruct H0 as (tf & cunit & TFUN & GFIND & LO).
+      eexists. split.
+      + eapply exec_Icall. eassumption. simpl. eassumption.
+        apply function_sig_translated. assumption.
+      + repeat (econstructor; eauto).
+    * simpl in H0. destruct (Genv.find_symbol _ _) eqn:GFS; try discriminate.
+      apply function_ptr_translated in H0. destruct H0 as (tf & GFF & TF).
+      eexists. split.
+      + eapply exec_Icall. eassumption. simpl. rewrite symbols_preserved. rewrite GFS.
+        eassumption. apply function_sig_translated. assumption.
+      + repeat (econstructor; eauto).
+(* Itailcall *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H10 & H11). inv H11.
+    pose symbols_preserved as SYMPRES.
+    destruct ros.
+    * simpl in H0. apply functions_translated in H0.
+      destruct H0 as (tf & cunit & TFUN & GFIND & LO).
+      eexists. split.
+      + eapply exec_Itailcall. eassumption. simpl. eassumption.
+        apply function_sig_translated. assumption.
+        erewrite <- preserv_fnstacksize; eauto.
+      + repeat (constructor; auto).
+    * simpl in H0. destruct (Genv.find_symbol _ _) eqn:GFS; try discriminate.
+      apply function_ptr_translated in H0. destruct H0 as (tf & GFF & TF).
+      eexists. split.
+      + eapply exec_Itailcall. eassumption. simpl. rewrite symbols_preserved. rewrite GFS.
+        eassumption. apply function_sig_translated. assumption.
+        erewrite <- preserv_fnstacksize; eauto.
+      + repeat (constructor; auto).
+(* Ibuiltin *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Ibuiltin; eauto. eapply eval_builtin_args_preserved; eauto.
+      eapply external_call_symbols_preserved; eauto. eapply senv_preserved.
+    + econstructor; eauto.
+(* Icond *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    * (* match_inst_cond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto.
+      + econstructor; eauto. destruct b; auto.
+    * (* match_inst_revcond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto. rewrite eval_negate_condition. rewrite H0. simpl. eauto.
+      + econstructor; eauto. destruct b; auto.
+(* Ijumptable *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    exploit list_nth_z_dupmap; eauto. intros (pc'1 & LNZ & REVM).
+    eexists. split.
+    + eapply exec_Ijumptable; eauto.
+    + econstructor; eauto.
+(* Ireturn *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Ireturn; eauto. erewrite <- preserv_fnstacksize; eauto.
+    + econstructor; eauto.
+(* exec_function_internal *)
+  - inversion TRANSF as [dupmap f0 f0' MATCHF|]; subst. eexists. split.
+    + eapply exec_function_internal. erewrite <- preserv_fnstacksize; eauto.
+    + erewrite preserv_fnparams; eauto.
+      econstructor; eauto. apply dupmap_entrypoint. assumption.
+(* exec_function_external *)
+  - inversion TRANSF as [|]; subst. eexists. split.
+    + econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + constructor. assumption.
+(* exec_return *)
+  - inv STACKS. destruct b1 as [res' f' sp' pc' rs']. eexists. split.
+    + constructor.
+    + inv H1. econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (semantics prog) (semantics tprog).
+Proof.
+  eapply forward_simulation_step with match_states.
+  - eapply senv_preserved.
+  - eapply transf_initial_states.
+  - eapply transf_final_states.
+  - eapply step_simulation.
+Qed.
+
+End PRESERVATION.
+
+End DuplicateProof.
diff --git a/backend/FirstNop.v b/backend/FirstNop.v
new file mode 100644
index 00000000..b3c765e4
--- /dev/null
+++ b/backend/FirstNop.v
@@ -0,0 +1,30 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Definition transf_function (f: function) : function :=
+  let start_pc := Pos.succ (max_pc_function f) in
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.set start_pc (Inop f.(fn_entrypoint)) f.(fn_code);
+     fn_entrypoint := start_pc |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
+
diff --git a/backend/FirstNopproof.v b/backend/FirstNopproof.v
new file mode 100644
index 00000000..5a1c5acf
--- /dev/null
+++ b/backend/FirstNopproof.v
@@ -0,0 +1,285 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import FirstNop.
+Require Import Lia.
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some i.
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  destruct (peq pc (Pos.succ (max_pc_function f))) as [EQ | NEQ].
+  { assert (pc <= (max_pc_function f))%positive as LE by (eapply max_pc_function_sound; eassumption).
+    subst pc.
+    lia.
+  }
+  rewrite PTree.gso by congruence.
+  assumption.
+Qed.
+
+Hint Resolve transf_function_at : firstnop.
+
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+(*
+Lemma match_pc_refl : forall f pc, match_pc f pc pc.
+Proof.
+  unfold match_pc.
+  left.
+  trivial.
+Qed.
+
+Hint Resolve match_pc_refl : firstnop.
+
+Lemma initial_jump:
+  forall f,
+  (fn_code (transf_function f)) ! (Pos.succ (max_pc_function f)) =
+  Some (Inop (fn_entrypoint f)).
+Proof.
+  intros. unfold transf_function. simpl.
+  apply PTree.gss.
+Qed.
+
+Hint Resolve initial_jump : firstnop.
+ *)
+
+Lemma match_pc_same :
+  forall f pc i,
+    PTree.get pc (fn_code f) = Some i ->
+    PTree.get pc (fn_code (transf_function f)) = Some i.
+Proof.
+  intros.
+  unfold transf_function. simpl.
+  rewrite <- H.
+  apply PTree.gso.
+  pose proof (max_pc_function_sound f pc i H) as LE.
+  unfold Ple in LE.
+  lia.
+Qed.
+
+Hint Resolve match_pc_same : firstnop.
+
+
+Definition measure (S: RTL.state) : nat :=
+  match S with
+  | State _ _ _ _ _ _ => 0%nat
+  | Callstate _ _ _ _ => 1%nat
+  | Returnstate _ _ _ => 0%nat
+  end.
+
+Lemma step_simulation:
+  forall S1 t S2,
+  step ge S1 t S2 ->
+  forall S1' (MS: match_states S1 S1'),
+  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
+  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
+Proof.
+  induction 1; intros; inv MS.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Inop; eauto with firstnop.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iop with (v:=v); eauto with firstnop.
+      rewrite <- H0.
+      apply eval_operation_preserved.
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iload with (v:=v); eauto with firstnop.
+      all: rewrite <- H0.
+      all: auto using eval_addressing_preserved, symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iload_notrap1; eauto with firstnop.
+      all: rewrite <- H0;
+      apply eval_addressing_preserved;
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iload_notrap2; eauto with firstnop.
+      all: rewrite <- H0;
+      apply eval_addressing_preserved;
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Istore; eauto with firstnop.
+      all: rewrite <- H0;
+      apply eval_addressing_preserved;
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Icall.
+      apply match_pc_same. exact H.
+      apply find_function_translated.
+      exact H0.
+      apply sig_preserved.
+    + constructor.
+      constructor; auto.
+      constructor.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Itailcall.
+      apply match_pc_same. exact H.
+      apply find_function_translated.
+      exact H0.
+      apply sig_preserved.
+      unfold transf_function; simpl.
+      eassumption.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Ibuiltin; eauto with firstnop.
+      eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+      eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Icond; eauto with firstnop.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Ijumptable; eauto with firstnop.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Ireturn; eauto with firstnop.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_two.
+      * eapply exec_function_internal; eauto with firstnop.
+      * eapply exec_Inop.
+        unfold transf_function; simpl.
+        rewrite PTree.gss.
+        reflexivity.
+      * auto.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_function_external; eauto with firstnop.
+      eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + constructor; auto.
+  - left.
+    inv STACKS. inv H1.
+    econstructor; split.
+    + eapply plus_one. eapply exec_return; eauto.
+    + constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_star.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/ForwardMoves.v b/backend/ForwardMoves.v
new file mode 100644
index 00000000..1b375532
--- /dev/null
+++ b/backend/ForwardMoves.v
@@ -0,0 +1,345 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+(* Static analysis *)
+
+Module RELATION.
+  
+Definition t := (PTree.t reg).
+Definition eq (r1 r2 : t) :=
+  forall x, (PTree.get x r1) = (PTree.get x r2).
+
+Definition top : t := PTree.empty reg.
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  unfold eq.
+  intros; reflexivity.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  unfold eq.
+  intros; eauto.
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  unfold eq.
+  intros; congruence.
+Qed.
+
+Definition reg_beq (x y : reg) :=
+  if Pos.eq_dec x y then true else false.
+
+Definition beq (r1 r2 : t) := PTree.beq reg_beq r1 r2.
+
+Lemma beq_correct: forall r1 r2, beq r1 r2 = true -> eq r1 r2.
+Proof.
+  unfold beq, eq. intros r1 r2 EQ x.
+  pose proof (PTree.beq_correct reg_beq r1 r2) as CORRECT.
+  destruct CORRECT as [CORRECTF CORRECTB].
+  pose proof (CORRECTF EQ x) as EQx.
+  clear CORRECTF CORRECTB EQ.
+  unfold reg_beq in *.
+  destruct (r1 ! x) as [R1x | ] in *;
+    destruct (r2 ! x) as [R2x | ] in *;
+    trivial; try contradiction.
+  destruct (Pos.eq_dec R1x R2x) in *; congruence.
+Qed.
+
+Definition ge (r1 r2 : t) :=
+  forall x,
+    match PTree.get x r1 with
+    | None => True
+    | Some v => (PTree.get x r2) = Some v
+    end.
+
+Lemma ge_refl: forall r1 r2, eq r1 r2 -> ge r1 r2.
+Proof.
+  unfold eq, ge.
+  intros r1 r2 EQ x.
+  pose proof (EQ x) as EQx.
+  clear EQ.
+  destruct (r1 ! x).
+  - congruence.
+  - trivial.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+Proof.
+  unfold ge.
+  intros r1 r2 r3 GE12 GE23 x.
+  pose proof (GE12 x) as GE12x; clear GE12.
+  pose proof (GE23 x) as GE23x; clear GE23.
+  destruct (r1 ! x); trivial.
+  destruct (r2 ! x); congruence.
+Qed.
+
+Definition lub (r1 r2 : t) :=
+  PTree.combine
+    (fun ov1 ov2 =>
+       match ov1, ov2 with
+       | (Some v1), (Some v2) =>
+         if Pos.eq_dec v1 v2
+         then ov1
+         else None
+       | None, _
+       | _, None => None
+       end)
+    r1 r2.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial.
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial.
+  congruence.
+Qed.
+
+End RELATION.
+
+Module Type SEMILATTICE_WITHOUT_BOTTOM.
+
+  Parameter t: Type.
+  Parameter eq: t -> t -> Prop.
+  Axiom eq_refl: forall x, eq x x.
+  Axiom eq_sym: forall x y, eq x y -> eq y x.
+  Axiom eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Parameter beq: t -> t -> bool.
+  Axiom beq_correct: forall x y, beq x y = true -> eq x y.
+  Parameter ge: t -> t -> Prop.
+  Axiom ge_refl: forall x y, eq x y -> ge x y.
+  Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Parameter lub: t -> t -> t.
+  Axiom ge_lub_left: forall x y, ge (lub x y) x.
+  Axiom ge_lub_right: forall x y, ge (lub x y) y.
+
+End SEMILATTICE_WITHOUT_BOTTOM.
+
+Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM).
+  Definition t := option L.t.
+  Definition eq (a b : t) :=
+    match a, b with
+    | None, None => True
+    | Some x, Some y => L.eq x y
+    | Some _, None | None, Some _ => False
+    end.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq; destruct x; trivial.
+    apply L.eq_refl.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; destruct x; destruct y; trivial.
+    apply L.eq_sym.
+  Qed.
+  
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; destruct x; destruct y; destruct z; trivial.
+    - apply L.eq_trans.
+    - contradiction.
+  Qed.
+  
+  Definition beq (x y : t) :=
+    match x, y with
+    | None, None => true
+    | Some x, Some y => L.beq x y
+    | Some _, None | None, Some _ => false
+    end.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq.
+    destruct x; destruct y; trivial; try congruence.
+    apply L.beq_correct.
+  Qed.
+  
+  Definition ge (x y : t) :=
+    match x, y with
+    | None, Some _ => False
+    | _, None => True
+    | Some a, Some b => L.ge a b
+    end.
+  
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    destruct x; destruct y; trivial.
+    apply L.ge_refl.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    destruct x; destruct y; destruct z; trivial; try contradiction.
+    apply L.ge_trans.
+  Qed.
+  
+  Definition bot: t := None.
+  Lemma ge_bot: forall x, ge x bot.
+  Proof.
+    unfold ge, bot.
+    destruct x; trivial.
+  Qed.
+  
+  Definition lub (a b : t) :=
+    match a, b with
+    | None, _ => b
+    | _, None => a
+    | (Some x), (Some y) => Some (L.lub x y)
+    end.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_left.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_right.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+End ADD_BOTTOM.
+
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+Definition kill (dst : reg) (rel : RELATION.t) :=
+  PTree.filter1 (fun x => if Pos.eq_dec dst x then false else true)
+                (PTree.remove dst rel).
+
+Definition move (src dst : reg) (rel : RELATION.t) :=
+  PTree.set dst (match PTree.get src rel with
+                 | Some src' => src'
+                 | None => src
+                 end) (kill dst rel).
+
+Fixpoint kill_builtin_res (res : builtin_res reg) (rel : RELATION.t) :=
+  match res with
+  | BR z => kill z rel
+  | BR_none => rel
+  | BR_splitlong hi lo => kill_builtin_res hi (kill_builtin_res lo rel)
+  end.
+
+Definition apply_instr instr x :=
+  match instr with
+  | Inop _
+  | Icond _ _ _ _ _
+  | Ijumptable _ _
+  | Istore _ _ _ _ _ => Some x
+  | Iop Omove (src :: nil) dst _ => Some (move src dst x)
+  | Iop _ _ dst _
+  | Iload _ _ _ _ dst _
+  | Icall _ _ _ dst _ => Some (kill dst x)
+  | Ibuiltin _ _ res _ => Some (RELATION.top) (* TODO (kill_builtin_res res x) *)
+  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  end.
+
+Definition apply_instr' code (pc : node) (ro : RB.t) : RB.t :=
+  match ro with
+  | None => None
+  | Some x =>
+    match code ! pc with
+    | None => RB.bot
+    | Some instr => apply_instr instr x
+    end
+  end.
+
+Definition forward_map (f : RTL.function) := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+
+Definition get_r (rel : RELATION.t) (x : reg) :=
+  match PTree.get x rel with
+  | None => x
+  | Some src => src
+  end.
+
+Definition get_rb (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => get_r rel x
+  end.
+
+Definition subst_arg (fmap : option (PMap.t RB.t)) (pc : node) (x : reg) : reg :=
+  match fmap with
+  | None => x
+  | Some inv => get_rb (PMap.get pc inv) x
+  end.
+
+Definition subst_args fmap pc := List.map (subst_arg fmap pc).
+
+(* Transform *)
+Definition transf_instr (fmap : option (PMap.t RB.t))
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    Iop op (subst_args fmap pc args) dst s
+  | Iload trap chunk addr args dst s =>
+    Iload trap chunk addr (subst_args fmap pc args) dst s
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) src s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 i =>
+    Icond cond (subst_args fmap pc args) s1 s2 i
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/ForwardMovesproof.v b/backend/ForwardMovesproof.v
new file mode 100644
index 00000000..f3e572e0
--- /dev/null
+++ b/backend/ForwardMovesproof.v
@@ -0,0 +1,813 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import ForwardMoves.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; trivial.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc =
+    Some(transf_instr (forward_map f) pc i).
+Proof.
+  intros until i. intro CODE.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite CODE.
+  reflexivity.
+Qed.
+
+(*
+Definition fmap_sem (fmap : option (PMap.t RB.t)) (pc : node) (rs : regset) :=
+  forall x : reg,
+    (rs # (subst_arg fmap pc x)) = (rs # x).
+ *)
+
+Lemma apply_instr'_bot :
+  forall code,
+  forall pc,
+    RB.eq (apply_instr' code pc RB.bot) RB.bot.
+Proof.
+  reflexivity.
+Qed.
+
+Definition get_rb_sem (rb : RB.t) (rs : regset) :=
+  match rb with
+  | None => False
+  | Some rel =>
+    forall x : reg,
+      (rs # (get_r rel x)) = (rs # x)
+  end.
+
+Lemma get_rb_sem_ge:
+  forall rb1 rb2 : RB.t,
+    (RB.ge rb1 rb2) ->
+    forall rs : regset,
+      (get_rb_sem rb2 rs) -> (get_rb_sem rb1 rs).
+Proof.
+  destruct rb1 as [r1 | ];
+    destruct rb2 as [r2 | ];
+    unfold get_rb_sem;
+    simpl;
+    intros GE rs RB2RS;
+    try contradiction.
+  unfold RELATION.ge in GE.
+  unfold get_r in *.
+  intro x.
+  pose proof (GE x) as GEx.
+  pose proof (RB2RS x) as RB2RSx.
+  destruct (r1 ! x) as [r1x | ] in *;
+    destruct (r2 ! x) as [r2x | ] in *;
+    congruence.
+Qed.
+
+Definition fmap_sem (fmap : option (PMap.t RB.t))
+  (pc : node) (rs : regset) :=
+  match fmap with
+  | None => True
+  | Some m => get_rb_sem (PMap.get pc m) rs
+  end.
+
+Lemma subst_arg_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  forall arg,
+    fmap_sem (forward_map f) pc rs ->
+    rs # (subst_arg (forward_map f) pc arg) = rs # arg.
+Proof.
+  intros until arg.
+  intro SEM.
+  unfold fmap_sem in SEM.
+  destruct (forward_map f) as [map |]in *; trivial.
+  simpl.
+  unfold get_rb_sem in *.
+  destruct (map # pc).
+  2: contradiction.
+  apply SEM.
+Qed.
+
+Lemma subst_args_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  fmap_sem (forward_map f) pc rs ->
+  forall args,
+    rs ## (subst_args (forward_map f) pc args) = rs ## args.
+Proof.
+  induction args; trivial.
+  simpl.
+  f_equal.
+  apply subst_arg_ok; assumption.
+  assumption.
+Qed.
+
+Lemma kill_ok:
+  forall dst,
+  forall mpc,
+  forall rs,
+  forall v,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs # dst <- v.
+Proof.
+  unfold get_rb_sem.
+  intros until v.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  {
+    subst dst.
+    rewrite Regmap.gss.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    apply Regmap.gss.
+  }
+  rewrite (Regmap.gso v rs NEQ).
+  unfold kill, get_r in *.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    {
+      subst dst.
+      rewrite Regmap.gso by assumption.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  rewrite Regmap.gso by assumption.
+  reflexivity.
+Qed.
+
+Lemma kill_weaken:
+  forall dst,
+  forall mpc,
+  forall rs,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs.
+Proof.
+  unfold get_rb_sem.
+  intros until rs.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  {
+    subst dst.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  unfold kill, get_r in *.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    {
+      reflexivity.
+    }
+    assumption.
+  }
+  reflexivity.
+Qed.
+
+Lemma top_ok :
+  forall rs, get_rb_sem (Some RELATION.top) rs.
+Proof.
+  unfold get_rb_sem, RELATION.top.
+  intros.
+  unfold get_r.
+  rewrite PTree.gempty.
+  reflexivity.
+Qed.
+
+Lemma move_ok:
+  forall mpc : RELATION.t,
+  forall src res : reg,
+  forall rs : regset,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (move src res mpc)) (rs # res <- (rs # src)).
+Proof.
+  unfold get_rb_sem, move.
+  intros until rs.
+  intros SEM x.
+  unfold get_r in *.
+  destruct (Pos.eq_dec res x).
+  {
+    subst res.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    pose proof (SEM src) as SEMsrc.
+    destruct (mpc ! src) as [mpcsrc | ] in *.
+    {
+      destruct (Pos.eq_dec x mpcsrc).
+      {
+        subst mpcsrc.
+        rewrite Regmap.gss.
+        reflexivity.
+      }
+      rewrite Regmap.gso by congruence.
+      assumption.
+    }
+    destruct (Pos.eq_dec x src).
+    {
+      subst src.
+      rewrite Regmap.gss.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    reflexivity.
+  }
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso with (i := x) by congruence.
+  unfold kill.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x) as [ r |].
+  {
+    destruct (Pos.eq_dec res r).
+    {
+      subst r.
+      rewrite Regmap.gso by congruence.
+      trivial.
+    }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  rewrite Regmap.gso by congruence.
+  reflexivity.
+Qed.
+  
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Definition is_killed_in_map (map : PMap.t RB.t) pc res :=
+  match PMap.get pc map with
+  | None => True
+  | Some rel => exists rel', RELATION.ge rel (kill res rel')
+  end.
+
+Definition is_killed_in_fmap fmap pc res :=
+  match fmap with
+  | None => True
+  | Some map => is_killed_in_map map pc res
+  end.
+
+Definition killed_twice:
+  forall rel : RELATION.t,
+  forall res,
+    RELATION.eq (kill res rel) (kill res (kill res rel)).
+Proof.
+  unfold kill, RELATION.eq.
+  intros.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gfilter1.
+  destruct (Pos.eq_dec res x).
+  {
+    subst res.
+    rewrite PTree.grs.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  destruct (rel ! x) as [relx | ]; trivial.
+  destruct (Pos.eq_dec res relx); trivial.
+  destruct (Pos.eq_dec res relx); congruence.
+Qed.
+
+Lemma get_rb_killed:
+  forall mpc,
+  forall rs,
+  forall rel,
+  forall res,
+  forall vres,
+    (get_rb_sem (Some mpc) rs) ->
+    (RELATION.ge mpc (kill res rel)) ->
+    (get_rb_sem (Some mpc) rs # res <- vres).
+Proof.
+  simpl.
+  intros until vres.
+  intros SEM GE x.
+  pose proof (GE x) as GEx.
+  pose proof (SEM x) as SEMx.
+  unfold get_r in *.
+  destruct (mpc ! x) as [mpcx | ] in *; trivial.
+  unfold kill in GEx.
+  rewrite PTree.gfilter1 in GEx.
+  destruct (Pos.eq_dec res x) as [ | res_NE_x].
+  {
+    subst res.
+    rewrite PTree.grs in GEx.
+    discriminate.
+  }
+  rewrite PTree.gro in GEx by congruence.
+  rewrite Regmap.gso with (i := x) by congruence.
+  destruct (rel ! x) as [relx | ]; try discriminate.
+  destruct (Pos.eq_dec res relx) as [ res_EQ_relx | res_NE_relx] in *; try discriminate.
+  rewrite Regmap.gso by congruence.
+  congruence.
+Qed.
+  
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+    (fmap_sem (forward_map f) pc rs) ->
+    (is_killed_in_fmap (forward_map f) pc res) ->
+      match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+                                 (STACKS: list_forall2 match_frames stk stk'),
+      (fmap_sem (forward_map f) pc rs) ->
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma op_cases:
+  forall op,
+  forall args,
+  forall dst,
+  forall s,
+  forall x,
+    (exists src, op=Omove /\ args = src :: nil /\
+                 (apply_instr (Iop op args dst s) x) = Some (move src dst x))
+    \/
+    (apply_instr (Iop op args dst s) x) = Some (kill dst x).
+Proof.
+  destruct op; try (right; simpl; reflexivity).
+  destruct args as [| arg0 args0t]; try (right; simpl; reflexivity).
+  destruct args0t as [| arg1 args1t]; try (right; simpl; reflexivity).
+  left.
+  eauto.
+Qed.
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+  
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0.
+  rewrite subst_args_ok by assumption.
+  apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  rewrite MPC in GE.
+  rewrite H in GE.
+  
+  destruct (op_cases op args res pc' mpc) as [[src [OP [ARGS MOVE]]] | KILL].
+  {
+    subst op.
+    subst args.
+    rewrite MOVE in GE.
+    simpl in H0.
+    simpl in GE.
+    apply get_rb_sem_ge with (rb2 := Some (move src res mpc)).
+    assumption.
+    replace v with (rs # src) by congruence.
+    apply move_ok.
+    assumption.
+  }
+  rewrite KILL in GE.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  assumption.
+  apply kill_ok.
+  assumption.
+  
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. constructor; auto. constructor.
+
+  {
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  {
+    replace (Some (kill res mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_weaken.
+  assumption.
+  }
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  unfold fmap_sem in *.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  rewrite H in GE.
+  simpl in GE.
+  unfold is_killed_in_fmap, is_killed_in_map.
+  unfold RB.ge in GE.
+  destruct (map # pc') as [mpc'|] eqn:MPC' in *; trivial.
+  eauto.
+  
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. auto.
+  
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    replace (Some RELATION.top) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply top_ok.
+  
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite subst_args_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    destruct b; tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite subst_arg_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    apply list_nth_z_in with (n := Int.unsigned n).
+    assumption.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* return *)
+- destruct or as [arg | ].
+  {
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    unfold regmap_optget.
+    rewrite subst_arg_ok by eassumption.
+    constructor; auto.
+  }
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    constructor; auto.
+  
+  
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    eapply DS.fixpoint_entry with (code := fn_code f) (successors := successors_instr); try eassumption.
+  }
+  apply top_ok.
+  
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    constructor; auto.
+
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  unfold is_killed_in_fmap in H8.
+  unfold is_killed_in_map in H8.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  destruct H8 as [rel' RGE].
+  eapply get_rb_killed; eauto.
+Qed.
+
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 6f4bbe29..d9e4651e 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -15,6 +15,7 @@ open Camlcoq
 open AST
 open Registers
 open Machregs
+open Machregsaux
 open Locations
 open Conventions1
 open XTL
@@ -237,12 +238,6 @@ type graph = {
    according to their types.  A variable can be forced into class 2
    by giving it a negative spill cost. *)
 
-let class_of_type = function
-  | Tint | Tlong -> 0
-  | Tfloat | Tsingle -> 1
-  | Tany32 -> 0
-  | Tany64 -> if Archi.ptr64 then 0 else 1
-
 let class_of_reg r =
   if Conventions1.is_float_reg r then 1 else 0
 
diff --git a/backend/IRC.mli b/backend/IRC.mli
index 59471329..254f27ff 100644
--- a/backend/IRC.mli
+++ b/backend/IRC.mli
@@ -39,5 +39,4 @@ val add_pref: graph -> var -> var -> unit
 val coloring: graph -> (var -> loc)
 
 (* Auxiliaries to deal with register classes *)
-val class_of_type: AST.typ -> int
 val class_of_loc: loc -> int
diff --git a/backend/Inject.v b/backend/Inject.v
new file mode 100644
index 00000000..a24fef50
--- /dev/null
+++ b/backend/Inject.v
@@ -0,0 +1,134 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+
+Inductive inj_instr : Type :=
+  | INJnop
+  | INJop: operation -> list reg -> reg -> inj_instr
+  | INJload: memory_chunk -> addressing -> list reg -> reg -> inj_instr.
+
+Definition inject_instr (i : inj_instr) (pc' : node) : instruction :=
+  match i with
+  | INJnop => Inop pc'
+  | INJop op args dst => Iop op args dst pc'
+  | INJload chunk addr args dst => Iload NOTRAP chunk addr args dst pc'
+  end.
+
+Fixpoint inject_list (prog : code) (pc : node) (dst : node)
+         (l : list inj_instr) : node * code :=
+  let pc' := Pos.succ pc in
+  match l with
+  | nil => (pc', PTree.set pc (Inop dst) prog)
+  | h::t =>
+    inject_list (PTree.set pc (inject_instr h pc') prog)
+                pc' dst t
+  end.
+
+Definition successor (i : instruction) : node :=
+  match i with
+  | Inop pc' => pc'
+  | Iop _ _ _ pc' => pc'
+  | Iload _ _ _ _ _ pc' => pc'
+  | Istore _ _ _ _ pc' => pc'
+  | Icall _ _ _ _ pc' => pc'
+  | Ibuiltin _ _ _ pc' => pc'
+  | Icond _ _ pc' _ _ => pc'
+  | Itailcall _ _ _
+  | Ijumptable _ _
+  | Ireturn _ => 1
+  end.
+
+Definition alter_successor (i : instruction) (pc' : node) : instruction :=
+  match i with
+  | Inop _ => Inop pc'
+  | Iop op args dst _ => Iop op args dst pc'
+  | Iload trap chunk addr args dst _ => Iload trap chunk addr args dst pc'
+  | Istore chunk addr args src _ => Istore chunk addr args src pc'
+  | Ibuiltin ef args res _ => Ibuiltin ef args res pc'
+  | Icond cond args _ pc2 expected => Icond cond args pc' pc2 expected
+  | Icall sig ros args res _ => Icall sig ros args res pc'
+  | Itailcall _ _ _
+  | Ijumptable _ _
+  | Ireturn _ => i
+  end.
+
+Definition inject_at (prog : code) (pc extra_pc : node)
+           (l : list inj_instr) : node * code :=
+  match PTree.get pc prog with
+  | Some i =>
+    inject_list (PTree.set pc (alter_successor i extra_pc) prog)
+                extra_pc (successor i) l
+  | None => inject_list prog extra_pc 1 l (* does not happen *)
+  end.
+
+Definition inject_at' (already : node * code) pc l :=
+  let (extra_pc, prog) := already in
+  inject_at prog pc extra_pc l.
+
+Definition inject_l (prog : code) extra_pc injections :=
+  List.fold_left (fun already (injection : node * (list inj_instr)) =>
+                    inject_at' already (fst injection) (snd injection))
+    injections
+    (extra_pc, prog).
+(*
+Definition inject' (prog : code) (extra_pc : node) (injections : PTree.t (list inj_instr)) :=
+  PTree.fold inject_at' injections (extra_pc, prog).
+
+Definition inject prog extra_pc injections : code :=
+  snd (inject' prog extra_pc injections).
+*)
+
+Section INJECTOR.
+  Variable gen_injections : function -> node -> reg -> PTree.t (list inj_instr).
+
+  Definition valid_injection_instr (max_reg : reg) (i : inj_instr) :=
+    match i with
+    | INJnop => true
+    | INJop op args res => (max_reg <? res) && (negb (is_trapping_op op)
+       && (Datatypes.length args =? args_of_operation op)%nat) 
+    | INJload _ _ _ res => max_reg <? res
+    end.
+  
+  Definition valid_injections1 max_pc max_reg :=
+    List.forallb
+         (fun injection =>
+            ((fst injection) <=? max_pc) &&
+            (List.forallb (valid_injection_instr max_reg) (snd injection))
+         ).
+
+  Definition valid_injections f :=
+    valid_injections1 (max_pc_function f) (max_reg_function f).
+  
+  Definition transf_function (f : function) : res function :=
+    let max_pc := max_pc_function f in
+    let max_reg := max_reg_function f in
+    let injections := PTree.elements (gen_injections f max_pc max_reg) in
+    if valid_injections1 max_pc max_reg injections
+    then
+      OK {| fn_sig := f.(fn_sig);
+            fn_params := f.(fn_params);
+            fn_stacksize := f.(fn_stacksize);
+            fn_code := snd (inject_l (fn_code f) (Pos.succ max_pc) injections);
+            fn_entrypoint := f.(fn_entrypoint) |}
+    else Error (msg "Inject.transf_function: injections at bad locations").
+
+Definition transf_fundef (fd: fundef) : res fundef :=
+  AST.transf_partial_fundef transf_function fd.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
+End INJECTOR.
diff --git a/backend/Injectproof.v b/backend/Injectproof.v
new file mode 100644
index 00000000..dd5e72f8
--- /dev/null
+++ b/backend/Injectproof.v
@@ -0,0 +1,1806 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Globalenvs Values Events.
+Require Import Inject.
+Require Import Lia.
+
+Local Open Scope positive.
+
+Lemma inject_list_preserves:
+  forall l prog pc dst pc0,
+    pc0 < pc ->
+    PTree.get pc0 (snd (inject_list prog pc dst l)) = PTree.get pc0 prog.
+Proof.
+  induction l; intros; simpl.
+  - apply PTree.gso. lia.
+  - rewrite IHl by lia.
+    apply PTree.gso. lia.
+Qed.
+
+Fixpoint pos_add_nat x n :=
+  match n with
+  | O => x
+  | S n' => Pos.succ (pos_add_nat x n')
+  end.
+
+Lemma pos_add_nat_increases : forall x n, x <= (pos_add_nat x n).
+Proof.
+  induction n; simpl; lia.
+Qed.
+
+Lemma pos_add_nat_succ : forall n x,
+    Pos.succ (pos_add_nat x n) = pos_add_nat (Pos.succ x) n.
+Proof.
+  induction n; simpl; intros; trivial.
+  rewrite IHn.
+  reflexivity.
+Qed.
+
+Lemma pos_add_nat_monotone : forall x n1 n2,
+    (n1 < n2) % nat ->
+    (pos_add_nat x n1) < (pos_add_nat x n2).
+Proof.
+  induction n1; destruct n2; intros.
+  - lia.
+  - simpl.
+    pose proof (pos_add_nat_increases x n2).
+    lia.
+  - lia.
+  - simpl.
+    specialize IHn1 with n2.
+    lia.
+Qed.
+
+Lemma inject_list_increases:
+  forall l prog pc dst,
+    (fst (inject_list prog pc dst l)) = pos_add_nat pc (S (List.length l)).
+Proof.
+  induction l; simpl; intros; trivial.
+  rewrite IHl.
+  simpl.
+  rewrite <- pos_add_nat_succ.
+  reflexivity.
+Qed.
+
+Program Fixpoint bounded_nth
+  {T : Type} (k : nat) (l : list T) (BOUND : (k < List.length l)%nat) : T :=
+  match k, l with
+  | O, h::_ => h
+  | (S k'), _::l' => bounded_nth k' l' _
+  | _, nil => _
+  end.
+Obligation 1.
+Proof.
+  simpl in BOUND.
+  lia.
+Qed.
+Obligation 2.
+Proof.
+  simpl in BOUND.
+  lia.
+Qed.
+
+Program Definition bounded_nth_S_statement : Prop :=
+  forall (T : Type) (k : nat) (h : T) (l : list T) (BOUND : (k < List.length l)%nat),
+    bounded_nth (S k) (h::l) _ = bounded_nth k l BOUND.
+Obligation 1.
+lia.
+Qed.
+
+Lemma bounded_nth_proof_irr :
+  forall {T : Type} (k : nat) (l : list T)
+         (BOUND1 BOUND2 : (k < List.length l)%nat),
+    (bounded_nth k l BOUND1) = (bounded_nth k l BOUND2).
+Proof.
+  induction k; destruct l; simpl; intros; trivial; lia.
+Qed.
+
+Lemma bounded_nth_S : bounded_nth_S_statement.
+Proof.
+  unfold bounded_nth_S_statement.
+  induction k; destruct l; simpl; intros; trivial.
+  1, 2: lia.
+  apply bounded_nth_proof_irr.
+Qed.
+
+Lemma inject_list_injected:
+  forall l prog pc dst k (BOUND : (k < (List.length l))%nat),
+    PTree.get (pos_add_nat pc k) (snd (inject_list prog pc dst l)) =
+    Some (inject_instr (bounded_nth k l BOUND) (Pos.succ (pos_add_nat pc k))).
+Proof.
+  induction l; simpl; intros.
+  - lia.
+  - simpl.
+    destruct k as [ | k]; simpl pos_add_nat.
+    + simpl bounded_nth.
+      rewrite inject_list_preserves by lia.
+      apply PTree.gss.
+    + rewrite pos_add_nat_succ.
+      erewrite IHl.
+      f_equal. f_equal.
+      simpl.
+      apply bounded_nth_proof_irr.
+      Unshelve.
+      lia.
+Qed.
+
+Lemma inject_list_injected_end:
+  forall l prog pc dst,
+    PTree.get (pos_add_nat pc (List.length l))
+              (snd (inject_list prog pc dst l)) =
+    Some (Inop dst).
+Proof.
+  induction l; simpl; intros.
+  - apply PTree.gss.
+  - rewrite pos_add_nat_succ.
+    apply IHl.
+Qed.
+    
+Lemma inject_at_preserves :
+  forall prog pc extra_pc l pc0,
+    pc0 < extra_pc ->
+    pc0 <> pc ->
+    PTree.get pc0 (snd (inject_at prog pc extra_pc l)) = PTree.get pc0 prog.
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc prog) eqn:GET.
+  - rewrite inject_list_preserves; trivial.
+    apply PTree.gso; lia.
+  - apply inject_list_preserves; trivial.
+Qed.
+
+Lemma inject_at_redirects:
+  forall prog pc extra_pc l i,
+    pc < extra_pc ->
+    PTree.get pc prog = Some i ->
+    PTree.get pc (snd (inject_at prog pc extra_pc l)) =
+    Some (alter_successor i extra_pc).
+Proof.
+  intros until i. intros BEFORE GET. unfold inject_at.
+  rewrite GET.
+  rewrite inject_list_preserves by trivial.
+  apply PTree.gss.
+Qed.
+
+Lemma inject_at_redirects_none:
+  forall prog pc extra_pc l,
+    pc < extra_pc ->
+    PTree.get pc prog = None ->
+    PTree.get pc (snd (inject_at prog pc extra_pc l)) = None.
+Proof.
+  intros until l. intros BEFORE GET. unfold inject_at.
+  rewrite GET.
+  rewrite inject_list_preserves by trivial.
+  assumption.
+Qed.
+
+Lemma inject_at_increases:
+  forall prog pc extra_pc l,
+    (fst (inject_at prog pc extra_pc l)) = pos_add_nat extra_pc (S (List.length l)).  
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc prog).
+  all: apply inject_list_increases.
+Qed.
+
+Lemma inject_at_injected:
+  forall l prog pc extra_pc k (BOUND : (k < (List.length l))%nat),
+    PTree.get (pos_add_nat extra_pc k) (snd (inject_at prog pc extra_pc l)) =
+    Some (inject_instr (bounded_nth k l BOUND) (Pos.succ (pos_add_nat extra_pc k))).
+Proof.
+  intros. unfold inject_at.
+  destruct (prog ! pc); apply inject_list_injected.
+Qed.
+
+Lemma inject_at_injected_end:
+  forall l prog pc extra_pc i,
+    PTree.get pc prog = Some i ->
+    PTree.get (pos_add_nat extra_pc (List.length l))
+              (snd (inject_at prog pc extra_pc l)) =
+    Some (Inop (successor i)).
+Proof.
+  intros until i. intro REW. unfold inject_at.
+  rewrite REW.
+  apply inject_list_injected_end.
+Qed.
+
+Lemma pair_expand:
+  forall { A B : Type } (p : A*B),
+    p = ((fst p), (snd p)).
+Proof.
+  destruct p; simpl; trivial.
+Qed.
+
+Fixpoint inject_l_position extra_pc
+         (injections : list (node * (list inj_instr)))
+         (k : nat) {struct injections} : node :=
+  match injections with
+  | nil => extra_pc
+  | (pc,l)::l' =>
+    match k with
+    | O => extra_pc
+    | S k' =>
+      inject_l_position
+        (Pos.succ (pos_add_nat extra_pc (List.length l))) l' k'
+    end
+  end.
+
+Lemma inject_l_position_increases : forall injections pc k,
+    pc <= inject_l_position pc injections k.
+Proof.
+  induction injections; simpl; intros.
+  lia.
+  destruct a as [_ l].
+  destruct k.
+  lia.
+  specialize IHinjections with (pc := (Pos.succ (pos_add_nat pc (Datatypes.length l)))) (k := k).
+  assert (pc <= (pos_add_nat pc (Datatypes.length l))) by apply pos_add_nat_increases.
+  lia.
+Qed.
+                    
+Definition inject_l (prog : code) extra_pc injections :=
+  List.fold_left (fun already (injection : node * (list inj_instr)) =>
+                    inject_at' already (fst injection) (snd injection))
+    injections
+    (extra_pc, prog).
+
+Lemma inject_l_preserves :
+  forall injections prog extra_pc pc0,
+    pc0 < extra_pc ->
+    List.forallb (fun injection => if peq (fst injection) pc0 then false else true) injections = true ->
+    PTree.get pc0 (snd (inject_l prog extra_pc injections)) = PTree.get pc0 prog.
+Proof.
+  induction injections;
+    intros until pc0; intros BEFORE ALL; simpl; trivial.
+  unfold inject_l.
+  destruct a as [pc l]. simpl.
+  simpl in ALL.
+  rewrite andb_true_iff in ALL.
+  destruct ALL as [NEQ ALL].
+  rewrite pair_expand with (p := inject_at prog pc extra_pc l).
+  fold (inject_l (snd (inject_at prog pc extra_pc l))
+              (fst (inject_at prog pc extra_pc l))
+              injections).
+  rewrite IHinjections; trivial.
+  - apply inject_at_preserves; trivial.
+    destruct (peq pc pc0); congruence.
+  - rewrite inject_at_increases.
+    pose proof (pos_add_nat_increases extra_pc (S (Datatypes.length l))).
+    lia.
+Qed.
+
+Lemma nth_error_nil : forall { T : Type} k,
+    nth_error (@nil T) k = None.
+Proof.
+  destruct k; simpl; trivial.
+Qed.
+
+Lemma inject_l_injected:
+  forall injections prog injnum pc l extra_pc k
+         (BELOW : forallb (fun injection => (fst injection) <? extra_pc) injections = true)
+         (NUMBER : nth_error injections injnum = Some (pc, l))
+         (BOUND : (k < (List.length l))%nat),
+    PTree.get (pos_add_nat (inject_l_position extra_pc injections injnum) k)
+              (snd (inject_l prog extra_pc injections)) =
+    Some (inject_instr (bounded_nth k l BOUND)
+       (Pos.succ (pos_add_nat (inject_l_position extra_pc injections injnum) k))).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER.
+  }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  destruct a as [pc' l'].
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at prog pc' extra_pc l').
+  progress fold (inject_l (snd (inject_at prog pc' extra_pc l'))
+              (fst (inject_at prog pc' extra_pc l'))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - apply inject_at_injected.
+    - rewrite inject_at_increases.
+      apply pos_add_nat_monotone.
+      lia.
+    - rewrite forallb_forall.
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      pose proof (pos_add_nat_increases extra_pc k).
+      lia.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (pc := pc); trivial.
+  rewrite forallb_forall.
+  rewrite forallb_forall in BELOW2.
+  intros loc IN.
+  specialize BELOW2 with loc.
+  apply BELOW2 in IN.
+  pose proof (pos_add_nat_increases extra_pc (Datatypes.length l')).
+  rewrite Pos.ltb_lt.
+  rewrite Pos.ltb_lt in IN.
+  lia.
+Qed.
+
+Lemma inject_l_injected_end:
+  forall injections prog injnum pc i l extra_pc
+         (BEFORE : PTree.get pc prog = Some i)
+         (DISTINCT : list_norepet (map fst injections))
+         (BELOW : forallb (fun injection => (fst injection) <? extra_pc) injections = true)
+         (NUMBER : nth_error injections injnum = Some (pc, l)),
+    PTree.get (pos_add_nat (inject_l_position extra_pc injections injnum)
+                           (List.length l))
+              (snd (inject_l prog extra_pc injections)) =
+    Some (Inop (successor i)).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER.
+  }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  destruct a as [pc' l'].
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at prog pc' extra_pc l').
+  progress fold (inject_l (snd (inject_at prog pc' extra_pc l'))
+              (fst (inject_at prog pc' extra_pc l'))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - apply inject_at_injected_end; trivial.
+    - rewrite inject_at_increases.
+      apply pos_add_nat_monotone.
+      lia.
+    - rewrite forallb_forall.
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      pose proof (pos_add_nat_increases extra_pc (Datatypes.length l)).
+      lia.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (pc := pc); trivial.
+  {
+    rewrite <- BEFORE.
+    apply inject_at_preserves.
+    {
+      apply nth_error_In in NUMBER.
+      rewrite forallb_forall in BELOW2.
+      specialize BELOW2 with (pc, l).
+      apply BELOW2 in NUMBER.
+      apply Pos.ltb_lt in NUMBER.
+      simpl in NUMBER.
+      assumption.
+    }
+    simpl in DISTINCT.
+    inv DISTINCT.
+    intro SAME.
+    subst pc'.
+    apply nth_error_in in NUMBER.
+    assert (In (fst (pc, l)) (map fst injections)) as Z.
+    { apply in_map. assumption.
+    }
+    simpl in Z.
+    auto.
+  }
+  { inv DISTINCT.
+    assumption.
+  }
+  {
+    rewrite forallb_forall.
+    rewrite forallb_forall in BELOW2.
+    intros loc IN.
+    specialize BELOW2 with loc.
+    apply BELOW2 in IN.
+    pose proof (pos_add_nat_increases extra_pc (Datatypes.length l')).
+    rewrite Pos.ltb_lt.
+    rewrite Pos.ltb_lt in IN.
+    assert (pos_add_nat extra_pc (Datatypes.length l') <
+            pos_add_nat extra_pc (S (Datatypes.length l'))).
+    { apply pos_add_nat_monotone.
+      lia.
+    }
+    lia.
+  }
+Qed.
+
+
+Lemma inject_l_redirects:
+  forall injections prog injnum pc i l extra_pc
+         (BEFORE : PTree.get pc prog = Some i)
+         (DISTINCT : list_norepet (map fst injections))
+         (BELOW : forallb (fun injection => (fst injection) <? extra_pc) injections = true)
+         (NUMBER : nth_error injections injnum = Some (pc, l)),
+    PTree.get pc (snd (inject_l prog extra_pc injections)) =
+    Some (alter_successor i (inject_l_position extra_pc injections injnum)).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER.
+  }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  destruct a as [pc' l'].
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at prog pc' extra_pc l').
+  progress fold (inject_l (snd (inject_at prog pc' extra_pc l'))
+              (fst (inject_at prog pc' extra_pc l'))
+              injections).
+  simpl in BELOW1.
+  apply Pos.ltb_lt in BELOW1.
+  inv DISTINCT.
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - apply inject_at_redirects; trivial.
+    - rewrite inject_at_increases.
+      pose proof (pos_add_nat_increases extra_pc (S (Datatypes.length l))).
+      lia.
+    - rewrite forallb_forall.
+      intros loc IN.
+      destruct loc as [pc' l'].
+      simpl in *.
+      destruct peq; trivial.
+      subst pc'.
+      apply in_map with (f := fst) in IN.
+      simpl in IN.
+      exfalso.
+      auto.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (pc := pc) (l := l); trivial.
+  {
+    rewrite <- BEFORE.
+    apply nth_error_In in NUMBER.
+    rewrite forallb_forall in BELOW2.
+    specialize BELOW2 with (pc, l).
+    simpl in BELOW2.
+    rewrite Pos.ltb_lt in BELOW2.
+    apply inject_at_preserves; auto.
+    assert (In (fst (pc, l)) (map fst injections)) as Z.
+    { apply in_map. assumption.
+    }
+    simpl in Z.
+    intro EQ.
+    subst pc'.
+    auto.
+  }
+  {
+    rewrite forallb_forall.
+    rewrite forallb_forall in BELOW2.
+    intros loc IN.
+    specialize BELOW2 with loc.
+    apply BELOW2 in IN.
+    pose proof (pos_add_nat_increases extra_pc (Datatypes.length l')).
+    rewrite Pos.ltb_lt.
+    rewrite Pos.ltb_lt in IN.
+    assert (pos_add_nat extra_pc (Datatypes.length l') <
+            pos_add_nat extra_pc (S (Datatypes.length l'))).
+    { apply pos_add_nat_monotone.
+      lia.
+    }
+    lia.
+  }
+Qed.
+
+(*
+Lemma inject'_preserves :
+  forall injections prog extra_pc pc0,
+    pc0 < extra_pc ->
+    PTree.get pc0 injections = None ->
+    PTree.get pc0 (snd (inject' prog extra_pc injections)) = PTree.get pc0 prog.
+Proof.
+  intros. unfold inject'.
+  rewrite PTree.fold_spec.
+  change (fold_left
+        (fun (a : node * code) (p : positive * list inj_instr) =>
+         inject_at' a (fst p) (snd p)) (PTree.elements injections)
+        (extra_pc, prog)) with (inject_l prog extra_pc (PTree.elements injections)).
+  apply inject_l_preserves; trivial.
+  rewrite List.forallb_forall.
+  intros injection IN.
+  destruct injection as [pc l].
+  simpl.
+  apply PTree.elements_complete in IN.
+  destruct (peq pc pc0); trivial.
+  congruence.
+Qed.
+
+Lemma inject_preserves :
+  forall injections prog extra_pc pc0,
+    pc0 < extra_pc ->
+    PTree.get pc0 injections = None ->
+    PTree.get pc0 (inject prog extra_pc injections) = PTree.get pc0 prog.
+Proof.
+  unfold inject'.
+  apply inject'_preserves.
+Qed.
+*)
+
+Section INJECTOR.
+  Variable gen_injections : function -> node -> reg -> PTree.t (list inj_instr).
+
+  Definition match_prog (p tp: RTL.program) :=
+    match_program (fun ctx f tf => transf_fundef gen_injections f = OK tf) eq p tp.
+
+  Lemma transf_program_match:
+    forall p tp, transf_program gen_injections p = OK tp -> match_prog p tp.
+  Proof.
+    intros. eapply match_transform_partial_program; eauto.
+  Qed.
+
+  Section PRESERVATION.
+
+    Variables prog tprog: program.
+    Hypothesis TRANSF: match_prog prog tprog.
+    Let ge := Genv.globalenv prog.
+    Let tge := Genv.globalenv tprog.
+
+    Definition match_regs (f : function) (rs rs' : regset) :=
+      forall r, r <= max_reg_function f -> (rs'#r = rs#r).
+
+    Lemma match_regs_refl : forall f rs, match_regs f rs rs.
+    Proof.
+      unfold match_regs. intros. trivial.
+    Qed.
+
+    Lemma match_regs_trans : forall f rs1 rs2 rs3,
+        match_regs f rs1 rs2 -> match_regs f rs2 rs3 -> match_regs f rs1 rs3.
+    Proof.
+      unfold match_regs. intros until rs3. intros M12 M23 r.
+      specialize M12 with r.
+      specialize M23 with r.
+      intuition congruence.
+    Qed.
+
+    Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+    | match_frames_intro: forall res f tf sp pc pc' rs trs
+        (FUN : transf_function gen_injections f = OK tf)
+        (REGS : match_regs f rs trs)
+        (STAR:
+           forall ts m trs1,
+           exists trs2,
+             (match_regs f trs1 trs2) /\
+             Smallstep.star RTL.step tge
+                                      (State ts tf sp pc' trs1 m) E0
+                                      (State ts tf sp pc trs2 m)),
+        match_frames (Stackframe res f sp pc rs)
+                     (Stackframe res tf sp pc' trs).
+
+    Inductive match_states: state -> state -> Prop :=
+    | match_states_intro:
+      forall s f tf sp pc rs trs m ts
+        (FUN : transf_function gen_injections f = OK tf)
+        (STACKS: list_forall2 match_frames s ts)
+        (REGS: match_regs f rs trs),
+      match_states (State s f sp pc rs m) (State ts tf sp pc trs m)
+    | match_states_call:
+        forall s fd tfd args m ts
+        (FUN : transf_fundef gen_injections fd = OK tfd)
+        (STACKS: list_forall2 match_frames s ts),
+          match_states (Callstate s fd args m) (Callstate ts tfd args m)
+    | match_states_return:
+        forall s res m ts
+               (STACKS: list_forall2 match_frames s ts),
+          match_states (Returnstate s res m)
+                       (Returnstate ts res m).
+
+    Lemma functions_translated:
+      forall (v: val) (f: RTL.fundef),
+        Genv.find_funct ge v = Some f ->
+        exists tf,
+          Genv.find_funct tge v = Some tf /\
+          transf_fundef gen_injections f = OK tf.
+    Proof.
+      apply (Genv.find_funct_transf_partial TRANSF).
+    Qed.
+
+    Lemma function_ptr_translated:
+      forall (b: block) (f: RTL.fundef),
+        Genv.find_funct_ptr ge b = Some f ->
+        exists tf,
+          Genv.find_funct_ptr tge b = Some tf /\
+          transf_fundef gen_injections f = OK tf.
+    Proof.
+      apply (Genv.find_funct_ptr_transf_partial TRANSF).
+    Qed.
+
+    Lemma symbols_preserved:
+      forall id,
+        Genv.find_symbol tge id = Genv.find_symbol ge id.
+    Proof.
+      apply (Genv.find_symbol_match TRANSF).
+    Qed.
+
+    Lemma senv_preserved:
+      Senv.equiv ge tge.
+    Proof.
+      apply (Genv.senv_match TRANSF).
+    Qed.
+
+    Lemma sig_preserved:
+      forall f tf, transf_fundef gen_injections f = OK tf
+                   -> funsig tf = funsig f.
+    Proof.
+      destruct f; simpl; intros; monadInv H; trivial.
+      unfold transf_function in *.
+      destruct valid_injections1 in EQ.
+      2: discriminate.
+      inv EQ.
+      reflexivity.
+    Qed.
+
+    Lemma stacksize_preserved:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_stacksize tf = fn_stacksize f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    Lemma params_preserved:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_params tf = fn_params f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    Lemma entrypoint_preserved:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_entrypoint tf = fn_entrypoint f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    Lemma sig_preserved2:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_sig tf = fn_sig f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    Lemma transf_initial_states:
+      forall S1, RTL.initial_state prog S1 ->
+                 exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+    Proof.
+      intros. inversion H.
+      exploit function_ptr_translated; eauto.
+      intros (tf & A & B).
+      exists (Callstate nil tf nil m0); split.
+      - econstructor; eauto.
+        + eapply (Genv.init_mem_match TRANSF); eauto.
+        + replace (prog_main tprog) with (prog_main prog).
+          rewrite symbols_preserved. eauto.
+          symmetry. eapply match_program_main; eauto.
+        + rewrite <- H3. eapply sig_preserved; eauto.
+      - constructor; trivial.
+        constructor.
+    Qed.
+
+    Lemma transf_final_states:
+      forall S1 S2 r, match_states S1 S2 ->
+                      final_state S1 r -> final_state S2 r.
+    Proof.
+      intros. inv H0. inv H. inv STACKS. constructor.
+    Qed.
+
+    Lemma assign_above:
+      forall f trs res v,
+        (max_reg_function f) < res ->
+        match_regs f trs trs # res <- v.
+    Proof.
+      unfold match_regs.
+      intros.
+      apply Regmap.gso.
+      lia.
+    Qed.
+    
+    Lemma transf_function_inj_step:
+      forall ts f tf sp pc trs m ii
+        (FUN : transf_function gen_injections f = OK tf)
+        (GET : (fn_code tf) ! pc = Some (inject_instr ii (Pos.succ pc)))
+        (VALID : valid_injection_instr (max_reg_function f) ii = true),
+      exists trs',
+        RTL.step tge
+              (State ts tf sp pc trs m) E0
+              (State ts tf sp (Pos.succ pc) trs' m) /\
+        match_regs (f : function) trs trs'.
+    Proof.
+      destruct ii as [ |op args res | chunk addr args res]; simpl; intros.
+      - exists trs.
+        split.
+        * apply exec_Inop; assumption.
+        * apply match_regs_refl.
+      - repeat rewrite andb_true_iff in VALID.
+        rewrite negb_true_iff in VALID.
+        destruct VALID as (MAX_REG & NOTRAP & LENGTH).
+        rewrite Pos.ltb_lt in MAX_REG.
+        rewrite Nat.eqb_eq in LENGTH.
+        destruct (eval_operation ge sp op trs ## args m) as [v | ] eqn:EVAL.
+        + exists (trs # res <- v).
+          split.
+          * apply exec_Iop with (op := op) (args := args) (res := res); trivial.
+            rewrite eval_operation_preserved with (ge1 := ge).
+            assumption.
+            exact symbols_preserved.
+          * apply assign_above; auto.
+        + exfalso.
+          generalize EVAL.
+          apply is_trapping_op_sound; trivial.
+          rewrite map_length.
+          assumption.
+      - rewrite Pos.ltb_lt in VALID.
+        destruct (eval_addressing ge sp addr trs ## args) as [a | ] eqn:ADDR.
+        + destruct (Mem.loadv chunk m a) as [v | ] eqn:LOAD.
+          * exists (trs # res <- v).
+            split.
+            ** apply exec_Iload with (trap := NOTRAP) (chunk := chunk) (addr := addr) (args := args) (dst := res) (a := a); trivial.
+               all: try rewrite eval_addressing_preserved with (ge1 := ge).
+               all: auto using symbols_preserved.
+            ** apply assign_above; auto.
+          * exists (trs # res <- Vundef).
+            split.
+            ** apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (args := args) (dst := res) (a := a); trivial.
+               all: rewrite eval_addressing_preserved with (ge1 := ge).
+               all: auto using symbols_preserved.
+            ** apply assign_above; auto.
+        + exists (trs # res <- Vundef).
+          split.
+          * apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := args) (dst := res); trivial.
+            all: rewrite eval_addressing_preserved with (ge1 := ge).
+            all: auto using symbols_preserved.
+          * apply assign_above; auto.
+    Qed.
+
+    Lemma bounded_nth_In: forall {T : Type} (l : list T) k LESS,
+        In (bounded_nth k l LESS) l.
+    Proof.
+      induction l; simpl; intros.
+      lia.
+      destruct k; simpl.
+      - left; trivial.
+      - right. apply IHl.
+    Qed.
+
+    Lemma transf_function_inj_starstep_rec :
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (k : nat)
+        (CUR : (k <= (List.length inj_code))%nat)
+        (trs : regset),
+      exists trs',
+        match_regs (f : function) trs trs' /\
+        Smallstep.star RTL.step tge
+              (State ts tf sp (pos_add_nat inj_pc
+                                    ((List.length inj_code) - k)%nat) trs m) E0
+              (State ts tf sp (pos_add_nat inj_pc (List.length inj_code)) trs' m).
+    Proof.
+      induction k; simpl; intros.
+      { rewrite Nat.sub_0_r.
+        exists trs.
+        split.
+        - apply match_regs_refl.
+        - constructor.
+      }
+      assert (k <= Datatypes.length inj_code)%nat as KK by lia.
+      pose proof (IHk KK) as IH.
+      clear IHk KK.
+      pose proof FUN as VALIDATE.
+      unfold transf_function, valid_injections1 in VALIDATE.
+      destruct forallb eqn:FORALL in VALIDATE.
+      2: discriminate.
+      injection VALIDATE.
+      intro TF.
+      symmetry in TF.
+      pose proof (inject_l_injected (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) (fn_code f) inj_n src_pc inj_code (Pos.succ (max_pc_function f)) ((List.length inj_code) - (S k))%nat) as INJECTED.
+      lapply INJECTED.
+      { clear INJECTED.
+        intro INJECTED.
+        assert ((Datatypes.length inj_code - S k <
+                 Datatypes.length inj_code)%nat) as LESS by lia.
+        pose proof (INJECTED INJ LESS) as INJ'.
+        replace (snd
+            (inject_l (fn_code f) (Pos.succ (max_pc_function f))
+                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))))) with (fn_code tf) in INJ'.
+        2: rewrite TF; simpl; reflexivity.                                              apply transf_function_inj_step with (f:=f) (ts:=ts) (sp:=sp) (trs:=trs) (m := m) in INJ'.
+        2: assumption.
+        {
+          destruct INJ' as [trs'' [STEP STEPMATCH]].
+          destruct (IH trs'') as [trs' [STARSTEPMATCH STARSTEP]].
+          exists trs'.
+          split.
+          { apply match_regs_trans with (rs2 := trs''); assumption. }
+          eapply Smallstep.star_step with (t1:=E0) (t2:=E0).
+          {
+            rewrite POSITION in STEP.
+            exact STEP.
+          }
+          {
+            replace (Datatypes.length inj_code - k)%nat
+              with (S (Datatypes.length inj_code - (S k)))%nat in STARSTEP by lia.
+            simpl pos_add_nat in STARSTEP.
+            exact STARSTEP.
+          }
+          constructor.
+        }
+        rewrite forallb_forall in FORALL.
+        specialize FORALL with  (src_pc, inj_code).
+        lapply FORALL.
+        {
+          simpl.
+          rewrite andb_true_iff.
+          intros (SRC & ALL_VALID).
+          rewrite forallb_forall in ALL_VALID.
+          apply ALL_VALID.
+          apply bounded_nth_In.
+        }
+        apply nth_error_In with (n := inj_n).
+        assumption.
+      }
+      rewrite forallb_forall in FORALL.
+      rewrite forallb_forall.
+      intros x INx.
+      rewrite Pos.ltb_lt.
+      pose proof (FORALL x INx) as ALLx.
+      rewrite andb_true_iff in ALLx.
+      destruct ALLx as [ALLx1 ALLx2].
+      rewrite Pos.leb_le in ALLx1.
+      lia.
+    Qed.
+    
+    Lemma transf_function_inj_starstep :
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (trs : regset),
+      exists trs',
+        match_regs (f : function) trs trs' /\
+        Smallstep.star RTL.step tge
+              (State ts tf sp inj_pc trs m) E0
+              (State ts tf sp (pos_add_nat inj_pc (List.length inj_code)) trs' m).
+    Proof.
+      intros.
+      replace (State ts tf sp inj_pc trs m) with (State ts tf sp (pos_add_nat inj_pc ((List.length inj_code) - (List.length inj_code))%nat) trs m).
+      eapply transf_function_inj_starstep_rec; eauto.
+      f_equal.
+      rewrite <- minus_n_n.
+      reflexivity.
+    Qed.
+
+    Lemma transf_function_inj_end :
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code i
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (SRC: (fn_code f) ! src_pc = Some i)
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (trs : regset),
+        RTL.step tge
+          (State ts tf sp (pos_add_nat inj_pc (List.length inj_code)) trs m) E0
+          (State ts tf sp (successor i) trs m).
+    Proof.
+      intros.
+      pose proof FUN as VALIDATE.
+      unfold transf_function, valid_injections1 in VALIDATE.
+      destruct forallb eqn:FORALL in VALIDATE.
+      2: discriminate.
+      injection VALIDATE.
+      intro TF.
+      symmetry in TF.
+      pose proof (inject_l_injected_end (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) (fn_code f) inj_n src_pc i inj_code (Pos.succ (max_pc_function f))) as INJECTED.
+      lapply INJECTED.
+      2: assumption.
+      clear INJECTED.
+      intro INJECTED.
+      lapply INJECTED.
+      2: apply (PTree.elements_keys_norepet (gen_injections f (max_pc_function f) (max_reg_function f))); fail.
+      clear INJECTED.
+      intro INJECTED.
+      lapply INJECTED.
+      { clear INJECTED.
+        intro INJECTED.
+        pose proof (INJECTED INJ) as INJ'.
+        clear INJECTED.
+        replace (snd
+                   (inject_l (fn_code f) (Pos.succ (max_pc_function f))
+                             (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))))) with (fn_code tf) in INJ'.
+        2: rewrite TF; simpl; reflexivity.
+        rewrite POSITION in INJ'.
+        apply exec_Inop.
+        assumption.
+      }
+      clear INJECTED.
+      rewrite forallb_forall in FORALL.
+      rewrite forallb_forall.
+      intros x INx.
+      rewrite Pos.ltb_lt.
+      pose proof (FORALL x INx) as ALLx.
+      rewrite andb_true_iff in ALLx.
+      destruct ALLx as [ALLx1 ALLx2].
+      rewrite Pos.leb_le in ALLx1.
+      lia.
+    Qed.
+
+    Lemma transf_function_inj_plusstep :
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code i
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (SRC: (fn_code f) ! src_pc = Some i)
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (trs : regset),
+      exists trs',
+        match_regs (f : function) trs trs' /\
+        Smallstep.plus RTL.step tge
+              (State ts tf sp inj_pc trs m) E0
+              (State ts tf sp (successor i) trs' m).
+    Proof.
+      intros.
+      destruct (transf_function_inj_starstep ts f tf sp m inj_n src_pc inj_pc inj_code FUN INJ POSITION trs) as [trs' [MATCH PLUS]].
+      exists trs'.
+      split. assumption.
+      eapply Smallstep.plus_right.
+      exact PLUS.
+      eapply transf_function_inj_end; eassumption.
+      reflexivity.
+    Qed.
+    
+    Lemma transf_function_preserves:
+      forall f tf pc 
+        (FUN : transf_function gen_injections f = OK tf)
+        (LESS : pc <= max_pc_function f)
+        (NOCHANGE : (gen_injections f (max_pc_function f) (max_reg_function f)) ! pc = None),
+        (fn_code tf) ! pc = (fn_code f) ! pc.
+    Proof.
+      intros.
+      unfold transf_function in FUN.
+      destruct valid_injections1 in FUN.
+      2: discriminate.
+      inv FUN.
+      simpl.
+      apply inject_l_preserves.
+      lia.
+      rewrite forallb_forall.
+      intros x INx.
+      destruct peq; trivial.
+      subst pc.
+      exfalso.
+      destruct x as [pc ii].
+      simpl in *.
+      apply PTree.elements_complete in INx.
+      congruence.
+    Qed.
+    
+    Lemma transf_function_redirects:
+      forall f tf pc injl ii
+        (FUN : transf_function gen_injections f = OK tf)
+        (LESS : pc <= max_pc_function f)
+        (INJECTION : (gen_injections f (max_pc_function f) (max_reg_function f)) ! pc = Some injl)
+        (INSTR: (fn_code f) ! pc = Some ii),
+      exists pc' : node,
+        (fn_code tf) ! pc = Some (alter_successor ii pc') /\
+        (forall ts sp m trs,
+            exists trs',
+            match_regs f trs trs' /\
+            Smallstep.plus RTL.step tge
+              (State ts tf sp pc' trs m) E0
+              (State ts tf sp (successor ii) trs' m)).        
+    Proof.
+      intros.
+      apply PTree.elements_correct in INJECTION.
+      apply In_nth_error in INJECTION.
+      destruct INJECTION as [injn INJECTION].
+      exists (inject_l_position (Pos.succ (max_pc_function f))
+                                (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) injn).
+      split.
+      { unfold transf_function in FUN.
+        destruct (valid_injections1) eqn:VALID in FUN.
+        2: discriminate.
+        inv FUN.
+        simpl.
+        apply inject_l_redirects with (l := injl); auto.
+        apply PTree.elements_keys_norepet.
+        unfold valid_injections1 in VALID.
+        rewrite forallb_forall in VALID.
+        rewrite forallb_forall.
+        intros x INx.
+        pose proof (VALID x INx) as VALIDx.
+        clear VALID.
+        rewrite andb_true_iff in VALIDx.
+        rewrite Pos.leb_le in VALIDx.
+        destruct VALIDx as [VALIDx1 VALIDx2].
+        rewrite Pos.ltb_lt.
+        lia.
+      }
+      intros.
+      pose proof (transf_function_inj_plusstep ts f tf sp m injn pc
+                 (inject_l_position (Pos.succ (max_pc_function f))
+                                    (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) injn)
+                 injl ii FUN INJECTION INSTR) as TRANS.
+      lapply TRANS.
+      2: reflexivity.
+      clear TRANS.
+      intro TRANS.
+      exact (TRANS trs).
+    Qed.
+
+    Lemma transf_function_preserves_uses:
+      forall f tf pc rs trs ii
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some ii),
+        trs ## (instr_uses ii) = rs ## (instr_uses ii).
+    Proof.
+      intros.
+      assert (forall r, In r (instr_uses ii) ->
+                        trs # r = rs # r) as SAME.
+      {
+        intros r INr.
+        apply MATCH.
+        apply (max_reg_function_use f pc ii); auto.
+      }
+      induction (instr_uses ii); simpl; trivial.
+      f_equal.
+      - apply SAME. constructor; trivial.
+      - apply IHl. intros.
+        apply SAME. right. assumption.
+    Qed.
+
+    (*
+    Lemma transf_function_preserves_builtin_arg:
+      forall rs trs ef res sp m pc'
+        (arg : builtin_arg reg)
+        (SAME : (forall r,
+                    In r (instr_uses (Ibuiltin ef args res pc')) ->
+                    trs # r = rs # r) )
+        varg
+        (EVAL : eval_builtin_arg ge (fun r => rs#r) sp m arg varg),
+        eval_builtin_arg ge (fun r => trs#r) sp m arg varg.
+    Proof.
+     *)
+    
+    Lemma transf_function_preserves_builtin_args_rec:
+      forall rs trs ef res sp m pc'
+        (args : list (builtin_arg reg))
+        (SAME : (forall r,
+                    In r (instr_uses (Ibuiltin ef args res pc')) ->
+                       trs # r = rs # r) )
+        (vargs : list val)
+        (EVAL : eval_builtin_args ge (fun r => rs#r) sp m args vargs),
+        eval_builtin_args ge (fun r => trs#r) sp m args vargs.
+    Proof.
+      unfold eval_builtin_args.
+      induction args; intros; inv EVAL.
+      - constructor.
+      - constructor.
+        + induction H1.
+          all: try (constructor; auto; fail).
+          * rewrite <- SAME.
+            apply eval_BA.
+            simpl.
+            left. reflexivity.
+          * constructor.
+            ** apply IHeval_builtin_arg1.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr.
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+            ** apply IHeval_builtin_arg2.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr.
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+          * constructor.
+            ** apply IHeval_builtin_arg1.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr.
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+            ** apply IHeval_builtin_arg2.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr.
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+        + apply IHargs.
+          2: assumption.
+          intros r INr.
+          apply SAME.
+          simpl.
+          apply in_or_app.
+          right.
+          exact INr.
+    Qed.
+    
+    Lemma transf_function_preserves_builtin_args:
+      forall f tf pc rs trs ef res sp m pc'
+        (args : list (builtin_arg reg))
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some (Ibuiltin ef args res pc'))
+        (vargs : list val)
+        (EVAL : eval_builtin_args ge (fun r => rs#r) sp m args vargs),
+        eval_builtin_args ge (fun r => trs#r) sp m args vargs.
+    Proof.
+      intros.
+      apply transf_function_preserves_builtin_args_rec with (rs := rs) (ef := ef) (res := res) (pc' := pc').
+      intros r INr.
+      apply MATCH.
+      apply (max_reg_function_use f pc (Ibuiltin ef args res pc')).
+      all: auto.
+    Qed.
+    
+    Lemma match_regs_write:
+      forall f rs trs res v
+             (MATCH : match_regs f rs trs),
+        match_regs f (rs # res <- v) (trs # res <- v).
+    Proof.
+      intros.
+      intros r LESS.
+      destruct (peq r res).
+      {
+        subst r.
+        rewrite Regmap.gss.
+        symmetry.
+        apply Regmap.gss.
+      }
+      rewrite Regmap.gso.
+      rewrite Regmap.gso.
+      all: trivial.
+      apply MATCH.
+      trivial.
+    Qed.
+
+    Lemma match_regs_setres:
+      forall f res rs trs vres
+             (MATCH : match_regs f rs trs),
+        match_regs f (regmap_setres res vres rs) (regmap_setres res vres trs).
+    Proof.
+      induction res; simpl; intros; trivial.
+      apply match_regs_write; auto.
+    Qed.
+    
+    Lemma transf_function_preserves_ros:
+      forall f tf pc rs trs ros args res fd pc' sig
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some (Icall sig ros args res pc'))
+        (FIND : find_function ge ros rs = Some fd),
+      exists tfd, find_function tge ros trs = Some tfd
+                  /\ transf_fundef gen_injections fd = OK tfd.
+    Proof.
+      intros; destruct ros as  [r|id].
+      - apply functions_translated; auto.
+        replace (trs # r) with (hd Vundef (trs ## (instr_uses (Icall sig (inl r) args res pc')))) by reflexivity.
+        rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+      - simpl. rewrite symbols_preserved.
+        simpl in FIND.
+        destruct (Genv.find_symbol ge id); try congruence.
+        eapply function_ptr_translated; eauto.
+    Qed.
+
+    Lemma transf_function_preserves_ros_tail:
+      forall f tf pc rs trs ros args fd sig
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some (Itailcall sig ros args))
+        (FIND : find_function ge ros rs = Some fd),
+      exists tfd, find_function tge ros trs = Some tfd
+                  /\ transf_fundef gen_injections fd = OK tfd.
+    Proof.
+      intros; destruct ros as  [r|id].
+      - apply functions_translated; auto.
+        replace (trs # r) with (hd Vundef (trs ## (instr_uses (Itailcall sig (inl r) args)))) by reflexivity.
+        rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+      - simpl. rewrite symbols_preserved.
+        simpl in FIND.
+        destruct (Genv.find_symbol ge id); try congruence.
+        eapply function_ptr_translated; eauto.
+    Qed.
+      
+    Theorem transf_step_correct:
+      forall s1 t s2, step ge s1 t s2 ->
+         forall ts1 (MS: match_states s1 ts1),
+         exists ts2, Smallstep.plus step tge ts1 t ts2 /\ match_states s2 ts2.
+    Proof.
+      induction 1; intros ts1 MS; inv MS; try (inv TRC).
+      - (* nop *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Inop.
+               exact ALTER.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs); assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Inop.
+            rewrite transf_function_preserves with (f:=f); eauto.
+            eapply max_pc_function_sound; eauto.
+          * constructor; trivial.
+            
+      - (* op *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # res <- v).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iop with (op := op) (args := args).
+               exact ALTER.
+               rewrite eval_operation_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iop op args res pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+                 simpl.
+                 eassumption.
+               }
+               exact symbols_preserved.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # res <- v); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iop with (op := op) (args := args).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_operation_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iop op args res pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+                 simpl.
+                 eassumption.
+               }
+               exact symbols_preserved.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+
+      - (* load *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # dst <- v).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iload with (trap := trap) (chunk := chunk) (addr := addr) (args := args) (a := a).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload trap chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+               eassumption.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # dst <- v); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iload with (trap := trap) (chunk := chunk) (addr := addr) (args := args) (a := a).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload trap chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** eassumption.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+        
+      - (* load notrap1 *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # dst <- Vundef).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := args).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # dst <- Vundef); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := args).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+
+      - (* load notrap2 *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # dst <- Vundef).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (args := args) (a := a).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+               eassumption.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # dst <- Vundef); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (args := args) (a := a).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** eassumption.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+        
+      - (* store *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m') (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Istore with (chunk := chunk) (addr := addr) (args := args) (a := a) (src := src).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace (trs ## args) with (tl (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+               replace (trs # src) with (hd Vundef (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               simpl.
+               eassumption.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs); trivial.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Istore with (chunk := chunk) (addr := addr) (args := args) (a := a) (src := src).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace (trs ## args) with (tl (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** replace (trs # src) with (hd Vundef (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               simpl.
+               eassumption.
+          * constructor; trivial.
+      - (* call *)
+        destruct (transf_function_preserves_ros f tf pc rs trs ros args res fd pc' (funsig fd) FUN REGS H H0) as [tfd [TFD1 TFD2]].
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          simpl in ALTER.
+          econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Icall with (args := args) (sig := (funsig fd)) (ros := ros).
+            exact ALTER.
+            exact TFD1.
+            apply sig_preserved; auto.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Icall (funsig fd) (inl r) args res pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros.
+               destruct (SKIP ts0 sp m0 trs1) as [trs2 [MATCH PLUS]].
+               exists trs2. split. assumption.
+               apply Smallstep.plus_star. exact PLUS.
+               
+            ** replace (trs ## args) with (trs ## (instr_uses (Icall (funsig fd) (inr id) args res pc'))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros.
+               destruct (SKIP ts0 sp m0 trs1) as [trs2 [MATCH PLUS]].
+               exists trs2. split. assumption.
+               apply Smallstep.plus_star. exact PLUS.
+               
+        + econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Icall with (args := args) (sig := (funsig fd)) (ros := ros).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** exact TFD1.
+            ** apply sig_preserved; auto.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Icall (funsig fd) (inl r) args res pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros. exists trs1. split.
+               apply match_regs_refl. constructor.
+               
+            ** replace (trs ## args) with (trs ## (instr_uses (Icall (funsig fd) (inr id) args res pc'))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros. exists trs1. split.
+               apply match_regs_refl. constructor.
+        
+      -  (* tailcall *)
+        destruct (transf_function_preserves_ros_tail f tf pc rs trs ros args fd (funsig fd) FUN REGS H H0) as [tfd [TFD1 TFD2]].
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          simpl in ALTER.
+          econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Itailcall with (args := args) (sig := (funsig fd)) (ros := ros).
+            exact ALTER.
+            exact TFD1.
+            apply sig_preserved; auto.
+            rewrite stacksize_preserved with (f:=f) by trivial.
+            eassumption.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Itailcall (funsig fd) (inl r) args)))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+            ** replace (trs ## args) with (trs ## (instr_uses (Itailcall (funsig fd) (inr id) args))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+        + econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Itailcall with (args := args) (sig := (funsig fd)) (ros := ros).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** exact TFD1.
+            ** apply sig_preserved; auto.
+            ** rewrite stacksize_preserved with (f:=f) by trivial.
+               eassumption.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Itailcall (funsig fd) (inl r) args)))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+            ** replace (trs ## args) with (trs ## (instr_uses (Itailcall (funsig fd) (inr id) args))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+
+      - (* builtin *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m')
+             (trs := (regmap_setres res vres trs)).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Ibuiltin with (ef := ef) (args := args) (res := res) (vargs := vargs).
+               *** exact ALTER.
+               *** apply eval_builtin_args_preserved with (ge1 := ge); eauto.
+                   exact symbols_preserved.
+                   apply transf_function_preserves_builtin_args with (f:=f) (tf:=tf) (pc:=pc) (rs:=rs) (ef:=ef) (res0:=res) (pc':=pc'); auto.
+               *** eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** symmetry. apply E0_right.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := (regmap_setres res vres trs)); trivial.
+            apply match_regs_setres.
+            assumption.
+        + econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Ibuiltin with (ef := ef) (args := args) (res := res) (vargs := vargs).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** apply eval_builtin_args_preserved with (ge1 := ge); eauto.
+               exact symbols_preserved.
+               apply transf_function_preserves_builtin_args with (f:=f) (tf:=tf) (pc:=pc) (rs:=rs) (ef:=ef) (res0:=res) (pc':=pc'); auto.
+            ** eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+          * constructor; auto.
+            apply match_regs_setres.
+            assumption.
+            
+      - (* cond *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + destruct b eqn:B.
+          ++ exploit transf_function_redirects; eauto.
+             { eapply max_pc_function_sound; eauto. }
+             intros [pc_inj [ALTER SKIP]].
+             specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+             destruct SKIP as [trs' [MATCH PLUS]].
+             econstructor; split.
+             * eapply Smallstep.plus_left.
+               ** apply exec_Icond with (b := true) (cond := cond) (args := args) (ifso := pc_inj) (ifnot := ifnot) (predb := predb).
+                   exact ALTER.
+                   replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+                   rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+                   simpl. reflexivity.
+               ** apply Smallstep.plus_star.
+                  exact PLUS.
+               ** reflexivity.
+             * simpl. constructor; auto.
+               apply match_regs_trans with (rs2:=trs); auto.
+
+          ++ exploit transf_function_redirects; eauto.
+             { eapply max_pc_function_sound; eauto. }
+             intros [pc_inj [ALTER SKIP]].
+             specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+             destruct SKIP as [trs' [MATCH PLUS]].
+             econstructor; split.
+             * eapply Smallstep.plus_one.
+               apply exec_Icond with (b := false) (cond := cond) (args := args) (ifso := pc_inj) (ifnot := ifnot) (predb := predb).
+               exact ALTER.
+               replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               simpl. reflexivity.
+             * simpl. constructor; auto.
+        + destruct b eqn:B.
+          * econstructor; split.
+            ** eapply Smallstep.plus_one.
+               apply exec_Icond with (b := true) (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (predb := predb).
+               *** rewrite transf_function_preserves with (f:=f); eauto.
+                   eapply max_pc_function_sound; eauto.
+               *** replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+                   rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               *** reflexivity.
+            ** constructor; auto.
+          *  econstructor; split.
+            ** eapply Smallstep.plus_one.
+               apply exec_Icond with (b := false) (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (predb := predb).
+               *** rewrite transf_function_preserves with (f:=f); eauto.
+                   eapply max_pc_function_sound; eauto.
+               *** replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+                   rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               *** reflexivity.
+            ** constructor; auto.
+                       
+      - destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ijumptable with (arg := arg) (tbl := tbl) (n := n); trivial.
+            replace (trs # arg) with (hd Vundef (trs ## (instr_uses (Ijumptable arg tbl)))) by reflexivity.
+            rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+            eassumption.
+          * constructor; trivial.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ijumptable with (arg := arg) (tbl := tbl) (n := n); trivial.
+            rewrite transf_function_preserves with (f:=f); eauto.
+            eapply max_pc_function_sound; eauto.
+            replace (trs # arg) with (hd Vundef (trs ## (instr_uses (Ijumptable arg tbl)))) by reflexivity.
+            rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+            eassumption.
+          * constructor; trivial.
+      - (* return *)
+         destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := (Vptr stk Ptrofs.zero)) (m := m) (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ireturn.
+            exact ALTER.
+            rewrite stacksize_preserved with (f:=f); eassumption.
+          * destruct or as [r | ]; simpl.
+            ** replace (trs # r) with (hd Vundef (trs ## (instr_uses (Ireturn (Some r))))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               constructor; auto.
+            ** constructor; auto.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ireturn.
+            rewrite transf_function_preserves with (f:=f); eauto.
+            eapply max_pc_function_sound; eauto.
+            rewrite stacksize_preserved with (f:=f); eassumption.
+          * destruct or as [r | ]; simpl.
+            ** replace (trs # r) with (hd Vundef (trs ## (instr_uses (Ireturn (Some r))))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               constructor; auto.
+            ** constructor; auto.
+
+      - (* internal call *)
+        monadInv FUN.
+        econstructor; split.
+        + apply Smallstep.plus_one.
+          apply exec_function_internal.
+          rewrite stacksize_preserved with (f:=f) by assumption.
+          eassumption.
+        + rewrite entrypoint_preserved with (f:=f)(tf:=x) by assumption.
+          constructor; auto.
+          rewrite params_preserved with (f:=f)(tf:=x) by assumption.
+          apply match_regs_refl.
+      - (* external call *)
+        monadInv FUN.
+        econstructor; split.
+        + apply Smallstep.plus_one.
+          apply exec_function_external.
+          eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+        + constructor; auto.
+        
+      - (* return *)
+        inv STACKS. inv H1.
+        destruct (STAR bl m (trs # res <- vres)) as [trs2 [MATCH' STAR']].
+        econstructor; split.
+        + eapply Smallstep.plus_left.
+          * apply exec_return.
+          * exact STAR'.
+          * reflexivity.
+        + constructor; trivial.
+          apply match_regs_trans with (rs2 := (trs # res <- vres)).
+          apply match_regs_write.
+          assumption.
+          assumption.
+    Qed.
+
+    Theorem transf_program_correct:
+      Smallstep.forward_simulation (semantics prog) (semantics tprog).
+    Proof.
+      eapply Smallstep.forward_simulation_plus.
+      apply senv_preserved.
+      eexact transf_initial_states.
+      eexact transf_final_states.
+      eexact transf_step_correct.
+    Qed.
+    
+End PRESERVATION.
+End INJECTOR.
diff --git a/backend/Inlining.v b/backend/Inlining.v
index d66d2586..317c288c 100644
--- a/backend/Inlining.v
+++ b/backend/Inlining.v
@@ -367,9 +367,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
   | Iop op args res s =>
       set_instr (spc ctx pc)
                 (Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s))
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       set_instr (spc ctx pc)
-                (Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s))
+                (Iload trap chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s))
   | Istore chunk addr args src s =>
       set_instr (spc ctx pc)
                 (Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s))
@@ -400,9 +400,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
   | Ibuiltin ef args res s =>
       set_instr (spc ctx pc)
                 (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s))
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 info =>
       set_instr (spc ctx pc)
-                (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2))
+                (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) info)
   | Ijumptable r tbl =>
       set_instr (spc ctx pc)
                 (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl))
diff --git a/backend/Inliningaux.ml b/backend/Inliningaux.ml
index 2e83eb0c..cf308962 100644
--- a/backend/Inliningaux.ml
+++ b/backend/Inliningaux.ml
@@ -17,7 +17,8 @@ open Maps
 open Op
 open Ordered
 open! RTL
-
+open Camlcoq
+   
 module PSet = Make(OrderedPositive)
 
 type inlining_info = {
@@ -57,7 +58,7 @@ let used_in_globvar io gv =
 let fun_inline_analysis id io fn =
   let inst io nid = function
     | Iop (op, args, dest, succ) -> used_id io (globals_operation op)
-    | Iload (chunk, addr, args, dest, succ)
+    | Iload (_, chunk, addr, args, dest, succ)
     | Istore (chunk, addr, args, dest, succ) -> used_id io (globals_addressing addr)
     | Ibuiltin (ef, args, dest, succ) -> used_id io (globals_of_builtin_args args)
     | Icall (_, Coq_inr cid, _, _, _)
@@ -83,13 +84,15 @@ let static_called_once id io =
   else
     false
 
-(* To be considered: heuristics based on size of function? *)
+(* D. Monniaux: attempt at heuristic based on size *)
+let small_enough (f : coq_function) =
+  P.to_int (RTL.max_pc_function f) <= !Clflags.option_inline_auto_threshold
 
 let should_inline (io: inlining_info) (id: ident) (f: coq_function) =
   if !Clflags.option_finline then begin
     match C2C.atom_inline id with
     | C2C.Inline -> true
     | C2C.Noinline -> false
-    | C2C.No_specifier -> static_called_once id io
+    | C2C.No_specifier -> static_called_once id io || small_enough f
   end else
     false
diff --git a/backend/Inliningproof.v b/backend/Inliningproof.v
index 0434a4a4..eb30732b 100644
--- a/backend/Inliningproof.v
+++ b/backend/Inliningproof.v
@@ -929,6 +929,15 @@ Proof.
   intros. inv H. eauto.
 Qed.
 
+Lemma eval_addressing_none:
+  forall sp' ctx addr rs,
+  eval_addressing ge (Vptr sp' (Ptrofs.repr (dstk ctx))) addr rs = None ->
+  eval_addressing ge (Vptr sp' Ptrofs.zero) (saddr ctx addr) rs = None.
+Proof.
+  intros until rs; intro Heval.
+  destruct addr; destruct rs as [| r0 rs1]; simpl in *; trivial; discriminate.
+Qed.
+
 Theorem step_simulation:
   forall S1 t S2,
   step ge S1 t S2 ->
@@ -976,6 +985,51 @@ Proof.
   apply match_stacks_inside_set_reg; auto.
   apply agree_set_reg; auto.
 
+- (* load notrap1 *)
+  exploit tr_funbody_inv; eauto. intros TR; inv TR.
+  left; econstructor; split.
+  eapply plus_one. eapply exec_Iload_notrap1. eassumption.
+  rewrite eval_addressing_preserved with (ge1:=ge) (ge2:=tge).
+  exploit eval_addressing_inj_none.
+  4: eassumption.
+  intros. eapply symbol_address_inject.
+  eapply match_stacks_inside_globals; eauto.
+  eauto.
+  instantiate (1 := rs'##(sregs ctx args)). eapply agree_val_regs; eauto.
+  rewrite Ptrofs.add_zero_l.
+  apply eval_addressing_none.
+  exact symbols_preserved.
+  econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+
+- (* load notrap2 *)
+  exploit tr_funbody_inv; eauto. intros TR; inv TR.
+
+  exploit eval_addressing_inject.
+    eapply match_stacks_inside_globals; eauto.
+    eexact SP.
+    instantiate (2 := rs##args). instantiate (1 := rs'##(sregs ctx args)). eapply agree_val_regs; eauto.
+    eauto.
+  fold (saddr ctx addr). intros [a' [P Q]].
+
+  destruct (Mem.loadv chunk m' a') eqn:Hload'.
+  + left; econstructor; split.
+    eapply plus_one.
+    eapply exec_Iload; eauto.
+    try (rewrite <- P; apply eval_addressing_preserved; exact symbols_preserved).
+    econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+    
+  + left; econstructor; split.
+    eapply plus_one.
+    eapply exec_Iload_notrap2; eauto.
+    try (rewrite <- P; apply eval_addressing_preserved; exact symbols_preserved).
+    econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+  
 - (* store *)
   exploit tr_funbody_inv; eauto. intros TR; inv TR.
   exploit eval_addressing_inject.
diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v
index 477f883a..e846e0fd 100644
--- a/backend/Inliningspec.v
+++ b/backend/Inliningspec.v
@@ -270,10 +270,10 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop :=
       Ple res ctx.(mreg) ->
       c!(spc ctx pc) = Some (Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
       tr_instr ctx pc (Iop op args res s) c
-  | tr_load: forall ctx pc c chunk addr args res s,
+  | tr_load: forall ctx pc c trap chunk addr args res s,
       Ple res ctx.(mreg) ->
-      c!(spc ctx pc) = Some (Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
-      tr_instr ctx pc (Iload chunk addr args res s) c
+      c!(spc ctx pc) = Some (Iload trap chunk (saddr ctx addr) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
+      tr_instr ctx pc (Iload trap chunk addr args res s) c
   | tr_store: forall ctx pc c chunk addr args src s,
       c!(spc ctx pc) = Some (Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s)) ->
       tr_instr ctx pc (Istore chunk addr args src s) c
@@ -312,9 +312,9 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop :=
       match res with BR r => Ple r ctx.(mreg) | _ => True end ->
       c!(spc ctx pc) = Some (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s)) ->
       tr_instr ctx pc (Ibuiltin ef args res s) c
-  | tr_cond: forall ctx pc cond args s1 s2 c,
-      c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2)) ->
-      tr_instr ctx pc (Icond cond args s1 s2) c
+  | tr_cond: forall ctx pc cond args s1 s2 c i,
+      c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) i) ->
+      tr_instr ctx pc (Icond cond args s1 s2 i) c
   | tr_jumptable: forall ctx pc r tbl c,
       c!(spc ctx pc) = Some (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl)) ->
       tr_instr ctx pc (Ijumptable r tbl) c
diff --git a/backend/JsonAST.ml b/backend/JsonAST.ml
index 8ab874b1..2e70aae7 100644
--- a/backend/JsonAST.ml
+++ b/backend/JsonAST.ml
@@ -39,7 +39,7 @@ let pp_section pp sec =
 
   match sec with
   | Section_text -> pp_simple "Text"
-  | Section_data init -> pp_complex "Data" init
+  | Section_data(init, thread_local) -> pp_complex "Data" init (* FIXME *)
   | Section_small_data init -> pp_complex "Small Data" init
   | Section_const init -> pp_complex "Const" init
   | Section_small_const init -> pp_complex "Small Const" init
diff --git a/backend/KillUselessMoves.v b/backend/KillUselessMoves.v
new file mode 100644
index 00000000..bdd7ec60
--- /dev/null
+++ b/backend/KillUselessMoves.v
@@ -0,0 +1,40 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+Require List.
+
+Definition transf_ros (ros: reg + ident) : reg + ident := ros.
+
+Definition transf_instr (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args res s =>
+    if (eq_operation op Omove) && (List.list_eq_dec peq args (res :: nil))
+    then Inop s
+    else instr
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map transf_instr f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/KillUselessMovesproof.v b/backend/KillUselessMovesproof.v
new file mode 100644
index 00000000..629aa6aa
--- /dev/null
+++ b/backend/KillUselessMovesproof.v
@@ -0,0 +1,361 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Axioms.
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import KillUselessMoves.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some(transf_instr pc i).
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite Hcode.
+  reflexivity.
+Qed.
+
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Section SAME_RS.
+  Context {A : Type}.
+  
+  Definition same_rs (rs rs' : Regmap.t A) :=
+    forall x, rs # x = rs' # x.
+
+  Lemma same_rs_refl : forall rs, same_rs rs rs.
+  Proof.
+    unfold same_rs.
+    reflexivity.
+  Qed.
+
+  Lemma same_rs_comm : forall rs rs', (same_rs rs rs') -> (same_rs rs' rs).
+  Proof.
+    unfold same_rs.
+    congruence.
+  Qed.
+
+  Lemma same_rs_trans : forall rs1 rs2 rs3,
+      (same_rs rs1 rs2) -> (same_rs rs2 rs3) -> (same_rs rs1 rs3).
+  Proof.
+    unfold same_rs.
+    congruence.
+  Qed.
+
+  Lemma same_rs_idem_write : forall rs r,
+      (same_rs rs (rs # r <- (rs # r))).
+  Proof.
+    unfold same_rs.
+    intros.
+    rewrite Regmap.gsident.
+    reflexivity.
+  Qed.
+
+  Lemma same_rs_read:
+    forall rs rs' r, (same_rs rs rs') -> rs # r = rs' # r.
+  Proof.
+    unfold same_rs.
+    auto.
+  Qed.
+  
+  Lemma same_rs_subst:
+    forall rs rs' l, (same_rs rs rs') -> rs ## l = rs' ## l.
+  Proof.
+    induction l; cbn; intuition congruence.
+  Qed.
+
+  Lemma same_rs_write: forall rs rs' r x,
+      (same_rs rs rs') -> (same_rs (rs # r <- x) (rs' # r <- x)).
+  Proof.
+    unfold same_rs.
+    intros.
+    destruct (peq r x0).
+    { subst x0.
+      rewrite Regmap.gss. rewrite Regmap.gss.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    rewrite Regmap.gso by congruence.
+    auto.
+  Qed.
+
+  Lemma same_rs_setres:
+    forall rs rs' (SAME: same_rs rs rs') res vres,
+      same_rs (regmap_setres res vres rs) (regmap_setres res vres rs').
+  Proof.
+    induction res; cbn; auto using same_rs_write.
+  Qed.
+End SAME_RS.
+
+Lemma same_find_function: forall tge rs rs' (SAME: same_rs rs rs') ros,
+  find_function tge ros rs = find_function tge ros rs'.
+Proof.
+  destruct ros; cbn.
+  { rewrite (same_rs_read rs rs' r SAME).
+    reflexivity. }
+  reflexivity.
+Qed.
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs rs' (SAME : same_rs rs rs'),
+    match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs').
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs rs' m stk'
+        (SAME: same_rs rs rs')
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs' m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+- (* op *)
+  cbn in H1.
+  destruct (_ && _) eqn:IS_MOVE in H1.
+  {
+    destruct eq_operation in IS_MOVE. 2: discriminate.
+    destruct list_eq_dec in IS_MOVE. 2: discriminate.
+    subst op. subst args.
+    clear IS_MOVE.
+    cbn in H0.
+    inv H0.
+    econstructor; split.
+    { eapply exec_Inop; eauto. }
+    constructor.
+    2: assumption.
+    eapply same_rs_trans.
+    { apply same_rs_comm.
+      apply same_rs_idem_write.
+    }
+    assumption.
+  }
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0.
+  rewrite (same_rs_subst rs rs' args SAME).
+  apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto using same_rs_write.
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = Some a).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  eapply exec_Iload; eauto.
+  constructor; auto using same_rs_write.
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = None).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto using same_rs_write.
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = Some a).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto using same_rs_write. 
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = Some a).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  rewrite (same_rs_read rs rs' src SAME) in H1.
+  eapply exec_Istore; eauto.
+  constructor; auto.
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+  eapply find_function_translated; eauto.
+  { rewrite <- (same_find_function ge rs rs') by assumption.
+    assumption. }
+  apply sig_preserved.
+  rewrite (same_rs_subst rs rs' args SAME).  
+  constructor. constructor; auto. constructor; auto.
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+  { rewrite <- (same_find_function ge rs rs') by assumption.
+    assumption. }
+    apply sig_preserved.
+  rewrite (same_rs_subst rs rs' args SAME).  
+  constructor. auto.
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+  {
+    replace (fun r : positive => rs' # r) with (fun r : positive => rs # r).
+    eassumption.
+    apply functional_extensionality.
+    auto using same_rs_read.
+  }
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+  auto using same_rs_setres.
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite <- (same_rs_subst rs rs' args SAME); eassumption.
+  constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite <- (same_rs_read rs rs' arg SAME); eassumption.
+  constructor; auto.
+(* return *)
+- econstructor; split.
+  eapply exec_Ireturn; eauto.
+  destruct or; cbn.
+  + rewrite <- (same_rs_read rs rs' r SAME) by auto.
+    constructor; auto.
+  + constructor; auto.
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+  cbn.
+  apply same_rs_refl.
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto using same_rs_write.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/LICM.v b/backend/LICM.v
new file mode 100644
index 00000000..787ce256
--- /dev/null
+++ b/backend/LICM.v
@@ -0,0 +1,21 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+Require Inject.
+
+Axiom gen_injections : function -> node -> reg -> PTree.t (list Inject.inj_instr).
+
+Definition transf_program : program -> res program :=
+  Inject.transf_program gen_injections.
diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml
new file mode 100644
index 00000000..82e4629f
--- /dev/null
+++ b/backend/LICMaux.ml
@@ -0,0 +1,332 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open RTL;;
+open Camlcoq;;
+open Maps;;
+open Kildall;;
+open HashedSet;;
+open Inject;;
+open DebugPrint;;
+open RTLcommonaux;;
+
+type reg = P.t;;
+
+(** get_loop_headers moved from Duplicateaux.ml to LICMaux.ml to prevent cycle dependencies *)
+type vstate = Unvisited | Processed | Visited
+
+let rtl_successors = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,_) -> [n1; n2]
+| Ijumptable (_,ln) -> ln
+
+(** Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come accross an edge to a Processed node, it's a loop!
+  *)
+let get_loop_backedges code entrypoint = begin
+  debug "get_loop_backedges\n";
+  let visited = ref (PTree.map (fun n i -> Unvisited) code)
+  and loop_backedge = ref (PTree.map (fun n i -> None) code)
+  in let rec dfs_visit code origin = function
+  | [] -> ()
+  | node :: ln ->
+      debug "ENTERING node %d, REM are %a\n" (P.to_int node) print_intlist ln;
+      match (get_some @@ PTree.get node !visited) with
+      | Visited -> begin
+          debug "\tNode %d is already Visited, skipping\n" (P.to_int node);
+          dfs_visit code origin ln
+        end
+      | Processed -> begin
+          debug "Node %d is a loop header\n" (P.to_int node);
+          debug "The backedge is from %d\n" (P.to_int @@ get_some origin);
+          loop_backedge := PTree.set node origin !loop_backedge;
+          visited := PTree.set node Visited !visited;
+          dfs_visit code origin ln
+        end
+      | Unvisited -> begin
+          visited := PTree.set node Processed !visited;
+          debug "Node %d is Processed\n" (P.to_int node);
+          (match PTree.get node code with
+          | None -> failwith "No such node"
+          | Some i -> let next_visits = rtl_successors i in begin
+              debug "About to visit: %a\n" print_intlist next_visits;
+              dfs_visit code (Some node) next_visits
+            end);
+          debug "Node %d is Visited!\n" (P.to_int node);
+          visited := PTree.set node Visited !visited;
+          dfs_visit code origin ln
+        end
+  in begin
+    dfs_visit code None [entrypoint];
+    debug "LOOP BACKEDGES: %a\n" print_ptree_opint !loop_backedge;
+    !loop_backedge
+  end
+end
+
+let get_loop_headers code entrypoint =
+  let backedges = get_loop_backedges code entrypoint in
+  PTree.map (fun _ ob ->
+    match ob with
+    | None -> false
+    | Some _ -> true
+  ) backedges
+
+module Dominator =
+  struct
+    type t = Unreachable | Dominated of int | Multiple
+    let bot = Unreachable and top = Multiple
+    let beq a b =
+      match a, b with
+      | Unreachable, Unreachable
+      | Multiple, Multiple -> true
+      | (Dominated x), (Dominated y) -> x = y
+      | _ -> false
+    let lub a b =
+      match a, b with
+      | Multiple, _
+      | _, Multiple -> Multiple
+      | Unreachable, x
+      | x, Unreachable -> x
+      | (Dominated x), (Dominated y) when x=y -> a
+      | (Dominated _), (Dominated _) -> Multiple
+
+    let pp oc = function
+      | Unreachable -> output_string oc "unreachable"
+      | Multiple -> output_string oc "multiple"
+      | Dominated x -> Printf.fprintf oc "%d" x;;
+  end
+
+module Dominator_Solver = Dataflow_Solver(Dominator)(NodeSetForward)
+
+let apply_dominator (is_marked : node -> bool) (pc : node)
+      (before : Dominator.t) : Dominator.t =
+  match before with
+  | Dominator.Unreachable -> before
+  | _ ->
+     if is_marked pc
+     then Dominator.Dominated (P.to_int pc)
+     else before;;
+
+let dominated_parts1 (f : coq_function) :
+      (bool PTree.t) * (Dominator.t PMap.t option) =
+  let headers = get_loop_headers f.fn_code f.fn_entrypoint in
+  let dominated = Dominator_Solver.fixpoint f.fn_code RTL.successors_instr
+    (apply_dominator (fun pc -> match PTree.get pc headers with
+                                | Some x -> x
+                                | None -> false)) f.fn_entrypoint
+    Dominator.top in
+  (headers, dominated);;
+
+let dominated_parts (f : coq_function) : Dominator.t PMap.t * PSet.t PTree.t =
+  let (headers, dominated) = dominated_parts1 f in
+  match dominated with
+  | None -> failwith "dominated_parts 1"
+  | Some dominated ->
+     let singletons =
+       PTree.fold (fun before pc flag ->
+         if flag
+         then PTree.set pc (PSet.add pc PSet.empty) before
+         else before) headers PTree.empty in
+     (dominated,
+     PTree.fold (fun before pc ii ->
+         match PMap.get pc dominated with
+         | Dominator.Dominated x ->
+            let px = P.of_int x in
+            (match PTree.get px before with
+             | None -> failwith "dominated_parts 2"
+             | Some old ->
+                PTree.set px (PSet.add pc old) before)
+         | _ -> before) f.fn_code singletons);;
+
+let graph_traversal (initial_node : P.t)
+  (successor_iterator : P.t -> (P.t -> unit) -> unit) : PSet.t =
+  let seen = ref PSet.empty
+  and stack = Stack.create () in
+  Stack.push initial_node stack;
+  while not (Stack.is_empty stack)
+  do
+    let vertex = Stack.pop stack in
+    if not (PSet.contains !seen vertex)
+    then
+      begin
+        seen := PSet.add vertex !seen;
+        successor_iterator vertex (fun x -> Stack.push x stack) 
+      end
+  done;
+  !seen;;
+
+let filter_dominated_part (predecessors : P.t list PTree.t)
+      (header : P.t) (dominated_part : PSet.t) =
+  graph_traversal header
+    (fun (vertex : P.t) (f : P.t -> unit) ->
+      match PTree.get vertex predecessors with
+      | None -> ()
+      | Some l ->
+         List.iter
+           (fun x ->
+             if PSet.contains dominated_part x
+             then f x) l
+    );;
+
+let inner_loops (f : coq_function) =
+  let (dominated, parts) = dominated_parts f
+  and predecessors = Kildall.make_predecessors f.fn_code RTL.successors_instr in
+  (dominated, predecessors, PTree.map (filter_dominated_part predecessors) parts);;
+
+let map_reg mapper r =
+  match PTree.get r mapper with
+  | None -> r
+  | Some x -> x;;
+
+let rewrite_loop_body (last_alloc : reg ref)
+      (insns : RTL.code) (header : P.t) (loop_body : PSet.t) =
+  let seen = ref PSet.empty
+  and stack = Stack.create ()
+  and rewritten = ref [] in
+  let add_inj ii = rewritten := ii::!rewritten in
+  Stack.push (header, PTree.empty) stack;
+  while not (Stack.is_empty stack)
+  do
+    let (pc, mapper) = Stack.pop stack in
+    if not (PSet.contains !seen pc)
+    then
+      begin
+        seen := PSet.add pc !seen;
+        match PTree.get pc insns with
+        | None -> ()
+        | Some ii ->
+           let mapper' =
+             match ii with
+             | Iop(op, args, res, pc') when not (Op.is_trapping_op op) ->
+                let new_res = P.succ !last_alloc in
+                last_alloc := new_res;
+                add_inj (INJop(op,
+                               (List.map (map_reg mapper) args),
+                               new_res));
+                PTree.set res new_res mapper
+             | Iload(_, chunk, addr, args, v, pc')
+             | Istore(chunk, addr, args, v, pc')
+                  when Archi.has_notrap_loads &&
+                       !Clflags.option_fnontrap_loads ->
+                let new_res = P.succ !last_alloc in
+                last_alloc := new_res;
+                add_inj (INJload(chunk, addr,
+                                 (List.map (map_reg mapper) args),
+                                 new_res));
+                PTree.set v new_res mapper
+             | _ -> mapper in
+           List.iter (fun x ->
+               if PSet.contains loop_body x
+               then Stack.push (x, mapper') stack)
+             (successors_instr ii)
+      end
+  done;
+  List.rev !rewritten;;
+
+let pp_inj_instr (oc : out_channel) (ii : inj_instr) =
+  match ii with
+  | INJnop -> output_string oc "nop"
+  | INJop(op, args, res) ->
+     Printf.fprintf oc "%a = %a"
+       PrintRTL.reg res (PrintOp.print_operation PrintRTL.reg) (op, args)
+  | INJload(chunk, addr, args, dst) ->
+     Printf.fprintf oc "%a = %s[%a]"
+       PrintRTL.reg dst (PrintAST.name_of_chunk chunk)
+         (PrintOp.print_addressing PrintRTL.reg) (addr, args);;
+
+let pp_inj_list (oc : out_channel) (l : inj_instr list) =
+  List.iter (Printf.fprintf oc "%a; " pp_inj_instr) l;;
+
+let pp_injections (oc : out_channel) (injections : inj_instr list PTree.t) =
+  List.iter
+    (fun (pc, injl) ->
+      Printf.fprintf oc "%d : %a\n" (P.to_int pc) pp_inj_list injl)
+    (PTree.elements injections);;
+
+let compute_injections1 (f : coq_function) =
+  let (dominated, predecessors, loop_bodies) = inner_loops f
+  and last_alloc = ref (max_reg_function f) in
+  (dominated, predecessors,
+   PTree.map (fun header body ->
+    (body, rewrite_loop_body last_alloc f.fn_code header body)) loop_bodies);;
+
+let compute_injections (f : coq_function) : inj_instr list PTree.t =
+  let (dominated, predecessors, injections) = compute_injections1 f in
+  let output_map = ref PTree.empty in
+  List.iter
+    (fun (header, (body, inj)) ->
+      match PTree.get header predecessors with
+      | None -> failwith "compute_injections"
+      | Some l ->
+         List.iter (fun predecessor ->
+             if (PMap.get predecessor dominated)<>Dominator.Unreachable &&
+                  not (PSet.contains body predecessor)
+             then output_map := PTree.set predecessor inj !output_map) l)
+    (PTree.elements injections);
+  !output_map;;
+  
+let pp_list pp_item oc l =
+  output_string oc "{ ";
+  let first = ref true in
+  List.iter (fun x ->
+      (if !first
+       then first := false
+       else output_string oc ", ");
+      pp_item oc x) l;
+  output_string oc " }";;
+
+let pp_pset oc s =
+  pp_list (fun oc -> Printf.fprintf oc "%d") oc
+    (List.sort (fun x y -> y - x) (List.map P.to_int (PSet.elements s)));;
+
+let print_dominated_parts oc f =
+  List.iter (fun (header, nodes) ->
+      Printf.fprintf oc "%d : %a\n" (P.to_int header) pp_pset nodes)
+    (PTree.elements (snd (dominated_parts f)));;
+
+let print_inner_loops oc f =
+  List.iter (fun (header, nodes) ->
+      Printf.fprintf oc "%d : %a\n" (P.to_int header) pp_pset nodes)
+    (PTree.elements (let (_,_,l) = (inner_loops f) in l));;
+
+let print_dominated_parts1 oc f =
+  match snd (dominated_parts1 f) with
+  | None -> output_string oc "error\n"
+  | Some parts ->
+     List.iter
+       (fun (pc, instr) ->
+         Printf.fprintf oc "%d : %a\n" (P.to_int pc) Dominator.pp
+           (PMap.get pc parts)
+       )
+       (PTree.elements f.fn_code);;
+  
+let loop_headers (f : coq_function) : RTL.node list =
+  List.map fst (List.filter snd (PTree.elements (get_loop_headers f.fn_code f.fn_entrypoint)));;
+
+let print_loop_headers f =
+  print_endline "Loop headers";
+  List.iter
+    (fun i -> Printf.printf "%d " (P.to_int i))
+    (loop_headers f);
+  print_newline ();;
+
+let gen_injections (f : coq_function) (coq_max_pc : node) (coq_max_reg : reg):
+      (Inject.inj_instr list) PTree.t =
+  let injections = compute_injections f in
+  (* let () = pp_injections stdout injections in *)
+  injections;;
diff --git a/backend/LICMproof.v b/backend/LICMproof.v
new file mode 100644
index 00000000..e3f0c2b8
--- /dev/null
+++ b/backend/LICMproof.v
@@ -0,0 +1,39 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+Require Import LICM.
+Require Injectproof.
+
+Definition match_prog : program -> program -> Prop :=
+  Injectproof.match_prog gen_injections.
+
+Section PRESERVATION.
+
+  Variables prog tprog: program.
+  Hypothesis TRANSF: match_prog prog tprog.
+
+  Lemma transf_program_match:
+    forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+  Proof.
+    intros. eapply match_transform_partial_program_contextual; eauto.
+  Qed.
+  
+  Theorem transf_program_correct :
+    Smallstep.forward_simulation (semantics prog) (semantics tprog).
+  Proof.
+    apply Injectproof.transf_program_correct with (gen_injections := gen_injections).
+    exact TRANSF.
+  Qed.
+End PRESERVATION.
diff --git a/backend/LTL.v b/backend/LTL.v
index 5e7eec8c..a382ef0e 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -29,7 +29,7 @@ Definition node := positive.
 
 Inductive instruction: Type :=
   | Lop (op: operation) (args: list mreg) (res: mreg)
-  | Lload (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg)
+  | Lload (trap : trapping_mode) (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg)
   | Lgetstack (sl: slot) (ofs: Z) (ty: typ) (dst: mreg)
   | Lsetstack (src: mreg) (sl: slot) (ofs: Z) (ty: typ)
   | Lstore (chunk: memory_chunk) (addr: addressing) (args: list mreg) (src: mreg)
@@ -37,7 +37,7 @@ Inductive instruction: Type :=
   | Ltailcall (sg: signature) (ros: mreg + ident)
   | Lbuiltin (ef: external_function) (args: list (builtin_arg loc)) (res: builtin_res mreg)
   | Lbranch (s: node)
-  | Lcond (cond: condition) (args: list mreg) (s1 s2: node)
+  | Lcond (cond: condition) (args: list mreg) (s1 s2: node) (info: option bool)
   | Ljumptable (arg: mreg) (tbl: list node)
   | Lreturn.
 
@@ -209,11 +209,24 @@ Inductive step: state -> trace -> state -> Prop :=
       rs' = Locmap.set (R res) v (undef_regs (destroyed_by_op op) rs) ->
       step (Block s f sp (Lop op args res :: bb) rs m)
         E0 (Block s f sp bb rs' m)
-  | exec_Lload: forall s f sp chunk addr args dst bb rs m a v rs',
+  | exec_Lload: forall s f sp trap chunk addr args dst bb rs m a v rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
-      step (Block s f sp (Lload chunk addr args dst :: bb) rs m)
+      step (Block s f sp (Lload trap chunk addr args dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lload_notrap1: forall s f sp chunk addr args dst bb rs m rs',
+      eval_addressing ge sp addr (reglist rs args) = None ->
+      rs' = Locmap.set (R dst)  Vundef
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lload_notrap2: forall s f sp chunk addr args dst bb rs m a rs',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = Locmap.set (R dst) Vundef
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
         E0 (Block s f sp bb rs' m)
   | exec_Lgetstack: forall s f sp sl ofs ty dst bb rs m rs',
       rs' = Locmap.set (R dst) (rs (S sl ofs ty)) (undef_regs (destroyed_by_getstack sl) rs) ->
@@ -250,11 +263,11 @@ Inductive step: state -> trace -> state -> Prop :=
   | exec_Lbranch: forall s f sp pc bb rs m,
       step (Block s f sp (Lbranch pc :: bb) rs m)
         E0 (State s f sp pc rs m)
-  | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m,
+  | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m i,
       eval_condition cond (reglist rs args) m = Some b ->
       pc = (if b then pc1 else pc2) ->
       rs' = undef_regs (destroyed_by_cond cond) rs ->
-      step (Block s f sp (Lcond cond args pc1 pc2 :: bb) rs m)
+      step (Block s f sp (Lcond cond args pc1 pc2 i :: bb) rs m)
         E0 (State s f sp pc rs' m)
   | exec_Ljumptable: forall s f sp arg tbl bb rs m n pc rs',
       rs (R arg) = Vint n ->
@@ -315,7 +328,7 @@ Fixpoint successors_block (b: bblock) : list node :=
   | nil => nil                          (**r should never happen *)
   | Ltailcall _ _ :: _ => nil
   | Lbranch s :: _ => s :: nil
-  | Lcond _ _ s1 s2 :: _ => s1 :: s2 :: nil
+  | Lcond _ _ s1 s2 _ :: _ => s1 :: s2 :: nil
   | Ljumptable _ tbl :: _ => tbl
   | Lreturn :: _ => nil
   | instr :: b' => successors_block b'
diff --git a/backend/LTLTunneling.v b/backend/LTLTunneling.v
new file mode 100644
index 00000000..4b404724
--- /dev/null
+++ b/backend/LTLTunneling.v
@@ -0,0 +1,167 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Branch tunneling (optimization of branches to branches). *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST.
+Require Import LTL.
+
+(** Branch tunneling shortens sequences of branches (with no intervening
+  computations) by rewriting the branch and conditional branch instructions
+  so that they jump directly to the end of the branch sequence.
+  For example:
+<<
+     L1: if (cond) nop L2;                L1: nop L3;
+     L2: nop L3;               becomes    L2: nop L3;
+     L3: instr;                           L3: instr;
+     L4: if (cond) goto L1;               L4: if (cond) nop L1;
+>>
+  This optimization can be applied to several of our intermediate
+  languages.  We choose to perform it on the [LTL] language,
+  after register allocation but before code linearization.
+  Register allocation can delete instructions (such as dead
+  computations or useless moves), therefore there are more
+  opportunities for tunneling after allocation than before.
+  Symmetrically, prior tunneling helps linearization to produce
+  better code, e.g. by revealing that some [branch] instructions are
+  dead code (as the "branch L3" in the example above).
+*)
+
+(** The implementation consists in two passes: the first pass
+    records the branch t of each "nop"
+    and the second pass replace any "nop" node to [pc]
+    by a branch to a "nop" at [branch_t f pc]
+
+Naively, we may define [branch_t f pc] as follows:
+<<
+  branch_t f pc = branch_t f pc'  if f(pc) = nop pc'
+                     = pc                   otherwise
+>>
+  However, this definition can fail to terminate if
+  the program can contain loops consisting only of branches, as in
+<<
+     L1: branch L1;
+>>
+  or
+<<
+     L1: nop L2;
+     L2: nop L1;
+>>
+  Coq warns us of this fact by not accepting the definition
+  of [branch_t] above.
+
+  To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
+  for every instruction [pc: nop pc'] in the function.
+
+  Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
+  we need to iterate until we reach a fixpoint.
+
+  Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
+  in order to help the proof.
+
+  A verifier checks that this data-structure is correct.
+*)
+
+Definition UF := PTree.t (node * Z).
+
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: LTL.function -> UF. 
+Extract Constant branch_target => "LTLTunnelingaux.branch_target".
+
+Local Open Scope error_monad_scope.
+
+Definition get (td: UF) pc:node*Z :=
+  match td!pc with
+  | Some (t,d) => (t,Z.abs d)
+  | _ => (pc,0)
+  end.
+
+Definition target (td: UF) (pc:node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+Definition check_included (td: UF) (c: code): option bblock
+  := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
+
+(* we check the validity of targets and their bound:
+   the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
+ := match td!pc with
+    | None => OK tt
+    | Some (tpc, dpc) =>
+       let dpc := Z.abs dpc in
+       match bb with
+       | Lbranch s ::_ =>
+         let (ts, ds) := get td s in 
+         if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Lbranch")
+         else Error (msg "invalid skip of Lbranch")
+       | Lcond _ _ s1 s2 _ :: _ =>
+          let (ts1, ds1) := get td s1 in
+          let (ts2, ds2) := get td s2 in
+          if peq tpc ts1 then
+            if peq tpc ts2 then
+              if zlt ds1 dpc then
+                if zlt ds2 dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this block")
+      end
+   end.
+
+Definition check_code (td: UF) (c:code): res unit
+  := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
+
+(** The second pass rewrites all LTL instructions, replacing every
+  successor [s] of every instruction by [t s], the canonical representative
+  of its equivalence class in the union-find data structure. *)
+
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
+  match i with
+  | Lbranch s => Lbranch (t s)
+  | Lcond cond args s1 s2 info =>
+      let s1' := t s1 in let s2' := t s2 in
+      if peq s1' s2'
+      then Lbranch s1'
+      else Lcond cond args s1' s2' info
+  | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
+  | _ => i
+  end.
+
+Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
+  List.map (tunnel_instr t) b.
+
+Definition tunnel_function (f: LTL.function) : res LTL.function :=
+  let td := branch_target f in
+  let c := (fn_code f) in
+  if check_included td c then 
+    do _ <- check_code td c ; OK
+    (mkfunction
+      (fn_sig f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_block td) c)
+      (td (fn_entrypoint f)))
+  else 
+   Error (msg "Some node of the union-find is not in the CFG")
+   .
+
+Definition tunnel_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef tunnel_function f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program tunnel_fundef p.
diff --git a/backend/LTLTunnelingaux.ml b/backend/LTLTunnelingaux.ml
new file mode 100644
index 00000000..66540bc1
--- /dev/null
+++ b/backend/LTLTunnelingaux.ml
@@ -0,0 +1,109 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*           Pierre Goutagny ENS-Lyon, VERIMAG                 *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [LTLTunneling.v]
+
+*)
+
+open Coqlib
+open LTL
+open Maps
+open Camlcoq
+open Tunnelinglibs
+
+module LANG = struct
+  type code_unit = LTL.bblock
+  type funct = LTL.coq_function
+end
+
+module OPT = struct
+  let langname = "LTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
+
+module FUNS = struct
+  let build_simplified_cfg c acc pc bb =
+    match bb with
+    | Lbranch s :: _ ->
+       let ns = get_node c s in
+       set_branch c pc ns;
+       acc
+    | Lcond (_, _, s1, s2, _) :: _ ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, bb) =
+    match bb with
+    | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+    | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+    | _ -> Partial.debug "%d " pc; true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let check_code_unit td pc bb =
+    match PTree.get pc td with
+    | Some p ->
+      let (tpc, dpc) = p in
+      let dpc0 = dpc in
+      (match bb with
+       | [] ->
+         raise (BugOnPC (P.to_int pc))
+       | i :: _ ->
+         (match i with
+          | Lbranch s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | Lcond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | _ ->
+            raise (BugOnPC (P.to_int pc))))
+    | None -> ()
+end
+
+module T = Partial.T(FUNS)
+let branch_target = T.branch_target
+
diff --git a/backend/LTLTunnelingproof.v b/backend/LTLTunnelingproof.v
new file mode 100644
index 00000000..d36d3c76
--- /dev/null
+++ b/backend/LTLTunnelingproof.v
@@ -0,0 +1,666 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for the branch tunneling optimization. *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations LTL.
+Require Import LTLTunneling.
+
+Local Open Scope nat.
+
+
+(** * Properties of the branch_target, when the verifier succeeds *)
+
+Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
+   ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td: UF) (c: code): 
+  check_included_spec c td (check_included td c).
+Proof. 
+  apply PTree_Properties.fold_rec with (P := check_included_spec c).
+- (* extensionality *)
+  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_included_spec.
+  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
+  intros; eapply IND; try congruence.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
+ | TB_default (TB: target pc = pc) ob
+     : target_bounds target bound pc ob
+ | TB_branch s bb
+     (EQ: target pc = target s)
+     (DECREASE: bound s < bound pc)
+     : target_bounds target bound pc (Some (Lbranch s::bb))
+ | TB_cond cond args s1 s2 info bb
+     (EQ1: target pc = target s1)
+     (EQ2: target pc = target s2)
+     (DEC1: bound s1 < bound pc)
+     (DEC2: bound s2 < bound pc)
+     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
+ .
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
+Proof.
+  unfold target, get. intros H; rewrite H; auto.
+Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
+
+Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto.
+Qed.
+Local Hint Resolve get_nonneg: core.
+
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
+
+Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
+  check_bblock td pc bb = OK tt -> 
+  target_bounds (target td) (bound td) pc (Some bb).
+Proof.
+  unfold check_bblock, bound.
+  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
+  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
+  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
+  destruct bb as [|[ ] bb]; simpl; try congruence.
+  + destruct (get td s) as (ts, ds) eqn:Hs.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_branch.
+    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
+    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
+  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
+    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_cond.
+    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
+    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
+    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
+    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
+Qed.
+
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
+
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P := check_code_spec td).
+- (* extensionality *)
+  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_code_spec.
+  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
+  intros X; inversion X; subst.
+  apply check_bblock_correct; auto.
+Qed.
+
+Theorem branch_target_bounds:
+  forall f tf pc, 
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros _.
+  destruct ((fn_code f)!pc) eqn:X.
+  - exploit check_code_correct; eauto.
+  - exploit check_included_correct; eauto.
+    congruence.
+Qed.
+
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf -> 
+  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros X; inversion X; clear X; subst.
+  simpl. rewrite PTree.gmap1. auto.
+Qed.
+
+Lemma tunnel_fundef_Internal:
+  forall f tf, tunnel_fundef (Internal f) = OK tf
+  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
+Proof.
+  intros f tf; simpl.
+  destruct (tunnel_function f) eqn:X; simpl; try congruence.
+  intros EQ; inversion EQ.
+  eexists; split; eauto.
+Qed.
+
+Lemma tunnel_fundef_External:
+  forall tf ef, tunnel_fundef (External ef) = OK tf
+  -> tf = External ef.
+Proof.
+  intros tf ef; simpl. intros H; inversion H; auto.
+Qed.
+
+(** * Preservation of semantics *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma sig_preserved:
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
+    destruct (check_included _ _); try congruence.
+    monadInv EQ. simpl; auto.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Lemma fn_stacksize_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+
+(** The proof of semantic preservation is a simulation argument
+  based on diagrams of the following form:
+<<
+           st1 --------------- st2
+            |                   |
+           t|                  ?|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  The [match_states] predicate, defined below, captures the precondition
+  between states [st1] and [st2], as well as the postcondition between
+  [st1'] and [st2'].  One transition in the source code (left) can correspond
+  to zero or one transition in the transformed code (right).  The
+  "zero transition" case occurs when executing a [Lnop] instruction
+  in the source code that has been removed by tunneling.
+
+  In the definition of [match_states], what changes between the original and
+  transformed codes is mainly the control-flow
+  (in particular, the current program point [pc]), but also some values
+  and memory states, since some [Vundef] values can become more defined
+  as a consequence of eliminating useless [Lcond] instructions. *)
+
+Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
+  forall l, Val.lessdef (ls1 l) (ls2 l).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframes_intro:
+      forall f tf sp ls0 bb tls0,
+      locmap_lessdef ls0 tls0 ->
+      tunnel_function f = OK tf ->
+      match_stackframes
+         (Stackframe f sp ls0 bb)
+         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s f tf sp pc ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
+      match_states (State s f sp pc ls m)
+                   (State ts tf sp (branch_target f pc) tls tm)
+  | match_states_block:
+      forall s f tf sp bb ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp bb ls m)
+                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
+  | match_states_interm:
+      forall s f tf sp pc i bb ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp (i :: bb) ls m)
+                   (State ts tf sp pc tls tm)
+  | match_states_call:
+      forall s f tf ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_fundef f = OK tf),
+      match_states (Callstate s f ls m)
+                   (Callstate ts tf tls tm)
+  | match_states_return:
+      forall s ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm),
+      match_states (Returnstate s ls m)
+                   (Returnstate ts tls tm).
+
+(** Properties of [locmap_lessdef] *)
+
+Lemma reglist_lessdef:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl).
+Proof.
+  induction rl; simpl; intros; auto.
+Qed.
+
+Lemma locmap_set_lessdef:
+  forall ls1 ls2 v1 v2 l,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2).
+Proof.
+  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
+- destruct l; auto using Val.load_result_lessdef.
+- destruct (Loc.diff_dec l l'); auto.
+Qed.
+
+Lemma locmap_set_undef_lessdef:
+  forall ls1 ls2 l,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2.
+Proof.
+  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
+- destruct l; auto. destruct ty; auto. 
+- destruct (Loc.diff_dec l l'); auto.
+Qed.
+
+Lemma locmap_undef_regs_lessdef:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2).
+Proof.
+  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. 
+Qed.
+
+Lemma locmap_undef_regs_lessdef_1:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2.
+Proof.
+  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
+Qed.
+
+Lemma locmap_getpair_lessdef:
+  forall p ls1 ls2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
+Proof.
+  intros; destruct p; simpl; auto using Val.longofwords_lessdef.
+Qed.
+
+Lemma locmap_getpairs_lessdef:
+  forall pl ls1 ls2,
+  locmap_lessdef ls1 ls2 ->
+  Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl).
+Proof.
+  intros. induction pl; simpl; auto using locmap_getpair_lessdef.
+Qed.
+
+Lemma locmap_setpair_lessdef:
+  forall p ls1 ls2 v1 v2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2).
+Proof.
+  intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
+Qed.
+
+Lemma locmap_setres_lessdef:
+  forall res ls1 ls2 v1 v2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2).
+Proof.
+  induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
+Qed.
+
+Lemma locmap_undef_caller_save_regs_lessdef:
+  forall ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2).
+Proof.
+  intros; red; intros. unfold undef_caller_save_regs. 
+  destruct l.
+- destruct (Conventions1.is_callee_save r); auto.
+- destruct sl; auto.
+Qed.
+
+Lemma find_function_translated:
+  forall ros ls tls fd,
+  locmap_lessdef ls tls ->
+  find_function ge ros ls = Some fd ->
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
+Proof.
+  intros. destruct ros; simpl in *.
+- assert (E: tls (R m) = ls (R m)).
+  { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
+    generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
+  rewrite E. exploit functions_translated; eauto.
+- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
+  exploit function_ptr_translated; eauto.
+  intros (tf & X1 & X2). exists tf; intuition.
+Qed.
+
+Lemma call_regs_lessdef:
+  forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2).
+Proof.
+  intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto.
+Qed.
+
+Lemma return_regs_lessdef:
+  forall caller1 callee1 caller2 callee2,
+  locmap_lessdef caller1 caller2 ->
+  locmap_lessdef callee1 callee2 ->
+  locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2).
+Proof.
+  intros; red; intros. destruct l; simpl.
+- destruct (Conventions1.is_callee_save r); auto.
+- destruct sl; auto.
+Qed. 
+
+(** To preserve non-terminating behaviours, we show that the transformed
+  code cannot take an infinity of "zero transition" cases.
+  We use the following [measure] function over source states,
+  which decreases strictly in the "zero transition" case. *)
+
+Definition measure (st: state) : nat :=
+  match st with
+  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
+  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
+  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
+  | Block s f sp bb ls m => 0
+  | Callstate s f ls m => 0
+  | Returnstate s ls m => 0
+  end.
+
+Lemma match_parent_locset:
+  forall s ts,
+  list_forall2 match_stackframes s ts ->
+  locmap_lessdef (parent_locset s) (parent_locset ts).
+Proof.
+  induction 1; simpl.
+- red; auto.
+- inv H; auto.
+Qed.
+
+Lemma tunnel_step_correct:
+  forall st1 t st2, step ge st1 t st2 ->
+  forall st1' (MS: match_states st1 st1'),
+  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
+  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
+Proof.
+  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
+
+- (* entering a block *)
+  exploit (branch_target_bounds f tf pc); eauto.
+  rewrite H. intros X; inversion X.
+  + (* TB_default *) 
+    rewrite TB; left. econstructor; split.
+    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
+    * econstructor; eauto.
+  + (* FT_branch *)
+    simpl; right.
+    rewrite EQ; repeat (econstructor; lia || eauto).
+  + (* FT_cond *)
+    simpl; right.
+    repeat (econstructor; lia || eauto); simpl.
+    destruct (peq _ _); try congruence.
+- (* Lop *)
+  exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
+  intros (tv & EV & LD).
+  left; simpl; econstructor; split.
+  eapply exec_Lop with (v := tv); eauto.
+  rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  exploit Mem.loadv_extends. eauto. eauto. eexact LD. 
+  intros (tv & LOAD & LD').
+  left; simpl; econstructor; split.
+  eapply exec_Lload with (a := ta).
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap1 *)
+  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap1.
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap2 *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  destruct (Mem.loadv chunk tm ta) eqn:Htload.
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap2.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+- (* Lgetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lsetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lstore *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS.  
+  intros (tm' & STORE & MEM').
+  left; simpl; econstructor; split.
+  eapply exec_Lstore with (a := ta).
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
+  econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcall *)
+  left; simpl.
+  exploit find_function_translated; eauto.
+  intros (tfd & Htfd & FIND).
+  econstructor; split.
+  + eapply exec_Lcall; eauto.
+    erewrite sig_preserved; eauto.
+  + econstructor; eauto.
+    constructor; auto.
+    constructor; auto.
+- (* Ltailcall *)
+  exploit find_function_translated. 2: eauto.
+  { eauto using return_regs_lessdef, match_parent_locset. }
+  intros (tfd & Htfd & FIND).
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
+  left; simpl; econstructor; split.
+  + eapply exec_Ltailcall; eauto.
+    * eapply sig_preserved; eauto.
+    * erewrite fn_stacksize_preserved; eauto.
+  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
+- (* Lbuiltin *)
+  exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
+  exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
+  left; simpl; econstructor; split.
+  eapply exec_Lbuiltin; eauto.
+  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. 
+  eapply external_call_symbols_preserved. apply senv_preserved. eauto.
+  econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef.
+- (* Lbranch (preserved) *)
+  left; simpl; econstructor; split.
+  eapply exec_Lbranch; eauto.
+  fold (branch_target f pc). econstructor; eauto.
+- (* Lbranch (eliminated) *)
+  right; split. simpl. lia. split. auto. constructor; auto.
+- (* Lcond (preserved) *)
+  simpl; left; destruct (peq _ _) eqn: EQ.
+  + econstructor; split.
+    eapply exec_Lbranch. 
+    destruct b.
+    * constructor; eauto using locmap_undef_regs_lessdef_1.
+    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+  + econstructor; split.
+    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
+    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcond (eliminated) *)
+  destruct (peq _ _) eqn: EQ; try inv H1.
+  right; split; simpl. 
+  + destruct b.
+    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+  + destruct b.
+    -- repeat (constructor; auto).
+    -- rewrite e; repeat (constructor; auto).
+- (* Ljumptable *)
+  assert (tls (R arg) = Vint n).
+  { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
+  left; simpl; econstructor; split.
+  eapply exec_Ljumptable.
+  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
+  econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lreturn *)
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
+  left; simpl; econstructor; split.
+  + eapply exec_Lreturn; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + constructor; eauto using return_regs_lessdef, match_parent_locset.
+- (* internal function *)
+  exploit tunnel_fundef_Internal; eauto.
+  intros (tf' & TF' & ITF). subst.
+  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+  intros (tm' & ALLOC & MEM').
+  left; simpl.
+  econstructor; split.
+  + eapply exec_function_internal; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + simpl.
+    erewrite (fn_entrypoint_preserved f tf'); auto.
+    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
+- (* external function *)
+  exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
+  intros (tvres & tm' & A & B & C & D).
+  left; simpl; econstructor; split.
+  + erewrite (tunnel_fundef_External tf ef); eauto.
+    eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
+- (* return *)
+  inv STK. inv H1.
+  left; econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, initial_state prog st1 ->
+  exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto.
+  intros (tf & Htf & Hf).
+  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
+  econstructor; eauto.
+  apply (Genv.init_mem_transf_partial TRANSL); auto.
+  rewrite (match_program_main TRANSL).
+  rewrite symbols_preserved. eauto.
+  rewrite <- H3. apply sig_preserved. auto.
+  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r,
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv STK.
+  set (p := map_rpair R (Conventions1.loc_result signature_main)) in *.
+  generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD.
+  econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (LTL.semantics prog) (LTL.semantics tprog).
+Proof.
+  eapply forward_simulation_opt.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  eexact tunnel_step_correct.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Linear.v b/backend/Linear.v
index 447c6ba6..cb11f7dc 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -28,7 +28,7 @@ Inductive instruction: Type :=
   | Lgetstack: slot -> Z -> typ -> mreg -> instruction
   | Lsetstack: mreg -> slot -> Z -> typ -> instruction
   | Lop: operation -> list mreg -> mreg -> instruction
-  | Lload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
+  | Lload: trapping_mode -> memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lcall: signature -> mreg + ident -> instruction
   | Ltailcall: signature -> mreg + ident -> instruction
@@ -160,11 +160,28 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Lop op args res :: b) rs m)
         E0 (State s f sp b rs' m)
   | exec_Lload:
-      forall s f sp chunk addr args dst b rs m a v rs',
+      forall s f sp trap chunk addr args dst b rs m a v rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
-      step (State s f sp (Lload chunk addr args dst :: b) rs m)
+      step (State s f sp (Lload trap chunk addr args dst :: b) rs m)
+        E0 (State s f sp b rs' m)
+  | exec_Lload_notrap1:
+      forall s f sp chunk addr args dst b rs m rs',
+      eval_addressing ge sp addr (reglist rs args) = None ->
+      rs' = Locmap.set (R dst)
+                       Vundef
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
+        E0 (State s f sp b rs' m)
+  | exec_Lload_notrap2:
+      forall s f sp chunk addr args dst b rs m a rs',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = Locmap.set (R dst)
+                       Vundef
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
         E0 (State s f sp b rs' m)
   | exec_Lstore:
       forall s f sp chunk addr args src b rs m m' a rs',
diff --git a/backend/Linearize.v b/backend/Linearize.v
index 2cfa4d3c..66b36428 100644
--- a/backend/Linearize.v
+++ b/backend/Linearize.v
@@ -163,8 +163,8 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
   | nil => k
   | LTL.Lop op args res :: b' =>
       Lop op args res :: linearize_block b' k
-  | LTL.Lload chunk addr args dst :: b' =>
-      Lload chunk addr args dst :: linearize_block b' k
+  | LTL.Lload trap chunk addr args dst :: b' =>
+      Lload trap chunk addr args dst :: linearize_block b' k
   | LTL.Lgetstack sl ofs ty dst :: b' =>
       Lgetstack sl ofs ty dst :: linearize_block b' k
   | LTL.Lsetstack src sl ofs ty :: b' =>
@@ -179,7 +179,7 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
       Lbuiltin ef args res :: linearize_block b' k
   | LTL.Lbranch s :: b' =>
       add_branch s k
-  | LTL.Lcond cond args s1 s2 :: b' =>
+  | LTL.Lcond cond args s1 s2 _ :: b' =>
       if starts_with s1 k then
         Lcond (negate_condition cond) args s2 :: add_branch s1 k
       else
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index 902724e0..5914f6a3 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -12,7 +12,12 @@
 
 open LTL
 open Maps
-open Camlcoq
+
+let debug_flag = ref false
+
+let debug fmt =
+  if !debug_flag then Printf.eprintf fmt
+  else Printf.ifprintf stderr fmt
 
 (* Trivial enumeration, in decreasing order of PC *)
 
@@ -29,6 +34,8 @@ let enumerate_aux f reach =
 
 (* More clever enumeration that flattens basic blocks *)
 
+open Camlcoq
+
 module IntSet = Set.Make(struct type t = int let compare = compare end)
 
 (* Determine join points: reachable nodes that have > 1 predecessor *)
@@ -80,7 +87,7 @@ let basic_blocks f joins =
        | [] -> assert false
        | Lbranch s :: _ -> next_in_block blk minpc s
        | Ltailcall (sig0, ros) :: _ -> end_block blk minpc
-       | Lcond (cond, args, ifso, ifnot) :: _ ->
+       | Lcond (cond, args, ifso, ifnot, _) :: _ ->
              end_block blk minpc; start_block ifso; start_block ifnot
        | Ljumptable(arg, tbl) :: _ ->
              end_block blk minpc; List.iter start_block tbl
@@ -110,5 +117,73 @@ let flatten_blocks blks =
 
 (* Build the enumeration *)
 
-let enumerate_aux f reach =
+let enumerate_aux_flat f reach =
   flatten_blocks (basic_blocks f (join_points f))
+
+(**
+ * Alternate enumeration based on traces as identified by Duplicate.v 
+ *
+ * This is a slight alteration to the above heuristic, ensuring that any
+ * superblock will be contiguous in memory, while still following the original
+ * heuristic
+ *
+ * Slight change: instead of taking the minimum pc of the superblock, we just take
+ * the pc of the first block.
+ * (experimentally this leads to slightly better performance..)
+ *)
+
+let super_blocks f joins =
+  let blocks = ref [] in
+  let visited = ref IntSet.empty in
+  (* start_block:
+       pc is the function entry point
+          or a join point
+          or the successor of a conditional test *)
+  let rec start_block pc =
+    let npc = P.to_int pc in
+    if not (IntSet.mem npc !visited) then begin
+      visited := IntSet.add npc !visited;
+      in_block [] npc pc
+    end
+  (* in_block: add pc to block and check successors *)
+  and in_block blk minpc pc =
+    let blk = pc :: blk in
+    match PTree.get pc f.fn_code with
+    | None -> assert false
+    | Some b ->
+       let rec do_instr_list = function
+       | [] -> assert false
+       | Lbranch s :: _ -> next_in_block blk minpc s
+       | Ltailcall (sig0, ros) :: _ -> end_block blk minpc
+       | Lcond (cond, args, ifso, ifnot, pred) :: _ -> begin
+            match pred with
+            | None -> (end_block blk minpc; start_block ifso; start_block ifnot)
+            | Some true -> (next_in_block blk minpc ifso; start_block ifnot)
+            | Some false -> (next_in_block blk minpc ifnot; start_block ifso)
+          end
+       | Ljumptable(arg, tbl) :: _ ->
+             end_block blk minpc; List.iter start_block tbl
+       | Lreturn :: _ -> end_block blk minpc
+       | instr :: b' -> do_instr_list b' in
+       do_instr_list b
+  (* next_in_block: check if join point and either extend block
+     or start block *)
+  and next_in_block blk minpc pc =
+    let npc = P.to_int pc in
+    if IntSet.mem npc joins
+    then (end_block blk minpc; start_block pc)
+    else in_block blk minpc pc
+  (* end_block: record block that we just discovered *)
+  and end_block blk minpc =
+    blocks := (minpc, List.rev blk) :: !blocks
+  in
+    start_block f.fn_entrypoint; !blocks
+
+(* Build the enumeration *)
+
+let enumerate_aux_sb f reach =
+  flatten_blocks (super_blocks f (join_points f))
+
+let enumerate_aux f reach =
+  if !Clflags.option_ftracelinearize then enumerate_aux_sb f reach
+  else enumerate_aux_flat f reach
diff --git a/backend/Linearizeproof.v b/backend/Linearizeproof.v
index b065238c..c12eab6e 100644
--- a/backend/Linearizeproof.v
+++ b/backend/Linearizeproof.v
@@ -585,45 +585,61 @@ Proof.
   intros; eapply reachable_successors; eauto.
   eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto.
 
-  (* Lop *)
+- (* Lop *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   instantiate (1 := v); rewrite <- H; apply eval_operation_preserved.
   exact symbols_preserved.
   econstructor; eauto.
 
-  (* Lload *)
+- (* Lload *)
   left; econstructor; split. simpl.
-  apply plus_one. econstructor.
+  apply plus_one. eapply exec_Lload.
   instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved.
   exact symbols_preserved. eauto. eauto.
   econstructor; eauto.
 
-  (* Lgetstack *)
+- (* Lload notrap1 *)
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lload_notrap1.
+  rewrite <- H.
+  apply eval_addressing_preserved.
+  exact symbols_preserved. eauto.
+  econstructor; eauto.
+  
+- (* Lload notrap2 *)
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lload_notrap2.
+  rewrite <- H.
+  apply eval_addressing_preserved.
+  exact symbols_preserved. eauto. eauto.
+  econstructor; eauto.
+
+- (* Lgetstack *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   econstructor; eauto.
 
-  (* Lsetstack *)
+- (* Lsetstack *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   econstructor; eauto.
 
-  (* Lstore *)
+- (* Lstore *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor.
   instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved.
   exact symbols_preserved. eauto. eauto.
   econstructor; eauto.
 
-  (* Lcall *)
+- (* Lcall *)
   exploit find_function_translated; eauto. intros [tfd [A B]].
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   symmetry; eapply sig_preserved; eauto.
   econstructor; eauto. constructor; auto. econstructor; eauto.
 
-  (* Ltailcall *)
+- (* Ltailcall *)
   exploit find_function_translated; eauto. intros [tfd [A B]].
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
@@ -633,18 +649,18 @@ Proof.
   rewrite (match_parent_locset _ _ STACKS).
   econstructor; eauto.
 
-  (* Lbuiltin *)
+- (* Lbuiltin *)
   left; econstructor; split. simpl.
   apply plus_one. eapply exec_Lbuiltin; eauto.
   eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
   eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 
-  (* Lbranch *)
+- (* Lbranch *)
   assert ((reachable f)!!pc = true). apply REACH; simpl; auto.
   right; split. simpl; lia. split. auto. simpl. econstructor; eauto.
 
-  (* Lcond *)
+- (* Lcond *)
   assert (REACH1: (reachable f)!!pc1 = true) by (apply REACH; simpl; auto).
   assert (REACH2: (reachable f)!!pc2 = true) by (apply REACH; simpl; auto).
   simpl linearize_block.
@@ -670,18 +686,18 @@ Proof.
   apply plus_one. eapply exec_Lcond_false. eauto. eauto.
   econstructor; eauto.
 
-  (* Ljumptable *)
+- (* Ljumptable *)
   assert (REACH': (reachable f)!!pc = true).
     apply REACH. simpl. eapply list_nth_z_in; eauto.
   right; split. simpl; lia. split. auto. econstructor; eauto.
 
-  (* Lreturn *)
+- (* Lreturn *)
   left; econstructor; split.
   simpl. apply plus_one. econstructor; eauto.
   rewrite (stacksize_preserved _ _ TRF). eauto.
   rewrite (match_parent_locset _ _ STACKS). econstructor; eauto.
 
-  (* internal functions *)
+- (* internal functions *)
   assert (REACH: (reachable f)!!(LTL.fn_entrypoint f) = true).
     apply reachable_entrypoint.
   monadInv H7.
@@ -691,13 +707,13 @@ Proof.
   generalize EQ; intro EQ'; monadInv EQ'. simpl.
   econstructor; eauto. simpl. eapply is_tail_add_branch. constructor.
 
-  (* external function *)
+- (* external function *)
   monadInv H8. left; econstructor; split.
   apply plus_one. eapply exec_function_external; eauto.
   eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 
-  (* return *)
+- (* return *)
   inv H3. inv H1.
   left; econstructor; split.
   apply plus_one. econstructor.
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 0e3b7c8e..cf903aad 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -76,7 +76,7 @@ Definition wt_instr (i: instruction) : bool :=
           let (targs, tres) := type_of_operation op in
           subtype tres (mreg_type res)
       end
-  | Lload chunk addr args dst =>
+  | Lload trap chunk addr args dst =>
       subtype (type_of_chunk chunk) (mreg_type dst)
   | Ltailcall sg ros =>
       zeq (size_arguments sg) 0
@@ -321,17 +321,31 @@ Local Opaque mreg_type.
   + (* other ops *)
     destruct (type_of_operation op) as [ty_args ty_res] eqn:TYOP. InvBooleans.
     econstructor; eauto.
-    apply wt_setreg. eapply Val.has_subtype; eauto.
+ 
+    apply wt_setreg; auto; try (apply wt_undef_regs; auto).
+    eapply Val.has_subtype; eauto.
     change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto.
     red; intros; subst op. simpl in ISMOVE.
     destruct args; try discriminate. destruct args; discriminate.
-    apply wt_undef_regs; auto.
+    (* no longer needed apply wt_undef_regs; auto. *)
 - (* load *)
   simpl in *; InvBooleans.
   econstructor; eauto.
   apply wt_setreg. eapply Val.has_subtype; eauto.
   destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
   apply wt_undef_regs; auto.
+- (* load notrap1 *)
+  simpl in *; InvBooleans.
+  econstructor; eauto.
+  apply wt_setreg. eapply Val.has_subtype; eauto.
+  constructor.
+  apply wt_undef_regs; auto.
+- (* load notrap2 *)
+  simpl in *; InvBooleans.
+  econstructor; eauto.
+  apply wt_setreg. eapply Val.has_subtype; eauto.
+  constructor.
+  apply wt_undef_regs; auto.
 - (* store *)
   simpl in *; InvBooleans.
   econstructor. eauto. eauto. eauto.
diff --git a/backend/Liveness.v b/backend/Liveness.v
index 16533158..9652b363 100644
--- a/backend/Liveness.v
+++ b/backend/Liveness.v
@@ -79,7 +79,7 @@ Definition transfer
             reg_list_live args (reg_dead res after)
           else
             after
-      | Iload chunk addr args dst s =>
+      | Iload trap chunk addr args dst s =>
           if Regset.mem dst after then
             reg_list_live args (reg_dead dst after)
           else
@@ -94,7 +94,7 @@ Definition transfer
       | Ibuiltin ef args res s =>
           reg_list_live (params_of_builtin_args args)
             (reg_list_dead (params_of_builtin_res res) after)
-      | Icond cond args ifso ifnot =>
+      | Icond cond args ifso ifnot _ =>
           reg_list_live args after
       | Ijumptable arg tbl =>
           reg_live arg after
diff --git a/backend/Mach.v b/backend/Mach.v
index 9fdee9eb..2cfd738d 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -56,7 +56,7 @@ Inductive instruction: Type :=
   | Msetstack: mreg -> ptrofs -> typ -> instruction
   | Mgetparam: ptrofs -> typ -> mreg -> instruction
   | Mop: operation -> list mreg -> mreg -> instruction
-  | Mload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
+  | Mload: trapping_mode -> memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mcall: signature -> mreg + ident -> instruction
   | Mtailcall: signature -> mreg + ident -> instruction
@@ -321,11 +321,24 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Mop op args res :: c) rs m)
         E0 (State s f sp c rs' m)
   | exec_Mload:
-      forall s f sp chunk addr args dst c rs m a v rs',
+      forall s f sp trap chunk addr args dst c rs m a v rs',
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- v) ->
-      step (State s f sp (Mload chunk addr args dst :: c) rs m)
+      step (State s f sp (Mload trap chunk addr args dst :: c) rs m)
+        E0 (State s f sp c rs' m)
+  | exec_Mload_notrap1:
+      forall s f sp chunk addr args dst c rs m rs',
+      eval_addressing ge sp addr rs##args = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
+      step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
+        E0 (State s f sp c rs' m)
+  | exec_Mload_notrap2:
+      forall s f sp chunk addr args dst c rs m a rs',
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
+      step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
         E0 (State s f sp c rs' m)
   | exec_Mstore:
       forall s f sp chunk addr args src c rs m m' a rs',
diff --git a/backend/OpHelpers.v b/backend/OpHelpers.v
new file mode 100644
index 00000000..7f8af39b
--- /dev/null
+++ b/backend/OpHelpers.v
@@ -0,0 +1,54 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib.
+Require Import AST Integers Floats.
+Require Import Op CminorSel.
+
+(** Some arithmetic operations are transformed into calls to
+  runtime library functions.  The following type class collects
+  the names of these functions. *)
+
+Definition sig_l_l := mksignature (Tlong :: nil) Tlong cc_default.
+Definition sig_l_f := mksignature (Tlong :: nil) Tfloat cc_default.
+Definition sig_l_s := mksignature (Tlong :: nil) Tsingle cc_default.
+Definition sig_f_l := mksignature (Tfloat :: nil) Tlong cc_default.
+Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) Tlong cc_default.
+Definition sig_li_l := mksignature (Tlong :: Tint :: nil) Tlong cc_default.
+Definition sig_ii_l := mksignature (Tint :: Tint :: nil) Tlong cc_default.
+Definition sig_ii_i := mksignature (Tint :: Tint :: nil) Tint cc_default.
+Definition sig_ff_f := mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default.
+Definition sig_ss_s := mksignature (Tsingle :: Tsingle :: nil) Tsingle cc_default.
+
+Class helper_functions := mk_helper_functions {
+  i64_dtos: ident;                      (**r float64 -> signed long *)
+  i64_dtou: ident;                      (**r float64 -> unsigned long *)
+  i64_stod: ident;                      (**r signed long -> float64 *)
+  i64_utod: ident;                      (**r unsigned long -> float64 *)
+  i64_stof: ident;                      (**r signed long -> float32 *)
+  i64_utof: ident;                      (**r unsigned long -> float32 *)
+  i64_sdiv: ident;                      (**r signed division *)
+  i64_udiv: ident;                      (**r unsigned division *)
+  i64_smod: ident;                      (**r signed remainder *)
+  i64_umod: ident;                      (**r unsigned remainder *)
+  i64_shl: ident;                       (**r shift left *)
+  i64_shr: ident;                       (**r shift right unsigned *)
+  i64_sar: ident;                       (**r shift right signed *)
+  i64_umulh: ident;                     (**r unsigned multiply high *)
+  i64_smulh: ident;                     (**r signed multiply high *)
+  i32_sdiv: ident;                      (**r signed division *)
+  i32_udiv: ident;                      (**r unsigned division *)
+  i32_smod: ident;                      (**r signed remainder *)
+  i32_umod: ident;                      (**r unsigned remainder *)
+  f64_div: ident;                       (**float division*)
+  f32_div: ident;                       (**float division*)
+}.
diff --git a/backend/OpHelpersproof.v b/backend/OpHelpersproof.v
new file mode 100644
index 00000000..63199520
--- /dev/null
+++ b/backend/OpHelpersproof.v
@@ -0,0 +1,90 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Cminor.
+Require Import Op.
+Require Import CminorSel.
+Require Import Events.
+Require Import OpHelpers.
+
+(** * Axiomatization of the helper functions *)
+
+Definition external_implements (name: string) (sg: signature) (vargs: list val) (vres: val) : Prop :=
+  forall F V (ge: Genv.t F V) m,
+  external_call (EF_runtime name sg) ge vargs m E0 vres m.
+
+Definition builtin_implements (name: string) (sg: signature) (vargs: list val) (vres: val) : Prop :=
+  forall F V (ge: Genv.t F V) m,
+  external_call (EF_builtin name sg) ge vargs m E0 vres m.
+
+Axiom arith_helpers_correct :
+    (forall x z, Val.longoffloat x = Some z -> external_implements "__compcert_i64_dtos" sig_f_l (x::nil) z)
+ /\ (forall x z, Val.longuoffloat x = Some z -> external_implements "__compcert_i64_dtou" sig_f_l (x::nil) z)
+ /\ (forall x z, Val.floatoflong x = Some z -> external_implements "__compcert_i64_stod" sig_l_f (x::nil) z)
+ /\ (forall x z, Val.floatoflongu x = Some z -> external_implements "__compcert_i64_utod" sig_l_f (x::nil) z)
+ /\ (forall x z, Val.singleoflong x = Some z -> external_implements "__compcert_i64_stof" sig_l_s (x::nil) z)
+ /\ (forall x z, Val.singleoflongu x = Some z -> external_implements "__compcert_i64_utof" sig_l_s (x::nil) z)
+ /\ (forall x, builtin_implements "__builtin_negl" sig_l_l (x::nil) (Val.negl x))
+ /\ (forall x y, builtin_implements "__builtin_addl" sig_ll_l (x::y::nil) (Val.addl x y))
+ /\ (forall x y, builtin_implements "__builtin_subl" sig_ll_l (x::y::nil) (Val.subl x y))
+ /\ (forall x y, builtin_implements "__builtin_mull" sig_ii_l (x::y::nil) (Val.mull' x y))
+ /\ (forall x y z, Val.divls x y = Some z -> external_implements "__compcert_i64_sdiv" sig_ll_l (x::y::nil) z)
+ /\ (forall x y z, Val.divlu x y = Some z -> external_implements "__compcert_i64_udiv" sig_ll_l (x::y::nil) z)
+ /\ (forall x y z, Val.modls x y = Some z -> external_implements "__compcert_i64_smod" sig_ll_l (x::y::nil) z)
+ /\ (forall x y z, Val.modlu x y = Some z -> external_implements "__compcert_i64_umod" sig_ll_l (x::y::nil) z)
+ /\ (forall x y, external_implements "__compcert_i64_shl" sig_li_l (x::y::nil) (Val.shll x y))
+ /\ (forall x y, external_implements "__compcert_i64_shr" sig_li_l (x::y::nil) (Val.shrlu x y))
+ /\ (forall x y, external_implements "__compcert_i64_sar" sig_li_l (x::y::nil) (Val.shrl x y))
+ /\ (forall x y, external_implements "__compcert_i64_umulh" sig_ll_l (x::y::nil) (Val.mullhu x y))
+ /\ (forall x y, external_implements "__compcert_i64_smulh" sig_ll_l (x::y::nil) (Val.mullhs x y))
+ /\ (forall x y z, Val.divs x y = Some z -> external_implements "__compcert_i32_sdiv" sig_ii_i (x::y::nil) z)
+ /\ (forall x y z, Val.divu x y = Some z -> external_implements "__compcert_i32_udiv" sig_ii_i (x::y::nil) z)
+ /\ (forall x y z, Val.mods x y = Some z -> external_implements "__compcert_i32_smod" sig_ii_i (x::y::nil) z)
+ /\ (forall x y z, Val.modu x y = Some z -> external_implements "__compcert_i32_umod" sig_ii_i (x::y::nil) z)
+ /\ (forall x y z, Val.divfs x y = z -> external_implements "__compcert_f32_div" sig_ss_s (x::y::nil) z)
+ /\ (forall x y z, Val.divf x y = z -> external_implements "__compcert_f64_div" sig_ff_f (x::y::nil) z)
+.
+
+Definition helper_declared {F V: Type} (p: AST.program (AST.fundef F) V) (id: ident) (name: string) (sg: signature) : Prop :=
+  (prog_defmap p)!id = Some (Gfun (External (EF_runtime name sg))).
+
+Definition helper_functions_declared {F V: Type} (p: AST.program (AST.fundef F) V) (hf: helper_functions) : Prop :=
+     helper_declared p i64_dtos "__compcert_i64_dtos" sig_f_l
+  /\ helper_declared p i64_dtou "__compcert_i64_dtou" sig_f_l
+  /\ helper_declared p i64_stod "__compcert_i64_stod" sig_l_f
+  /\ helper_declared p i64_utod "__compcert_i64_utod" sig_l_f
+  /\ helper_declared p i64_stof "__compcert_i64_stof" sig_l_s
+  /\ helper_declared p i64_utof "__compcert_i64_utof" sig_l_s
+  /\ helper_declared p i64_sdiv "__compcert_i64_sdiv" sig_ll_l
+  /\ helper_declared p i64_udiv "__compcert_i64_udiv" sig_ll_l
+  /\ helper_declared p i64_smod "__compcert_i64_smod" sig_ll_l
+  /\ helper_declared p i64_umod "__compcert_i64_umod" sig_ll_l
+  /\ helper_declared p i64_shl "__compcert_i64_shl" sig_li_l
+  /\ helper_declared p i64_shr "__compcert_i64_shr" sig_li_l
+  /\ helper_declared p i64_sar "__compcert_i64_sar" sig_li_l
+  /\ helper_declared p i64_umulh "__compcert_i64_umulh" sig_ll_l
+  /\ helper_declared p i64_smulh "__compcert_i64_smulh" sig_ll_l
+  /\ helper_declared p i32_sdiv "__compcert_i32_sdiv" sig_ii_i
+  /\ helper_declared p i32_udiv "__compcert_i32_udiv" sig_ii_i
+  /\ helper_declared p i32_smod "__compcert_i32_smod" sig_ii_i
+  /\ helper_declared p i32_umod "__compcert_i32_umod" sig_ii_i
+  /\ helper_declared p f32_div  "__compcert_f32_div"  sig_ss_s
+  /\ helper_declared p f64_div  "__compcert_f64_div"  sig_ff_f
+.
diff --git a/backend/PrintAsm.ml b/backend/PrintAsm.ml
index 22df68ae..7cc386ed 100644
--- a/backend/PrintAsm.ml
+++ b/backend/PrintAsm.ml
@@ -121,7 +121,7 @@ module Printer(Target:TARGET) =
           let sec =
             match C2C.atom_sections name with
             | [s] -> s
-            |  _  -> Section_data Init
+            |  _  -> Section_data (Init, false)  (* FIX Sylvain: not sure of this fix *) 
           and align =
             match C2C.atom_alignof name with
             | Some a -> a
diff --git a/backend/PrintAsmaux.ml b/backend/PrintAsmaux.ml
index e39ba8aa..f1978ad2 100644
--- a/backend/PrintAsmaux.ml
+++ b/backend/PrintAsmaux.ml
@@ -111,6 +111,10 @@ let elf_symbol_offset oc (symb, ofs) =
   if ofs <> 0L then fprintf oc " + %Ld" ofs
 
 (* Functions for fun and var info *)
+let elf_text_print_fun_info oc name =
+  fprintf oc "	.type	%s, @function\n" name;
+  fprintf oc "	.size	%s, . - %s\n" name name
+
 let elf_print_fun_info oc name =
   fprintf oc "	.type	%a, @function\n" elf_symbol name;
   fprintf oc "	.size	%a, . - %a\n" elf_symbol name elf_symbol name
@@ -328,3 +332,84 @@ let variable_section ~sec ?bss ?reloc ?(common = !Clflags.option_fcommon) i =
   | Init -> sec
   | Init_reloc ->
       begin match reloc with Some s -> s | None -> sec end
+
+
+(* Profiling *)
+let profiling_table : (Digest.t, int) Hashtbl.t = Hashtbl.create 1000;;
+let next_profiling_position = ref 0;;
+let profiling_position (x : Digest.t) : int =
+  match Hashtbl.find_opt profiling_table x with
+  | None -> let y = !next_profiling_position in
+            next_profiling_position := succ y;
+            Hashtbl.replace profiling_table x y;
+            y
+  | Some y -> y;;
+
+let profiling_ids () =
+  let nr_items = !next_profiling_position in
+  let ar = Array.make nr_items "" in
+  Hashtbl.iter
+    (fun x y -> ar.(y) <- x)
+    profiling_table;
+  ar;;
+
+let print_profiling_id oc id =
+  assert (String.length id = 16);
+  output_string oc "	.byte";
+  for i=0 to 15 do
+    fprintf oc " 0x%02x" (Char.code (String.get id i));
+    if i < 15 then output_char oc ','
+  done;
+  output_char oc '\n';;
+
+let profiling_counter_table_name = ".compcert_profiling_counters"
+and profiling_id_table_name = ".compcert_profiling_ids"
+and profiling_write_table = ".compcert_profiling_save_for_this_object"
+and profiling_init = ".compcert_profiling_init"
+and profiling_write_table_helper = "_compcert_write_profiling_table"
+and dtor_section = ".dtors.65435,\"aw\",@progbits"
+(* and fini_section = ".fini_array_00100,\"aw\"" *)
+and init_section = ".init_array,\"aw\"";;
+
+type finalizer_call_method =
+  | Dtors
+  | Init_atexit of (out_channel -> string -> unit);;
+
+let write_symbol_pointer oc sym =
+  if Archi.ptr64
+  then fprintf oc "	.8byte	%s\n" sym
+  else fprintf oc "	.4byte	%s\n" sym;;
+
+let print_profiling_epilogue declare_function finalizer_call_method print_profiling_stub oc =
+  if !Clflags.option_profile_arcs
+  then
+    let nr_items = !next_profiling_position in
+    if nr_items > 0
+    then
+      begin
+        fprintf oc "	.lcomm	%s, %d\n"
+          profiling_counter_table_name (nr_items * 16);
+        fprintf oc "	.section	.rodata\n";
+        fprintf oc "%s:\n" profiling_id_table_name;
+        Array.iter (print_profiling_id oc) (profiling_ids ());
+        fprintf oc "	.text\n";
+        fprintf oc "%s:\n" profiling_write_table;
+        print_profiling_stub oc nr_items
+          profiling_id_table_name
+          profiling_counter_table_name;
+        declare_function oc profiling_write_table;
+        match finalizer_call_method with
+        | Dtors ->
+           fprintf oc "	.section	%s\n" dtor_section;
+           write_symbol_pointer oc profiling_write_table
+        | Init_atexit(atexit_call) ->
+           fprintf oc "	.section	%s\n" init_section;
+           write_symbol_pointer oc profiling_init;
+           fprintf oc "	.text\n";
+           fprintf oc "%s:\n" profiling_init;
+           atexit_call oc profiling_write_table;
+           declare_function oc profiling_init
+      end;;
+
+let profiling_offset id kind =
+  ((profiling_position id)*2 + kind)*8;;
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index 59b340f7..9ca0e3a0 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -35,6 +35,7 @@ let precedence = function
   | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddfs|Osubfs|Oaddl|Osubl), _, _) -> (12, LtoR)
   | Ebinop((Oshl|Oshr|Oshru|Oshll|Oshrl|Oshrlu), _, _) -> (11, LtoR)
   | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpfs _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR)
+  | Ebinop((Oexpect _), _, _) -> (9, LtoR)
   | Ebinop((Oand|Oandl), _, _) -> (8, LtoR)
   | Ebinop((Oxor|Oxorl), _, _) -> (7, LtoR)
   | Ebinop((Oor|Oorl), _, _) -> (6, LtoR)
@@ -90,6 +91,7 @@ let comparison_name = function
   | Cge -> ">="
 
 let name_of_binop = function
+  | Oexpect _ -> "expect"
   | Oadd -> "+"
   | Osub -> "-"
   | Omul -> "*"
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index d75ba19c..87e8a1fc 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -61,9 +61,10 @@ let print_succ pp s dfl =
 let print_instruction pp succ = function
   | Lop(op, args, res) ->
       fprintf pp "%a = %a" mreg res (print_operation mreg) (op, args)
-  | Lload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]"
-         mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args)
+  | Lload(trap,chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a]%a"
+        mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args)
+        print_trapping_mode trap
   | Lgetstack(sl, ofs, ty, dst) ->
       fprintf pp "%a = %a" mreg dst slot (sl, ofs, ty)
   | Lsetstack(src, sl, ofs, ty) ->
@@ -82,10 +83,11 @@ let print_instruction pp succ = function
         (print_builtin_args loc) args
   | Lbranch s ->
       print_succ pp s succ
-  | Lcond(cond, args, s1, s2) ->
-      fprintf pp "if (%a) goto %d else goto %d"
+  | Lcond(cond, args, s1, s2, info) ->
+      fprintf pp "if (%a) goto %d else goto %d (prediction: %s)"
         (print_condition mreg) (cond, args)
         (P.to_int s1) (P.to_int s2)
+        (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
   | Ljumptable(arg, tbl) ->
       let tbl = Array.of_list tbl in
       fprintf pp "jumptable (%a)" mreg arg;
@@ -131,10 +133,10 @@ let print_program pp (prog: LTL.program) =
 
 let destination : string option ref = ref None
 
-let print_if prog =
+let print_if passno prog =
   match !destination with
   | None -> ()
   | Some f ->
-      let oc = open_out f in
+      let oc = open_out (f ^ "." ^ Z.to_string passno) in
       print_program oc prog;
       close_out oc
diff --git a/backend/PrintMach.ml b/backend/PrintMach.ml
index 8a5f9a7c..3481421b 100644
--- a/backend/PrintMach.ml
+++ b/backend/PrintMach.ml
@@ -47,10 +47,11 @@ let print_instruction pp i =
   | Mop(op, args, res) ->
       fprintf pp "\t%a = %a\n"
          reg res (PrintOp.print_operation reg) (op, args)
-  | Mload(chunk, addr, args, dst) ->
-      fprintf pp "\t%a = %s[%a]\n"
+  | Mload(trap, chunk, addr, args, dst) ->
+      fprintf pp "\t%a = %s[%a]%a\n"
          reg dst (name_of_chunk chunk)
          (PrintOp.print_addressing reg) (addr, args)
+         print_trapping_mode trap
   | Mstore(chunk, addr, args, src) ->
       fprintf pp "\t%s[%a] = %a\n"
          (name_of_chunk chunk)
diff --git a/backend/PrintRTL.ml b/backend/PrintRTL.ml
index 841540b6..b2ef05ca 100644
--- a/backend/PrintRTL.ml
+++ b/backend/PrintRTL.ml
@@ -50,10 +50,11 @@ let print_instruction pp (pc, i) =
       fprintf pp "%a = %a\n"
          reg res (PrintOp.print_operation reg) (op, args);
       print_succ pp s (pc - 1)
-  | Iload(chunk, addr, args, dst, s) ->
-      fprintf pp "%a = %s[%a]\n"
+  | Iload(trap, chunk, addr, args, dst, s) ->
+      fprintf pp "%a = %s[%a]%a\n"
          reg dst (name_of_chunk chunk)
-         (PrintOp.print_addressing reg) (addr, args);
+         (PrintOp.print_addressing reg) (addr, args)
+         print_trapping_mode trap;
       print_succ pp s (pc - 1)
   | Istore(chunk, addr, args, src, s) ->
       fprintf pp "%s[%a] = %a\n"
@@ -74,10 +75,11 @@ let print_instruction pp (pc, i) =
         (name_of_external ef)
         (print_builtin_args reg) args;
       print_succ pp s (pc - 1)
-  | Icond(cond, args, s1, s2) ->
-      fprintf pp "if (%a) goto %d else goto %d\n"
+  | Icond(cond, args, s1, s2, info) ->
+      fprintf pp "if (%a) goto %d else goto %d (prediction: %s)\n"
         (PrintOp.print_condition reg) (cond, args)
         (P.to_int s1) (P.to_int s2)
+        (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
   | Ijumptable(arg, tbl) ->
       let tbl = Array.of_list tbl in
       fprintf pp "jumptable (%a)\n" reg arg;
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index aeaef25e..6f2b1df9 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -86,9 +86,10 @@ let print_instruction pp succ = function
       fprintf pp "(%a) = (%a) using %a, %a" vars dsts vars srcs var t1 var t2
   | Xop(op, args, res) ->
       fprintf pp "%a = %a" var res (print_operation var) (op, args)
-  | Xload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]"
-         var dst (name_of_chunk chunk) (print_addressing var) (addr, args)
+  | Xload(trap, chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a]%a"
+        var dst (name_of_chunk chunk) (print_addressing var) (addr, args)
+        print_trapping_mode trap
   | Xstore(chunk, addr, args, src) ->
       fprintf pp "%s[%a] = %a"
          (name_of_chunk chunk) (print_addressing var) (addr, args) var src
@@ -103,7 +104,7 @@ let print_instruction pp succ = function
         (print_builtin_args var) args
   | Xbranch s ->
       print_succ pp s succ
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       fprintf pp "if (%a) goto %d else goto %d"
         (print_condition var) (cond, args)
         (P.to_int s1) (P.to_int s2)
diff --git a/backend/Profiling.v b/backend/Profiling.v
new file mode 100644
index 00000000..83e96311
--- /dev/null
+++ b/backend/Profiling.v
@@ -0,0 +1,77 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+
+Parameter function_id : function -> AST.profiling_id.
+Parameter branch_id : AST.profiling_id -> node -> AST.profiling_id.
+
+Section PER_FUNCTION_ID.
+  Variable f_id : AST.profiling_id.
+  
+  Definition inject_profiling_call (prog : code)
+             (pc extra_pc ifso ifnot : node) : node * code :=
+    let id := branch_id f_id pc in
+    let extra_pc' := Pos.succ extra_pc in
+    let prog' := PTree.set extra_pc
+                           (Ibuiltin (EF_profiling id 0%Z) nil BR_none ifnot) prog in
+    let prog'':= PTree.set extra_pc'
+                           (Ibuiltin (EF_profiling id 1%Z) nil BR_none ifso) prog' in
+    (Pos.succ extra_pc', prog'').
+  
+  Definition inject_at (prog : code) (pc extra_pc : node) : node * code :=
+    match PTree.get pc prog with
+    | Some (Icond cond args ifso ifnot expected) =>
+      inject_profiling_call
+        (PTree.set pc
+                   (Icond cond args (Pos.succ extra_pc) extra_pc expected) prog)
+        pc extra_pc ifso ifnot
+    | _ => inject_profiling_call prog pc extra_pc 1 1 (* does not happen *)
+    end.
+
+  Definition inject_at' (already : node * code) pc :=
+    let (extra_pc, prog) := already in
+    inject_at prog pc extra_pc.
+
+  Definition inject_l (prog : code) extra_pc injections :=
+    List.fold_left (fun already (inject_pc : node) =>
+                      inject_at' already inject_pc)
+                   injections
+                   (extra_pc, prog).
+
+  Definition gen_conditions (prog : code) :=
+    List.map fst (PTree.elements (PTree.filter1
+                                    (fun instr =>
+                                       match instr with
+                                       | Icond cond args ifso ifnot expected => true
+                                       | _ => false
+                                       end) prog)).
+End PER_FUNCTION_ID.
+
+Definition transf_function (f : function) : function :=
+  let max_pc := max_pc_function f in
+  let conditions := gen_conditions (fn_code f) in
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := snd (inject_l (function_id f) (fn_code f) (Pos.succ max_pc) conditions);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/ProfilingExploit.v b/backend/ProfilingExploit.v
new file mode 100644
index 00000000..2325f582
--- /dev/null
+++ b/backend/ProfilingExploit.v
@@ -0,0 +1,42 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+
+Parameter function_id : function -> AST.profiling_id.
+Parameter branch_id : AST.profiling_id -> node -> AST.profiling_id.
+Parameter condition_oracle : AST.profiling_id -> option bool.
+
+Definition transf_instr (f_id : AST.profiling_id)
+           (pc : node) (i : instruction) : instruction :=
+  match i with
+  | Icond cond args ifso ifnot None =>
+    Icond cond args ifso ifnot (condition_oracle (branch_id f_id pc))
+  | _ => i
+  end.
+
+Definition transf_function (f : function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (function_id f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/ProfilingExploitproof.v b/backend/ProfilingExploitproof.v
new file mode 100644
index 00000000..78de09af
--- /dev/null
+++ b/backend/ProfilingExploitproof.v
@@ -0,0 +1,236 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import ProfilingExploit.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some(transf_instr (function_id f) pc i).
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite Hcode.
+  reflexivity.
+Qed.
+
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+  | match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. 
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  constructor; auto. 
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. constructor; auto. constructor.
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. auto.
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* cond *)
+- destruct predb.
+  + econstructor; split.
+    eapply exec_Icond; eauto.
+    constructor; auto.
+  + simpl transf_instr in H1.
+    destruct condition_oracle in H1.
+    * econstructor; split.
+      eapply exec_Icond; eauto.
+      constructor; auto.
+    * econstructor; split.
+      eapply exec_Icond; eauto.
+      constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  constructor; auto.
+(* return *)
+- econstructor; split.
+  eapply exec_Ireturn; eauto.
+  constructor; auto.
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Profilingaux.ml b/backend/Profilingaux.ml
new file mode 100644
index 00000000..6ecea9e6
--- /dev/null
+++ b/backend/Profilingaux.ml
@@ -0,0 +1,85 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open Camlcoq
+open RTL
+open Maps
+   
+type identifier = Digest.t
+
+let pp_id channel (x : identifier) =
+  assert(String.length x = 16);
+  for i=0 to 15 do
+    Printf.fprintf channel "%02x" (Char.code (String.get x i))
+  done
+
+let print_anonymous_function pp f =
+  let instrs =
+    List.sort
+      (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
+      (List.rev_map
+        (fun (pc, i) -> (P.to_int pc, i))
+        (PTree.elements f.fn_code)) in
+  PrintRTL.print_succ pp f.fn_entrypoint
+    (match instrs with (pc1, _) :: _ -> pc1 | [] -> -1);
+  List.iter (PrintRTL.print_instruction pp) instrs;
+  Printf.fprintf pp "}\n\n"
+  
+let function_id (f : coq_function) : identifier =
+  let digest = Digest.string (Marshal.to_string f []) in
+  (*
+  Printf.fprintf stderr "FUNCTION hash = %a\n" pp_id digest;
+  print_anonymous_function stderr f;
+   *)
+  digest
+
+let branch_id (f_id : identifier) (node : P.t) : identifier =
+  Digest.string (f_id ^ (Int64.to_string (P.to_int64 node)));;
+
+let profiling_counts : (identifier, (Int64.t*Int64.t)) Hashtbl.t = Hashtbl.create 1000;;
+
+let get_counts id =
+  match Hashtbl.find_opt profiling_counts id with
+  | Some x -> x
+  | None -> (0L, 0L);;
+  
+let add_profiling_counts id counter0 counter1 =
+  let (old0, old1) = get_counts id in
+    Hashtbl.replace profiling_counts id (Int64.add old0 counter0,
+                                         Int64.add old1 counter1);;
+
+let input_counter (ic : in_channel) : Int64.t =
+  let r = ref Int64.zero in
+  for i=0 to 7
+  do
+    r := Int64.add !r (Int64.shift_left (Int64.of_int (input_byte ic)) (8*i))
+  done;
+  !r;;
+  
+let load_profiling_info (filename : string) : unit =
+  let ic = open_in filename in
+  try
+    while true do
+      let id : identifier = really_input_string ic 16 in
+      let counter0 = input_counter ic in
+      let counter1 = input_counter ic in
+      (* Printf.fprintf stderr "%a : %Ld %Ld\n" pp_id id counter0 counter1 *)
+      add_profiling_counts id counter0 counter1
+    done
+  with End_of_file -> close_in ic;;
+
+let condition_oracle (id : identifier) : bool option =
+  let (count0, count1) = get_counts id in
+  (* (if count0 <> 0L || count1 <> 0L then
+    Printf.fprintf stderr "%a : %Ld %Ld\n" pp_id id count0 count1); *)
+  if count0 = count1 then None
+  else Some(count1 > count0);;
diff --git a/backend/Profilingproof.v b/backend/Profilingproof.v
new file mode 100644
index 00000000..0cebc601
--- /dev/null
+++ b/backend/Profilingproof.v
@@ -0,0 +1,704 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import Profiling.
+Require Import Lia.
+
+Local Open Scope positive.
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma pair_expand:
+  forall { A B : Type } (p : A*B),
+    p = ((fst p), (snd p)).
+Proof.
+  destruct p; simpl; trivial.
+Qed.
+
+Lemma inject_profiling_call_preserves:
+  forall id body pc extra_pc ifso ifnot pc0,
+    pc0 < extra_pc ->
+    PTree.get pc0 (snd (inject_profiling_call id body pc extra_pc ifso ifnot)) = PTree.get pc0 body.
+Proof.
+  intros. simpl.
+  rewrite PTree.gso by lia.
+  apply PTree.gso.
+  lia.
+Qed.
+    
+Lemma inject_at_preserves :
+  forall id body pc extra_pc pc0,
+    pc0 < extra_pc ->
+    pc0 <> pc ->
+    PTree.get pc0 (snd (inject_at id body pc extra_pc)) = PTree.get pc0 body.
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc body) eqn:GET.
+  - destruct i.
+    all: try (rewrite inject_profiling_call_preserves; trivial; fail).
+    rewrite inject_profiling_call_preserves by trivial.
+    apply PTree.gso; lia.
+  - apply inject_profiling_call_preserves; trivial.
+Qed.
+
+Lemma inject_profiling_call_increases:
+  forall id body pc extra_pc ifso ifnot,
+    fst (inject_profiling_call id body pc extra_pc ifso ifnot) = extra_pc + 2.
+Proof.
+  intros.
+  simpl.
+  rewrite <- (Pos2Nat.id (Pos.succ (Pos.succ extra_pc))).
+  rewrite <- (Pos2Nat.id (extra_pc + 2)).
+  rewrite !Pos2Nat.inj_succ.
+  rewrite !Pos2Nat.inj_add.
+  apply f_equal.
+  lia.
+Qed.
+
+Lemma inject_at_increases:
+  forall id body pc extra_pc,
+    (fst (inject_at id body pc extra_pc)) = extra_pc + 2.
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc body).
+  - destruct i; apply inject_profiling_call_increases.
+  - apply inject_profiling_call_increases.
+Qed.
+
+Lemma inject_l_preserves :
+  forall id injections body extra_pc pc0,
+    pc0 < extra_pc ->
+    List.forallb (fun injection => if peq injection pc0 then false else true) injections = true ->
+    PTree.get pc0 (snd (inject_l id body extra_pc injections)) = PTree.get pc0 body.
+Proof.
+  induction injections;
+    intros until pc0; intros BEFORE ALL; simpl; trivial.
+  unfold inject_l.
+  simpl in ALL.
+  rewrite andb_true_iff in ALL.
+  destruct ALL as [NEQ ALL].
+  simpl.
+  rewrite pair_expand with (p := inject_at id body a extra_pc).
+  progress fold (inject_l id (snd (inject_at id body a extra_pc))
+              (fst (inject_at id body a extra_pc))
+              injections).
+  rewrite IHinjections; trivial.
+  - apply inject_at_preserves; trivial.
+    destruct (peq a pc0); congruence.
+  - rewrite inject_at_increases.
+    lia.
+Qed.
+
+Fixpoint inject_l_position extra_pc
+         (injections : list node)
+         (k : nat) {struct injections} : node :=
+  match injections with
+  | nil => extra_pc
+  | pc::l' =>
+    match k with
+    | O => extra_pc
+    | S k' => inject_l_position (extra_pc + 2) l' k'
+    end
+  end.
+
+Lemma inject_l_position_increases : forall injections pc k,
+    pc <= inject_l_position pc injections k.
+Proof.
+  induction injections; simpl; intros.
+  lia.
+  destruct k.
+  lia.
+  specialize IHinjections with (pc := pc + 2) (k := k).
+  lia.
+Qed.
+
+Lemma inject_l_injected_pc:
+  forall f_id injections cond args ifso ifnot expected body injnum pc extra_pc
+         (INSTR : body ! pc = Some  (Icond cond args ifso ifnot expected))
+         (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+         (NOREPET : list_norepet injections)
+         (NUMBER : nth_error injections injnum = Some pc),
+    PTree.get pc (snd (inject_l f_id body extra_pc injections)) =
+    Some (Icond cond args
+                (Pos.succ (inject_l_position extra_pc injections injnum))
+                (inject_l_position extra_pc injections injnum) expected).
+Proof.
+  induction injections; simpl; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER. }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  rewrite Pos.ltb_lt in BELOW1.
+  unfold inject_l.
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+  progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+              (fst (inject_at f_id body a extra_pc))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - unfold inject_at.
+      rewrite INSTR.
+      unfold inject_profiling_call. simpl.
+      rewrite PTree.gso by lia.
+      rewrite PTree.gso by lia.
+      apply PTree.gss.
+    - rewrite inject_at_increases.
+      lia.
+    - inv NOREPET.
+      rewrite forallb_forall.
+      intros x IN.
+      destruct peq as [EQ | ]; trivial.
+      subst x.
+      contradiction.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (ifso := ifso) (ifnot := ifnot).
+  - rewrite inject_at_preserves; trivial.
+    + rewrite forallb_forall in BELOW2.
+      rewrite <- Pos.ltb_lt.
+      apply nth_error_In in NUMBER.
+      auto.
+    + inv NOREPET.
+      intro ZZZ.
+      subst a.
+      apply nth_error_In in NUMBER.
+      auto.
+
+  - rewrite forallb_forall in BELOW2.
+    rewrite forallb_forall.
+    intros.
+    specialize BELOW2 with x.
+    rewrite Pos.ltb_lt in *.
+    intuition lia.
+  - inv NOREPET. trivial.
+  - trivial.
+Qed.
+ 
+Lemma inject_l_injected0:
+  forall f_id  cond args ifso ifnot expected injections body injnum pc extra_pc
+         (INSTR : body ! pc = Some  (Icond cond args ifso ifnot expected))
+         (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+         (NOREPET : list_norepet injections)
+         (NUMBER : nth_error injections injnum = Some pc),
+    PTree.get (inject_l_position extra_pc injections injnum)
+              (snd (inject_l f_id body extra_pc injections)) =
+    Some (Ibuiltin (EF_profiling (branch_id f_id pc) 0%Z) nil BR_none ifnot).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER. }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+  progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+              (fst (inject_at f_id body a extra_pc))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - unfold inject_at.
+      rewrite INSTR.
+      unfold inject_profiling_call. simpl.
+      rewrite PTree.gso by lia.
+      apply PTree.gss.
+    - rewrite inject_at_increases.
+      lia.
+    -  rewrite forallb_forall.
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      lia.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  
+  apply IHinjections.
+  - rewrite inject_at_preserves; trivial.
+    + rewrite forallb_forall in BELOW2.
+      rewrite <- Pos.ltb_lt.
+      apply nth_error_In in NUMBER.
+      auto.
+    + inv NOREPET.
+      intro ZZZ.
+      subst a.
+      apply nth_error_In in NUMBER.
+      auto.
+
+  - rewrite forallb_forall in BELOW2.
+    rewrite forallb_forall.
+    intros.
+    specialize BELOW2 with x.
+    rewrite Pos.ltb_lt in *.
+    intuition lia.
+  - inv NOREPET. trivial.
+  - trivial.
+Qed.
+
+Lemma inject_l_injected1:
+  forall f_id  cond args ifso ifnot expected injections body injnum pc extra_pc
+         (INSTR : body ! pc = Some  (Icond cond args ifso ifnot expected))
+         (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+         (NOREPET : list_norepet injections)
+         (NUMBER : nth_error injections injnum = Some pc),
+    PTree.get (Pos.succ (inject_l_position extra_pc injections injnum))
+              (snd (inject_l f_id body extra_pc injections)) =
+    Some (Ibuiltin (EF_profiling (branch_id f_id pc) 1%Z) nil BR_none ifso).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER. }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+  progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+              (fst (inject_at f_id body a extra_pc))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - unfold inject_at.
+      rewrite INSTR.
+      unfold inject_profiling_call. simpl.
+      apply PTree.gss.
+    - rewrite inject_at_increases.
+      lia.
+    -  rewrite forallb_forall.
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      lia.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  
+  apply IHinjections.
+  - rewrite inject_at_preserves; trivial.
+    + rewrite forallb_forall in BELOW2.
+      rewrite <- Pos.ltb_lt.
+      apply nth_error_In in NUMBER.
+      auto.
+    + inv NOREPET.
+      intro ZZZ.
+      subst a.
+      apply nth_error_In in NUMBER.
+      auto.
+
+  - rewrite forallb_forall in BELOW2.
+    rewrite forallb_forall.
+    intros.
+    specialize BELOW2 with x.
+    rewrite Pos.ltb_lt in *.
+    intuition lia.
+  - inv NOREPET. trivial.
+  - trivial.
+Qed.
+  
+Lemma transf_function_at:
+  forall f pc i
+    (CODE : f.(fn_code)!pc = Some i)
+    (INSTR : match i with
+             | Icond _ _ _ _ _ => False
+             | _ => True
+             end),
+    (transf_function f).(fn_code)!pc = Some i.
+Proof.
+  intros.
+  unfold transf_function; simpl.
+  rewrite inject_l_preserves.
+  assumption.
+  - pose proof (max_pc_function_sound f pc i CODE) as LE.
+    unfold Ple in LE.
+    lia.
+  - rewrite forallb_forall.
+    intros x IN.
+    destruct peq; trivial.
+    subst x.
+    unfold gen_conditions in IN.
+    rewrite in_map_iff in IN.
+    destruct IN as [[pc' i'] [EQ IN]].
+    simpl in EQ.
+    subst pc'.
+    apply PTree.elements_complete in IN.
+    rewrite PTree.gfilter1 in IN.
+    rewrite CODE in IN.
+    destruct i; try discriminate; contradiction.
+Qed.
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma funsig_preserved:
+  forall fd,
+    funsig (transf_fundef fd) = funsig fd.
+Proof.
+  destruct fd; simpl; trivial.
+Qed.
+
+Lemma stacksize_preserved:
+  forall f,
+    fn_stacksize (transf_function f) = fn_stacksize f.
+Proof.
+  destruct f; simpl; trivial.
+Qed.
+
+Hint Resolve symbols_preserved funsig_preserved external_call_symbols_preserved senv_preserved stacksize_preserved : profiling.
+
+Lemma step_simulation:
+  forall s1 t s2 (STEP : step ge s1 t s2)
+  s1' (MS: match_states s1 s1'),
+  exists s2', plus step tge s1' t s2' /\ match_states s2 s2'.
+Proof.
+  induction 1; intros; inv MS.
+  - econstructor; split.
+    + apply plus_one. apply exec_Inop.
+      erewrite transf_function_at; eauto. apply I.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one. apply exec_Iop with (op:=op) (args:=args).
+      * erewrite transf_function_at; eauto. apply I.
+      * rewrite eval_operation_preserved with (ge1:=ge);
+          eauto with profiling.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one. apply exec_Iload with (trap:=trap) (chunk:=chunk)
+                                            (addr:=addr) (args:=args) (a:=a).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one. apply exec_Iload_notrap1 with (chunk:=chunk)
+                                            (addr:=addr) (args:=args).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  -  econstructor; split.
+    + apply plus_one. apply exec_Iload_notrap2 with (chunk:=chunk)
+                                            (addr:=addr) (args:=args) (a:=a).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  -  econstructor; split.
+    + apply plus_one. apply exec_Istore with (chunk:=chunk) (src := src)
+                                            (addr:=addr) (args:=args) (a:=a).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one. apply exec_Icall with (sig:=(funsig fd)) (ros:=ros).
+      erewrite transf_function_at; eauto. apply I.
+      apply find_function_translated with (fd := fd).
+      all: eauto with profiling.
+    + constructor; auto.
+      constructor; auto.
+      constructor.
+  - econstructor; split.
+    + apply plus_one. apply exec_Itailcall with (sig:=(funsig fd)) (ros:=ros).
+      erewrite transf_function_at; eauto. apply I.
+      apply find_function_translated with (fd := fd).
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one.
+      apply exec_Ibuiltin with (ef:=ef) (args:=args) (vargs:=vargs).
+      erewrite transf_function_at; eauto. apply I.
+      apply eval_builtin_args_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  - destruct b.
+    + assert (In pc (gen_conditions (fn_code f))) as IN.
+              { unfold gen_conditions.
+                rewrite in_map_iff.
+                exists (pc,  (Icond cond args ifso ifnot predb)).
+                split; simpl; trivial.
+                apply PTree.elements_correct.
+                rewrite PTree.gfilter1.
+                rewrite H.
+                reflexivity.
+              }
+      apply In_nth_error in IN.
+      destruct IN as [n IN].
+      econstructor; split.
+      * eapply plus_two.
+        ++ eapply exec_Icond with (cond := cond) (args := args) (predb := predb) (b := true).
+           unfold transf_function. simpl.
+           erewrite inject_l_injected_pc with (cond := cond) (args := args).
+           ** reflexivity.
+           ** eassumption.
+           ** unfold gen_conditions.
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions.
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** assumption.
+           ** reflexivity.
+        ++ apply exec_Ibuiltin with (ef :=  (EF_profiling (branch_id (function_id f) pc) 1%Z)) (args := nil) (vargs := nil).
+           apply inject_l_injected1 with (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (expected := predb).
+           ** exact H.
+           ** unfold gen_conditions.
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions.
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** constructor.
+           ** constructor.
+        ++ reflexivity.
+      * simpl. constructor; auto.
+        
+    + assert (In pc (gen_conditions (fn_code f))) as IN.
+              { unfold gen_conditions.
+                rewrite in_map_iff.
+                exists (pc,  (Icond cond args ifso ifnot predb)).
+                split; simpl; trivial.
+                apply PTree.elements_correct.
+                rewrite PTree.gfilter1.
+                rewrite H.
+                reflexivity.
+              }
+      apply In_nth_error in IN.
+      destruct IN as [n IN].
+      econstructor; split.
+      * eapply plus_two.
+        ++ eapply exec_Icond with (cond := cond) (args := args) (predb := predb) (b := false).
+           unfold transf_function. simpl.
+           erewrite inject_l_injected_pc with (cond := cond) (args := args).
+           ** reflexivity.
+           ** eassumption.
+           ** unfold gen_conditions.
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions.
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** assumption.
+           ** reflexivity.
+        ++ apply exec_Ibuiltin with (ef :=  (EF_profiling (branch_id (function_id f) pc) 0%Z)) (args := nil) (vargs := nil).
+           apply inject_l_injected0 with (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (expected := predb).
+           ** exact H.
+           ** unfold gen_conditions.
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions.
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** constructor.
+           ** constructor.
+        ++ reflexivity.
+      * simpl. constructor; auto.
+        
+  - econstructor; split.
+    + apply plus_one.
+      apply exec_Ijumptable with (arg:=arg) (tbl:=tbl) (n:=n).
+      erewrite transf_function_at; eauto. apply I.
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one.
+      apply exec_Ireturn.
+      erewrite transf_function_at; eauto. apply I.
+      rewrite stacksize_preserved. eassumption.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one. apply exec_function_internal.
+      rewrite stacksize_preserved. eassumption.
+    + constructor; auto.
+  - econstructor; split.
+    + apply plus_one. apply exec_function_external.
+      eauto with profiling.
+    + constructor; auto.
+  - inv STACKS. inv H1.
+    econstructor; split.
+    + apply plus_one. apply exec_return.
+    + constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_plus.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/RTL.v b/backend/RTL.v
index a022f55a..fe350adf 100644
--- a/backend/RTL.v
+++ b/backend/RTL.v
@@ -43,11 +43,12 @@ Inductive instruction: Type :=
       (** [Iop op args dest succ] performs the arithmetic operation [op]
           over the values of registers [args], stores the result in [dest],
           and branches to [succ]. *)
-  | Iload: memory_chunk -> addressing -> list reg -> reg -> node -> instruction
-      (** [Iload chunk addr args dest succ] loads a [chunk] quantity from
+  | Iload: trapping_mode -> memory_chunk -> addressing -> list reg -> reg -> node -> instruction
+      (** [Iload trap chunk addr args dest succ] loads a [chunk] quantity from
           the address determined by the addressing mode [addr] and the
           values of the [args] registers, stores the quantity just read
-          into [dest], and branches to [succ]. *)
+          into [dest], and branches to [succ].
+          If trap=NOTRAP, then failures lead to a default value written to [dest]. *)
   | Istore: memory_chunk -> addressing -> list reg -> reg -> node -> instruction
       (** [Istore chunk addr args src succ] stores the value of register
           [src] in the [chunk] quantity at the
@@ -66,11 +67,12 @@ Inductive instruction: Type :=
       (** [Ibuiltin ef args dest succ] calls the built-in function
           identified by [ef], giving it the values of [args] as arguments.
           It stores the return value in [dest] and branches to [succ]. *)
-  | Icond: condition -> list reg -> node -> node -> instruction
-      (** [Icond cond args ifso ifnot] evaluates the boolean condition
+  | Icond: condition -> list reg -> node -> node -> option bool -> instruction
+      (** [Icond cond args ifso ifnot info] evaluates the boolean condition
           [cond] over the values of registers [args].  If the condition
           is true, it transitions to [ifso].  If the condition is false,
-          it transitions to [ifnot]. *)
+          it transitions to [ifnot]. [info] is a ghost field there to provide
+          information relative to branch prediction. *)
   | Ijumptable: reg -> list node -> instruction
       (** [Ijumptable arg tbl] transitions to the node that is the [n]-th
           element of the list [tbl], where [n] is the unsigned integer
@@ -212,12 +214,25 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp pc rs m)
         E0 (State s f sp pc' (rs#res <- v) m)
   | exec_Iload:
-      forall s f sp pc rs m chunk addr args dst pc' a v,
-      (fn_code f)!pc = Some(Iload chunk addr args dst pc') ->
+      forall s f sp pc rs m trap chunk addr args dst pc' a v,
+      (fn_code f)!pc = Some(Iload trap chunk addr args dst pc') ->
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
       step (State s f sp pc rs m)
         E0 (State s f sp pc' (rs#dst <- v) m)
+  | exec_Iload_notrap1:
+      forall s f sp pc rs m chunk addr args dst pc',
+      (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
+      eval_addressing ge sp addr rs##args = None ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (rs#dst <- Vundef) m)
+  | exec_Iload_notrap2:
+      forall s f sp pc rs m chunk addr args dst pc' a,
+      (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = None->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (rs#dst <- Vundef) m)
   | exec_Istore:
       forall s f sp pc rs m chunk addr args src pc' a m',
       (fn_code f)!pc = Some(Istore chunk addr args src pc') ->
@@ -248,8 +263,8 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp pc rs m)
          t (State s f sp pc' (regmap_setres res vres rs) m')
   | exec_Icond:
-      forall s f sp pc rs m cond args ifso ifnot b pc',
-      (fn_code f)!pc = Some(Icond cond args ifso ifnot) ->
+      forall s f sp pc rs m cond args ifso ifnot b pc' predb,
+      (fn_code f)!pc = Some(Icond cond args ifso ifnot predb) ->
       eval_condition cond rs##args m = Some b ->
       pc' = (if b then ifso else ifnot) ->
       step (State s f sp pc rs m)
@@ -299,8 +314,8 @@ Proof.
 Qed.
 
 Lemma exec_Iload':
-  forall s f sp pc rs m chunk addr args dst pc' rs' a v,
-  (fn_code f)!pc = Some(Iload chunk addr args dst pc') ->
+  forall s f sp pc rs m trap chunk addr args dst pc' rs' a v,
+  (fn_code f)!pc = Some(Iload trap chunk addr args dst pc') ->
   eval_addressing ge sp addr rs##args = Some a ->
   Mem.loadv chunk m a = Some v ->
   rs' = (rs#dst <- v) ->
@@ -384,12 +399,12 @@ Definition successors_instr (i: instruction) : list node :=
   match i with
   | Inop s => s :: nil
   | Iop op args res s => s :: nil
-  | Iload chunk addr args dst s => s :: nil
+  | Iload trap chunk addr args dst s => s :: nil
   | Istore chunk addr args src s => s :: nil
   | Icall sig ros args res s => s :: nil
   | Itailcall sig ros args => nil
   | Ibuiltin ef args res s => s :: nil
-  | Icond cond args ifso ifnot => ifso :: ifnot :: nil
+  | Icond cond args ifso ifnot _ => ifso :: ifnot :: nil
   | Ijumptable arg tbl => tbl
   | Ireturn optarg => nil
   end.
@@ -403,14 +418,14 @@ Definition instr_uses (i: instruction) : list reg :=
   match i with
   | Inop s => nil
   | Iop op args res s => args
-  | Iload chunk addr args dst s => args
+  | Iload trap chunk addr args dst s => args
   | Istore chunk addr args src s => src :: args
   | Icall sig (inl r) args res s => r :: args
   | Icall sig (inr id) args res s => args
   | Itailcall sig (inl r) args => r :: args
   | Itailcall sig (inr id) args => args
   | Ibuiltin ef args res s => params_of_builtin_args args
-  | Icond cond args ifso ifnot => args
+  | Icond cond args ifso ifnot _ => args
   | Ijumptable arg tbl => arg :: nil
   | Ireturn None => nil
   | Ireturn (Some arg) => arg :: nil
@@ -422,13 +437,13 @@ Definition instr_defs (i: instruction) : option reg :=
   match i with
   | Inop s => None
   | Iop op args res s => Some res
-  | Iload chunk addr args dst s => Some dst
+  | Iload trap chunk addr args dst s => Some dst
   | Istore chunk addr args src s => None
   | Icall sig ros args res s => Some res
   | Itailcall sig ros args => None
   | Ibuiltin ef args res s =>
       match res with BR r => Some r | _ => None end
-  | Icond cond args ifso ifnot => None
+  | Icond cond args ifso ifnot _ => None
   | Ijumptable arg tbl => None
   | Ireturn optarg => None
   end.
@@ -462,7 +477,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) :=
   match i with
   | Inop s => m
   | Iop op args res s => fold_left Pos.max args (Pos.max res m)
-  | Iload chunk addr args dst s => fold_left Pos.max args (Pos.max dst m)
+  | Iload trap chunk addr args dst s => fold_left Pos.max args (Pos.max dst m)
   | Istore chunk addr args src s => fold_left Pos.max args (Pos.max src m)
   | Icall sig (inl r) args res s => fold_left Pos.max args (Pos.max r (Pos.max res m))
   | Icall sig (inr id) args res s => fold_left Pos.max args (Pos.max res m)
@@ -471,7 +486,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) :=
   | Ibuiltin ef args res s =>
       fold_left Pos.max (params_of_builtin_args args)
         (fold_left Pos.max (params_of_builtin_res res) m)
-  | Icond cond args ifso ifnot => fold_left Pos.max args m
+  | Icond cond args ifso ifnot _ => fold_left Pos.max args m
   | Ijumptable arg tbl => Pos.max arg m
   | Ireturn None => m
   | Ireturn (Some arg) => Pos.max arg m
diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
new file mode 100644
index 00000000..878e079f
--- /dev/null
+++ b/backend/RTLTunneling.v
@@ -0,0 +1,121 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*          Pierre Goutagny ENS-Lyon, VERIMAG                          *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Branch tunneling for the RTL representation *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST.
+Require Import RTL.
+
+(* This is a port of tunneling for LTL. See LTLTunneling.v *)
+
+Definition UF := PTree.t (node * Z).
+
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: RTL.function -> UF.
+Extract Constant branch_target => "RTLTunnelingaux.branch_target".
+
+Local Open Scope error_monad_scope.
+
+Definition get (td: UF) (pc: node): node*Z :=
+  match td!pc with 
+  | Some (t,d) => (t,Z.abs d)
+  | None       => (pc,0)
+  end.
+
+
+Definition target (td: UF) (pc: node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+
+Definition check_included (td: UF) (c: code): option instruction
+  := PTree.fold (fun (ok:option instruction) pc _ => if ok then c!pc else None) td (Some (Inop xH)).
+
+(* we check the validity of targets and their bound:
+*  the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_instr (td: UF) (pc: node) (i: instruction): res unit :=
+  match td!pc with
+  | None => OK tt 
+  | Some (tpc, dpc) =>
+      let dpc := Z.abs dpc in
+      match i with
+      | Inop s =>
+          let (ts,ds) := get td s in
+          if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Inop")
+          else Error (msg "invalid skip of Inop")
+      | Icond _ _ ifso ifnot _ =>
+          let (tso,dso) := get td ifso in
+          let (tnot,dnot) := get td ifnot in
+          if peq tpc tso then
+            if peq tpc tnot then
+              if zlt dso dpc then
+                if zlt dnot dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this instruction")
+      end
+  end.
+
+Definition check_code (td: UF) (c: code): res unit :=
+  PTree.fold (fun ok pc i => do _ <- ok; check_instr td pc i) c (OK tt).
+
+(* The second pass rewrites all LTL instructions, replacing every
+ * successor [s] of every instruction by [t s], the canonical representative
+ * of its equivalence class in the union-find data structure.
+ *)
+
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
+  match i with
+  | Inop s => Inop (t s)
+  | Iop op args res s => Iop op args res (t s)
+  | Iload trap chunk addr args dst s => Iload trap chunk addr args dst (t s)
+  | Istore chunk addr args src s => Istore chunk addr args src (t s)
+  | Icall sig ros args res s => Icall sig ros args res (t s)
+  | Ibuiltin ef args res s => Ibuiltin ef args res (t s)
+  | Icond cond args ifso ifnot info =>
+      let ifso'  := t ifso in
+      let ifnot' := t ifnot in
+      if peq ifso' ifnot'
+      
+      then Inop ifso'
+      else Icond cond args ifso' ifnot' info
+  | Ijumptable arg tbl => Ijumptable arg (List.map t tbl)
+  | _ => i
+  end.
+
+Definition tunnel_function (f: RTL.function): res RTL.function :=
+  let td := branch_target f in
+  let c := fn_code f in
+  if check_included td c then
+    do _ <- check_code td c ; OK
+    (mkfunction
+      (fn_sig f)
+      (fn_params f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_instr td) c)
+      (td (fn_entrypoint f)))
+  else Error (msg "Some node of the union-find is not in the CFG").
+
+Definition tunnel_fundef (f: fundef): res fundef :=
+  transf_partial_fundef tunnel_function f.
+
+Definition transf_program (p: program): res program :=
+  transform_partial_program tunnel_fundef p.
+
diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml
new file mode 100644
index 00000000..43d4bf9f
--- /dev/null
+++ b/backend/RTLTunnelingaux.ml
@@ -0,0 +1,112 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*           Pierre Goutagny ENS-Lyon, VERIMAG                 *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a RTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [RTLTunneling.v]
+
+*)
+
+open Coqlib
+open RTL
+open Maps
+open Camlcoq
+open Tunnelinglibs
+
+module LANG = struct
+  type code_unit = RTL.instruction
+  type funct = RTL.coq_function
+end
+
+module OPT = struct
+  let langname = "RTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
+
+module FUNS = struct
+  let build_simplified_cfg c acc pc i =
+    match i with
+    | Inop s ->
+       let ns = get_node c s in
+       set_branch c pc ns;
+       incr nopcounter;
+       acc
+    | Icond (_, _, s1, s2, _) ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, i) =
+    match i with
+    | Inop s -> (if println then Partial.debug "\n");
+        Partial.debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc);
+        false
+    | Icond (_, _, s1, s2, _) -> (if println then Partial.debug "\n");
+        Partial.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc);
+        false
+    | _ -> Partial.debug "%d " pc;
+        true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let check_code_unit td pc i =
+    match PTree.get pc td with
+    | Some p ->
+        let (tpc, dpc) = p in
+        let dpc0 = dpc in begin
+          match i with
+          | Inop s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | Icond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | _ ->
+            raise (BugOnPC (P.to_int pc)) end
+    | None -> ()
+
+end
+
+module T = Partial.T(FUNS)
+let branch_target = T.branch_target
+
diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
new file mode 100644
index 00000000..0861143b
--- /dev/null
+++ b/backend/RTLTunnelingproof.v
@@ -0,0 +1,609 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*          Pierre Goutagny ENS-Lyon, VERIMAG                          *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for the branch tunneling optimization for RTL. *)
+(* This is a port of Tunnelingproof.v, the same optimisation for LTL. *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST Linking.
+Require Import Values Memory Registers Events Globalenvs Smallstep.
+Require Import Op Locations RTL.
+Require Import RTLTunneling.
+Require Import Conventions1.
+
+Local Open Scope nat.
+
+Definition check_included_spec (c:code) (td:UF) (ok: option instruction) :=
+  ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td:UF) (c:code):
+  check_included_spec c td (check_included td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P:=check_included_spec c); unfold check_included_spec.
+  - intros m m' oi EQ IND N pc. rewrite <- EQ. apply IND. apply N.
+  - intros N pc. rewrite PTree.gempty. auto.
+  - intros m oi pc v N S IND. destruct oi.
+    + intros. rewrite PTree.gsspec. destruct (peq _ _); try congruence. apply IND. congruence. apply H0.
+    + contradiction.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node) : option instruction -> Prop :=
+  | TB_default (TB: target pc = pc) oi:
+      target_bounds target bound pc oi
+  | TB_nop s
+      (EQ: target pc = target s)
+      (DEC: bound s < bound pc):
+      target_bounds target bound pc (Some (Inop s))
+  | TB_cond cond args ifso ifnot info
+      (EQSO: target pc = target ifso)
+      (EQNOT: target pc = target ifnot)
+      (DECSO: bound ifso < bound pc)
+      (DECNOT: bound ifnot < bound pc):
+      target_bounds target bound pc (Some (Icond cond args ifso ifnot info))
+.
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td: UF) (pc: node): td!pc = None -> td pc = pc.
+Proof.
+  unfold target, get. intro EQ. rewrite EQ. auto.
+Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
+
+Lemma get_nonneg td pc t d: get td pc = (t,d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct td!pc as [(tpc,dpc)|]; intro H; inv H; lia.
+Qed.
+Local Hint Resolve get_nonneg: core.
+
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
+
+
+(* TODO: à réécrire proprement *)
+Lemma check_instr_correct (td: UF) (pc: node) (i: instruction):
+  check_instr td pc i = OK tt ->
+  target_bounds (target td) (bound td) pc (Some i).
+Proof.
+  unfold check_instr. destruct (td!pc) as [(tpc,dpc)|] eqn:EQ.
+  assert (DPC: snd (get td pc) = Z.abs dpc). { unfold get. rewrite EQ. auto. }
+  - destruct i; try congruence.
+    + destruct (get td n) as (ts,ds) eqn:EQs.
+      destruct (peq _ _); try congruence.
+      destruct (zlt _ _); try congruence. intros _.
+      apply TB_nop. replace (td pc) with tpc.
+      unfold target. rewrite EQs. auto.
+      unfold target. unfold get. rewrite EQ. auto.
+      unfold bound. rewrite DPC. rewrite EQs; simpl. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n ts. apply EQs.
+    + destruct (get td n) as (tso,dso) eqn:EQSO.
+      destruct (get td n0) as (tnot,dnot) eqn:EQNOT.
+      intro H.
+      repeat ((destruct (peq _ _) in H || destruct (zlt _ _) in H); try congruence).
+      apply TB_cond; subst.
+      * unfold target. replace (fst (get td pc)) with tnot. rewrite EQSO. auto.
+        unfold get. rewrite EQ. auto.
+      * unfold target. replace (fst (get td pc)) with tnot. rewrite EQNOT. auto.
+        unfold get. rewrite EQ. auto.
+      * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n tnot. rewrite EQSO. auto. rewrite EQSO. auto.
+      * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n0 tnot. rewrite EQNOT; auto. rewrite EQNOT; auto.
+  - intros _. apply TB_default. unfold target. unfold get. rewrite EQ. auto.
+Qed.
+
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc i, c!pc = Some i -> target_bounds (target td) (bound td) pc (Some i).
+
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  unfold check_code. apply PTree_Properties.fold_rec; unfold check_code_spec.
+  - intros. rewrite <- H in H2. apply H0; auto.
+  - intros. rewrite PTree.gempty in H0. congruence.
+  - intros m [[]|e] pc i N S IND; simpl; try congruence.
+    intros H pc0 i0. rewrite PTree.gsspec. destruct (peq _ _).
+    subst. intro. inv H0. apply check_instr_correct. apply H.
+    auto.
+Qed.
+
+Theorem branch_target_bounds:
+  forall f tf pc,
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
+Proof.
+  intros. unfold tunnel_function in H.
+  destruct (check_included _ _) eqn:EQinc; try congruence.
+  monadInv H. rename EQ into EQcode.
+  destruct (_ ! _) eqn:EQ.
+  - exploit check_code_correct. destruct x. apply EQcode. apply EQ. auto.
+  - exploit check_included_correct.
+    rewrite EQinc. congruence.
+    apply EQ.
+    intro. apply TB_default. apply target_None. apply H.
+Qed.
+
+(** Preservation of semantics *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+
+Section PRESERVATION.
+
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_match TRANSL). apply H.
+  intros (cu & tf & A & B & C).
+  eexists. eauto.
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros. exploit (Genv.find_funct_ptr_match TRANSL).
+  - apply H.
+  - intros (cu & tf & A & B & C). exists tf. split. apply A. apply B.
+Qed.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  apply (Genv.find_symbol_match TRANSL).
+Qed.
+
+Lemma sig_preserved:
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f; simpl in H.
+  - monadInv H.
+    unfold tunnel_function in EQ.
+    destruct (check_included _ _) in EQ; try congruence.
+    monadInv EQ. auto.
+  - monadInv H. auto.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve de cette propriété très exactement, je ne vais pas réinventer la roue ici *)
+Qed.
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframes_intro:
+      forall res f tf sp pc rs trs
+      (TF: tunnel_function f = OK tf)
+      (RS: Registers.regs_lessdef rs trs),
+      match_stackframes
+        (Stackframe res f sp pc rs)
+        (Stackframe res tf sp (branch_target f pc) trs).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s ts f tf sp pc rs trs m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (TF: tunnel_function f = OK tf)
+      (RS: Registers.regs_lessdef rs trs)
+      (MEM: Mem.extends m tm),
+      match_states
+        (State s f sp pc rs m)
+        (State ts tf sp (branch_target f pc) trs tm)
+  | match_states_call:
+      forall s ts f tf a ta m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (TF: tunnel_fundef f = OK tf)
+      (ARGS: list_forall2 Val.lessdef a ta)
+      (MEM: Mem.extends m tm),
+      match_states
+        (Callstate s f a m)
+        (Callstate ts tf ta tm)
+  | match_states_return:
+      forall s ts v tv m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (VAL: Val.lessdef v tv)
+      (MEM: Mem.extends m tm),
+      match_states
+        (Returnstate s v m)
+        (Returnstate ts tv tm).
+
+Definition measure (st: state): nat :=
+  match st with
+  | State s f sp pc rs m => bound (branch_target f) pc
+  | Callstate s f v m => 0
+  | Returnstate s v m => 0
+  end.
+
+
+Lemma transf_initial_states:
+  forall s1: state, initial_state prog s1 ->
+  exists s2: state, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inversion H as [b f m0 ge0 MEM SYM PTR SIG CALL].
+  exploit function_ptr_translated.
+  - apply PTR.
+  - intros (tf & TPTR & TUN).
+    exists (Callstate nil tf nil m0). split.
+    + apply initial_state_intro with b.
+      * apply (Genv.init_mem_match TRANSL). apply MEM.
+      * rewrite (match_program_main TRANSL).
+        rewrite symbols_preserved. apply SYM.
+      * apply TPTR.
+      * rewrite <- SIG. apply sig_preserved. apply TUN.
+    + apply match_states_call.
+      * apply list_forall2_nil.
+      * apply TUN.
+      * apply list_forall2_nil.
+      * apply Mem.extends_refl.
+Qed.
+
+Lemma transf_final_states:
+  forall (s1 : state)
+  (s2 : state) (r : Integers.Int.int),
+  match_states s1 s2 ->
+  final_state s1 r ->
+  final_state s2 r.
+Proof.
+  intros. inv H0. inv H. inv VAL. inversion STK. apply final_state_intro.
+Qed.
+
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf ->
+  (fn_code tf) ! pc =
+  option_map (tunnel_instr (branch_target f)) (fn_code f) ! pc.
+Proof.
+  intros f tf pc.
+  unfold tunnel_function.
+  destruct (check_included _ _) eqn:EQinc; try congruence.
+  destruct (check_code _ _) eqn:EQcode; simpl; try congruence.
+  intro. inv H. simpl. rewrite PTree.gmap1. reflexivity.
+Qed.
+
+Lemma reglist_lessdef:
+  forall (rs trs: Registers.Regmap.t val) (args: list Registers.reg),
+  regs_lessdef rs trs -> Val.lessdef_list (rs##args) (trs##args).
+Proof.
+  intros. induction args; simpl; constructor.
+  apply H. apply IHargs.
+Qed.
+
+Lemma instruction_type_preserved:
+  forall (f tf:function) (pc:node) (i ti:instruction)
+  (TF: tunnel_function f = OK tf)
+  (FATPC: (fn_code f) ! pc = Some i)
+  (NOTINOP: forall s, i <> Inop s)
+  (NOTICOND: forall cond args ifso ifnot info, i <> Icond cond args ifso ifnot info)
+  (TI: ti = tunnel_instr (branch_target f) i),
+  (fn_code tf) ! (branch_target f pc) = Some ti.
+Proof.
+  intros.
+  assert ((fn_code tf) ! pc = Some (tunnel_instr (branch_target f) i)) as TFATPC.
+  rewrite (tunnel_function_unfold f tf pc); eauto.
+  rewrite FATPC; eauto.
+  exploit branch_target_bounds; eauto.
+  intro TB. inversion TB as [BT|s|cond args ifso ifnot info]; try (rewrite FATPC in H; congruence).
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs trs fd,
+  regs_lessdef rs trs ->
+  find_function ge ros rs = Some fd ->
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros trs = Some tfd.
+Proof.
+  intros. destruct ros; simpl in *.
+  - (* reg *)
+    assert (E: trs # r = rs # r).
+    { exploit Genv.find_funct_inv. apply H0. intros (b & EQ).
+      generalize (H r) . rewrite EQ. intro LD. inv LD. auto. }
+    rewrite E. exploit functions_translated; eauto.
+  - (* ident *)
+    rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0.
+    exploit function_ptr_translated; eauto.
+    intros (tf & X1 & X2). exists tf; intuition.
+Qed.
+
+Lemma list_forall2_lessdef_rs:
+  forall rs trs args,
+  regs_lessdef rs trs ->
+  list_forall2 Val.lessdef rs ## args trs ## args.
+Proof.
+  intros rs trs args LD.
+  exploit (reglist_lessdef rs trs args). apply LD.
+  induction args; simpl; intros H; try constructor; inv H.
+  apply H3. apply IHargs. apply H5.
+Qed.
+
+Lemma fn_stacksize_preserved:
+  forall f tf
+  (TF: tunnel_function f = OK tf),
+  fn_stacksize f = fn_stacksize tf.
+Proof.
+  intros f tf. unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  intro H. monadInv H. simpl. reflexivity.
+Qed.
+
+Lemma regs_setres_lessdef:
+  forall res vres tvres rs trs,
+  regs_lessdef rs trs -> Val.lessdef vres tvres ->
+  regs_lessdef (regmap_setres res vres rs) (regmap_setres res tvres trs).
+Proof.
+  induction res; intros; simpl; try auto using set_reg_lessdef.
+Qed.
+
+Lemma regmap_optget_lessdef:
+  forall or rs trs,
+  regs_lessdef rs trs -> Val.lessdef (regmap_optget or Vundef rs) (regmap_optget or Vundef trs).
+Proof.
+  intros or rs trs RS.
+  induction or; simpl; auto using set_reg_lessdef.
+Qed.
+
+Lemma tunnel_fundef_Internal:
+  forall (f: function) (tf: fundef)
+  (TF: tunnel_fundef (Internal f) = OK tf),
+  exists (tf': function), tf = (Internal tf') /\ tunnel_function f = OK tf'.
+Proof.
+  intros f tf.
+  unfold tunnel_fundef. simpl. intro H. monadInv H. exists x.
+  split. reflexivity. apply EQ.
+Qed.
+
+Lemma tunnel_fundef_External:
+  forall (ef: external_function) (tf: fundef)
+  (TF: tunnel_fundef (External ef) = OK tf),
+  tf = (External ef).
+Proof.
+  intros f tf.
+  unfold tunnel_fundef. simpl. intro H. monadInv H. reflexivity.
+Qed.
+
+
+Lemma fn_entrypoint_preserved:
+  forall f tf
+  (TF: tunnel_function f = OK tf),
+  fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf.
+  unfold tunnel_function. destruct (check_included _ _); try congruence.
+  intro TF. monadInv TF. simpl. reflexivity.
+Qed.
+
+Lemma init_regs_lessdef:
+  forall f tf args targs
+  (ARGS: list_forall2 Val.lessdef args targs)
+  (TF: tunnel_function f = OK tf),
+  regs_lessdef (init_regs args (fn_params f)) (init_regs targs (fn_params tf)).
+Proof.
+  assert (regs_lessdef (Regmap.init Vundef) (Regmap.init Vundef)) as Hundef.
+  { unfold Regmap.init. unfold regs_lessdef. intro. unfold Regmap.get. rewrite PTree.gempty. apply Val.lessdef_undef. }
+
+  intros f tf args targs ARGS.
+
+  unfold tunnel_function. destruct (check_included _ _) eqn:EQinc; try congruence.
+  intro TF. monadInv TF. simpl. 
+  (* 
+  induction ARGS.
+  - induction (fn_params f) eqn:EQP; apply Hundef.
+  - induction (fn_params f) eqn:EQP.
+    * simpl. apply Hundef.
+    * simpl. apply set_reg_lessdef. apply H. 
+    *)
+    
+  generalize (fn_params f) as l. induction ARGS; induction l; try (simpl; apply Hundef).
+    simpl. apply set_reg_lessdef; try assumption. apply IHARGS.
+Qed.
+
+Lemma lessdef_forall2_list:
+  forall args ta,
+  list_forall2 Val.lessdef args ta -> Val.lessdef_list args ta.
+Proof.
+  intros args ta H. induction H. apply Val.lessdef_list_nil. apply Val.lessdef_list_cons. apply H. apply IHlist_forall2.
+Qed.
+
+Lemma tunnel_step_correct:
+  forall st1 t st2, step ge st1 t st2 ->
+  forall st1' (MS: match_states st1 st1'),
+  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
+  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
+Proof.
+  intros st1 t st2 H. induction H; intros; try (inv MS).
+  - (* Inop *)
+    exploit branch_target_bounds. apply TF.
+    rewrite H. intro. inv H0.
+    + (* TB_default *)
+      rewrite TB. left. eexists. split.
+      * apply exec_Inop. rewrite (tunnel_function_unfold f tf pc). rewrite H. simpl. eauto. apply TF.
+      * constructor; try assumption.
+    + (* TB_nop *)
+      simpl. right. repeat split. apply DEC.
+      rewrite EQ. apply match_states_intro; assumption.
+  - (* Iop *)
+    exploit eval_operation_lessdef; try eassumption.
+    apply reglist_lessdef. apply RS.
+    intros (tv & EVAL & LD).
+    left; eexists; split.
+    + eapply exec_Iop with (v:=tv).
+      apply instruction_type_preserved with (Iop op args res pc'); (simpl; auto)||congruence.
+      rewrite <- EVAL. apply eval_operation_preserved. apply symbols_preserved.
+    + apply match_states_intro; eauto. apply set_reg_lessdef. apply LD. apply RS.
+  - (* Iload *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef. apply RS.
+    intros (ta & EVAL & LD).
+    exploit Mem.loadv_extends; try eassumption.
+    intros (tv & LOAD & LD').
+    left. eexists. split.
+    + eapply exec_Iload.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply LOAD.
+    + apply match_states_intro; try assumption. apply set_reg_lessdef. apply LD'. apply RS.
+  - (* Iload NOTRAP1 *)
+    exploit eval_addressing_lessdef_none; try eassumption.
+    apply reglist_lessdef; apply RS.
+    left. eexists. split.
+    + eapply exec_Iload_notrap1.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- H1. apply eval_addressing_preserved. apply symbols_preserved.
+    + apply match_states_intro; try assumption. apply set_reg_lessdef. apply Val.lessdef_undef. apply RS.
+  - (* Iload NOTRAP2 *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef; apply RS.
+    intros (ta & EVAL & LD).
+    (* TODO: on peut sans doute factoriser ça *)
+    destruct (Mem.loadv chunk tm ta) eqn:Htload.
+    + left; eexists; split.
+      eapply exec_Iload.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply Htload.
+      * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto.
+    + left; eexists; split.
+      eapply exec_Iload_notrap2.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply Htload.
+      * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto.
+  - (* Lstore *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef; apply RS.
+    intros (ta & EVAL & LD).
+    exploit Mem.storev_extends; try eassumption. apply RS.
+    intros (tm' & STORE & MEM').
+    left. eexists. split.
+    + eapply exec_Istore.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * rewrite STORE. reflexivity.
+    + apply match_states_intro; try eassumption.
+  - (* Icall *)
+    left.
+    exploit find_function_translated; try eassumption.
+    intros (tfd & TFD & FIND).
+    eexists. split.
+    + eapply exec_Icall.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * apply FIND.
+      * apply sig_preserved. apply TFD.
+    + apply match_states_call; try assumption.
+      * apply list_forall2_cons; try assumption. apply match_stackframes_intro; try assumption.
+      * apply list_forall2_lessdef_rs. apply RS.
+  - (* Itailcall *)
+    exploit find_function_translated; try eassumption.
+    intros (tfd & TFD & FIND).
+    exploit Mem.free_parallel_extends; try eassumption.
+    intros (tm' & FREE & MEM').
+    left. eexists. split.
+    + eapply exec_Itailcall.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * apply FIND.
+      * apply sig_preserved. apply TFD.
+      * erewrite <- fn_stacksize_preserved; try eassumption.
+    + apply match_states_call; try assumption.
+      apply list_forall2_lessdef_rs. apply RS.
+  - (* Ibuiltin *)
+    exploit eval_builtin_args_lessdef; try eassumption. apply RS.
+    intros (vl2 & EVAL & LD).
+    exploit external_call_mem_extends; try eassumption.
+    intros (tvres & tm' & EXT & LDRES & MEM' & UNCHGD).
+    left. eexists. split.
+    + eapply exec_Ibuiltin.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * eapply eval_builtin_args_preserved. eapply symbols_preserved. eapply EVAL.
+      * eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXT.
+    + apply match_states_intro; try assumption. apply regs_setres_lessdef; try assumption.
+  - (* Icond *)
+    simpl. exploit branch_target_bounds. apply TF. rewrite H. intro. inv H1.
+    + (* TB_default *)
+      rewrite TB.
+      destruct (fn_code tf)!pc as [[]|] eqn:EQ;
+      assert (tunnel_function f = OK tf) as TF'; auto;
+      unfold tunnel_function in TF; destruct (check_included _ _) in TF; monadInv TF;
+      simpl in EQ; rewrite PTree.gmap1 in EQ; rewrite H in EQ; simpl in EQ; destruct (peq _ _) eqn: EQpeq in EQ; try congruence.
+      * left. eexists. split. 
+        -- eapply exec_Inop. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity.
+        -- destruct b. apply match_states_intro; eauto. rewrite e. apply match_states_intro; eauto.
+      * left. eexists. split.
+        -- eapply exec_Icond; auto. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity. eapply eval_condition_lessdef; try eassumption. apply reglist_lessdef. apply RS.
+        -- destruct b; apply match_states_intro; auto.
+    + (* TB_cond *) right; repeat split; destruct b; try assumption.
+      * rewrite EQSO. apply match_states_intro; try assumption.
+      * rewrite EQNOT. apply match_states_intro; try assumption.
+  - (* Ijumptable *)
+    left. eexists. split.
+    + eapply exec_Ijumptable.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * generalize (RS arg). rewrite H0. intro. inv H2. reflexivity.
+      * rewrite list_nth_z_map. rewrite H1. simpl. reflexivity.
+    + apply match_states_intro; try eassumption.
+  - (* Ireturn *)
+    exploit Mem.free_parallel_extends; try eassumption.
+    intros (tm' & FREE & MEM').
+    left. eexists. split.
+    + eapply exec_Ireturn.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * erewrite <- fn_stacksize_preserved. eapply FREE. eapply TF.
+    + apply match_states_return; try eassumption.
+      apply regmap_optget_lessdef. apply RS.
+  - (* internal function *)
+    exploit tunnel_fundef_Internal; try eassumption.
+    intros (tf' & EQ & TF'). subst.
+    exploit Mem.alloc_extends; try eassumption. reflexivity. reflexivity.
+    intros (m2' & ALLOC & EXT).
+    left. eexists. split.
+    + eapply exec_function_internal.
+      rewrite <- (fn_stacksize_preserved f tf'). eapply ALLOC. eapply TF'.
+    + rewrite (fn_entrypoint_preserved f tf'); try eassumption. apply match_states_intro; try eassumption.
+      apply init_regs_lessdef. apply ARGS. apply TF'.
+  - (* external function *)
+    exploit external_call_mem_extends. eapply H. eapply MEM.  eapply lessdef_forall2_list. eapply ARGS.
+    intros (tvres & tm' & EXTCALL & LD & EXT & MEMUNCHGD).
+    left. eexists. split.
+    + erewrite (tunnel_fundef_External ef tf); try eassumption.
+      eapply exec_function_external. eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXTCALL.
+    + eapply match_states_return; try assumption.
+  - (* return *)
+    inv STK. inv H1.
+    left. eexists. split.
+    + eapply exec_return.
+    + eapply match_states_intro; try assumption.
+      apply set_reg_lessdef; try assumption.
+Qed.
+
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_opt.
+  apply senv_preserved.
+  apply transf_initial_states.
+  apply transf_final_states.
+  exact tunnel_step_correct.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/RTLcommonaux.ml b/backend/RTLcommonaux.ml
new file mode 100644
index 00000000..2e9dde2e
--- /dev/null
+++ b/backend/RTLcommonaux.ml
@@ -0,0 +1,105 @@
+open RTL
+open Maps
+open Camlcoq
+open Registers
+open Kildall
+open Lattice
+
+let p2i r = P.to_int r
+
+let i2p i = P.of_int i
+
+let get_some = function
+  | None -> failwith "Got None instead of Some _"
+  | Some thing -> thing
+
+let get_ok r = match r with Errors.OK x -> x | _ -> failwith "Did not get OK"
+
+let successors_inst = function
+  | Inop n
+  | Iop (_, _, _, n)
+  | Iload (_, _, _, _, _, n)
+  | Istore (_, _, _, _, n)
+  | Icall (_, _, _, _, n)
+  | Ibuiltin (_, _, _, n) ->
+      [ n ]
+  | Icond (_, _, n1, n2, _) -> [ n1; n2 ]
+  | Ijumptable (_, l) -> l
+  | Itailcall _ | Ireturn _ -> []
+
+let predicted_successor = function
+  | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n)
+    ->
+      Some n
+  | Icall (_, _, _, _, n) | Ibuiltin (_, _, _, n) -> None
+  | Icond (_, _, n1, n2, p) -> (
+      match p with Some true -> Some n1 | Some false -> Some n2 | None -> None)
+  | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+
+let non_predicted_successors i = function
+  | None -> successors_inst i
+  | Some ps -> List.filter (fun s -> s != ps) (successors_inst i)
+
+(* adapted from Linearizeaux.get_join_points *)
+let get_join_points code entry =
+  let reached = ref (PTree.map (fun n i -> false) code) in
+  let reached_twice = ref (PTree.map (fun n i -> false) code) in
+  let rec traverse pc =
+    if get_some @@ PTree.get pc !reached then (
+      if not (get_some @@ PTree.get pc !reached_twice) then
+        reached_twice := PTree.set pc true !reached_twice)
+    else (
+      reached := PTree.set pc true !reached;
+      traverse_succs (successors_inst @@ get_some @@ PTree.get pc code))
+  and traverse_succs = function
+    | [] -> ()
+    | [ pc ] -> traverse pc
+    | pc :: l ->
+        traverse pc;
+        traverse_succs l
+  in
+  traverse entry;
+  !reached_twice
+
+let transfer f pc after =
+  let open Liveness in
+  match PTree.get pc f.fn_code with
+  | Some i -> (
+      match i with
+      | Inop _ -> after
+      | Iop (_, args, res, _) -> reg_list_live args (Regset.remove res after)
+      | Iload (_, _, _, args, dst, _) ->
+          reg_list_live args (Regset.remove dst after)
+      | Istore (_, _, args, src, _) -> reg_list_live args (Regset.add src after)
+      | Icall (_, ros, args, res, _) ->
+          reg_list_live args (reg_sum_live ros (Regset.remove res after))
+      | Itailcall (_, ros, args) ->
+          reg_list_live args (reg_sum_live ros Regset.empty)
+      | Ibuiltin (_, args, res, _) ->
+          reg_list_live
+            (AST.params_of_builtin_args args)
+            (reg_list_dead (AST.params_of_builtin_res res) after)
+      | Icond (_, args, _, _, _) -> reg_list_live args after
+      | Ijumptable (arg, _) -> Regset.add arg after
+      | Ireturn optarg -> reg_option_live optarg Regset.empty)
+  | None -> Regset.empty
+
+module RegsetLat = LFSet (Regset)
+module DS = Backward_Dataflow_Solver (RegsetLat) (NodeSetBackward)
+
+let analyze f =
+  let liveouts =
+    get_some @@ DS.fixpoint f.fn_code successors_instr (transfer f)
+  in
+  PTree.map
+    (fun n _ ->
+      let lo = PMap.get n liveouts in
+      transfer f n lo)
+    f.fn_code
+
+let get_outputs liveness n last =
+  let path_last_successors = successors_inst last in
+  let list_input_regs =
+    List.map (fun n -> get_some @@ PTree.get n liveness) path_last_successors
+  in
+  List.fold_left Regset.union Regset.empty list_input_regs
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index f7280c9e..243d7b7c 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -435,7 +435,7 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node)
       transl_exprlist map al rl no
   | Eload chunk addr al =>
       do rl <- alloc_regs map al;
-      do no <- add_instr (Iload chunk addr rl rd nd);
+      do no <- add_instr (Iload TRAP chunk addr rl rd nd);
          transl_exprlist map al rl no
   | Econdition a b c =>
       do nfalse <- transl_expr map c rd nd;
@@ -477,9 +477,9 @@ with transl_exprlist (map: mapping) (al: exprlist) (rl: list reg) (nd: node)
 with transl_condexpr (map: mapping) (a: condexpr) (ntrue nfalse: node)
                      {struct a} : mon node :=
   match a with
-  | CEcond c al =>
+  | CEcond c expected al =>
       do rl <- alloc_regs map al;
-      do nt <- add_instr (Icond c rl ntrue nfalse);
+      do nt <- add_instr (Icond c rl ntrue nfalse expected);
          transl_exprlist map al rl nt
   | CEcondition a b c =>
       do nc <- transl_condexpr map c ntrue nfalse;
diff --git a/backend/RTLgenaux.ml b/backend/RTLgenaux.ml
index e39d3b56..26688e23 100644
--- a/backend/RTLgenaux.ml
+++ b/backend/RTLgenaux.ml
@@ -41,7 +41,7 @@ and size_exprs = function
   | Econs(e1, el) -> size_expr e1 + size_exprs el
 
 and size_condexpr = function
-  | CEcond(c, args) -> size_exprs args
+  | CEcond(c, expected, args) -> size_exprs args
   | CEcondition(c1, c2, c3) ->
       1 + size_condexpr c1 + size_condexpr c2 + size_condexpr c3
   | CElet(a, c) ->
diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v
index 1602823f..d07dc968 100644
--- a/backend/RTLgenproof.v
+++ b/backend/RTLgenproof.v
@@ -799,11 +799,11 @@ Proof.
 Qed.
 
 Lemma transl_condexpr_CEcond_correct:
-  forall le cond al vl vb,
+  forall le cond expected al vl vb,
   eval_exprlist ge sp e m le al vl ->
   transl_exprlist_prop le al vl ->
   eval_condition cond vl m = Some vb ->
-  transl_condexpr_prop le (CEcond cond al) vb.
+  transl_condexpr_prop le (CEcond cond expected al) vb.
 Proof.
   intros; red; intros. inv TE.
   exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]].
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index 25f9954c..0210aa5b 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -707,7 +707,7 @@ Inductive tr_expr (c: code):
       tr_expr c map pr (Eop op al) ns nd rd dst
   | tr_Eload: forall map pr chunk addr al ns nd rd n1 rl dst,
       tr_exprlist c map pr al ns n1 rl ->
-      c!n1 = Some (Iload chunk addr rl rd nd) ->
+      c!n1 = Some (Iload TRAP chunk addr rl rd nd) ->
       reg_map_ok map rd dst -> ~In rd pr ->
       tr_expr c map pr (Eload chunk addr al) ns nd rd dst
   | tr_Econdition: forall map pr a ifso ifnot ns nd rd ntrue nfalse dst,
@@ -744,10 +744,10 @@ Inductive tr_expr (c: code):
 
 with tr_condition (c: code):
        mapping -> list reg -> condexpr -> node -> node -> node -> Prop :=
-  | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl,
+  | tr_CEcond: forall map pr cond expected bl ns ntrue nfalse n1 rl i,
       tr_exprlist c map pr bl ns n1 rl ->
-      c!n1 = Some (Icond cond rl ntrue nfalse) ->
-      tr_condition c map pr (CEcond cond bl) ns ntrue nfalse
+      c!n1 = Some (Icond cond rl ntrue nfalse i) ->
+      tr_condition c map pr (CEcond cond expected bl) ns ntrue nfalse
   | tr_CEcondition: forall map pr a1 a2 a3 ns ntrue nfalse n2 n3,
       tr_condition c map pr a1 ns n2 n3 ->
       tr_condition c map pr a2 n2 ntrue nfalse ->
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 5b8646ea..6048f895 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -104,11 +104,11 @@ Inductive wt_instr : instruction -> Prop :=
       valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
-      forall chunk addr args dst s,
+      forall trap chunk addr args dst s,
       map env args = type_of_addressing addr ->
       env dst = type_of_chunk chunk ->
       valid_successor s ->
-      wt_instr (Iload chunk addr args dst s)
+      wt_instr (Iload trap chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
       map env args = type_of_addressing addr ->
@@ -139,11 +139,11 @@ Inductive wt_instr : instruction -> Prop :=
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
-      forall cond args s1 s2,
+      forall cond args s1 s2 i,
       map env args = type_of_condition cond ->
       valid_successor s1 ->
       valid_successor s2 ->
-      wt_instr (Icond cond args s1 s2)
+      wt_instr (Icond cond args s1 s2 i)
   | wt_Ijumptable:
       forall arg tbl,
       env arg = Tint ->
@@ -283,7 +283,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
       else
        (let (targs, tres) := type_of_operation op in
         do e1 <- S.set_list e args targs; S.set e1 res tres)
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       do x <- check_successor s;
       do e1 <- S.set_list e args (type_of_addressing addr);
       S.set e1 dst (type_of_chunk chunk)
@@ -313,7 +313,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
         | _ => type_builtin_args e args sig.(sig_args)
         end;
       type_builtin_res e1 res (proj_sig_res sig)
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 _ =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
       S.set_list e args (type_of_condition cond)
@@ -844,14 +844,24 @@ Proof.
 Qed.
 
 Lemma wt_exec_Iload:
-  forall env f chunk addr args dst s m a v rs,
-  wt_instr f env (Iload chunk addr args dst s) ->
+  forall env f trap chunk addr args dst s m a v rs,
+  wt_instr f env (Iload trap chunk addr args dst s) ->
   Mem.loadv chunk m a = Some v ->
   wt_regset env rs ->
   wt_regset env (rs#dst <- v).
 Proof.
   intros. destruct a; simpl in H0; try discriminate. inv H.
-  eapply wt_regset_assign; eauto. rewrite H8; eapply Mem.load_type; eauto.
+  eapply wt_regset_assign; eauto. rewrite H9; eapply Mem.load_type; eauto.
+Qed.
+
+Lemma wt_exec_Iload_notrap:
+  forall env f chunk addr args dst s rs,
+  wt_instr f env (Iload NOTRAP chunk addr args dst s) ->
+  wt_regset env rs ->
+  wt_regset env (rs#dst <- Vundef).
+Proof.
+  intros. 
+  eapply wt_regset_assign; eauto. simpl. trivial.
 Qed.
 
 Lemma wt_exec_Ibuiltin:
@@ -933,6 +943,10 @@ Proof.
   econstructor; eauto. eapply wt_exec_Iop; eauto.
   (* Iload *)
   econstructor; eauto. eapply wt_exec_Iload; eauto.
+  (* Iload notrap1*)
+  econstructor; eauto. eapply wt_exec_Iload_notrap; eauto.
+  (* Iload notrap2*)
+  econstructor; eauto. eapply wt_exec_Iload_notrap; eauto.
   (* Istore *)
   econstructor; eauto.
   (* Icall *)
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index 19aba4f6..ffe26933 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -249,18 +249,18 @@ let block_of_RTL_instr funsig tyenv = function
             else
               let t = new_temp (tyenv res) in (t :: args2', t) in
       movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s])
-  | RTL.Iload(chunk, addr, args, dst, s) ->
+  | RTL.Iload(trap, chunk, addr, args, dst, s) ->
       if Archi.splitlong && chunk = Mint64 then begin
         match offset_addressing addr (coqint_of_camlint 4l) with
         | None -> assert false
         | Some addr' ->
-            [Xload(Mint32, addr, vregs tyenv args,
+            [Xload(trap, Mint32, addr, vregs tyenv args,
                    V((if Archi.big_endian then dst else twin_reg dst), Tint));
-             Xload(Mint32, addr', vregs tyenv args,
+             Xload(trap, Mint32, addr', vregs tyenv args,
                    V((if Archi.big_endian then twin_reg dst else dst), Tint));
              Xbranch s]
       end else
-        [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
+        [Xload(trap, chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
   | RTL.Istore(chunk, addr, args, src, s) ->
       if Archi.splitlong && chunk = Mint64 then begin
         match offset_addressing addr (coqint_of_camlint 4l) with
@@ -295,8 +295,8 @@ let block_of_RTL_instr funsig tyenv = function
          (Xbuiltin(ef, args2, res2) ::
             movelist (params_of_builtin_res res2) (params_of_builtin_res res1)
                [Xbranch s])
-  | RTL.Icond(cond, args, s1, s2) ->
-      [Xcond(cond, vregs tyenv args, s1, s2)]
+  | RTL.Icond(cond, args, s1, s2, i) ->
+      [Xcond(cond, vregs tyenv args, s1, s2, i)]
   | RTL.Ijumptable(arg, tbl) ->
       [Xjumptable(vreg tyenv arg, tbl)]
   | RTL.Ireturn None ->
@@ -364,7 +364,7 @@ let live_before instr after =
       if VSet.mem res after
       then vset_addlist args (VSet.remove res after)
       else after
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       if VSet.mem dst after
       then vset_addlist args (VSet.remove dst after)
       else after
@@ -380,7 +380,7 @@ let live_before instr after =
       vset_addargs args (vset_removeres res after)
   | Xbranch s ->
       after
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       List.fold_right VSet.add args after
   | Xjumptable(arg, tbl) ->
       VSet.add arg after
@@ -459,7 +459,7 @@ let dce_instr instr after k =
       if VSet.mem res after
       then instr :: k
       else k
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       if VSet.mem dst after
       then instr :: k
       else k
@@ -550,7 +550,7 @@ let spill_costs f =
         (* temps must not be spilled *)
     | Xop(op, args, res) ->
         charge_list 10 1 args; charge 10 1 res
-    | Xload(chunk, addr, args, dst) ->
+    | Xload(trap, chunk, addr, args, dst) ->
         charge_list 10 1 args; charge 10 1 dst
     | Xstore(chunk, addr, args, src) ->
         charge_list 10 1 args; charge 10 1 src
@@ -575,7 +575,7 @@ let spill_costs f =
             charge_list 10 1 (params_of_builtin_res res)
         end
     | Xbranch _ -> ()
-    | Xcond(cond, args, _, _) ->
+    | Xcond(cond, args, _, _, _) ->
         charge_list 10 1 args
     | Xjumptable(arg, _) ->
         charge 10 1 arg
@@ -677,7 +677,7 @@ let add_interfs_instr g instr live =
             (vset_addlist (res :: argl) (VSet.remove res live))
       end;
       add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op)
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       add_interfs_def g dst live;
       add_interfs_destroyed g (VSet.remove dst live)
                               (destroyed_by_load chunk addr)
@@ -718,7 +718,7 @@ let add_interfs_instr g instr live =
       end
   | Xbranch s ->
       ()
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       add_interfs_destroyed g live (destroyed_by_cond cond)
   | Xjumptable(arg, tbl) ->
       add_interfs_destroyed g live destroyed_by_jumptable
@@ -782,7 +782,7 @@ let tospill_instr alloc instr ts =
       ts
   | Xop(op, args, res) ->
       addlist_tospill alloc args (add_tospill alloc res ts)
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       addlist_tospill alloc args (add_tospill alloc dst ts)
   | Xstore(chunk, addr, args, src) ->
       addlist_tospill alloc args (add_tospill alloc src ts)
@@ -797,7 +797,7 @@ let tospill_instr alloc instr ts =
          (addlist_tospill alloc (params_of_builtin_res res) ts)
   | Xbranch s ->
       ts
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       addlist_tospill alloc args ts
   | Xjumptable(arg, tbl) ->
       add_tospill alloc arg ts
@@ -964,10 +964,10 @@ let spill_instr tospill eqs instr =
                add res tmp (kill tmp (kill res eqs2)))
           end
       end
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       let (args', c1, eqs1) = reload_vars tospill eqs args in
       let (dst', c2, eqs2) = save_var tospill eqs1 dst in
-      (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2)
+      (c1 @ Xload(trap, chunk, addr, args', dst') :: c2, eqs2)
   | Xstore(chunk, addr, args, src) ->
       let (args', c1, eqs1) = reload_vars tospill eqs args in
       let (src', c2, eqs2) = reload_var tospill eqs1 src in
@@ -990,9 +990,9 @@ let spill_instr tospill eqs instr =
       (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2)
   | Xbranch s ->
       ([instr], eqs)
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, i) ->
      let (args', c1, eqs1) = reload_vars tospill eqs args in
-     (c1 @ [Xcond(cond, args', s1, s2)], eqs1)
+     (c1 @ [Xcond(cond, args', s1, s2, i)], eqs1)
   | Xjumptable(arg, tbl) ->
       let (arg', c1, eqs1) = reload_var tospill eqs arg in
       (c1 @ [Xjumptable(arg', tbl)], eqs1)
@@ -1067,7 +1067,7 @@ let make_parmove srcs dsts itmp ftmp k =
     | Locations.S(sl, ofs, ty), R rd ->
         code := LTL.Lgetstack(sl, ofs, ty, rd) :: !code
     | Locations.S(sls, ofss, tys), Locations.S(sld, ofsd, tyd) ->
-        let tmp = temp_for (class_of_type tys) in
+        let tmp = temp_for (Machregsaux.class_of_type tys) in
         (* code will be reversed at the end *)
         code := LTL.Lsetstack(tmp, sld, ofsd, tyd) ::
                 LTL.Lgetstack(sls, ofss, tys, tmp) :: !code
@@ -1115,8 +1115,8 @@ let transl_instr alloc instr k =
             LTL.Lop(Omove, [rarg1], rres) ::
             LTL.Lop(op, rres :: rargl, rres) :: k
       end
-  | Xload(chunk, addr, args, dst) ->
-      LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
+  | Xload(trap, chunk, addr, args, dst) ->
+      LTL.Lload(trap, chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
   | Xstore(chunk, addr, args, src) ->
       LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k
   | Xcall(sg, vos, args, res) ->
@@ -1128,8 +1128,8 @@ let transl_instr alloc instr k =
                        AST.map_builtin_res (mreg_of alloc) res) :: k
   | Xbranch s ->
       LTL.Lbranch s :: []
-  | Xcond(cond, args, s1, s2) ->
-      LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: []
+  | Xcond(cond, args, s1, s2, i) ->
+      LTL.Lcond(cond, mregs_of alloc args, s1, s2, i) :: []
   | Xjumptable(arg, tbl) ->
       LTL.Ljumptable(mreg_of alloc arg, tbl) :: []
   | Xreturn optarg ->
diff --git a/backend/Renumber.v b/backend/Renumber.v
index 10f58251..2727b979 100644
--- a/backend/Renumber.v
+++ b/backend/Renumber.v
@@ -43,12 +43,12 @@ Definition renum_instr (i: instruction) : instruction :=
   match i with
   | Inop s => Inop (renum_pc s)
   | Iop op args res s => Iop op args res (renum_pc s)
-  | Iload chunk addr args res s => Iload chunk addr args res (renum_pc s)
+  | Iload trap chunk addr args res s => Iload trap chunk addr args res (renum_pc s)
   | Istore chunk addr args src s => Istore chunk addr args src (renum_pc s)
   | Icall sg ros args res s => Icall sg ros args res (renum_pc s)
   | Itailcall sg ros args => i
   | Ibuiltin ef args res s => Ibuiltin ef args res (renum_pc s)
-  | Icond cond args s1 s2 => Icond cond args (renum_pc s1) (renum_pc s2)
+  | Icond cond args s1 s2 info => Icond cond args (renum_pc s1) (renum_pc s2) info
   | Ijumptable arg tbl => Ijumptable arg (List.map renum_pc tbl)
   | Ireturn or => i
   end.
diff --git a/backend/Renumberproof.v b/backend/Renumberproof.v
index 7cda9425..2e161965 100644
--- a/backend/Renumberproof.v
+++ b/backend/Renumberproof.v
@@ -175,6 +175,18 @@ Proof.
   rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
   eapply exec_Iload; eauto.
   constructor; auto. eapply reach_succ; eauto. simpl; auto.
+  (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto. eapply reach_succ; eauto. simpl; auto.
+  (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. eapply reach_succ; eauto. simpl; auto.
 (* store *)
   econstructor; split.
   assert (eval_addressing tge sp addr rs ## args = Some a).
diff --git a/backend/SelectDiv.vp b/backend/SelectDiv.vp
index d91797c5..9f852616 100644
--- a/backend/SelectDiv.vp
+++ b/backend/SelectDiv.vp
@@ -15,10 +15,13 @@
 Require Import Coqlib.
 Require Import Compopts.
 Require Import AST Integers Floats.
-Require Import Op CminorSel SelectOp SplitLong SelectLong.
+Require Import Op CminorSel OpHelpers SelectOp SplitLong SelectLong.
 
 Local Open Scope cminorsel_scope.
 
+Section SELECT.
+Context {hf: helper_functions}.
+
 Definition is_intconst (e: expr) : option int :=
   match e with
   | Eop (Ointconst n) _ => Some n
@@ -221,10 +224,6 @@ Definition mods (e1: expr) (e2: expr) :=
 
 (** 64-bit integer divisions *)
 
-Section SELECT.
-
-Context {hf: helper_functions}.
-
 Definition modl_from_divl (equo: expr) (n: int64) :=
   subl (Eletvar O) (mullimm n equo).
 
@@ -241,8 +240,8 @@ Definition divlu (e1 e2: expr) :=
                     divlu_base e1 e2
                   else
                     match divlu_mul_params (Int64.unsigned n2) with
-                    | None => divlu_base e1 e2
-                    | Some(p, m) => Elet e1 (divlu_mull p m)
+                    | _ => divlu_base e1 e2
+                    (* | Some(p, m) => Elet e1 (divlu_mull p m) *) (* FIXME - hack K1 *)
                     end
       end
   | _, _ => divlu_base e1 e2
@@ -258,8 +257,8 @@ Definition modlu (e1 e2: expr) :=
                     modlu_base e1 e2
                   else
                     match divlu_mul_params (Int64.unsigned n2) with
-                    | None => modlu_base e1 e2
-                    | Some(p, m) => Elet e1 (modl_from_divl (divlu_mull p m) n2)
+                    | _ => modlu_base e1 e2
+                    (* | Some(p, m) => Elet e1 (modl_from_divl (divlu_mull p m) n2) *) (* FIXME - hack K1 *)
                     end
       end
   | _, _ => modlu_base e1 e2
@@ -285,8 +284,8 @@ Definition divls (e1 e2: expr) :=
                     divls_base e1 e2
                   else
                     match divls_mul_params (Int64.signed n2) with
-                    | None => divls_base e1 e2
-                    | Some(p, m) => Elet e1 (divls_mull p m)
+                    | _ => divls_base e1 e2
+                    (* | Some(p, m) => Elet e1 (divls_mull p m) *) (* FIXME - hack K1 *)
                     end
       end
   | _, _ => divls_base e1 e2
@@ -304,40 +303,38 @@ Definition modls (e1 e2: expr) :=
                     modls_base e1 e2
                   else
                     match divls_mul_params (Int64.signed n2) with
-                    | None => modls_base e1 e2
-                    | Some(p, m) => Elet e1 (modl_from_divl (divls_mull p m) n2)
+                    | _ => modls_base e1 e2
+                    (* | Some(p, m) => Elet e1 (modl_from_divl (divls_mull p m) n2) *) (* FIXME - hack K1 *)
                     end
       end
   | _, _ => modls_base e1 e2
   end.
-
-End SELECT.
-
+  
 (** Floating-point division by a constant can also be turned into a FP
     multiplication by the inverse constant, but only for powers of 2. *)
 
 Definition divfimm (e: expr) (n: float) :=
   match Float.exact_inverse n with
   | Some n' => Eop Omulf (e ::: Eop (Ofloatconst n') Enil ::: Enil)
-  | None    => Eop Odivf (e ::: Eop (Ofloatconst n) Enil ::: Enil)
+  | None    => divf_base e (Eop (Ofloatconst n) Enil)
   end.
 
 Nondetfunction divf (e1: expr) (e2: expr) :=
   match e2 with
   | Eop (Ofloatconst n2) Enil => divfimm e1 n2
-  | _ => Eop Odivf (e1 ::: e2 ::: Enil)
+  | _ => divf_base e1 e2
   end.
 
 Definition divfsimm (e: expr) (n: float32) :=
   match Float32.exact_inverse n with
   | Some n' => Eop Omulfs (e ::: Eop (Osingleconst n') Enil ::: Enil)
-  | None    => Eop Odivfs (e ::: Eop (Osingleconst n) Enil ::: Enil)
+  | None    => divfs_base e (Eop (Osingleconst n) Enil)
   end.
 
 Nondetfunction divfs (e1: expr) (e2: expr) :=
   match e2 with
   | Eop (Osingleconst n2) Enil => divfsimm e1 n2
-  | _ => Eop Odivfs (e1 ::: e2 ::: Enil)
+  | _ => divfs_base e1 e2
   end.
 
-
+End SELECT.
+\ No newline at end of file
diff --git a/backend/SelectDivproof.v b/backend/SelectDivproof.v
index 9d581ec9..3f91b1ba 100644
--- a/backend/SelectDivproof.v
+++ b/backend/SelectDivproof.v
@@ -15,6 +15,7 @@
 Require Import Zquot Coqlib Zbits.
 Require Import AST Integers Floats Values Memory Globalenvs Events.
 Require Import Cminor Op CminorSel.
+Require Import OpHelpers OpHelpersproof.
 Require Import SelectOp SelectOpproof SplitLong SplitLongproof SelectLong SelectLongproof SelectDiv.
 
 Local Open Scope cminorsel_scope.
@@ -587,12 +588,12 @@ Proof.
 - destruct (Compopts.optim_for_size tt).
   + eapply eval_modu_base; eauto. EvalOp.
   + destruct (divu_mul_params (Int.unsigned n2)) as [[p M] | ] eqn:PARAMS.
-    * econstructor; split.
+     * econstructor; split.
       econstructor; eauto. eapply eval_mod_from_div.
       eapply eval_divu_mul; eauto. simpl; eauto. simpl; eauto.
       rewrite Int.modu_divu. auto.
       red; intros; subst n2; discriminate.
-    * eapply eval_modu_base; eauto. EvalOp.
+     * eapply eval_modu_base; eauto. EvalOp.
 Qed.
 
 Theorem eval_modu:
@@ -704,7 +705,7 @@ Proof.
             || Int.eq i (Int.repr Int.min_signed) && Int.eq n2 Int.mone) eqn:Z2; inv DIV.
   destruct (Int.is_power2 n2) as [l | ] eqn:P2.
 - destruct (Int.ltu l (Int.repr 31)) eqn:LT31.
-  + exploit (eval_shrximm ge sp e m (Vint i :: le) (Eletvar O)).
+  + exploit (eval_shrximm prog sp e m (Vint i :: le) (Eletvar O)).
     constructor. simpl; eauto. eapply Val.divs_pow2; eauto.
     intros [v1 [X LD]]. inv LD.
     econstructor; split. econstructor. eauto.
@@ -788,10 +789,10 @@ Proof.
 + destruct (Int64.is_power2' n2) as [l|] eqn:POW.
 * exploit Val.divlu_pow2; eauto. intros EQ; subst z. apply eval_shrluimm; auto.
 * destruct (Compopts.optim_for_size tt). eapply eval_divlu_base; eauto.
-  destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS.
+  (* destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero); inv H1.
-   econstructor; split; eauto. econstructor. eauto. eapply eval_divlu_mull; eauto.
+   econstructor; split; eauto. econstructor. eauto. eapply eval_divlu_mull; eauto. *) (* FIXME - K1 hack *)
 ** eapply eval_divlu_base; eauto.
 - eapply eval_divlu_base; eauto.
 Qed.
@@ -813,14 +814,14 @@ Proof.
 + destruct (Int64.is_power2 n2) as [l|] eqn:POW.
 * exploit Val.modlu_pow2; eauto. intros EQ; subst z. eapply eval_andl; eauto. apply eval_longconst.
 * destruct (Compopts.optim_for_size tt). eapply eval_modlu_base; eauto.
-  destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS.
+  (* destruct (divlu_mul_params (Int64.unsigned n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero) eqn:Z; inv H1.
    rewrite Int64.modu_divu.
     econstructor; split; eauto. econstructor. eauto.
     eapply eval_modl_from_divl; eauto.
     eapply eval_divlu_mull; eauto.
-    red; intros; subst n2; discriminate Z.
+    red; intros; subst n2; discriminate Z. *)
 ** eapply eval_modlu_base; eauto.
 - eapply eval_modlu_base; eauto.
 Qed.
@@ -883,12 +884,12 @@ Proof.
 ** exploit Val.divls_pow2; eauto. intros EQ. eapply eval_shrxlimm; eauto.
 ** eapply eval_divls_base; eauto.
 * destruct (Compopts.optim_for_size tt). eapply eval_divls_base; eauto.
-  destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS.
+  (* destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero
              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); inv H1.
    econstructor; split; eauto. econstructor. eauto.
-   eapply eval_divls_mull; eauto.
+   eapply eval_divls_mull; eauto. *)
 ** eapply eval_divls_base; eauto.
 - eapply eval_divls_base; eauto.
 Qed.
@@ -925,14 +926,14 @@ Proof.
   rewrite Int64.mods_divs. auto.
 **eapply eval_modls_base; eauto.
 * destruct (Compopts.optim_for_size tt). eapply eval_modls_base; eauto.
-  destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS.
+  (* destruct (divls_mul_params (Int64.signed n2)) as [[p M]|] eqn:PARAMS.
 ** destruct x; simpl in H1; try discriminate.
    destruct (Int64.eq n2 Int64.zero
              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); inv H1.
    econstructor; split; eauto. econstructor. eauto.
    rewrite Int64.mods_divs.
    eapply eval_modl_from_divl; auto.
-   eapply eval_divls_mull; eauto.
+   eapply eval_divls_mull; eauto. *)
 ** eapply eval_modls_base; eauto.
 - eapply eval_modls_base; eauto.
 Qed.
@@ -950,8 +951,8 @@ Proof.
   + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
     repeat (econstructor; eauto). 
     destruct x; simpl; auto. erewrite Float.div_mul_inverse; eauto.
-  + TrivialExists.
-- TrivialExists.
+  + apply eval_divf_base; trivial.
+- apply eval_divf_base; trivial.
 Qed.
 
 Theorem eval_divfs:
@@ -965,8 +966,8 @@ Proof.
   + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
     repeat (econstructor; eauto). 
     destruct x; simpl; auto. erewrite Float32.div_mul_inverse; eauto.
-  + TrivialExists.
-- TrivialExists.
+  + apply eval_divfs_base; trivial.
+- apply eval_divfs_base; trivial.
 Qed.
 
 End CMCONSTRS.
diff --git a/backend/Selection.v b/backend/Selection.v
index a5bef9ae..8667922f 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -26,7 +26,7 @@ Require String.
 Require Import Coqlib Maps.
 Require Import AST Errors Integers Globalenvs Builtins Switch.
 Require Cminor.
-Require Import Op CminorSel Cminortyping.
+Require Import Op CminorSel OpHelpers Cminortyping.
 Require Import SelectOp SplitLong SelectLong SelectDiv.
 Require Machregs.
 
@@ -35,12 +35,13 @@ Local Open Scope error_monad_scope.
 
 (** Conversion of conditions *)
 
-Function condexpr_of_expr (e: expr) : condexpr :=
+Function condexpr_of_expr (e: expr) (expected : option bool) : condexpr :=
   match e with
-  | Eop (Ocmp c) el => CEcond c el
-  | Econdition a b c => CEcondition a (condexpr_of_expr b) (condexpr_of_expr c)
-  | Elet a b => CElet a (condexpr_of_expr b)
-  | _ => CEcond (Ccompuimm Cne Int.zero) (e ::: Enil)
+  | Eop (Ocmp c) el => CEcond c expected el
+  | Econdition a b c => CEcondition a (condexpr_of_expr b expected)
+                                      (condexpr_of_expr c expected)
+  | Elet a b => CElet a (condexpr_of_expr b expected)
+  | _ => CEcond (Ccompuimm Cne Int.zero) expected (e ::: Enil)
   end.
 
 Function condition_of_expr (e: expr) : condition * exprlist :=
@@ -120,6 +121,7 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
 
 Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
   match op with
+  | Cminor.Oexpect ty => arg1
   | Cminor.Oadd => add arg1 arg2
   | Cminor.Osub => sub arg1 arg2
   | Cminor.Omul => mul arg1 arg2
@@ -166,7 +168,7 @@ Definition sel_select (ty: typ) (cnd ifso ifnot: expr) : expr :=
    let (cond, args) := condition_of_expr cnd in
    match SelectOp.select ty cond args ifso ifnot with
    | Some a => a
-   | None => Econdition (condexpr_of_expr cnd) ifso ifnot
+   | None => Econdition (condexpr_of_expr cnd None) ifso ifnot
    end.
 
 (** Conversion from Cminor expression to Cminorsel expressions *)
@@ -243,7 +245,8 @@ Definition sel_builtin_res (optid: option ident) : builtin_res ident :=
 Function sel_known_builtin (bf: builtin_function) (args: exprlist) :=
   match bf, args with
   | BI_platform b, _ =>
-      SelectOp.platform_builtin b args
+    SelectOp.platform_builtin b args
+(*  | BI_standard BI_expect, a1 ::: a2 ::: Enil => Some a1 *)
   | BI_standard (BI_select ty), a1 ::: a2 ::: a3 ::: Enil =>
       Some (sel_select ty a1 a2 a3)
   | BI_standard BI_fabs, a1 ::: Enil =>
@@ -302,16 +305,16 @@ Fixpoint sel_switch (arg: nat) (t: comptree): exitexpr :=
   | CTaction act =>
       XEexit act
   | CTifeq key act t' =>
-      XEcondition (condexpr_of_expr (make_cmp_eq (Eletvar arg) key))
+      XEcondition (condexpr_of_expr (make_cmp_eq (Eletvar arg) key) None)
                   (XEexit act)
                   (sel_switch arg t')
   | CTiflt key t1 t2 =>
-      XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar arg) key))
+      XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar arg) key) None)
                   (sel_switch arg t1)
                   (sel_switch arg t2)
   | CTjumptable ofs sz tbl t' =>
       XElet (make_sub (Eletvar arg) ofs)
-        (XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar O) sz))
+        (XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar O) sz) None)
                      (XEjumptable (make_to_int (Eletvar O)) tbl)
                      (sel_switch (S arg) t'))
   end.
@@ -386,6 +389,22 @@ Definition if_conversion
   | _, _ => None
   end.
 
+Definition extract_expect1 (e : Cminor.expr) : option bool :=
+  match e with
+  | Cminor.Ebinop (Cminor.Oexpect ty) e1 (Cminor.Econst (Cminor.Ointconst c)) =>
+    Some (if Int.eq_dec c Int.zero then false else true)
+  | Cminor.Ebinop (Cminor.Oexpect ty) e1 (Cminor.Econst (Cminor.Olongconst c)) =>
+    Some (if Int64.eq_dec c Int64.zero then false else true)
+  | _ => None
+  end.
+
+Definition extract_expect (e : Cminor.expr) : option bool :=
+  match e with
+  | Cminor.Ebinop (Cminor.Ocmpu Cne) e1 (Cminor.Econst (Cminor.Ointconst c)) =>
+    if Int.eq_dec c Int.zero then extract_expect1 e1 else None
+  | _ => extract_expect1 e
+  end.
+
 (** Conversion from Cminor statements to Cminorsel statements. *)
 
 Fixpoint sel_stmt (ki: known_idents) (env: typenv) (s: Cminor.stmt) : res stmt :=
@@ -413,8 +432,10 @@ Fixpoint sel_stmt (ki: known_idents) (env: typenv) (s: Cminor.stmt) : res stmt :
       match if_conversion ki env e ifso ifnot with
       | Some s => OK s
       | None =>
-          do ifso' <- sel_stmt ki env ifso; do ifnot' <- sel_stmt ki env ifnot;
-          OK (Sifthenelse (condexpr_of_expr (sel_expr e)) ifso' ifnot')
+        do ifso' <- sel_stmt ki env ifso;
+        do ifnot' <- sel_stmt ki env ifnot;
+        OK (Sifthenelse (condexpr_of_expr (sel_expr e)
+                          (extract_expect e)) ifso' ifnot')
       end
   | Cminor.Sloop body =>
       do body' <- sel_stmt ki env body; OK (Sloop body')
@@ -518,11 +539,19 @@ Definition get_helpers (defmap: PTree.t globdef) : res helper_functions :=
   do i64_sar <- lookup_helper globs "__compcert_i64_sar" sig_li_l ;
   do i64_umulh <- lookup_helper globs "__compcert_i64_umulh" sig_ll_l ;
   do i64_smulh <- lookup_helper globs "__compcert_i64_smulh" sig_ll_l ;
+  do i32_sdiv <- lookup_helper globs "__compcert_i32_sdiv" sig_ii_i ;
+  do i32_udiv <- lookup_helper globs "__compcert_i32_udiv" sig_ii_i ;
+  do i32_smod <- lookup_helper globs "__compcert_i32_smod" sig_ii_i ;
+  do i32_umod <- lookup_helper globs "__compcert_i32_umod" sig_ii_i ;
+  do f64_div <- lookup_helper globs "__compcert_f64_div" sig_ff_f ;
+  do f32_div <- lookup_helper globs "__compcert_f32_div" sig_ss_s ;
   OK (mk_helper_functions
      i64_dtos i64_dtou i64_stod i64_utod i64_stof i64_utof
      i64_sdiv i64_udiv i64_smod i64_umod
      i64_shl i64_shr i64_sar
-     i64_umulh i64_smulh).
+     i64_umulh i64_smulh
+     i32_sdiv i32_udiv i32_smod i32_umod
+     f64_div f32_div).
 
 (** Conversion of programs. *)
 
diff --git a/backend/Selectionaux.ml b/backend/Selectionaux.ml
index 8acae8f2..fcf8e238 100644
--- a/backend/Selectionaux.ml
+++ b/backend/Selectionaux.ml
@@ -39,6 +39,7 @@ let cost_unop = function
   | Osingleoflong | Osingleoflongu -> assert false
 
 let cost_binop = function
+  | Oexpect _ -> 0
   | Oadd  | Osub -> 1
   | Omul -> 2
   | Odiv  | Odivu | Omod  | Omodu -> assert false
@@ -73,13 +74,13 @@ let fast_cmove ty =
   | "arm", _ ->
       (match ty with Tint | Tfloat | Tsingle -> true | _ -> false)
   | "powerpc", "e5500" -> 
-      (match ty with Tint -> true | Tlong -> true | _ -> false)
+      (match ty with Tint | Tlong -> true | _ -> false)
   | "powerpc", _ -> false
   | "riscV", _ -> false
   | "x86", _ -> 
-      (match ty with Tint -> true | Tlong -> Archi.ptr64 | _ -> false)
-  | _, _ ->
-      assert false
+     (match ty with Tint -> true | Tlong -> Archi.ptr64 | _ -> false)
+  | "kvx", _ -> true
+  | a, m -> failwith (Printf.sprintf "fast_cmove: unknown arch %s %s" a m)
 
 (* The if-conversion heuristic depend on the
    -fif-conversion and -Obranchless flags.
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 4755ab79..e737ba4b 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -17,6 +17,7 @@ Require Import Coqlib Maps.
 Require Import AST Linking Errors Integers.
 Require Import Values Memory Builtins Events Globalenvs Smallstep.
 Require Import Switch Cminor Op CminorSel Cminortyping.
+Require Import OpHelpers OpHelpersproof.
 Require Import SelectOp SelectDiv SplitLong SelectLong Selection.
 Require Import SelectOpproof SelectDivproof SplitLongproof SelectLongproof.
 
@@ -87,7 +88,7 @@ Lemma get_helpers_correct:
   forall p hf,
   get_helpers (prog_defmap p) = OK hf -> helper_functions_declared p hf.
 Proof.
-  intros. monadInv H. red; simpl. auto 20 using lookup_helper_correct.
+  intros. monadInv H. red; simpl. auto 22 using lookup_helper_correct.
 Qed.
 
 Theorem transf_program_match:
@@ -107,7 +108,7 @@ Proof.
   { unfold helper_declared; intros.
     destruct (prog_defmap_linkorder _ _ _ _ H0 H1) as (gd & P & Q).
     inv Q. inv H3. auto. }
-  red in H. decompose [Logic.and] H; clear H. red; auto 20.
+  red in H. decompose [Logic.and] H; clear H. red; auto 22.
 Qed.
 
 (** * Correctness of the instruction selection functions for expressions *)
@@ -175,7 +176,7 @@ Proof.
     generalize (match_program_defmap _ _ _ _ _ TRANSF id).
     unfold Cminor.fundef; rewrite H; intros R; inv R. inv H2.
     destruct H4 as (cu & A & B). monadInv B. auto. }
-  unfold helper_functions_declared; intros. decompose [Logic.and] H; clear H. auto 20.
+  unfold helper_functions_declared; intros. decompose [Logic.and] H; clear H. auto 22.
 Qed.
 
 Section CMCONSTR.
@@ -195,12 +196,12 @@ Variable e: env.
 Variable m: mem.
 
 Lemma eval_condexpr_of_expr:
-  forall a le v b,
+  forall expected a le v b,
   eval_expr tge sp e m le a v ->
   Val.bool_of_val v b ->
-  eval_condexpr tge sp e m le (condexpr_of_expr a) b.
+  eval_condexpr tge sp e m le (condexpr_of_expr a expected) b.
 Proof.
-  intros until a. functional induction (condexpr_of_expr a); intros.
+  intros until a. functional induction (condexpr_of_expr a expected); intros.
 (* compare *)
   inv H. econstructor; eauto.
   simpl in H6. inv H6. apply Val.bool_of_val_of_optbool. auto.
@@ -309,46 +310,47 @@ Lemma eval_sel_binop:
   exists v', eval_expr tge sp e m le (sel_binop op a1 a2) v' /\ Val.lessdef v v'.
 Proof.
   destruct op; simpl; intros; FuncInv; try subst v.
-  apply eval_add; auto.
-  apply eval_sub; auto.
-  apply eval_mul; auto.
-  eapply eval_divs; eauto.
-  eapply eval_divu; eauto.
-  eapply eval_mods; eauto.
-  eapply eval_modu; eauto.
-  apply eval_and; auto.
-  apply eval_or; auto.
-  apply eval_xor; auto.
-  apply eval_shl; auto.
-  apply eval_shr; auto.
-  apply eval_shru; auto.
-  apply eval_addf; auto.
-  apply eval_subf; auto.
-  apply eval_mulf; auto.
-  apply eval_divf; auto.
-  apply eval_addfs; auto.
-  apply eval_subfs; auto.
-  apply eval_mulfs; auto.
-  apply eval_divfs; auto.
-  eapply eval_addl; eauto.
-  eapply eval_subl; eauto.
-  eapply eval_mull; eauto.
-  eapply eval_divls; eauto.
-  eapply eval_divlu; eauto.
-  eapply eval_modls; eauto.
-  eapply eval_modlu; eauto.
-  eapply eval_andl; eauto.
-  eapply eval_orl; eauto.
-  eapply eval_xorl; eauto.
-  eapply eval_shll; eauto.
-  eapply eval_shrl; eauto.
-  eapply eval_shrlu; eauto.
-  apply eval_comp; auto.
-  apply eval_compu; auto.
-  apply eval_compf; auto.
-  apply eval_compfs; auto.
-  exists v; split; auto. eapply eval_cmpl; eauto.
-  exists v; split; auto. eapply eval_cmplu; eauto.
+  - exists v1; split; trivial. apply Val.lessdef_normalize.
+  - apply eval_add; auto.
+  - apply eval_sub; auto.
+  - apply eval_mul; auto.
+  - eapply eval_divs; eauto.
+  - eapply eval_divu; eauto.
+  - eapply eval_mods; eauto.
+  - eapply eval_modu; eauto.
+  - apply eval_and; auto.
+  - apply eval_or; auto.
+  - apply eval_xor; auto.
+  - apply eval_shl; auto.
+  - apply eval_shr; auto.
+  - apply eval_shru; auto.
+  - apply eval_addf; auto.
+  - apply eval_subf; auto.
+  - apply eval_mulf; auto.
+  - apply eval_divf; auto.
+  - apply eval_addfs; auto.
+  - apply eval_subfs; auto.
+  - apply eval_mulfs; auto.
+  - apply eval_divfs; auto.
+  - eapply eval_addl; eauto.
+  - eapply eval_subl; eauto.
+  - eapply eval_mull; eauto.
+  - eapply eval_divls; eauto.
+  - eapply eval_divlu; eauto.
+  - eapply eval_modls; eauto.
+  - eapply eval_modlu; eauto.
+  - eapply eval_andl; eauto.
+  - eapply eval_orl; eauto.
+  - eapply eval_xorl; eauto.
+  - eapply eval_shll; eauto.
+  - eapply eval_shrl; eauto.
+  - eapply eval_shrlu; eauto.
+  - apply eval_comp; auto.
+  - apply eval_compu; auto.
+  - apply eval_compf; auto.
+  - apply eval_compfs; auto.
+  - exists v; split; auto. eapply eval_cmpl; eauto.
+  - exists v; split; auto. eapply eval_cmplu; eauto.
 Qed.
 
 Lemma eval_sel_select:
@@ -781,6 +783,8 @@ Lemma sel_select_opt_correct:
   Cminor.eval_expr ge sp e m cond vcond ->
   Cminor.eval_expr ge sp e m a1 v1 ->
   Cminor.eval_expr ge sp e m a2 v2 ->
+  Val.has_type v1 ty ->
+  Val.has_type v2 ty ->
   Val.bool_of_val vcond b ->
   env_lessdef e e' -> Mem.extends m m' ->
   exists v', eval_expr tge sp e' m' le a v' /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v'.
@@ -790,7 +794,7 @@ Proof.
   exploit sel_expr_correct. eexact H0. eauto. eauto. intros (vcond' & EVC & LDC).
   exploit sel_expr_correct. eexact H1. eauto. eauto. intros (v1' & EV1 & LD1).
   exploit sel_expr_correct. eexact H2. eauto. eauto. intros (v2' & EV2 & LD2).
-  assert (Val.bool_of_val vcond' b) by (inv H3; inv LDC; constructor).
+  assert (Val.bool_of_val vcond' b) by (inv H5; inv LDC; constructor).
   exploit eval_condition_of_expr. eexact EVC. eauto. rewrite C. intros (vargs' & EVARGS & EVCOND).
   exploit eval_select; eauto. intros (v' & X & Y). 
   exists v'; split; eauto. 
@@ -1194,21 +1198,21 @@ Remark find_label_commut:
 Proof.
   induction s; intros until k'; simpl; intros MC SE; try (monadInv SE); simpl; auto.
 (* store *)
-  unfold store. destruct (addressing m (sel_expr e)); simpl; auto.
+- unfold store. destruct (addressing m (sel_expr e)); simpl; auto.
 (* call *)
-  destruct (classify_call (prog_defmap cunit) e); simpl; auto.
+- destruct (classify_call (prog_defmap cunit) e); simpl; auto.
   rewrite sel_builtin_nolabel; auto.
 (* tailcall *)
-  destruct (classify_call (prog_defmap cunit) e); simpl; auto.
+- destruct (classify_call (prog_defmap cunit) e); simpl; auto.
 (* builtin *)
-  rewrite sel_builtin_nolabel; auto.
+- rewrite sel_builtin_nolabel; auto.
 (* seq *)
-  exploit (IHs1 (Cminor.Kseq s2 k)). constructor; eauto. eauto.
+- exploit (IHs1 (Cminor.Kseq s2 k)). constructor; eauto. eauto.
   destruct (Cminor.find_label lbl s1 (Cminor.Kseq s2 k)) as [[sx kx] | ];
   destruct (find_label lbl x (Kseq x0 k')) as [[sy ky] | ];
   intuition. apply IHs2; auto.
 (* ifthenelse *)
-  destruct (if_conversion ki env e s1 s2) as [s|] eqn:IFC.
+- destruct (if_conversion ki env e s1 s2) as [s|] eqn:IFC.
   inv SE. exploit if_conversion_nolabel; eauto. intros (A & B & C).
   rewrite A, B, C. auto.
   monadInv SE; simpl.
@@ -1217,19 +1221,19 @@ Proof.
   destruct (find_label lbl x k') as [[sy ky] | ];
   intuition. apply IHs2; auto.
 (* loop *)
-  apply IHs. constructor; auto. simpl; rewrite EQ; auto. auto.
+- apply IHs. constructor; auto. simpl; rewrite EQ; auto. auto.
 (* block *)
-  apply IHs. constructor; auto. auto.
+- apply IHs. constructor; auto. auto.
 (* switch *)
-  destruct b.
+- destruct b.
   destruct (validate_switch Int64.modulus n l (compile_switch Int64.modulus n l)); inv SE.
   simpl; auto.
   destruct (validate_switch Int.modulus n l (compile_switch Int.modulus n l)); inv SE.
   simpl; auto.
 (* return *)
-  destruct o; inv SE; simpl; auto.
+- destruct o; inv SE; simpl; auto.
 (* label *)
-  destruct (ident_eq lbl l). auto. apply IHs; auto.
+- destruct (ident_eq lbl l). auto. apply IHs; auto.
 Qed.
 
 Definition measure (s: Cminor.state) : nat :=
diff --git a/backend/SplitLong.vp b/backend/SplitLong.vp
index 694bb0e2..0f240602 100644
--- a/backend/SplitLong.vp
+++ b/backend/SplitLong.vp
@@ -10,47 +10,19 @@
 (*                                                                     *)
 (* *********************************************************************)
 
+(* FIXME: expected branching information not propagated *)
 (** Instruction selection for 64-bit integer operations *)
 
 Require String.
 Require Import Coqlib.
 Require Import AST Integers Floats.
 Require Import Op CminorSel.
+Require Import OpHelpers.
 Require Import SelectOp.
 
 Local Open Scope cminorsel_scope.
 Local Open Scope string_scope.
 
-(** Some operations on 64-bit integers are transformed into calls to
-  runtime library functions.  The following type class collects
-  the names of these functions. *)
-
-Class helper_functions := mk_helper_functions {
-  i64_dtos: ident;                      (**r float64 -> signed long *)
-  i64_dtou: ident;                      (**r float64 -> unsigned long *)
-  i64_stod: ident;                      (**r signed long -> float64 *)
-  i64_utod: ident;                      (**r unsigned long -> float64 *)
-  i64_stof: ident;                      (**r signed long -> float32 *)
-  i64_utof: ident;                      (**r unsigned long -> float32 *)
-  i64_sdiv: ident;                      (**r signed division *)
-  i64_udiv: ident;                      (**r unsigned division *)
-  i64_smod: ident;                      (**r signed remainder *)
-  i64_umod: ident;                      (**r unsigned remainder *)
-  i64_shl: ident;                       (**r shift left *)
-  i64_shr: ident;                       (**r shift right unsigned *)
-  i64_sar: ident;                       (**r shift right signed *)
-  i64_umulh: ident;                     (**r unsigned multiply high *)
-  i64_smulh: ident;                     (**r signed multiply high *)
-}.
-
-Definition sig_l_l := mksignature (Tlong :: nil) Tlong cc_default.
-Definition sig_l_f := mksignature (Tlong :: nil) Tfloat cc_default.
-Definition sig_l_s := mksignature (Tlong :: nil) Tsingle cc_default.
-Definition sig_f_l := mksignature (Tfloat :: nil) Tlong cc_default.
-Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) Tlong cc_default.
-Definition sig_li_l := mksignature (Tlong :: Tint :: nil) Tlong cc_default.
-Definition sig_ii_l := mksignature (Tint :: Tint :: nil) Tlong cc_default.
-
 Section SELECT.
 
 Context {hf: helper_functions}.
@@ -285,7 +257,7 @@ Definition cmpl_ne_zero (e: expr) :=
 
 Definition cmplu_gen (ch cl: comparison) (e1 e2: expr) :=
   splitlong2 e1 e2 (fun h1 l1 h2 l2 =>
-    Econdition (CEcond (Ccomp Ceq) (h1:::h2:::Enil))
+    Econdition (CEcond (Ccomp Ceq) None (h1:::h2:::Enil))
                (Eop (Ocmp (Ccompu cl)) (l1:::l2:::Enil))
                (Eop (Ocmp (Ccompu ch)) (h1:::h2:::Enil))).
 
@@ -307,7 +279,7 @@ Definition cmplu (c: comparison) (e1 e2: expr) :=
 
 Definition cmpl_gen (ch cl: comparison) (e1 e2: expr) :=
   splitlong2 e1 e2 (fun h1 l1 h2 l2 =>
-    Econdition (CEcond (Ccomp Ceq) (h1:::h2:::Enil))
+    Econdition (CEcond (Ccomp Ceq) None (h1:::h2:::Enil))
                (Eop (Ocmp (Ccompu cl)) (l1:::l2:::Enil))
                (Eop (Ocmp (Ccomp ch)) (h1:::h2:::Enil))).
 
diff --git a/backend/SplitLongproof.v b/backend/SplitLongproof.v
index 1e50b1c2..e45c3a34 100644
--- a/backend/SplitLongproof.v
+++ b/backend/SplitLongproof.v
@@ -16,6 +16,8 @@ Require Import String.
 Require Import Coqlib Maps.
 Require Import AST Errors Integers Floats.
 Require Import Values Memory Globalenvs Builtins Events Cminor Op CminorSel.
+Require Import OpHelpers OpHelpersproof.
+Require Import Values Memory Globalenvs Builtins Events Cminor Op CminorSel.
 Require Import SelectOp SelectOpproof SplitLong.
 
 Local Open Scope cminorsel_scope.
@@ -23,26 +25,6 @@ Local Open Scope string_scope.
 
 (** * Properties of the helper functions *)
 
-Definition helper_declared {F V: Type} (p: AST.program (AST.fundef F) V) (id: ident) (name: string) (sg: signature) : Prop :=
-  (prog_defmap p)!id = Some (Gfun (External (EF_runtime name sg))).
-
-Definition helper_functions_declared {F V: Type} (p: AST.program (AST.fundef F) V) (hf: helper_functions) : Prop :=
-     helper_declared p i64_dtos "__compcert_i64_dtos" sig_f_l
-  /\ helper_declared p i64_dtou "__compcert_i64_dtou" sig_f_l
-  /\ helper_declared p i64_stod "__compcert_i64_stod" sig_l_f
-  /\ helper_declared p i64_utod "__compcert_i64_utod" sig_l_f
-  /\ helper_declared p i64_stof "__compcert_i64_stof" sig_l_s
-  /\ helper_declared p i64_utof "__compcert_i64_utof" sig_l_s
-  /\ helper_declared p i64_sdiv "__compcert_i64_sdiv" sig_ll_l
-  /\ helper_declared p i64_udiv "__compcert_i64_udiv" sig_ll_l
-  /\ helper_declared p i64_smod "__compcert_i64_smod" sig_ll_l
-  /\ helper_declared p i64_umod "__compcert_i64_umod" sig_ll_l
-  /\ helper_declared p i64_shl "__compcert_i64_shl" sig_li_l
-  /\ helper_declared p i64_shr "__compcert_i64_shr" sig_li_l
-  /\ helper_declared p i64_sar "__compcert_i64_sar" sig_li_l
-  /\ helper_declared p i64_umulh "__compcert_i64_umulh" sig_ll_l
-  /\ helper_declared p i64_smulh "__compcert_i64_smulh" sig_ll_l.
-
 (** * Correctness of the instruction selection functions for 64-bit operators *)
 
 Section CMCONSTR.
@@ -55,6 +37,7 @@ Variable sp: val.
 Variable e: env.
 Variable m: mem.
 
+Ltac UseHelper := decompose [Logic.and] arith_helpers_correct; eauto.
 Ltac DeclHelper := red in HELPERS; decompose [Logic.and] HELPERS; eauto.
 
 Lemma eval_helper:
@@ -342,7 +325,7 @@ Theorem eval_longofint: unary_constructor_sound longofint Val.longofint.
 Proof.
   red; intros. unfold longofint. destruct (longofint_match a).
 - InvEval. econstructor; split. apply eval_longconst. auto.
-- exploit (eval_shrimm ge sp e m (Int.repr 31) (x :: le) (Eletvar 0)). EvalOp.
+- exploit (eval_shrimm prog sp e m (Int.repr 31) (x :: le) (Eletvar 0)). EvalOp.
   intros [v1 [A B]].
   econstructor; split. EvalOp.
   destruct x; simpl; auto.
diff --git a/backend/Splitting.ml b/backend/Splitting.ml
index 40f09c3d..3ca45c3b 100644
--- a/backend/Splitting.ml
+++ b/backend/Splitting.ml
@@ -151,8 +151,8 @@ let ren_instr f maps pc i =
   | Inop s -> Inop s
   | Iop(op, args, res, s) ->
       Iop(op, ren_regs before args, ren_reg after res, s)
-  | Iload(chunk, addr, args, dst, s) ->
-      Iload(chunk, addr, ren_regs before args, ren_reg after dst, s)
+  | Iload(trap, chunk, addr, args, dst, s) ->
+      Iload(trap, chunk, addr, ren_regs before args, ren_reg after dst, s)
   | Istore(chunk, addr, args, src, s) ->
       Istore(chunk, addr, ren_regs before args, ren_reg before src, s)
   | Icall(sg, ros, args, res, s) ->
@@ -162,8 +162,8 @@ let ren_instr f maps pc i =
   | Ibuiltin(ef, args, res, s) ->
       Ibuiltin(ef, List.map (AST.map_builtin_arg (ren_reg before)) args,
                    AST.map_builtin_res (ren_reg after) res, s)
-  | Icond(cond, args, s1, s2) ->
-      Icond(cond, ren_regs before args, s1, s2)
+  | Icond(cond, args, s1, s2, i) ->
+      Icond(cond, ren_regs before args, s1, s2, i)
   | Ijumptable(arg, tbl) ->
       Ijumptable(ren_reg before arg, tbl)
   | Ireturn optarg ->
diff --git a/backend/Stacking.v b/backend/Stacking.v
index 7b382d05..0e3f2832 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -133,8 +133,8 @@ Definition transl_instr
       end
   | Lop op args res =>
       Mop (transl_op fe op) args res :: k
-  | Lload chunk addr args dst =>
-      Mload chunk (transl_addr fe addr) args dst :: k
+  | Lload trap chunk addr args dst =>
+      Mload trap chunk (transl_addr fe addr) args dst :: k
   | Lstore chunk addr args src =>
       Mstore chunk (transl_addr fe addr) args src :: k
   | Lcall sig ros =>
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 7724c5d6..6d793961 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -868,7 +868,7 @@ Qed.
 Remark transl_destroyed_by_op:
   forall op e, destroyed_by_op (transl_op e op) = destroyed_by_op op.
 Proof.
-  intros; destruct op; reflexivity.
+  intros; destruct op; try reflexivity; simpl.
 Qed.
 
 Remark transl_destroyed_by_load:
@@ -1891,12 +1891,13 @@ Proof.
   apply plus_one. econstructor.
   instantiate (1 := v'). rewrite <- A. apply eval_operation_preserved.
   exact symbols_preserved. eauto.
-  econstructor; eauto with coqlib.
-  apply agree_regs_set_reg; auto.
-  rewrite transl_destroyed_by_op.  apply agree_regs_undef_regs; auto.
-  apply agree_locs_set_reg; auto. apply agree_locs_undef_locs. auto. apply destroyed_by_op_caller_save.
-  apply frame_set_reg. apply frame_undef_regs. exact SEP.
-
+  econstructor; eauto with coqlib;
+  try (apply agree_regs_set_reg; auto);
+  (* generic proof *)
+  solve [ 
+    (rewrite transl_destroyed_by_op; apply agree_regs_undef_regs; auto) |
+    (apply agree_locs_set_reg; auto; apply agree_locs_undef_locs; auto; apply destroyed_by_op_caller_save) |
+    (apply frame_set_reg; apply frame_undef_regs; exact SEP) ].
 - (* Lload *)
   assert (exists a',
           eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
@@ -1917,6 +1918,46 @@ Proof.
   apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
   apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
 
+- (* Lload notrap1*)
+  assert (eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = None) as Haddress.
+  eapply eval_addressing_inject_none; eauto.
+  eapply globalenv_inject_preserves_globals. eapply sep_proj2. eapply sep_proj2. eapply sep_proj2. eexact SEP.
+  eapply agree_reglist; eauto.
+  econstructor; split.
+  apply plus_one. apply exec_Mload_notrap1.
+  rewrite <- Haddress. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  
+- (* Lload notrap2 *)
+  assert (exists a',
+          eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
+       /\ Val.inject j a a').
+  eapply eval_addressing_inject; eauto.
+  eapply globalenv_inject_preserves_globals. eapply sep_proj2. eapply sep_proj2. eapply sep_proj2. eexact SEP.
+  eapply agree_reglist; eauto.
+  destruct H1 as [a' [A B]].
+
+  destruct ( Mem.loadv chunk m' a') as [v'|] eqn:Hloadv.
+  {
+  econstructor; split.
+  apply plus_one. apply exec_Mload with (a:=a') (v:=v'); eauto.
+  try (rewrite <- A; apply eval_addressing_preserved; auto; exact symbols_preserved).
+  econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  }
+  {
+  econstructor; split.
+  apply plus_one. apply exec_Mload_notrap2 with (a:=a'); eauto.
+  try (rewrite <- A; apply eval_addressing_preserved; auto; exact symbols_preserved).
+  
+  econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  }
+
 - (* Lstore *)
   assert (exists a',
           eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v
index 7a5be5ed..39fc10fb 100644
--- a/backend/Tailcallproof.v
+++ b/backend/Tailcallproof.v
@@ -436,6 +436,43 @@ Proof.
   apply eval_addressing_preserved. exact symbols_preserved. eauto.
   econstructor; eauto. apply set_reg_lessdef; auto.
 
+- (* load notrap1 *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
+  left.
+  exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split.
+  eapply exec_Iload_notrap1.
+  eassumption.
+  eapply eval_addressing_lessdef_none. eassumption.
+  erewrite eval_addressing_preserved.
+  eassumption. exact symbols_preserved.
+
+  econstructor; eauto. apply set_reg_lessdef; auto.
+
+- (* load notrap2 *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
+  left.
+
+  exploit eval_addressing_lessdef; eauto.
+  intros [a' [ADDR' ALD]].
+  
+  destruct (Mem.loadv chunk m' a') eqn:Echunk2.
+  + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- v) m'); split.
+    eapply exec_Iload with (a:=a'). eassumption.
+    erewrite eval_addressing_preserved.
+    eassumption.
+    exact symbols_preserved.
+    assumption.
+    econstructor; eauto. apply set_reg_lessdef; auto.
+  + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split.
+    eapply exec_Iload_notrap2. eassumption.
+    erewrite eval_addressing_preserved.
+    eassumption.
+    exact symbols_preserved.
+    assumption.
+    econstructor; eauto. apply set_reg_lessdef; auto.
+    
 - (* store *)
   TransfInstr.
   assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
deleted file mode 100644
index 265e06ba..00000000
--- a/backend/Tunneling.v
+++ /dev/null
@@ -1,192 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Branch tunneling (optimization of branches to branches). *)
-
-Require Import FunInd.
-Require Import Coqlib Maps UnionFind.
-Require Import AST.
-Require Import LTL.
-
-(** Branch tunneling shortens sequences of branches (with no intervening
-  computations) by rewriting the branch and conditional branch instructions
-  so that they jump directly to the end of the branch sequence.
-  For example:
-<<
-     L1: branch L2;                       L1: branch L3;
-     L2; branch L3;            becomes    L2: branch L3;
-     L3: instr;                           L3: instr;
-     L4: if (cond) goto L1;               L4: if (cond) goto L3;
->>
-  This optimization can be applied to several of our intermediate
-  languages.  We choose to perform it on the [LTL] language,
-  after register allocation but before code linearization.
-  Register allocation can delete instructions (such as dead
-  computations or useless moves), therefore there are more
-  opportunities for tunneling after allocation than before.
-  Symmetrically, prior tunneling helps linearization to produce
-  better code, e.g. by revealing that some [branch] instructions are
-  dead code (as the "branch L3" in the example above).
-*)
-
-(** The naive implementation of branch tunneling would replace
-  any branch to a node [pc] by a branch to the node
-  [branch_target f pc], defined as follows:
-<<
-  branch_target f pc = branch_target f pc'  if f(pc) = branch pc'
-                     = pc                   otherwise
->>
-  However, this definition can fail to terminate if
-  the program can contain loops consisting only of branches, as in
-<<
-     L1: branch L1;
->>
-  or
-<<
-     L1: branch L2;
-     L2: branch L1;
->>
-  Coq warns us of this fact by not accepting the definition
-  of [branch_target] above.
-
-  To handle this problem, we proceed in two passes:
-
-- The first pass populates a union-find data structure, adding equalities
-  between PCs of blocks that are connected by branches and no other
-  computation.
-
-- The second pass rewrites the code, replacing every branch to a node [pc]
-  by a branch to the canonical representative of the equivalence class of [pc].
-*)
-
-(** * Construction of the union-find data structure *)
-
-Module U := UnionFind.UF(PTree).
-
-(** We start populating the union-find data structure by adding
-    equalities [pc = pc'] for every block [pc: branch pc'] in the function. *)
-
-Definition record_branch (uf: U.t) (pc: node) (b: bblock) : U.t :=
-  match b with
-  | Lbranch s :: _ => U.union uf pc s
-  | _ => uf
-  end.
-
-Definition record_branches (f: LTL.function) : U.t :=
-  PTree.fold record_branch f.(fn_code) U.empty.
-
-(** An additional optimization opportunity comes from conditional branches.
-    Consider a block [pc: cond ifso ifnot].  If the [ifso] case
-    and the [ifnot] case jump to the same block [pc']
-    (modulo intermediate branches), the block can be simplified into
-    [pc: branch pc'], and the equality [pc = pc'] can be added to the
-    union-find data structure. *)
-
-(** In rare cases, the extra equation [pc = pc'] introduced by the
-    simplification of a conditional branch can trigger further simplifications
-    of other conditional branches.  We therefore iterate the analysis
-    until no optimizable conditional branch remains. *)
-
-(** The code [c] (first component of the [st] triple) starts identical
-    to the code [fn.(fn_code)] of the current function, but each time
-    conditional branch at [pc] is optimized, we remove the block at
-    [pc] from the code [c].  This guarantees termination of the
-    iteration. *)
-
-Definition record_cond (st: code * U.t * bool) (pc: node) (b: bblock) : code * U.t * bool :=
-   match b with
-  | Lcond cond args s1 s2 :: _ =>
-      let '(c, u, _) := st in
-      if peq (U.repr u s1) (U.repr u s2)
-      then (PTree.remove pc c, U.union u pc s1, true)
-      else st
-  | _ =>
-      st
-  end.
-
-Definition record_conds_1 (cu: code * U.t) : code * U.t * bool :=
-  let (c, u) := cu in PTree.fold record_cond c (c, u, false).
-
-Definition measure_state (cu: code * U.t) : nat :=
-  PTree_Properties.cardinal (fst cu).
-
-Function record_conds (cu: code * U.t) {measure measure_state cu} : U.t :=
-  let (cu', changed) := record_conds_1 cu in
-  if changed then record_conds cu' else snd cu.
-Proof.
-  intros [c0 u0] [c1 u1].
-  set (P := fun (c: code) (s: code * U.t * bool) =>
-              (forall pc, c!pc = None -> (fst (fst s))!pc = c0!pc) /\
-              (PTree_Properties.cardinal (fst (fst s))
-               + (if snd s then 1 else 0)
-               <= PTree_Properties.cardinal c0)%nat).
-  assert (A: P c0 (PTree.fold record_cond c0 (c0, u0, false))).
-  { apply PTree_Properties.fold_rec; unfold P.
-  - intros. destruct H0; split; auto. intros. rewrite <- H in H2. auto.
-  - simpl; split; intros. auto.  simpl; lia.
-  - intros cd [[c u] changed] pc b NONE SOME [HR1 HR2]. simpl. split.
-    + intros p EQ. rewrite PTree.gsspec in EQ. destruct (peq p pc); try discriminate.
-      unfold record_cond. destruct b as [ | [] b ]; auto.
-      destruct (peq (U.repr u s1) (U.repr u s2)); auto.
-      simpl. rewrite PTree.gro by auto. auto.
-    + unfold record_cond. destruct b as [ | [] b ]; auto.
-      destruct (peq (U.repr u s1) (U.repr u s2)); auto.
-      simpl in *.
-      assert (SOME': c!pc = Some (Lcond cond args s1 s2 :: b)).
-      { rewrite HR1 by auto. auto. } 
-      generalize (PTree_Properties.cardinal_remove SOME').
-      destruct changed; lia.
-  }
-  unfold record_conds_1, measure_state; intros. 
-  destruct A as [_ A]. rewrite teq in A. simpl in *.
-  lia.
-Qed.
-
-Definition record_gotos (f: LTL.function) : U.t :=
-  record_conds (f.(fn_code), record_branches f).
-
-(** * Code transformation *)
-
-(** The code transformation rewrites all LTL instruction, replacing every
-  successor [s] of every instruction by the canonical representative
-  of its equivalence class in the union-find data structure.
-  Additionally, [Lcond] conditional branches are turned into [Lbranch]
-  unconditional branches whenever possible. *)
-
-Definition tunnel_instr (u: U.t) (i: instruction) : instruction :=
-  match i with
-  | Lbranch s => Lbranch (U.repr u s)
-  | Lcond cond args s1 s2 =>
-      let s1' := U.repr u s1 in let s2' := U.repr u s2 in
-      if peq s1' s2'
-      then Lbranch s1'
-      else Lcond cond args s1' s2'
-  | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr u) tbl)
-  | _ => i
-  end.
-
-Definition tunnel_block (u: U.t) (b: bblock) : bblock :=
-  List.map (tunnel_instr u) b.
-
-Definition tunnel_function (f: LTL.function) : LTL.function :=
-  let u := record_gotos f in
-  mkfunction
-    (fn_sig f)
-    (fn_stacksize f)
-    (PTree.map1 (tunnel_block u) (fn_code f))
-    (U.repr u (fn_entrypoint f)).
-
-Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
-  transf_fundef tunnel_function f.
-
-Definition tunnel_program (p: LTL.program) : LTL.program :=
-  transform_program tunnel_fundef p.
diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml
new file mode 100644
index 00000000..010595be
--- /dev/null
+++ b/backend/Tunnelinglibs.ml
@@ -0,0 +1,272 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*           Pierre Goutagny ENS-Lyon, VERIMAG                 *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the core functions of the tunneling passes, for both RTL
+and LTL, by using a simplified CFG as a transparent interface
+
+See [LTLTunneling.v]/[LTLTunnelingaux.ml] and [RTLTunneling.v]/[RTLTunnelingaux.ml].
+
+*)
+
+open Maps
+open Camlcoq
+
+(* type of labels in the cfg *)
+type label = int * P.t
+
+(* instructions under analyzis *)
+type simple_inst = (* a simplified view of instructions *)
+  BRANCH of node
+| COND of node * node
+| OTHER
+and node = {
+    lab: label;
+    mutable inst: simple_inst;
+    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
+    mutable dist: int;
+    mutable tag: int
+  }
+
+type positive = P.t
+type integer = Z.t
+
+(* type of the (simplified) CFG *)
+type cfg = {
+    nodes: (int, node) Hashtbl.t;
+    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
+    mutable num_rems: int;
+    mutable iter_num: int (* number of iterations in elimination of conditions *)
+  }
+
+exception BugOnPC of int
+
+(* keeps track of the total number of nops seen, for debugging purposes *)
+let nopcounter = ref 0
+
+(* General functions that do not require language-specific context, and that
+ are used for building language-specific functions *)
+
+let rec target c n = (* inspired from the "find" of union-find algorithm *)
+  match n.inst with
+  | COND(s1,s2) ->
+     if n.link != n
+     then update c n
+     else if n.tag < c.iter_num then (
+       (* we try to change the condition ... *)
+       n.tag <- c.iter_num; (* ... but at most once by iteration *)
+       let ts1 = target c s1 in
+       let ts2 = target c s2 in
+       if ts1 == ts2 then (n.link <- ts1; ts1) else n
+     ) else n
+  | _ ->
+     if n.link != n
+     then update c n
+     else n
+and update c n =
+  let t = target c n.link in
+  n.link <- t; t
+
+let get_node c p =
+  let li = P.to_int p in
+  try
+    Hashtbl.find c.nodes li
+  with
+    Not_found ->
+      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
+      Hashtbl.add c.nodes li n;
+      n
+
+let set_branch c p s =
+  let li = P.to_int p in
+  try
+    let n = Hashtbl.find c.nodes li in
+    n.inst <- BRANCH s;
+    n.link <- target c s
+  with
+    Not_found ->
+      let n = { lab = (li,p); inst = BRANCH s; link = target c s; dist = 0; tag = 0 } in
+      Hashtbl.add c.nodes li n
+
+let get td pc =
+  match PTree.get pc td with
+  | Some p -> let (t0, d) = p in (t0, d)
+  | None -> (pc, Z.of_uint 0)
+
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
+
+let undef_dist = -1
+let self_dist = undef_dist-1
+let rec dist n =
+  if n.dist = undef_dist
+  then (
+    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
+    n.dist <-
+      (match n.inst with
+       | OTHER -> 0
+       | BRANCH p -> 1 + dist p
+       | COND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
+    n.dist
+  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
+    else n.dist
+
+let string_of_labeli nodes ipc =
+  try
+    let pc = Hashtbl.find nodes ipc in
+    if pc.link == pc
+    then Printf.sprintf "(Target@%d)" (dist pc)
+    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
+  with
+    Not_found -> ""
+
+(*
+ * When given the necessary types and options as context, and then some
+ * language-specific functions that cannot be factorised between LTL and RTL, the
+ * `Tunneling` functor returns a module containing the corresponding
+ * `branch_target` function.
+ *)
+
+module Tunneling = functor
+  (* Language-specific types *)
+  (LANG: sig
+    type code_unit (* the type of a node of the code cfg (an instruction or a bblock *)
+    type funct (* type of internal functions *)
+  end)
+
+  (* Compilation options for debugging *)
+  (OPT: sig
+    val langname: string
+    val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *)
+    val debug_flag: bool ref
+    val final_dump: bool (* set to true to have a more verbose debugging *)
+  end)
+  -> struct
+
+  (* The `debug` function uses values from `OPT`, and is used in functions passed to `F`
+     so it must be defined between the two *)
+  let debug fmt =
+    if !OPT.debug_flag then Printf.eprintf fmt
+    else Printf.ifprintf stderr fmt
+
+  module T
+    (* Language-specific functions *)
+    (FUNS: sig
+      (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
+      val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list
+      val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool
+      val fn_code: LANG.funct -> LANG.code_unit PTree.t
+      val fn_entrypoint: LANG.funct -> positive
+      val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit
+    end)
+    (* only export what's needed *)
+    : sig val branch_target: LANG.funct -> (positive * integer) PTree.t end
+  = struct
+
+    (* try to change a condition into a branch [acc] is the current accumulator of
+       conditions to consider in the next iteration of repeat_change_cond *)
+    let try_change_cond c acc pc =
+      match pc.inst with
+      | COND(s1,s2) ->
+         let ts1 = target c s1 in
+         let ts2 = target c s2 in
+         if ts1 == ts2 then (
+           pc.link <- ts1;
+           c.num_rems <- c.num_rems - 1;
+           acc
+         ) else
+           pc::acc
+      | _  -> raise (BugOnPC (lab_i pc)) (* COND expected *)
+
+    (* repeat [try_change_cond] until no condition is changed into a branch *)
+    let rec repeat_change_cond c =
+      c.iter_num <- c.iter_num + 1;
+      debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems);
+      let old =  c.num_rems in
+      c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+      let curr = c.num_rems in
+      let continue =
+        match OPT.limit_tunneling with
+        | Some n -> curr < old && c.iter_num < n
+        | None -> curr < old
+      in
+      if continue
+      then repeat_change_cond c
+
+
+    (*********************************************)
+    (*** START: printing and debugging functions *)
+
+    let print_cfg (f: LANG.funct) c  =
+      let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in
+      Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+      let ep = P.to_int (FUNS.fn_entrypoint f) in
+      debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+      let println = Array.fold_left (FUNS.print_code_unit c) false a in
+      (if println then debug "\n");debug "remaining cond:";
+      List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+      debug "\n"
+
+
+    (*************************************************************)
+    (* Copy-paste of the extracted code of the verifier          *)
+    (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+    (** val check_code : coq_UF -> code -> unit res **)
+
+    let check_code td c =
+      PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (())
+
+    (*** END: copy-paste & debugging functions *******)
+
+    (* compute the final distance of each nop nodes to its target *)
+    let final_export f c =
+      let count = ref 0 in
+      let filter_nops_init_dist _ n acc =
+        let tn = target c n in
+        if tn == n
+        then (
+          n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+          acc
+        ) else (
+          n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+          count := !count+1;
+          n::acc
+        )
+      in
+      let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+      let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
+      debug "* %sTunneling.branch_target: initial number of nops = %d\n" OPT.langname !nopcounter;
+      debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n" OPT.langname !count;
+      res
+
+    let branch_target f =
+      debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname;
+      if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+      let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+      c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) [];
+      repeat_change_cond c;
+      let res = final_export f c in
+      if !OPT.debug_flag then (
+        try
+          check_code res (FUNS.fn_code f);
+          if OPT.final_dump then print_cfg f c;
+        with e -> (
+          print_cfg f c;
+          check_code res (FUNS.fn_code f)
+        )
+      );
+      res
+  end
+end
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
deleted file mode 100644
index 68913fc9..00000000
--- a/backend/Tunnelingproof.v
+++ /dev/null
@@ -1,714 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for the branch tunneling optimization. *)
-
-Require Import FunInd.
-Require Import Coqlib Maps UnionFind.
-Require Import AST Linking.
-Require Import Values Memory Events Globalenvs Smallstep.
-Require Import Op Locations LTL.
-Require Import Tunneling.
-
-Definition match_prog (p tp: program) :=
-  match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp.
-
-Lemma transf_program_match:
-  forall p, match_prog p (tunnel_program p).
-Proof.
-  intros. eapply match_transform_program; eauto.
-Qed.
-
-(** * Properties of the branch map computed using union-find. *)
-
-Section BRANCH_MAP_CORRECT.
-
-Variable fn: LTL.function.
-
-Definition measure_branch (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
-  fun x => if peq (U.repr u s) pc then f x
-           else if peq (U.repr u x) pc then (f x + f s + 1)%nat
-           else f x.
-
-Definition measure_cond (u: U.t) (pc s1 s2: node) (f: node -> nat) : node -> nat :=
-  fun x => if peq (U.repr u s1) pc then f x
-           else if peq (U.repr u x) pc then (f x + Nat.max (f s1) (f s2) + 1)%nat
-           else f x.
-
-Definition branch_map_correct_1 (c: code) (u: U.t) (f: node -> nat): Prop :=
-  forall pc,
-  match c!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
-  | _ =>
-      U.repr u pc = pc
-  end.
-
-Lemma record_branch_correct:
-  forall c u f pc b,
-  branch_map_correct_1 (PTree.remove pc c) u f ->
-  c!pc = Some b ->
-  { f' | branch_map_correct_1 c (record_branch u pc b) f' }.
-Proof.
-  intros c u f pc b BMC GET1.
-  assert (PC: U.repr u pc = pc).
-  { specialize (BMC pc). rewrite PTree.grs in BMC. auto. }
-  assert (DFL: { f | branch_map_correct_1 c u f }).
-  { exists f. intros p. destruct (peq p pc).
-  - subst p. rewrite GET1. destruct b as [ | [] b ]; auto.
-  - specialize (BMC p). rewrite PTree.gro in BMC by auto. exact BMC.
-  }
-  unfold record_branch. destruct b as [ | [] b ]; auto.
-  exists (measure_branch u pc s f). intros p. destruct (peq p pc).
-+ subst p. rewrite GET1. unfold measure_branch.
-  rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3.
-  destruct (peq (U.repr u s) pc); auto. rewrite PC, peq_true. right; split; auto. lia.
-+ specialize (BMC p). rewrite PTree.gro in BMC by auto.
-  assert (U.repr u p = p -> U.repr (U.union u pc s) p = p).
-  { intro. rewrite <- H at 2. apply U.repr_union_1. congruence. }
-  destruct (c!p) as [ [ | [] _ ] | ]; auto.
-  destruct BMC as [A | [A B]]. auto.
-  right; split. apply U.sameclass_union_2; auto.
-  unfold measure_branch. destruct (peq (U.repr u s) pc). auto.
-  rewrite A. destruct (peq (U.repr u s0) pc); lia.
-Qed.
-
-Lemma record_branches_correct:
-  { f | branch_map_correct_1 fn.(fn_code) (record_branches fn) f }.
-Proof.
-  unfold record_branches. apply PTree_Properties.fold_ind.
-- (* base case *)
-  intros m EMPTY. exists (fun _ => O). 
-  red; intros. rewrite EMPTY. apply U.repr_empty.
-- (* inductive case *)
-  intros m u pc bb GET1 GET2 [f BMC]. eapply record_branch_correct; eauto.
-Qed.
-
-Definition branch_map_correct_2 (c: code) (u: U.t) (f: node -> nat): Prop :=
-  forall pc,
-  match fn.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
-  | Some(Lcond cond args s1 s2 :: b) =>
-      U.repr u pc = pc \/ (c!pc = None /\ U.repr u pc = U.repr u s1 /\ U.repr u pc = U.repr u s2 /\ f s1 < f pc /\ f s2 < f pc)%nat
-  | _ =>
-      U.repr u pc = pc
-  end.
-
-Lemma record_cond_correct:
-  forall c u changed f pc b,
-  branch_map_correct_2 c u f ->
-  fn.(fn_code)!pc = Some b ->
-  c!pc <> None ->
-  let '(c1, u1, _) := record_cond (c, u, changed) pc b in
-  { f' | branch_map_correct_2 c1 u1 f' }.
-Proof.
-  intros c u changed f pc b BMC GET1 GET2.
-  assert (DFL: { f' | branch_map_correct_2 c u f' }).
-  { exists f; auto. }
-  unfold record_cond. destruct b as [ | [] b ]; auto.
-  destruct (peq (U.repr u s1) (U.repr u s2)); auto.
-  exists (measure_cond u pc s1 s2 f).
-  assert (PC: U.repr u pc = pc).
-  { specialize (BMC pc). rewrite GET1 in BMC. intuition congruence. }
-  intro p. destruct (peq p pc).
-- subst p. rewrite GET1. unfold measure_cond.
-  rewrite U.repr_union_2 by auto. rewrite <- e, PC, peq_true.
-  destruct (peq (U.repr u s1) pc); auto.
-  right; repeat split.
-  + apply PTree.grs.
-  + rewrite U.repr_union_3. auto.
-  + rewrite U.repr_union_1 by congruence. auto.
-  + lia.
-  + lia.
-- assert (P: U.repr u p = p -> U.repr (U.union u pc s1) p = p).
-  { intros. rewrite U.repr_union_1 by congruence. auto. }
-  specialize (BMC p). destruct (fn_code fn)!p as [ [ | [] bb ] | ]; auto.
-  + destruct BMC as [A | (A & B)]; auto. right; split.
-    * apply U.sameclass_union_2; auto.
-    * unfold measure_cond. rewrite <- A.
-      destruct (peq (U.repr u s1) pc). auto.
-      destruct (peq (U.repr u p) pc); lia.
-  + destruct BMC as [A | (A & B & C & D & E)]; auto. right; split; [ | split; [ | split]].
-    * rewrite PTree.gro by auto. auto.
-    * apply U.sameclass_union_2; auto.
-    * apply U.sameclass_union_2; auto.
-    * unfold measure_cond. rewrite <- B, <- C.
-      destruct (peq (U.repr u s1) pc). auto.
-      destruct (peq (U.repr u p) pc); lia.
-Qed.
-
-Definition code_compat (c: code) : Prop :=
-  forall pc b, c!pc = Some b -> fn.(fn_code)!pc = Some b.
-
-Definition code_invariant (c0 c1 c2: code) : Prop :=
-  forall pc, c0!pc = None -> c1!pc = c2!pc.
-
-Lemma record_conds_1_correct:
-  forall c u f,
-  branch_map_correct_2 c u f ->
-  code_compat c ->
-  let '(c', u', _) := record_conds_1 (c, u) in
-  (code_compat c' * { f' | branch_map_correct_2 c' u' f' })%type.
-Proof.
-  intros c0 u0 f0 BMC0 COMPAT0.
-  unfold record_conds_1.
-  set (x := PTree.fold record_cond c0 (c0, u0, false)).
-  set (P := fun (cd: code) (cuc: code * U.t * bool) =>
-            (code_compat (fst (fst cuc)) *
-             code_invariant cd (fst (fst cuc)) c0 *
-             { f | branch_map_correct_2 (fst (fst cuc)) (snd (fst cuc)) f })%type).
-  assert (REC: P c0 x).
-  { unfold x; apply PTree_Properties.fold_ind.
-  - intros cd EMPTY. split; [split|]; simpl.
-    + auto.
-    + red; auto.
-    + exists f0; auto.
-  - intros cd [[c u] changed] pc b GET1 GET2 [[COMPAT INV] [f BMC]]. simpl in *.
-    split; [split|].
-    + unfold record_cond; destruct b as [ | [] b]; simpl; auto.
-      destruct (peq (U.repr u s1) (U.repr u s2)); simpl; auto.
-      red; intros. rewrite PTree.grspec in H. destruct (PTree.elt_eq pc0 pc). discriminate. auto.
-    + assert (DFL: code_invariant cd c c0).
-      { intros p GET. apply INV. rewrite PTree.gro by congruence. auto. }
-      unfold record_cond; destruct b as [ | [] b]; simpl; auto.
-      destruct (peq (U.repr u s1) (U.repr u s2)); simpl; auto.
-      intros p GET. rewrite PTree.gro by congruence. apply INV. rewrite PTree.gro by congruence. auto.
-    + assert (GET3: c!pc = Some b).
-      { rewrite <- GET2. apply INV. apply PTree.grs. }
-      assert (X: fn.(fn_code)!pc = Some b) by auto.
-      assert (Y: c!pc <> None) by congruence.
-      generalize (record_cond_correct c u changed f pc b BMC X Y).
-      destruct (record_cond (c, u, changed) pc b) as [[c1 u1] changed1]; simpl.
-      auto.
-  }
-  destruct x as [[c1 u1] changed1]; destruct REC as [[COMPAT1 INV1] BMC1]; auto.
-Qed.
-
-Definition branch_map_correct (u: U.t) (f: node -> nat): Prop :=
-  forall pc,
-  match fn.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
-  | Some(Lcond cond args s1 s2 :: b) =>
-      U.repr u pc = pc \/ (U.repr u pc = U.repr u s1 /\ U.repr u pc = U.repr u s2 /\ f s1 < f pc /\ f s2 < f pc)%nat
-  | _ =>
-      U.repr u pc = pc
-  end.
-
-Lemma record_conds_correct:
-  forall cu,
-  { f | branch_map_correct_2 (fst cu) (snd cu) f } ->
-  code_compat (fst cu) ->
-  { f | branch_map_correct (record_conds cu) f }.
-Proof.
-  intros cu0. functional induction (record_conds cu0); intros.
-- destruct cu as [c u], cu' as [c' u'], H as [f BMC]. 
-  generalize (record_conds_1_correct c u f BMC H0).
-  rewrite e. intros [U V]. apply IHt; auto.
-- destruct cu as [c u], H as [f BMC].
-  exists f. intros pc. specialize (BMC pc); simpl in *.
-  destruct (fn_code fn)!pc as [ [ | [] b ] | ]; tauto.
-Qed.
-
-Lemma record_gotos_correct_1:
-  { f | branch_map_correct (record_gotos fn) f }.
-Proof.
-  apply record_conds_correct; simpl.
-- destruct record_branches_correct as [f BMC].
-  exists f. intros pc. specialize (BMC pc); simpl in *.
-  destruct (fn_code fn)!pc as [ [ | [] b ] | ]; auto.
-- red; auto.
-Qed.
-
-Definition branch_target  (pc: node) : node :=
-  U.repr (record_gotos fn) pc.
-
-Definition count_gotos (pc: node) : nat :=
-  proj1_sig record_gotos_correct_1 pc.
-
-Theorem record_gotos_correct:
-  forall pc,
-  match fn.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-      branch_target pc = pc \/
-      (branch_target pc = branch_target s /\ count_gotos s < count_gotos pc)%nat
-  | Some(Lcond cond args s1 s2 :: b) =>
-      branch_target pc = pc \/
-      (branch_target pc = branch_target s1 /\ branch_target pc = branch_target s2
-       /\ count_gotos s1 < count_gotos pc /\ count_gotos s2 < count_gotos pc)%nat
-  | _ =>
-      branch_target pc = pc
-  end.
-Proof.
-  intros. unfold count_gotos. destruct record_gotos_correct_1 as [f P]; simpl.
-  apply P.
-Qed.
-
-End BRANCH_MAP_CORRECT.
-
-(** * Preservation of semantics *)
-
-Section PRESERVATION.
-
-Variables prog tprog: program.
-Hypothesis TRANSL: match_prog prog tprog.
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-Lemma functions_translated:
-  forall v f,
-  Genv.find_funct ge v = Some f ->
-  Genv.find_funct tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_transf TRANSL).
-
-Lemma function_ptr_translated:
-  forall v f,
-  Genv.find_funct_ptr ge v = Some f ->
-  Genv.find_funct_ptr tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_ptr_transf TRANSL).
-
-Lemma symbols_preserved:
-  forall id,
-  Genv.find_symbol tge id = Genv.find_symbol ge id.
-Proof (Genv.find_symbol_transf TRANSL).
-
-Lemma senv_preserved:
-  Senv.equiv ge tge.
-Proof (Genv.senv_transf TRANSL).
-
-Lemma sig_preserved:
-  forall f, funsig (tunnel_fundef f) = funsig f.
-Proof.
-  destruct f; reflexivity.
-Qed.
-
-(** The proof of semantic preservation is a simulation argument
-  based on diagrams of the following form:
-<<
-           st1 --------------- st2
-            |                   |
-           t|                  ?|t
-            |                   |
-            v                   v
-           st1'--------------- st2'
->>
-  The [match_states] predicate, defined below, captures the precondition
-  between states [st1] and [st2], as well as the postcondition between
-  [st1'] and [st2'].  One transition in the source code (left) can correspond
-  to zero or one transition in the transformed code (right).  The
-  "zero transition" case occurs when executing a [Lgoto] instruction
-  in the source code that has been removed by tunneling.
-
-  In the definition of [match_states], what changes between the original and
-  transformed codes is mainly the control-flow
-  (in particular, the current program point [pc]), but also some values
-  and memory states, since some [Vundef] values can become more defined
-  as a consequence of eliminating useless [Lcond] instructions. *)
-
-Definition tunneled_block (f: function) (b: bblock) :=
-  tunnel_block (record_gotos f) b.
-
-Definition tunneled_code (f: function) :=
-  PTree.map1 (tunneled_block f) (fn_code f).
-
-Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
-  forall l, Val.lessdef (ls1 l) (ls2 l).
-
-Inductive match_stackframes: stackframe -> stackframe -> Prop :=
-  | match_stackframes_intro:
-      forall f sp ls0 bb tls0,
-      locmap_lessdef ls0 tls0 ->
-      match_stackframes
-         (Stackframe f sp ls0 bb)
-         (Stackframe (tunnel_function f) sp tls0 (tunneled_block f bb)).
-
-Inductive match_states: state -> state -> Prop :=
-  | match_states_intro:
-      forall s f sp pc ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (State s f sp pc ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
-  | match_states_block:
-      forall s f sp bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Block s f sp bb ls m)
-                   (Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm)
-  | match_states_interm_branch:
-      forall s f sp pc bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Block s f sp (Lbranch pc :: bb) ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
-  | match_states_interm_cond:
-      forall s f sp cond args pc1 pc2 bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (SAME: branch_target f pc1 = branch_target f pc2),
-      match_states (Block s f sp (Lcond cond args pc1 pc2 :: bb) ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc1) tls tm)
-  | match_states_call:
-      forall s f ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Callstate s f ls m)
-                   (Callstate ts (tunnel_fundef f) tls tm)
-  | match_states_return:
-      forall s ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Returnstate s ls m)
-                   (Returnstate ts tls tm).
-
-(** Properties of [locmap_lessdef] *)
-
-Lemma reglist_lessdef:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl).
-Proof.
-  induction rl; simpl; intros; auto.
-Qed.
-
-Lemma locmap_set_lessdef:
-  forall ls1 ls2 v1 v2 l,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2).
-Proof.
-  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
-- destruct l; auto using Val.load_result_lessdef.
-- destruct (Loc.diff_dec l l'); auto.
-Qed.
-
-Lemma locmap_set_undef_lessdef:
-  forall ls1 ls2 l,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2.
-Proof.
-  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
-- destruct l; auto. destruct ty; auto. 
-- destruct (Loc.diff_dec l l'); auto.
-Qed.
-
-Lemma locmap_undef_regs_lessdef:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2).
-Proof.
-  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. 
-Qed.
-
-Lemma locmap_undef_regs_lessdef_1:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2.
-Proof.
-  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
-Qed.
-
-(*
-Lemma locmap_undef_lessdef:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) (Locmap.undef ll ls2).
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_lessdef; auto. 
-Qed.
-
-Lemma locmap_undef_lessdef_1:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) ls2.
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_undef_lessdef; auto. 
-Qed.
-*)
-
-Lemma locmap_getpair_lessdef:
-  forall p ls1 ls2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
-Proof.
-  intros; destruct p; simpl; auto using Val.longofwords_lessdef.
-Qed.
-
-Lemma locmap_getpairs_lessdef:
-  forall pl ls1 ls2,
-  locmap_lessdef ls1 ls2 ->
-  Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl).
-Proof.
-  intros. induction pl; simpl; auto using locmap_getpair_lessdef.
-Qed.
-
-Lemma locmap_setpair_lessdef:
-  forall p ls1 ls2 v1 v2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2).
-Proof.
-  intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
-Qed.
-
-Lemma locmap_setres_lessdef:
-  forall res ls1 ls2 v1 v2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2).
-Proof.
-  induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
-Qed.
-
-Lemma locmap_undef_caller_save_regs_lessdef:
-  forall ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2).
-Proof.
-  intros; red; intros. unfold undef_caller_save_regs. 
-  destruct l.
-- destruct (Conventions1.is_callee_save r); auto.
-- destruct sl; auto.
-Qed.
-
-Lemma find_function_translated:
-  forall ros ls tls fd,
-  locmap_lessdef ls tls ->
-  find_function ge ros ls = Some fd ->
-  find_function tge ros tls = Some (tunnel_fundef fd).
-Proof.
-  intros. destruct ros; simpl in *.
-- assert (E: tls (R m) = ls (R m)).
-  { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
-    generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
-  rewrite E. apply functions_translated; auto.
-- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
-  apply function_ptr_translated; auto.
-Qed.
-
-Lemma call_regs_lessdef:
-  forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2).
-Proof.
-  intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto.
-Qed.
-
-Lemma return_regs_lessdef:
-  forall caller1 callee1 caller2 callee2,
-  locmap_lessdef caller1 caller2 ->
-  locmap_lessdef callee1 callee2 ->
-  locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2).
-Proof.
-  intros; red; intros. destruct l; simpl.
-- destruct (Conventions1.is_callee_save r); auto.
-- destruct sl; auto.
-Qed. 
-
-(** To preserve non-terminating behaviours, we show that the transformed
-  code cannot take an infinity of "zero transition" cases.
-  We use the following [measure] function over source states,
-  which decreases strictly in the "zero transition" case. *)
-
-Definition measure (st: state) : nat :=
-  match st with
-  | State s f sp pc ls m => (count_gotos f pc * 2)%nat
-  | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat
-  | Block s f sp (Lcond _ _ pc1 pc2 :: _) ls m => (Nat.max (count_gotos f pc1) (count_gotos f pc2) * 2 + 1)%nat
-  | Block s f sp bb ls m => 0%nat
-  | Callstate s f ls m => 0%nat
-  | Returnstate s ls m => 0%nat
-  end.
-
-Lemma match_parent_locset:
-  forall s ts,
-  list_forall2 match_stackframes s ts ->
-  locmap_lessdef (parent_locset s) (parent_locset ts).
-Proof.
-  induction 1; simpl.
-- red; auto.
-- inv H; auto.
-Qed.
-
-Lemma tunnel_step_correct:
-  forall st1 t st2, step ge st1 t st2 ->
-  forall st1' (MS: match_states st1 st1'),
-  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
-  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
-Proof.
-  induction 1; intros; try inv MS.
-
-- (* entering a block *)
-  assert (DEFAULT: branch_target f pc = pc ->
-    (exists st2' : state,
-     step tge (State ts (tunnel_function f) sp (branch_target f pc) tls tm) E0 st2'
-     /\ match_states (Block s f sp bb rs m) st2')).
-  { intros. rewrite H0. econstructor; split.
-    econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto.
-    econstructor; eauto. }
-
-  generalize (record_gotos_correct f pc). rewrite H.
-  destruct bb; auto. destruct i; auto.
-+ (* Lbranch *)
-  intros [A | [B C]]. auto.
-  right. split. simpl. lia.
-  split. auto.
-  rewrite B. econstructor; eauto.
-+ (* Lcond *)
-  intros [A | (B & C & D & E)]. auto.
-  right. split. simpl. lia.
-  split. auto.
-  rewrite B. econstructor; eauto. congruence.
-
-- (* Lop *)
-  exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
-  intros (tv & EV & LD).
-  left; simpl; econstructor; split.
-  eapply exec_Lop with (v := tv); eauto.
-  rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lload *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  exploit Mem.loadv_extends. eauto. eauto. eexact LD. 
-  intros (tv & LOAD & LD').
-  left; simpl; econstructor; split.
-  eapply exec_Lload with (a := ta).
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lgetstack *)
-  left; simpl; econstructor; split.
-  econstructor; eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lsetstack *)
-  left; simpl; econstructor; split.
-  econstructor; eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lstore *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS.  
-  intros (tm' & STORE & MEM').
-  left; simpl; econstructor; split.
-  eapply exec_Lstore with (a := ta).
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto. eauto.
-  econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lcall *)
-  left; simpl; econstructor; split.
-  eapply exec_Lcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto.
-  rewrite sig_preserved. auto.
-  econstructor; eauto.
-  constructor; auto.
-  constructor; auto.
-- (* Ltailcall *)
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
-  left; simpl; econstructor; split.
-  eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto using return_regs_lessdef, match_parent_locset.
-  apply sig_preserved.
-  econstructor; eauto using return_regs_lessdef, match_parent_locset.
-- (* Lbuiltin *)
-  exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
-  exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
-  left; simpl; econstructor; split.
-  eapply exec_Lbuiltin; eauto.
-  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. 
-  eapply external_call_symbols_preserved. apply senv_preserved. eauto.
-  econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef.
-- (* Lbranch (preserved) *)
-  left; simpl; econstructor; split.
-  eapply exec_Lbranch; eauto.
-  fold (branch_target f pc). econstructor; eauto.
-- (* Lbranch (eliminated) *)
-  right; split. simpl. lia. split. auto. constructor; auto.
-
-- (* Lcond (preserved) *)
-  simpl tunneled_block.
-  set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2).
-  destruct (peq s1 s2).
-+ left; econstructor; split.
-  eapply exec_Lbranch.
-  set (pc := if b then pc1 else pc2).
-  replace s1 with (branch_target f pc) by (unfold pc; destruct b; auto).
-  constructor; eauto using locmap_undef_regs_lessdef_1.
-+ left; econstructor; split.
-  eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
-  destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lcond (eliminated) *)
-  right; split. simpl. destruct b; lia.
-  split. auto.
-  set (pc := if b then pc1 else pc2).
-  replace (branch_target f pc1) with (branch_target f pc) by (unfold pc; destruct b; auto).
-  econstructor; eauto.
-
-- (* Ljumptable *)
-  assert (tls (R arg) = Vint n).
-  { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
-  left; simpl; econstructor; split.
-  eapply exec_Ljumptable.
-  eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto.
-  econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lreturn *)
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
-  left; simpl; econstructor; split.
-  eapply exec_Lreturn; eauto.
-  constructor; eauto using return_regs_lessdef, match_parent_locset.
-- (* internal function *)
-  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros (tm' & ALLOC & MEM'). 
-  left; simpl; econstructor; split.
-  eapply exec_function_internal; eauto.
-  simpl. econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
-- (* external function *)
-  exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
-  intros (tvres & tm' & A & B & C & D).
-  left; simpl; econstructor; split.
-  eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
-- (* return *)
-  inv STK. inv H1.
-  left; econstructor; split.
-  eapply exec_return; eauto.
-  constructor; auto.
-Qed.
-
-Lemma transf_initial_states:
-  forall st1, initial_state prog st1 ->
-  exists st2, initial_state tprog st2 /\ match_states st1 st2.
-Proof.
-  intros. inversion H.
-  exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split.
-  econstructor; eauto.
-  apply (Genv.init_mem_transf TRANSL); auto.
-  rewrite (match_program_main TRANSL).
-  rewrite symbols_preserved. eauto.
-  apply function_ptr_translated; auto.
-  rewrite <- H3. apply sig_preserved.
-  constructor. constructor. red; simpl; auto. apply Mem.extends_refl.
-Qed.
-
-Lemma transf_final_states:
-  forall st1 st2 r,
-  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
-Proof.
-  intros. inv H0. inv H. inv STK.
-  set (p := map_rpair R (Conventions1.loc_result signature_main)) in *.
-  generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD.
-  econstructor; eauto.
-Qed.
-
-Theorem transf_program_correct:
-  forward_simulation (LTL.semantics prog) (LTL.semantics tprog).
-Proof.
-  eapply forward_simulation_opt.
-  apply senv_preserved.
-  eexact transf_initial_states.
-  eexact transf_final_states.
-  eexact tunnel_step_correct.
-Qed.
-
-End PRESERVATION.
diff --git a/backend/Unusedglob.v b/backend/Unusedglob.v
index 8ac7c4ce..3b8e19ad 100644
--- a/backend/Unusedglob.v
+++ b/backend/Unusedglob.v
@@ -46,14 +46,14 @@ Definition ref_instruction (i: instruction) : list ident :=
   match i with
   | Inop _ => nil
   | Iop op _ _ _ => globals_operation op
-  | Iload _ addr _ _ _ => globals_addressing addr
+  | Iload _ _ addr _ _ _ => globals_addressing addr
   | Istore _ addr _ _ _ => globals_addressing addr
   | Icall _ (inl r) _ _ _ => nil
   | Icall _ (inr id) _ _ _ => id :: nil
   | Itailcall _ (inl r) _ => nil
   | Itailcall _ (inr id) _ => id :: nil
   | Ibuiltin _ args _ _ => globals_of_builtin_args args
-  | Icond cond _ _ _ => nil
+  | Icond cond _ _ _ _ => nil
   | Ijumptable _ _ => nil
   | Ireturn _ => nil
   end.
@@ -126,7 +126,7 @@ Fixpoint filter_globdefs (used: IS.t) (accu defs: list (ident * globdef fundef u
 Definition global_defined (p: program) (pm: prog_map) (id: ident) : bool :=
   match pm!id with Some _ => true | None => ident_eq id (prog_main p) end.
 
-Definition transform_program (p: program) : res program :=
+Definition transf_program (p: program) : res program :=
   let pm := prog_defmap p in
   match used_globals p pm with
   | None => Error (msg "Unusedglob: analysis failed")
diff --git a/backend/Unusedglobproof.v b/backend/Unusedglobproof.v
index 3216ec50..aaacf9d1 100644
--- a/backend/Unusedglobproof.v
+++ b/backend/Unusedglobproof.v
@@ -428,9 +428,9 @@ Qed.
 End TRANSFORMATION.
 
 Theorem transf_program_match:
-  forall p tp, transform_program p = OK tp -> match_prog p tp.
+  forall p tp, transf_program p = OK tp -> match_prog p tp.
 Proof.
-  unfold transform_program; intros p tp TR. set (pm := prog_defmap p) in *.
+  unfold transf_program; intros p tp TR. set (pm := prog_defmap p) in *.
   destruct (used_globals p pm) as [u|] eqn:U; try discriminate.
   destruct (IS.for_all (global_defined p pm) u) eqn:DEF; inv TR.
   exists u; split.
@@ -915,7 +915,7 @@ Proof.
             /\ Val.inject j a ta).
   { apply eval_addressing_inj with (ge1 := ge) (sp1 := Vptr sp0 Ptrofs.zero) (vl1 := rs##args).
     intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
-    apply KEPT. red. exists pc, (Iload chunk addr args dst pc'); auto.
+    apply KEPT. red. exists pc, (Iload trap chunk addr args dst pc'); auto.
     econstructor; eauto.
     apply regs_inject; auto.
     assumption. }
@@ -924,6 +924,36 @@ Proof.
   econstructor; split. eapply exec_Iload; eauto.
   econstructor; eauto. apply set_reg_inject; auto.
 
+- (* load notrap1 *)
+  assert (eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = None).
+  { eapply eval_addressing_inj_none.
+    intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
+    apply KEPT. red. exists pc, (Iload NOTRAP chunk addr args dst pc'); auto.
+    econstructor; eauto.
+    rewrite Ptrofs.add_zero; reflexivity.
+    apply regs_inject; auto.
+    eassumption.
+    assumption. }
+ 
+  econstructor; split. eapply exec_Iload_notrap1; eauto.
+  econstructor; eauto. apply set_reg_inject; auto.
+
+- (* load notrap2 *)
+  assert (A: exists ta,
+               eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = Some ta
+            /\ Val.inject j a ta).
+  { apply eval_addressing_inj with (ge1 := ge) (sp1 := Vptr sp0 Ptrofs.zero) (vl1 := rs##args).
+    intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
+    apply KEPT. red. exists pc, (Iload NOTRAP chunk addr args dst pc'); auto.
+    econstructor; eauto.
+    apply regs_inject; auto.
+    assumption. }
+  destruct A as (ta & B & C).
+  destruct (Mem.loadv chunk tm ta) eqn:Echunk2.
+  + econstructor; split. eapply exec_Iload; eauto.
+    econstructor; eauto. apply set_reg_inject; auto.
+  + econstructor; split. eapply exec_Iload_notrap2; eauto.
+    econstructor; eauto. apply set_reg_inject; auto.
 - (* store *)
   assert (A: exists ta,
                eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = Some ta
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index ebf2c5ea..e20edff7 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -139,9 +139,14 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) :
   | Some(Iop op args res s) =>
       let a := eval_static_operation op (aregs ae args) in
       VA.State (AE.set res a ae) am
-  | Some(Iload chunk addr args dst s) =>
+  | Some(Iload TRAP chunk addr args dst s) =>
       let a := loadv chunk rm am (eval_static_addressing addr (aregs ae args)) in
       VA.State (AE.set dst a ae) am
+
+  (* TODO: maybe a case analysis on the results of loadv? *)
+               
+  | Some(Iload NOTRAP chunk addr args dst s) =>
+      VA.State (AE.set dst Vtop ae) am
   | Some(Istore chunk addr args src s) =>
       let am' := storev chunk am (eval_static_addressing addr (aregs ae args)) (areg ae src) in
       VA.State ae am'
@@ -151,7 +156,7 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) :
       VA.Bot
   | Some(Ibuiltin ef args res s) =>
       transfer_builtin ae am rm ef args res
-  | Some(Icond cond args s1 s2) =>
+  | Some(Icond cond args s1 s2 _) =>
       VA.State ae am
   | Some(Ijumptable arg tbl) =>
       VA.State ae am
@@ -1267,11 +1272,27 @@ Proof.
   apply ematch_update; auto. eapply eval_static_operation_sound; eauto with va.
 
 - (* load *)
+  destruct trap.
+  + eapply sound_succ_state; eauto. simpl; auto.
+    unfold transfer; rewrite H. eauto.
+    apply ematch_update; auto. eapply loadv_sound; eauto with va.
+    eapply eval_static_addressing_sound; eauto with va.
+  + eapply sound_succ_state; eauto. simpl; auto.
+    unfold transfer; rewrite H. eauto.
+    apply ematch_update; auto.
+    eapply vmatch_top.
+    eapply loadv_sound; try eassumption.
+    eapply eval_static_addressing_sound; eauto with va.
+- (* load notrap1 *)
   eapply sound_succ_state; eauto. simpl; auto.
   unfold transfer; rewrite H. eauto.
-  apply ematch_update; auto. eapply loadv_sound; eauto with va.
-  eapply eval_static_addressing_sound; eauto with va.
-
+  apply ematch_update; auto.
+  constructor.
+- (* load notrap2 *)
+  eapply sound_succ_state; eauto. simpl; auto.
+  unfold transfer; rewrite H. eauto.
+  apply ematch_update; auto.
+  constructor.
 - (* store *)
   exploit eval_static_addressing_sound; eauto with va. intros VMADDR.
   eapply sound_succ_state; eauto. simpl; auto.
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index 01f080ff..5a7cfc12 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -15,6 +15,7 @@ Require Import Zwf Coqlib Maps Zbits Integers Floats Lattice.
 Require Import Compopts AST.
 Require Import Values Memory Globalenvs Builtins Events.
 Require Import Registers RTL.
+Require Import Lia.
 
 (** The abstract domains for value analysis *)
 
@@ -2069,7 +2070,6 @@ Definition divfs := binop_single Float32.div.
 Lemma divfs_sound:
   forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divfs v w) (divfs x y).
 Proof (binop_single_sound Float32.div).
-
 (** Conversions *)
 
 Definition zero_ext (nbits: Z) (v: aval) :=
@@ -2483,6 +2483,468 @@ Proof.
   destruct 1; simpl; auto with va.
 Qed.
 
+
+(* Extensions for KVX and Risc-V *)
+
+Definition intoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_int f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intuoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_intu f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_int f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intuofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_intu f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_long f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longuoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_longu f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_long f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longuofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_longu f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Lemma intoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intoffloat v)) (intoffloat_total x).
+Proof.
+  unfold Val.intoffloat, intoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intuoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intuoffloat v)) (intuoffloat_total x).
+Proof.
+  unfold Val.intoffloat, intoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intofsingle v)) (intofsingle_total x).
+Proof.
+  unfold Val.intofsingle, intofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intuofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intuofsingle v)) (intuofsingle_total x).
+Proof.
+  unfold Val.intofsingle, intofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma singleofint_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleofint v)) (singleofint x).
+Proof.
+  unfold Val.singleofint, singleofint; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma singleofintu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleofintu v)) (singleofintu x).
+Proof.
+  unfold Val.singleofintu, singleofintu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma longoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longoffloat v)) (longoffloat_total x).
+Proof.
+  unfold Val.longoffloat, longoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longuoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longuoffloat v)) (longuoffloat_total x).
+Proof.
+  unfold Val.longoffloat, longoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longofsingle v)) (longofsingle_total x).
+Proof.
+  unfold Val.longofsingle, longofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longuofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longuofsingle v)) (longuofsingle_total x).
+Proof.
+  unfold Val.longofsingle, longofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma singleoflong_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleoflong v)) (singleoflong x).
+Proof.
+  unfold Val.singleoflong, singleoflong; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma singleoflongu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleoflongu v)) (singleoflongu x).
+Proof.
+  unfold Val.singleoflongu, singleoflongu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatoflong_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatoflong v)) (floatoflong x).
+Proof.
+  unfold Val.floatoflong, floatoflong; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatoflongu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatoflongu v)) (floatoflongu x).
+Proof.
+  unfold Val.floatoflongu, floatoflongu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatofint_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatofint v)) (floatofint x).
+Proof.
+  unfold Val.floatofint, floatofint; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatofintu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatofintu v)) (floatofintu x).
+Proof.
+  unfold Val.floatofintu, floatofintu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+
+Definition divs_total (v w: aval) := 
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone
+      then ntop
+      else I (Int.divs i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divs_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divs v w)) (divs_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition divu_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+       then ntop
+      else I (Int.divu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divu v w)) (divu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct Int.eq eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct Int.eq eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition mods_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone
+      then ntop
+      else I (Int.mods i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma mods_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.mods v w)) (mods_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition modu_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      then ntop
+      else I (Int.modu i1 i2)
+  | I i2, _ => uns (provenance v) (usize i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modu v w)) (modu_total x y).
+Proof.
+  assert (UNS: forall i j, j <> Int.zero -> is_uns (usize j) (Int.modu i j)).
+  {
+    intros. apply is_uns_mon with (usize (Int.modu i j)).
+    { apply is_uns_usize.
+    }
+    unfold usize, Int.size.
+    apply Zsize_monotone.
+    generalize (Int.unsigned_range_2 j); intros RANGE.
+    assert (Int.unsigned j <> 0).
+    { red; intros; elim H. rewrite <- (Int.repr_unsigned j). rewrite H0. auto. }
+    exploit (Z_mod_lt (Int.unsigned i) (Int.unsigned j)). lia. intros MOD.
+    unfold Int.modu. rewrite Int.unsigned_repr. lia. lia.
+  }
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct Int.eq eqn:E; unfold ntop; cbn; auto with va.
+  }
+  all: try discriminate.
+  all: unfold ntop2; auto with va.
+  all: try (destruct Int.eq eqn:E; cbn; unfold ntop2; auto with va; fail).
+  all: try apply vmatch_uns_undef.
+  
+  all:
+    generalize (Int.eq_spec i0 Int.zero);
+    destruct (Int.eq i0 Int.zero);
+    cbn;
+    intro.
+  all: try apply vmatch_uns_undef.
+  all: apply vmatch_uns; auto.
+Qed.
+
+
+Lemma shrx_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrx v w)) (shrx x y).
+Proof.
+  intros until y. intros HX HY.
+  inv HX; inv HY; cbn.
+  all: unfold ntop1; auto with va.
+  all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va.
+Qed.
+
+
+Definition divls_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone
+      then ntop
+      else L (Int64.divs i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divls_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divls v w)) (divls_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+Definition divlu_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+       then ntop
+      else L (Int64.divu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divlu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divlu v w)) (divlu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct Int64.eq eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+
+Definition modls_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone
+      then ntop
+      else L (Int64.mods i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modls_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modls v w)) (modls_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+
+Definition modlu_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      then ntop
+      else L (Int64.modu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modlu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modlu v w)) (modlu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct Int64.eq eqn:E; cbn; unfold ntop2, ntop; auto with va.
+Qed.
+
+Lemma shrxl_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrxl v w)) (shrxl x y).
+Proof.
+  intros until y. intros HX HY.
+  inv HX; inv HY; cbn.
+  all: unfold ntop1; auto with va.
+  all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va.
+Qed.
+
 (** Comparisons and variation intervals *)
 
 Definition cmp_intv (c: comparison) (i: Z * Z) (n: Z) : abool :=
@@ -4739,6 +5201,26 @@ Global Hint Resolve cnot_sound symbol_address_sound
        longoffloat_sound longuoffloat_sound floatoflong_sound floatoflongu_sound
        longofsingle_sound longuofsingle_sound singleoflong_sound singleoflongu_sound
        longofwords_sound loword_sound hiword_sound
+       intoffloat_total_sound
+       intuoffloat_total_sound
+       intofsingle_total_sound
+       intuofsingle_total_sound
+       singleofint_total_sound
+       singleofintu_total_sound
+       longoffloat_total_sound
+       longuoffloat_total_sound
+       longofsingle_total_sound
+       longuofsingle_total_sound
+       singleoflong_total_sound
+       singleoflongu_total_sound
+       floatoflong_total_sound
+       floatoflongu_total_sound
+       floatofint_total_sound
+       floatofintu_total_sound
+       divu_total_sound divs_total_sound
+       modu_total_sound mods_total_sound shrx_total_sound
+       divlu_total_sound divls_total_sound
+       modlu_total_sound modls_total_sound shrxl_total_sound
        cmpu_bool_sound cmp_bool_sound cmplu_bool_sound cmpl_bool_sound
        cmpf_bool_sound cmpfs_bool_sound
        maskzero_sound : va.
diff --git a/backend/XTL.ml b/backend/XTL.ml
index f10efeed..1d8e89c0 100644
--- a/backend/XTL.ml
+++ b/backend/XTL.ml
@@ -30,13 +30,13 @@ type instruction =
   | Xspill of var * var
   | Xparmove of var list * var list * var * var
   | Xop of operation * var list * var
-  | Xload of memory_chunk * addressing * var list * var
+  | Xload of trapping_mode * memory_chunk * addressing * var list * var
   | Xstore of memory_chunk * addressing * var list * var
   | Xcall of signature * (var, ident) sum * var list * var list
   | Xtailcall of signature * (var, ident) sum * var list
   | Xbuiltin of external_function * var builtin_arg list * var builtin_res
   | Xbranch of node
-  | Xcond of condition * var list * node * node
+  | Xcond of condition * var list * node * node * bool option
   | Xjumptable of var * node list
   | Xreturn of var list
 
@@ -105,7 +105,7 @@ let twin_reg r =
 let rec successors_block = function
   | Xbranch s :: _ -> [s]
   | Xtailcall(sg, vos, args) :: _ -> []
-  | Xcond(cond, args, s1, s2) :: _ -> [s1; s2]
+  | Xcond(cond, args, s1, s2, _) :: _ -> [s1; s2]
   | Xjumptable(arg, tbl) :: _ -> tbl
   | Xreturn  _:: _ -> []
   | instr :: blk -> successors_block blk
@@ -159,7 +159,7 @@ let type_instr = function
       let (targs, tres) = type_of_operation op in
       set_vars_type args targs;
       set_var_type res tres
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       set_vars_type args (type_of_addressing addr);
       set_var_type dst (type_of_chunk chunk)
   | Xstore(chunk, addr, args, src) ->
@@ -179,7 +179,7 @@ let type_instr = function
       type_builtin_res res (proj_sig_res sg)
   | Xbranch s ->
       ()
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       set_vars_type args (type_of_condition cond)
   | Xjumptable(arg, tbl) ->
       set_var_type arg Tint
diff --git a/backend/XTL.mli b/backend/XTL.mli
index 54988d4b..7b7f7186 100644
--- a/backend/XTL.mli
+++ b/backend/XTL.mli
@@ -31,13 +31,13 @@ type instruction =
   | Xspill of var * var
   | Xparmove of var list * var list * var * var
   | Xop of operation * var list * var
-  | Xload of memory_chunk * addressing * var list * var
+  | Xload of trapping_mode * memory_chunk * addressing * var list * var
   | Xstore of memory_chunk * addressing * var list * var
   | Xcall of signature * (var, ident) sum * var list * var list
   | Xtailcall of signature * (var, ident) sum * var list
   | Xbuiltin of external_function * var builtin_arg list * var builtin_res
   | Xbranch of node
-  | Xcond of condition * var list * node * node
+  | Xcond of condition * var list * node * node * bool option
   | Xjumptable of var * node list
   | Xreturn of var list