Merge remote-tracking branch 'origin/mppa-work' into mppa-expect

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-06 22:45:05 +0200
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-06 22:45:05 +0200
commit: 5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633 (patch)
tree: 299bdd3c6068f121ca243d8602addcd27d690fd2 /backend
parent: c420bc8d3b87d71c38209b5ab8bca22875466362 (diff)
parent: c6356cdc5f567a317afcb99cb004354cf7dcce0f (diff)
download: compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.tar.gz
compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.zip
75 files changed, 6684 insertions, 490 deletions
diff --git a/backend/Allnontrap.v b/backend/Allnontrap.v
new file mode 100644
index 00000000..acf03eca
--- /dev/null
+++ b/backend/Allnontrap.v
@@ -0,0 +1,26 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+
+Definition transf_ros (ros: reg + ident) : reg + ident := ros. (* identity: a call target (register or symbol) is returned unchanged *)
+
+Definition transf_instr (pc: node) (instr: instruction) : instruction :=
+  match instr with (* [pc] is not inspected; only the shape of [instr] matters *)
+  | Iload _ chunk addr args dst s => Iload NOTRAP chunk addr args dst s
+  | _ => instr (* anything that is not a load is returned unchanged *)
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := fn_sig f;
+     fn_params := fn_params f;
+     fn_stacksize := fn_stacksize f;
+     fn_code := PTree.map transf_instr (fn_code f); (* the only field that is rewritten *)
+     fn_entrypoint := fn_entrypoint f |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd. (* lift [transf_function] to function definitions *)
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p. (* whole-program entry point: apply [transf_fundef] through [transform_program] *)
+
diff --git a/backend/Allnontrapproof.v b/backend/Allnontrapproof.v
new file mode 100644
index 00000000..92e5a88c
--- /dev/null
+++ b/backend/Allnontrapproof.v
@@ -0,0 +1,215 @@
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import Allnontrap.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp. (* functions related by [transf_fundef]; [eq] is the second relation *)
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto. (* the output of [transf_program] satisfies [match_prog] *)
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL). (* direct instance of the Genv lemma applied to TRANSL *)
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL). (* direct instance of the Genv lemma applied to TRANSL *)
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL). (* symbol lookup gives the same result in both environments *)
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL). (* direct instance of the Genv lemma applied to TRANSL *)
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity. (* holds by computation in each case of the function definition *)
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id]. (* target given by a register [r] or by a symbol [id] *)
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence. (* an unresolved symbol contradicts the hypothesis *)
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some(transf_instr pc i).
+Proof.
+  intros f pc i Hcode.
+  unfold transf_function; simpl.
+  (* a lookup in a mapped tree is the [option_map] of the original lookup *)
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite Hcode; reflexivity.
+Qed.
+
+Ltac TR_AT := (* from a code lookup hypothesis, add the corresponding lookup in the transformed function *)
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop := (* frames agree on everything except the function, which is transformed *)
+  | match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop := (* in every case the stacks are related frame by frame through [match_frames] *)
+  | match_regular_states: forall stk f sp pc rs m stk' (* same sp, pc, registers and memory; function transformed *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk' (* same arguments and memory; callee transformed *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk' (* same returned value and memory *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. 
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  constructor; auto. 
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. constructor; auto. constructor.
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. auto.
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  constructor; auto.
+(* return *)
+- econstructor; split.
+  eapply exec_Ireturn; eauto.
+  constructor; auto.
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* return state *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor. (* the four indented steps discharge the premises of the target initial state *)
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor. (* the state relation, then its stack premise *)
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor. (* invert the final state, the state relation, then the stack relation *)
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step. (* [step_simulation] matches each source step with exactly one target step *)
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Allocation.v b/backend/Allocation.v
index 13e14530..2323c050 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -58,7 +58,7 @@ Inductive block_shape: Type :=
          (mv2: moves) (s: node)
   | BSopdead (op: operation) (args: list reg) (res: reg)
          (mv: moves) (s: node)
-  | BSload (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
+  | BSload (trap : trapping_mode) (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
          (mv1: moves) (args': list mreg) (dst': mreg)
          (mv2: moves) (s: node)
   | BSloaddead (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
@@ -226,15 +226,19 @@ Definition pair_instr_block
       | operation_other _ _ =>
           pair_Iop_block op args res s b
       end
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       let (mv1, b1) := extract_moves nil b in
       match b1 with
-      | Lload chunk' addr' args' dst' :: b2 =>
+      | Lload trap' chunk' addr' args' dst' :: b2 =>
+        assertion (trapping_mode_eq trap' trap);
           if chunk_eq chunk Mint64 && Archi.splitlong then
+            (* TODO: non-trapping split loads are not supported yet *)
+            assertion (trapping_mode_eq trap TRAP);
             assertion (chunk_eq chunk' Mint32);
             let (mv2, b3) := extract_moves nil b2 in
             match b3 with
-            | Lload chunk'' addr'' args'' dst'' :: b4 =>
+            | Lload trap'' chunk'' addr'' args'' dst'' :: b4 =>
+              assertion (trapping_mode_eq trap'' TRAP);
                 let (mv3, b5) := extract_moves nil b4 in
                 assertion (chunk_eq chunk'' Mint32);
                 assertion (eq_addressing addr addr');
@@ -254,7 +258,7 @@ Definition pair_instr_block
             assertion (chunk_eq chunk chunk');
             assertion (eq_addressing addr addr');
             assertion (check_succ s b3);
-            Some(BSload chunk addr args dst mv1 args' dst' mv2 s))
+            Some(BSload trap chunk addr args dst mv1 args' dst' mv2 s))
       | _ =>
           assertion (check_succ s b1);
           Some(BSloaddead chunk addr args dst mv1 s)
@@ -310,10 +314,10 @@ Definition pair_instr_block
           Some(BSbuiltin ef args res mv1 args' res' mv2 s)
       | _ => None
       end
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 i =>
       let (mv1, b1) := extract_moves nil b in
       match b1 with
-      | Lcond cond' args' s1' s2' :: b2 =>
+      | Lcond cond' args' s1' s2' i' :: b2 =>
           assertion (eq_condition cond cond');
           assertion (peq s1 s1');
           assertion (peq s2 s2');
@@ -734,11 +738,11 @@ Function add_equations_args (rl: list reg) (tyl: list typ) (ll: list (rpair loc)
 (** [add_equations_res] is similar but is specialized to the case where
   there is only one pseudo-register. *)
 
-Function add_equations_res (r: reg) (oty: option typ) (p: rpair mreg) (e: eqs) : option eqs :=
-  match p, oty with
+Function add_equations_res (r: reg) (ty: typ) (p: rpair mreg) (e: eqs) : option eqs :=
+  match p, ty with
   | One mr, _ =>
       Some (add_equation (Eq Full r (R mr)) e)
-  | Twolong mr1 mr2, Some Tlong =>
+  | Twolong mr1 mr2, Tlong =>
       if Archi.ptr64 then None else
       Some (add_equation (Eq Low r (R mr2)) (add_equation (Eq High r (R mr1)) e))
   | _, _ =>
@@ -1023,7 +1027,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv)
   | BSopdead op args res mv s =>
       assertion (reg_unconstrained res e);
       track_moves env mv e
-  | BSload chunk addr args dst mv1 args' dst' mv2 s =>
+  | BSload trap chunk addr args dst mv1 args' dst' mv2 s =>
       do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
       track_moves env mv1 e2
@@ -1084,7 +1088,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv)
   | BStailcall sg ros args mv1 ros' =>
       let args' := loc_arguments sg in
       assertion (tailcall_is_possible sg);
-      assertion (opt_typ_eq sg.(sig_res) f.(RTL.fn_sig).(sig_res));
+      assertion (rettype_eq sg.(sig_res) f.(RTL.fn_sig).(sig_res));
       assertion (ros_compatible_tailcall ros');
       do e1 <- add_equation_ros ros ros' empty_eqs;
       do e2 <- add_equations_args args (sig_args sg) args' e1;
@@ -1114,7 +1118,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv)
       track_moves env mv empty_eqs
   | BSreturn (Some arg) mv =>
       let arg' := loc_result (RTL.fn_sig f) in
-      do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs;
+      do e1 <- add_equations_res arg (proj_sig_res (RTL.fn_sig f)) arg' empty_eqs;
       track_moves env mv e1
   end.
 
@@ -1263,7 +1267,7 @@ Definition successors_block_shape (bsh: block_shape) : list node :=
   | BShighlong src dst mv s => s :: nil
   | BSop op args res mv1 args' res' mv2 s => s :: nil
   | BSopdead op args res mv s => s :: nil
-  | BSload chunk addr args dst mv1 args' dst' mv2 s => s :: nil
+  | BSload trap chunk addr args dst mv1 args' dst' mv2 s => s :: nil
   | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s => s :: nil
   | BSload2_1 addr args dst mv1 args' dst' mv2 s => s :: nil
   | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s => s :: nil
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index 1804f46b..3c7df58a 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -96,44 +96,44 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
       expand_block_shape (BSopdead op args res mv s)
                          (Iop op args res s)
                          (expand_moves mv (Lbranch s :: k))
-  | ebs_load: forall chunk addr args dst mv1 args' dst' mv2 s k,
+  | ebs_load: forall trap chunk addr args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
-      expand_block_shape (BSload chunk addr args dst mv1 args' dst' mv2 s)
-                         (Iload chunk addr args dst s)
+      expand_block_shape (BSload trap chunk addr args dst mv1 args' dst' mv2 s)
+                         (Iload trap chunk addr args dst s)
                          (expand_moves mv1
-                           (Lload chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
+                           (Lload trap chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2: forall addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s k,
       wf_moves mv1 -> wf_moves mv2 -> wf_moves mv3 ->
       Archi.splitlong = true ->
       offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2 addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr args1' dst1' ::
+                           (Lload TRAP Mint32 addr args1' dst1' ::
                            expand_moves mv2
-                             (Lload Mint32 addr2 args2' dst2' ::
+                             (Lload TRAP Mint32 addr2 args2' dst2' ::
                               expand_moves mv3 (Lbranch s :: k))))
   | ebs_load2_1: forall addr args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
       Archi.splitlong = true ->
       expand_block_shape (BSload2_1 addr args dst mv1 args' dst' mv2 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr args' dst' ::
+                           (Lload TRAP Mint32 addr args' dst' ::
                             expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2_2: forall addr addr2 args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
       Archi.splitlong = true ->
       offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2_2 addr addr2 args dst mv1 args' dst' mv2 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr2 args' dst' ::
+                           (Lload TRAP Mint32 addr2 args' dst' ::
                             expand_moves mv2 (Lbranch s :: k)))
-  | ebs_load_dead: forall chunk addr args dst mv s k,
+  | ebs_load_dead: forall trap chunk addr args dst mv s k,
       wf_moves mv ->
       expand_block_shape (BSloaddead chunk addr args dst mv s)
-                         (Iload chunk addr args dst s)
+                         (Iload trap chunk addr args dst s)
                          (expand_moves mv (Lbranch s :: k))
   | ebs_store: forall chunk addr args src mv1 args' src' s k,
       wf_moves mv1 ->
@@ -169,11 +169,11 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                          (Ibuiltin ef args res s)
                          (expand_moves mv1
                            (Lbuiltin ef args' res' :: expand_moves mv2 (Lbranch s :: k)))
-  | ebs_cond: forall cond args mv args' s1 s2 k,
+  | ebs_cond: forall cond args mv args' s1 s2 k i i',
       wf_moves mv ->
       expand_block_shape (BScond cond args mv args' s1 s2)
-                         (Icond cond args s1 s2)
-                         (expand_moves mv (Lcond cond args' s1 s2 :: k))
+                         (Icond cond args s1 s2 i)
+                         (expand_moves mv (Lcond cond args' s1 s2 i' :: k))
   | ebs_jumptable: forall arg mv arg' tbl k,
       wf_moves mv ->
       expand_block_shape (BSjumptable arg mv arg' tbl)
@@ -1301,10 +1301,10 @@ Proof.
 Qed.
 
 Lemma add_equations_res_lessdef:
-  forall r oty l e e' rs ls,
-  add_equations_res r oty l e = Some e' ->
+  forall r ty l e e' rs ls,
+  add_equations_res r ty l e = Some e' ->
   satisf rs ls e' ->
-  Val.has_type rs#r (match oty with Some ty => ty | None => Tint end) ->
+  Val.has_type rs#r ty ->
   Val.lessdef rs#r (Locmap.getpair (map_rpair R l) ls).
 Proof.
   intros. functional inversion H; simpl.
@@ -1892,7 +1892,7 @@ Qed.
 
 Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signature -> Prop :=
   | match_stackframes_nil: forall sg,
-      sg.(sig_res) = Some Tint ->
+      sg.(sig_res) = Tint ->
       match_stackframes nil nil sg
   | match_stackframes_cons:
       forall res f sp pc rs s tf bb ls ts sg an e env
@@ -1970,8 +1970,8 @@ Ltac UseShape :=
   end.
 
 Remark addressing_not_long:
-  forall env f addr args dst s r,
-  wt_instr f env (Iload Mint64 addr args dst s) -> Archi.splitlong = true ->
+  forall trap env f addr args dst s r,
+  wt_instr f env (Iload trap Mint64 addr args dst s) -> Archi.splitlong = true ->
   In r args -> r <> dst.
 Proof.
   intros. inv H.
@@ -1981,7 +1981,7 @@ Proof.
   { rewrite <- H5. apply in_map; auto. }
   assert (C: env r = Tint).
   { apply A in B. rewrite B. unfold Tptr. rewrite Archi.splitlong_ptr32 by auto. auto. }
-  red; intros; subst r. rewrite C in H8; discriminate.
+  red; intros; subst r. rewrite C in H9; discriminate.
 Qed.
 
 (** The proof of semantic preservation is a simulation argument of the
@@ -2082,8 +2082,8 @@ Proof.
   econstructor; eauto.
   eapply wt_exec_Iop; eauto.
 
-(* load regular *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+(* load regular TRAP *)
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
@@ -2100,7 +2100,7 @@ Proof.
   econstructor; eauto.
 
 (* load pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2155,7 +2155,7 @@ Proof.
   econstructor; eauto.
 
 (* load first word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2185,7 +2185,7 @@ Proof.
   econstructor; eauto.
 
 (* load second word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2229,6 +2229,79 @@ Proof.
   econstructor; eauto.
   eapply wt_exec_Iload; eauto.
 
+- (* load notrap1 *)
+  generalize (wt_exec_Iload_notrap _ _ _ _ _ _ _ _ WTI WTRS).
+  intro WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_addressing_lessdef_none; eauto. intro Haddr.
+  exploit (exec_moves mv2); eauto.  intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. eapply exec_Lload_notrap1. rewrite <- Haddr.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+
+(* load notrap1 dead *)
+- exploit exec_moves; eauto. intros [ls1 [X Y]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_right. eexact X. econstructor; eauto.
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto.
+  eapply reg_unconstrained_satisf; eauto.
+  intros [enext [U V]].
+  econstructor; eauto.
+  eapply wt_exec_Iload_notrap; eauto.
+
+(* load regular notrap2 *)
+- generalize (wt_exec_Iload_notrap _ _ _ _ _ _ _ _ WTI WTRS).
+  intro WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
+  destruct (Mem.loadv chunk m' a') as [v' |] eqn:Hload.
+  { exploit (exec_moves mv2 env (rs # dst <- Vundef)); eauto.  intros [ls2 [A2 B2]].
+      econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. econstructor. instantiate (1 := a'). rewrite <- F.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto. eauto.
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+  }
+  { exploit (exec_moves mv2 env (rs # dst <- Vundef)); eauto.  intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. eapply exec_Lload_notrap2. rewrite <- F.
+  apply eval_addressing_preserved. exact symbols_preserved. assumption.
+  eauto.
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+  }
+  
+- (* load notrap2 dead *)
+  exploit exec_moves; eauto. intros [ls1 [X Y]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_right. eexact X. econstructor; eauto.
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto.
+  eapply reg_unconstrained_satisf; eauto.
+  intros [enext [U V]].
+  econstructor; eauto.
+  eapply wt_exec_Iload_notrap; eauto.
+  
 (* store *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]].
   exploit add_equations_lessdef; eauto. intros LD. simpl in LD. inv LD.
@@ -2425,13 +2498,13 @@ Proof.
                           (return_regs (parent_locset ts) ls1))
   with (Locmap.getpair (map_rpair R (loc_result (RTL.fn_sig f))) ls1).
   eapply add_equations_res_lessdef; eauto.
-  rewrite H13. apply WTRS.
+  rewrite <- H14. apply WTRS.
   generalize (loc_result_caller_save (RTL.fn_sig f)).
   destruct (loc_result (RTL.fn_sig f)); simpl.
   intros A; rewrite A; auto.
   intros [A B]; rewrite A, B; auto.
   apply return_regs_agree_callee_save.
-  unfold proj_sig_res. rewrite <- H11; rewrite H13. apply WTRS.
+  rewrite <- H11, <- H14. apply WTRS.
 
 (* internal function *)
 - monadInv FUN. simpl in *.
@@ -2463,7 +2536,8 @@ Proof.
   simpl. destruct (loc_result (ef_sig ef)) eqn:RES; simpl.
   rewrite Locmap.gss; auto.
   generalize (loc_result_pair (ef_sig ef)); rewrite RES; intros (A & B & C & D & E).
-  exploit external_call_well_typed; eauto. unfold proj_sig_res; rewrite B. intros WTRES'.
+  assert (WTRES': Val.has_type v' Tlong).
+  { rewrite <- B. eapply external_call_well_typed; eauto. }
   rewrite Locmap.gss. rewrite Locmap.gso by (red; auto). rewrite Locmap.gss.
   rewrite val_longofwords_eq_1 by auto. auto.
   red; intros. rewrite (AG l H0).
diff --git a/backend/Asmexpandaux.ml b/backend/Asmexpandaux.ml
index b1d822db..cc171cae 100644
--- a/backend/Asmexpandaux.ml
+++ b/backend/Asmexpandaux.ml
@@ -100,7 +100,7 @@ let translate_annot sp preg_to_dwarf annot =
   | a::_ -> aux a)
 
 let builtin_nop =
-  let signature ={sig_args = []; sig_res = None; sig_cc = cc_default} in
+  let signature ={sig_args = []; sig_res = Tvoid; sig_cc = cc_default} in
   let name = coqstring_of_camlstring "__builtin_nop" in
   Pbuiltin(EF_builtin(name,signature),[],BR_none)
 
diff --git a/backend/Asmgenproof0.v b/backend/Asmgenproof0.v
index 70c4323c..3638c465 100644
--- a/backend/Asmgenproof0.v
+++ b/backend/Asmgenproof0.v
@@ -897,6 +897,55 @@ Proof.
   apply code_tail_next_int with i; auto.
 Qed.
 
+(** A variant that supports zero steps of execution *)
+
+Inductive exec_straight_opt: code -> regset -> mem -> code -> regset -> mem -> Prop :=
+  | exec_straight_opt_refl: forall c rs m, (* zero steps: code, registers and memory are unchanged *)
+      exec_straight_opt c rs m c rs m
+  | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2, (* any [exec_straight] execution is also accepted *)
+      exec_straight c1 rs1 m1 c2 rs2 m2 ->
+      exec_straight_opt c1 rs1 m1 c2 rs2 m2.
+
+Lemma exec_straight_opt_left:
+  forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
+  exec_straight c1 rs1 m1 c2 rs2 m2 ->
+  exec_straight_opt c2 rs2 m2 c3 rs3 m3 ->
+  exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+  destruct 2; intros. auto. eapply exec_straight_trans; eauto. (* zero-step tail: the first hypothesis suffices; otherwise chain the two *)
+Qed.
+
+Lemma exec_straight_opt_right:
+  forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
+  exec_straight_opt c1 rs1 m1 c2 rs2 m2 ->
+  exec_straight c2 rs2 m2 c3 rs3 m3 ->
+  exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+  destruct 1; intros. auto. eapply exec_straight_trans; eauto. (* zero-step prefix: the second hypothesis suffices; otherwise chain the two *)
+Qed.
+
+Lemma exec_straight_opt_step:
+  forall i c rs1 m1 rs2 m2 c' rs3 m3,
+  exec_instr ge fn i rs1 m1 = Next rs2 m2 ->
+  rs2#PC = Val.offset_ptr rs1#PC Ptrofs.one ->
+  exec_straight_opt c rs2 m2 c' rs3 m3 ->
+  exec_straight (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+  intros. inv H1. (* case split on the optional tail: zero steps, or an [exec_straight] execution *)
+- apply exec_straight_one; auto.
+- eapply exec_straight_step; eauto.
+Qed.
+
+Lemma exec_straight_opt_step_opt:
+  forall i c rs1 m1 rs2 m2 c' rs3 m3,
+  exec_instr ge fn i rs1 m1 = Next rs2 m2 ->
+  rs2#PC = Val.offset_ptr rs1#PC Ptrofs.one ->
+  exec_straight_opt c rs2 m2 c' rs3 m3 ->
+  exec_straight_opt (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+  intros. apply exec_straight_opt_intro. eapply exec_straight_opt_step; eauto. (* same as [exec_straight_opt_step], with the result wrapped by [exec_straight_opt_intro] *)
+Qed.
+
 End STRAIGHTLINE.
 
 (** * Properties of the Mach call stack *)
diff --git a/backend/Bounds.v b/backend/Bounds.v
index fa695234..b8c12166 100644
--- a/backend/Bounds.v
+++ b/backend/Bounds.v
@@ -67,7 +67,7 @@ Definition instr_within_bounds (i: instruction) :=
   | Lgetstack sl ofs ty r => slot_within_bounds sl ofs ty /\ mreg_within_bounds r
   | Lsetstack r sl ofs ty => slot_within_bounds sl ofs ty
   | Lop op args res => mreg_within_bounds res
-  | Lload chunk addr args dst => mreg_within_bounds dst
+  | Lload trap chunk addr args dst => mreg_within_bounds dst
   | Lcall sig ros => size_arguments sig <= bound_outgoing b
   | Lbuiltin ef args res =>
        (forall r, In r (params_of_builtin_res res) \/ In r (destroyed_by_builtin ef) -> mreg_within_bounds r)
@@ -104,7 +104,7 @@ Definition record_regs_of_instr (u: RegSet.t) (i: instruction) : RegSet.t :=
   | Lgetstack sl ofs ty r => record_reg u r
   | Lsetstack r sl ofs ty => record_reg u r
   | Lop op args res => record_reg u res
-  | Lload chunk addr args dst => record_reg u dst
+  | Lload trap chunk addr args dst => record_reg u dst
   | Lstore chunk addr args src => u
   | Lcall sig ros => u
   | Ltailcall sig ros => u
@@ -280,7 +280,7 @@ Definition defined_by_instr (r': mreg) (i: instruction) :=
   match i with
   | Lgetstack sl ofs ty r => r' = r
   | Lop op args res => r' = res
-  | Lload chunk addr args dst => r' = dst
+  | Lload trap chunk addr args dst => r' = dst
   | Lbuiltin ef args res => In r' (params_of_builtin_res res) \/ In r' (destroyed_by_builtin ef)
   | _ => False
   end.
diff --git a/backend/CSE.v b/backend/CSE.v
index ecfa1f9e..1936d4e4 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -459,8 +459,10 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
           before
       | Iop op args res s =>
           add_op before res op args
-      | Iload chunk addr args dst s =>
-          add_load before dst chunk addr args
+      | Iload TRAP chunk addr args dst s =>
+        add_load before dst chunk addr args
+      | Iload NOTRAP _ _ _ dst _ =>
+        set_unknown before dst
       | Istore chunk addr args src s =>
           let app := approx!!pc in
           let n := kill_loads_after_store app before chunk addr args in
@@ -494,7 +496,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
           | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ =>
               set_res_unknown before res
           end
-      | Icond cond args ifso ifnot =>
+      | Icond cond args ifso ifnot _ =>
           before
       | Ijumptable arg tbl =>
           before
@@ -534,23 +536,23 @@ Definition transf_instr (n: numbering) (instr: instruction) :=
             let (op', args') := reduce _ combine_op n1 op args vl in
             Iop op' args' res s
         end
-  | Iload chunk addr args dst s =>
+  | Iload TRAP chunk addr args dst s =>
       let (n1, vl) := valnum_regs n args in
       match find_rhs n1 (Load chunk addr vl) with
       | Some r =>
           Iop Omove (r :: nil) dst s
       | None =>
           let (addr', args') := reduce _ combine_addr n1 addr args vl in
-          Iload chunk addr' args' dst s
+          Iload TRAP chunk addr' args' dst s
       end
   | Istore chunk addr args src s =>
       let (n1, vl) := valnum_regs n args in
       let (addr', args') := reduce _ combine_addr n1 addr args vl in
       Istore chunk addr' args' src s
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 i =>
       let (n1, vl) := valnum_regs n args in
       let (cond', args') := reduce _ combine_cond n1 cond args vl in
-      Icond cond' args' s1 s2
+      Icond cond' args' s1 s2 i
   | _ =>
       instr
   end.
diff --git a/backend/CSE2.v b/backend/CSE2.v
new file mode 100644
index 00000000..900a7517
--- /dev/null
+++ b/backend/CSE2.v
@@ -0,0 +1,518 @@
+(*
+Replace available expressions by the register containing their value.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps CSE2deps.
+
+(* Static analysis *)
+
+(* Symbolic description of the value held by a register:
+   - [SMove src]: a copy of register [src];
+   - [SOp op args]: the result of operation [op] applied to registers [args];
+   - [SLoad chunk addr args]: the result of a load of [chunk] at the address
+     computed by addressing mode [addr] from registers [args]. *)
+Inductive sym_val : Type :=
+| SMove (src : reg)
+| SOp (op : operation) (args : list reg)
+| SLoad (chunk : memory_chunk) (addr : addressing) (args : list reg).
+                                                   
+(* Decidable equality on lists of registers, built from [peq]. *)
+Definition eq_args (x y : list reg) : { x = y } + { x <> y } :=
+  list_eq_dec peq x y.
+
+(* Decidable equality on symbolic values.  The decision procedures for the
+   component types are put in the context with [generalize] so that
+   [decide equality] can use them.  Closed with [Defined] (not [Qed]) so the
+   term stays transparent; it is used computationally by [sym_val_beq]
+   and [lub]. *)
+Definition eq_sym_val : forall x y : sym_val,
+    {x = y} + { x <> y }.
+Proof.
+  generalize eq_operation.
+  generalize eq_args.
+  generalize peq.
+  generalize eq_addressing.
+  generalize chunk_eq.
+  decide equality.
+Defined.
+
+(* Abstract domain of the analysis (without a bottom element): a finite map
+   from registers to the symbolic value they are known to hold.  A register
+   absent from the map carries no information. *)
+Module RELATION.
+  
+Definition t := (PTree.t sym_val).
+(* Extensional equality: both maps give the same answer for every key. *)
+Definition eq (r1 r2 : t) :=
+  forall x, (PTree.get x r1) = (PTree.get x r2).
+
+(* The empty map: nothing is known about any register. *)
+Definition top : t := PTree.empty sym_val.
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  unfold eq.
+  intros; reflexivity.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  unfold eq.
+  intros; eauto.
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  unfold eq.
+  intros; congruence.
+Qed.
+
+(* Boolean version of [eq_sym_val]. *)
+Definition sym_val_beq (x y : sym_val) :=
+  if eq_sym_val x y then true else false.
+
+(* Boolean equality test on relations, element-wise with [sym_val_beq]. *)
+Definition beq (r1 r2 : t) := PTree.beq sym_val_beq r1 r2.
+
+(* Soundness of [beq] with respect to [eq]. *)
+Lemma beq_correct: forall r1 r2, beq r1 r2 = true -> eq r1 r2.
+Proof.
+  unfold beq, eq. intros r1 r2 EQ x.
+  pose proof (PTree.beq_correct sym_val_beq r1 r2) as CORRECT.
+  destruct CORRECT as [CORRECTF CORRECTB].
+  pose proof (CORRECTF EQ x) as EQx.
+  clear CORRECTF CORRECTB EQ.
+  unfold sym_val_beq in *.
+  destruct (r1 ! x) as [R1x | ] in *;
+    destruct (r2 ! x) as [R2x | ] in *;
+    trivial; try contradiction.
+  destruct (eq_sym_val R1x R2x) in *; congruence.
+Qed.
+
+(* Ordering: [ge r1 r2] holds when every binding present in [r1] is also
+   present, with the same symbolic value, in [r2].  In other words [r1]
+   records a subset of the facts of [r2]. *)
+Definition ge (r1 r2 : t) :=
+  forall x,
+    match PTree.get x r1 with
+    | None => True
+    | Some v => (PTree.get x r2) = Some v
+    end.
+
+Lemma ge_refl: forall r1 r2, eq r1 r2 -> ge r1 r2.
+Proof.
+  unfold eq, ge.
+  intros r1 r2 EQ x.
+  pose proof (EQ x) as EQx.
+  clear EQ.
+  destruct (r1 ! x).
+  - congruence.
+  - trivial.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+Proof.
+  unfold ge.
+  intros r1 r2 r3 GE12 GE23 x.
+  pose proof (GE12 x) as GE12x; clear GE12.
+  pose proof (GE23 x) as GE23x; clear GE23.
+  destruct (r1 ! x); trivial.
+  destruct (r2 ! x); congruence.
+Qed.
+
+(* Least upper bound: a binding is kept only when both maps bind the
+   register to equal symbolic values; otherwise it is dropped. *)
+Definition lub (r1 r2 : t) :=
+  PTree.combine
+    (fun ov1 ov2 =>
+       match ov1, ov2 with
+       | (Some v1), (Some v2) =>
+         if eq_sym_val v1 v2
+         then ov1
+         else None
+       | None, _
+       | _, None => None
+       end)
+    r1 r2.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (eq_sym_val _ _); trivial.
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (eq_sym_val _ _); trivial.
+  congruence.
+Qed.
+
+End RELATION.
+
+(* Signature of a join-semilattice with an equivalence [eq], a sound boolean
+   test [beq], a preorder [ge] and a least-upper-bound [lub], but with no
+   distinguished bottom element.  [ADD_BOTTOM] below takes a module of this
+   type and adds one. *)
+Module Type SEMILATTICE_WITHOUT_BOTTOM.
+
+  Parameter t: Type.
+  Parameter eq: t -> t -> Prop.
+  Axiom eq_refl: forall x, eq x x.
+  Axiom eq_sym: forall x y, eq x y -> eq y x.
+  Axiom eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Parameter beq: t -> t -> bool.
+  Axiom beq_correct: forall x y, beq x y = true -> eq x y.
+  Parameter ge: t -> t -> Prop.
+  Axiom ge_refl: forall x y, eq x y -> ge x y.
+  Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Parameter lub: t -> t -> t.
+  Axiom ge_lub_left: forall x y, ge (lub x y) x.
+  Axiom ge_lub_right: forall x y, ge (lub x y) y.
+
+End SEMILATTICE_WITHOUT_BOTTOM.
+
+(* Functor adding a bottom element to a semilattice [L]: elements are
+   [option L.t], with [None] playing the role of bottom. *)
+Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM).
+  Definition t := option L.t.
+  (* [None] is equal only to [None]; [Some] values are compared with [L.eq]. *)
+  Definition eq (a b : t) :=
+    match a, b with
+    | None, None => True
+    | Some x, Some y => L.eq x y
+    | Some _, None | None, Some _ => False
+    end.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq; destruct x; trivial.
+    apply L.eq_refl.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; destruct x; destruct y; trivial.
+    apply L.eq_sym.
+  Qed.
+  
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; destruct x; destruct y; destruct z; trivial.
+    - apply L.eq_trans.
+    - contradiction.
+  Qed.
+  
+  (* Boolean counterpart of [eq], delegating to [L.beq] on [Some] values. *)
+  Definition beq (x y : t) :=
+    match x, y with
+    | None, None => true
+    | Some x, Some y => L.beq x y
+    | Some _, None | None, Some _ => false
+    end.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq.
+    destruct x; destruct y; trivial; try congruence.
+    apply L.beq_correct.
+  Qed.
+  
+  (* Ordering: everything is [ge] than [None]; [None] is not [ge] than any
+     [Some]; two [Some] values are compared with [L.ge]. *)
+  Definition ge (x y : t) :=
+    match x, y with
+    | None, Some _ => False
+    | _, None => True
+    | Some a, Some b => L.ge a b
+    end.
+  
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    destruct x; destruct y; trivial.
+    apply L.ge_refl.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    destruct x; destruct y; destruct z; trivial; try contradiction.
+    apply L.ge_trans.
+  Qed.
+  
+  (* The added bottom element. *)
+  Definition bot: t := None.
+  Lemma ge_bot: forall x, ge x bot.
+  Proof.
+    unfold ge, bot.
+    destruct x; trivial.
+  Qed.
+  
+  (* Least upper bound: [None] is neutral; otherwise use [L.lub]. *)
+  Definition lub (a b : t) :=
+    match a, b with
+    | None, _ => b
+    | _, None => a
+    | (Some x), (Some y) => Some (L.lub x y)
+    end.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_left.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_right.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+End ADD_BOTTOM.
+
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+(* [kill_sym_val dst sv] is [true] when the symbolic value [sv] mentions
+   register [dst] (as the source of a move, or among the arguments of an
+   operation or load), i.e. when [sv] is invalidated by a write to [dst]. *)
+Definition kill_sym_val (dst : reg) (sv : sym_val) :=
+  match sv with
+  | SMove src => if peq dst src then true else false
+  | SOp op args => List.existsb (peq dst) args
+  | SLoad chunk addr args => List.existsb (peq dst) args
+  end.
+                                                 
+(* Forget everything that a write to [dst] invalidates: the binding of [dst]
+   itself is removed, and so is every binding whose symbolic value mentions
+   [dst] (see [kill_sym_val]). *)
+Definition kill_reg (dst : reg) (rel : RELATION.t) :=
+  PTree.filter1 (fun x => negb (kill_sym_val dst x))
+                (PTree.remove dst rel).
+  
+(* [true] when the symbolic value may depend on the contents of memory:
+   always for loads, according to [op_depends_on_memory] for operations,
+   never for moves. *)
+Definition kill_sym_val_mem (sv: sym_val) :=
+  match sv with
+  | SMove _ => false
+  | SOp op _ => op_depends_on_memory op
+  | SLoad _ _ _ => true
+  end.
+
+(* [true] when the symbolic value must be forgotten after a store of [chunk]
+   at [addr]/[args].  Same as [kill_sym_val_mem], except that a load is
+   killed only when [may_overlap] reports a possible overlap with the
+   store. *)
+Definition kill_sym_val_store chunk addr args (sv: sym_val) :=
+  match sv with
+  | SMove _ => false
+  | SOp op _ => op_depends_on_memory op
+  | SLoad chunk' addr' args' => may_overlap chunk addr args chunk' addr' args'
+  end.
+
+(* Drop every binding whose symbolic value may depend on memory
+   (see [kill_sym_val_mem]). *)
+Definition kill_mem (rel : RELATION.t) :=
+  PTree.filter1 (fun x => negb (kill_sym_val_mem x)) rel.
+
+(* If [rel] records [x] as a copy of register [org], return [org];
+   otherwise return [x] unchanged.  Only one step is followed. *)
+Definition forward_move (rel : RELATION.t) (x : reg) : reg :=
+  match rel ! x with
+  | Some (SMove org) => org
+  | _ => x
+  end.
+
+(* Drop every binding invalidated by a store of [chunk] at [addr]/[args]
+   (see [kill_sym_val_store]); [args] are used as given. *)
+Definition kill_store1 chunk addr args rel :=
+  PTree.filter1 (fun x => negb (kill_sym_val_store chunk addr args x)) rel.
+
+(* Effect of a store on the relation: like [kill_store1], but the address
+   arguments are first rewritten through [forward_move rel].
+   NOTE(review): presumably this matches the form in which [load2] records
+   the arguments of [SLoad] entries -- confirm against CSE2deps. *)
+Definition kill_store chunk addr args rel :=
+  kill_store1 chunk addr (List.map (forward_move rel) args) rel.
+
+(* Effect of [dst := src]: kill everything depending on [dst], then record
+   [dst] as a copy of [forward_move rel src] (the source is resolved in the
+   relation *before* the kill). *)
+Definition move (src dst : reg) (rel : RELATION.t) :=
+  PTree.set dst (SMove (forward_move rel src)) (kill_reg dst rel).
+
+(* Fold step of [find_op].  Once a register has been found ([already] is
+   [Some _]) it is kept; otherwise the current entry [x |-> sv] is a hit
+   exactly when [sv] is [SOp] with the same operation and arguments. *)
+Definition find_op_fold op args (already : option reg) x sv :=
+  match already with
+  | Some _ => already
+  | None =>
+    match sv with
+    | SOp op' args' =>
+      if (eq_operation op op') && (eq_args args args')
+      then Some x
+      else None
+    | _ => None
+    end
+  end.
+
+(* Look in [rel] for a register bound to [SOp op args]; returns [None] when
+   there is none. *)
+Definition find_op (rel : RELATION.t) (op : operation) (args : list reg) :=
+  PTree.fold (find_op_fold op args) rel None.
+
+(* Fold step of [find_load].  Once a register has been found ([already] is
+   [Some _]) it is kept; otherwise the current entry [x |-> sv] is a hit
+   exactly when [sv] is [SLoad] with the same chunk, addressing mode and
+   arguments. *)
+Definition find_load_fold chunk addr args (already : option reg) x sv :=
+  match already with
+  | Some _ => already
+  | None =>
+    match sv with
+    | SLoad chunk' addr' args' =>
+      if (chunk_eq chunk chunk') &&
+         (eq_addressing addr addr') &&
+         (eq_args args args')
+      then Some x
+      else None
+    | _ => None
+    end
+  end.
+
+(* Look in [rel] for a register bound to [SLoad chunk addr args]; returns
+   [None] when there is none. *)
+Definition find_load (rel : RELATION.t) (chunk : memory_chunk) (addr : addressing) (args : list reg) :=
+  PTree.fold (find_load_fold chunk addr args) rel None.
+
+(* Record [dst := op(args)]: kill what depends on [dst], then bind [dst] to
+   [SOp op] applied to the arguments rewritten through [forward_move] in the
+   relation obtained *after* the kill.  The soundness lemma [oper2_sound]
+   requires that [dst] is not among [args]. *)
+Definition oper2 (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  let rel' := kill_reg dst rel in
+  PTree.set dst (SOp op (List.map (forward_move rel') args)) rel'.
+
+(* Like [oper2], but when [dst] is one of the arguments no new fact is
+   recorded: only the kill of [dst] is performed. *)
+Definition oper1 (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  if List.in_dec peq dst args
+  then kill_reg dst rel
+  else oper2 op dst args rel.
+
+(* Effect of [dst := op(args)].  If some register [r] is already known to
+   hold [op] applied to the (forwarded) arguments, [dst] is recorded as a
+   copy of [r]; otherwise fall back to [oper1]. *)
+Definition oper (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  match find_op rel op (List.map (forward_move rel) args) with
+  | Some r => move r dst rel
+  | None => oper1 op dst args rel
+  end.
+
+(* Transfer function for [Iop]: an [Omove] with exactly one argument is
+   handled by [move]; every other case goes through [oper]. *)
+Definition gen_oper (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  match op, args with
+  | Omove, src::nil => move src dst rel
+  | _, _ => oper op dst args rel
+  end.
+
+(* Record [dst := load chunk addr(args)]: kill what depends on [dst], then
+   bind [dst] to [SLoad] with the arguments rewritten through [forward_move]
+   in the relation obtained after the kill.  The soundness lemmas
+   ([load2_sound] and variants) require that [dst] is not among [args]. *)
+Definition load2 (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  let rel' := kill_reg dst rel in
+  PTree.set dst (SLoad chunk addr (List.map (forward_move rel') args)) rel'.
+
+(* Like [load2], but when [dst] is one of the address arguments no new fact
+   is recorded: only the kill of [dst] is performed. *)
+Definition load1 (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  if List.in_dec peq dst args
+  then kill_reg dst rel
+  else load2 chunk addr dst args rel.
+
+(* Transfer function for [Iload].  If some register [r] is already known to
+   hold the same load (same chunk, addressing mode and forwarded arguments),
+   [dst] is recorded as a copy of [r]; otherwise fall back to [load1]. *)
+Definition load (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  match find_load rel chunk addr (List.map (forward_move rel) args) with
+  | Some r => move r dst rel
+  | None => load1 chunk addr dst args rel
+  end.
+
+(* NO LONGER NEEDED
+Fixpoint list_represents { X : Type } (l : list (positive*X)) (tr : PTree.t X) : Prop :=
+  match l with
+  | nil => True
+  | (r,sv)::tail => (tr ! r) = Some sv /\ list_represents tail tr
+  end.
+
+Lemma elements_represent :
+  forall { X : Type },
+  forall tr : (PTree.t X),
+    (list_represents (PTree.elements tr) tr).
+Proof.
+  intros.
+  generalize (PTree.elements_complete tr).
+  generalize (PTree.elements tr).
+  induction l; simpl; trivial.
+  intro COMPLETE.
+  destruct a as [ r sv ].
+  split.
+  {
+    apply COMPLETE.
+    left; reflexivity.
+  }
+  apply IHl; auto.
+Qed.
+*)
+    
+(* Transfer function of the analysis for one instruction, starting from the
+   relation [rel] that holds before it:
+   - [Inop], [Icond], [Ijumptable]: relation unchanged;
+   - [Istore]: [kill_store];
+   - [Iop]: [gen_oper];
+   - [Iload]: [load] (the trapping mode [trap] is not used);
+   - [Icall]: memory-dependent facts and facts about [dst] are dropped;
+   - [Ibuiltin]: all facts are dropped ([RELATION.top]);
+   - [Itailcall], [Ireturn]: [RB.bot]. *)
+Definition apply_instr instr (rel : RELATION.t) : RB.t :=
+  match instr with
+  | Inop _
+  | Icond _ _ _ _ _
+  | Ijumptable _ _ => Some rel
+  | Istore chunk addr args _ _ => Some (kill_store chunk addr args rel)
+  | Iop op args dst _ => Some (gen_oper op dst args rel)
+  | Iload trap chunk addr args dst _ => Some (load chunk addr dst args rel)
+  | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
+  | Ibuiltin _ _ res _ => Some (RELATION.top) (* TODO (kill_builtin_res res x) *)
+  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  end.
+
+(* Lift [apply_instr] to [RB.t] and to program points: bottom ([None]) is
+   propagated, and a program point with no instruction in [code] yields
+   [RB.bot]. *)
+Definition apply_instr' code (pc : node) (ro : RB.t) : RB.t :=
+  match ro with
+  | None => None
+  | Some x =>
+    match code ! pc with
+    | None => RB.bot
+    | Some instr => apply_instr instr x
+    end
+  end.
+
+(* Run the dataflow solver [DS] on the code of [f], with [apply_instr'] as
+   transfer function and [Some RELATION.top] (no known facts) at the entry
+   point.  Consumers such as [subst_arg] match the result against
+   [None] / [Some inv], where [inv] maps program points to [RB.t]. *)
+Definition forward_map (f : RTL.function) := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+
+(* [forward_move] lifted to [RB.t]: on bottom ([None]) the register is
+   returned unchanged. *)
+Definition forward_move_b (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => forward_move rel x
+  end.
+
+(* Replace register [x], used at program point [pc], by the register it is
+   known to be a copy of according to the analysis result [fmap].  When no
+   analysis result is available ([None]) [x] is returned unchanged. *)
+Definition subst_arg (fmap : option (PMap.t RB.t)) (pc : node) (x : reg) : reg :=
+  match fmap with
+  | None => x
+  | Some inv => forward_move_b (PMap.get pc inv) x
+  end.
+
+(* [subst_arg] applied to every register of an argument list. *)
+Definition subst_args fmap pc := List.map (subst_arg fmap pc).
+
+(* Transform *)
+(* [find_op] on the relation attached to program point [pc] in the analysis
+   result [fmap]; [None] when there is no analysis result or when the point
+   is mapped to bottom. *)
+Definition find_op_in_fmap fmap pc op args :=
+  match fmap with
+  | None => None
+  | Some map =>
+    match PMap.get pc map with
+    | Some rel => find_op rel op args
+    | None => None
+    end
+  end.
+
+(* [find_load] on the relation attached to program point [pc] in the
+   analysis result [fmap]; [None] when there is no analysis result or when
+   the point is mapped to bottom. *)
+Definition find_load_in_fmap fmap pc chunk addr args :=
+  match fmap with
+  | None => None
+  | Some map =>
+    match PMap.get pc map with
+    | Some rel => find_load rel chunk addr args
+    | None => None
+    end
+  end.
+
+(* Rewrite the instruction at [pc] using the analysis result [fmap].
+   - Arguments of [Iop], [Iload], [Istore], [Icall], [Itailcall], [Icond],
+     [Ijumptable] and [Ireturn (Some _)] are replaced through [subst_arg(s)].
+   - An [Iop] whose operation is not trivial ([is_trivial_op]) and whose
+     value is already available in a register [src] becomes a move from
+     [src].
+   - An [Iload] whose value is already available in a register [src] becomes
+     a move from [src]; otherwise the load is kept with its original [trap]
+     mode.
+   - Every other instruction is left unchanged. *)
+Definition transf_instr (fmap : option (PMap.t RB.t))
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    let args' := subst_args fmap pc args in
+    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
+    | None => Iop op args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Iload trap chunk addr args dst s =>
+    let args' := subst_args fmap pc args in
+    match find_load_in_fmap fmap pc chunk addr args' with
+    | None => Iload trap chunk addr args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) src s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 i =>
+    Icond cond (subst_args fmap pc args) s1 s2 i
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+
+(* Transform a function: only the code changes, each instruction being
+   rewritten by [transf_instr] with the analysis result [forward_map f]. *)
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+
+(* Lift [transf_function] to function definitions via [AST.transf_fundef]. *)
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+(* Apply [transf_fundef] to a whole program via [transform_program]. *)
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
+
+(* Relation between a program and its transformed version: function
+   definitions are related by [transf_fundef] and variable information
+   by [eq]. *)
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+(* [transf_program] produces a program related to its input by
+   [match_prog]. *)
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v
new file mode 100644
index 00000000..309ccce1
--- /dev/null
+++ b/backend/CSE2proof.v
@@ -0,0 +1,1715 @@
+(*
+Replace available expressions by the register containing their value.
+
+Proofs.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps CSE2depsproof.
+Require Import Lia.
+
+(* Writing to a register [dst] that does not occur in [args] does not change
+   the values read for [args]. *)
+Lemma args_unaffected:
+  forall rs : regset,
+  forall dst : reg,
+  forall v,
+  forall args : list reg,
+    existsb (fun y : reg => peq dst y) args = false ->
+    (rs # dst <- v ## args) = (rs ## args).
+Proof.
+  induction args; simpl; trivial.
+  destruct (peq dst a) as [EQ | NEQ]; simpl.
+  { discriminate.
+  }
+  intro EXIST.
+  f_equal.
+  {
+    apply Regmap.gso.
+    congruence.
+  }
+  apply IHargs.
+  assumption.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section SAME_MEMORY.
+  Variable m : mem.
+
+(* Meaning of a symbolic value in register state [rs] (and the section's
+   memory [m]): [sem_sym_val sym rs v] states that [v] is a possible outcome
+   of evaluating [sym].
+   - [SMove src]: [v] is the contents of [src].
+   - [SOp]: [v] is exactly the result of [eval_operation].
+   - [SLoad]: if both the address computation and the memory load succeed,
+     [v] is the loaded value; if either fails, [v] is [None] or
+     [Some Vundef].
+     NOTE(review): the [Some Vundef] alternative presumably accounts for
+     non-trapping loads -- confirm against the RTL semantics. *)
+Definition sem_sym_val sym rs (v : option val) : Prop :=
+  match sym with
+  | SMove src => v = Some (rs # src)
+  | SOp op args =>
+    v = (eval_operation genv sp op (rs ## args) m)
+  | SLoad chunk addr args =>
+    match eval_addressing genv sp addr rs##args with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => v = Some dat
+                | None => v = None \/ v = Some Vundef
+                end
+    | None   => v = None \/ v = Some Vundef
+    end
+  end.
+    
+(* [rel] is correct for register [x] in state [rs]: if [rel] binds [x] to a
+   symbolic value, that value describes the current contents [rs # x].
+   NOTE: the parameter [v] does not occur in the body; the value that is
+   checked is always [rs # x]. *)
+Definition sem_reg (rel : RELATION.t) (x : reg) (rs : regset) (v : val) : Prop :=
+  match rel ! x with
+  | None => True
+  | Some sym => sem_sym_val sym rs (Some (rs # x)) 
+  end.
+
+(* [rel] is correct in state [rs]: [sem_reg] holds for every register. *)
+Definition sem_rel (rel : RELATION.t) (rs : regset) :=
+  forall x : reg, (sem_reg rel x rs (rs # x)).
+
+(* [sem_rel] lifted to [RB.t]: bottom ([None]) is satisfied by no state. *)
+Definition sem_rel_b (relb : RB.t) (rs : regset) :=
+  match relb with
+  | Some rel => sem_rel rel rs
+  | None => False
+  end.
+
+(* The analysis result [fmap] is correct at program point [pc] in state
+   [rs].  When there is no analysis result ([None]) nothing is required. *)
+Definition fmap_sem (fmap : option (PMap.t RB.t))
+  (pc : node) (rs : regset) :=
+  match fmap with
+  | None => True
+  | Some m => sem_rel_b (PMap.get pc m) rs
+  end.
+
+(* When the analysis result is correct at [pc], the register substituted by
+   [subst_arg] holds the same value as the original one. *)
+Lemma subst_arg_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  forall arg,
+    fmap_sem (forward_map f) pc rs ->
+    rs # (subst_arg (forward_map f) pc arg) = rs # arg.
+Proof.
+  intros until arg.
+  intro SEM.
+  unfold fmap_sem in SEM.
+  destruct (forward_map f) as [map |]in *; trivial.
+  simpl.
+  unfold sem_rel_b, sem_rel, sem_reg in *.
+  destruct (map # pc).
+  2: contradiction.
+  pose proof (SEM arg) as SEMarg.
+  simpl. unfold forward_move.
+  unfold sem_sym_val in *.
+  destruct (t ! arg); trivial.
+  destruct s; congruence.
+Qed.
+
+(* List version of [subst_arg_ok]. *)
+Lemma subst_args_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  fmap_sem (forward_map f) pc rs ->
+  forall args,
+    rs ## (subst_args (forward_map f) pc args) = rs ## args.
+Proof.
+  induction args; trivial.
+  simpl.
+  f_equal.
+  apply subst_arg_ok; assumption.
+  assumption.
+Qed.
+
+(* Soundness of [kill_reg]: after writing an arbitrary value [v] to [dst],
+   the relation with [dst] killed is still correct. *)
+Lemma kill_reg_sound :
+  forall rel : RELATION.t,
+  forall dst : reg,
+  forall rs,
+  forall v,
+    sem_rel rel rs ->
+    sem_rel (kill_reg dst rel) (rs # dst <- v).
+Proof.
+  unfold sem_rel, kill_reg, sem_reg, sem_sym_val.
+  intros until v.
+  intros REL x.
+  rewrite PTree.gfilter1.
+  destruct (Pos.eq_dec dst x).
+  {
+    subst x.
+    rewrite PTree.grs.
+    trivial.
+  }
+  rewrite PTree.gro by congruence.
+  rewrite Regmap.gso by congruence.
+  destruct (rel ! x) as [relx | ] eqn:RELx; trivial.
+  unfold kill_sym_val.
+  pose proof (REL x) as RELinstx.
+  rewrite RELx in RELinstx.
+  destruct relx eqn:SYMVAL.
+  {
+    destruct (peq dst src); simpl.
+    { reflexivity. }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  { destruct existsb eqn:EXISTS; simpl.
+    { reflexivity. }
+    rewrite args_unaffected by exact EXISTS.
+    assumption.
+  }
+  { destruct existsb eqn:EXISTS; simpl.
+    { reflexivity. }
+    rewrite args_unaffected by exact EXISTS.
+    assumption.
+  }
+Qed.
+
+(* Copying [src] into [dst] leaves the value read from [src] unchanged,
+   whether or not [src] and [dst] are the same register. *)
+Lemma write_same:
+  forall rs : regset,
+  forall src dst : reg,
+    (rs # dst <- (rs # src)) # src = rs # src.
+Proof.
+  intros.
+  destruct (peq src dst).
+  {
+    subst dst.
+    apply Regmap.gss.
+  }
+  rewrite Regmap.gso by congruence.
+  reflexivity.
+Qed.
+
+(* Soundness of [move]: after executing [dst := src], the relation
+   [move src dst rel] is correct. *)
+Lemma move_sound :
+  forall rel : RELATION.t,
+  forall src dst : reg,
+  forall rs,
+    sem_rel rel rs ->
+    sem_rel (move src dst rel) (rs # dst <- (rs # src)).
+Proof.
+  intros until rs. intros REL x.
+  pose proof (kill_reg_sound rel dst rs (rs # src) REL x) as KILL.
+  pose proof (REL src) as RELsrc.
+  unfold move.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    unfold sem_reg in *.
+    simpl.
+    unfold forward_move.
+    destruct (rel ! src) as [ sv |]; simpl.
+    destruct sv eqn:SV; simpl in *.
+    {
+      destruct (peq dst src0).
+      {
+        subst src0.
+        rewrite Regmap.gss.
+        reflexivity.
+      }
+      rewrite Regmap.gso by congruence.
+      assumption.
+    }
+    all: f_equal; symmetry; apply write_same.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+(* After [dst] has been killed, forwarding a register different from [dst]
+   never yields [dst]. *)
+Lemma move_cases_neq:
+  forall dst rel a,
+    a <> dst ->
+    (forward_move (kill_reg dst rel) a) <> dst.
+Proof.
+  intros until a. intro NEQ.
+  unfold kill_reg, forward_move.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  destruct (rel ! a); simpl.
+  2: congruence.
+  destruct s.
+  {
+    unfold kill_sym_val.
+    destruct peq; simpl; congruence.
+  }
+  all: simpl;
+    destruct negb; simpl; congruence.
+Qed.
+
+(* If [dst] is not among [args], then reading the forwarded arguments (with
+   respect to the relation where [dst] is killed) after a write to [dst]
+   gives the same values as reading [args] before the write. *)
+Lemma args_replace_dst :
+  forall rel,
+  forall args : list reg,
+  forall dst : reg,
+  forall rs : regset,
+  forall v,
+    (sem_rel rel rs) ->
+    not (In dst args) ->
+    (rs # dst <- v)
+    ## (map
+          (forward_move (kill_reg dst rel)) args) = rs ## args.
+Proof.
+  induction args; simpl.
+  1: reflexivity.
+  intros until v.
+  intros REL NOT_IN.
+  rewrite IHargs by auto.
+  f_equal.
+  pose proof (REL a) as RELa.
+  rewrite Regmap.gso by (apply move_cases_neq; auto).
+  unfold kill_reg.
+  unfold sem_reg in RELa.
+  unfold forward_move.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by auto.
+  destruct (rel ! a); simpl; trivial.
+  destruct s; simpl in *; destruct negb; simpl; congruence.
+Qed.
+
+(* Soundness of [oper2], under the assumption that [dst] is not one of the
+   arguments and that the operation evaluates to [v]. *)
+Lemma oper2_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper2 op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL NOT_IN EVAL x.
+  pose proof (kill_reg_sound rel dst rs v REL x) as KILL.
+  unfold oper2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    symmetry.
+    assumption.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+(* Soundness of [oper1]: by [kill_reg_sound] when [dst] is among the
+   arguments, by [oper2_sound] otherwise. *)
+Lemma oper1_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper1 op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold oper1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply oper2_sound; auto.
+Qed.
+
+(* Soundness of [find_op]: if it returns [src], then evaluating [op] on
+   [args] yields the current contents of [src].  The proof generalises over
+   the accumulator of the fold (assertion [REC]) and proceeds by induction
+   on the list of elements of [rel]. *)
+Lemma find_op_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_op rel op args = Some src ->
+    (eval_operation genv sp op (rs ## args) m) = Some (rs # src).
+Proof.
+  intros until rs.
+  unfold find_op.
+  rewrite PTree.fold_spec.
+  intro REL.
+  assert (
+     forall start,
+             match start with
+             | None => True
+             | Some src => eval_operation genv sp op rs ## args m = Some rs # src
+             end -> fold_left
+    (fun (a : option reg) (p : positive * sym_val) =>
+     find_op_fold op args a (fst p) (snd p)) (PTree.elements rel) start =
+                    Some src ->
+             eval_operation genv sp op rs ## args m = Some rs # src) as REC.
+  {
+    unfold sem_rel, sem_reg in REL.
+    generalize (PTree.elements_complete rel).
+    generalize (PTree.elements rel).
+    induction l; simpl.
+    {
+      intros.
+      subst start.
+      assumption.
+    }
+    destruct a as [r sv]; simpl.
+    intros COMPLETE start GEN.
+    apply IHl.
+    {
+      intros.
+      apply COMPLETE.
+      right.
+      assumption.
+    }
+    unfold find_op_fold.
+    destruct start.
+    assumption.
+    destruct sv; trivial.
+    destruct eq_operation; trivial.
+    subst op0.
+    destruct eq_args; trivial.
+    subst args0.
+    simpl.
+    assert ((rel ! r) = Some (SOp op args)) as RELatr.
+    {
+      apply COMPLETE.
+      left.
+      reflexivity.
+    }
+    pose proof (REL r) as RELr.
+    rewrite RELatr in RELr.
+    simpl in RELr.
+    symmetry.
+    assumption.
+  }
+  apply REC; auto.
+Qed.
+
+
+(* Soundness of [find_load]: if it returns [src], then [src] currently holds
+   the loaded value when the address computation and the load both succeed,
+   and [Vundef] when either of them fails.  Same proof structure as
+   [find_op_sound]. *)
+Lemma find_load_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    match eval_addressing genv sp addr rs##args with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end.
+Proof.
+  intros until rs.
+  unfold find_load.
+  rewrite PTree.fold_spec.
+  intro REL.
+  assert (
+     forall start,
+             match start with
+             | None => True
+             | Some src =>
+               match eval_addressing genv sp addr rs##args with
+               | Some a => match Mem.loadv chunk m a with
+                           | Some dat => rs#src = dat
+                           | None => rs#src = Vundef
+                           end
+               | None   => rs#src = Vundef
+               end
+             end ->
+    fold_left
+    (fun (a : option reg) (p : positive * sym_val) =>
+     find_load_fold chunk addr args a (fst p) (snd p)) (PTree.elements rel) start =
+    Some src ->
+    match eval_addressing genv sp addr rs##args with
+               | Some a => match Mem.loadv chunk m a with
+                           | Some dat => rs#src = dat
+                           | None => rs#src = Vundef
+                           end
+               | None   => rs#src = Vundef
+               end) as REC.
+  
+  {
+    unfold sem_rel, sem_reg in REL.
+    generalize (PTree.elements_complete rel).
+    generalize (PTree.elements rel).
+    induction l; simpl.
+    {
+      intros.
+      subst start.
+      assumption.
+    }
+    destruct a as [r sv]; simpl.
+    intros COMPLETE start GEN.
+    apply IHl.
+    {
+      intros.
+      apply COMPLETE.
+      right.
+      assumption.
+    }
+    unfold find_load_fold.
+    destruct start.
+    assumption.
+    destruct sv; trivial.
+    destruct chunk_eq; trivial.
+    subst chunk0.
+    destruct eq_addressing; trivial.
+    subst addr0.
+    destruct eq_args; trivial.
+    subst args0.
+    simpl.
+    assert ((rel ! r) = Some (SLoad chunk addr args)) as RELatr.
+    {
+      apply COMPLETE.
+      left.
+      reflexivity.
+    }
+    pose proof (REL r) as RELr.
+    rewrite RELatr in RELr.
+    simpl in RELr.
+    destruct eval_addressing.
+    { destruct Mem.loadv.
+      congruence.
+      destruct RELr; congruence.
+    }
+    destruct RELr; congruence.
+  }
+  apply REC; auto.
+Qed.
+
+
+(* Corollary of [find_load_sound] for the case where the address computation
+   and the load both succeed: the loaded value is the contents of [src]. *)
+Lemma find_load_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    v = rs # src.
+Proof.
+  intros until v. intros REL FINDLOAD ADDR LOAD.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  destruct eval_addressing in *.
+  {
+    replace a with v0 in * by congruence.
+    destruct Mem.loadv in * ; congruence.
+  }
+  discriminate.
+Qed.
+
+(* Corollary of [find_load_sound] for the case where the address computation
+   fails: [src] holds [Vundef]. *)
+Lemma find_load_notrap1_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = None ->
+    rs # src = Vundef.
+Proof.
+  intros until rs. intros REL FINDLOAD ADDR.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  rewrite ADDR in Z.
+  assumption.
+Qed.
+
+(* Corollary of [find_load_sound] for the case where the address computation
+   succeeds but the memory load fails: [src] holds [Vundef]. *)
+Lemma find_load_notrap2_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = Some a ->
+    Mem.loadv chunk m a = None ->
+    rs # src = Vundef.
+Proof.
+  intros until a. intros REL FINDLOAD ADDR LOAD.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  rewrite ADDR in Z.
+  destruct Mem.loadv.
+  discriminate.
+  assumption.
+Qed.
+
+(* In a state where [rel] is correct, forwarding every argument through
+   [forward_move rel] does not change the values read. *)
+Lemma forward_move_map:
+  forall rel args rs,
+    sem_rel rel rs ->
+    rs ## (map (forward_move rel) args) = rs ## args.
+Proof.
+  induction args; simpl; trivial.
+  intros rs REL.
+  f_equal.
+  2: (apply IHargs; assumption).
+  unfold forward_move, sem_rel, sem_reg, sem_sym_val in *.
+  pose proof (REL a) as RELa.
+  destruct (rel ! a); trivial.
+  destruct s; congruence.
+Qed.
+
+
+(* Single-register version of [forward_move_map]. *)
+Lemma forward_move_rs:
+  forall rel arg rs,
+    sem_rel rel rs ->
+    rs # (forward_move rel arg) = rs # arg.
+Proof.
+  unfold forward_move, sem_rel, sem_reg, sem_sym_val in *.
+  intros until rs.
+  intro REL.
+  pose proof (REL arg) as RELarg.
+  destruct (rel ! arg); trivial.
+  destruct s; congruence.
+Qed.
+
+(* Soundness of [oper].  When [find_op] finds an equivalent register [r],
+   the result [v] equals [rs # r] and [move_sound] applies; otherwise the
+   result follows from [oper1_sound]. *)
+Lemma oper_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold oper.
+  destruct find_op eqn:FIND.
+  {
+    assert (eval_operation genv sp op rs ## (map (forward_move rel) args) m = Some rs # r) as FIND_OP.
+    {
+      apply (find_op_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_OP by assumption.
+    replace v with (rs # r) by congruence.
+    apply move_sound; auto.
+  }
+  apply oper1_sound; trivial.
+Qed.
+
+(* Soundness of [gen_oper].  The first goal produced by [destruct op] is
+   treated as the [Omove] case (a single argument goes through
+   [move_sound]); all remaining cases go through [oper_sound]. *)
+Lemma gen_oper_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (gen_oper op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold gen_oper.
+  destruct op.
+  { destruct args as [ | h0 t0].
+    apply oper_sound; auto.
+    destruct t0.
+    {
+      simpl in *.
+      replace v with (rs # h0) by congruence.
+      apply move_sound; auto.
+    }
+    apply oper_sound; auto.
+  }
+  all: apply oper_sound; auto.
+Qed.
+
+
+(* Soundness of [load2] when the address computation and the load both
+   succeed with value [v], assuming [dst] is not one of the arguments. *)
+Lemma load2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL NOT_IN ADDR LOAD x.
+  pose proof (kill_reg_sound rel dst rs v REL x) as KILL.
+  unfold load2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    destruct eval_addressing.
+    {
+      replace a with v0 in * by congruence.
+      destruct Mem.loadv; congruence.
+    }
+    discriminate.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+(* Variant of [load2_sound] for the case where address evaluation fails: [dst] receives [Vundef]. *)
+Lemma load2_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL NOT_IN ADDR x.
+  pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL. (* reused for registers other than [dst] *)
+  unfold load2.
+  destruct (peq x dst). (* split on whether the register under scrutiny is the destination *)
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    rewrite ADDR.
+    right. (* the goal is a disjunction here; its second alternative is closed by [trivial] *)
+    trivial.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+(* Variant of [load2_sound] for the case where the address evaluates to [a] but the memory read at [a] fails: [dst] receives [Vundef]. *)
+Lemma load2_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL NOT_IN ADDR LOAD x.
+  pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL. (* reused for registers other than [dst] *)
+  unfold load2.
+  destruct (peq x dst). (* split on whether the register under scrutiny is the destination *)
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    rewrite ADDR.
+    rewrite LOAD.
+    right; trivial.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+(* Soundness of [load1] for a successful load; unlike [load2_sound] there is no side condition on [dst]. *)
+Lemma load1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec. (* membership test inside [load1]; presumably on whether [dst] occurs in [args] *)
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_sound with (a := a); auto. (* [auto] discharges the non-membership premise from the [in_dec] case *)
+Qed.
+(* Variant of [load1_sound] for the case where address evaluation fails: [dst] receives [Vundef]. *)
+Lemma load1_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL ADDR LOAD. (* NOTE(review): the statement has only two premises, so LOAD presumably names the register bound by unfolding [sem_rel] (cf. the x introduced in load2_notrap1_sound) -- confirm *)
+  unfold load1.
+  destruct in_dec. (* membership test inside [load1]; presumably on whether [dst] occurs in [args] *)
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_notrap1_sound; auto.
+Qed.
+(* Variant of [load1_sound] for the case where the address evaluates to [a] but the memory read fails: [dst] receives [Vundef]. *)
+Lemma load1_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec. (* membership test inside [load1]; presumably on whether [dst] occurs in [args] *)
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_notrap2_sound with (a := a); auto.
+Qed.
+(* Soundness of the [load] transfer function for a successful load: [load chunk addr dst args rel] holds after writing the loaded value [v] to [dst]. *)
+Lemma load_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL ADDR LOAD.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND. (* was the same load already recorded for some register [src]? *)
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption. (* arguments renamed by [forward_move] have the same values under [rs] *)
+    destruct eval_addressing in *.
+    2: discriminate.
+    replace v0 with a in * by congruence.
+    destruct Mem.loadv in *.
+    2: discriminate.
+    replace v with (rs # src) by congruence. (* the loaded value is already in [src], so the update is a move from [src] *)
+    apply move_sound; auto.
+  }
+  apply load1_sound with (a := a); trivial. (* nothing recorded: fall back to [load1] *)
+Qed.
+(* Soundness of [load] when address evaluation fails: [dst] receives [Vundef]. *)
+Lemma load_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL ADDR.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND. (* was the same load already recorded for some register [src]? *)
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    destruct eval_addressing in *.
+    discriminate.
+    rewrite <- FIND_LOAD. (* [src] holds Vundef, so writing Vundef to [dst] is a move from [src] *)
+    apply move_sound; auto.
+  }
+  apply load1_notrap1_sound; trivial. (* nothing recorded: fall back to [load1] *)
+Qed.
+(* Soundness of [load] when the address evaluates to [a] but the memory read fails: [dst] receives [Vundef]. *)
+Lemma load_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL ADDR. (* only two of the three premises are introduced; the Mem.loadv premise stays in the goal *)
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND. (* was the same load already recorded for some register [src]? *)
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    rewrite ADDR in FIND_LOAD.
+    destruct Mem.loadv; intro. (* case split on the read result, then introduce the pending Mem.loadv premise in each case *)
+    discriminate.
+    rewrite <- FIND_LOAD.
+    apply move_sound; auto.
+  }
+  apply load1_notrap2_sound; trivial. (* nothing recorded: fall back to [load1] *)
+Qed.
+(* Killing a register only weakens a relation: any [rs] satisfying [mpc] also satisfies [kill_reg res mpc], with [rs] unchanged. *)
+Lemma kill_reg_weaken:
+  forall res mpc rs,
+    sem_rel mpc rs ->
+    sem_rel (kill_reg res mpc) rs.
+Proof.
+  intros until rs.
+  intros REL x.
+  pose proof (REL x) as RELx.
+  unfold kill_reg, sem_reg in *.
+  rewrite PTree.gfilter1.
+  destruct (peq res x). (* split on whether [x] is the killed register *)
+  { subst x.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  rewrite PTree.gro by congruence.
+  destruct (mpc ! x) as [sv | ]; trivial.
+  destruct negb; trivial. (* the filter either keeps the entry for [x] (RELx applies) or drops it *)
+Qed.
+(* The top relation [RELATION.top] is satisfied by every register set. *)
+Lemma top_ok:
+  forall rs, sem_rel RELATION.top rs.
+Proof.
+  unfold sem_rel, sem_reg, RELATION.top.
+  intros.
+  rewrite PTree.gempty. (* after unfolding, top is an empty tree: no register has an entry *)
+  reflexivity.
+Qed.
+(* Monotonicity of [sem_rel] w.r.t. the lattice order: if [r1] is above [r2] for [RELATION.ge], every [rs] satisfying [r2] also satisfies [r1]. *)
+Lemma sem_rel_ge:
+  forall r1 r2 : RELATION.t,
+    (RELATION.ge r1 r2) ->
+    forall rs : regset,
+      (sem_rel r2 rs) -> (sem_rel r1 rs).
+Proof.
+  intros r1 r2 GE rs RE x.
+  pose proof (RE x) as REx.
+  pose proof (GE x) as GEx.
+  unfold sem_reg in *.
+  destruct (r1 ! x) as [r1x | ] in *;
+    destruct (r2 ! x) as [r2x | ] in *;
+    congruence. (* all four presence/absence combinations for [x] are closed from REx and GEx *)
+Qed.
+End SAME_MEMORY.
+(* Soundness of [kill_mem] across an arbitrary memory change: a relation valid in memory [m] yields, after [kill_mem], a relation valid in any memory [m']. *)
+Lemma kill_mem_sound :
+  forall m m' : mem,
+  forall rel : RELATION.t,
+  forall rs,
+    sem_rel m rel rs -> sem_rel m' (kill_mem rel) rs.
+Proof.
+  unfold sem_rel, sem_reg.
+  intros until rs.
+  intros SEM x.
+  pose proof (SEM x) as SEMx.
+  unfold kill_mem.
+  rewrite PTree.gfilter1.
+  unfold kill_sym_val_mem.
+  destruct (rel ! x) as [ sv | ].
+  2: reflexivity.
+  destruct sv; simpl in *; trivial.
+  {
+    destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial. (* one case is closed by [trivial]; the other needs the memory-independence argument below *)
+    rewrite SEMx.
+    apply op_depends_on_memory_correct; auto. (* relates evaluation of the operation in [m] and [m'], using DEPENDS *)
+  }
+Qed.
+  (* Soundness of [kill_store]: if a store at the address computed from [addr]/[args] turns [m] into [m'], a relation valid in [m] yields, after [kill_store], a relation valid in [m']. *)
+Lemma kill_store_sound :
+  forall m m' : mem,
+  forall rel : RELATION.t,
+  forall chunk addr args a v rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (Mem.storev chunk m a v) = Some m' ->
+    sem_rel m rel rs -> sem_rel m' (kill_store chunk addr args rel) rs.
+Proof.
+  unfold sem_rel, sem_reg.
+  intros until rs.
+  intros ADDR STORE SEM x.
+  pose proof (SEM x) as SEMx.
+  unfold kill_store, kill_store1.
+  rewrite PTree.gfilter1.
+  destruct (rel ! x) as [ sv | ].
+  2: reflexivity.
+  destruct sv; simpl in *; trivial.
+  { (* symbolic operation: same argument as in kill_mem_sound *)
+    destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial.
+    rewrite SEMx.
+    apply op_depends_on_memory_correct; auto.
+  }
+  destruct may_overlap eqn:OVERLAP; simpl; trivial. (* symbolic load: the case left open is handled with may_overlap_sound below *)
+  destruct (eval_addressing genv sp addr0 rs ## args0) eqn:ADDR0.
+  {
+    erewrite may_overlap_sound with (args := (map (forward_move rel) args)).
+    all: try eassumption.
+    
+    erewrite forward_move_map by eassumption.
+    assumption.
+  }
+  intuition congruence.
+Qed. 
+End SOUNDNESS.
+(* Program matching relation: each function definition of [tp] is [transf_fundef] of the corresponding one in [p]; the second relation passed to [match_program] is [eq]. *)
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun cu f tf => tf = transf_fundef f) eq p tp.
+(* The output of [transf_program] is related to its input by [match_prog]. *)
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. apply match_transform_program; auto.
+Qed.
+(* Semantic preservation: the rest of this section relates executions of [prog] and of a matching [tprog]. *)
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog. (* global environment of the source program *)
+Let tge := Genv.globalenv tprog. (* global environment of the transformed program *)
+(* A function value found in [ge] is found in [tge] as its [transf_fundef] image. *)
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+(* Same as [functions_translated], for lookups through [Genv.find_funct_ptr]. *)
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+(* Symbol lookup gives the same result in [tge] as in [ge]. *)
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+(* [ge] and [tge] are equivalent in the sense of [Senv.equiv]. *)
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+(* [transf_fundef] does not change the signature of a function definition. *)
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; trivial.
+Qed.
+(* Callee resolution commutes with the transformation: a callee found in [ge] is found in [tge] as its [transf_fundef] image, with the same [ros] and [rs]. *)
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id]. (* callee given by a register [r] or by a symbol [id] *)
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+(* If [f] has instruction [i] at [pc], the transformed function has [transf_instr (forward_map f) pc i] at the same [pc]. *)
+Lemma transf_function_at:
+  forall (f : function) (pc : node) (i : instruction),
+  (fn_code f)!pc = Some i ->
+  (fn_code (transf_function f))!pc =
+    Some(transf_instr (forward_map f) pc i).
+Proof.
+  intros until i. intro CODE.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap. (* the transformed code is a [PTree.map] over the original code *)
+  unfold option_map.
+  rewrite CODE.
+  reflexivity.
+Qed.
+(* Holds when the entry of [map] at [pc] is [None], or is above (for [RELATION.ge]) some relation of the form [kill_reg res rel']. *)
+Definition is_killed_in_map (map : PMap.t RB.t) pc res :=
+  match PMap.get pc map with
+  | None => True
+  | Some rel => exists rel', RELATION.ge rel (kill_reg res rel')
+  end.
+(* Lifting of [is_killed_in_map] to an optional map: trivially true when there is no map. *)
+Definition is_killed_in_fmap fmap pc res :=
+  match fmap with
+  | None => True
+  | Some map => is_killed_in_map map pc res
+  end.
+(* Specializations of the generic definitions and lemmas above to [fundef], [unit] and the source environment [ge]; the remaining arguments (e.g. [sp], [m]) are supplied at each use. *)
+Definition sem_rel_b' := sem_rel_b fundef unit ge.
+Definition fmap_sem' := fmap_sem fundef unit ge.
+Definition subst_arg_ok' := subst_arg_ok fundef unit ge.
+Definition subst_args_ok' := subst_args_ok fundef unit ge.
+Definition kill_mem_sound' := kill_mem_sound fundef unit ge.
+Definition kill_store_sound' := kill_store_sound fundef unit ge.
+(* Lifting of [sem_rel_ge] to the optional relations [RB.t]: if [rb1] is above [rb2], whatever satisfies [rb2] satisfies [rb1]. *)
+Lemma sem_rel_b_ge:
+  forall rb1 rb2 : RB.t,
+    (RB.ge rb1 rb2) ->
+    forall sp m,
+    forall rs : regset,
+      (sem_rel_b' sp m rb2 rs) -> (sem_rel_b' sp m rb1 rs).
+Proof.
+  unfold sem_rel_b', sem_rel_b.
+  destruct rb1 as [r1 | ];
+    destruct rb2 as [r2 | ]; simpl;
+      intros GE sp m rs RE; try contradiction. (* only the Some/Some combination survives; the others are contradictory *)
+  apply sem_rel_ge with (r2 := r2); assumption.
+Qed.
+(* The transfer function [apply_instr'] maps [RB.bot] to [RB.bot] at every [pc]; the proof is by computation. *)
+Lemma apply_instr'_bot :
+  forall code,
+  forall pc,
+    RB.eq (apply_instr' code pc RB.bot) RB.bot.
+Proof.
+  reflexivity.
+Qed.
+(* Matching of stack frames: same [res], [sp], [pc] and [rs], transformed function, and the dataflow invariant at [pc] must hold for every memory and every value written to [res]. *)
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+    (forall m : mem,
+     forall vres, (fmap_sem' sp m (forward_map f) pc rs # res <- vres)) ->
+    match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs).
+(* Matching of execution states: identical up to transformed code, with pairwise matching stacks; regular states also carry the dataflow invariant [fmap_sem'] at the current [pc]. *)
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+                                 (STACKS: list_forall2 match_frames stk stk'),
+      (fmap_sem' sp m (forward_map f) pc rs) ->
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+  (* Tactic: from a hypothesis giving the instruction at some pc of [fn_code], add the corresponding [transf_function_at] fact to the context. *)
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+(* One-step simulation diagram: each step of the source program from [S1] is matched by one step of the transformed program with the same trace [t], and [match_states] is re-established. *)
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT. (* one case per step rule; TR_AT adds the transformed-code fact when a fn_code hypothesis is present *)
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+  
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial. (* show the invariant for the transfer result at pc, then weaken along RB.ge *)
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)). (* for this instruction the transfer result equals its input; justified after the braces *)
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption. (* dataflow fixpoint property relating the successor's entry to the transfer result at pc *)
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+- (* op *)
+  unfold transf_instr in *.
+  destruct (if is_trivial_op op then None else find_op_in_fmap (forward_map f) pc op
+               (subst_args (forward_map f) pc args)) eqn:FIND_OP.
+  { (* the analysis found a register r related to this operation by find_op_sound *)
+    destruct (is_trivial_op op).
+    discriminate.
+    unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto. (* the transformed instruction is still an Iop producing the same value v *)
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        rewrite MAP in H0.
+        rewrite find_op_sound with (rel := mpc) (src := r) in H0 by assumption.
+        assumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (gen_oper op res args mpc)).
+    {
+      replace (Some (gen_oper op res args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply gen_oper_sound; auto.
+  }
+  { (* nothing reusable: the operation is kept, with arguments rewritten by subst_args *)
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      rewrite (subst_args_ok' sp m) by assumption.
+      rewrite <- H0.
+      apply eval_operation_preserved. exact symbols_preserved.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: constructor.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: contradiction.
+
+    apply sem_rel_b_ge with (rb2 := Some (gen_oper op res args mpc)).
+    {
+      replace (Some (gen_oper op res args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply gen_oper_sound; auto.
+  }
+    
+(* load: the address evaluates and the memory read succeeds *)
+- unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  { (* the analysis found a register r related to this load; the transformed step is an Iop (exec_Iop), not a load *)
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        symmetry.
+        rewrite MAP in H0.
+        eapply find_load_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_sound with (a := a); auto.
+  }
+  {  (* nothing reusable: the load is kept, with arguments rewritten by subst_args *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_sound with (a := a); assumption.
+  }
+  
+- (* load notrap1: non-trapping load whose address evaluation fails *)
+  unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  { (* the analysis found a register r related to this load; the transformed step is an Iop (exec_Iop) *)
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        rewrite MAP in H0.
+        eapply find_load_notrap1_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_notrap1_sound; auto. (* NOTE(review): that lemma is stated with Vundef; presumably default_notrap_load_value chunk is convertible to it -- confirm *)
+  }
+  {  (* nothing reusable: the load is kept, with arguments rewritten by subst_args *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_notrap1_sound; assumption.
+  }
+  
+(* load notrap2: non-trapping load whose address evaluates but whose memory read fails *)
+- unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  { (* the analysis found a register r related to this load; the transformed step is an Iop (exec_Iop) *)
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        rewrite MAP in H0.
+        eapply find_load_notrap2_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: try eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_notrap2_sound with (a := a); auto.
+  }
+  {  (* nothing reusable: the load is kept, with arguments rewritten by subst_args *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_notrap2_sound with (a := a); assumption.
+  }
+  
+- (* store *)
+  econstructor. split.
+  {
+    assert (eval_addressing tge sp addr rs ## args = Some a).
+    rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+    eapply exec_Istore; eauto.
+    rewrite (subst_args_ok' sp m); assumption.
+  }
+  
+  constructor; auto.
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (kill_store chunk addr args mpc)); trivial. (* the transfer result for a store is kill_store applied to the current relation *)
+  {
+  replace (Some (kill_store chunk addr args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  rewrite MPC.
+  rewrite H.
+  reflexivity.
+  }
+  eapply (kill_store_sound' sp m); eassumption. (* the invariant moves from the old memory to the memory after the store *)
+  
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite (subst_args_ok' sp m) by assumption.
+  constructor. constructor; auto.
+
+  constructor. (* new match_frames entry: its invariant must hold for any later memory and any returned value *)
+  {
+    intros m' vres.
+    unfold fmap_sem', fmap_sem in *.
+    destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+    destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+    apply sem_rel_b_ge with (rb2 := Some (kill_reg res (kill_mem mpc))). (* transfer result for a call: kill memory-dependent facts, then the result register *)
+    {
+      replace (Some (kill_reg res (kill_mem mpc))) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    apply kill_reg_sound.
+    apply (kill_mem_sound' sp m).
+    assumption.
+  }
+  
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite (subst_args_ok' (Vptr stk Ptrofs.zero) m) by assumption.
+  constructor. auto.
+
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  
+  apply sem_rel_b_ge with (rb2 := Some RELATION.top). (* transfer result for a builtin is top: nothing is assumed afterwards *)
+  {
+    replace (Some RELATION.top) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply top_ok.
+  
+
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite (subst_args_ok' sp m); eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    destruct b; tauto. (* whichever branch is taken, its target is among the successors *)
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite (subst_arg_ok' sp m); eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    apply list_nth_z_in with (n := Int.unsigned n). (* the selected table entry is a member of the successor list *)
+    assumption.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* return instruction, with or without a returned register *)
+- destruct or as [arg | ].
+  {
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    unfold regmap_optget.
+    rewrite (subst_arg_ok' (Vptr stk Ptrofs.zero) m) by eassumption.
+    constructor; auto.
+  }
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    constructor; auto.
+  
+  
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := Some RELATION.top). (* at function entry the invariant is justified from top, via the fixpoint entry property *)
+  {
+    eapply DS.fixpoint_entry with (code := fn_code f) (successors := successors_instr); try eassumption.
+  }
+  apply top_ok.
+  
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    constructor; auto.
+
+(* return state: resume in the caller frame at the top of the stack *)
+- inv STACKS. inv H1. (* expose the top pair of matching frames and its invariant *)
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+(* Every initial state of [prog] has a matching initial state of [tprog]. *)
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor. (* first builds match_states, second the list_forall2 relation on the stacks *)
+Qed.
+(* Matching states agree on being final, with the same result [r]. *)
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+(* Main result of the section: a forward simulation from the semantics of [prog] to that of [tprog], assembled from the three lemmas above. *)
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v
index 9b1243c8..34ec0118 100644
--- a/backend/CSEdomain.v
+++ b/backend/CSEdomain.v
@@ -43,7 +43,7 @@ Definition eq_list_valnum: forall (x y: list valnum), {x=y}+{x<>y} := list_eq_de
 
 Definition eq_rhs (x y: rhs) : {x=y}+{x<>y}.
 Proof.
-  generalize chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum.
+  generalize trapping_mode_eq chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum. (* decidable-equality facts made available to [decide equality]; trapping_mode_eq is the one added by this change *)
   decide equality.
 Defined.
 
@@ -109,7 +109,16 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem):
   | load_eval_to: forall chunk addr vl a v,
       eval_addressing ge sp addr (map valu vl) = Some a ->
       Mem.loadv chunk m a = Some v ->
-      rhs_eval_to valu ge sp m (Load chunk addr vl) v.
+      rhs_eval_to valu ge sp m (Load chunk addr vl) v
+(*  | load_notrap1_eval_to: forall chunk addr vl,
+      eval_addressing ge sp addr (map valu vl) = None ->
+      rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
+                  (default_notrap_load_value chunk)
+  | load_notrap2_eval_to: forall chunk addr vl a,
+      eval_addressing ge sp addr (map valu vl) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
+                  (default_notrap_load_value chunk) *).
 
 Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem):
                                                       equation -> Prop :=
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 03c7ecfc..5bbb7508 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -71,7 +71,11 @@ Lemma rhs_eval_to_exten:
 Proof.
   intros. inv H; simpl in *.
 - constructor. rewrite valnums_val_exten by assumption. auto.
-- econstructor; eauto. rewrite valnums_val_exten by assumption. auto.
+- eapply load_eval_to; eauto. rewrite valnums_val_exten by assumption. auto.
+(*
+- apply load_notrap1_eval_to; auto. rewrite valnums_val_exten by assumption. assumption.
+- eapply load_notrap2_eval_to; eauto. rewrite valnums_val_exten by assumption. assumption.
+*)
 Qed.
 
 Lemma equation_holds_exten:
@@ -393,6 +397,39 @@ Proof.
 + intros. apply Regmap.gso; auto.
 Qed.
 
+(*
+Lemma add_load_holds_none1:
+  forall valu1 ge sp rs m n addr (args: list reg) chunk dst,
+  numbering_holds valu1 ge sp rs m n ->
+  eval_addressing ge sp addr rs##args = None ->
+  exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst chunk addr args).
+Proof.
+  unfold add_load; intros.
+  destruct (valnum_regs n args) as [n1 vl] eqn:VN.
+  exploit valnum_regs_holds; eauto.
+  intros (valu2 & A & B & C & D & E).
+  eapply add_rhs_holds; eauto.
++ rewrite Regmap.gss; auto. eapply load_notrap1_eval_to. rewrite <- B; eauto.
++ intros. apply Regmap.gso; auto.
+Qed.
+
+Lemma add_load_holds_none2:
+  forall valu1 ge sp rs m n addr (args: list reg) a chunk dst,
+  numbering_holds valu1 ge sp rs m n ->
+  eval_addressing ge sp addr rs##args = Some a ->
+  Mem.loadv chunk m a = None ->
+  exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst NOTRAP chunk addr args).
+Proof.
+  unfold add_load; intros.
+  destruct (valnum_regs n args) as [n1 vl] eqn:VN.
+  exploit valnum_regs_holds; eauto.
+  intros (valu2 & A & B & C & D & E).
+  eapply add_rhs_holds; eauto.
++ rewrite Regmap.gss; auto. eapply load_notrap2_eval_to. rewrite <- B; eauto. assumption. 
++ intros. apply Regmap.gso; auto.
+Qed.
+ *)
+
 Lemma set_unknown_holds:
   forall valu ge sp rs m n r v,
   numbering_holds valu ge sp rs m n ->
@@ -456,8 +493,8 @@ Lemma kill_all_loads_hold:
 Proof.
   intros. eapply kill_equations_hold; eauto.
   unfold filter_loads; intros. inv H1.
-  constructor. rewrite <- H2. apply op_depends_on_memory_correct; auto.
-  discriminate.
+  1: constructor; rewrite <- H2; apply op_depends_on_memory_correct; auto.
+  all: discriminate.
 Qed.
 
 Lemma kill_loads_after_store_holds:
@@ -486,6 +523,20 @@ Proof.
   apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
   erewrite <- regs_valnums_sound by eauto. eauto with va.
   apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
+(*
+- eapply load_notrap1_eval_to; assumption.
+- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
+  eapply load_notrap2_eval_to; eauto.
+  rewrite <- H9.
+  destruct a; simpl in H1; try discriminate.
+  destruct a0; simpl in H9; try discriminate; simpl; trivial.
+  rewrite negb_false_iff in H6. unfold aaddressing in H6.
+  eapply Mem.load_store_other. eauto.
+  eapply pdisjoint_sound; eauto.
+  apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
+  erewrite <- regs_valnums_sound by eauto. eauto with va.
+  apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
+*)
 Qed.
 
 Lemma store_normalized_range_sound:
@@ -562,6 +613,19 @@ Proof.
   unfold aaddressing. apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
   erewrite <- regs_valnums_sound by eauto. eauto with va.
   auto.
+(*
+- eapply load_notrap1_eval_to; assumption.
+- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
+  eapply load_notrap2_eval_to; eauto. rewrite <- H11.
+  destruct a; simpl in H10; try discriminate; simpl; trivial.
+  rewrite negb_false_iff in H8.
+  eapply Mem.load_storebytes_other. eauto.
+  rewrite H6. rewrite Z2Nat.id by omega.
+  eapply pdisjoint_sound. eauto.
+  unfold aaddressing. apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
+  erewrite <- regs_valnums_sound by eauto. eauto with va.
+  auto.
+*)
 Qed.
 
 Lemma load_memcpy:
@@ -1034,6 +1098,10 @@ Proof.
   destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
   destruct SAT as [valu1 NH1].
   exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
+  destruct trap.
+
+  (* TRAP *)
+  {
   destruct (find_rhs n1 (Load chunk addr vl)) as [r|] eqn:?.
 + (* replaced by move *)
   exploit find_rhs_sound; eauto. intros (v' & EV & LD).
@@ -1063,7 +1131,103 @@ Proof.
   unfold transfer; rewrite H.
   eapply add_load_holds; eauto.
   apply set_reg_lessdef; auto.
+  }
+
+  (* NOTRAP *)
+  {
+    assert (exists a' : val,
+       eval_addressing ge sp addr rs' ## args = Some a' /\ Val.lessdef a a')
+       as Haa'.    
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Haa' as [a' [Ha'1 Ha'2]].
+
+    assert (
+    exists v' : val,
+        Mem.loadv chunk m' a' = Some v' /\ Val.lessdef v v') as Hload' by
+    (apply Mem.loadv_extends with (m1 := m) (addr1 := a); assumption).
+    destruct Hload' as [v' [Hv'1 Hv'2]].
+
+    econstructor. split.
+    eapply exec_Iload; eauto.
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+
+    econstructor; eauto.
+    eapply analysis_correct_1; eauto. simpl; eauto.
+    unfold transfer. rewrite H.
+    exists valu1.
+    apply set_unknown_holds.
+    assumption.
+    apply set_reg_lessdef; assumption.
+  }
+
+- (* Iload notrap 1*)
+  destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
+  destruct SAT as [valu1 NH1].
+  exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
+
+    econstructor. split.
+    eapply exec_Iload_notrap1; eauto.
+    rewrite eval_addressing_preserved with (ge1 := ge).
+    apply eval_addressing_lessdef_none with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    exact symbols_preserved.
+
+    econstructor; eauto.
+    eapply analysis_correct_1; eauto. simpl; eauto.
+    unfold transfer. rewrite H.
+    exists valu1.
+    apply set_unknown_holds.
+    assumption.
+    apply set_reg_lessdef.
+    constructor. assumption.
+  
+- (* Iload notrap 2*)
+  destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
+  destruct SAT as [valu1 NH1].
+  exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
 
+    assert (exists a' : val,
+       eval_addressing ge sp addr rs' ## args = Some a' /\ Val.lessdef a a')
+       as Haa'.    
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Haa' as [a' [Ha'1 Ha'2]].
+
+    destruct (Mem.loadv chunk m' a') eqn:Hload'.
+
+    {
+      econstructor. split.
+      eapply exec_Iload; eauto.
+      try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+      
+      econstructor; eauto.
+      eapply analysis_correct_1; eauto. simpl; eauto.
+      unfold transfer. rewrite H.
+      exists valu1.
+      apply set_unknown_holds.
+      assumption.
+      unfold default_notrap_load_value.
+      apply set_reg_lessdef; eauto.
+    }
+    {
+      econstructor. split.
+      eapply exec_Iload_notrap2; eauto.
+      try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+      
+      econstructor; eauto.
+      eapply analysis_correct_1; eauto. simpl; eauto.
+      unfold transfer. rewrite H.
+      exists valu1.
+      apply set_unknown_holds.
+      assumption.
+      apply set_reg_lessdef.
+      constructor. assumption.
+    }
+    
 - (* Istore *)
   destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
   destruct SAT as [valu1 NH1].
diff --git a/backend/CleanupLabelsproof.v b/backend/CleanupLabelsproof.v
index e92be2b4..84ca403e 100644
--- a/backend/CleanupLabelsproof.v
+++ b/backend/CleanupLabelsproof.v
@@ -255,6 +255,18 @@ Proof.
   left; econstructor; split.
   econstructor; eauto.
   econstructor; eauto with coqlib.
+(* Lload notrap1 *)
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = None).
+    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  left; econstructor; split.
+  eapply exec_Lload_notrap1; eauto.
+  econstructor; eauto with coqlib.
+(* Lload notrap2 *)
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
+    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  left; econstructor; split.
+  eapply exec_Lload_notrap2; eauto.
+  econstructor; eauto with coqlib.
 (* Lstore *)
   assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
diff --git a/backend/Cminor.v b/backend/Cminor.v
index ca01ad50..91a4c104 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -676,12 +676,24 @@ Definition outcome_block (out: outcome) : outcome :=
   | out => out
   end.
 
+(*
 Definition outcome_result_value
-    (out: outcome) (retsig: option typ) (vres: val) : Prop :=
+    (out: outcome) (retsig: rettype) (vres: val) : Prop :=
   match out with
   | Out_normal => vres = Vundef
   | Out_return None => vres = Vundef
-  | Out_return (Some v) => retsig <> None /\ vres = v
+  | Out_return (Some v) => retsig <> Tvoid /\ vres = v
+  | Out_tailcall_return v => vres = v
+  | _ => False
+  end.
+*)
+
+Definition outcome_result_value
+    (out: outcome) (vres: val) : Prop :=
+  match out with
+  | Out_normal => vres = Vundef
+  | Out_return None => vres = Vundef
+  | Out_return (Some v) => vres = v
   | Out_tailcall_return v => vres = v
   | _ => False
   end.
@@ -711,7 +723,7 @@ Inductive eval_funcall:
       Mem.alloc m 0 f.(fn_stackspace) = (m1, sp) ->
       set_locals f.(fn_vars) (set_params vargs f.(fn_params)) = e ->
       exec_stmt f (Vptr sp Ptrofs.zero) e m1 f.(fn_body) t e2 m2 out ->
-      outcome_result_value out f.(fn_sig).(sig_res) vres ->
+      outcome_result_value out vres ->
       outcome_free_mem out m2 sp f.(fn_stackspace) m3 ->
       eval_funcall m (Internal f) vargs t m3 vres
   | eval_funcall_external:
@@ -995,7 +1007,7 @@ Proof.
   subst vres. replace k with (call_cont k') by congruence.
   apply star_one. apply step_return_0; auto.
   (* Out_return Some *)
-  destruct H3. subst vres.
+  subst vres.
   replace k with (call_cont k') by congruence.
   apply star_one. eapply step_return_1; eauto.
   (* Out_tailcall_return *)
diff --git a/backend/Cminortyping.v b/backend/Cminortyping.v
index fccbda27..92ec45f2 100644
--- a/backend/Cminortyping.v
+++ b/backend/Cminortyping.v
@@ -130,7 +130,7 @@ Definition opt_set (e: S.typenv) (optid: option ident) (ty: typ) : res S.typenv
   | Some id => S.set e id ty
   end.
 
-Fixpoint type_stmt (tret: option typ) (e: S.typenv) (s: stmt) : res S.typenv :=
+Fixpoint type_stmt (tret: rettype) (e: S.typenv) (s: stmt) : res S.typenv :=
   match s with
   | Sskip => OK e
   | Sassign id a => type_assign e id a
@@ -141,7 +141,7 @@ Fixpoint type_stmt (tret: option typ) (e: S.typenv) (s: stmt) : res S.typenv :=
       do e2 <- type_exprlist e1 args sg.(sig_args);
       opt_set e2 optid (proj_sig_res sg)
   | Stailcall sg fn args =>
-      assertion (opt_typ_eq sg.(sig_res) tret);
+      assertion (rettype_eq sg.(sig_res) tret);
       do e1 <- type_expr e fn Tptr;
       type_exprlist e1 args sg.(sig_args)
   | Sbuiltin optid ef args =>
@@ -163,10 +163,14 @@ Fixpoint type_stmt (tret: option typ) (e: S.typenv) (s: stmt) : res S.typenv :=
   | Sswitch sz a tbl dfl =>
       type_expr e a (if sz then Tlong else Tint)
   | Sreturn opta =>
-      match opta, tret with
-      | None, _ => OK e
-      | Some a, Some t => type_expr e a t
-      | _, _ => Error (msg "inconsistent return")
+      match opta with
+      | None => OK e
+      | Some a => type_expr e a (proj_rettype tret)
+(*
+          if rettype_eq tret Tvoid
+          then Error (msg "inconsistent return")
+          else type_expr e a (proj_rettype tret)
+*)
       end
   | Slabel lbl s1 =>
       type_stmt tret e s1
@@ -186,7 +190,7 @@ Definition type_function (f: function) : res typenv :=
 Section SPEC.
 
 Variable env: ident -> typ.
-Variable tret: option typ.
+Variable tret: rettype.
 
 Inductive wt_expr: expr -> typ -> Prop :=
   | wt_Evar: forall id,
@@ -205,9 +209,9 @@ Inductive wt_expr: expr -> typ -> Prop :=
       wt_expr a1 Tptr ->
       wt_expr (Eload chunk a1) (type_of_chunk chunk).
 
-Definition wt_opt_assign (optid: option ident) (optty: option typ) : Prop :=
+Definition wt_opt_assign (optid: option ident) (ty: rettype) : Prop :=
   match optid with
-  | Some id => match optty with Some ty => ty | None => Tint end = env id
+  | Some id => proj_rettype ty = env id
   | _ => True
   end.
 
@@ -251,8 +255,8 @@ Inductive wt_stmt: stmt -> Prop :=
       wt_stmt (Sswitch sz a tbl dfl)
   | wt_Sreturn_none:
       wt_stmt (Sreturn None)
-  | wt_Sreturn_some: forall a t,
-      tret = Some t -> wt_expr a t ->
+  | wt_Sreturn_some: forall a,
+      wt_expr a (proj_rettype tret) ->
       wt_stmt (Sreturn (Some a))
   | wt_Slabel: forall lbl s1,
       wt_stmt s1 ->
@@ -393,7 +397,7 @@ Proof.
 - constructor; eauto.
 - constructor.
 - constructor; eauto using type_expr_sound with ty.
-- destruct tret, o; try (monadInv T); econstructor; eauto using type_expr_sound with ty.
+- destruct o; try (monadInv T); econstructor; eauto using type_expr_sound with ty.
 - constructor; eauto.
 - constructor.
 Qed.
@@ -414,9 +418,9 @@ Definition wt_env (env: typenv) (e: Cminor.env) : Prop :=
 Definition def_env (f: function) (e: Cminor.env) : Prop :=
   forall id, In id f.(fn_params) \/ In id f.(fn_vars) -> exists v, e!id = Some v.
 
-Inductive wt_cont_call: cont -> option typ -> Prop :=
+Inductive wt_cont_call: cont -> rettype -> Prop :=
   | wt_cont_Kstop:
-      wt_cont_call Kstop (Some Tint)
+      wt_cont_call Kstop Tint
   | wt_cont_Kcall: forall optid f sp e k tret env
         (WT_FN: wt_function env f)
         (WT_CONT: wt_cont env f.(fn_sig).(sig_res) k)
@@ -425,7 +429,7 @@ Inductive wt_cont_call: cont -> option typ -> Prop :=
         (WT_DEST: wt_opt_assign env optid tret),
       wt_cont_call (Kcall optid f sp e k) tret
 
-with wt_cont: typenv -> option typ -> cont -> Prop :=
+with wt_cont: typenv -> rettype -> cont -> Prop :=
   | wt_cont_Kseq: forall env tret s k,
       wt_stmt env tret s ->
       wt_cont env tret k ->
@@ -451,7 +455,7 @@ Inductive wt_state: state -> Prop :=
         (WT_CONT: wt_cont_call k (funsig f).(sig_res)),
       wt_state (Callstate f args k m)
   | wt_return_state: forall v k m tret
-        (WT_RES: Val.has_type v (match tret with None => Tint | Some t => t end))
+        (WT_RES: Val.has_type v (proj_rettype tret))
         (WT_CONT: wt_cont_call k tret),
       wt_state (Returnstate v k m).
 
@@ -651,9 +655,8 @@ Proof.
   rewrite H8; eapply call_cont_wt; eauto.
 - inv WT_STMT. exploit external_call_well_typed; eauto. intros TRES.
   econstructor; eauto using wt_Sskip.
-  unfold proj_sig_res in TRES; red in H5.
-  destruct optid. rewrite H5 in TRES. apply wt_env_assign; auto. assumption.
-  destruct optid. apply def_env_assign; auto. assumption.
+  destruct optid; auto. apply wt_env_assign; auto. rewrite <- H5; auto.
+  destruct optid; auto. apply def_env_assign; auto.
 - inv WT_STMT. econstructor; eauto. econstructor; eauto.
 - inv WT_STMT. destruct b; econstructor; eauto.
 - inv WT_STMT. econstructor; eauto. econstructor; eauto. constructor; auto.
@@ -664,7 +667,7 @@ Proof.
 - econstructor; eauto using wt_Sexit.
 - inv WT_STMT. econstructor; eauto using call_cont_wt. exact I.
 - inv WT_STMT. econstructor; eauto using call_cont_wt.
-  rewrite H2. eapply wt_eval_expr; eauto.
+  eapply wt_eval_expr; eauto.
 - inv WT_STMT. econstructor; eauto.
 - inversion WT_FN; subst.
   assert (WT_CK: wt_cont env (sig_res (fn_sig f)) (call_cont k)).
@@ -675,7 +678,7 @@ Proof.
   constructor; auto.
   apply wt_env_set_locals. apply wt_env_set_params. rewrite H2; auto.
   red; intros. apply def_set_locals. destruct H4; auto. left; apply def_set_params; auto.
-- exploit external_call_well_typed; eauto. unfold proj_sig_res. simpl in *. intros.
+- exploit external_call_well_typed; eauto. intros.
   econstructor; eauto.
 - inv WT_CONT. econstructor; eauto using wt_Sskip.
   red in WT_DEST.
diff --git a/backend/Constprop.v b/backend/Constprop.v
index 4aab7677..0be9438c 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -69,7 +69,7 @@ Fixpoint successor_rec (n: nat) (f: function) (ae: AE.t) (pc: node) : node :=
       match f.(fn_code)!pc with
       | Some (Inop s) =>
           successor_rec n' f ae s
-      | Some (Icond cond args s1 s2) =>
+      | Some (Icond cond args s1 s2 _) =>
           match resolve_branch (eval_static_condition cond (aregs ae args)) with
           | Some b => successor_rec n' f ae (if b then s1 else s2)
           | None => pc
@@ -181,7 +181,7 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
               let (op', args') := op_strength_reduction op args aargs in
               Iop op' args' res s'
           end
-      | Iload chunk addr args dst s =>
+      | Iload TRAP chunk addr args dst s =>
           let aargs := aregs ae args in
           let a := ValueDomain.loadv chunk rm am (eval_static_addressing addr aargs) in
           match const_for_result a with
@@ -189,7 +189,7 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
               Iop cop nil dst s
           | None =>
               let (addr', args') := addr_strength_reduction addr args aargs in
-              Iload chunk addr' args' dst s
+              Iload TRAP chunk addr' args' dst s
           end
       | Istore chunk addr args src s =>
           let aargs := aregs ae args in
@@ -217,14 +217,14 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
              end
           | _, _ => dfl
           end
-      | Icond cond args s1 s2 =>
+      | Icond cond args s1 s2 i =>
           let aargs := aregs ae args in
           match resolve_branch (eval_static_condition cond aargs) with
           | Some b =>
               if b then Inop s1 else Inop s2
           | None =>
               let (cond', args') := cond_strength_reduction cond args aargs in
-              Icond cond' args' s1 s2
+              Icond cond' args' s1 s2 i
           end
       | Ijumptable arg tbl =>
           match areg ae arg with
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index a5d08a0f..60663503 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -142,8 +142,8 @@ Inductive match_pc (f: function) (rs: regset) (m: mem): nat -> node -> node -> P
       f.(fn_code)!pc = Some (Inop s) ->
       match_pc f rs m n s pcx ->
       match_pc f rs m (S n) pc pcx
-  | match_pc_cond: forall n pc cond args s1 s2 pcx,
-      f.(fn_code)!pc = Some (Icond cond args s1 s2) ->
+  | match_pc_cond: forall n pc cond args s1 s2 pcx i,
+      f.(fn_code)!pc = Some (Icond cond args s1 s2 i) ->
       (forall b,
         eval_condition cond rs##args m = Some b ->
         match_pc f rs m n (if b then s1 else s2) pcx) ->
@@ -406,6 +406,8 @@ Proof.
   assert (VM1: vmatch bc a aa) by (eapply eval_static_addressing_sound; eauto with va).
   set (av := loadv chunk (romem_for cu) am aa).
   assert (VM2: vmatch bc v av) by (eapply loadv_sound; eauto).
+  destruct trap.
+  {
   destruct (const_for_result av) as [cop|] eqn:?; intros.
 + (* constant-propagated *)
   exploit const_for_result_correct; eauto. intros (v' & A & B).
@@ -431,6 +433,59 @@ Proof.
   left; econstructor; econstructor; split.
   eapply exec_Iload; eauto.
   eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+  }
+  {
+    assert (exists v2 : val,
+         eval_addressing ge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) = Some v2 /\ Val.lessdef a v2) as Hexist2.
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Hexist2 as [v2 [Heval2 Hlessdef2]].
+    destruct (Mem.loadv_extends chunk m m' a v2 v MEM H1 Hlessdef2) as [vX [Hvx1 Hvx2]].
+    left; econstructor; econstructor; split.
+    eapply exec_Iload with (a := v2); eauto.
+    try (erewrite eval_addressing_preserved with (ge1:=ge); auto;
+    exact symbols_preserved).
+  eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    
+  }
+
+- (* Iload notrap1 *)
+  rename pc'0 into pc. TransfInstr.
+  assert (eval_addressing tge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) =  None).
+  rewrite eval_addressing_preserved with (ge1 := ge); eauto.
+  apply eval_addressing_lessdef_none with (vl1 := rs ## args).
+  apply regs_lessdef_regs; assumption.
+  assumption.
+  exact symbols_preserved.
+  
+  left; econstructor; econstructor; split.
+  eapply exec_Iload_notrap1; eauto.
+  eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+
+- (* Iload notrap2 *)
+  rename pc'0 into pc. TransfInstr.
+    assert (exists v2 : val,
+         eval_addressing ge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) = Some v2 /\ Val.lessdef a v2) as Hexist2.
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Hexist2 as [a' [Heval' Hlessdef']].
+    destruct (Mem.loadv chunk m' a') eqn:Hload'.
+    {
+    left; econstructor; econstructor; split.
+    eapply exec_Iload; eauto.
+   
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+    eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    }
+    {
+      left; econstructor; econstructor; split.
+      eapply exec_Iload_notrap2; eauto.
+   
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+    eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    }
 
 - (* Istore *)
   rename pc'0 into pc. TransfInstr.
diff --git a/backend/Conventions.v b/backend/Conventions.v
index 6025c6b4..14ffb587 100644
--- a/backend/Conventions.v
+++ b/backend/Conventions.v
@@ -34,6 +34,73 @@ Proof.
   apply IHpl; auto.
 Qed.
 
+(** ** Stack size of function arguments *)
+
+(** [size_arguments s] returns the number of [Outgoing] slots used
+  to call a function with signature [s]. *)
+
+Definition max_outgoing_1 (accu: Z) (l: loc) : Z :=  (* raise the bound [accu] so that it covers location [l] *)
+  match l with
+  | S Outgoing ofs ty => Z.max accu (ofs + typesize ty)  (* [ofs + typesize ty]: offset just past this [Outgoing] slot *)
+  | _ => accu  (* any other location leaves [accu] unchanged *)
+  end.
+
+Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z :=  (* lift [max_outgoing_1] to an [rpair] of locations *)
+  match rl with
+  | One l => max_outgoing_1 accu l
+  | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2  (* account for [l1], then for [l2] *)
+  end.
+
+Definition size_arguments (s: signature) : Z :=  (* bound obtained by folding [max_outgoing_2] over [loc_arguments s] *)
+  List.fold_left max_outgoing_2 (loc_arguments s) 0.  (* the fold starts from 0 *)
+
+(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
+
+Remark fold_max_outgoing_above:  (* folding [max_outgoing_2] never yields less than the initial accumulator [n] *)
+  forall l n, fold_left max_outgoing_2 l n >= n.
+Proof.
+  assert (A: forall n l, max_outgoing_1 n l >= n).  (* A: a single [max_outgoing_1] step never lowers its accumulator *)
+  { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
+  induction l; simpl; intros. 
+  - omega.
+  - eapply Zge_trans. eauto.
+    destruct a; simpl. apply A. eapply Zge_trans; eauto.
+Qed.
+
+Lemma size_arguments_above:  (* [size_arguments] is never negative *)
+  forall s, size_arguments s >= 0.
+Proof.
+  intros. apply fold_max_outgoing_above.  (* instance of the remark with initial accumulator 0 *)
+Qed.
+
+Lemma loc_arguments_bounded:  (* every [Outgoing] slot among the argument locations of [s] ends at or below [size_arguments s] *)
+  forall (s: signature) (ofs: Z) (ty: typ),
+  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
+  ofs + typesize ty <= size_arguments s.
+Proof.
+  intros until ty.
+  assert (A: forall n l, n <= max_outgoing_1 n l).  (* A: [max_outgoing_1] never lowers its accumulator *)
+  { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
+  assert (B: forall p n,  (* B: one [max_outgoing_2] step on [p] covers the slot if it occurs in [p] *)
+             In (S Outgoing ofs ty) (regs_of_rpair p) ->
+             ofs + typesize ty <= max_outgoing_2 n p).
+  { intros. destruct p; simpl in H; intuition; subst; simpl.
+  - xomega.
+  - eapply Z.le_trans. 2: apply A. xomega.
+  - xomega. }
+  assert (C: forall l n,  (* C: the whole fold over [l] covers the slot if it occurs in [l] *)
+             In (S Outgoing ofs ty) (regs_of_rpairs l) ->
+             ofs + typesize ty <= fold_left max_outgoing_2 l n).
+  { induction l; simpl; intros.
+  - contradiction.
+  - rewrite in_app_iff in H. destruct H.
+  + eapply Z.le_trans. eapply B; eauto.
+    apply Z.ge_le. apply fold_max_outgoing_above.
+  + apply IHl; auto.
+  }
+  apply C. 
+Qed.
+
 (** ** Location of function parameters *)
 
 (** A function finds the values of its parameter in the same locations
diff --git a/backend/Deadcode.v b/backend/Deadcode.v
index 2286876e..3412a6fa 100644
--- a/backend/Deadcode.v
+++ b/backend/Deadcode.v
@@ -123,7 +123,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t)
       if is_dead nres then after
       else if is_int_zero nres then (kill res ne, nm)
       else (add_needs args (needs_of_operation op nres) (kill res ne), nm)
-  | Some (Iload chunk addr args dst s) =>
+  | Some (Iload trap chunk addr args dst s) =>
       let ndst := nreg ne dst in
       if is_dead ndst then after
       else if is_int_zero ndst then (kill dst ne, nm)
@@ -142,7 +142,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t)
        nmem_dead_stack f.(fn_stacksize))
   | Some(Ibuiltin ef args res s) =>
       transfer_builtin approx!!pc ef args res ne nm
-  | Some(Icond cond args s1 s2) =>
+  | Some(Icond cond args s1 s2 _) =>
       if peq s1 s2 then after else 
         (add_needs args (needs_of_condition cond) ne, nm)
   | Some(Ijumptable arg tbl) =>
@@ -175,7 +175,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t)
         end
       else
         instr
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       let ndst := nreg (fst an!!pc) dst in
       if is_dead ndst then
         Inop s
@@ -192,7 +192,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t)
       if nmem_contains (snd an!!pc) (aaddr_arg approx!!pc dst) sz
       then instr
       else Inop s
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 _ =>
       if peq s1 s2 then Inop s1 else instr
   | _ =>
       instr
diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v
index 2edc0395..6919fe78 100644
--- a/backend/Deadcodeproof.v
+++ b/backend/Deadcodeproof.v
@@ -829,6 +829,83 @@ Ltac UseTransfer :=
   apply eagree_update; eauto 2 with na.
   eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
 
+- (* load notrap1 *)
+  TransfInstr; UseTransfer.
+  destruct (is_dead (nreg ne dst)) eqn:DEAD;
+  [idtac|destruct (is_int_zero (nreg ne dst)) eqn:INTZERO];
+  simpl in *.
++ (* dead instruction, turned into a nop *)
+  econstructor; split.
+  eapply exec_Inop; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update_dead; auto with na.
++ (* instruction with needs = [I Int.zero], turned into a load immediate of zero. *)
+  econstructor; split.
+  eapply exec_Iop with (v := Vint Int.zero); eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; auto.
+  rewrite is_int_zero_sound by auto.
+  unfold default_notrap_load_value.
+  constructor.
++ (* preserved *)
+  exploit eval_addressing_lessdef_none. eapply add_needs_all_lessdef; eauto. eassumption.
+  intro Hnone'.
+  assert (eval_addressing tge (Vptr sp0 Ptrofs.zero) addr te ## args = None) as Hnone2'.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  assumption.
+  exact symbols_preserved.
+  
+  econstructor; split.
+  eapply exec_Iload_notrap1; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+
+- (* load notrap2 *)
+  TransfInstr; UseTransfer.
+  
+  destruct (is_dead (nreg ne dst)) eqn:DEAD;
+  [idtac|destruct (is_int_zero (nreg ne dst)) eqn:INTZERO];
+  simpl in *.
++ (* dead instruction, turned into a nop *)
+  econstructor; split.
+  eapply exec_Inop; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update_dead; auto with na.
++ (* instruction with needs = [I Int.zero], turned into a load immediate of zero. *)
+  econstructor; split.
+  eapply exec_Iop with (v := Vint Int.zero); eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; auto.
+  rewrite is_int_zero_sound by auto.
+  unfold default_notrap_load_value.
+  constructor.
++ (* preserved *)
+  exploit eval_addressing_lessdef. eapply add_needs_all_lessdef; eauto. eauto.
+  intros (ta & U & V).
+  destruct (Mem.loadv chunk tm ta) eqn:Hchunk2.
+  {
+  econstructor; split.
+  eapply exec_Iload. eauto.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  eassumption.
+  exact symbols_preserved.
+  eassumption.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+  }
+  {
+  econstructor; split.
+  eapply exec_Iload_notrap2. eauto.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  eassumption.
+  exact symbols_preserved.
+  eassumption.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+  }
 - (* store *)
   TransfInstr; UseTransfer.
   destruct (nmem_contains nm (aaddressing (vanalyze cu f) # pc addr args)
diff --git a/backend/Debugvar.v b/backend/Debugvar.v
index 1f361030..56908855 100644
--- a/backend/Debugvar.v
+++ b/backend/Debugvar.v
@@ -233,7 +233,7 @@ Definition transfer (lm: labelmap) (before: option avail) (i: instruction):
           (lm, Some (kill (S sl ofs ty) s))
       | Lop op args dst =>
           (lm, Some (kill (R dst) s))
-      | Lload chunk addr args dst =>
+      | Lload trap chunk addr args dst =>
           (lm, Some (kill (R dst) s))
       | Lstore chunk addr args src =>
           (lm, before)
diff --git a/backend/Debugvarproof.v b/backend/Debugvarproof.v
index d31c63ec..95020637 100644
--- a/backend/Debugvarproof.v
+++ b/backend/Debugvarproof.v
@@ -449,6 +449,22 @@ Proof.
   eauto. eauto.
   apply eval_add_delta_ranges. traceEq.
   constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  eapply plus_left.
+  eapply exec_Lload_notrap1.
+  rewrite <- H; apply eval_addressing_preserved; exact symbols_preserved.
+  eauto. eauto.
+  apply eval_add_delta_ranges. traceEq.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  eapply plus_left.
+  eapply exec_Lload_notrap2.
+  rewrite <- H; apply eval_addressing_preserved; exact symbols_preserved.
+  eauto. eauto.
+  apply eval_add_delta_ranges. traceEq.
+  constructor; auto.
 - (* store *)
   econstructor; split.
   eapply plus_left.
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
new file mode 100644
index 00000000..af85efe4
--- /dev/null
+++ b/backend/Duplicate.v
@@ -0,0 +1,203 @@
+(** RTL node duplication using external oracle. Used to form superblock
+  structures *)
+
+Require Import AST RTL Maps Globalenvs.
+Require Import Coqlib Errors Op.
+
+Local Open Scope error_monad_scope.
+Local Open Scope positive_scope.
+
+(** External oracle returning the new RTL code, the entrypoint of that code,
+    and a mapping of new nodes to old nodes (checked by [verify_mapping] below) *)
+Axiom duplicate_aux: function -> code * node * (PTree.t node).
+
+Extract Constant duplicate_aux => "Duplicateaux.duplicate_aux".
+
+(** * Verification of node duplications *)
+
+Definition verify_is_copy dupmap n n' := (* OK iff [dupmap] maps node [n'] to node [n] *)
+  match dupmap!n' with
+  | None => Error(msg "verify_is_copy None") (* [n'] has no entry in [dupmap] *)
+  | Some revn => match (Pos.compare n revn) with Eq => OK tt | _ => Error(msg "verify_is_copy invalid map") end
+  end.
+
+Fixpoint verify_is_copy_list dupmap ln ln' := (* pairwise [verify_is_copy]; both lists must have the same length *)
+  match ln with
+  | n::ln => match ln' with
+             | n'::ln' => do u <- verify_is_copy dupmap n n';
+                          verify_is_copy_list dupmap ln ln'
+             | nil => Error (msg "verify_is_copy_list: ln bigger than ln'") end (* [ln'] ran out first *)
+  | nil => match ln' with
+          | n :: ln' => Error (msg "verify_is_copy_list: ln' bigger than ln") (* [ln] ran out first *)
+          | nil => OK tt end
+  end.
+
+Definition verify_mapping_entrypoint dupmap (f f': function): res unit := (* [dupmap] must map the entrypoint of [f'] to that of [f] *)
+  verify_is_copy dupmap (fn_entrypoint f) (fn_entrypoint f').
+
+Lemma product_eq {A B: Type} : (* decidable equality on [A] and on [B] gives decidable equality on the sum [A+B] *)
+  (forall (a b: A), {a=b} + {a<>b}) ->
+  (forall (c d: B), {c=d} + {c<>d}) ->
+  forall (x y: A+B), {x=y} + {x<>y}.
+Proof.
+  intros H H'. intros. decide equality.
+Qed.
+
+(** Decidable equality on [builtin_arg positive]. FIXME: ideally this would live
+ * in AST.v, but putting it there triggers an "illegal application" error *)
+Remark builtin_arg_eq_pos: forall (a b: builtin_arg positive), {a=b} + {a<>b}.
+Proof.
+  intros.
+  apply (builtin_arg_eq Pos.eq_dec).
+Defined.
+Global Opaque builtin_arg_eq_pos.
+
+Remark builtin_res_eq_pos: forall (a b: builtin_res positive), {a=b} + {a<>b}. (* decidable equality on [builtin_res positive] *)
+Proof. intros. apply (builtin_res_eq Pos.eq_dec). Qed.
+Global Opaque builtin_res_eq_pos.
+
+Definition verify_match_inst dupmap inst tinst := (* OK iff [tinst] equals [inst] up to successors related by [dupmap]; an Icond may also be negated with swapped successors *)
+  match inst with
+  | Inop n => match tinst with Inop n' => do u <- verify_is_copy dupmap n n'; OK tt | _ => Error(msg "verify_match_inst Inop") end
+
+  | Iop op lr r n => match tinst with
+      Iop op' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (eq_operation op op') then
+            if (list_eq_dec Pos.eq_dec lr lr') then
+              if (Pos.eq_dec r r') then
+                OK tt
+              else Error (msg "Different r in Iop")
+            else Error (msg "Different lr in Iop")
+          else Error(msg "Different operations in Iop")
+      | _ => Error(msg "verify_match_inst Iop") end
+
+  | Iload tm m a lr r n => match tinst with
+      | Iload tm' m' a' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (trapping_mode_eq tm tm') then
+            if (chunk_eq m m') then
+              if (eq_addressing a a') then
+                if (list_eq_dec Pos.eq_dec lr lr') then
+                  if (Pos.eq_dec r r') then OK tt
+                  else Error (msg "Different r in Iload")
+                else Error (msg "Different lr in Iload")
+              else Error (msg "Different addressing in Iload")
+            else Error (msg "Different mchunk in Iload")
+          else Error (msg "Different trapping_mode in Iload")
+      | _ => Error (msg "verify_match_inst Iload") end
+
+  | Istore m a lr r n => match tinst with
+      | Istore m' a' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (chunk_eq m m') then
+            if (eq_addressing a a') then
+              if (list_eq_dec Pos.eq_dec lr lr') then
+                if (Pos.eq_dec r r') then OK tt
+                else Error (msg "Different r in Istore")
+              else Error (msg "Different lr in Istore")
+            else Error (msg "Different addressing in Istore")
+          else Error (msg "Different mchunk in Istore")
+      | _ => Error (msg "verify_match_inst Istore") end
+
+  | Icall s ri lr r n => match tinst with
+      | Icall s' ri' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (signature_eq s s') then
+            if (product_eq Pos.eq_dec ident_eq ri ri') then
+              if (list_eq_dec Pos.eq_dec lr lr') then
+                if (Pos.eq_dec r r') then OK tt
+                else Error (msg "Different r r' in Icall")
+              else Error (msg "Different lr in Icall")
+            else Error (msg "Different ri in Icall")
+          else Error (msg "Different signatures in Icall")
+      | _ => Error (msg "verify_match_inst Icall") end
+
+  | Itailcall s ri lr => match tinst with
+      | Itailcall s' ri' lr' =>
+          if (signature_eq s s') then
+            if (product_eq Pos.eq_dec ident_eq ri ri') then
+              if (list_eq_dec Pos.eq_dec lr lr') then OK tt
+              else Error (msg "Different lr in Itailcall")
+            else Error (msg "Different ri in Itailcall")
+          else Error (msg "Different signatures in Itailcall")
+      | _ => Error (msg "verify_match_inst Itailcall") end
+
+  | Ibuiltin ef lbar brr n => match tinst with
+      | Ibuiltin ef' lbar' brr' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (external_function_eq ef ef') then
+            if (list_eq_dec builtin_arg_eq_pos lbar lbar') then
+              if (builtin_res_eq_pos brr brr') then OK tt
+              else Error (msg "Different brr in Ibuiltin")
+            else Error (msg "Different lbar in Ibuiltin")
+          else Error (msg "Different ef in Ibuiltin")
+      | _ => Error (msg "verify_match_inst Ibuiltin") end
+
+  | Icond cond lr n1 n2 i => match tinst with
+      | Icond cond' lr' n1' n2' i' =>
+          if (list_eq_dec Pos.eq_dec lr lr') then
+            if (eq_condition cond cond') then
+              do u1 <- verify_is_copy dupmap n1 n1';
+              do u2 <- verify_is_copy dupmap n2 n2'; OK tt
+            else if (eq_condition (negate_condition cond) cond') then
+              do u1 <- verify_is_copy dupmap n1 n2';
+              do u2 <- verify_is_copy dupmap n2 n1'; OK tt
+            else Error (msg "Incompatible conditions in Icond")
+          else Error (msg "Different lr in Icond")
+      | _ => Error (msg "verify_match_inst Icond") end
+
+  | Ijumptable r ln => match tinst with
+      | Ijumptable r' ln' =>
+          do u <- verify_is_copy_list dupmap ln ln';
+          if (Pos.eq_dec r r') then OK tt
+          else Error (msg "Different r in Ijumptable")
+      | _ => Error (msg "verify_match_inst Ijumptable") end
+
+  | Ireturn or => match tinst with
+      | Ireturn or' =>
+          if (option_eq Pos.eq_dec or or') then OK tt
+          else Error (msg "Different or in Ireturn")
+      | _ => Error (msg "verify_match_inst Ireturn") end
+  end.
+
+Definition verify_mapping_mn dupmap f f' (m: positive*positive) := (* [m] = (node [tn] of [f'], node [n] of [f]): their instructions must match *)
+  let (tn, n) := m in
+  match (fn_code f)!n with
+  | None => Error (msg "verify_mapping_mn: Could not get an instruction at (fn_code f)!n")
+  | Some inst => match (fn_code f')!tn with
+                 | None => Error (msg "verify_mapping_mn: Could not get an instruction at (fn_code f')!tn")
+                 | Some tinst => verify_match_inst dupmap inst tinst
+                 end
+  end.
+
+Fixpoint verify_mapping_mn_rec dupmap f f' lm := (* runs [verify_mapping_mn] on every pair of [lm], stopping at the first error *)
+  match lm with
+  | nil => OK tt
+  | m :: lm => do u <- verify_mapping_mn dupmap f f' m;
+               do u2 <- verify_mapping_mn_rec dupmap f f' lm;
+               OK tt
+  end.
+
+Definition verify_mapping_match_nodes dupmap (f f': function): res unit := (* checks every binding of [dupmap] *)
+  verify_mapping_mn_rec dupmap f f' (PTree.elements dupmap).
+
+(** Verifies that the [dupmap] of the translated function [f'] gives correct information with regard to [f]: entrypoints first, then every mapped node *)
+Definition verify_mapping dupmap (f f': function) : res unit :=
+  do u <- verify_mapping_entrypoint dupmap f f';
+  do v <- verify_mapping_match_nodes dupmap f f'; OK tt.
+
+(** * Entry points *)
+
+Definition transf_function (f: function) : res function := (* calls the oracle, then accepts its result only if [verify_mapping] succeeds *)
+  let (tcte, dupmap) := duplicate_aux f in
+  let (tc, te) := tcte in (* [tc]: new code, [te]: new entrypoint *)
+  let f' := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
+  do u <- verify_mapping dupmap f f';
+  OK f'.
+
+Definition transf_fundef (f: fundef) : res fundef := (* lifts [transf_function] through [transf_partial_fundef] *)
+  transf_partial_fundef transf_function f.
+
+Definition transf_program (p: program) : res program := (* lifts [transf_fundef] through [transform_partial_program] *)
+  transform_partial_program transf_fundef p.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
new file mode 100644
index 00000000..98e2f325
--- /dev/null
+++ b/backend/Duplicateaux.ml
@@ -0,0 +1,662 @@
+open RTL
+open Maps
+open Camlcoq
+
+let get_some opt = match opt with (* unwraps an option; fails on None *)
+| Some thing -> thing
+| None -> failwith "Did not get some"
+
+let bfs code entrypoint = begin (* returns the nodes visited from [entrypoint], in breadth-first visiting order *)
+  Printf.printf "bfs\n"; flush stdout;
+  let visited = ref (PTree.map (fun n i -> false) code)
+  and bfs_list = ref [] (* visited nodes, most recently visited first *)
+  and to_visit = Queue.create ()
+  and node = ref entrypoint
+  in begin
+    Queue.add entrypoint to_visit;
+    while not (Queue.is_empty to_visit) do
+      node := Queue.pop to_visit;
+      if not (get_some @@ PTree.get !node !visited) then begin
+        visited := PTree.set !node true !visited;
+        match PTree.get !node code with
+        | None -> failwith "No such node"
+        | Some i ->
+            bfs_list := !node :: !bfs_list;
+            match i with (* enqueue every successor of the instruction *)
+            | Icall(_, _, _, _, n) -> Queue.add n to_visit
+            | Ibuiltin(_, _, _, n) -> Queue.add n to_visit
+            | Ijumptable(_, ln) -> List.iter (fun n -> Queue.add n to_visit) ln
+            | Itailcall _ | Ireturn _ -> ()
+            | Icond (_, _, n1, n2, _) -> Queue.add n1 to_visit; Queue.add n2 to_visit
+            | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> Queue.add n to_visit
+      end
+    done;
+    List.rev !bfs_list (* back to visiting order *)
+  end
+end
+
+let optbool = function None -> false | Some _ -> true
+
+let ptree_get_some n ptree = get_some (PTree.get n ptree)
+
+(* Maps every node of [code] to the list of its predecessors, each listed once *)
+let get_predecessors_rtl code = begin
+  Printf.printf "get_predecessors_rtl\n"; flush stdout;
+  let preds = ref (PTree.map (fun n i -> []) code) in
+  let process_inst (node, i) =
+    let succ = match i with
+      | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
+      | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
+      | Icond (_,_,n1,n2,_) -> [n1;n2]
+      | Ijumptable (_,ln) -> ln
+      | Itailcall _ | Ireturn _ -> []
+    in List.iter (fun s ->
+      let previous_preds = ptree_get_some s !preds in
+      (* List.mem compares structurally; physical [==] only recognises the very same heap value *)
+      if not (List.mem node previous_preds) then
+        preds := PTree.set s (node::previous_preds) !preds) succ
+  in begin
+    List.iter process_inst (PTree.elements code); !preds
+  end
+end
+
+module PInt = struct (* ordered type over [P.t], compared through [P.to_int] *)
+  type t = P.t
+  let compare x y = Stdlib.compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt) (* sets of [P.t], ordered by [PInt.compare] *)
+
+let print_intlist l =
+  (* prints the elements of [l] between square brackets,
+   * each one followed by a single space *)
+  let print_elem n = Printf.printf "%d " (P.to_int n)
+  in begin
+    Printf.printf "[";
+    List.iter print_elem l;
+    Printf.printf "]"
+  end
+
+let print_intset s =
+  (* prints the elements of [s] between braces, in increasing [PInt] order,
+   * each one followed by a single space *)
+  let print_elem n = Printf.printf "%d " (P.to_int n)
+  in begin
+    Printf.printf "{";
+    PSet.iter print_elem s;
+    Printf.printf "}"
+  end
+
+type vstate = Unvisited | Processed | Visited (* DFS marks used by get_loop_headers *)
+
+(** Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come across an edge to a Processed node, it's a loop!
+  *)
+let get_loop_headers code entrypoint = begin
+  Printf.printf "get_loop_headers\n"; flush stdout;
+  let visited = ref (PTree.map (fun n i -> Unvisited) code)
+  and is_loop_header = ref (PTree.map (fun n i -> false) code)
+  in let rec dfs_visit code = function
+  | [] -> ()
+  | node :: ln ->
+      match (get_some @@ PTree.get node !visited) with
+      | Visited -> dfs_visit code ln (* nothing to do here, but the siblings must still be explored *)
+      | Processed -> begin
+          Printf.printf "Node %d is a loop header\n" (P.to_int node);
+          is_loop_header := PTree.set node true !is_loop_header;
+          dfs_visit code ln (* node stays Processed until its own post-order; go on with the siblings *)
+        end
+      | Unvisited -> begin
+          visited := PTree.set node Processed !visited;
+          match PTree.get node code with
+          | None -> failwith "No such node"
+          | Some i -> let next_visits = (match i with
+            | Icall (_, _, _, _, n) | Ibuiltin (_, _, _, n) | Inop n | Iop (_, _, _, n)
+            | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> [n]
+            | Icond (_, _, n1, n2, _) -> [n1; n2]
+            | Itailcall _ | Ireturn _ -> []
+            | Ijumptable (_, ln) -> ln
+            ) in dfs_visit code next_visits;
+          visited := PTree.set node Visited !visited;
+          dfs_visit code ln
+        end
+  in begin
+    dfs_visit code [entrypoint];
+    !is_loop_header
+  end
+end
+
+let ptree_printbool pt =
+  (* prints, between square brackets, the keys of [pt] that are bound to true *)
+  let print_if_true (n, b) =
+    if b then Printf.printf "%d, " (P.to_int n)
+  in begin
+    Printf.printf "[";
+    List.iter print_if_true (PTree.elements pt);
+    Printf.printf "]"
+  end
+
+(* Looks ahead (until a branch) to see if a node further down verifies
+ * the given predicate. The walk follows single-successor instructions only;
+ * a successor that is a loop header is still tested against the predicate,
+ * but the walk does not continue past it. *)
+let rec look_ahead code node is_loop_header predicate =
+  predicate node ||
+  match (get_some @@ PTree.get node code) with
+    | Ireturn _ | Itailcall _ | Icond _ | Ijumptable _ -> false
+    | Inop next | Iop (_, _, _, next) | Iload (_, _, _, _, _, next)
+    | Istore (_, _, _, _, next) | Icall (_, _, _, _, next)
+    | Ibuiltin (_, _, _, next) ->
+      predicate next
+      || (not (get_some @@ PTree.get next is_loop_header)
+          && look_ahead code next is_loop_header predicate)
+
+let do_call_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tCall heuristic..\n";
+    let is_call n = match get_some @@ PTree.get n code with
+      | Icall _ -> true
+      | _ -> false
+    in let ifso_call = look_ahead code ifso is_loop_header is_call
+    in let ifnot_call = look_ahead code ifnot is_loop_header is_call
+    in match ifso_call, ifnot_call with (* prefer the side on which no call was found *)
+    | true, false -> Some false
+    | false, true -> Some true
+    | _ -> None
+  end
+
+let do_opcode_heuristic code cond ifso ifnot is_loop_header =
+  (* logs, then defers the decision to DuplicateOpcodeHeuristic.opcode_heuristic *)
+  let () = Printf.printf "\tOpcode heuristic..\n" in
+  DuplicateOpcodeHeuristic.opcode_heuristic
+    code cond ifso ifnot is_loop_header
+
+let do_return_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tReturn heuristic..\n";
+    let is_return n = match get_some @@ PTree.get n code with
+      | Ireturn _ -> true
+      | _ -> false
+    in let ifso_return = look_ahead code ifso is_loop_header is_return
+    in let ifnot_return = look_ahead code ifnot is_loop_header is_return
+    in match ifso_return, ifnot_return with (* prefer the side on which no return was found *)
+    | true, false -> Some false
+    | false, true -> Some true
+    | _ -> None
+  end
+
+let do_store_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tStore heuristic..\n";
+    let is_store n = match get_some @@ PTree.get n code with
+      | Istore _ -> true
+      | _ -> false
+    in let ifso_store = look_ahead code ifso is_loop_header is_store
+    in let ifnot_store = look_ahead code ifnot is_loop_header is_store
+    in match ifso_store, ifnot_store with (* prefer the side on which no store was found *)
+    | true, false -> Some false
+    | false, true -> Some true
+    | _ -> None
+  end
+
+let do_loop_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tLoop heuristic..\n";
+    let is_header n = get_some @@ PTree.get n is_loop_header in
+    let ifso_loop = look_ahead code ifso is_loop_header is_header in
+    let ifnot_loop = look_ahead code ifnot is_loop_header is_header in
+    match ifso_loop, ifnot_loop with (* prefer the side on which a loop header was found *)
+    | true, false -> Some true
+    | false, true -> Some false
+    | _ -> None (* both or neither; TODO - when both, take the innermost loop ? *)
+  end
+
+let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tLoop2 heuristic..\n";
+    (* the direction recorded for [n] in [loop_info] (possibly None) is
+     * returned as is *)
+    get_some @@ PTree.get n loop_info
+  end
+
+(* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *)
+(* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *)
+let get_loop_info is_loop_header bfs_order code =
+  let loop_info = ref (PTree.map (fun n i -> None) code) in
+  let mark_path s n =
+    let visited = ref (PTree.map (fun n i -> false) code) in
+    let rec explore src dest =
+      if (get_some @@ PTree.get src !visited) then false
+      else if src = dest then true (* structural equality: [==] only holds for the very same heap value *)
+      else begin
+        visited := PTree.set src true !visited;
+        match get_some @@ PTree.get src code with
+        | Inop s | Iop (_, _, _, s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+        | Ibuiltin (_,_,_,s) -> explore s dest
+        | Icond (_,_,s1,s2,_) -> (explore s1 dest) || (explore s2 dest)
+        | Ijumptable _ | Itailcall _ | Ireturn _ -> false
+      end
+    in let rec advance_to_cb src =
+      if (get_some @@ PTree.get src !visited) then None
+      else begin
+        visited := PTree.set src true !visited;
+        match get_some @@ PTree.get src code with
+        | Inop s | Iop (_, _, _, s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+        | Ibuiltin (_,_,_,s) -> advance_to_cb s
+        | Icond _ -> Some src
+        | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+      end
+    in begin
+      Printf.printf "Marking path from %d to %d\n" (P.to_int n) (P.to_int s);
+      match advance_to_cb s with
+      | None -> (Printf.printf "Nothing found\n")
+      | Some s -> ( Printf.printf "Advancing to %d\n" (P.to_int s);
+          match get_some @@ PTree.get s !loop_info with
+          | None | Some _ -> begin
+              match get_some @@ PTree.get s code with
+              | Icond (_, _, n1, n2, _) ->
+                  let b1 = explore n1 n in
+                  let b2 = explore n2 n in
+                  if (b1 && b2) then (Printf.printf "both true\n")
+                  else if b1 then (Printf.printf "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info)
+                  else if b2 then (Printf.printf "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info)
+                  else (Printf.printf "none true\n")
+              | _ -> ( Printf.printf "not an icond\n" )
+            end
+          (* | Some _ -> ( Printf.printf "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *)
+        )
+    end
+  in begin
+    List.iter (fun n ->
+      match get_some @@ PTree.get n code with
+      | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+      | Ibuiltin (_, _, _, s) ->
+          if get_some @@ PTree.get s is_loop_header then mark_path s n
+      | Icond _ -> () (* loop backedges are never Icond in CompCert *)
+      | Ijumptable _ -> ()
+      | Itailcall _ | Ireturn _ -> ()
+    ) bfs_order;
+    !loop_info
+  end
+
+(* Remark - compared to the original paper, we don't use the store heuristic *)
+let get_directions code entrypoint = begin
+  Printf.printf "get_directions\n"; flush stdout;
+  let bfs_order = bfs code entrypoint in
+  let is_loop_header = get_loop_headers code entrypoint in
+  let loop_info = get_loop_info is_loop_header bfs_order code in
+  let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
+  begin
+    (* ptree_printbool is_loop_header; *)
+    (* Printf.printf "\n"; *)
+    List.iter (fun n ->
+      match (get_some @@ PTree.get n code) with
+      | Icond (cond, lr, ifso, ifnot, _) ->
+          (* Printf.printf "Analyzing %d.." (P.to_int n); *)
+          let heuristics = [ do_opcode_heuristic;
+            do_return_heuristic; do_loop2_heuristic loop_info n; do_loop_heuristic; do_call_heuristic;
+             (* do_store_heuristic *) ] in
+          let preferred = ref None in
+          begin
+            Printf.printf "Deciding condition for RTL node %d\n" (P.to_int n);
+            List.iter (fun do_heur -> (* heuristics are tried in list order; the first answer other than None wins *)
+              match !preferred with
+              | None -> preferred := do_heur code cond ifso ifnot is_loop_header
+              | Some _ -> ()
+            ) heuristics;
+            directions := PTree.set n !preferred !directions;
+            (match !preferred with | Some false -> Printf.printf "\tFALLTHROUGH\n"
+                                   | Some true -> Printf.printf "\tBRANCH\n"
+                                   | None -> Printf.printf "\tUNSURE\n");
+            Printf.printf "---------------------------------------\n"
+          end
+      | _ -> () (* only Icond nodes get a direction *)
+    ) bfs_order;
+    !directions
+  end
+end
+
+let update_direction direction instr = match instr with
+| Icond (cond, lr, ifso, ifnot, _) -> Icond (cond, lr, ifso, ifnot, direction)
+| _ -> instr
+
+let rec update_direction_rec directions = function
+| [] -> PTree.empty
+| (n, i) :: rest ->
+    let predicted = get_some @@ PTree.get n directions
+    in PTree.set n (update_direction predicted i) (update_direction_rec directions rest)
+
+(* Uses branch prediction to write prediction annotations in Icond:
+ * computes a direction for the nodes with [get_directions], then rebuilds
+ * the code with each Icond carrying the direction found for its node *)
+let update_directions code entrypoint =
+  Printf.printf "Update_directions\n"; flush stdout;
+  let directions = get_directions code entrypoint in
+  (* Printf.printf "Ifso directions: ";
+  ptree_printbool directions;
+  Printf.printf "\n"; *)
+  let annotated = update_direction_rec directions (PTree.elements code) in
+  annotated
+
+(** Trace selection *)
+
+let rec exists_false_rec = function (* true iff some pair carries [false] *)
+  | [] -> false
+  | (_, b) :: rest -> not b || exists_false_rec rest
+
+let exists_false boolmap = PTree.elements boolmap |> exists_false_rec
+
+(* DFS using prediction info to guide the exploration; returns the nodes in visiting order *)
+let dfs code entrypoint = begin
+  Printf.printf "dfs\n"; flush stdout;
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let rec dfs_list code = function
+  | [] -> []
+  | node :: ln ->
+      if get_some @@ PTree.get node !visited then dfs_list code ln
+      else begin
+        visited := PTree.set node true !visited;
+        let next_nodes = (match get_some @@ PTree.get node code with
+        | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+        | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n]
+        | Ijumptable (_, ln) -> ln
+        | Itailcall _ | Ireturn _ -> []
+        | Icond (_, _, n1, n2, info) -> (match info with
+          | Some false -> [n2; n1] (* annotated Some false: the second successor is explored first *)
+          | _ -> [n1; n2]
+          )
+        ) in node :: dfs_list code (next_nodes @ ln) (* successors go before the pending nodes *)
+      end
+  in dfs_list code [entrypoint]
+end
+
+(*
+let get_predecessors_ttl code =
+  let preds = ref (PTree.map (fun n i -> []) code) in
+  let process_inst (node, ti) = match ti with
+  | Tleaf _ -> ()
+  | Tnext (_, i) -> let succ = match i with
+      | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
+      | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
+      | Icond (_,_,n1,n2,_) -> [n1;n2]
+      | Ijumptable (_,ln) -> ln
+      | _ -> []
+      in List.iter (fun s -> preds := PTree.set s (node::(get_some @@ PTree.get s !preds)) !preds) succ
+  in begin
+    List.iter process_inst (PTree.elements code);
+    !preds
+  end
+*)
+
+let rec select_unvisited_node is_visited = function (* first node of the list not marked in [is_visited] *)
+| [] -> failwith "Empty list"
+| n :: ln -> if ptree_get_some n is_visited then select_unvisited_node is_visited ln else n
+
+let best_successor_of node code is_visited =
+  (* [candidate] is the successor a trace would continue with: the unique
+   * successor of a single-successor instruction, or the predicted target of
+   * an Icond; unpredicted Icond and all other instructions have none *)
+  let candidate = match (PTree.get node code) with
+    | None -> failwith "No such node in the code"
+    | Some (Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n)
+           | Istore (_, _, _, _, n) | Icall (_, _, _, _, n)
+           | Ibuiltin (_, _, _, n)) -> Some n
+    | Some (Icond (_, _, n1, _, Some true)) -> Some n1
+    | Some (Icond (_, _, _, n2, Some false)) -> Some n2
+    | Some _ -> None
+  in match candidate with
+  (* a candidate that is already visited is rejected *)
+  | Some n when not (ptree_get_some n is_visited) -> candidate
+  | _ -> None
+
+(* First unvisited predecessor of [node], in [order]; FIXME - could be improved by selecting in priority the predicted paths *)
+let best_predecessor_of node predecessors code order is_visited =
+  match (PTree.get node predecessors) with
+  | None -> failwith "No predecessor list found"
+  | Some lp ->
+      try Some (List.find (fun n ->
+          if (List.mem n lp) && (not (ptree_get_some n is_visited)) then
+            match ptree_get_some n code with
+            | Icond (_, _, n1, n2, ob) -> (match ob with
+              | None -> false
+              | Some false -> node = n2 (* an Icond qualifies only if its predicted successor is [node] *)
+              | Some true -> node = n1
+              )
+            | _ -> true
+          else false
+        ) order)
+      with Not_found -> None
+
+let print_trace = print_intlist
+
+let print_traces traces =
+  (* each trace is printed as: newline, tab, the trace, comma, newline *)
+  let print_one t =
+    Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"
+  in begin
+    Printf.printf "Traces: {";
+    List.iter print_one traces;
+    Printf.printf "}\n";
+  end
+
+(* Dumb (but linear) trace selection: follows single or predicted successors from each unvisited node, in BFS order *)
+let select_traces_linear code entrypoint =
+  let is_visited = ref (PTree.map (fun n i -> false) code) in
+  let bfs_order = bfs code entrypoint in
+  let rec go_through node = begin (* builds the trace starting at [node] *)
+    is_visited := PTree.set node true !is_visited;
+    let next_node = match (get_some @@ PTree.get node code) with
+      | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+      | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n
+      | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+      | Icond (_, _, n1, n2, info) -> (match info with
+        | Some false -> Some n2
+        | Some true -> Some n1
+        | None -> None
+        )
+    in match next_node with
+    | None -> [node]
+    | Some n ->
+        if not (get_some @@ PTree.get n !is_visited) then node :: go_through n
+        else [node] (* the trace stops in front of an already visited node *)
+    end
+  in let traces = ref [] in begin
+    List.iter (fun n ->
+      if not (get_some @@ PTree.get n !is_visited) then
+        traces := (go_through n) :: !traces
+    ) bfs_order;
+    !traces (* most recently built trace first *)
+  end
+
+
+(* Algorithm mostly inspired from Chang and Hwu 1988
+ * "Trace Selection for Compiling Large C Application Programs to Microcode" *)
+let select_traces_chang code entrypoint = begin
+  Printf.printf "select_traces\n"; flush stdout;
+  let order = dfs code entrypoint in
+  let predecessors = get_predecessors_rtl code in
+  let traces = ref [] in
+  let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* initially, no node is marked visited *)
+    Printf.printf "Length: %d\n" (List.length order); flush stdout;
+    while exists_false !is_visited do (* while (there are unvisited nodes) *)
+      let seed = select_unvisited_node !is_visited order in
+      let trace = ref [seed] in
+      let current = ref seed in begin
+        is_visited := PTree.set seed true !is_visited; (* mark seed visited *)
+        let quit_loop = ref false in begin
+          while not !quit_loop do (* grow the trace forward from the seed *)
+            let s = best_successor_of !current code !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some succ -> begin
+                trace := !trace @ [succ];
+                is_visited := PTree.set succ true !is_visited; (* mark s visited *)
+                current := succ
+                end
+          done;
+          current := seed;
+          quit_loop := false;
+          while not !quit_loop do (* grow the trace backward from the seed *)
+            let s = best_predecessor_of !current predecessors code order !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some pred -> begin
+                trace := pred :: !trace;
+                is_visited := PTree.set pred true !is_visited; (* mark s visited *)
+                current := pred
+                end
+          done;
+          traces := !trace :: !traces;
+        end
+      end
+    done;
+    (* Printf.printf "DFS: \t"; print_intlist order; Printf.printf "\n"; *)
+    Printf.printf "Traces: "; print_traces !traces;
+    !traces
+  end
+end
+
+let select_traces code entrypoint =
+  (* Chang's algorithm below 5000 nodes, the linear selection otherwise *)
+  let nb_nodes = List.length (PTree.elements code) in
+  (if nb_nodes < 5000 then select_traces_chang else select_traces_linear) code entrypoint
+
+let rec make_identity_ptree_rec = function (* binds every key of the list to itself *)
+| [] -> PTree.empty
+| (n, _) :: rest -> PTree.set n n (make_identity_ptree_rec rest)
+
+let make_identity_ptree code = PTree.elements code |> make_identity_ptree_rec
+
+(* Change the pointers of preds nodes to point to n' instead of n.
+ * Nodes are compared structurally ([=]): physical [==] only holds for the very same heap value. *)
+let rec change_pointers code n n' = function
+  | [] -> code
+  | pred :: preds ->
+      let new_pred_inst = match ptree_get_some pred code with
+        | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n')
+        | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n')
+        | Ijumptable(a, ln) -> assert (List.mem n ln);
+                               Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln)
+        | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n);
+                                 let n1' = if (n1 = n) then n' else n1
+                                 in let n2' = if (n2 = n) then n' else n2
+                                 in Icond(a, b, n1', n2', i)
+        | Inop n0 -> assert (n0 = n); Inop n'
+        | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n')
+        | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n')
+        | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n')
+        | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
+      in change_pointers (PTree.set pred new_pred_inst code) n n' preds
+
+(* parent: parent of n to keep as parent (not used in the body)
+ * preds: all the other parents of n, redirected to n'
+ * n': the node which should contain the duplicate of n
+ * returns: new code, new ptree (with n' mapped to n) *)
+let duplicate code ptree parent n preds n' =
+  Printf.printf "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
+  match PTree.get n' code with
+  | Some _ -> failwith "The PTree already has a node n'"
+  | None ->
+      let c' = change_pointers code n n' preds
+      in let new_code = PTree.set n' (ptree_get_some n code) c'
+      and new_ptree = PTree.set n' n ptree
+      in (new_code, new_ptree)
+
+let rec maxint = function (* largest element of a list of non-negative ints; 0 when empty *)
+  | [] -> 0
+  | i :: l -> assert (i >= 0); max i (maxint l)
+
+let is_empty l = match l with
+  | [] -> true
+  | _ :: _ -> false
+
+(* code: RTL code
+ * preds: mapping node -> predecessors
+ * ptree: the revmap
+ * trace: the trace to follow tail duplication on *)
+let tail_duplicate code preds ptree trace =
+  (* next_int: unused integer that can be used for the next duplication *)
+  let next_int = ref (maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1)
+  (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
+  in let last_node = ref None
+  in let last_duplicate = ref None
+  in let nb_duplicated = ref 0
+  (* recursive function on a trace *)
+  in let rec f code ptree is_first = function
+    | [] -> (code, ptree)
+    | n :: t ->
+        let (new_code, new_ptree) =
+          if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
+          else
+            let node_preds = ptree_get_some n preds
+            in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds (* predecessors other than the previous trace node *)
+            in let final_node_preds = match !last_duplicate with
+              | None -> node_preds_nolast
+              | Some n' -> n' :: node_preds_nolast
+            in if not (is_empty final_node_preds) then (* duplicate n only if something has to be redirected *)
+              let n' = !next_int
+              in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n')
+              in begin
+                next_int := !next_int + 1;
+                nb_duplicated := !nb_duplicated + 1;
+                last_duplicate := Some (P.of_int n');
+                (newc, newp)
+              end
+            else (code, ptree)
+        in begin
+          last_node := Some n;
+          f new_code new_ptree false t
+        end
+  in let new_code, new_ptree = f code ptree true trace
+  in (new_code, new_ptree, !nb_duplicated) (* third component: number of nodes duplicated for this trace *)
+
+let superblockify_traces code preds traces = (* tail-duplicates the traces one after the other *)
+  let max_nb_duplicated = !Clflags.option_fduplicate (* FIXME - should be architecture dependent *)
+  in let ptree = make_identity_ptree code
+  in let rec f code ptree = function
+    | [] -> (code, ptree, 0)
+    | trace :: traces ->
+        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+        in if (nb_duplicated < max_nb_duplicated)
+          then (Printf.printf "End duplication\n"; f new_code new_ptree traces)
+          else (Printf.printf "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) (* this trace's result is dropped and the remaining traces are skipped *)
+  in let new_code, new_ptree, _ = f code ptree traces
+  in (new_code, new_ptree)
+
+let rec invert_iconds_trace code = function
+  | [] -> code
+  | n :: ln ->
+      let code' = match ptree_get_some n code with
+        (* an Icond annotated Some true is replaced by the negated condition
+         * with swapped successors, annotated Some false *)
+        | Icond (c, lr, ifso, ifnot, Some true) ->
+            (* Printf.printf "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
+            PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)) code
+        (* every other instruction is left untouched *)
+        | _ -> code
+      in invert_iconds_trace code' ln
+
+(* Applies [invert_iconds_trace] to every trace in turn when
+ * [Clflags.option_finvertcond] is set; otherwise the code is returned unchanged *)
+let invert_iconds code traces =
+  List.fold_left (fun c t ->
+    if !Clflags.option_finvertcond then invert_iconds_trace c t else c
+  ) code traces
+
+let duplicate_aux f = (* returns ((new code, entrypoint), map from new nodes to old nodes) *)
+  let entrypoint = f.fn_entrypoint in
+  if !Clflags.option_fduplicate < 0 then (* negative: code returned unchanged with the identity map *)
+    ((f.fn_code, entrypoint), make_identity_ptree f.fn_code)
+  else
+    let code = update_directions (f.fn_code) entrypoint in
+    let traces = select_traces code entrypoint in
+    let icond_code = invert_iconds code traces in
+    let preds = get_predecessors_rtl icond_code in
+    if !Clflags.option_fduplicate >= 1 then (* tail duplication only from 1 upwards *)
+      let (new_code, pTreeId) = ((* print_traces traces; *) superblockify_traces icond_code preds traces) in
+      ((new_code, f.fn_entrypoint), pTreeId)
+    else
+      ((icond_code, entrypoint), make_identity_ptree code)
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
new file mode 100644
index 00000000..6b598dc7
--- /dev/null
+++ b/backend/Duplicateproof.v
@@ -0,0 +1,523 @@
+(** Correctness proof for code duplication *)
+Require Import AST Linking Errors Globalenvs Smallstep.
+Require Import Coqlib Maps Events Values.
+Require Import Op RTL Duplicate.
+
+Local Open Scope positive_scope.
+
+(** * Definition of [match_states] (independently of the translation) *)
+
+(* Would it be simpler to take dupmap: node -> node, with a node outside the CFG instead of None? *)
+(** [match_inst dupmap i i'] relates an instruction [i] of the original
+    function to an instruction [i'] of the transformed one. Both must be the
+    same instruction except for their successors: every successor [n'] of
+    [i'] must be mapped by [dupmap] to the corresponding successor [n] of [i].
+    For [Icond], the annotations [i] and [i'] are unconstrained, and
+    [match_inst_revcond] additionally allows the transformed instruction to
+    test the negated condition with its two successors exchanged. *)
+Inductive match_inst (dupmap: PTree.t node): instruction -> instruction -> Prop :=
+  | match_inst_nop: forall n n',
+      dupmap!n' = (Some n) -> match_inst dupmap (Inop n) (Inop n')
+  | match_inst_op: forall n n' op lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Iop op lr r n) (Iop op lr r n')
+  | match_inst_load: forall n n' tm m a lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Iload tm m a lr r n) (Iload tm m a lr r n')
+  | match_inst_store: forall n n' m a lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Istore m a lr r n) (Istore m a lr r n')
+  | match_inst_call: forall n n' s ri lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Icall s ri lr r n) (Icall s ri lr r n')
+  | match_inst_tailcall: forall s ri lr,
+      match_inst dupmap (Itailcall s ri lr) (Itailcall s ri lr)
+  | match_inst_builtin: forall n n' ef la br,
+      dupmap!n' = (Some n) -> match_inst dupmap (Ibuiltin ef la br n) (Ibuiltin ef la br n')
+  | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr i i',
+      dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
+      match_inst dupmap (Icond c lr ifso ifnot i) (Icond c lr ifso' ifnot' i')
+  | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr i i',
+      dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
+      match_inst dupmap (Icond c lr ifso ifnot i) (Icond (negate_condition c) lr ifnot' ifso' i')
+  | match_inst_jumptable: forall ln ln' r,
+      list_forall2 (fun n n' => (dupmap!n' = (Some n))) ln ln' ->
+      match_inst dupmap (Ijumptable r ln) (Ijumptable r ln')
+  | match_inst_return: forall or, match_inst dupmap (Ireturn or) (Ireturn or).
+
+(** [match_function dupmap f f'] states that [f'] is a valid duplication of
+    [f] with respect to [dupmap], a partial map from nodes of [f'] to the
+    nodes of [f] they stand for.
+    - [dupmap_correct]: whenever [n'] is mapped to [n] and [f] has an
+      instruction at [n], [f'] has an instruction at [n'] matching it;
+    - [dupmap_entrypoint]: the entry point of [f'] is mapped to that of [f];
+    - the [preserv_*] fields: signature, parameters and stack size coincide. *)
+Record match_function dupmap f f': Prop := {
+  dupmap_correct: forall n n', dupmap!n' = Some n ->
+    (forall i, (fn_code f)!n = Some i -> exists i', (fn_code f')!n' = Some i' /\ match_inst dupmap i i');
+  dupmap_entrypoint: dupmap!(fn_entrypoint f') = Some (fn_entrypoint f);
+  preserv_fnsig: fn_sig f = fn_sig f';
+  preserv_fnparams: fn_params f = fn_params f';
+  preserv_fnstacksize: fn_stacksize f = fn_stacksize f'
+}.
+
+(** Lifting of [match_function] to function definitions: internal functions
+    must match for some [dupmap]; external functions must be identical. *)
+Inductive match_fundef: RTL.fundef -> RTL.fundef -> Prop :=
+  | match_Internal dupmap f f': match_function dupmap f f' -> match_fundef (Internal f) (Internal f')
+  | match_External ef: match_fundef (External ef) (External ef).
+
+(** Two stack frames match when their functions match for some [dupmap],
+    the return node [pc'] of the transformed frame is mapped to the return
+    node [pc] of the original one, and the result register, stack pointer
+    and register set are the same on both sides. *)
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframe_intro 
+      dupmap res f sp pc rs f' pc'
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc):
+      match_stackframes (Stackframe res f sp pc rs) (Stackframe res f' sp pc' rs).
+
+(** Simulation relation between states of the original and of the
+    transformed program. Registers, memory, arguments and return values are
+    identical on both sides; only the functions, the program points (related
+    through [dupmap]) and the call stacks (related frame by frame with
+    [match_stackframes]) may differ. *)
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro 
+      dupmap st f sp pc rs m st' f' pc'
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc):
+      match_states (State st f sp pc rs m) (State st' f' sp pc' rs m)
+  | match_states_call:
+    forall st st' f f' args m
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_fundef f f'),
+      match_states (Callstate st f args m) (Callstate st' f' args m)
+  | match_states_return:
+    forall st st' v m
+      (STACKS: list_forall2 match_stackframes st st'),
+      match_states (Returnstate st v m) (Returnstate st' v m).
+
+(** * Auxiliary properties *)
+
+
+(** A successful [transf_function] keeps the signature, the parameter list
+    and the stack size of the function unchanged. *)
+Theorem transf_function_preserves:
+  forall f f',
+  transf_function f = OK f' ->
+     fn_sig f = fn_sig f' /\ fn_params f = fn_params f' /\ fn_stacksize f = fn_stacksize f'.
+Proof.
+  intros. unfold transf_function in H. destruct (duplicate_aux _) as (tcte & mp). destruct tcte as (tc & te). monadInv H.
+  repeat (split; try reflexivity).
+Qed.
+
+
+(** If the recursive check [verify_mapping_mn_rec] succeeds on a list [lb],
+    then the single-binding check [verify_mapping_mn] succeeds on every
+    element [b] of [lb]. Proved by induction on [lb]. *)
+Lemma verify_mapping_mn_rec_step:
+  forall dupmap lb b f f',
+  In b lb ->
+  verify_mapping_mn_rec dupmap f f' lb = OK tt ->
+  verify_mapping_mn dupmap f f' b = OK tt.
+Proof.
+  induction lb; intros.
+  - monadInv H0. inversion H.
+  - inversion H.
+    + subst. monadInv H0. destruct x. assumption.
+    + monadInv H0. destruct x0. eapply IHlb; assumption.
+Qed.
+
+(** Soundness of [verify_is_copy]: when it returns [OK tt] for [n] and [n'],
+    the map [dupmap] sends [n'] to [n]. *)
+Lemma verify_is_copy_correct:
+  forall dupmap n n',
+  verify_is_copy dupmap n n' = OK tt ->
+  dupmap ! n' = Some n.
+Proof.
+  intros. unfold verify_is_copy in H. destruct (_ ! n') eqn:REVM; [|inversion H].
+  destruct (n ?= p) eqn:NP; try (inversion H; fail).
+  eapply Pos.compare_eq in NP. subst.
+  reflexivity.
+Qed.
+
+(** Soundness of [verify_is_copy_list]: when it returns [OK tt], the two
+    node lists have the same length and are related element-wise by
+    [dupmap] (each [n'] of [ln'] is mapped to the corresponding [n] of [ln]). *)
+Lemma verify_is_copy_list_correct:
+  forall dupmap ln ln',
+  verify_is_copy_list dupmap ln ln' = OK tt ->
+  list_forall2 (fun n n' => dupmap ! n' = Some n) ln ln'.
+Proof.
+  induction ln.
+  - intros. destruct ln'; monadInv H. constructor.
+  - intros. destruct ln'; monadInv H. destruct x. apply verify_is_copy_correct in EQ.
+    eapply IHln in EQ0. constructor; assumption.
+Qed.
+
+(** Soundness of the executable checker [verify_match_inst]: when it returns
+    [OK tt] on [i] and [i'], the relation [match_inst dupmap i i'] holds.
+    The proof is a case analysis on [i], then on [i']; in each case the
+    decidable equalities tested by the checker are destructed to identify
+    the operands, and [verify_is_copy_correct] (or its list version) gives
+    the facts about the successors. *)
+Lemma verify_match_inst_correct:
+  forall dupmap i i',
+  verify_match_inst dupmap i i' = OK tt ->
+  match_inst dupmap i i'.
+Proof.
+  intros. unfold verify_match_inst in H.
+  destruct i; try (inversion H; fail).
+(* Inop *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    constructor; eauto.
+(* Iop *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (eq_operation _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Iload *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (trapping_mode_eq _ _); try discriminate.
+    destruct (chunk_eq _ _); try discriminate.
+    destruct (eq_addressing _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Istore *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (chunk_eq _ _); try discriminate.
+    destruct (eq_addressing _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Icall *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (signature_eq _ _); try discriminate.
+    destruct (product_eq _ _ _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. subst.
+    constructor. assumption.
+(* Itailcall *)
+  - destruct i'; try (inversion H; fail).
+    destruct (signature_eq _ _); try discriminate.
+    destruct (product_eq _ _ _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate. subst. clear H.
+    constructor.
+(* Ibuiltin *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (external_function_eq _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (builtin_res_eq_pos _ _); try discriminate. subst.
+    constructor. assumption.
+(* Icond: first the case of an identical condition, then the negated one *)
+  - destruct i'; try (inversion H; fail).
+    destruct (list_eq_dec _ _ _); try discriminate. subst.
+    destruct (eq_condition _ _); try discriminate.
+    + monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      destruct x0. eapply verify_is_copy_correct in EQ1.
+      constructor; assumption.
+    + destruct (eq_condition _ _); try discriminate.
+      monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      destruct x0. eapply verify_is_copy_correct in EQ1.
+      constructor; assumption.
+(* Ijumptable *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_list_correct in EQ.
+    destruct (Pos.eq_dec _ _); try discriminate. subst.
+    constructor. assumption.
+(* Ireturn *)
+  - destruct i'; try (inversion H; fail).
+    destruct (option_eq _ _ _); try discriminate. subst. clear H.
+    constructor.
+Qed.
+
+
+(** Soundness of [verify_mapping_mn] on one binding [(n', n)]: if [f] has
+    instruction [i] at [n] and the check succeeds, then the code [tc] of
+    [f'] has at [n'] some instruction [i'] that matches [i]. *)
+Lemma verify_mapping_mn_correct mp n n' i f f' tc:
+  mp ! n' = Some n ->
+  (fn_code f) ! n = Some i ->
+  (fn_code f') = tc ->
+  verify_mapping_mn mp f f' (n', n) = OK tt ->
+  exists i',
+     tc ! n' = Some i'
+  /\ match_inst mp i i'.
+Proof.
+  unfold verify_mapping_mn; intros H H0 H1 H2. rewrite H0 in H2. clear H0. rewrite H1 in H2. clear H1.
+  destruct (tc ! n') eqn:TCN; [| inversion H2].
+  exists i0. split; auto.
+  eapply verify_match_inst_correct. assumption.
+Qed.
+
+
+(** Same conclusion as [verify_mapping_mn_correct], but starting from a
+    successful run of [verify_mapping_mn_rec] over all the bindings of [mp]
+    ([PTree.elements mp]): the binding [(n', n)] is one of those elements. *)
+Lemma verify_mapping_mn_rec_correct:
+  forall mp n n' i f f' tc,
+  mp ! n' = Some n ->
+  (fn_code f) ! n = Some i ->
+  (fn_code f') = tc ->
+  verify_mapping_mn_rec mp f f' (PTree.elements mp) = OK tt ->
+  exists i',
+     tc ! n' = Some i'
+  /\ match_inst mp i i'.
+Proof.
+  intros. exploit PTree.elements_correct. eapply H. intros IN.
+  eapply verify_mapping_mn_rec_step in H2; eauto.
+  eapply verify_mapping_mn_correct; eauto.
+Qed.
+
+(** Whenever [transf_function] succeeds, the mapping returned by
+    [duplicate_aux] (and validated by [verify_mapping]) is a witness that
+    the result matches the original function. *)
+Theorem transf_function_correct f f':
+    transf_function f = OK f' -> exists dupmap, match_function dupmap f f'.
+Proof.
+  unfold transf_function.
+  intros TRANSF.
+  destruct (duplicate_aux _) as (tcte & mp). destruct tcte as (tc & te).
+  monadInv TRANSF.
+  unfold verify_mapping in EQ. monadInv EQ.
+  exists mp; constructor 1; simpl; auto.
+  + (* correct *) 
+  intros until n'. intros REVM i FNC.
+  unfold verify_mapping_match_nodes in EQ. simpl in EQ. destruct x1.
+  eapply verify_mapping_mn_rec_correct; eauto.
+  simpl; eauto.
+  + (* entrypoint *)
+  intros. unfold verify_mapping_entrypoint in EQ0. simpl in EQ0.
+  eapply verify_is_copy_correct; eauto.
+  destruct x0; auto.
+Qed.
+
+(** Lifting of [transf_function_correct] to function definitions. *)
+Lemma transf_fundef_correct f f':
+  transf_fundef f = OK f' -> match_fundef f f'.
+Proof.
+  intros TRANSF; destruct f; simpl; monadInv TRANSF.
+  + exploit transf_function_correct; eauto.
+    intros (dupmap & MATCH_F).
+    eapply match_Internal; eauto.
+  + eapply match_External.
+Qed.
+
+(** * Preservation proof *)
+
+(** [tp] matches [p] when each function definition of [tp] is the successful
+    result of [transf_fundef] on the corresponding definition of [p], and
+    variable information is equal. *)
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+(** A successful [transf_program] yields a program related to its input by
+    [match_prog]. *)
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: program.
+Variable tprog: program.
+
+Hypothesis TRANSL: match_prog prog tprog.
+
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+(** Global symbols resolve identically in the source and target
+    global environments. *)
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+(* UNUSED LEMMA ?
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+*)
+
+(** The symbol environments of the source and target programs are
+    equivalent. *)
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+(** A function value found in [ge] is found in [tge] as the successful
+    translation of the same function; a compilation unit [cunit] below
+    [prog] in the link order is also provided. *)
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf cunit, transf_fundef f = OK tf /\ Genv.find_funct tge v = Some tf /\ linkorder cunit prog.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+  + unfold incl; auto.
+  + eapply linkorder_refl.
+Qed.
+
+(** Same as [functions_translated], for lookups by function pointer
+    ([Genv.find_funct_ptr]). *)
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+(** The translation of a function definition keeps its signature. *)
+Lemma function_sig_translated:
+  forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. simpl. symmetry. apply transf_function_preserves. assumption.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+(** If two node lists are related element-wise by [dupmap] and the element
+    of index [val] in [ln] is [pc], then [ln'] has an element at the same
+    index, and [dupmap] maps it to [pc]. Used for [Ijumptable].
+    NOTE(review): the universally quantified [pc'] is shadowed by the
+    existential one and looks unused -- confirm before removing it, since
+    callers instantiate this lemma through [exploit]. *)
+Lemma list_nth_z_dupmap:
+  forall dupmap ln ln' (pc pc': node) val,
+  list_nth_z ln val = Some pc ->
+  list_forall2 (fun n n' => dupmap!n' = Some n) ln ln' ->
+  exists pc',
+     list_nth_z ln' val = Some pc'
+  /\ dupmap!pc' = Some pc.
+Proof.
+  induction ln; intros until val; intros LNZ LFA.
+  - inv LNZ.
+  - inv LNZ. destruct (zeq val 0) eqn:ZEQ.
+    + inv H0. destruct ln'; inv LFA.
+      simpl. exists p. split; auto.
+    + inv LFA. simpl. rewrite ZEQ. exploit IHln. 2: eapply H0. all: eauto.
+      intros (pc'1 & LNZ & REV). exists pc'1. split; auto. congruence.
+Qed.
+
+(** Every initial state of the source program has a matching initial state
+    in the target program. *)
+Theorem transf_initial_states:
+  forall s1, initial_state prog s1 ->
+  exists s2, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inv H.
+  exploit function_ptr_translated; eauto. intros (tf & FIND & TRANSF).
+  eexists. split.
+  - econstructor; eauto.
+    + eapply (Genv.init_mem_transf_partial TRANSL); eauto.
+    + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto.
+      symmetry. eapply match_program_main. eauto.
+    + destruct f.
+      * monadInv TRANSF. rewrite <- H3. symmetry; eapply transf_function_preserves. assumption.
+      * monadInv TRANSF. assumption.
+  - constructor; eauto.
+    + constructor. 
+    + apply transf_fundef_correct; auto.
+Qed.
+
+(** A target state matching a final source state is final, with the same
+    return code. *)
+Theorem transf_final_states:
+  forall s1 s2 r,
+  match_states s1 s2 -> final_state s1 r -> final_state s2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+(** Lock-step simulation diagram: every step of the source program from a
+    state [s1] is matched by exactly one step of the target program, with
+    the same trace, from any state [s2] matching [s1]; the resulting states
+    match again. The proof goes by case analysis on the source step; in the
+    instruction cases, [dupmap_correct] provides the target instruction and
+    inverting [match_inst] identifies its shape. *)
+Theorem step_simulation:
+  forall s1 t s1', step ge s1 t s1' ->
+  forall s2 (MS: match_states s1 s2),
+  exists s2',
+     step tge s2 t s2'
+  /\ match_states s1' s2'.
+Proof.
+  Local Hint Resolve transf_fundef_correct: core.
+  induction 1; intros; inv MS.
+(* Inop *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3).
+    inv H3.
+    eexists. split.
+    + eapply exec_Inop; eauto.
+    + econstructor; eauto.
+(* Iop *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iop; eauto. erewrite eval_operation_preserved; eauto.
+    + econstructor; eauto.
+(* Iload *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload; eauto; (* is the following still needed? *) erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+(* Iload notrap1 *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload_notrap1; eauto; erewrite eval_addressing_preserved; eauto. 
+    + econstructor; eauto.
+(* Iload notrap2 *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload_notrap2; eauto; erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+      
+(* Istore *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Istore; eauto; erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+(* Icall *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    destruct ros.
+    * simpl in H0. apply functions_translated in H0.
+      destruct H0 as (tf & cunit & TFUN & GFIND & LO).
+      eexists. split.
+      + eapply exec_Icall. eassumption. simpl. eassumption.
+        apply function_sig_translated. assumption.
+      + repeat (econstructor; eauto).
+    * simpl in H0. destruct (Genv.find_symbol _ _) eqn:GFS; try discriminate.
+      apply function_ptr_translated in H0. destruct H0 as (tf & GFF & TF).
+      eexists. split.
+      + eapply exec_Icall. eassumption. simpl. rewrite symbols_preserved. rewrite GFS.
+        eassumption. apply function_sig_translated. assumption.
+      + repeat (econstructor; eauto).
+(* Itailcall *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H10 & H11). inv H11.
+    pose symbols_preserved as SYMPRES.
+    destruct ros.
+    * simpl in H0. apply functions_translated in H0.
+      destruct H0 as (tf & cunit & TFUN & GFIND & LO).
+      eexists. split.
+      + eapply exec_Itailcall. eassumption. simpl. eassumption.
+        apply function_sig_translated. assumption.
+        erewrite <- preserv_fnstacksize; eauto.
+      + repeat (constructor; auto).
+    * simpl in H0. destruct (Genv.find_symbol _ _) eqn:GFS; try discriminate.
+      apply function_ptr_translated in H0. destruct H0 as (tf & GFF & TF).
+      eexists. split.
+      + eapply exec_Itailcall. eassumption. simpl. rewrite symbols_preserved. rewrite GFS.
+        eassumption. apply function_sig_translated. assumption.
+        erewrite <- preserv_fnstacksize; eauto.
+      + repeat (constructor; auto).
+(* Ibuiltin *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Ibuiltin; eauto. eapply eval_builtin_args_preserved; eauto.
+      eapply external_call_symbols_preserved; eauto. eapply senv_preserved.
+    + econstructor; eauto.
+(* Icond *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    * (* match_inst_cond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto.
+      + econstructor; eauto. destruct b; auto.
+    * (* match_inst_revcond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto. rewrite eval_negate_condition. rewrite H0. simpl. eauto.
+      + econstructor; eauto. destruct b; auto.
+(* Ijumptable *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    exploit list_nth_z_dupmap; eauto. intros (pc'1 & LNZ & REVM).
+    eexists. split.
+    + eapply exec_Ijumptable; eauto.
+    + econstructor; eauto.
+(* Ireturn *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Ireturn; eauto. erewrite <- preserv_fnstacksize; eauto.
+    + econstructor; eauto.
+(* exec_function_internal *)
+  - inversion TRANSF as [dupmap f0 f0' MATCHF|]; subst. eexists. split.
+    + eapply exec_function_internal. erewrite <- preserv_fnstacksize; eauto.
+    + erewrite preserv_fnparams; eauto.
+      econstructor; eauto. apply dupmap_entrypoint. assumption.
+(* exec_function_external *)
+  - inversion TRANSF as [|]; subst. eexists. split.
+    + econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + constructor. assumption.
+(* exec_return *)
+  - inv STACKS. destruct b1 as [res' f' sp' pc' rs']. eexists. split.
+    + constructor.
+    + inv H1. econstructor; eauto.
+Qed.
+
+(** Semantic preservation for code duplication: a forward simulation from
+    the source to the target semantics, assembled from the initial-state,
+    final-state and step lemmas of this section with [match_states] as the
+    simulation relation. *)
+Theorem transf_program_correct:
+  forward_simulation (semantics prog) (semantics tprog).
+Proof.
+  eapply forward_simulation_step with match_states.
+  - eapply senv_preserved.
+  - eapply transf_initial_states.
+  - eapply transf_final_states.
+  - eapply step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/ForwardMoves.v b/backend/ForwardMoves.v
new file mode 100644
index 00000000..7cfd411f
--- /dev/null
+++ b/backend/ForwardMoves.v
@@ -0,0 +1,333 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+(* Static analysis *)
+
+(** Abstract domain of the analysis: a finite map from registers to
+    registers. The empty map is [top]; a relation with more bindings is
+    lower in the order (see [ge]). *)
+Module RELATION.
+  
+Definition t := (PTree.t reg).
+(* Extensional equality: both maps give the same answer on every register. *)
+Definition eq (r1 r2 : t) :=
+  forall x, (PTree.get x r1) = (PTree.get x r2).
+
+(* The map without any binding. *)
+Definition top : t := PTree.empty reg.
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  unfold eq.
+  intros; reflexivity.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  unfold eq.
+  intros; eauto.
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  unfold eq.
+  intros; congruence.
+Qed.
+
+(* Boolean equality test on registers, derived from [Pos.eq_dec]. *)
+Definition reg_beq (x y : reg) :=
+  if Pos.eq_dec x y then true else false.
+
+(* Executable equality test on relations. *)
+Definition beq (r1 r2 : t) := PTree.beq reg_beq r1 r2.
+
+(* Soundness of [beq] with respect to [eq]. *)
+Lemma beq_correct: forall r1 r2, beq r1 r2 = true -> eq r1 r2.
+Proof.
+  unfold beq, eq. intros r1 r2 EQ x.
+  pose proof (PTree.beq_correct reg_beq r1 r2) as CORRECT.
+  destruct CORRECT as [CORRECTF CORRECTB].
+  pose proof (CORRECTF EQ x) as EQx.
+  clear CORRECTF CORRECTB EQ.
+  unfold reg_beq in *.
+  destruct (r1 ! x) as [R1x | ] in *;
+    destruct (r2 ! x) as [R2x | ] in *;
+    trivial; try contradiction.
+  destruct (Pos.eq_dec R1x R2x) in *; congruence.
+Qed.
+
+(* [ge r1 r2]: every binding present in [r1] is also present, with the same
+   value, in [r2]. *)
+Definition ge (r1 r2 : t) :=
+  forall x,
+    match PTree.get x r1 with
+    | None => True
+    | Some v => (PTree.get x r2) = Some v
+    end.
+
+Lemma ge_refl: forall r1 r2, eq r1 r2 -> ge r1 r2.
+Proof.
+  unfold eq, ge.
+  intros r1 r2 EQ x.
+  pose proof (EQ x) as EQx.
+  clear EQ.
+  destruct (r1 ! x).
+  - congruence.
+  - trivial.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+Proof.
+  unfold ge.
+  intros r1 r2 r3 GE12 GE23 x.
+  pose proof (GE12 x) as GE12x; clear GE12.
+  pose proof (GE23 x) as GE23x; clear GE23.
+  destruct (r1 ! x); trivial.
+  destruct (r2 ! x); congruence.
+Qed.
+
+(* Upper bound of two relations: keeps a binding only when both maps bind
+   the register to the same value, and drops it otherwise. *)
+Definition lub (r1 r2 : t) :=
+  PTree.combine
+    (fun ov1 ov2 =>
+       match ov1, ov2 with
+       | (Some v1), (Some v2) =>
+         if Pos.eq_dec v1 v2
+         then ov1
+         else None
+       | None, _
+       | _, None => None
+       end)
+    r1 r2.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial.
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial.
+  congruence.
+Qed.
+
+End RELATION.
+
+(** Interface of a semi-lattice that does not require a least element: an
+    equivalence [eq] with a sound boolean test [beq], an order [ge] that is
+    reflexive (up to [eq]) and transitive, and an upper-bound operation
+    [lub]. *)
+Module Type SEMILATTICE_WITHOUT_BOTTOM.
+
+  Parameter t: Type.
+  Parameter eq: t -> t -> Prop.
+  Axiom eq_refl: forall x, eq x x.
+  Axiom eq_sym: forall x y, eq x y -> eq y x.
+  Axiom eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Parameter beq: t -> t -> bool.
+  Axiom beq_correct: forall x y, beq x y = true -> eq x y.
+  Parameter ge: t -> t -> Prop.
+  Axiom ge_refl: forall x y, eq x y -> ge x y.
+  Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Parameter lub: t -> t -> t.
+  Axiom ge_lub_left: forall x y, ge (lub x y) x.
+  Axiom ge_lub_right: forall x y, ge (lub x y) y.
+
+End SEMILATTICE_WITHOUT_BOTTOM.
+
+(** Functor adding a least element to a [SEMILATTICE_WITHOUT_BOTTOM]: the
+    carrier becomes [option L.t], where [None] is the new bottom and
+    [Some x] embeds the element [x] of [L]. All operations and their
+    properties are lifted from [L]. *)
+Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM).
+  Definition t := option L.t.
+  (* [None] is only equal to [None]; [Some] values are compared with [L.eq]. *)
+  Definition eq (a b : t) :=
+    match a, b with
+    | None, None => True
+    | Some x, Some y => L.eq x y
+    | Some _, None | None, Some _ => False
+    end.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq; destruct x; trivial.
+    apply L.eq_refl.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; destruct x; destruct y; trivial.
+    apply L.eq_sym.
+  Qed.
+  
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; destruct x; destruct y; destruct z; trivial.
+    - apply L.eq_trans.
+    - contradiction.
+  Qed.
+  
+  (* Executable counterpart of [eq]. *)
+  Definition beq (x y : t) :=
+    match x, y with
+    | None, None => true
+    | Some x, Some y => L.beq x y
+    | Some _, None | None, Some _ => false
+    end.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq.
+    destruct x; destruct y; trivial; try congruence.
+    apply L.beq_correct.
+  Qed.
+  
+  (* Everything is above [None]; [None] is above nothing but itself;
+     [Some] values are ordered by [L.ge]. *)
+  Definition ge (x y : t) :=
+    match x, y with
+    | None, Some _ => False
+    | _, None => True
+    | Some a, Some b => L.ge a b
+    end.
+  
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    destruct x; destruct y; trivial.
+    apply L.ge_refl.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    destruct x; destruct y; destruct z; trivial; try contradiction.
+    apply L.ge_trans.
+  Qed.
+  
+  (* The added least element. *)
+  Definition bot: t := None.
+  Lemma ge_bot: forall x, ge x bot.
+  Proof.
+    unfold ge, bot.
+    destruct x; trivial.
+  Qed.
+  
+  (* [None] is neutral for [lub]; two [Some] values are joined with [L.lub]. *)
+  Definition lub (a b : t) :=
+    match a, b with
+    | None, _ => b
+    | _, None => a
+    | (Some x), (Some y) => Some (L.lub x y)
+    end.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_left.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_right.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+End ADD_BOTTOM.
+
+(* [RB]: register relations extended with a bottom element ([None]).
+   [DS]: the [Dataflow_Solver] functor instantiated on [RB] with the
+   [NodeSetForward] worklist. *)
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+(* Forget everything known about [dst]: its own binding is removed, and
+   [PTree.filter1] then drops the remaining entries selected by the
+   predicate, i.e. those for which [dst] equals the stored register. *)
+Definition kill (dst : reg) (rel : RELATION.t) :=
+  PTree.filter1 (fun x => if Pos.eq_dec dst x then false else true)
+                (PTree.remove dst rel).
+
+(* Effect of the move [dst := src]: [dst] is first killed, then bound to the
+   register [src] is itself bound to in the incoming relation [rel], or to
+   [src] when [rel] has no binding for it. *)
+Definition move (src dst : reg) (rel : RELATION.t) :=
+  PTree.set dst (match PTree.get src rel with
+                 | Some src' => src'
+                 | None => src
+                 end) (kill dst rel).
+
+(* Kill every register appearing in a builtin result: the single register
+   of [BR], nothing for [BR_none], and both halves of [BR_splitlong]
+   recursively. In this file it is only mentioned in the TODO comment of
+   [apply_instr]. *)
+Fixpoint kill_builtin_res (res : builtin_res reg) (rel : RELATION.t) :=
+  match res with
+  | BR z => kill z rel
+  | BR_none => rel
+  | BR_splitlong hi lo => kill_builtin_res hi (kill_builtin_res lo rel)
+  end.
+
+(* Transfer function of one instruction on a relation [x]:
+   - [Inop], [Icond], [Ijumptable] and [Istore] leave [x] unchanged;
+   - [Iop Omove] with exactly one argument records the move;
+   - any other [Iop], as well as [Iload] and [Icall], kill their destination;
+   - [Ibuiltin] currently resets the relation to [top] (see the TODO);
+   - [Itailcall] and [Ireturn] yield [RB.bot]. *)
+Definition apply_instr instr x :=
+  match instr with
+  | Inop _
+  | Icond _ _ _ _ _
+  | Ijumptable _ _
+  | Istore _ _ _ _ _ => Some x
+  | Iop Omove (src :: nil) dst _ => Some (move src dst x)
+  | Iop _ _ dst _
+  | Iload _ _ _ _ dst _
+  | Icall _ _ _ dst _ => Some (kill dst x)
+  | Ibuiltin _ _ res _ => Some (RELATION.top) (* TODO (kill_builtin_res res x) *)
+  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  end.
+
+(* Transfer function at node [pc], lifted to [RB.t]: bottom ([None]) is
+   propagated unchanged, a node without instruction yields [RB.bot], and
+   otherwise [apply_instr] is applied to the instruction found at [pc]. *)
+Definition apply_instr' code (pc : node) (ro : RB.t) : RB.t :=
+  match ro with
+  | None => None
+  | Some x =>
+    match code ! pc with
+    | None => RB.bot
+    | Some instr => apply_instr instr x
+    end
+  end.
+
+(* Run the dataflow solver [DS.fixpoint] on the code of [f], with
+   [apply_instr'] as transfer function and [Some RELATION.top] (the empty
+   relation) as the value at the entry point. *)
+Definition forward_map (f : RTL.function) := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+
+(* Register to use in place of [x] according to [rel]: the register bound
+   to [x], or [x] itself when there is no binding. *)
+Definition get_r (rel : RELATION.t) (x : reg) :=
+  match PTree.get x rel with
+  | None => x
+  | Some src => src
+  end.
+
+(* Same as [get_r] on a relation with bottom; bottom ([None]) performs no
+   substitution. *)
+Definition get_rb (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => get_r rel x
+  end.
+
+(* Substitute register [x] at node [pc] using the analysis result [fmap].
+   When [fmap] is [None], [x] is returned unchanged. *)
+Definition subst_arg (fmap : option (PMap.t RB.t)) (pc : node) (x : reg) : reg :=
+  match fmap with
+  | None => x
+  | Some inv => get_rb (PMap.get pc inv) x
+  end.
+
+(* Pointwise [subst_arg] over a list of registers. *)
+Definition subst_args fmap pc := List.map (subst_arg fmap pc).
+
+(* Transform *)
+(* Rewrite the register arguments of the instruction at [pc] through
+   [subst_arg] / [subst_args]. Destination registers, successors and the
+   source register [src] of [Istore] are kept as they are. Instructions not
+   listed below ([Inop], [Ibuiltin], [Ireturn None]) are returned unchanged. *)
+Definition transf_instr (fmap : option (PMap.t RB.t))
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    Iop op (subst_args fmap pc args) dst s
+  | Iload trap chunk addr args dst s =>
+    Iload trap chunk addr (subst_args fmap pc args) dst s
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) src s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 i =>
+    Icond cond (subst_args fmap pc args) s1 s2 i
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+
+(* Transform a function: the analysis [forward_map f] is computed and
+   [transf_instr] is mapped over the code; signature, parameters, stack size
+   and entry point are copied unchanged. *)
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+
+(* Lift [transf_function] to function definitions with [AST.transf_fundef]. *)
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+(* Apply [transf_fundef] to a whole program with [transform_program]. *)
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/ForwardMovesproof.v b/backend/ForwardMovesproof.v
new file mode 100644
index 00000000..826d4250
--- /dev/null
+++ b/backend/ForwardMovesproof.v
@@ -0,0 +1,801 @@
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import ForwardMoves.
+
+
+Definition match_prog (p tp: RTL.program) :=  (* tp is related to p function by function *)
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.  (* each tf must be exactly transf_fundef f; the second relation is eq *)
+
+Lemma transf_program_match:  (* the output of transf_program satisfies match_prog *)
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:  (* function lookup by value commutes with transf_fundef *)
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof. exact (Genv.find_funct_transf TRANSL). Qed.
+
+Lemma function_ptr_translated:  (* function lookup by pointer commutes with transf_fundef *)
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof. exact (Genv.find_funct_ptr_transf TRANSL). Qed.
+
+Lemma symbols_preserved:  (* find_symbol gives the same result in tge and ge *)
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof. exact (Genv.find_symbol_transf TRANSL). Qed.
+
+Lemma senv_preserved:  (* ge and tge are related by Senv.equiv *)
+  Senv.equiv ge tge.
+Proof. exact (Genv.senv_transf TRANSL). Qed.
+
+Lemma sig_preserved:  (* transf_fundef leaves funsig unchanged *)
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; trivial.
+Qed.
+
+Lemma find_function_translated:  (* find_function commutes with transf_fundef, for both forms of ros *)
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.  (* ros is a register *)
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.  (* ros is an identifier *)
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:  (* the transformed code at pc is transf_instr applied to the original instruction at pc *)
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc =
+    Some(transf_instr (forward_map f) pc i).
+Proof.
+  intros until i. intro CODE.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.  (* transf_function builds fn_code with PTree.map *)
+  unfold option_map.
+  rewrite CODE.
+  reflexivity.
+Qed.
+
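+(* Unused alternative formulation, kept for reference; the definition in use is [fmap_sem] further below.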
+Definition fmap_sem (fmap : option (PMap.t RB.t)) (pc : node) (rs : regset) :=
+  forall x : reg,
+    (rs # (subst_arg fmap pc x)) = (rs # x).
+ *)
+
+Lemma apply_instr'_bot :  (* apply_instr' maps RB.bot to RB.bot, for any code and pc *)
+  forall code,
+  forall pc,
+    RB.eq (apply_instr' code pc RB.bot) RB.bot.
+Proof.
+  reflexivity.
+Qed.
+
+Definition get_rb_sem (rb : RB.t) (rs : regset) :=  (* rs satisfies rb *)
+  match rb with
+  | None => False  (* no register state satisfies None *)
+  | Some rel =>
+    forall x : reg,
+      (rs # (get_r rel x)) = (rs # x)  (* x and the register get_r picks for it hold the same value *)
+  end.
+
+Lemma get_rb_sem_ge:  (* if RB.ge rb1 rb2, any rs satisfying rb2 also satisfies rb1 *)
+  forall rb1 rb2 : RB.t,
+    (RB.ge rb1 rb2) ->
+    forall rs : regset,
+      (get_rb_sem rb2 rs) -> (get_rb_sem rb1 rs).
+Proof.
+  destruct rb1 as [r1 | ];
+    destruct rb2 as [r2 | ];
+    unfold get_rb_sem;
+    simpl;
+    intros GE rs RB2RS;
+    try contradiction.
+  unfold RELATION.ge in GE.
+  unfold get_r in *.
+  intro x.
+  pose proof (GE x) as GEx.
+  pose proof (RB2RS x) as RB2RSx.
+  destruct (r1 ! x) as [r1x | ] in *;
+    destruct (r2 ! x) as [r2x | ] in *;
+    congruence.
+Qed.
+
+Definition fmap_sem (fmap : option (PMap.t RB.t))  (* invariant: rs satisfies the entry of fmap at pc *)
+  (pc : node) (rs : regset) :=
+  match fmap with
+  | None => True  (* no map: nothing is required of rs *)
+  | Some m => get_rb_sem (PMap.get pc m) rs
+  end.
+
+Lemma subst_arg_ok:  (* under fmap_sem, arg and its substitute hold the same value in rs *)
+  forall f,
+  forall pc,
+  forall rs,
+  forall arg,
+    fmap_sem (forward_map f) pc rs ->
+    rs # (subst_arg (forward_map f) pc arg) = rs # arg.
+Proof.
+  intros until arg.
+  intro SEM.
+  unfold fmap_sem in SEM.
+  destruct (forward_map f) as [map |]in *; trivial.
+  simpl.
+  unfold get_rb_sem in *.
+  destruct (map # pc).
+  2: contradiction.  (* a None entry makes SEM False *)
+  apply SEM.
+Qed.
+
+Lemma subst_args_ok:  (* subst_arg_ok extended to argument lists, by induction on args *)
+  forall f,
+  forall pc,
+  forall rs,
+  fmap_sem (forward_map f) pc rs ->
+  forall args,
+    rs ## (subst_args (forward_map f) pc args) = rs ## args.
+Proof.
+  induction args; trivial.
+  simpl.
+  f_equal.
+  apply subst_arg_ok; assumption.
+  assumption.
+Qed.
+
+Lemma kill_ok:  (* if rs satisfies mpc, then rs with dst overwritten by any v satisfies kill dst mpc *)
+  forall dst,
+  forall mpc,
+  forall rs,
+  forall v,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs # dst <- v.
+Proof.
+  unfold get_rb_sem.
+  intros until v.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  {  (* x = dst *)
+    subst dst.
+    rewrite Regmap.gss.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    apply Regmap.gss.
+  }
+  rewrite (Regmap.gso v rs NEQ).  (* x <> dst from here on *)
+  unfold kill, get_r in *.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    {
+      subst dst.
+      rewrite Regmap.gso by assumption.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  rewrite Regmap.gso by assumption.
+  reflexivity.
+Qed.
+
+Lemma kill_weaken:  (* if rs satisfies mpc, the same rs satisfies kill dst mpc *)
+  forall dst,
+  forall mpc,
+  forall rs,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs.
+Proof.
+  unfold get_rb_sem.
+  intros until rs.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  {  (* x = dst *)
+    subst dst.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  unfold kill, get_r in *.  (* x <> dst from here on *)
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    {
+      reflexivity.
+    }
+    assumption.
+  }
+  reflexivity.
+Qed.
+
+Lemma top_ok :  (* every register state satisfies RELATION.top *)
+  forall rs, get_rb_sem (Some RELATION.top) rs.
+Proof.
+  unfold get_rb_sem, RELATION.top.
+  intros.
+  unfold get_r.
+  rewrite PTree.gempty.  (* after unfolding RELATION.top, the lookup is on an empty tree *)
+  reflexivity.
+Qed.
+
+Lemma move_ok:  (* if rs satisfies mpc, then after res := rs # src the state satisfies move src res mpc *)
+  forall mpc : RELATION.t,
+  forall src res : reg,
+  forall rs : regset,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (move src res mpc)) (rs # res <- (rs # src)).
+Proof.
+  unfold get_rb_sem, move.
+  intros until rs.
+  intros SEM x.
+  unfold get_r in *.
+  destruct (Pos.eq_dec res x).
+  {  (* x is the destination res *)
+    subst res.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    pose proof (SEM src) as SEMsrc.
+    destruct (mpc ! src) as [mpcsrc | ] in *.
+    {
+      destruct (Pos.eq_dec x mpcsrc).
+      {
+        subst mpcsrc.
+        rewrite Regmap.gss.
+        reflexivity.
+      }
+      rewrite Regmap.gso by congruence.
+      assumption.
+    }
+    destruct (Pos.eq_dec x src).
+    {
+      subst src.
+      rewrite Regmap.gss.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    reflexivity.
+  }
+  rewrite PTree.gso by congruence.  (* x <> res from here on *)
+  rewrite Regmap.gso with (i := x) by congruence.
+  unfold kill.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x) as [ r |].
+  {
+    destruct (Pos.eq_dec res r).
+    {
+      subst r.
+      rewrite Regmap.gso by congruence.
+      trivial.
+    }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  rewrite Regmap.gso by congruence.
+  reflexivity.
+Qed.
+  
+Ltac TR_AT :=  (* from a hypothesis (fn_code _)!_ = Some _, add the matching transf_function_at fact *)
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Definition is_killed_in_map (map : PMap.t RB.t) pc res :=  (* the entry at pc is RELATION.ge some relation with res killed *)
+  match PMap.get pc map with
+  | None => True
+  | Some rel => exists rel', RELATION.ge rel (kill res rel')
+  end.
+
+Definition is_killed_in_fmap fmap pc res :=  (* is_killed_in_map lifted to an optional map *)
+  match fmap with
+  | None => True  (* no map: nothing to check *)
+  | Some map => is_killed_in_map map pc res
+  end.
+
+Lemma killed_twice:  (* kill is idempotent up to RELATION.eq; stated as a Lemma like the other Qed-closed facts here *)
+  forall rel : RELATION.t,
+  forall res,
+    RELATION.eq (kill res rel) (kill res (kill res rel)).
+Proof.
+  unfold kill, RELATION.eq.
+  intros.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gfilter1.
+  destruct (Pos.eq_dec res x).
+  {  (* x is the killed register *)
+    subst res.
+    rewrite PTree.grs.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  destruct (rel ! x) as [relx | ]; trivial.
+  destruct (Pos.eq_dec res relx); trivial.
+  destruct (Pos.eq_dec res relx); congruence.
+Qed.
+
+Lemma get_rb_killed:  (* if RELATION.ge mpc (kill res rel), then mpc still holds after any write to res *)
+  forall mpc,
+  forall rs,
+  forall rel,
+  forall res,
+  forall vres,
+    (get_rb_sem (Some mpc) rs) ->
+    (RELATION.ge mpc (kill res rel)) ->
+    (get_rb_sem (Some mpc) rs # res <- vres).
+Proof.
+  simpl.
+  intros until vres.
+  intros SEM GE x.
+  pose proof (GE x) as GEx.
+  pose proof (SEM x) as SEMx.
+  unfold get_r in *.
+  destruct (mpc ! x) as [mpcx | ] in *; trivial.
+  unfold kill in GEx.
+  rewrite PTree.gfilter1 in GEx.
+  destruct (Pos.eq_dec res x) as [ | res_NE_x].
+  {  (* x = res: GEx becomes contradictory *)
+    subst res.
+    rewrite PTree.grs in GEx.
+    discriminate.
+  }
+  rewrite PTree.gro in GEx by congruence.
+  rewrite Regmap.gso with (i := x) by congruence.
+  destruct (rel ! x) as [relx | ]; try discriminate.
+  destruct (Pos.eq_dec res relx) as [ res_EQ_relx | res_NE_relx] in *; try discriminate.
+  rewrite Regmap.gso by congruence.
+  congruence.
+Qed.
+  
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=  (* relation between source and transformed stack frames *)
+| match_frames_intro: forall res f sp pc rs,
+    (fmap_sem (forward_map f) pc rs) ->  (* the invariant holds at the frame's pc *)
+    (is_killed_in_fmap (forward_map f) pc res) ->  (* the result register res is killed at pc *)
+      match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=  (* simulation relation: only the function and the stack differ *)
+  | match_regular_states: forall stk f sp pc rs m stk'
+                                 (STACKS: list_forall2 match_frames stk stk'),
+      (fmap_sem (forward_map f) pc rs) ->  (* the invariant holds at the current pc *)
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma op_cases:  (* apply_instr on an Iop is either move (Omove with one argument) or kill dst *)
+  forall op,
+  forall args,
+  forall dst,
+  forall s,
+  forall x,
+    (exists src, op=Omove /\ args = src :: nil /\
+                 (apply_instr (Iop op args dst s) x) = Some (move src dst x))
+    \/
+    (apply_instr (Iop op args dst s) x) = Some (kill dst x).
+Proof.
+  destruct op; try (right; simpl; reflexivity).
+  destruct args as [| arg0 args0t]; try (right; simpl; reflexivity).
+  destruct args0t as [| arg1 args1t]; try (right; simpl; reflexivity).
+  left.  (* only goal left: Omove with exactly one argument *)
+  eauto.
+Qed.
+
+Lemma step_simulation:  (* lock-step diagram: each source step is matched by one target step with the same trace *)
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+  (* remaining goal: fmap_sem at the successor, derived from DS.fixpoint_solution *)
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0.
+  rewrite subst_args_ok by assumption.
+  apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  rewrite MPC in GE.
+  rewrite H in GE.
+  (* split on op_cases: the operation is either a move or kills res *)
+  destruct (op_cases op args res pc' mpc) as [[src [OP [ARGS MOVE]]] | KILL].
+  {
+    subst op.
+    subst args.
+    rewrite MOVE in GE.
+    simpl in H0.
+    simpl in GE.
+    apply get_rb_sem_ge with (rb2 := Some (move src res mpc)).
+    assumption.
+    replace v with (rs # src) by congruence.
+    apply move_ok.
+    assumption.
+  }
+  rewrite KILL in GE.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  assumption.
+  apply kill_ok.
+  assumption.
+  
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. constructor; auto. constructor.
+  (* match_frames for the pushed frame: first fmap_sem, then is_killed_in_fmap *)
+  {
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  {
+    replace (Some (kill res mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_weaken.
+  assumption.
+  }
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  unfold fmap_sem in *.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  rewrite H in GE.
+  simpl in GE.
+  unfold is_killed_in_fmap, is_killed_in_map.
+  unfold RB.ge in GE.
+  destruct (map # pc') as [mpc'|] eqn:MPC' in *; trivial.
+  eauto.
+  
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. auto.
+  
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  (* the transfer function yields RELATION.top here, so top_ok closes the goal *)
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    replace (Some RELATION.top) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply top_ok.
+  
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite subst_args_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    destruct b; tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite subst_arg_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    apply list_nth_z_in with (n := Int.unsigned n).
+    assumption.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* return instruction (exec_Ireturn), with and without a result register *)
+- destruct or as [arg | ].
+  {
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    unfold regmap_optget.
+    rewrite subst_arg_ok by eassumption.
+    constructor; auto.
+  }
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    constructor; auto.
+  
+  
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    eapply DS.fixpoint_entry with (code := fn_code f) (successors := successors_instr); try eassumption.
+  }
+  apply top_ok.
+  
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    constructor; auto.
+
+(* return state (exec_return): the top frame is popped and its res register is written *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  unfold is_killed_in_fmap in H8.
+  unfold is_killed_in_map in H8.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  destruct H8 as [rel' RGE].
+  eapply get_rb_killed; eauto.
+Qed.
+
+
+Lemma transf_initial_states:  (* every initial state of prog has a matching initial state of tprog *)
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:  (* matching states agree on being final, with the same result r *)
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:  (* main result, assembled from the four lemmas applied below *)
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 67da47da..785b0a2d 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -238,7 +238,6 @@ type graph = {
    according to their types.  A variable can be forced into class 2
    by giving it a negative spill cost. *)
 
-
 let class_of_reg r =
   if Conventions1.is_float_reg r then 1 else 0
 
diff --git a/backend/Inlining.v b/backend/Inlining.v
index f7ee4166..8c7e1898 100644
--- a/backend/Inlining.v
+++ b/backend/Inlining.v
@@ -364,9 +364,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
   | Iop op args res s =>
       set_instr (spc ctx pc)
                 (Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s))
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       set_instr (spc ctx pc)
-                (Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s))
+                (Iload trap chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s))
   | Istore chunk addr args src s =>
       set_instr (spc ctx pc)
                 (Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s))
@@ -397,9 +397,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
   | Ibuiltin ef args res s =>
       set_instr (spc ctx pc)
                 (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s))
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 info =>
       set_instr (spc ctx pc)
-                (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2))
+                (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) info)
   | Ijumptable r tbl =>
       set_instr (spc ctx pc)
                 (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl))
diff --git a/backend/Inliningaux.ml b/backend/Inliningaux.ml
index 842e0c93..cf308962 100644
--- a/backend/Inliningaux.ml
+++ b/backend/Inliningaux.ml
@@ -16,8 +16,9 @@ open FSetAVL
 open Maps
 open Op
 open Ordered
-open !RTL
-
+open! RTL
+open Camlcoq
+   
 module PSet = Make(OrderedPositive)
 
 type inlining_info = {
@@ -57,7 +58,7 @@ let used_in_globvar io gv =
 let fun_inline_analysis id io fn =
   let inst io nid = function
     | Iop (op, args, dest, succ) -> used_id io (globals_operation op)
-    | Iload (chunk, addr, args, dest, succ)
+    | Iload (_, chunk, addr, args, dest, succ)
     | Istore (chunk, addr, args, dest, succ) -> used_id io (globals_addressing addr)
     | Ibuiltin (ef, args, dest, succ) -> used_id io (globals_of_builtin_args args)
     | Icall (_, Coq_inr cid, _, _, _)
@@ -83,13 +84,15 @@ let static_called_once id io =
   else
     false
 
-(* To be considered: heuristics based on size of function? *)
+(* D. Monniaux: size heuristic, RTL.max_pc_function f against the auto-inline threshold *)
+let small_enough (f : coq_function) =
+  !Clflags.option_inline_auto_threshold >= P.to_int (RTL.max_pc_function f)
 
 let should_inline (io: inlining_info) (id: ident) (f: coq_function) =
   if !Clflags.option_finline then begin
     match C2C.atom_inline id with
     | C2C.Inline -> true
     | C2C.Noinline -> false
-    | C2C.No_specifier -> static_called_once id io
+    | C2C.No_specifier -> static_called_once id io || small_enough f (* no specifier: either test suffices *)
   end else
     false
diff --git a/backend/Inliningproof.v b/backend/Inliningproof.v
index 181f40bf..c4efaf18 100644
--- a/backend/Inliningproof.v
+++ b/backend/Inliningproof.v
@@ -744,7 +744,7 @@ Lemma match_stacks_free_right:
   match_stacks F m m1' stk stk' sp.
 Proof.
   intros. eapply match_stacks_invariant; eauto.
-  intros. eapply Mem.perm_free_1; eauto.
+  intros. eapply Mem.perm_free_1; eauto with ordered_type.
   intros. eapply Mem.perm_free_3; eauto.
 Qed.
 
@@ -929,6 +929,15 @@ Proof.
   intros. inv H. eauto.
 Qed.
 
+Lemma eval_addressing_none:  (* a None result of eval_addressing carries over to the saddr-shifted addressing *)
+  forall sp' ctx addr rs,
+  eval_addressing ge (Vptr sp' (Ptrofs.repr (dstk ctx))) addr rs = None ->
+  eval_addressing ge (Vptr sp' Ptrofs.zero) (saddr ctx addr) rs = None.
+Proof.
+  intros until rs; intro Heval.
+  destruct addr; destruct rs as [| r0 rs1]; simpl in *; trivial; discriminate.  (* case analysis on addr and on the argument list *)
+Qed.
+
 Theorem step_simulation:
   forall S1 t S2,
   step ge S1 t S2 ->
@@ -976,6 +985,51 @@ Proof.
   apply match_stacks_inside_set_reg; auto.
   apply agree_set_reg; auto.
 
+- (* load notrap1 *)
+  exploit tr_funbody_inv; eauto. intros TR; inv TR.
+  left; econstructor; split.
+  eapply plus_one. eapply exec_Iload_notrap1. eassumption.
+  rewrite eval_addressing_preserved with (ge1:=ge) (ge2:=tge).
+  exploit eval_addressing_inj_none.
+  4: eassumption.
+  intros. eapply symbol_address_inject.
+  eapply match_stacks_inside_globals; eauto.
+  eauto.
+  instantiate (1 := rs'##(sregs ctx args)). eapply agree_val_regs; eauto.
+  rewrite Ptrofs.add_zero_l.
+  apply eval_addressing_none.
+  exact symbols_preserved.
+  econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+
+- (* load notrap2 *)
+  exploit tr_funbody_inv; eauto. intros TR; inv TR.
+
+  exploit eval_addressing_inject.
+    eapply match_stacks_inside_globals; eauto.
+    eexact SP.
+    instantiate (2 := rs##args). instantiate (1 := rs'##(sregs ctx args)). eapply agree_val_regs; eauto.
+    eauto.
+  fold (saddr ctx addr). intros [a' [P Q]].
+
+  destruct (Mem.loadv chunk m' a') eqn:Hload'.
+  + left; econstructor; split.
+    eapply plus_one.
+    eapply exec_Iload; eauto.
+    try (rewrite <- P; apply eval_addressing_preserved; exact symbols_preserved).
+    econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+    
+  + left; econstructor; split.
+    eapply plus_one.
+    eapply exec_Iload_notrap2; eauto.
+    try (rewrite <- P; apply eval_addressing_preserved; exact symbols_preserved).
+    econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+  
 - (* store *)
   exploit tr_funbody_inv; eauto. intros TR; inv TR.
   exploit eval_addressing_inject.
@@ -1043,7 +1097,7 @@ Proof.
   eapply match_stacks_bound with (bound := sp').
   eapply match_stacks_invariant; eauto.
     intros. eapply Mem.perm_free_3; eauto.
-    intros. eapply Mem.perm_free_1; eauto.
+    intros. eapply Mem.perm_free_1; eauto with ordered_type.
     intros. eapply Mem.perm_free_3; eauto.
   erewrite Mem.nextblock_free; eauto. red in VB; xomega.
   eapply agree_val_regs; eauto.
@@ -1135,7 +1189,7 @@ Proof.
   eapply match_stacks_bound with (bound := sp').
   eapply match_stacks_invariant; eauto.
     intros. eapply Mem.perm_free_3; eauto.
-    intros. eapply Mem.perm_free_1; eauto.
+    intros. eapply Mem.perm_free_1; eauto with ordered_type.
     intros. eapply Mem.perm_free_3; eauto.
   erewrite Mem.nextblock_free; eauto. red in VB; xomega.
   destruct or; simpl. apply agree_val_reg; auto. auto.
@@ -1182,7 +1236,7 @@ Proof.
     subst b1. rewrite D in H8; inv H8. eelim Plt_strict; eauto.
     intros. eapply Mem.perm_alloc_1; eauto.
     intros. exploit Mem.perm_alloc_inv. eexact A. eauto.
-    rewrite dec_eq_false; auto.
+    rewrite dec_eq_false; auto with ordered_type.
   auto. auto. auto. eauto. auto.
   rewrite H5. apply agree_regs_init_regs. eauto. auto. inv H1; auto. congruence. auto.
   eapply Mem.valid_new_block; eauto.
diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v
index c345c942..eba026ec 100644
--- a/backend/Inliningspec.v
+++ b/backend/Inliningspec.v
@@ -270,10 +270,10 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop :=
       Ple res ctx.(mreg) ->
       c!(spc ctx pc) = Some (Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
       tr_instr ctx pc (Iop op args res s) c
-  | tr_load: forall ctx pc c chunk addr args res s,
+  | tr_load: forall ctx pc c trap chunk addr args res s,
       Ple res ctx.(mreg) ->
-      c!(spc ctx pc) = Some (Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
-      tr_instr ctx pc (Iload chunk addr args res s) c
+      c!(spc ctx pc) = Some (Iload trap chunk (saddr ctx addr) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
+      tr_instr ctx pc (Iload trap chunk addr args res s) c
   | tr_store: forall ctx pc c chunk addr args src s,
       c!(spc ctx pc) = Some (Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s)) ->
       tr_instr ctx pc (Istore chunk addr args src s) c
@@ -312,9 +312,9 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop :=
       match res with BR r => Ple r ctx.(mreg) | _ => True end ->
       c!(spc ctx pc) = Some (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s)) ->
       tr_instr ctx pc (Ibuiltin ef args res s) c
-  | tr_cond: forall ctx pc cond args s1 s2 c,
-      c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2)) ->
-      tr_instr ctx pc (Icond cond args s1 s2) c
+  | tr_cond: forall ctx pc cond args s1 s2 c i,
+      c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) i) ->
+      tr_instr ctx pc (Icond cond args s1 s2 i) c
   | tr_jumptable: forall ctx pc r tbl c,
       c!(spc ctx pc) = Some (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl)) ->
       tr_instr ctx pc (Ijumptable r tbl) c
diff --git a/backend/Json.ml b/backend/Json.ml
index b8f66c08..bd4d6ff9 100644
--- a/backend/Json.ml
+++ b/backend/Json.ml
@@ -10,7 +10,6 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-open Format
 open Camlcoq
 
 
@@ -18,16 +17,21 @@ open Camlcoq
 
 (* Print a string as json string *)
 let pp_jstring oc s =
-  fprintf oc "\"%s\"" s
+  output_string oc "\"";
+  output_string oc s;
+  output_string oc "\""
 
 (* Print a bool as json bool *)
-let pp_jbool oc = fprintf oc "%B"
+let pp_jbool oc b = output_string oc (string_of_bool b)
 
 (* Print an int as json int *)
-let pp_jint oc = fprintf oc "%d"
+let pp_jint oc i = output_string oc (string_of_int i)
 
 (* Print an int32 as json int *)
-let pp_jint32 oc = fprintf oc "%ld"
+let pp_jint32 oc i = output_string oc (Int32.to_string i)
+
+(* Print an int64 as json int *)
+let pp_jint64 oc i = output_string oc (Int64.to_string i)
 
 (* Print optional value *)
 let pp_jopt pp_elem oc = function
@@ -36,15 +40,19 @@ let pp_jopt pp_elem oc = function
 
 (* Print opening and closing curly braces for json dictionaries *)
 let pp_jobject_start pp =
-  fprintf pp "@[<v 1>{"
+  output_string pp "\n{"
 
 let pp_jobject_end pp =
-  fprintf pp "@;<0 -1>}@]"
+  output_string pp "}"
 
 (* Print a member of a json dictionary *)
 let pp_jmember ?(first=false) pp name pp_mem mem =
-  let sep = if first then "" else "," in
-  fprintf pp "%s@ \"%s\": %a" sep name pp_mem mem
+  if not first then output_string pp ",";
+  output_string pp " ";
+  pp_jstring pp name;
+  output_string pp " :";
+  pp_mem pp mem;
+  output_string pp "\n"
 
 (* Print singleton object *)
 let pp_jsingle_object pp name pp_mem mem =
@@ -54,29 +62,31 @@ let pp_jsingle_object pp name pp_mem mem =
 
 (* Print a list as json array *)
 let pp_jarray elem pp l =
-  match l with
-  | []  ->  fprintf pp "[]";
+  let pp_sep () = output_string pp ", " in
+  output_string pp "[";
+  begin match l with
+  | []  -> ()
   | hd::tail ->
-    fprintf pp "@[<v 1>[";
-    fprintf pp "%a" elem hd;
-    List.iter (fun l -> fprintf pp ",@ %a" elem l) tail;
-  fprintf pp "@;<0 -1>]@]"
+    elem pp hd;
+    List.iter (fun l -> pp_sep (); elem pp l) tail;
+  end;
+  output_string pp "]"
 
 (* Helper functions for printing coq integer and floats *)
 let pp_int pp i =
-  fprintf pp "%ld" (camlint_of_coqint i)
+  pp_jint32 pp (camlint_of_coqint i)
 
 let pp_int64 pp i =
-  fprintf pp "%Ld" (camlint64_of_coqint i)
+  pp_jint64 pp (camlint64_of_coqint i)
 
 let pp_float32 pp f =
-  fprintf pp "%ld" (camlint_of_coqint (Floats.Float32.to_bits f))
+  pp_jint32 pp (camlint_of_coqint (Floats.Float32.to_bits f))
 
 let pp_float64 pp f =
-  fprintf pp "%Ld" (camlint64_of_coqint (Floats.Float.to_bits f))
+  pp_jint64 pp (camlint64_of_coqint (Floats.Float.to_bits f))
 
 let pp_z pp z =
-  fprintf pp "%s" (Z.to_string z)
+  output_string pp (Z.to_string z)
 
 (* Helper functions for printing assembler constructs *)
 let pp_atom pp a =
@@ -106,4 +116,4 @@ let reset_id () =
 
 let pp_id_const pp () =
   let i = next_id () in
-  pp_jsingle_object pp "Integer" (fun pp i -> fprintf pp "%d" i) i
+  pp_jsingle_object pp "Integer" pp_jint i
diff --git a/backend/JsonAST.ml b/backend/JsonAST.ml
index 4e57106f..8905e252 100644
--- a/backend/JsonAST.ml
+++ b/backend/JsonAST.ml
@@ -15,7 +15,6 @@ open Asm
 open AST
 open C2C
 open Json
-open Format
 open Sections
 
 
@@ -54,8 +53,8 @@ let pp_section pp sec =
   | Section_ais_annotation -> () (* There should be no info in the debug sections *)
 
 let pp_int_opt pp = function
-  | None -> fprintf pp "0"
-  | Some i -> fprintf pp "%d" i
+  | None -> output_string pp "0"
+  | Some i -> pp_jint pp i
 
 let pp_fundef pp_inst pp (name,fn) =
   let alignment = atom_alignof name
@@ -119,9 +118,8 @@ let pp_program pp pp_inst prog =
   pp_jobject_end pp
 
 let pp_mnemonics pp mnemonic_names =
-  let mnemonic_names = List.sort (String.compare) mnemonic_names in
-  let new_line pp () = pp_print_string pp "\n" in
-  pp_print_list ~pp_sep:new_line pp_print_string pp mnemonic_names
+  let new_line pp () = Format.pp_print_string pp "\n" in
+  Format.pp_print_list ~pp_sep:new_line Format.pp_print_string pp mnemonic_names
 
 let jdump_magic_number = "CompCertJDUMPRelease: " ^ Version.version
 
@@ -153,4 +151,4 @@ let pp_ast pp pp_inst ast sourcename =
     pp_jmember pp "Compilation Unit" pp_jstring sourcename;
     pp_jmember pp "Asm Ast" (fun pp prog -> pp_program pp pp_inst prog) ast;
     pp_jobject_end pp;
-    Format.pp_print_flush pp ()
+    flush pp
diff --git a/backend/JsonAST.mli b/backend/JsonAST.mli
index 7afdce51..c32439e4 100644
--- a/backend/JsonAST.mli
+++ b/backend/JsonAST.mli
@@ -13,4 +13,4 @@
 
 
 val pp_mnemonics : Format.formatter -> string list -> unit
-val pp_ast : Format.formatter -> (Format.formatter  -> Asm.code -> unit) -> (Asm.coq_function AST.fundef, 'a) AST.program  -> string -> unit
+val pp_ast : out_channel -> (out_channel  -> Asm.code -> unit) -> (Asm.coq_function AST.fundef, 'a) AST.program  -> string -> unit
diff --git a/backend/LTL.v b/backend/LTL.v
index 5e7eec8c..3edd60a2 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -29,7 +29,7 @@ Definition node := positive.
 
 Inductive instruction: Type :=
   | Lop (op: operation) (args: list mreg) (res: mreg)
-  | Lload (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg)
+  | Lload (trap : trapping_mode) (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg)
   | Lgetstack (sl: slot) (ofs: Z) (ty: typ) (dst: mreg)
   | Lsetstack (src: mreg) (sl: slot) (ofs: Z) (ty: typ)
   | Lstore (chunk: memory_chunk) (addr: addressing) (args: list mreg) (src: mreg)
@@ -37,7 +37,7 @@ Inductive instruction: Type :=
   | Ltailcall (sg: signature) (ros: mreg + ident)
   | Lbuiltin (ef: external_function) (args: list (builtin_arg loc)) (res: builtin_res mreg)
   | Lbranch (s: node)
-  | Lcond (cond: condition) (args: list mreg) (s1 s2: node)
+  | Lcond (cond: condition) (args: list mreg) (s1 s2: node) (info: option bool)
   | Ljumptable (arg: mreg) (tbl: list node)
   | Lreturn.
 
@@ -209,11 +209,24 @@ Inductive step: state -> trace -> state -> Prop :=
       rs' = Locmap.set (R res) v (undef_regs (destroyed_by_op op) rs) ->
       step (Block s f sp (Lop op args res :: bb) rs m)
         E0 (Block s f sp bb rs' m)
-  | exec_Lload: forall s f sp chunk addr args dst bb rs m a v rs',
+  | exec_Lload: forall s f sp trap chunk addr args dst bb rs m a v rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
-      step (Block s f sp (Lload chunk addr args dst :: bb) rs m)
+      step (Block s f sp (Lload trap chunk addr args dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lload_notrap1: forall s f sp chunk addr args dst bb rs m rs',
+      eval_addressing ge sp addr (reglist rs args) = None ->
+      rs' = Locmap.set (R dst)  (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lload_notrap2: forall s f sp chunk addr args dst bb rs m a rs',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = Locmap.set (R dst) (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
         E0 (Block s f sp bb rs' m)
   | exec_Lgetstack: forall s f sp sl ofs ty dst bb rs m rs',
       rs' = Locmap.set (R dst) (rs (S sl ofs ty)) (undef_regs (destroyed_by_getstack sl) rs) ->
@@ -250,11 +263,11 @@ Inductive step: state -> trace -> state -> Prop :=
   | exec_Lbranch: forall s f sp pc bb rs m,
       step (Block s f sp (Lbranch pc :: bb) rs m)
         E0 (State s f sp pc rs m)
-  | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m,
+  | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m i,
       eval_condition cond (reglist rs args) m = Some b ->
       pc = (if b then pc1 else pc2) ->
       rs' = undef_regs (destroyed_by_cond cond) rs ->
-      step (Block s f sp (Lcond cond args pc1 pc2 :: bb) rs m)
+      step (Block s f sp (Lcond cond args pc1 pc2 i :: bb) rs m)
         E0 (State s f sp pc rs' m)
   | exec_Ljumptable: forall s f sp arg tbl bb rs m n pc rs',
       rs (R arg) = Vint n ->
@@ -315,7 +328,7 @@ Fixpoint successors_block (b: bblock) : list node :=
   | nil => nil                          (**r should never happen *)
   | Ltailcall _ _ :: _ => nil
   | Lbranch s :: _ => s :: nil
-  | Lcond _ _ s1 s2 :: _ => s1 :: s2 :: nil
+  | Lcond _ _ s1 s2 _ :: _ => s1 :: s2 :: nil
   | Ljumptable _ tbl :: _ => tbl
   | Lreturn :: _ => nil
   | instr :: b' => successors_block b'
diff --git a/backend/Linear.v b/backend/Linear.v
index 447c6ba6..1443f795 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -28,7 +28,7 @@ Inductive instruction: Type :=
   | Lgetstack: slot -> Z -> typ -> mreg -> instruction
   | Lsetstack: mreg -> slot -> Z -> typ -> instruction
   | Lop: operation -> list mreg -> mreg -> instruction
-  | Lload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
+  | Lload: trapping_mode -> memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lcall: signature -> mreg + ident -> instruction
   | Ltailcall: signature -> mreg + ident -> instruction
@@ -160,11 +160,28 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Lop op args res :: b) rs m)
         E0 (State s f sp b rs' m)
   | exec_Lload:
-      forall s f sp chunk addr args dst b rs m a v rs',
+      forall s f sp trap chunk addr args dst b rs m a v rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
-      step (State s f sp (Lload chunk addr args dst :: b) rs m)
+      step (State s f sp (Lload trap chunk addr args dst :: b) rs m)
+        E0 (State s f sp b rs' m)
+  | exec_Lload_notrap1:
+      forall s f sp chunk addr args dst b rs m rs',
+      eval_addressing ge sp addr (reglist rs args) = None ->
+      rs' = Locmap.set (R dst)
+                       (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
+        E0 (State s f sp b rs' m)
+  | exec_Lload_notrap2:
+      forall s f sp chunk addr args dst b rs m a rs',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = Locmap.set (R dst)
+                       (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
         E0 (State s f sp b rs' m)
   | exec_Lstore:
       forall s f sp chunk addr args src b rs m m' a rs',
diff --git a/backend/Linearize.v b/backend/Linearize.v
index 2cfa4d3c..66b36428 100644
--- a/backend/Linearize.v
+++ b/backend/Linearize.v
@@ -163,8 +163,8 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
   | nil => k
   | LTL.Lop op args res :: b' =>
       Lop op args res :: linearize_block b' k
-  | LTL.Lload chunk addr args dst :: b' =>
-      Lload chunk addr args dst :: linearize_block b' k
+  | LTL.Lload trap chunk addr args dst :: b' =>
+      Lload trap chunk addr args dst :: linearize_block b' k
   | LTL.Lgetstack sl ofs ty dst :: b' =>
       Lgetstack sl ofs ty dst :: linearize_block b' k
   | LTL.Lsetstack src sl ofs ty :: b' =>
@@ -179,7 +179,7 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
       Lbuiltin ef args res :: linearize_block b' k
   | LTL.Lbranch s :: b' =>
       add_branch s k
-  | LTL.Lcond cond args s1 s2 :: b' =>
+  | LTL.Lcond cond args s1 s2 _ :: b' =>
       if starts_with s1 k then
         Lcond (negate_condition cond) args s2 :: add_branch s1 k
       else
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index 902724e0..bfa056ca 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -1,4 +1,4 @@
-(* *********************************************************************)
+
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
@@ -12,7 +12,6 @@
 
 open LTL
 open Maps
-open Camlcoq
 
 (* Trivial enumeration, in decreasing order of PC *)
 
@@ -29,6 +28,8 @@ let enumerate_aux f reach =
 
 (* More clever enumeration that flattens basic blocks *)
 
+open Camlcoq
+
 module IntSet = Set.Make(struct type t = int let compare = compare end)
 
 (* Determine join points: reachable nodes that have > 1 predecessor *)
@@ -80,7 +81,7 @@ let basic_blocks f joins =
        | [] -> assert false
        | Lbranch s :: _ -> next_in_block blk minpc s
        | Ltailcall (sig0, ros) :: _ -> end_block blk minpc
-       | Lcond (cond, args, ifso, ifnot) :: _ ->
+       | Lcond (cond, args, ifso, ifnot, _) :: _ ->
              end_block blk minpc; start_block ifso; start_block ifnot
        | Ljumptable(arg, tbl) :: _ ->
              end_block blk minpc; List.iter start_block tbl
@@ -110,5 +111,405 @@ let flatten_blocks blks =
 
 (* Build the enumeration *)
 
-let enumerate_aux f reach =
+let enumerate_aux_flat f reach =
   flatten_blocks (basic_blocks f (join_points f))
+
+(**
+ * Enumeration based on traces as identified by Duplicate.v 
+ *
+ * The Duplicate phase heuristically identifies the most frequented paths. Each
+ * Icond is modified so that the preferred condition is a fallthrough (ifnot)
+ * rather than a branch (ifso).
+ *
+ * The enumeration below takes advantage of this - preferring to lay out nodes
+ * following the fallthroughs of the Lcond branches.
+ *
+ * It is slightly adapted from the work of Pettis and Hansen (1990) on intraprocedural
+ * code positioning - only we do it at a coarser grain, since we don't have the exact
+ * frequencies (we only know which branch is the preferred one).
+ *)
+
+let get_some opt = match opt with (* unwraps an option; fails on None *)
+| Some thing -> thing
+| None -> failwith "Did not get some"
+
+exception EmptyList
+
+let rec last_element = function (* last item of a list; raises EmptyList on [] *)
+  | [] -> raise EmptyList
+  | [e] -> e
+  | _ :: rest -> last_element rest
+
+(* Debug helper: prints the nodes of [l] to stdout, as decimal integers. *)
+let print_plist l =
+  (* every node, including the last one, is followed by ", " *)
+  let print_node n = Printf.printf "%d, " (P.to_int n) in
+  begin
+    Printf.printf "[";
+    List.iter print_node l;
+    Printf.printf "]"
+  end
+
+(* adapted from the above join_points function, but with PTree *)
+(* Result: a tree mapping each node of [code] to true iff the traversal
+   started at [entry] reaches that node more than once. *)
+let get_join_points code entry =
+  let reached = ref (PTree.map (fun n i -> false) code) in
+  let reached_twice = ref (PTree.map (fun n i -> false) code) in
+  (* current flag of [pc] in [tree]; get_some fails when [pc] has no entry there *)
+  let is_set pc tree = get_some @@ PTree.get pc !tree in
+  let rec traverse pc =
+    if not (is_set pc reached) then begin
+      (* first visit: mark the node, then follow the successors of its block in order *)
+      reached := PTree.set pc true !reached;
+      List.iter traverse (successors_block @@ get_some @@ PTree.get pc code)
+    end else if not (is_set pc reached_twice) then
+      (* reached again: this node is a join point *)
+      reached_twice := PTree.set pc true !reached_twice
+  in traverse entry; !reached_twice
+
+let forward_sequences code entry = (* splits the nodes reached from [entry] into fallthrough sequences (lists of nodes) *)
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let join_points = get_join_points code entry in
+  (* returns the list of traversed nodes, and a list of nodes to start traversing next (both empty if [node] was already visited) *)
+  let rec traverse_fallthrough code node =
+    (* Printf.printf "Traversing %d..\n" (P.to_int node); *)
+    if not (get_some @@ PTree.get node !visited) then begin
+      visited := PTree.set node true !visited;
+      match PTree.get node code with
+      | None -> failwith "No such node"
+      | Some bb ->
+          let ln, rem = match (last_element bb) with
+          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+          | Lbuiltin _ -> assert false
+          | Ltailcall _ | Lreturn -> begin (* Printf.printf "STOP tailcall/return\n"; *) ([], []) end
+          | Lbranch n -> (* a join point is never appended: it is queued to start a sequence of its own *)
+              if get_some @@ PTree.get n join_points then ([], [n])
+              else let ln, rem = traverse_fallthrough code n in (ln, rem)
+          | Lcond (_, _, ifso, ifnot, info) -> (match info with
+            | None -> begin (* Printf.printf "STOP Lcond None\n"; *) ([], [ifso; ifnot]) end
+            | Some false -> (* ifnot is the preferred branch: keep following it unless it is a join point *)
+                if get_some @@ PTree.get ifnot join_points then ([], [ifso; ifnot])
+                else let ln, rem = traverse_fallthrough code ifnot in (ln, [ifso] @ rem)
+            | Some true ->
+                let errstr = Printf.sprintf ("Inconsistency detected in node %d: ifnot is not the preferred branch") (P.to_int node) in
+                  failwith errstr)
+          | Ljumptable(_, ln) -> begin (* Printf.printf "STOP Ljumptable\n"; *) ([], ln) end
+          in ([node] @ ln, rem)
+      end
+    else ([], [])
+  in let rec f code = function (* one sequence per start node, then the sequences of the nodes it queued, then those of the remaining start nodes *)
+  | [] -> []
+  | node :: ln ->
+      let fs, rem_from_node = traverse_fallthrough code node
+      in [fs] @ ((f code rem_from_node) @ (f code ln)) (* NOTE(review): both calls mutate [visited] and OCaml leaves the evaluation order of the operands of @ unspecified - confirm the intended order *)
+  in (f code [entry])
+
+(** Unused code
+module PInt = struct
+  type t = P.t
+  let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+module LPInt = struct
+  type t = P.t list
+  let rec compare x y =
+    match x with
+    | [] -> ( match y with
+      | [] -> 0
+      | _ -> 1 )
+    | e :: l -> match y with
+      | [] -> -1
+      | e' :: l' ->
+          let e_cmp = PInt.compare e e' in
+          if e_cmp == 0 then compare l l' else e_cmp
+end
+
+module LPSet = Set.Make(LPInt)
+
+let iter_lpset f s = Seq.iter f (LPSet.to_seq s)
+
+let first_of = function
+  | [] -> None
+  | e :: l -> Some e
+
+let rec last_of = function
+  | [] -> None
+  | e :: l -> (match l with [] -> Some e | e :: l -> last_of l)
+
+let can_be_merged code s s' =
+  let last_s = get_some @@ last_of s in
+  let first_s' = get_some @@ first_of s' in
+  match get_some @@ PTree.get last_s code with
+  | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+  | Lbuiltin _ | Ltailcall _ | Lreturn -> false
+  | Lbranch n -> n == first_s'
+  | Lcond (_, _, ifso, ifnot, info) -> (match info with
+    | None -> false
+    | Some false -> ifnot == first_s'
+    | Some true -> failwith "Inconsistency detected - ifnot is not the preferred branch")
+  | Ljumptable (_, ln) ->
+      match ln with
+      | [] -> false
+      | n :: ln -> n == first_s'
+
+let merge s s' = Some s
+
+let try_merge code (fs: (BinNums.positive list) list) =
+  let seqs = ref (LPSet.of_list fs) in
+  let oldLength = ref (LPSet.cardinal !seqs) in
+  let continue = ref true in
+  let found = ref false in
+  while !continue do
+    begin
+      found := false;
+      iter_lpset (fun s ->
+        if !found then ()
+        else iter_lpset (fun s' ->
+          if (!found || s == s') then ()
+          else if (can_be_merged code s s') then
+            begin
+              seqs := LPSet.remove s !seqs;
+              seqs := LPSet.remove s' !seqs;
+              seqs := LPSet.add (get_some (merge s s')) !seqs;
+              found := true;
+            end
+          else ()
+        ) !seqs
+      ) !seqs;
+      if !oldLength == LPSet.cardinal !seqs then
+        continue := false
+      else
+        oldLength := LPSet.cardinal !seqs
+    end
+  done;
+  !seqs
+*)
+
+(** Code adapted from Duplicateaux.get_loop_headers
+  *
+  * Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come across an edge to a Processed node, it's a loop!
+  *)
+type pos = BinNums.positive
+
+module PP = struct
+  type t = pos * pos
+  (* lexicographic order on pairs: the second components only break ties *)
+  let compare (ax, ay) (bx, by) =
+    (* the binding is not recursive: [compare] below is the one already in scope *)
+    match compare ax bx with
+    | 0 -> compare ay by
+    | dx -> dx
+end
+
+module PPMap = Map.Make(PP)
+
+type vstate = Unvisited | Processed | Visited
+
+(* Returns a PPMap binding to true every (source, target) back edge found by a DFS of [code] from [entry]. *)
+let get_loop_edges code entry =
+  let visited = ref (PTree.map (fun n i -> Unvisited) code) in
+  let is_loop_edge = ref PPMap.empty in
+  (* [from] is the node whose successors are being scanned (None for the entry) *)
+  let rec dfs_visit code from = function
+  | [] -> ()
+  | node :: ln ->
+      begin match (get_some @@ PTree.get node !visited) with
+      | Visited -> ()
+      | Processed ->
+          (* back edge: [node] is on the DFS stack; it stays Processed so later back edges to it are seen too *)
+          is_loop_edge := PPMap.add (get_some from, node) true !is_loop_edge
+      | Unvisited ->
+          visited := PTree.set node Processed !visited;
+          let next_visits = (match last_element (get_some @@ PTree.get node code) with
+          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+          | Lbuiltin _ -> assert false
+          | Ltailcall _ | Lreturn -> []
+          | Lbranch n -> [n]
+          | Lcond (_, _, ifso, ifnot, _) -> [ifso; ifnot]
+          | Ljumptable(_, ln) -> ln
+          ) in dfs_visit code (Some node) next_visits;
+          visited := PTree.set node Visited !visited
+      end;
+      (* always go on with the remaining siblings, whatever the state of [node] *)
+      dfs_visit code from ln
+  in begin
+    dfs_visit code None [entry];
+    !is_loop_edge
+  end
+
+let ppmap_is_true pp ppmap = match PPMap.find_opt pp ppmap with Some b -> b | None -> false
+
+module Int = struct
+  type t = int
+  let compare (x : t) (y : t) = compare x y (* total order on machine integers *)
+end
+
+module ISet = Set.Make(Int)
+
+(* Debug helper: prints the integer set [s] to stdout between braces. *)
+let print_iset s =
+  let print_elt i = Printf.printf "%d, " i in
+  Printf.printf "{"; ISet.iter print_elt s;
+  Printf.printf "}"
+
+(* Debug helper: prints the array of integer sets [dm] to stdout, then a newline. *)
+let print_depmap dm =
+  let print_entry deps = print_iset deps; Printf.printf ", " in
+  Printf.printf "[|"; Array.iter print_entry dm;
+  Printf.printf "|]\n"
+
+(* For each sequence of [fs] (an array of node lists), computes the set of indices of the
+   sequences that have a non-loop edge into it. Loop edges come from get_loop_edges. *)
+let construct_depmap code entry fs =
+  let is_loop_edge = get_loop_edges code entry in
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let depmap = Array.map (fun e -> ISet.empty) fs in
+  (* Index in [fs] of the sequence containing [n] (0 if none does). Nodes are compared
+     structurally (List.mem): physical equality (==) only identifies the very same heap
+     value, so two occurrences of the same node number could fail to match. *)
+  let find_index_of_node n =
+    let index = ref 0 in
+    Array.iteri (fun i s -> if List.mem n s then index := i) fs;
+    !index
+  in let check_and_update_depmap from target =
+    (* Printf.printf "From %d to %d\n" (P.to_int from) (P.to_int target); *)
+    if not (ppmap_is_true (from, target) is_loop_edge) then
+      let in_index_fs = find_index_of_node from in
+      let out_index_fs = find_index_of_node target in
+      if out_index_fs != in_index_fs then
+        depmap.(out_index_fs) <- ISet.add in_index_fs depmap.(out_index_fs)
+      else ()
+    else ()
+  in let rec dfs_visit code = function
+  | [] -> ()
+  | node :: ln ->
+      begin
+        (* parenthesised match: without it, the trailing [dfs_visit code ln] would belong to the [false] arm only *)
+        (match (get_some @@ PTree.get node !visited) with
+        | true -> ()
+        | false -> begin
+            visited := PTree.set node true !visited;
+            let bb = get_some @@ PTree.get node code in
+            let next_visits =
+              match (last_element bb) with
+              | Ltailcall _ | Lreturn -> []
+              | Lbranch n -> (check_and_update_depmap node n; [n])
+              | Lcond (_, _, ifso, ifnot, _) -> begin
+                  check_and_update_depmap node ifso;
+                  check_and_update_depmap node ifnot;
+                  [ifso; ifnot]
+                end
+              | Ljumptable(_, ln) -> begin
+                  List.iter (fun n -> check_and_update_depmap node n) ln;
+                  ln
+                end
+              (* end of bblocks should not be another value than one of the above *)
+              | _ -> failwith "last_element gave an invalid output"
+            in dfs_visit code next_visits
+          end);
+        dfs_visit code ln
+      end
+  in begin
+    dfs_visit code [entry];
+    depmap
+  end
+
+let print_sequence s = (* debug helper: prints one sequence of nodes, then a newline *)
+  let print_node n = Printf.printf "%d, " (P.to_int n) in
+  Printf.printf "["; List.iter print_node s;
+  Printf.printf "]\n"
+
+let print_ssequence ofs = (* debug helper: prints a list of node sequences *)
+  Printf.printf "[";
+  List.iter print_sequence ofs;
+  Printf.printf "]\n"
+
+let order_sequences code entry fs = (* returns the sequences of [fs] (a list of node lists) in the order chosen for layout *)
+  let fs_a = Array.of_list fs in
+  let depmap = construct_depmap code entry fs_a in (* kept up to date and printed, but not consulted when selecting (see FIXME below) *)
+  let fs_evaluated = Array.map (fun e -> false) fs_a in
+  let ordered_fs = ref [] in (* built in reverse order; reversed before being returned *)
+  let evaluate s_id = (* schedules sequence [s_id] and removes it from every dependency set *)
+    begin
+      assert (not fs_evaluated.(s_id));
+      ordered_fs := fs_a.(s_id) :: !ordered_fs;
+      fs_evaluated.(s_id) <- true;
+      (* Printf.printf "++++++\n";
+      Printf.printf "Scheduling %d\n" s_id;
+      Printf.printf "Initial depmap: "; print_depmap depmap; *)
+      Array.iteri (fun i deps ->
+        depmap.(i) <- ISet.remove s_id deps
+      ) depmap;
+      (* Printf.printf "Final depmap: "; print_depmap depmap; *)
+    end
+  in let choose_best_of candidates = (* candidate whose first node has the highest number; None if [candidates] is empty *)
+    let current_best_id = ref None in
+    let current_best_score = ref None in
+    begin
+      List.iter (fun id ->
+        match !current_best_id with
+        | None -> begin
+            current_best_id := Some id;
+            match fs_a.(id) with
+            | [] -> current_best_score := None (* an empty sequence has no score *)
+            | n::l -> current_best_score := Some (P.to_int n)
+          end
+        | Some b -> begin
+            match fs_a.(id) with
+            | [] -> () (* an empty sequence never replaces the current best *)
+            | n::l -> let nscore = P.to_int n in
+              match !current_best_score with
+              | None -> (current_best_id := Some id; current_best_score := Some nscore)
+              | Some bs -> if nscore > bs then (current_best_id := Some id; current_best_score := Some nscore)
+          end
+      ) candidates;
+      !current_best_id
+    end
+  in let select_next () = (* index of the next sequence to schedule, among those not yet evaluated *)
+    let candidates = ref [] in
+    begin
+      Array.iteri (fun i deps ->
+        begin
+          (* Printf.printf "Deps of %d: " i; print_iset deps; Printf.printf "\n"; *)
+          (* FIXME - if we keep it that way (no dependency check), remove all the unneeded stuff *)
+          if ((* deps == ISet.empty && *) not fs_evaluated.(i)) then
+            candidates := i :: !candidates
+        end
+      ) depmap;
+      if not (List.length !candidates > 0) then begin (* same filter as above while the dependency check is disabled, so this adds nothing *)
+        Array.iteri (fun i deps ->
+          if (not fs_evaluated.(i)) then candidates := i :: !candidates
+        ) depmap;
+      end;
+      get_some (choose_best_of !candidates) (* fails if no sequence is left to schedule *)
+    end
+  in begin
+    Printf.printf "-------------------------------\n"; (* NOTE(review): this trace goes to stdout unconditionally - confirm it is meant to stay *)
+    Printf.printf "depmap: "; print_depmap depmap;
+    Printf.printf "forward sequences identified: "; print_ssequence fs;
+    while List.length !ordered_fs != List.length fs do (* one sequence is scheduled per iteration *)
+      let next_id = select_next () in
+      evaluate next_id
+    done;
+    Printf.printf "forward sequences ordered: "; print_ssequence (List.rev (!ordered_fs));
+    List.rev (!ordered_fs)
+  end
+
+(* Trace-based enumeration of the nodes of [f]; the [reach] argument is not used. *)
+let enumerate_aux_trace f reach =
+  let code = f.fn_code
+  and entry = f.fn_entrypoint in
+  (* identify the fallthrough sequences, order them, then concatenate them *)
+  forward_sequences code entry |> order_sequences code entry |> List.flatten
+
+let enumerate_aux f reach = (* Clflags.option_ftracelinearize selects the trace-based enumeration *)
+  let enumerate = if !Clflags.option_ftracelinearize then enumerate_aux_trace else enumerate_aux_flat in
+  enumerate f reach
diff --git a/backend/Linearizeproof.v b/backend/Linearizeproof.v
index 10a3d8b2..18dc52a5 100644
--- a/backend/Linearizeproof.v
+++ b/backend/Linearizeproof.v
@@ -585,45 +585,61 @@ Proof.
   intros; eapply reachable_successors; eauto.
   eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto.
 
-  (* Lop *)
+- (* Lop *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   instantiate (1 := v); rewrite <- H; apply eval_operation_preserved.
   exact symbols_preserved.
   econstructor; eauto.
 
-  (* Lload *)
+- (* Lload *)
   left; econstructor; split. simpl.
-  apply plus_one. econstructor.
+  apply plus_one. eapply exec_Lload.
   instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved.
   exact symbols_preserved. eauto. eauto.
   econstructor; eauto.
 
-  (* Lgetstack *)
+- (* Lload notrap1 *)
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lload_notrap1.
+  rewrite <- H.
+  apply eval_addressing_preserved.
+  exact symbols_preserved. eauto.
+  econstructor; eauto.
+  
+- (* Lload notrap2 *)
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lload_notrap2.
+  rewrite <- H.
+  apply eval_addressing_preserved.
+  exact symbols_preserved. eauto. eauto.
+  econstructor; eauto.
+
+- (* Lgetstack *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   econstructor; eauto.
 
-  (* Lsetstack *)
+- (* Lsetstack *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   econstructor; eauto.
 
-  (* Lstore *)
+- (* Lstore *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor.
   instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved.
   exact symbols_preserved. eauto. eauto.
   econstructor; eauto.
 
-  (* Lcall *)
+- (* Lcall *)
   exploit find_function_translated; eauto. intros [tfd [A B]].
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   symmetry; eapply sig_preserved; eauto.
   econstructor; eauto. constructor; auto. econstructor; eauto.
 
-  (* Ltailcall *)
+- (* Ltailcall *)
   exploit find_function_translated; eauto. intros [tfd [A B]].
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
@@ -633,18 +649,18 @@ Proof.
   rewrite (match_parent_locset _ _ STACKS).
   econstructor; eauto.
 
-  (* Lbuiltin *)
+- (* Lbuiltin *)
   left; econstructor; split. simpl.
   apply plus_one. eapply exec_Lbuiltin; eauto.
   eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
   eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 
-  (* Lbranch *)
+- (* Lbranch *)
   assert ((reachable f)!!pc = true). apply REACH; simpl; auto.
   right; split. simpl; omega. split. auto. simpl. econstructor; eauto.
 
-  (* Lcond *)
+- (* Lcond *)
   assert (REACH1: (reachable f)!!pc1 = true) by (apply REACH; simpl; auto).
   assert (REACH2: (reachable f)!!pc2 = true) by (apply REACH; simpl; auto).
   simpl linearize_block.
@@ -670,18 +686,18 @@ Proof.
   apply plus_one. eapply exec_Lcond_false. eauto. eauto.
   econstructor; eauto.
 
-  (* Ljumptable *)
+- (* Ljumptable *)
   assert (REACH': (reachable f)!!pc = true).
     apply REACH. simpl. eapply list_nth_z_in; eauto.
   right; split. simpl; omega. split. auto. econstructor; eauto.
 
-  (* Lreturn *)
+- (* Lreturn *)
   left; econstructor; split.
   simpl. apply plus_one. econstructor; eauto.
   rewrite (stacksize_preserved _ _ TRF). eauto.
   rewrite (match_parent_locset _ _ STACKS). econstructor; eauto.
 
-  (* internal functions *)
+- (* internal functions *)
   assert (REACH: (reachable f)!!(LTL.fn_entrypoint f) = true).
     apply reachable_entrypoint.
   monadInv H7.
@@ -691,13 +707,13 @@ Proof.
   generalize EQ; intro EQ'; monadInv EQ'. simpl.
   econstructor; eauto. simpl. eapply is_tail_add_branch. constructor.
 
-  (* external function *)
+- (* external function *)
   monadInv H8. left; econstructor; split.
   apply plus_one. eapply exec_function_external; eauto.
   eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 
-  (* return *)
+- (* return *)
   inv H3. inv H1.
   left; econstructor; split.
   apply plus_one. econstructor.
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 1fe23a9d..3fe61470 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -76,7 +76,7 @@ Definition wt_instr (i: instruction) : bool :=
           let (targs, tres) := type_of_operation op in
           subtype tres (mreg_type res)
       end
-  | Lload chunk addr args dst =>
+  | Lload trap chunk addr args dst =>
       subtype (type_of_chunk chunk) (mreg_type dst)
   | Ltailcall sg ros =>
       zeq (size_arguments sg) 0
@@ -321,17 +321,34 @@ Local Opaque mreg_type.
   + (* other ops *)
     destruct (type_of_operation op) as [ty_args ty_res] eqn:TYOP. InvBooleans.
     econstructor; eauto.
+ 
     apply wt_setreg; auto; try (apply wt_undef_regs; auto).
     eapply Val.has_subtype; eauto.
+    
     change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto.
     red; intros; subst op. simpl in ISMOVE.
-    destruct args; try discriminate. destruct args; discriminate. 
+    destruct args; try discriminate. destruct args; discriminate.
+    (* [apply wt_undef_regs; auto.] is no longer needed here. *)
 - (* load *)
   simpl in *; InvBooleans.
   econstructor; eauto.
   apply wt_setreg. eapply Val.has_subtype; eauto.
   destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
   apply wt_undef_regs; auto.
+- (* load notrap1 *)
+  simpl in *; InvBooleans.
+  econstructor; eauto.
+  apply wt_setreg. eapply Val.has_subtype; eauto.
+  unfold default_notrap_load_value.
+  constructor.
+  apply wt_undef_regs; auto.
+- (* load notrap2 *)
+  simpl in *; InvBooleans.
+  econstructor; eauto.
+  apply wt_setreg. eapply Val.has_subtype; eauto.
+  unfold default_notrap_load_value.
+  constructor.
+  apply wt_undef_regs; auto.
 - (* store *)
   simpl in *; InvBooleans.
   econstructor. eauto. eauto. eauto.
diff --git a/backend/Liveness.v b/backend/Liveness.v
index 16533158..9652b363 100644
--- a/backend/Liveness.v
+++ b/backend/Liveness.v
@@ -79,7 +79,7 @@ Definition transfer
             reg_list_live args (reg_dead res after)
           else
             after
-      | Iload chunk addr args dst s =>
+      | Iload trap chunk addr args dst s =>
           if Regset.mem dst after then
             reg_list_live args (reg_dead dst after)
           else
@@ -94,7 +94,7 @@ Definition transfer
       | Ibuiltin ef args res s =>
           reg_list_live (params_of_builtin_args args)
             (reg_list_dead (params_of_builtin_res res) after)
-      | Icond cond args ifso ifnot =>
+      | Icond cond args ifso ifnot _ =>
           reg_list_live args after
       | Ijumptable arg tbl =>
           reg_live arg after
diff --git a/backend/Mach.v b/backend/Mach.v
index 9fdee9eb..1c6fdb18 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -56,7 +56,7 @@ Inductive instruction: Type :=
   | Msetstack: mreg -> ptrofs -> typ -> instruction
   | Mgetparam: ptrofs -> typ -> mreg -> instruction
   | Mop: operation -> list mreg -> mreg -> instruction
-  | Mload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
+  | Mload: trapping_mode -> memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mcall: signature -> mreg + ident -> instruction
   | Mtailcall: signature -> mreg + ident -> instruction
@@ -321,11 +321,24 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Mop op args res :: c) rs m)
         E0 (State s f sp c rs' m)
   | exec_Mload:
-      forall s f sp chunk addr args dst c rs m a v rs',
+      forall s f sp trap chunk addr args dst c rs m a v rs',
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- v) ->
-      step (State s f sp (Mload chunk addr args dst :: c) rs m)
+      step (State s f sp (Mload trap chunk addr args dst :: c) rs m)
+        E0 (State s f sp c rs' m)
+  | exec_Mload_notrap1:
+      forall s f sp chunk addr args dst c rs m rs',
+      eval_addressing ge sp addr rs##args = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+      step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
+        E0 (State s f sp c rs' m)
+  | exec_Mload_notrap2:
+      forall s f sp chunk addr args dst c rs m a rs',
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+      step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
         E0 (State s f sp c rs' m)
   | exec_Mstore:
       forall s f sp chunk addr args src c rs m m' a rs',
diff --git a/backend/NeedDomain.v b/backend/NeedDomain.v
index b35c90b2..3c2d8e20 100644
--- a/backend/NeedDomain.v
+++ b/backend/NeedDomain.v
@@ -594,7 +594,8 @@ Proof.
 Qed.
 
 (** Modular arithmetic operations: add, mul, opposite.
-    (But not subtraction because of the pointer - pointer case. *)
+    Also subtraction, but only on 64-bit targets, otherwise
+    the pointer - pointer case does not fit. *)
 
 Definition modarith (x: nval) :=
   match x with
@@ -615,6 +616,19 @@ Proof.
 - inv H; auto. inv H0; auto. destruct w1; auto.
 Qed.
 
+Lemma sub_sound:  (* soundness of the [modarith] needs for [Val.sub] *)
+  forall v1 w1 v2 w2 x,
+  vagree v1 w1 (modarith x) -> vagree v2 w2 (modarith x) ->
+  Archi.ptr64 = true ->  (* only stated for 64-bit targets *)
+  vagree (Val.sub v1 v2) (Val.sub w1 w2) x.
+Proof.
+  unfold modarith; intros. destruct x; simpl in *.  (* one bullet per case of [x] *)
+- auto.
+- unfold Val.sub; rewrite H1; InvAgree.  (* [H1] is the [Archi.ptr64 = true] hypothesis *)
+  apply eqmod_iagree. apply eqmod_sub; apply iagree_eqmod; auto.  (* go through [eqmod_sub] on both operands *)
+- inv H; auto. inv H0; auto. destruct w1; auto.
+Qed.
+
 Remark modarith_idem: forall nv, modarith (modarith nv) = modarith nv.
 Proof.
   destruct nv; simpl; auto. f_equal; apply complete_mask_idem.
@@ -680,7 +694,7 @@ Definition sign_ext (n: Z) (x: nval) :=
 Lemma sign_ext_sound:
   forall v w x n,
   vagree v w (sign_ext n x) ->
-  0 < n < Int.zwordsize ->
+  0 < n ->
   vagree (Val.sign_ext n v) (Val.sign_ext n w) x.
 Proof.
   unfold sign_ext; intros. destruct x; simpl in *.
@@ -889,7 +903,8 @@ Lemma default_needs_of_operation_sound:
   eval_operation ge (Vptr sp Ptrofs.zero) op args1 m1 = Some v1 ->
   vagree_list args1 args2 nil
   \/ vagree_list args1 args2 (default nv :: nil)
-  \/ vagree_list args1 args2 (default nv :: default nv :: nil) ->
+  \/ vagree_list args1 args2 (default nv :: default nv :: nil)
+  \/ vagree_list args1 args2 (default nv :: default nv :: default nv :: nil) ->
   nv <> Nothing ->
   exists v2,
      eval_operation ge (Vptr sp Ptrofs.zero) op args2 m2 = Some v2
@@ -901,7 +916,8 @@ Proof.
   {
     destruct H0. auto with na.
     destruct H0. inv H0; constructor; auto with na.
-    inv H0; constructor; auto with na. inv H8; constructor; auto with na.
+    destruct H0. inv H0. constructor. inv H8; constructor; auto with na. 
+    inv H0; constructor; auto with na. inv H8; constructor; auto with na. inv H9; constructor; auto with na.
   }
   exploit (@eval_operation_inj _ _ _ _ ge ge inject_id).
   eassumption. auto. auto. auto.
diff --git a/backend/OpHelpers.v b/backend/OpHelpers.v
index 53414dab..b9b97903 100644
--- a/backend/OpHelpers.v
+++ b/backend/OpHelpers.v
@@ -6,16 +6,16 @@ Require Import Op CminorSel.
   runtime library functions.  The following type class collects
   the names of these functions. *)
 
-Definition sig_l_l := mksignature (Tlong :: nil) (Some Tlong) cc_default.
-Definition sig_l_f := mksignature (Tlong :: nil) (Some Tfloat) cc_default.
-Definition sig_l_s := mksignature (Tlong :: nil) (Some Tsingle) cc_default.
-Definition sig_f_l := mksignature (Tfloat :: nil) (Some Tlong) cc_default.
-Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) (Some Tlong) cc_default.
-Definition sig_li_l := mksignature (Tlong :: Tint :: nil) (Some Tlong) cc_default.
-Definition sig_ii_l := mksignature (Tint :: Tint :: nil) (Some Tlong) cc_default.
-Definition sig_ii_i := mksignature (Tint :: Tint :: nil) (Some Tint) cc_default.
-Definition sig_ff_f := mksignature (Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default.
-Definition sig_ss_s := mksignature (Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default.
+Definition sig_l_l := mksignature (Tlong :: nil) Tlong cc_default.
+Definition sig_l_f := mksignature (Tlong :: nil) Tfloat cc_default.
+Definition sig_l_s := mksignature (Tlong :: nil) Tsingle cc_default.
+Definition sig_f_l := mksignature (Tfloat :: nil) Tlong cc_default.
+Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) Tlong cc_default.
+Definition sig_li_l := mksignature (Tlong :: Tint :: nil) Tlong cc_default.
+Definition sig_ii_l := mksignature (Tint :: Tint :: nil) Tlong cc_default.
+Definition sig_ii_i := mksignature (Tint :: Tint :: nil) Tint cc_default.
+Definition sig_ff_f := mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default.
+Definition sig_ss_s := mksignature (Tsingle :: Tsingle :: nil) Tsingle cc_default.
 
 Class helper_functions := mk_helper_functions {
   i64_dtos: ident;                      (**r float64 -> signed long *)
diff --git a/backend/PrintAsmaux.ml b/backend/PrintAsmaux.ml
index 8652b2c5..d82e6f84 100644
--- a/backend/PrintAsmaux.ml
+++ b/backend/PrintAsmaux.ml
@@ -99,7 +99,7 @@ let exists_constants () =
 
 let current_function_stacksize = ref 0l
 let current_function_sig =
-  ref { sig_args = []; sig_res = None; sig_cc = cc_default }
+  ref { sig_args = []; sig_res = Tvoid; sig_cc = cc_default }
 
 (* Functions for printing of symbol names *)
 let elf_symbol oc symb =
@@ -268,8 +268,8 @@ let re_asm_param_2 = Str.regexp "%\\([QR]?\\)\\([0-9]+\\)"
 let print_inline_asm print_preg oc txt sg args res =
   let (operands, ty_operands) =
     match sg.sig_res with
-    | None -> (args, sg.sig_args)
-    | Some tres -> (builtin_arg_of_res res :: args, tres :: sg.sig_args) in
+    | Tvoid -> (args, sg.sig_args)
+    | tres -> (builtin_arg_of_res res :: args, proj_rettype tres :: sg.sig_args) in
   let print_fragment = function
   | Str.Text s ->
       output_string oc s
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index 8c255a65..c9a6d399 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -16,7 +16,7 @@
 (** Pretty-printer for Cminor *)
 
 open Format
-open !Camlcoq
+open! Camlcoq
 open Integers
 open AST
 open PrintAST
@@ -193,9 +193,7 @@ let print_sig p sg =
   List.iter
     (fun t -> fprintf p "%s ->@ " (name_of_type t))
     sg.sig_args;
-  match sg.sig_res with
-  | None -> fprintf p "void"
-  | Some ty -> fprintf p "%s" (name_of_type ty)
+  fprintf p "%s" (name_of_rettype sg.sig_res)
 
 let rec just_skips s =
   match s with
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index 1c449e74..d8f2ac12 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -61,9 +61,10 @@ let print_succ pp s dfl =
 let print_instruction pp succ = function
   | Lop(op, args, res) ->
       fprintf pp "%a = %a" mreg res (print_operation mreg) (op, args)
-  | Lload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]"
-         mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args)
+  | Lload(trap,chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a]%a"
+        mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args)
+        print_trapping_mode trap
   | Lgetstack(sl, ofs, ty, dst) ->
       fprintf pp "%a = %a" mreg dst slot (sl, ofs, ty)
   | Lsetstack(src, sl, ofs, ty) ->
@@ -82,10 +83,11 @@ let print_instruction pp succ = function
         (print_builtin_args loc) args
   | Lbranch s ->
       print_succ pp s succ
-  | Lcond(cond, args, s1, s2) ->
-      fprintf pp "if (%a) goto %d else goto %d"
+  | Lcond(cond, args, s1, s2, info) ->
+      fprintf pp "if (%a) goto %d else goto %d (prediction: %s)"
         (print_condition mreg) (cond, args)
         (P.to_int s1) (P.to_int s2)
+        (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
   | Ljumptable(arg, tbl) ->
       let tbl = Array.of_list tbl in
       fprintf pp "jumptable (%a)" mreg arg;
diff --git a/backend/PrintLTLin.ml b/backend/PrintLTLin.ml
deleted file mode 100644
index 4e8efd16..00000000
--- a/backend/PrintLTLin.ml
+++ /dev/null
@@ -1,115 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Pretty-printer for LTLin code *)
-
-open Format
-open Camlcoq
-open Datatypes
-open Maps
-open AST
-open Integers
-open Locations
-open Machregsaux
-open LTLin
-open PrintAST
-open PrintOp
-
-let reg pp loc =
-  match loc with
-  | R r ->
-      begin match name_of_register r with
-      | Some s -> fprintf pp "%s" s
-      | None -> fprintf pp "<unknown reg>"
-      end
-  | S (Local(ofs, ty)) ->
-      fprintf pp "local(%s,%ld)" (name_of_type ty) (camlint_of_coqint ofs)
-  | S (Incoming(ofs, ty)) ->
-      fprintf pp "incoming(%s,%ld)" (name_of_type ty) (camlint_of_coqint ofs)
-  | S (Outgoing(ofs, ty)) ->
-      fprintf pp "outgoing(%s,%ld)" (name_of_type ty) (camlint_of_coqint ofs)
-
-let rec regs pp = function
-  | [] -> ()
-  | [r] -> reg pp r
-  | r1::rl -> fprintf pp "%a, %a" reg r1 regs rl
-
-let ros pp = function
-  | Coq_inl r -> reg pp r
-  | Coq_inr s -> fprintf pp "\"%s\"" (extern_atom s)
-
-let print_instruction pp i =
-  match i with
-  | Lop(op, args, res) ->
-      fprintf pp "%a = %a@ "
-         reg res (PrintOp.print_operation reg) (op, args)
-  | Lload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]@ "
-         reg dst (name_of_chunk chunk)
-         (PrintOp.print_addressing reg) (addr, args)
-  | Lstore(chunk, addr, args, src) ->
-      fprintf pp "%s[%a] = %a@ "
-         (name_of_chunk chunk)
-         (PrintOp.print_addressing reg) (addr, args)
-         reg src
-  | Lcall(sg, fn, args, res) ->
-      fprintf pp "%a = %a(%a)@ "
-        reg res ros fn regs args
-  | Ltailcall(sg, fn, args) ->
-      fprintf pp "tailcall %a(%a)@ "
-        ros fn regs args
-  | Lbuiltin(ef, args, res) ->
-      fprintf pp "%a = builtin %s(%a)@ "
-        reg res (name_of_external ef) regs args
-  | Llabel lbl ->
-      fprintf pp "%ld:@ " (P.to_int32 lbl)
-  | Lgoto lbl ->
-      fprintf pp "goto %ld@ " (P.to_int32 lbl)
-  | Lcond(cond, args, lbl) ->
-      fprintf pp "if (%a) goto %ld@ "
-        (PrintOp.print_condition reg) (cond, args)
-        (P.to_int32 lbl)
-  | Ljumptable(arg, tbl) ->
-      let tbl = Array.of_list tbl in
-      fprintf pp "@[<v 2>jumptable (%a)" reg arg;
-      for i = 0 to Array.length tbl - 1 do
-        fprintf pp "@ case %d: goto %ld" i (P.to_int32 tbl.(i))
-      done;
-      fprintf pp "@]@ "
-  | Lreturn None ->
-      fprintf pp "return@ "
-  | Lreturn (Some arg) ->
-      fprintf pp "return %a@ " reg arg
-
-let print_function pp id f =
-  fprintf pp "@[<v 2>%s(%a) {@ " (extern_atom id) regs f.fn_params;
-  List.iter (print_instruction pp) f.fn_code;
-  fprintf pp "@;<0 -2>}@]@."
-
-let print_globdef pp (id, gd) =
-  match gd with
-  | Gfun(Internal f) -> print_function pp id f
-  | _ -> ()
-
-let print_program pp prog =
-  List.iter (print_globdef pp) prog.prog_defs
-
-let destination : string option ref = ref None
-
-let print_if prog =
-  match !destination with
-  | None -> ()
-  | Some f ->
-      let oc = open_out f in
-      let pp = formatter_of_out_channel oc in
-      print_program pp prog;
-      close_out oc
diff --git a/backend/PrintMach.ml b/backend/PrintMach.ml
index 517f3037..70e65832 100644
--- a/backend/PrintMach.ml
+++ b/backend/PrintMach.ml
@@ -48,10 +48,11 @@ let print_instruction pp i =
   | Mop(op, args, res) ->
       fprintf pp "\t%a = %a\n"
          reg res (PrintOp.print_operation reg) (op, args)
-  | Mload(chunk, addr, args, dst) ->
-      fprintf pp "\t%a = %s[%a]\n"
+  | Mload(trap, chunk, addr, args, dst) ->
+      fprintf pp "\t%a = %s[%a]%a\n"
          reg dst (name_of_chunk chunk)
          (PrintOp.print_addressing reg) (addr, args)
+         print_trapping_mode trap
   | Mstore(chunk, addr, args, src) ->
       fprintf pp "\t%s[%a] = %a\n"
          (name_of_chunk chunk)
diff --git a/backend/PrintRTL.ml b/backend/PrintRTL.ml
index 841540b6..b2ef05ca 100644
--- a/backend/PrintRTL.ml
+++ b/backend/PrintRTL.ml
@@ -50,10 +50,11 @@ let print_instruction pp (pc, i) =
       fprintf pp "%a = %a\n"
          reg res (PrintOp.print_operation reg) (op, args);
       print_succ pp s (pc - 1)
-  | Iload(chunk, addr, args, dst, s) ->
-      fprintf pp "%a = %s[%a]\n"
+  | Iload(trap, chunk, addr, args, dst, s) ->
+      fprintf pp "%a = %s[%a]%a\n"
          reg dst (name_of_chunk chunk)
-         (PrintOp.print_addressing reg) (addr, args);
+         (PrintOp.print_addressing reg) (addr, args)
+         print_trapping_mode trap;
       print_succ pp s (pc - 1)
   | Istore(chunk, addr, args, src, s) ->
       fprintf pp "%s[%a] = %a\n"
@@ -74,10 +75,11 @@ let print_instruction pp (pc, i) =
         (name_of_external ef)
         (print_builtin_args reg) args;
       print_succ pp s (pc - 1)
-  | Icond(cond, args, s1, s2) ->
-      fprintf pp "if (%a) goto %d else goto %d\n"
+  | Icond(cond, args, s1, s2, info) ->
+      fprintf pp "if (%a) goto %d else goto %d (prediction: %s)\n"
         (PrintOp.print_condition reg) (cond, args)
         (P.to_int s1) (P.to_int s2)
+        (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
   | Ijumptable(arg, tbl) ->
       let tbl = Array.of_list tbl in
       fprintf pp "jumptable (%a)\n" reg arg;
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index 6432682a..d1b79623 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -86,9 +86,10 @@ let print_instruction pp succ = function
       fprintf pp "(%a) = (%a) using %a, %a" vars dsts vars srcs var t1 var t2
   | Xop(op, args, res) ->
       fprintf pp "%a = %a" var res (print_operation var) (op, args)
-  | Xload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]"
-         var dst (name_of_chunk chunk) (print_addressing var) (addr, args)
+  | Xload(trap, chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a]%a"
+        var dst (name_of_chunk chunk) (print_addressing var) (addr, args)
+        print_trapping_mode trap
   | Xstore(chunk, addr, args, src) ->
       fprintf pp "%s[%a] = %a"
          (name_of_chunk chunk) (print_addressing var) (addr, args) var src
@@ -103,7 +104,7 @@ let print_instruction pp succ = function
         (print_builtin_args var) args
   | Xbranch s ->
       print_succ pp s succ
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       fprintf pp "if (%a) goto %d else goto %d"
         (print_condition var) (cond, args)
         (P.to_int s1) (P.to_int s2)
diff --git a/backend/RTL.v b/backend/RTL.v
index 9599a24a..dec59ca2 100644
--- a/backend/RTL.v
+++ b/backend/RTL.v
@@ -43,11 +43,12 @@ Inductive instruction: Type :=
       (** [Iop op args dest succ] performs the arithmetic operation [op]
           over the values of registers [args], stores the result in [dest],
           and branches to [succ]. *)
-  | Iload: memory_chunk -> addressing -> list reg -> reg -> node -> instruction
-      (** [Iload chunk addr args dest succ] loads a [chunk] quantity from
+  | Iload: trapping_mode -> memory_chunk -> addressing -> list reg -> reg -> node -> instruction
+      (** [Iload trap chunk addr args dest succ] loads a [chunk] quantity from
           the address determined by the addressing mode [addr] and the
           values of the [args] registers, stores the quantity just read
-          into [dest], and branches to [succ]. *)
+          into [dest], and branches to [succ].
+          If [trap] is [NOTRAP], a load that fails writes [default_notrap_load_value chunk] to [dest] instead. *)
   | Istore: memory_chunk -> addressing -> list reg -> reg -> node -> instruction
       (** [Istore chunk addr args src succ] stores the value of register
           [src] in the [chunk] quantity at the
@@ -66,11 +67,12 @@ Inductive instruction: Type :=
       (** [Ibuiltin ef args dest succ] calls the built-in function
           identified by [ef], giving it the values of [args] as arguments.
           It stores the return value in [dest] and branches to [succ]. *)
-  | Icond: condition -> list reg -> node -> node -> instruction
-      (** [Icond cond args ifso ifnot] evaluates the boolean condition
+  | Icond: condition -> list reg -> node -> node -> option bool -> instruction
+      (** [Icond cond args ifso ifnot info] evaluates the boolean condition
           [cond] over the values of registers [args].  If the condition
           is true, it transitions to [ifso].  If the condition is false,
-          it transitions to [ifnot]. *)
+          it transitions to [ifnot].  [info] is a ghost field carrying an optional
+          branch-prediction hint; the transition taken does not depend on it. *)
   | Ijumptable: reg -> list node -> instruction
       (** [Ijumptable arg tbl] transitions to the node that is the [n]-th
           element of the list [tbl], where [n] is the unsigned integer
@@ -212,12 +214,25 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp pc rs m)
         E0 (State s f sp pc' (rs#res <- v) m)
   | exec_Iload:
-      forall s f sp pc rs m chunk addr args dst pc' a v,
-      (fn_code f)!pc = Some(Iload chunk addr args dst pc') ->
+      forall s f sp pc rs m trap chunk addr args dst pc' a v,
+      (fn_code f)!pc = Some(Iload trap chunk addr args dst pc') ->
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
       step (State s f sp pc rs m)
         E0 (State s f sp pc' (rs#dst <- v) m)
+  | exec_Iload_notrap1:
+      forall s f sp pc rs m chunk addr args dst pc',
+      (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
+      eval_addressing ge sp addr rs##args = None ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m)
+  | exec_Iload_notrap2:
+      forall s f sp pc rs m chunk addr args dst pc' a,
+      (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = None->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m)
   | exec_Istore:
       forall s f sp pc rs m chunk addr args src pc' a m',
       (fn_code f)!pc = Some(Istore chunk addr args src pc') ->
@@ -248,8 +263,8 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp pc rs m)
          t (State s f sp pc' (regmap_setres res vres rs) m')
   | exec_Icond:
-      forall s f sp pc rs m cond args ifso ifnot b pc',
-      (fn_code f)!pc = Some(Icond cond args ifso ifnot) ->
+      forall s f sp pc rs m cond args ifso ifnot b pc' predb,
+      (fn_code f)!pc = Some(Icond cond args ifso ifnot predb) ->
       eval_condition cond rs##args m = Some b ->
       pc' = (if b then ifso else ifnot) ->
       step (State s f sp pc rs m)
@@ -299,8 +314,8 @@ Proof.
 Qed.
 
 Lemma exec_Iload':
-  forall s f sp pc rs m chunk addr args dst pc' rs' a v,
-  (fn_code f)!pc = Some(Iload chunk addr args dst pc') ->
+  forall s f sp pc rs m trap chunk addr args dst pc' rs' a v,
+  (fn_code f)!pc = Some(Iload trap chunk addr args dst pc') ->
   eval_addressing ge sp addr rs##args = Some a ->
   Mem.loadv chunk m a = Some v ->
   rs' = (rs#dst <- v) ->
@@ -384,12 +399,12 @@ Definition successors_instr (i: instruction) : list node :=
   match i with
   | Inop s => s :: nil
   | Iop op args res s => s :: nil
-  | Iload chunk addr args dst s => s :: nil
+  | Iload trap chunk addr args dst s => s :: nil
   | Istore chunk addr args src s => s :: nil
   | Icall sig ros args res s => s :: nil
   | Itailcall sig ros args => nil
   | Ibuiltin ef args res s => s :: nil
-  | Icond cond args ifso ifnot => ifso :: ifnot :: nil
+  | Icond cond args ifso ifnot _ => ifso :: ifnot :: nil
   | Ijumptable arg tbl => tbl
   | Ireturn optarg => nil
   end.
@@ -403,14 +418,14 @@ Definition instr_uses (i: instruction) : list reg :=
   match i with
   | Inop s => nil
   | Iop op args res s => args
-  | Iload chunk addr args dst s => args
+  | Iload trap chunk addr args dst s => args
   | Istore chunk addr args src s => src :: args
   | Icall sig (inl r) args res s => r :: args
   | Icall sig (inr id) args res s => args
   | Itailcall sig (inl r) args => r :: args
   | Itailcall sig (inr id) args => args
   | Ibuiltin ef args res s => params_of_builtin_args args
-  | Icond cond args ifso ifnot => args
+  | Icond cond args ifso ifnot _ => args
   | Ijumptable arg tbl => arg :: nil
   | Ireturn None => nil
   | Ireturn (Some arg) => arg :: nil
@@ -422,13 +437,13 @@ Definition instr_defs (i: instruction) : option reg :=
   match i with
   | Inop s => None
   | Iop op args res s => Some res
-  | Iload chunk addr args dst s => Some dst
+  | Iload trap chunk addr args dst s => Some dst
   | Istore chunk addr args src s => None
   | Icall sig ros args res s => Some res
   | Itailcall sig ros args => None
   | Ibuiltin ef args res s =>
       match res with BR r => Some r | _ => None end
-  | Icond cond args ifso ifnot => None
+  | Icond cond args ifso ifnot _ => None
   | Ijumptable arg tbl => None
   | Ireturn optarg => None
   end.
@@ -462,7 +477,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) :=
   match i with
   | Inop s => m
   | Iop op args res s => fold_left Pos.max args (Pos.max res m)
-  | Iload chunk addr args dst s => fold_left Pos.max args (Pos.max dst m)
+  | Iload trap chunk addr args dst s => fold_left Pos.max args (Pos.max dst m)
   | Istore chunk addr args src s => fold_left Pos.max args (Pos.max src m)
   | Icall sig (inl r) args res s => fold_left Pos.max args (Pos.max r (Pos.max res m))
   | Icall sig (inr id) args res s => fold_left Pos.max args (Pos.max res m)
@@ -471,7 +486,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) :=
   | Ibuiltin ef args res s =>
       fold_left Pos.max (params_of_builtin_args args)
         (fold_left Pos.max (params_of_builtin_res res) m)
-  | Icond cond args ifso ifnot => fold_left Pos.max args m
+  | Icond cond args ifso ifnot _ => fold_left Pos.max args m
   | Ijumptable arg tbl => Pos.max arg m
   | Ireturn None => m
   | Ireturn (Some arg) => Pos.max arg m
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index 9d7a8506..ac98f3a1 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -410,12 +410,11 @@ Fixpoint convert_builtin_args {A: Type} (al: list (builtin_arg expr)) (rl: list
       a1' :: convert_builtin_args al rl1
   end.
 
-Definition convert_builtin_res (map: mapping) (oty: option typ) (r: builtin_res ident) : mon (builtin_res reg) :=
-  match r, oty with
-  | BR id, _ => do r <- find_var map id; ret (BR r)
-  | BR_none, None => ret BR_none
-  | BR_none, Some _ => do r <- new_reg; ret (BR r)
-  | _, _ => error (Errors.msg "RTLgen: bad builtin_res")
+Definition convert_builtin_res (map: mapping) (ty: rettype) (r: builtin_res ident) : mon (builtin_res reg) :=
+  match r with
+  | BR id => do r <- find_var map id; ret (BR r)  (* named result: use the register bound to [id] in [map] *)
+  | BR_none => if rettype_eq ty Tvoid then ret BR_none else (do r <- new_reg; ret (BR r))  (* a discarded non-void result still gets a fresh register *)
+  | _ => error (Errors.msg "RTLgen: bad builtin_res")  (* every other form of [builtin_res] is rejected *)
   end.
 
 (** Translation of an expression.  [transl_expr map a rd nd]
@@ -436,7 +435,7 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node)
       transl_exprlist map al rl no
   | Eload chunk addr al =>
       do rl <- alloc_regs map al;
-      do no <- add_instr (Iload chunk addr rl rd nd);
+      do no <- add_instr (Iload TRAP chunk addr rl rd nd);
          transl_exprlist map al rl no
   | Econdition a b c =>
       do nfalse <- transl_expr map c rd nd;
@@ -480,7 +479,7 @@ with transl_condexpr (map: mapping) (a: condexpr) (ntrue nfalse: node)
   match a with
   | CEcond c al =>
       do rl <- alloc_regs map al;
-      do nt <- add_instr (Icond c rl ntrue nfalse);
+      do nt <- add_instr (Icond c rl ntrue nfalse None);
          transl_exprlist map al rl nt
   | CEcondition a b c =>
       do nc <- transl_condexpr map c ntrue nfalse;
@@ -667,10 +666,7 @@ Fixpoint reserve_labels (s: stmt) (ms: labelmap * state)
 (** Translation of a CminorSel function. *)
 
 Definition ret_reg (sig: signature) (rd: reg) : option reg :=
-  match sig.(sig_res) with
-  | None => None
-  | Some ty => Some rd
-  end.
+  if rettype_eq sig.(sig_res) Tvoid then None else Some rd.  (* no return register when the result type is [Tvoid] *)
 
 Definition transl_fun (f: CminorSel.function) (ngoto: labelmap): mon (node * list reg) :=
   do (rparams, map1) <- add_vars init_mapping f.(CminorSel.fn_params);
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index 17022a7d..30ad7d82 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -639,8 +639,8 @@ Lemma new_reg_return_ok:
   map_valid map s1 ->
   return_reg_ok s2 map (ret_reg sig r).
 Proof.
-  intros. unfold ret_reg. destruct (sig_res sig); constructor.
-  eauto with rtlg. eauto with rtlg.
+  intros. unfold ret_reg.
+  destruct (rettype_eq (sig_res sig) Tvoid); constructor; eauto with rtlg.
 Qed.
 
 (** * Relational specification of the translation *)
@@ -707,7 +707,7 @@ Inductive tr_expr (c: code):
       tr_expr c map pr (Eop op al) ns nd rd dst
   | tr_Eload: forall map pr chunk addr al ns nd rd n1 rl dst,
       tr_exprlist c map pr al ns n1 rl ->
-      c!n1 = Some (Iload chunk addr rl rd nd) ->
+      c!n1 = Some (Iload TRAP chunk addr rl rd nd) ->
       reg_map_ok map rd dst -> ~In rd pr ->
       tr_expr c map pr (Eload chunk addr al) ns nd rd dst
   | tr_Econdition: forall map pr a ifso ifnot ns nd rd ntrue nfalse dst,
@@ -744,9 +744,9 @@ Inductive tr_expr (c: code):
 
 with tr_condition (c: code):
        mapping -> list reg -> condexpr -> node -> node -> node -> Prop :=
-  | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl,
+  | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl i,
       tr_exprlist c map pr bl ns n1 rl ->
-      c!n1 = Some (Icond cond rl ntrue nfalse) ->
+      c!n1 = Some (Icond cond rl ntrue nfalse i) ->
       tr_condition c map pr (CEcond cond bl) ns ntrue nfalse
   | tr_CEcondition: forall map pr a1 a2 a3 ns ntrue nfalse n2 n3,
       tr_condition c map pr a1 ns n2 n3 ->
@@ -1224,9 +1224,9 @@ Lemma convert_builtin_res_charact:
 Proof.
   destruct res; simpl; intros.
 - monadInv TR. constructor.  unfold find_var in EQ. destruct (map_vars map)!x; inv EQ; auto.
-- destruct oty; monadInv TR.
-+ constructor. eauto with rtlg.
+- destruct (rettype_eq oty Tvoid); monadInv TR.
 + constructor.
++ constructor. eauto with rtlg.
 - monadInv TR.
 Qed.
 
@@ -1350,7 +1350,7 @@ Proof.
   intros [C D].
   eapply tr_function_intro; eauto with rtlg.
   eapply transl_stmt_charact; eauto with rtlg.
-  unfold ret_reg. destruct (sig_res (CminorSel.fn_sig f)).
-  constructor. eauto with rtlg. eauto with rtlg.
+  unfold ret_reg. destruct (rettype_eq (sig_res (CminorSel.fn_sig f)) Tvoid).
   constructor.
+  constructor; eauto with rtlg.
 Qed.
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 8336d1bf..15ed6d8a 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -104,11 +104,11 @@ Inductive wt_instr : instruction -> Prop :=
       valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
-      forall chunk addr args dst s,
+      forall trap chunk addr args dst s,
       map env args = type_of_addressing addr ->
       env dst = type_of_chunk chunk ->
       valid_successor s ->
-      wt_instr (Iload chunk addr args dst s)
+      wt_instr (Iload trap chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
       map env args = type_of_addressing addr ->
@@ -139,11 +139,11 @@ Inductive wt_instr : instruction -> Prop :=
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
-      forall cond args s1 s2,
+      forall cond args s1 s2 i,
       map env args = type_of_condition cond ->
       valid_successor s1 ->
       valid_successor s2 ->
-      wt_instr (Icond cond args s1 s2)
+      wt_instr (Icond cond args s1 s2 i)
   | wt_Ijumptable:
       forall arg tbl,
       env arg = Tint ->
@@ -151,11 +151,12 @@ Inductive wt_instr : instruction -> Prop :=
       list_length_z tbl * 4 <= Int.max_unsigned ->
       wt_instr (Ijumptable arg tbl)
   | wt_Ireturn_none:
-      funct.(fn_sig).(sig_res) = None ->
+      funct.(fn_sig).(sig_res) = Tvoid ->
       wt_instr (Ireturn None)
   | wt_Ireturn_some:
       forall arg ty,
-      funct.(fn_sig).(sig_res) = Some ty ->
+      funct.(fn_sig).(sig_res) <> Tvoid ->
+      env arg = proj_sig_res funct.(fn_sig) ->
       env arg = ty ->
       wt_instr (Ireturn (Some arg)).
 
@@ -282,7 +283,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
       else
        (let (targs, tres) := type_of_operation op in
         do e1 <- S.set_list e args targs; S.set e1 res tres)
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       do x <- check_successor s;
       do e1 <- S.set_list e args (type_of_addressing addr);
       S.set e1 dst (type_of_chunk chunk)
@@ -298,7 +299,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   | Itailcall sig ros args =>
       do e1 <- type_ros e ros;
       do e2 <- S.set_list e1 args sig.(sig_args);
-      if opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res) then
+      if rettype_eq sig.(sig_res) f.(fn_sig).(sig_res) then
         if tailcall_is_possible sig
         then OK e2
         else Error(msg "tailcall not possible")
@@ -312,7 +313,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
         | _ => type_builtin_args e args sig.(sig_args)
         end;
       type_builtin_res e1 res (proj_sig_res sig)
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 _ =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
       S.set_list e args (type_of_condition cond)
@@ -323,9 +324,9 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
       then OK e1
       else Error(msg "jumptable too big")
   | Ireturn optres =>
-      match optres, f.(fn_sig).(sig_res) with
-      | None, None => OK e
-      | Some r, Some t => S.set e r t
+      match optres, rettype_eq f.(fn_sig).(sig_res) Tvoid with
+      | None, left _ => OK e
+      | Some r, right _ => S.set e r (proj_sig_res f.(fn_sig))
       | _, _ => Error(msg "bad return")
       end
   end.
@@ -468,7 +469,7 @@ Proof.
   destruct l; try discriminate. destruct l; monadInv EQ0. eauto with ty.
   destruct (type_of_operation o) as [targs tres] eqn:TYOP. monadInv EQ0. eauto with ty.
 - (* tailcall *)
-  destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
+  destruct (rettype_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
   destruct (tailcall_is_possible s) eqn:TCIP; inv EQ2.
   eauto with ty.
 - (* builtin *)
@@ -477,7 +478,8 @@ Proof.
   destruct (zle (list_length_z l * 4) Int.max_unsigned); inv EQ2.
   eauto with ty.
 - (* return *)
-  simpl in H. destruct o as [r|] eqn: RET; destruct (sig_res (fn_sig f)) as [t|] eqn: RES; try discriminate.
+  simpl in H.
+  destruct o as [r|] eqn: RET; destruct (rettype_eq (sig_res (fn_sig f)) Tvoid); try discriminate.
   eauto with ty.
   inv H; auto with ty.
 Qed.
@@ -519,7 +521,7 @@ Proof.
   eapply S.set_sound; eauto with ty.
   eauto with ty.
 - (* tailcall *)
-  destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
+  destruct (rettype_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
   destruct (tailcall_is_possible s) eqn:TCIP; inv EQ2.
   constructor.
   eapply type_ros_sound; eauto with ty.
@@ -543,8 +545,9 @@ Proof.
   eapply check_successors_sound; eauto.
   auto.
 - (* return *)
-  simpl in H. destruct o as [r|] eqn: RET; destruct (sig_res (fn_sig f)) as [t|] eqn: RES; try discriminate.
-  econstructor. eauto. eapply S.set_sound; eauto with ty.
+  simpl in H.
+  destruct o as [r|] eqn: RET; destruct (rettype_eq (sig_res (fn_sig f)) Tvoid); try discriminate.
+  econstructor. auto. eapply S.set_sound; eauto with ty. eauto.
   inv H. constructor. auto.
 Qed.
 
@@ -721,9 +724,9 @@ Proof.
   rewrite check_successor_complete by auto; simpl.
   apply IHtbl0; intros; auto.
 - (* return none *)
-  rewrite H0. exists e; auto.
+  rewrite H0, dec_eq_true. exists e; auto.
 - (* return some *)
-  rewrite H0. apply S.set_complete; auto.
+  rewrite dec_eq_false by auto. apply S.set_complete; auto.
 Qed.
 
 Lemma type_code_complete:
@@ -841,14 +844,24 @@ Proof.
 Qed.
 
 Lemma wt_exec_Iload:
-  forall env f chunk addr args dst s m a v rs,
-  wt_instr f env (Iload chunk addr args dst s) ->
+  forall env f trap chunk addr args dst s m a v rs,
+  wt_instr f env (Iload trap chunk addr args dst s) ->
   Mem.loadv chunk m a = Some v ->
   wt_regset env rs ->
   wt_regset env (rs#dst <- v).
 Proof.
   intros. destruct a; simpl in H0; try discriminate. inv H.
-  eapply wt_regset_assign; eauto. rewrite H8; eapply Mem.load_type; eauto.
+  eapply wt_regset_assign; eauto. rewrite H9; eapply Mem.load_type; eauto.
+Qed.
+
+Lemma wt_exec_Iload_notrap: (* register typing is preserved when a NOTRAP load writes its default value *)
+  forall env f chunk addr args dst s rs,
+  wt_instr f env (Iload NOTRAP chunk addr args dst s) ->
+  wt_regset env rs ->
+  wt_regset env (rs#dst <- (default_notrap_load_value chunk)). (* [dst] receives the default value rather than a loaded one *)
+Proof.
+  intros. 
+  eapply wt_regset_assign; eauto. simpl. trivial. (* NOTE(review): the typing side condition is closed by [simpl; trivial] alone, so [default_notrap_load_value chunk] presumably reduces to a value accepted at every type -- confirm against its definition *)
 Qed.
 
 Lemma wt_exec_Ibuiltin:
@@ -872,7 +885,7 @@ Qed.
 
 Inductive wt_stackframes: list stackframe -> signature -> Prop :=
   | wt_stackframes_nil: forall sg,
-      sg.(sig_res) = Some Tint ->
+      sg.(sig_res) = Tint ->
       wt_stackframes nil sg
   | wt_stackframes_cons:
       forall s res f sp pc rs env sg,
@@ -930,6 +943,10 @@ Proof.
   econstructor; eauto. eapply wt_exec_Iop; eauto.
   (* Iload *)
   econstructor; eauto. eapply wt_exec_Iload; eauto.
+  (* Iload notrap1 *)
+  econstructor; eauto. eapply wt_exec_Iload_notrap; eauto.
+  (* Iload notrap2 *)
+  econstructor; eauto. eapply wt_exec_Iload_notrap; eauto.
   (* Istore *)
   econstructor; eauto.
   (* Icall *)
@@ -964,13 +981,13 @@ Proof.
   econstructor; eauto.
   (* Ireturn *)
   econstructor; eauto.
-  inv WTI; simpl. auto. unfold proj_sig_res; rewrite H2. auto.
+  inv WTI; simpl. auto. rewrite <- H3. auto.
   (* internal function *)
   simpl in *. inv H5.
   econstructor; eauto.
   inv H1. apply wt_init_regs; auto. rewrite wt_params0. auto.
   (* external function *)
-  econstructor; eauto. simpl.
+  econstructor; eauto.
   eapply external_call_well_typed; eauto.
   (* return *)
   inv H1. econstructor; eauto.
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index 7db8a866..ffe26933 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -249,18 +249,18 @@ let block_of_RTL_instr funsig tyenv = function
             else
               let t = new_temp (tyenv res) in (t :: args2', t) in
       movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s])
-  | RTL.Iload(chunk, addr, args, dst, s) ->
+  | RTL.Iload(trap, chunk, addr, args, dst, s) ->
       if Archi.splitlong && chunk = Mint64 then begin
         match offset_addressing addr (coqint_of_camlint 4l) with
         | None -> assert false
         | Some addr' ->
-            [Xload(Mint32, addr, vregs tyenv args,
+            [Xload(trap, Mint32, addr, vregs tyenv args,
                    V((if Archi.big_endian then dst else twin_reg dst), Tint));
-             Xload(Mint32, addr', vregs tyenv args,
+             Xload(trap, Mint32, addr', vregs tyenv args,
                    V((if Archi.big_endian then twin_reg dst else dst), Tint));
              Xbranch s]
       end else
-        [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
+        [Xload(trap, chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
   | RTL.Istore(chunk, addr, args, src, s) ->
       if Archi.splitlong && chunk = Mint64 then begin
         match offset_addressing addr (coqint_of_camlint 4l) with
@@ -295,8 +295,8 @@ let block_of_RTL_instr funsig tyenv = function
          (Xbuiltin(ef, args2, res2) ::
             movelist (params_of_builtin_res res2) (params_of_builtin_res res1)
                [Xbranch s])
-  | RTL.Icond(cond, args, s1, s2) ->
-      [Xcond(cond, vregs tyenv args, s1, s2)]
+  | RTL.Icond(cond, args, s1, s2, i) ->
+      [Xcond(cond, vregs tyenv args, s1, s2, i)]
   | RTL.Ijumptable(arg, tbl) ->
       [Xjumptable(vreg tyenv arg, tbl)]
   | RTL.Ireturn None ->
@@ -364,7 +364,7 @@ let live_before instr after =
       if VSet.mem res after
       then vset_addlist args (VSet.remove res after)
       else after
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       if VSet.mem dst after
       then vset_addlist args (VSet.remove dst after)
       else after
@@ -380,7 +380,7 @@ let live_before instr after =
       vset_addargs args (vset_removeres res after)
   | Xbranch s ->
       after
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       List.fold_right VSet.add args after
   | Xjumptable(arg, tbl) ->
       VSet.add arg after
@@ -459,7 +459,7 @@ let dce_instr instr after k =
       if VSet.mem res after
       then instr :: k
       else k
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       if VSet.mem dst after
       then instr :: k
       else k
@@ -550,7 +550,7 @@ let spill_costs f =
         (* temps must not be spilled *)
     | Xop(op, args, res) ->
         charge_list 10 1 args; charge 10 1 res
-    | Xload(chunk, addr, args, dst) ->
+    | Xload(trap, chunk, addr, args, dst) ->
         charge_list 10 1 args; charge 10 1 dst
     | Xstore(chunk, addr, args, src) ->
         charge_list 10 1 args; charge 10 1 src
@@ -575,7 +575,7 @@ let spill_costs f =
             charge_list 10 1 (params_of_builtin_res res)
         end
     | Xbranch _ -> ()
-    | Xcond(cond, args, _, _) ->
+    | Xcond(cond, args, _, _, _) ->
         charge_list 10 1 args
     | Xjumptable(arg, _) ->
         charge 10 1 arg
@@ -677,7 +677,7 @@ let add_interfs_instr g instr live =
             (vset_addlist (res :: argl) (VSet.remove res live))
       end;
       add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op)
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       add_interfs_def g dst live;
       add_interfs_destroyed g (VSet.remove dst live)
                               (destroyed_by_load chunk addr)
@@ -718,7 +718,7 @@ let add_interfs_instr g instr live =
       end
   | Xbranch s ->
       ()
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       add_interfs_destroyed g live (destroyed_by_cond cond)
   | Xjumptable(arg, tbl) ->
       add_interfs_destroyed g live destroyed_by_jumptable
@@ -782,7 +782,7 @@ let tospill_instr alloc instr ts =
       ts
   | Xop(op, args, res) ->
       addlist_tospill alloc args (add_tospill alloc res ts)
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       addlist_tospill alloc args (add_tospill alloc dst ts)
   | Xstore(chunk, addr, args, src) ->
       addlist_tospill alloc args (add_tospill alloc src ts)
@@ -797,7 +797,7 @@ let tospill_instr alloc instr ts =
          (addlist_tospill alloc (params_of_builtin_res res) ts)
   | Xbranch s ->
       ts
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       addlist_tospill alloc args ts
   | Xjumptable(arg, tbl) ->
       add_tospill alloc arg ts
@@ -964,10 +964,10 @@ let spill_instr tospill eqs instr =
                add res tmp (kill tmp (kill res eqs2)))
           end
       end
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       let (args', c1, eqs1) = reload_vars tospill eqs args in
       let (dst', c2, eqs2) = save_var tospill eqs1 dst in
-      (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2)
+      (c1 @ Xload(trap, chunk, addr, args', dst') :: c2, eqs2)
   | Xstore(chunk, addr, args, src) ->
       let (args', c1, eqs1) = reload_vars tospill eqs args in
       let (src', c2, eqs2) = reload_var tospill eqs1 src in
@@ -990,9 +990,9 @@ let spill_instr tospill eqs instr =
       (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2)
   | Xbranch s ->
       ([instr], eqs)
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, i) ->
      let (args', c1, eqs1) = reload_vars tospill eqs args in
-     (c1 @ [Xcond(cond, args', s1, s2)], eqs1)
+     (c1 @ [Xcond(cond, args', s1, s2, i)], eqs1)
   | Xjumptable(arg, tbl) ->
       let (arg', c1, eqs1) = reload_var tospill eqs arg in
       (c1 @ [Xjumptable(arg', tbl)], eqs1)
@@ -1115,8 +1115,8 @@ let transl_instr alloc instr k =
             LTL.Lop(Omove, [rarg1], rres) ::
             LTL.Lop(op, rres :: rargl, rres) :: k
       end
-  | Xload(chunk, addr, args, dst) ->
-      LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
+  | Xload(trap, chunk, addr, args, dst) ->
+      LTL.Lload(trap, chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
   | Xstore(chunk, addr, args, src) ->
       LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k
   | Xcall(sg, vos, args, res) ->
@@ -1128,8 +1128,8 @@ let transl_instr alloc instr k =
                        AST.map_builtin_res (mreg_of alloc) res) :: k
   | Xbranch s ->
       LTL.Lbranch s :: []
-  | Xcond(cond, args, s1, s2) ->
-      LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: []
+  | Xcond(cond, args, s1, s2, i) ->
+      LTL.Lcond(cond, mregs_of alloc args, s1, s2, i) :: []
   | Xjumptable(arg, tbl) ->
       LTL.Ljumptable(mreg_of alloc arg, tbl) :: []
   | Xreturn optarg ->
diff --git a/backend/Renumber.v b/backend/Renumber.v
index 10f58251..2727b979 100644
--- a/backend/Renumber.v
+++ b/backend/Renumber.v
@@ -43,12 +43,12 @@ Definition renum_instr (i: instruction) : instruction :=
   match i with
   | Inop s => Inop (renum_pc s)
   | Iop op args res s => Iop op args res (renum_pc s)
-  | Iload chunk addr args res s => Iload chunk addr args res (renum_pc s)
+  | Iload trap chunk addr args res s => Iload trap chunk addr args res (renum_pc s)
   | Istore chunk addr args src s => Istore chunk addr args src (renum_pc s)
   | Icall sg ros args res s => Icall sg ros args res (renum_pc s)
   | Itailcall sg ros args => i
   | Ibuiltin ef args res s => Ibuiltin ef args res (renum_pc s)
-  | Icond cond args s1 s2 => Icond cond args (renum_pc s1) (renum_pc s2)
+  | Icond cond args s1 s2 info => Icond cond args (renum_pc s1) (renum_pc s2) info
   | Ijumptable arg tbl => Ijumptable arg (List.map renum_pc tbl)
   | Ireturn or => i
   end.
diff --git a/backend/Renumberproof.v b/backend/Renumberproof.v
index 7cda9425..2e161965 100644
--- a/backend/Renumberproof.v
+++ b/backend/Renumberproof.v
@@ -175,6 +175,18 @@ Proof.
   rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
   eapply exec_Iload; eauto.
   constructor; auto. eapply reach_succ; eauto. simpl; auto.
+  (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto. eapply reach_succ; eauto. simpl; auto.
+  (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. eapply reach_succ; eauto. simpl; auto.
 (* store *)
   econstructor; split.
   assert (eval_addressing tge sp addr rs ## args = Some a).
diff --git a/backend/SelectDivproof.v b/backend/SelectDivproof.v
index a8ee8453..1873da4d 100644
--- a/backend/SelectDivproof.v
+++ b/backend/SelectDivproof.v
@@ -764,8 +764,8 @@ Lemma eval_divlu_mull:
 Proof.
   intros. unfold divlu_mull. exploit (divlu_mul_shift x); eauto. intros [A B].
   assert (A0: eval_expr ge sp e m le (Eletvar O) (Vlong x)) by (constructor; auto).
-  exploit eval_mullhu. eauto. eexact A0. instantiate (1 := Int64.repr M). intros (v1 & A1 & B1).
-  exploit eval_shrluimm. eauto. eexact A1. instantiate (1 := Int.repr p). intros (v2 & A2 & B2).
+  exploit eval_mullhu. try apply HELPERS. eexact A0. instantiate (1 := Int64.repr M). intros (v1 & A1 & B1).
+  exploit eval_shrluimm. try apply HELPERS. eexact A1. instantiate (1 := Int.repr p). intros (v2 & A2 & B2).
   simpl in B1; inv B1. simpl in B2. replace (Int.ltu (Int.repr p) Int64.iwordsize') with true in B2. inv B2.
   rewrite B. assumption.
   unfold Int.ltu. rewrite Int.unsigned_repr. rewrite zlt_true; auto. tauto.
@@ -835,17 +835,17 @@ Proof.
   intros. unfold divls_mull.
   assert (A0: eval_expr ge sp e m le (Eletvar O) (Vlong x)).
   { constructor; auto. }
-  exploit eval_mullhs. eauto. eexact A0. instantiate (1 := Int64.repr M).  intros (v1 & A1 & B1).
-  exploit eval_addl; auto; try apply HELPERS. eexact A1. eexact A0. intros (v2 & A2 & B2).
-  exploit eval_shrluimm. eauto. eexact A0. instantiate (1 := Int.repr 63). intros (v3 & A3 & B3).
+  exploit eval_mullhs. try apply HELPERS. eexact A0. instantiate (1 := Int64.repr M).  intros (v1 & A1 & B1).
+  exploit eval_addl. auto. eexact A1. eexact A0. intros (v2 & A2 & B2).
+  exploit eval_shrluimm. try apply HELPERS. eexact A0. instantiate (1 := Int.repr 63). intros (v3 & A3 & B3).
   set (a4 := if zlt M Int64.half_modulus
              then mullhs (Eletvar 0) (Int64.repr M)
              else addl (mullhs (Eletvar 0) (Int64.repr M)) (Eletvar 0)).
   set (v4 := if zlt M Int64.half_modulus then v1 else v2).
   assert (A4: eval_expr ge sp e m le a4 v4).
   { unfold a4, v4; destruct (zlt M Int64.half_modulus); auto. }
-  exploit eval_shrlimm. eauto. eexact A4. instantiate (1 := Int.repr p). intros (v5 & A5 & B5).
-  exploit eval_addl; auto; try apply HELPERS. eexact A5. eexact A3. intros (v6 & A6 & B6).
+  exploit eval_shrlimm. try apply HELPERS. eexact A4. instantiate (1 := Int.repr p). intros (v5 & A5 & B5).
+  exploit eval_addl. auto. eexact A5. eexact A3. intros (v6 & A6 & B6).
   assert (RANGE: forall x, 0 <= x < 64 -> Int.ltu (Int.repr x) Int64.iwordsize' = true).
   { intros. unfold Int.ltu. rewrite Int.unsigned_repr. rewrite zlt_true by tauto. auto.
     assert (64 < Int.max_unsigned) by (compute; auto). omega. }
@@ -949,8 +949,7 @@ Proof.
   intros until y. unfold divf. destruct (divf_match b); intros.
 - unfold divfimm. destruct (Float.exact_inverse n2) as [n2' | ] eqn:EINV.
   + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
-    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-    simpl; eauto.
+    repeat (econstructor; eauto). 
     destruct x; simpl; auto. erewrite Float.div_mul_inverse; eauto.
   + apply eval_divf_base; trivial.
 - apply eval_divf_base; trivial.
@@ -965,8 +964,7 @@ Proof.
   intros until y. unfold divfs. destruct (divfs_match b); intros.
 - unfold divfsimm. destruct (Float32.exact_inverse n2) as [n2' | ] eqn:EINV.
   + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
-    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-    simpl; eauto.
+    repeat (econstructor; eauto). 
     destruct x; simpl; auto. erewrite Float32.div_mul_inverse; eauto.
   + apply eval_divfs_base; trivial.
 - apply eval_divfs_base; trivial.
diff --git a/backend/Selectionaux.ml b/backend/Selectionaux.ml
index 574c31f0..26a79fd7 100644
--- a/backend/Selectionaux.ml
+++ b/backend/Selectionaux.ml
@@ -68,6 +68,8 @@ let rec cost_expr = function
 
 let fast_cmove ty =
   match Configuration.arch, Configuration.model with
+  | "aarch64", _ ->
+      (match ty with Tint | Tlong | Tfloat | Tsingle -> true | _ -> false)
   | "arm", _ ->
       (match ty with Tint | Tfloat | Tsingle -> true | _ -> false)
   | "powerpc", "e5500" -> 
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 0be96167..9e0f22cc 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -1265,8 +1265,8 @@ Proof.
   econstructor; eauto.
   econstructor; eauto. apply set_var_lessdef; auto.
 - (* store *)
-  exploit sel_expr_correct. eauto. eauto. eexact H. eauto. eauto. intros [vaddr' [A B]].
-  exploit sel_expr_correct. eauto. eauto. eexact H0. eauto. eauto. intros [v' [C D]].
+  exploit sel_expr_correct. try apply LINK. try apply HF. eexact H. eauto. eauto. intros [vaddr' [A B]].
+  exploit sel_expr_correct. try apply LINK. try apply HF. eexact H0. eauto. eauto. intros [v' [C D]].
   exploit Mem.storev_extends; eauto. intros [m2' [P Q]].
   left; econstructor; split.
   eapply eval_store; eauto.
diff --git a/backend/Splitting.ml b/backend/Splitting.ml
index 40f09c3d..3ca45c3b 100644
--- a/backend/Splitting.ml
+++ b/backend/Splitting.ml
@@ -151,8 +151,8 @@ let ren_instr f maps pc i =
   | Inop s -> Inop s
   | Iop(op, args, res, s) ->
       Iop(op, ren_regs before args, ren_reg after res, s)
-  | Iload(chunk, addr, args, dst, s) ->
-      Iload(chunk, addr, ren_regs before args, ren_reg after dst, s)
+  | Iload(trap, chunk, addr, args, dst, s) ->
+      Iload(trap, chunk, addr, ren_regs before args, ren_reg after dst, s)
   | Istore(chunk, addr, args, src, s) ->
       Istore(chunk, addr, ren_regs before args, ren_reg before src, s)
   | Icall(sg, ros, args, res, s) ->
@@ -162,8 +162,8 @@ let ren_instr f maps pc i =
   | Ibuiltin(ef, args, res, s) ->
       Ibuiltin(ef, List.map (AST.map_builtin_arg (ren_reg before)) args,
                    AST.map_builtin_res (ren_reg after) res, s)
-  | Icond(cond, args, s1, s2) ->
-      Icond(cond, ren_regs before args, s1, s2)
+  | Icond(cond, args, s1, s2, i) ->
+      Icond(cond, ren_regs before args, s1, s2, i)
   | Ijumptable(arg, tbl) ->
       Ijumptable(ren_reg before arg, tbl)
   | Ireturn optarg ->
diff --git a/backend/Stacking.v b/backend/Stacking.v
index 7b382d05..0e3f2832 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -133,8 +133,8 @@ Definition transl_instr
       end
   | Lop op args res =>
       Mop (transl_op fe op) args res :: k
-  | Lload chunk addr args dst =>
-      Mload chunk (transl_addr fe addr) args dst :: k
+  | Lload trap chunk addr args dst =>
+      Mload trap chunk (transl_addr fe addr) args dst :: k
   | Lstore chunk addr args src =>
       Mstore chunk (transl_addr fe addr) args src :: k
   | Lcall sig ros =>
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 326fab61..d3fcdb91 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -1918,6 +1918,46 @@ Proof.
   apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
   apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
 
+- (* Lload notrap1: show that the translated addressing also evaluates to [None], then take one [exec_Mload_notrap1] step *)
+  assert (eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = None) as Haddress.
+  eapply eval_addressing_inject_none; eauto. (* the [None] result is transported across the injection *)
+  eapply globalenv_inject_preserves_globals. eapply sep_proj2. eapply sep_proj2. eapply sep_proj2. eexact SEP.
+  eapply agree_reglist; eauto.
+  econstructor; split.
+  apply plus_one. apply exec_Mload_notrap1.
+  rewrite <- Haddress. apply eval_addressing_preserved. exact symbols_preserved. (* same addressing result in [tge] as in [ge] *)
+  eauto. econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  
+- (* Lload notrap2: compute the injected address [a'], then case-split on whether the load at [a'] succeeds in the target memory [m'] *)
+  assert (exists a',
+          eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
+       /\ Val.inject j a a').
+  eapply eval_addressing_inject; eauto.
+  eapply globalenv_inject_preserves_globals. eapply sep_proj2. eapply sep_proj2. eapply sep_proj2. eexact SEP.
+  eapply agree_reglist; eauto.
+  destruct H1 as [a' [A B]].
+
+  destruct ( Mem.loadv chunk m' a') as [v'|] eqn:Hloadv. (* [Some v']: target load succeeds; [None]: target load fails *)
+  { (* target load succeeds: the Mach step is an ordinary [exec_Mload] producing [v'] *)
+  econstructor; split.
+  apply plus_one. apply exec_Mload with (a:=a') (v:=v'); eauto.
+  try (rewrite <- A; apply eval_addressing_preserved; auto; exact symbols_preserved).
+  econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  }
+  { (* target load fails as well: the Mach step is [exec_Mload_notrap2] at address [a'] *)
+  econstructor; split.
+  apply plus_one. apply exec_Mload_notrap2 with (a:=a'); eauto.
+  try (rewrite <- A; apply eval_addressing_preserved; auto; exact symbols_preserved).
+  
+  econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  }
+
 - (* Lstore *)
   assert (exists a',
           eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
diff --git a/backend/Tailcall.v b/backend/Tailcall.v
index 939abeea..b7a62d74 100644
--- a/backend/Tailcall.v
+++ b/backend/Tailcall.v
@@ -82,7 +82,7 @@ Definition transf_instr (f: function) (pc: node) (instr: instruction) :=
   | Icall sig ros args res s =>
       if is_return niter f s res
       && tailcall_is_possible sig
-      && opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res)
+      && rettype_eq sig.(sig_res) f.(fn_sig).(sig_res)
       then Itailcall sig ros args
       else instr
   | _ => instr
diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v
index 06e314f3..79a5c1cf 100644
--- a/backend/Tailcallproof.v
+++ b/backend/Tailcallproof.v
@@ -157,12 +157,10 @@ Lemma transf_instr_charact:
   transf_instr_spec f instr (transf_instr f pc instr).
 Proof.
   intros. unfold transf_instr. destruct instr; try constructor.
-  caseEq (is_return niter f n r && tailcall_is_possible s &&
-          opt_typ_eq (sig_res s) (sig_res (fn_sig f))); intros.
-  destruct (andb_prop _ _ H0). destruct (andb_prop _ _ H1).
-  eapply transf_instr_tailcall; eauto.
-  eapply is_return_charact; eauto.
-  constructor.
+  destruct (is_return niter f n r && tailcall_is_possible s &&
+            rettype_eq (sig_res s) (sig_res (fn_sig f))) eqn:B.
+- InvBooleans. eapply transf_instr_tailcall; eauto. eapply is_return_charact; eauto.
+- constructor.
 Qed.
 
 Lemma transf_instr_lookup:
@@ -438,6 +436,43 @@ Proof.
   apply eval_addressing_preserved. exact symbols_preserved. eauto.
   econstructor; eauto. apply set_reg_lessdef; auto.
 
+- (* load notrap1 *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
+  left.
+  exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split.
+  eapply exec_Iload_notrap1.
+  eassumption.
+  eapply eval_addressing_lessdef_none. eassumption.
+  erewrite eval_addressing_preserved.
+  eassumption. exact symbols_preserved.
+
+  econstructor; eauto. apply set_reg_lessdef; auto.
+
+- (* load notrap2: the address [a'] in the transformed program is obtained by lessdef; the load at [a'] in [m'] may succeed or fail, so both outcomes are handled *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
+  left.
+
+  exploit eval_addressing_lessdef; eauto.
+  intros [a' [ADDR' ALD]].
+  
+  destruct (Mem.loadv chunk m' a') eqn:Echunk2. (* case split on the result of the load in the transformed program *)
+  + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- v) m'); split. (* load succeeds with [v]: regular [exec_Iload] step *)
+    eapply exec_Iload with (a:=a'). eassumption.
+    erewrite eval_addressing_preserved.
+    eassumption.
+    exact symbols_preserved.
+    assumption.
+    econstructor; eauto. apply set_reg_lessdef; auto.
+  + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split. (* load fails: [exec_Iload_notrap2] step writing the default value *)
+    eapply exec_Iload_notrap2. eassumption.
+    erewrite eval_addressing_preserved.
+    eassumption.
+    exact symbols_preserved.
+    assumption.
+    econstructor; eauto. apply set_reg_lessdef; auto.
+    
 - (* store *)
   TransfInstr.
   assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index da1ce45a..a4c4a195 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -78,11 +78,11 @@ Definition record_gotos (f: LTL.function) : U.t :=
 Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
   match i with
   | Lbranch s => Lbranch (U.repr uf s)
-  | Lcond cond args s1 s2 =>
+  | Lcond cond args s1 s2 info =>
       let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
       if peq s1' s2'
       then Lbranch s1'
-      else Lcond cond args s1' s2'
+      else Lcond cond args s1' s2' info
   | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
   | _ => i
   end.
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index 4f95ac9b..d3b8a9f0 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -441,6 +441,31 @@ Proof.
   rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
   eauto. eauto.
   econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap1 *)
+  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap1.
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap2: get the tunneled-side address [ta] by lessdef, then case-split on whether the load at [ta] in [tm] succeeds *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  destruct (Mem.loadv chunk tm ta) eqn:Htload.
+  { (* load succeeds in [tm]: regular [exec_Lload] step *)
+  left; simpl; econstructor; split.
+  eapply exec_Lload.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+  { (* load fails in [tm]: [exec_Lload_notrap2] step *)
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap2.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
 - (* Lgetstack *)
   left; simpl; econstructor; split.
   econstructor; eauto.
diff --git a/backend/Unusedglob.v b/backend/Unusedglob.v
index 8ac7c4ce..93ca7af4 100644
--- a/backend/Unusedglob.v
+++ b/backend/Unusedglob.v
@@ -46,14 +46,14 @@ Definition ref_instruction (i: instruction) : list ident :=
   match i with
   | Inop _ => nil
   | Iop op _ _ _ => globals_operation op
-  | Iload _ addr _ _ _ => globals_addressing addr
+  | Iload _ _ addr _ _ _ => globals_addressing addr
   | Istore _ addr _ _ _ => globals_addressing addr
   | Icall _ (inl r) _ _ _ => nil
   | Icall _ (inr id) _ _ _ => id :: nil
   | Itailcall _ (inl r) _ => nil
   | Itailcall _ (inr id) _ => id :: nil
   | Ibuiltin _ args _ _ => globals_of_builtin_args args
-  | Icond cond _ _ _ => nil
+  | Icond cond _ _ _ _ => nil
   | Ijumptable _ _ => nil
   | Ireturn _ => nil
   end.
diff --git a/backend/Unusedglobproof.v b/backend/Unusedglobproof.v
index 680daba7..fa120b6d 100644
--- a/backend/Unusedglobproof.v
+++ b/backend/Unusedglobproof.v
@@ -915,7 +915,7 @@ Proof.
             /\ Val.inject j a ta).
   { apply eval_addressing_inj with (ge1 := ge) (sp1 := Vptr sp0 Ptrofs.zero) (vl1 := rs##args).
     intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
-    apply KEPT. red. exists pc, (Iload chunk addr args dst pc'); auto.
+    apply KEPT. red. exists pc, (Iload trap chunk addr args dst pc'); auto.
     econstructor; eauto.
     apply regs_inject; auto.
     assumption. }
@@ -924,6 +924,36 @@ Proof.
   econstructor; split. eapply exec_Iload; eauto.
   econstructor; eauto. apply set_reg_inject; auto.
 
+- (* load notrap1 *)
+  assert (eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = None).
+  { eapply eval_addressing_inj_none.
+    intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
+    apply KEPT. red. exists pc, (Iload NOTRAP chunk addr args dst pc'); auto.
+    econstructor; eauto.
+    rewrite Ptrofs.add_zero; reflexivity.
+    apply regs_inject; auto.
+    eassumption.
+    assumption. }
+ 
+  econstructor; split. eapply exec_Iload_notrap1; eauto.
+  econstructor; eauto. apply set_reg_inject; auto.
+
+- (* load notrap2 *)
+  assert (A: exists ta,
+               eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = Some ta
+            /\ Val.inject j a ta).
+  { apply eval_addressing_inj with (ge1 := ge) (sp1 := Vptr sp0 Ptrofs.zero) (vl1 := rs##args).
+    intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
+    apply KEPT. red. exists pc, (Iload NOTRAP chunk addr args dst pc'); auto.
+    econstructor; eauto.
+    apply regs_inject; auto.
+    assumption. }
+  destruct A as (ta & B & C).
+  destruct (Mem.loadv chunk tm ta) eqn:Echunk2.
+  + econstructor; split. eapply exec_Iload; eauto.
+    econstructor; eauto. apply set_reg_inject; auto.
+  + econstructor; split. eapply exec_Iload_notrap2; eauto.
+    econstructor; eauto. apply set_reg_inject; auto.
 - (* store *)
   assert (A: exists ta,
                eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = Some ta
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index 8dbb67a7..2e79d1a9 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -139,9 +139,14 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) :
   | Some(Iop op args res s) =>
       let a := eval_static_operation op (aregs ae args) in
       VA.State (AE.set res a ae) am
-  | Some(Iload chunk addr args dst s) =>
+  | Some(Iload TRAP chunk addr args dst s) =>
       let a := loadv chunk rm am (eval_static_addressing addr (aregs ae args)) in
       VA.State (AE.set dst a ae) am
+
+  (* TODO: maybe a case analysis on the results of loadv? *)
+               
+  | Some(Iload NOTRAP chunk addr args dst s) =>
+      VA.State (AE.set dst Vtop ae) am
   | Some(Istore chunk addr args src s) =>
       let am' := storev chunk am (eval_static_addressing addr (aregs ae args)) (areg ae src) in
       VA.State ae am'
@@ -151,7 +156,7 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) :
       VA.Bot
   | Some(Ibuiltin ef args res s) =>
       transfer_builtin ae am rm ef args res
-  | Some(Icond cond args s1 s2) =>
+  | Some(Icond cond args s1 s2 _) =>
       VA.State ae am
   | Some(Ijumptable arg tbl) =>
       VA.State ae am
@@ -1039,9 +1044,8 @@ Proof.
   red; simpl; intros. destruct (plt b (Mem.nextblock m)).
   exploit RO; eauto. intros (R & P & Q).
   split; auto.
-  split. apply bmatch_incr with bc; auto. apply bmatch_inv with m; auto.
-  intros. eapply Mem.loadbytes_unchanged_on_1. eapply external_call_readonly; eauto.
-  auto. intros; red. apply Q.
+  split. apply bmatch_incr with bc; auto. apply bmatch_ext with m; auto.
+  intros. eapply external_call_readonly with (m2 := m'); eauto.
   intros; red; intros; elim (Q ofs).
   eapply external_call_max_perm with (m2 := m'); eauto.
   destruct (j' b); congruence.
@@ -1148,10 +1152,10 @@ Proof.
 - constructor.
 - assert (Plt sp bound') by eauto with va.
   eapply sound_stack_public_call; eauto. apply IHsound_stack; intros.
-  apply INV. xomega. rewrite SAME; auto. xomega. auto. auto.
+  apply INV. xomega. rewrite SAME; auto with ordered_type. xomega. auto. auto.
 - assert (Plt sp bound') by eauto with va.
   eapply sound_stack_private_call; eauto. apply IHsound_stack; intros.
-  apply INV. xomega. rewrite SAME; auto. xomega. auto. auto.
+  apply INV. xomega. rewrite SAME; auto with ordered_type. xomega. auto. auto.
   apply bmatch_ext with m; auto. intros. apply INV. xomega. auto. auto. auto.
 Qed.
 
@@ -1268,11 +1272,29 @@ Proof.
   apply ematch_update; auto. eapply eval_static_operation_sound; eauto with va.
 
 - (* load *)
+  destruct trap.
+  + eapply sound_succ_state; eauto. simpl; auto.
+    unfold transfer; rewrite H. eauto.
+    apply ematch_update; auto. eapply loadv_sound; eauto with va.
+    eapply eval_static_addressing_sound; eauto with va.
+  + eapply sound_succ_state; eauto. simpl; auto.
+    unfold transfer; rewrite H. eauto.
+    apply ematch_update; auto.
+    eapply vmatch_top.
+    eapply loadv_sound; try eassumption.
+    eapply eval_static_addressing_sound; eauto with va.
+- (* load notrap1 *)
   eapply sound_succ_state; eauto. simpl; auto.
   unfold transfer; rewrite H. eauto.
-  apply ematch_update; auto. eapply loadv_sound; eauto with va.
-  eapply eval_static_addressing_sound; eauto with va.
-
+  apply ematch_update; auto.
+  unfold default_notrap_load_value.
+  constructor.
+- (* load notrap2 *)
+  eapply sound_succ_state; eauto. simpl; auto.
+  unfold transfer; rewrite H. eauto.
+  apply ematch_update; auto.
+  unfold default_notrap_load_value.
+  constructor.
 - (* store *)
   exploit eval_static_addressing_sound; eauto with va. intros VMADDR.
   eapply sound_succ_state; eauto. simpl; auto.
@@ -1362,7 +1384,7 @@ Proof.
   apply sound_stack_exten with bc.
   apply sound_stack_inv with m. auto.
   intros. apply Q. red. eapply Plt_trans; eauto.
-  rewrite C; auto.
+  rewrite C; auto with ordered_type.
   exact AA.
 * (* public builtin call *)
   exploit anonymize_stack; eauto.
@@ -1381,7 +1403,7 @@ Proof.
   apply sound_stack_exten with bc.
   apply sound_stack_inv with m. auto.
   intros. apply Q. red. eapply Plt_trans; eauto.
-  rewrite C; auto.
+  rewrite C; auto with ordered_type.
   exact AA.
   }
   unfold transfer_builtin in TR.
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index fd3bd5ae..779e7bb9 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -2093,6 +2093,7 @@ Proof.
 Qed.
 
 Definition sign_ext (nbits: Z) (v: aval) :=
+  if zle nbits 0 then Uns (provenance v) 0 else
   match v with
   | I i => I (Int.sign_ext nbits i)
   | Uns p n => if zlt n nbits then Uns p n else sgn p nbits
@@ -2101,20 +2102,39 @@ Definition sign_ext (nbits: Z) (v: aval) :=
   end.
 
 Lemma sign_ext_sound:
-  forall nbits v x, 0 < nbits -> vmatch v x -> vmatch (Val.sign_ext nbits v) (sign_ext nbits x).
+  forall nbits v x, vmatch v x -> vmatch (Val.sign_ext nbits v) (sign_ext nbits x).
 Proof.
   assert (DFL: forall p nbits i, 0 < nbits -> vmatch (Vint (Int.sign_ext nbits i)) (sgn p nbits)).
   {
     intros. apply vmatch_sgn. apply is_sign_ext_sgn; auto with va.
   }
-  intros. inv H0; simpl; auto with va.
-- destruct (zlt n nbits); eauto with va.
+  intros. unfold sign_ext. destruct (zle nbits 0).
+- destruct v; simpl; auto with va. constructor. omega. 
+  rewrite Int.sign_ext_below by auto. red; intros; apply Int.bits_zero.
+- inv H; simpl; auto with va.
++ destruct (zlt n nbits); eauto with va.
   constructor; auto. eapply is_sign_ext_uns; eauto with va.
-- destruct (zlt n nbits); auto with va.
-- apply vmatch_sgn. apply is_sign_ext_sgn; auto with va.
++ destruct (zlt n nbits); auto with va.
++ apply vmatch_sgn. apply is_sign_ext_sgn; auto with va.
   apply Z.min_case; auto with va.
 Qed.
 
+Definition zero_ext_l (s: Z) := unop_long (Int64.zero_ext s).
+
+Lemma zero_ext_l_sound:
+  forall s v x, vmatch v x -> vmatch (Val.zero_ext_l s v) (zero_ext_l s x).
+Proof.
+  intros s. exact (unop_long_sound (Int64.zero_ext s)).
+Qed.
+
+Definition sign_ext_l (s: Z) := unop_long (Int64.sign_ext s).
+
+Lemma sign_ext_l_sound:
+  forall s v x, vmatch v x -> vmatch (Val.sign_ext_l s v) (sign_ext_l s x).
+Proof.
+  intros s. exact (unop_long_sound (Int64.sign_ext s)).
+Qed.
+
 Definition longofint (v: aval) :=
   match v with
   | I i => L (Int64.repr (Int.signed i))
@@ -3482,11 +3502,6 @@ Proof.
 - omegaContradiction.
 Qed.
 
-Lemma max_size_chunk: forall chunk, size_chunk chunk <= 8.
-Proof.
-  destruct chunk; simpl; omega.
-Qed.
-
 Remark inval_before_contents:
   forall i c chunk' av' j,
   (inval_before i (i - 7) c)##j = Some (ACval chunk' av') ->
@@ -4712,6 +4727,7 @@ Hint Resolve cnot_sound symbol_address_sound
        negfs_sound absfs_sound
        addfs_sound subfs_sound mulfs_sound divfs_sound
        zero_ext_sound sign_ext_sound longofint_sound longofintu_sound
+       zero_ext_l_sound sign_ext_l_sound
        singleoffloat_sound floatofsingle_sound
        intoffloat_sound intuoffloat_sound floatofint_sound floatofintu_sound
        intofsingle_sound intuofsingle_sound singleofint_sound singleofintu_sound
diff --git a/backend/XTL.ml b/backend/XTL.ml
index f10efeed..1d8e89c0 100644
--- a/backend/XTL.ml
+++ b/backend/XTL.ml
@@ -30,13 +30,13 @@ type instruction =
   | Xspill of var * var
   | Xparmove of var list * var list * var * var
   | Xop of operation * var list * var
-  | Xload of memory_chunk * addressing * var list * var
+  | Xload of trapping_mode * memory_chunk * addressing * var list * var
   | Xstore of memory_chunk * addressing * var list * var
   | Xcall of signature * (var, ident) sum * var list * var list
   | Xtailcall of signature * (var, ident) sum * var list
   | Xbuiltin of external_function * var builtin_arg list * var builtin_res
   | Xbranch of node
-  | Xcond of condition * var list * node * node
+  | Xcond of condition * var list * node * node * bool option
   | Xjumptable of var * node list
   | Xreturn of var list
 
@@ -105,7 +105,7 @@ let twin_reg r =
 let rec successors_block = function
   | Xbranch s :: _ -> [s]
   | Xtailcall(sg, vos, args) :: _ -> []
-  | Xcond(cond, args, s1, s2) :: _ -> [s1; s2]
+  | Xcond(cond, args, s1, s2, _) :: _ -> [s1; s2]
   | Xjumptable(arg, tbl) :: _ -> tbl
   | Xreturn  _:: _ -> []
   | instr :: blk -> successors_block blk
@@ -159,7 +159,7 @@ let type_instr = function
       let (targs, tres) = type_of_operation op in
       set_vars_type args targs;
       set_var_type res tres
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       set_vars_type args (type_of_addressing addr);
       set_var_type dst (type_of_chunk chunk)
   | Xstore(chunk, addr, args, src) ->
@@ -179,7 +179,7 @@ let type_instr = function
       type_builtin_res res (proj_sig_res sg)
   | Xbranch s ->
       ()
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       set_vars_type args (type_of_condition cond)
   | Xjumptable(arg, tbl) ->
       set_var_type arg Tint
diff --git a/backend/XTL.mli b/backend/XTL.mli
index 54988d4b..7b7f7186 100644
--- a/backend/XTL.mli
+++ b/backend/XTL.mli
@@ -31,13 +31,13 @@ type instruction =
   | Xspill of var * var
   | Xparmove of var list * var list * var * var
   | Xop of operation * var list * var
-  | Xload of memory_chunk * addressing * var list * var
+  | Xload of trapping_mode * memory_chunk * addressing * var list * var
   | Xstore of memory_chunk * addressing * var list * var
   | Xcall of signature * (var, ident) sum * var list * var list
   | Xtailcall of signature * (var, ident) sum * var list
   | Xbuiltin of external_function * var builtin_arg list * var builtin_res
   | Xbranch of node
-  | Xcond of condition * var list * node * node
+  | Xcond of condition * var list * node * node * bool option
   | Xjumptable of var * node list
   | Xreturn of var list
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-06 22:45:05 +0200
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-06 22:45:05 +0200
commit	5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633 (patch)
tree	299bdd3c6068f121ca243d8602addcd27d690fd2 /backend
parent	c420bc8d3b87d71c38209b5ab8bca22875466362 (diff)
parent	c6356cdc5f567a317afcb99cb004354cf7dcce0f (diff)
download	compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.tar.gz compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.zip