Merge branch 'mppa-work' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into mppa-work

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-23 19:59:21 +0200
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-23 19:59:21 +0200
commit: 433c67f9a05e8cab773d1755aa3eb16f8af588e6 (patch)
tree: dadda4120b592e3900854133b3222cf4fcf3b4af
parent: 7a30a72809448535785cd47d26d9415f6ada93e3 (diff)
parent: 63f425b8ca329820e629c109fb0a2454ea7e2f27 (diff)
download: compcert-kvx-433c67f9a05e8cab773d1755aa3eb16f8af588e6.tar.gz
compcert-kvx-433c67f9a05e8cab773d1755aa3eb16f8af588e6.zip
139 files changed, 9973 insertions, 589 deletions
diff --git a/Makefile b/Makefile
index 2cd40800..ba8add27 100644
--- a/Makefile
+++ b/Makefile
@@ -57,6 +57,7 @@ FLOCQ=\
 # General-purpose libraries (in lib/)
 
 VLIB=Axioms.v Coqlib.v Intv.v Maps.v Heaps.v Lattice.v Ordered.v \
+  HashedSet.v \
   Iteration.v Zbits.v Integers.v Archi.v IEEE754_extra.v Floats.v \
   Parmov.v UnionFind.v Wfsimpl.v \
   Postorder.v FSetAVLplus.v IntvSets.v Decidableplus.v BoolEqual.v
@@ -79,21 +80,27 @@ BACKEND=\
   RTLgen.v RTLgenspec.v RTLgenproof.v \
   Tailcall.v Tailcallproof.v \
   Inlining.v Inliningspec.v Inliningproof.v \
+  Profiling.v Profilingproof.v \
+  ProfilingExploit.v ProfilingExploitproof.v \
   Renumber.v Renumberproof.v \
   Duplicate.v Duplicateproof.v \
   RTLtyping.v \
   Kildall.v Liveness.v \
   ValueDomain.v ValueAOp.v ValueAnalysis.v \
   ConstpropOp.v Constprop.v ConstpropOpproof.v Constpropproof.v \
+  Inject.v Injectproof.v \
   CSEdomain.v CombineOp.v CSE.v CombineOpproof.v CSEproof.v \
   CSE2deps.v CSE2depsproof.v \
   CSE2.v CSE2proof.v \
+  CSE3analysis.v CSE3analysisproof.v CSE3.v CSE3proof.v \
+  LICM.v LICMproof.v \
   NeedDomain.v NeedOp.v Deadcode.v Deadcodeproof.v \
   Unusedglob.v Unusedglobproof.v \
   Machregs.v Locations.v Conventions1.v Conventions.v LTL.v \
   ForwardMoves.v ForwardMovesproof.v \
+  FirstNop.v FirstNopproof.v \
   Allnontrap.v Allnontrapproof.v \
-  Allocation.v Allocproof.v \
+  Allocation.v Allocationproof.v \
   Tunneling.v Tunnelingproof.v \
   Linear.v Lineartyping.v \
   Linearize.v Linearizeproof.v \
@@ -194,6 +201,8 @@ tools/ndfun: tools/ndfun.ml
 	ocamlopt -o tools/ndfun str.cmxa tools/ndfun.ml
 tools/modorder: tools/modorder.ml
 	ocamlopt -o tools/modorder str.cmxa tools/modorder.ml
+tools/compiler_expand: tools/compiler_expand.ml
+	ocamlopt -o $@ $+
 
 latexdoc:
 	cd doc; $(COQDOC) --latex -o doc/doc.tex -g $(FILES)
@@ -209,6 +218,9 @@ latexdoc:
 	@tools/ndfun $*.vp > $*.v || { rm -f $*.v; exit 2; }
 	@chmod a-w $*.v
 
+driver/Compiler.v: driver/Compiler.vexpand tools/compiler_expand
+	tools/compiler_expand driver/Compiler.vexpand $@
+
 compcert.ini: Makefile.config
 	(echo "stdlib_path=$(LIBDIR)"; \
          echo "prepro=$(CPREPRO)"; \
diff --git a/PROFILING.md b/PROFILING.md
new file mode 100644
index 00000000..3f4cbc46
--- /dev/null
+++ b/PROFILING.md
@@ -0,0 +1,34 @@
+This version of CompCert includes a profiling system. It tells CompCert's optimization phases for each conditional branch instruction which of the two branches was more frequently taken. This system is not available for all combinations of target architecture and operating system; see below.
+
+For using this profiling system one has to
+1. Compile a special version of the program that will count, for each branch, the number of times it was taken, and record this information to a file.
+2. Execute this special version on representative examples. It will record the frequencies of execution of branches to a log file.
+3. Recompile the program, telling CompCert to use the information in the log file.
+
+This system does not use the same formats as gcc's gcov profiles, since it depends heavily on compiler internals. It seems however possible to profile and optimize programs consisting of modules compiled with gcc and CompCert by using both systems simultaneously: the two compilers use separate log files.
+
+To compile the special version that logs frequencies to files, use the option `-fprofile-arcs`. This option has to be specified at compile time but is not needed at link time (however, a reminder: if you link using a compiler other than CompCert, you need to link against `libcompcert.a`). You may mix object files compiled with and without this option.
+
+This version may experience significant slowdown compared to normally compiled code, so do not use `-fprofile-arcs` for production code.
+
+At the end of execution of the program, frequency information will be logged to a file whose default name is `compcert_profiling.dat` (in the current directory). Another name may be used by specifying it using the `COMPCERT_PROFILING_DATA` environment variable. If this variable contains an empty string, no logging is done (but the slowdown still applies).
+
+Data are appended to the log file, never deleted, so it is safe to run the program several times on several test cases to accumulate data.
+
+Depending on the platform, this logging system is or is not thread-safe and is or is not compatible with position-independent code (PIC). In non-thread-safe configurations, if two different execution threads execute code to be profiled, the profiling counters may end up with incorrect values.
+
+| Target platform | Available? | Thread-safe | PIC |
+|-----------------|------------|-------------|-----|
+| AArch64         | Yes        | Yes         | No  |
+| ARM             | Yes        | No          | No  |
+| IA32            | Yes        | No          | No  |
+| K1c             | Yes        | Yes         | No  |
+| PowerPC         | No         |             |     |
+| PowerPC 64      | No         |             |     |
+| Risc-V 32       | No         |             |     |
+| Risc-V 64       | No         |             |     |
+| x86-64          | Yes        | Yes         | Yes |
+
+For recompiling the program using profiling information, use `-fprofile-use= compcert_profiling.dat -ftracelinearize` (substitute the appropriate filename for `compcert_profiling.dat` if needed). Experiments show performance improvement on K1c, not on other platforms.
+
+The same options (except for `-fprofile-use=` and `-fprofile-arcs`) should be used to compile the logging and optimized versions of the program: only functions that are exactly the same in the intermediate representation will be optimized according to profiling information.
diff --git a/aarch64/Archi.v b/aarch64/Archi.v
index aef4ab77..7d7b6887 100644
--- a/aarch64/Archi.v
+++ b/aarch64/Archi.v
@@ -86,3 +86,5 @@ Global Opaque ptr64 big_endian splitlong
 (** Whether to generate position-independent code or not *)
 
 Parameter pic_code: unit -> bool.
+
+Definition has_notrap_loads := false.
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 471ad501..b0787d0a 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -400,7 +400,7 @@ let expand_instruction instr =
         expand_annot_val kind txt targ args res
      | EF_memcpy(sz, al) ->
         expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
-     | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+     | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
         emit instr
      | _ ->
         assert false
diff --git a/aarch64/Machregs.v b/aarch64/Machregs.v
index b2a2308e..3d27f48f 100644
--- a/aarch64/Machregs.v
+++ b/aarch64/Machregs.v
@@ -158,6 +158,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
   | EF_memcpy sz al => R15 :: R17 :: R29 :: nil
   | EF_inline_asm txt sg clob => destroyed_by_clobber clob
+  | EF_profiling _ _ => R15 :: R17 :: nil
   | _ => nil
   end.
 
diff --git a/aarch64/Op.v b/aarch64/Op.v
index c0b9d435..afc25aa6 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -938,14 +938,19 @@ Definition is_trapping_op (op : operation) :=
   end.
                 
 
+Definition args_of_operation op :=
+  if eq_operation op Omove
+  then 1%nat
+  else List.length (fst (type_of_operation op)).
+
 Lemma is_trapping_op_sound:
   forall op vl sp m,
-    op <> Omove ->
     is_trapping_op op = false ->
-    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    (List.length vl) = args_of_operation op ->
     eval_operation genv sp op vl m <> None.
 Proof.
-  destruct op; intros; simpl in *; try congruence.
+  unfold args_of_operation.
+  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index e54673dd..8d74daf4 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -133,7 +133,9 @@ module Target : TARGET =
 
     let name_of_section = function
       | Section_text         -> ".text"
-      | Section_data i | Section_small_data i ->
+      | Section_data(i, true) ->
+         failwith "_Thread_local unsupported on this platform"
+      | Section_data(i, false) | Section_small_data i ->
           if i then ".data" else common_section ()
       | Section_const i | Section_small_const i ->
           if i || (not !Clflags.option_fcommon) then ".section	.rodata" else "COMM"
@@ -227,6 +229,28 @@ module Target : TARGET =
     | EOuxtw n -> fprintf oc ", uxtw #%a" coqint n
     | EOuxtx n -> fprintf oc ", uxtx #%a" coqint n
 
+    let next_profiling_label =
+      let atomic_incr_counter = ref 0 in
+      fun () ->
+      let r = sprintf ".Lcompcert_atomic_incr%d" !atomic_incr_counter in
+      incr atomic_incr_counter; r;;
+                
+    let print_profiling_logger oc id kind =
+      assert (kind >= 0);
+      assert (kind <= 1);
+      fprintf oc "%s begin profiling %a %d: atomic increment\n" comment
+        Profilingaux.pp_id id kind;
+      let ofs = profiling_offset id kind and lbl = next_profiling_label () in
+      fprintf oc "	adrp	x15, %s+%d\n" profiling_counter_table_name ofs;
+      fprintf oc "	add	x15, x15, :lo12:(%s+%d)\n" profiling_counter_table_name ofs;
+      fprintf oc "%s:\n" lbl;
+      fprintf oc "	ldaxr	x17, [x15]\n";
+      fprintf oc "	add	x17, x17, 1\n";
+      fprintf oc "	stlxr	w17, x17, [x15]\n";
+      fprintf oc "	cbnz	w17, %s\n" lbl;
+      fprintf oc "%s end profiling %a %d\n" comment
+        Profilingaux.pp_id id kind;;
+
 (* Printing of instructions *)
     let print_instruction oc = function
     (* Branches *)
@@ -519,6 +543,8 @@ module Target : TARGET =
              fprintf oc "%s begin inline assembly\n\t" comment;
              print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
              fprintf oc "%s end inline assembly\n" comment
+         | EF_profiling (id, coq_kind) ->
+            print_profiling_logger oc id (Z.to_int coq_kind)
          | _ ->
              assert false
         end
@@ -575,7 +601,24 @@ module Target : TARGET =
         section oc Section_text;
       end
 
+    let aarch64_profiling_stub oc nr_items
+          profiling_id_table_name
+          profiling_counter_table_name =
+	fprintf oc "	adrp	x2, %s\n" profiling_counter_table_name;
+	fprintf oc "	adrp	x1, %s\n" profiling_id_table_name;
+	fprintf oc "	add	x2, x2, :lo12:%s\n" profiling_counter_table_name;
+	fprintf oc "	add	x1, x1, :lo12:%s\n" profiling_id_table_name;
+	fprintf oc "	mov	w0, %d\n" nr_items;
+        fprintf oc "	b	%s\n" profiling_write_table_helper ;;
+
+    let print_atexit oc to_be_called =
+      	fprintf oc "	adrp	x0, %s\n" to_be_called;
+	fprintf oc "	add	x0, x0, :lo12:%s\n" to_be_called;
+	fprintf oc "	b	atexit\n";;
+        
+
     let print_epilogue oc =
+      print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) aarch64_profiling_stub oc;
       if !Clflags.option_g then begin
         Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
         section oc Section_text;
diff --git a/arm/Archi.v b/arm/Archi.v
index 16d6c71d..738341cc 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -97,3 +97,5 @@ Parameter abi: abi_kind.
 (** Whether instructions added with Thumb2 are supported. True for ARMv6T2
   and above. *)
 Parameter thumb2_support: bool.
+
+Definition has_notrap_loads := false.
diff --git a/arm/AsmToJSON.ml b/arm/AsmToJSON.ml
index e850fed6..669d8c0c 100644
--- a/arm/AsmToJSON.ml
+++ b/arm/AsmToJSON.ml
@@ -177,6 +177,7 @@ let pp_instructions pp ic =
         | EF_annot_val _
         | EF_builtin _
         | EF_debug _
+        | EF_profiling _
         | EF_external _
         | EF_free
         | EF_malloc
diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml
index 89aab5c7..6996c9bb 100644
--- a/arm/Asmexpand.ml
+++ b/arm/Asmexpand.ml
@@ -619,7 +619,7 @@ let expand_instruction instr =
 	   | EF_memcpy(sz, al) ->
 	      expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz))
 		(Int32.to_int (camlint_of_coqint al)) args
-	   | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+	   | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
               emit instr
 	   | _ ->
               assert false
diff --git a/arm/Constantexpand.ml b/arm/Constantexpand.ml
index 408b291e..8cc32c1f 100644
--- a/arm/Constantexpand.ml
+++ b/arm/Constantexpand.ml
@@ -106,6 +106,7 @@ let estimate_size = function
   | Pbuiltin (ef,_,_) ->
     begin match ef with
     | EF_inline_asm _ -> 256
+    | EF_profiling _ -> 40
     | _ -> 0 end
   | Pcfi_adjust _
   | Pcfi_rel_offset _
diff --git a/arm/Machregs.v b/arm/Machregs.v
index ae0ff6bf..1ec8f0a1 100644
--- a/arm/Machregs.v
+++ b/arm/Machregs.v
@@ -153,6 +153,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
   | EF_memcpy sz al => R2 :: R3 :: R12 :: F7 :: nil
   | EF_inline_asm txt sg clob => destroyed_by_clobber clob
+  | EF_profiling _ _ => R2 :: R3 :: R12 :: nil
   | _ => nil
   end.
 
diff --git a/arm/Op.v b/arm/Op.v
index 671bdbe4..25e48ce1 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -531,14 +531,19 @@ Definition is_trapping_op (op : operation) :=
   end.
                 
 
+Definition args_of_operation op :=
+  if eq_operation op Omove
+  then 1%nat
+  else List.length (fst (type_of_operation op)).
+
 Lemma is_trapping_op_sound:
   forall op vl sp m,
-    op <> Omove ->
     is_trapping_op op = false ->
-    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    (List.length vl) = args_of_operation op ->
     eval_operation genv sp op vl m <> None.
 Proof.
-  destruct op; intros; simpl in *; try congruence.
+  unfold args_of_operation.
+  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
diff --git a/arm/TargetPrinter.ml b/arm/TargetPrinter.ml
index 03e06a65..839530c6 100644
--- a/arm/TargetPrinter.ml
+++ b/arm/TargetPrinter.ml
@@ -147,7 +147,9 @@ struct
 
   let name_of_section = function
     | Section_text -> ".text"
-    | Section_data i | Section_small_data i ->
+    | Section_data(i, true) ->
+         failwith "_Thread_local unsupported on this platform"
+    | Section_data(i, false) | Section_small_data(i) ->
       if i then ".data" else common_section ()
     | Section_const i | Section_small_const i ->
       if i || (not !Clflags.option_fcommon) then ".section	.rodata" else "COMM"
@@ -202,6 +204,38 @@ struct
     | SOasr(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n
     | SOror(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n
 
+
+    let next_profiling_label =
+      let profiling_label_counter = ref 0 in
+      fun () ->
+      let r = sprintf ".Lprofiling_label%d" !profiling_label_counter in
+      incr profiling_label_counter; r;;
+                   
+    let print_profiling_logger oc id kind =
+      assert (kind >= 0);
+      assert (kind <= 1);
+      let ofs = profiling_offset id kind and olbl = next_profiling_label () in
+      fprintf oc "%s begin profiling %a %d: non-atomic increment\n" comment
+        Profilingaux.pp_id id kind;
+      fprintf oc "	ldr	r2, %s\n" olbl;
+      fprintf oc "	ldr	r3, [r2, #%d]\n"
+        (if Configuration.is_big_endian then 8 else 0);
+      fprintf oc "	ldr	r12, [r2, #%d]\n"
+        (if Configuration.is_big_endian then 0 else 8);
+      fprintf oc "	adds	r3, r3, #1\n";
+      fprintf oc "	adc	r12, r12, #0\n";
+      fprintf oc "	str	r3, [r2, #%d]\n"
+        (if Configuration.is_big_endian then 8 else 0);
+      fprintf oc "	str	r12, [r2, #%d]\n"
+        (if Configuration.is_big_endian then 0 else 8);
+      let jlbl = next_profiling_label () in
+      fprintf oc "	b	%s\n" jlbl;
+      fprintf oc "%s:\n" olbl;
+      fprintf oc "	.word	%s + %d\n" profiling_counter_table_name ofs;
+      fprintf oc "%s:\n" jlbl;
+      fprintf oc "%s end profiling %a %d\n" comment
+        Profilingaux.pp_id id kind;;
+
   let print_instruction oc = function
     (* Core instructions *)
     | Padc (r1,r2,so) ->
@@ -482,6 +516,7 @@ struct
           fprintf oc "%s begin inline assembly\n\t" comment;
           print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
           fprintf oc "%s end inline assembly\n" comment
+        | EF_profiling(id, coq_kind) -> print_profiling_logger oc id  (Z.to_int coq_kind)
         | _ ->
           assert false
       end
@@ -549,6 +584,11 @@ struct
     if !Clflags.option_mthumb then
       fprintf oc "	.thumb_func\n"
 
+
+  let text_print_fun_info oc name =
+    fprintf oc "	.type	%s, %%function\n" name;
+    fprintf oc "	.size	%s, . - %s\n" name name
+
   let print_fun_info oc name =
     fprintf oc "	.type	%a, %%function\n" symbol name;
     fprintf oc "	.size	%a, . - %a\n" symbol name symbol name
@@ -596,9 +636,22 @@ struct
     if !Clflags.option_g then begin
       section oc Section_text;
       cfi_section oc
-    end
+      end
+
+  let arm_profiling_stub oc nr_items
+          profiling_id_table_name
+          profiling_counter_table_name =
+        fprintf oc "	ldr	r2, = %s\n" profiling_counter_table_name;
+        fprintf oc "	ldr	r1, = %s\n" profiling_id_table_name;
+	fprintf oc "	mov	r0, #%d\n" nr_items;
+	fprintf oc "	b	%s\n" profiling_write_table_helper;;
+  
+  let print_atexit oc to_be_called =
+      	fprintf oc "	ldr	r0, = %s\n" to_be_called;
+	fprintf oc "	b	atexit\n";;
 
   let print_epilogue oc =
+    print_profiling_epilogue text_print_fun_info (Init_atexit print_atexit) arm_profiling_stub oc;
     if !Clflags.option_g then begin
       Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
       section oc Section_text;
diff --git a/backend/Allocproof.v b/backend/Allocationproof.v
index 3c7df58a..3c7df58a 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocationproof.v
diff --git a/backend/CSE.v b/backend/CSE.v
index 1936d4e4..838d96a6 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -493,7 +493,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
               | _ =>
                   empty_numbering
               end
-          | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ =>
+          | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ | EF_profiling _ _ =>
               set_res_unknown before res
           end
       | Icond cond args ifso ifnot _ =>
diff --git a/backend/CSE2.v b/backend/CSE2.v
index dabbaa22..00b1821e 100644
--- a/backend/CSE2.v
+++ b/backend/CSE2.v
@@ -29,7 +29,7 @@ Proof.
   decide equality.
 Defined.
 
-Module RELATION.
+Module RELATION <: SEMILATTICE_WITHOUT_BOTTOM.
   
 Definition t := (PTree.t sym_val).
 Definition eq (r1 r2 : t) :=
@@ -138,119 +138,6 @@ Qed.
 
 End RELATION.
 
-Module Type SEMILATTICE_WITHOUT_BOTTOM.
-
-  Parameter t: Type.
-  Parameter eq: t -> t -> Prop.
-  Axiom eq_refl: forall x, eq x x.
-  Axiom eq_sym: forall x y, eq x y -> eq y x.
-  Axiom eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
-  Parameter beq: t -> t -> bool.
-  Axiom beq_correct: forall x y, beq x y = true -> eq x y.
-  Parameter ge: t -> t -> Prop.
-  Axiom ge_refl: forall x y, eq x y -> ge x y.
-  Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
-  Parameter lub: t -> t -> t.
-  Axiom ge_lub_left: forall x y, ge (lub x y) x.
-  Axiom ge_lub_right: forall x y, ge (lub x y) y.
-
-End SEMILATTICE_WITHOUT_BOTTOM.
-
-Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM).
-  Definition t := option L.t.
-  Definition eq (a b : t) :=
-    match a, b with
-    | None, None => True
-    | Some x, Some y => L.eq x y
-    | Some _, None | None, Some _ => False
-    end.
-  
-  Lemma eq_refl: forall x, eq x x.
-  Proof.
-    unfold eq; destruct x; trivial.
-    apply L.eq_refl.
-  Qed.
-
-  Lemma eq_sym: forall x y, eq x y -> eq y x.
-  Proof.
-    unfold eq; destruct x; destruct y; trivial.
-    apply L.eq_sym.
-  Qed.
-  
-  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
-  Proof.
-    unfold eq; destruct x; destruct y; destruct z; trivial.
-    - apply L.eq_trans.
-    - contradiction.
-  Qed.
-  
-  Definition beq (x y : t) :=
-    match x, y with
-    | None, None => true
-    | Some x, Some y => L.beq x y
-    | Some _, None | None, Some _ => false
-    end.
-  
-  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
-  Proof.
-    unfold beq, eq.
-    destruct x; destruct y; trivial; try congruence.
-    apply L.beq_correct.
-  Qed.
-  
-  Definition ge (x y : t) :=
-    match x, y with
-    | None, Some _ => False
-    | _, None => True
-    | Some a, Some b => L.ge a b
-    end.
-  
-  Lemma ge_refl: forall x y, eq x y -> ge x y.
-  Proof.
-    unfold eq, ge.
-    destruct x; destruct y; trivial.
-    apply L.ge_refl.
-  Qed.
-  
-  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
-  Proof.
-    unfold ge.
-    destruct x; destruct y; destruct z; trivial; try contradiction.
-    apply L.ge_trans.
-  Qed.
-  
-  Definition bot: t := None.
-  Lemma ge_bot: forall x, ge x bot.
-  Proof.
-    unfold ge, bot.
-    destruct x; trivial.
-  Qed.
-  
-  Definition lub (a b : t) :=
-    match a, b with
-    | None, _ => b
-    | _, None => a
-    | (Some x), (Some y) => Some (L.lub x y)
-    end.
-
-  Lemma ge_lub_left: forall x y, ge (lub x y) x.
-  Proof.
-    unfold ge, lub.
-    destruct x; destruct y; trivial.
-    - apply L.ge_lub_left.
-    - apply L.ge_refl.
-      apply L.eq_refl.
-  Qed.
-  
-  Lemma ge_lub_right: forall x y, ge (lub x y) y.
-  Proof.
-    unfold ge, lub.
-    destruct x; destruct y; trivial.
-    - apply L.ge_lub_right.
-    - apply L.ge_refl.
-      apply L.eq_refl.
-  Qed.
-End ADD_BOTTOM.
 
 Module RB := ADD_BOTTOM(RELATION).
 Module DS := Dataflow_Solver(RB)(NodeSetForward).
@@ -375,7 +262,7 @@ Definition load (chunk: memory_chunk) (addr : addressing)
   | None => load1 chunk addr dst args rel
   end.
 
-Fixpoint kill_builtin_res res rel :=
+Definition kill_builtin_res res rel :=
   match res with
   | BR r => kill_reg r rel
   | _ => rel
diff --git a/backend/CSE3.v b/backend/CSE3.v
new file mode 100644
index 00000000..2203ad14
--- /dev/null
+++ b/backend/CSE3.v
@@ -0,0 +1,93 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps CSE2deps.
+Require Import CSE3analysis HashedSet.
+Require Import RTLtyping.
+
+Local Open Scope error_monad_scope.
+
+Axiom preanalysis : typing_env -> RTL.function -> invariants * analysis_hints.
+
+Section REWRITE.
+  Context {ctx : eq_context}.
+
+Definition find_op_in_fmap fmap pc op args :=
+  match PMap.get pc fmap with
+  | Some rel => rhs_find (ctx:=ctx) pc (SOp op) args rel
+  | None => None
+  end.
+
+Definition find_load_in_fmap fmap pc chunk addr args :=
+  match PMap.get pc fmap with
+  | Some rel => rhs_find (ctx:=ctx) pc (SLoad chunk addr) args rel
+  | None => None
+  end.
+
+Definition forward_move_b (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => forward_move (ctx := ctx) rel x
+  end.
+
+Definition subst_arg (fmap : PMap.t RB.t) (pc : node) (x : reg) : reg :=
+  forward_move_b (PMap.get pc fmap) x.
+
+Definition forward_move_l_b (rb : RB.t) (xl : list reg) :=
+  match rb with
+  | None => xl
+  | Some rel => forward_move_l (ctx := ctx) rel xl
+  end.
+
+Definition subst_args fmap pc xl :=
+  forward_move_l_b (PMap.get pc fmap) xl.
+
+Definition transf_instr (fmap : PMap.t RB.t)
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    let args' := subst_args fmap pc args in
+    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
+    | None => Iop op args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Iload trap chunk addr args dst s =>
+    let args' := subst_args fmap pc args in
+    match find_load_in_fmap fmap pc chunk addr args' with
+    | None => Iload trap chunk addr args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) (subst_arg fmap pc src) s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 expected =>
+    Icond cond (subst_args fmap pc args) s1 s2 expected
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+End REWRITE.
+
+Definition transf_function (f: function) : res function :=
+  do tenv <- type_function f;
+  let (invariants, hints) := preanalysis tenv f in
+  let ctx := context_from_hints hints in
+  if check_inductiveness (ctx:=ctx) f tenv invariants
+  then
+    OK {| fn_sig := f.(fn_sig);
+          fn_params := f.(fn_params);
+          fn_stacksize := f.(fn_stacksize);
+          fn_code := PTree.map (transf_instr (ctx := ctx) invariants)
+                               f.(fn_code);
+          fn_entrypoint := f.(fn_entrypoint) |}
+  else Error (msg "cse3: not inductive").
+
+Definition transf_fundef (fd: fundef) : res fundef :=
+  AST.transf_partial_fundef transf_function fd.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v
new file mode 100644
index 00000000..ef487c86
--- /dev/null
+++ b/backend/CSE3analysis.v
@@ -0,0 +1,449 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps CSE2deps.
+Require Import HashedSet.
+Require List Compopts.
+
+Definition typing_env := reg -> typ.
+
+Definition loadv_storev_compatible_type
+           (chunk : memory_chunk) (ty : typ) : bool :=
+  match chunk, ty with
+  | Mint32, Tint
+  | Mint64, Tlong
+  | Mfloat32, Tsingle
+  | Mfloat64, Tfloat => true
+  | _, _ => false
+  end.
+
+Module RELATION <: SEMILATTICE_WITHOUT_BOTTOM.
+  Definition t := PSet.t.
+  Definition eq (x : t) (y : t) := x = y.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq. trivial.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq. congruence.
+  Qed.
+
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq. congruence.
+  Qed.
+  
+  Definition beq (x y : t) := if PSet.eq x y then true else false.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq.
+    intros.
+    destruct PSet.eq; congruence.
+  Qed.
+  
+  Definition ge (x y : t) := (PSet.is_subset x y) = true.
+
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    intros.
+    subst y.
+    apply PSet.is_subset_spec.
+    trivial.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    intros.
+    rewrite PSet.is_subset_spec in *.
+    intuition.
+  Qed.
+  
+  Definition lub := PSet.inter.
+  Definition glb := PSet.union.
+  
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    intros.
+    apply PSet.is_subset_spec.
+    intro.
+    rewrite PSet.ginter.
+    rewrite andb_true_iff.
+    intuition.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    intros.
+    apply PSet.is_subset_spec.
+    intro.
+    rewrite PSet.ginter.
+    rewrite andb_true_iff.
+    intuition.
+  Qed.
+
+  Definition top := PSet.empty.
+End RELATION.
+
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+Inductive sym_op : Type :=
+| SOp : operation -> sym_op
+| SLoad : memory_chunk -> addressing -> sym_op.
+
+Definition eq_dec_sym_op : forall s s' : sym_op, {s = s'} + {s <> s'}.
+Proof.
+  generalize eq_operation.
+  generalize eq_addressing.
+  generalize chunk_eq.
+  decide equality.
+Defined.
+
+Definition eq_dec_args : forall l l' : list reg, { l = l' } + { l <> l' }.
+Proof.
+  apply List.list_eq_dec.
+  exact peq.
+Defined.
+
+Record equation :=
+  mkequation
+    { eq_lhs : reg;
+      eq_op : sym_op;
+      eq_args : list reg }.
+
+Definition eq_dec_equation :
+  forall eq eq' : equation, {eq = eq'} + {eq <> eq'}.
+Proof.
+  generalize peq.
+  generalize eq_dec_sym_op.
+  generalize eq_dec_args.
+  decide equality.
+Defined.
+
+Definition eq_id := node.
+
+Definition add_i_j (i : reg) (j : eq_id) (m : Regmap.t PSet.t) :=
+  Regmap.set i (PSet.add j (Regmap.get i m)) m.
+
+Definition add_ilist_j (ilist : list reg) (j : eq_id) (m : Regmap.t PSet.t) :=
+  List.fold_left (fun already i => add_i_j i j already) ilist m.
+
+Definition get_reg_kills (eqs : PTree.t equation) :
+  Regmap.t PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
+                add_i_j (eq_lhs eq) eqno
+                        (add_ilist_j (eq_args eq) eqno already)) eqs
+             (PMap.init PSet.empty).
+
+Definition eq_depends_on_mem eq :=
+  match eq_op eq with
+  | SLoad _ _ => true
+  | SOp op => op_depends_on_memory op
+  end.
+
+Definition get_mem_kills (eqs : PTree.t equation) : PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
+                if eq_depends_on_mem eq
+                then PSet.add eqno already
+                else already) eqs PSet.empty.
+
+Definition is_move (op : operation) :
+  { op = Omove } + { op <> Omove }.
+Proof.
+  destruct op; try (right ; congruence).
+  left; trivial.
+Qed.
+
+Definition is_smove (sop : sym_op) :
+  { sop = SOp Omove } + { sop <> SOp Omove }.
+Proof.
+  destruct sop; try (right ; congruence).
+  destruct (is_move o).
+  - left; congruence.
+  - right; congruence.
+Qed.
+
+Definition get_moves (eqs : PTree.t equation) :
+  Regmap.t PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
+                if is_smove (eq_op eq)
+                then add_i_j (eq_lhs eq) eqno already
+                else already) eqs (PMap.init PSet.empty).
+  
+Record eq_context := mkeqcontext
+                       { eq_catalog : eq_id -> option equation;
+                         eq_find_oracle : node -> equation -> option eq_id;
+                         eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t;
+                         eq_kill_reg : reg -> PSet.t;
+                         eq_kill_mem : unit -> PSet.t;
+                         eq_moves : reg -> PSet.t }.
+
+Section OPERATIONS.
+  Context {ctx : eq_context}.
+  
+  Definition kill_reg (r : reg) (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel (eq_kill_reg ctx r).
+  
+  Definition kill_mem (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel (eq_kill_mem ctx tt).
+
+  Definition pick_source (l : list reg) := (* todo: take min? *)
+    match l with
+    | h::t => Some h
+    | nil => None
+    end.
+  
+  Definition forward_move (rel : RELATION.t)  (x : reg) : reg := (* replace [x] by the source of a move equation on x present in [rel], when one is found *)
+    match pick_source (PSet.elements (PSet.inter rel (eq_moves ctx x))) with
+    | None => x
+    | Some eqno =>
+      match eq_catalog ctx eqno with
+      | Some eq =>
+        if is_smove (eq_op eq) && peq x (eq_lhs eq) (* re-check the candidate rather than trusting eq_moves *)
+        then
+          match eq_args eq with
+          | src::nil => src
+          | _ => x (* any other argument count: keep x unchanged *)
+          end
+        else x
+      | _ => x
+      end
+    end.
+
+  Definition forward_move_l (rel : RELATION.t) : list reg -> list reg := (* forward_move applied to every register of a list *)
+    List.map (forward_move rel).
+
+  Section PER_NODE.
+    Variable no : node.
+    
+  Definition eq_find  (eq : equation) := (* id of [eq] in the catalog, as proposed by the oracle and then verified *)
+    match eq_find_oracle ctx no eq with
+    | Some id =>
+      match eq_catalog ctx id with
+      | Some eq' => if eq_dec_equation eq eq' then Some id else None (* accept the id only if the catalog entry is exactly [eq] *)
+      | None => None
+      end
+    | None => None
+    end.
+
+
+  Definition rhs_find (sop : sym_op) (args : list reg) (rel : RELATION.t) : option reg := (* a register whose equation in [rel] has right-hand side (sop, args), if one is found *)
+    match pick_source (PSet.elements (PSet.inter (eq_rhs_oracle ctx no sop args) rel)) with
+    | None => None
+    | Some src =>
+      match eq_catalog ctx src with
+      | None => None
+      | Some eq =>
+        if eq_dec_sym_op sop (eq_op eq) && eq_dec_args args (eq_args eq) (* verify the oracle candidate against the catalog *)
+        then Some (eq_lhs eq)
+        else None
+      end
+    end.
+
+  Definition oper2 (dst : reg) (op: sym_op)(args : list reg) (* kill dst, then add the equation dst = op(args) when eq_find returns an id for it *)
+           (rel : RELATION.t) : RELATION.t :=
+    let rel' := kill_reg dst rel in
+    match eq_find {| eq_lhs := dst;
+                     eq_op  := op;
+                     eq_args:= args |} with
+    | Some id => PSet.add id rel'
+    | None => rel'
+    end.
+
+  Definition oper1 (dst : reg) (op: sym_op) (args : list reg) (* like oper2, except that no equation is added when dst occurs among args *)
+             (rel : RELATION.t) : RELATION.t :=
+    if List.in_dec peq dst args
+    then kill_reg dst rel (* dst is overwritten and also read: only kill *)
+    else oper2 dst op args rel.
+
+  
+  Definition move (src dst : reg) (rel : RELATION.t) : RELATION.t := (* kill dst, then add the move equation dst = src when eq_find returns an id for it *)
+    match eq_find {| eq_lhs := dst;
+                     eq_op  := SOp Omove;
+                     eq_args:= src::nil |} with
+    | Some eq_id => PSet.add eq_id (kill_reg dst rel)
+    | None => kill_reg dst rel
+    end.
+
+  Definition oper (dst : reg) (op: sym_op) (args : list reg) (* transfer function for dst := op(args) *)
+             (rel : RELATION.t) : RELATION.t :=
+    if is_smove op
+    then
+      match args with
+      | src::nil =>
+        move (forward_move rel src) dst rel (* record the move from the forwarded source *)
+      | _ => kill_reg dst rel (* move with an unexpected argument count: only kill dst *)
+      end
+    else
+      let args' := forward_move_l rel args in
+      match rhs_find op args' rel with
+      | Some r => (* FIXME RELATION.glb ( *) move r dst rel (* )
+                                                               (oper1 dst op args' rel) *)
+      | None => oper1 dst op args' rel
+      end.
+  
+  Definition clever_kill_store (* kill only the memory-dependent equations of [rel] selected by the filter below *)
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg) (* not used in the body *)
+             (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel
+      (PSet.filter
+         (fun eqno =>
+            match eq_catalog ctx eqno with
+            | None => false
+            | Some eq =>
+              match eq_op eq with
+              | SOp op => true (* memory-dependent operations are always killed *)
+              | SLoad chunk' addr' =>
+                may_overlap chunk addr args chunk' addr' (eq_args eq) (* loads are killed only when may_overlap says so *)
+              end
+            end)
+         (PSet.inter rel (eq_kill_mem ctx tt))).
+
+  Definition store2 (* kill step of a store: selective or total depending on a compiler option *)
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg)
+             (rel : RELATION.t) : RELATION.t :=
+    if Compopts.optim_CSE3_alias_analysis tt
+    then clever_kill_store chunk addr args src rel
+    else kill_mem rel.
+
+  Definition store1 (* store2, then possibly record the equation src = load chunk addr args *)
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg) (ty: typ)
+             (rel : RELATION.t) : RELATION.t :=
+    let rel' := store2 chunk addr args src rel in
+    if loadv_storev_compatible_type chunk ty (* the load equation is added only for compatible chunk and type *)
+    then
+      match eq_find {| eq_lhs := src;
+                       eq_op  := SLoad chunk addr;
+                       eq_args:= args |} with
+      | Some id => PSet.add id rel'
+      | None => rel'
+      end
+    else rel'.
+    
+  Definition store (* transfer function for a store: store1 on the forwarded arguments and source *)
+             (chunk : memory_chunk) (addr: addressing) (args : list reg)
+             (src : reg) (ty: typ)
+             (rel : RELATION.t) : RELATION.t :=
+    store1 chunk addr (forward_move_l rel args) (forward_move rel src) ty rel.
+
+  Definition kill_builtin_res res rel := (* kill the result register of a builtin when the result is a single register *)
+    match res with
+    | BR r => kill_reg r rel
+    | _ => rel (* every other result shape leaves [rel] unchanged *)
+    end.
+
+  Definition apply_external_call ef (rel : RELATION.t) : RELATION.t := (* effect of the external function [ef] on the relation, before the result register is killed *)
+    match ef with
+    | EF_builtin name sg =>
+      match Builtins.lookup_builtin_function name sg with
+      | Some bf => rel (* builtin found by lookup_builtin_function: relation kept as is *)
+      | None => if Compopts.optim_CSE3_across_calls tt
+                then kill_mem rel
+                else RELATION.top
+      end
+    | EF_runtime name sg =>
+      if Compopts.optim_CSE3_across_calls tt
+      then (* same lookup as for EF_builtin, but only when the option is set *)
+        match Builtins.lookup_builtin_function name sg with
+        | Some bf => rel
+        | None => kill_mem rel
+        end
+      else RELATION.top
+    | EF_malloc
+    | EF_external _ _
+    | EF_free =>
+      if Compopts.optim_CSE3_across_calls tt
+      then kill_mem rel
+      else RELATION.top
+    | EF_vstore _ (* these three cases kill memory regardless of the option *)
+    | EF_memcpy _ _ (* FIXME *)
+    | EF_inline_asm _ _ _ => kill_mem rel
+    | _ => rel (* all remaining external functions keep the relation *)
+    end.
+
+  Definition apply_instr (tenv : typing_env) (instr : RTL.instruction) (rel : RELATION.t) : RB.t := (* transfer function of one RTL instruction *)
+  match instr with
+  | Inop _
+  | Icond _ _ _ _ _
+  | Ijumptable _ _ => Some rel (* these instructions leave the relation unchanged *)
+  | Istore chunk addr args src _ =>
+    Some (store chunk addr args src (tenv (forward_move rel src)) rel) (* the type passed is that of the forwarded source *)
+  | Iop op args dst _ => Some (oper dst (SOp op) args rel)
+  | Iload trap chunk addr args dst _ => Some (oper dst (SLoad chunk addr) args rel) (* [trap] is not used *)
+  | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
+  | Ibuiltin ef _ res _ => Some (kill_builtin_res res (apply_external_call ef rel))
+  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  end.
+  End PER_NODE.
+
+Definition apply_instr' (tenv : typing_env) code (pc : node) (ro : RB.t) : RB.t := (* apply_instr lifted to RB.t, looking the instruction up at [pc] in [code] *)
+  match ro with
+  | None => None
+  | Some x =>
+    match code ! pc with
+    | None => RB.bot (* no instruction at pc *)
+    | Some instr => apply_instr pc tenv instr x
+    end
+  end.
+
+Definition invariants := PMap.t RB.t. (* one RB.t value per program point *)
+
+Definition rel_leb (x y : RELATION.t) : bool := (PSet.is_subset y x). (* note the swapped arguments: y is passed first *)
+
+Definition relb_leb (x y : RB.t) : bool := (* rel_leb extended to RB.t, with None below everything *)
+  match x, y with
+  | None, _ => true
+  | (Some _), None => false
+  | (Some x), (Some y) => rel_leb x y
+  end.
+
+Definition check_inductiveness (fn : RTL.function) (tenv: typing_env) (inv: invariants) := (* boolean check that [inv] is inductive for [fn] *)
+  (RB.beq (Some RELATION.top) (PMap.get (fn_entrypoint fn) inv)) && (* the entry point must carry Some RELATION.top *)
+  PTree_Properties.for_all (fn_code fn) (* then every instruction of the code is checked *)
+      (fun pc instr =>
+         match PMap.get pc inv with
+         | None => true (* nothing to check at a point whose invariant is None *)
+         | Some rel =>
+           let rel' := apply_instr pc tenv instr rel in
+           List.forallb
+             (fun pc' => relb_leb rel' (PMap.get pc' inv)) (* the transfer result is compared with each successor invariant *)
+             (RTL.successors_instr instr)
+         end).
+
+Definition internal_analysis (* DS.fixpoint over the code of [f], starting from Some RELATION.top at the entry point *)
+  (tenv : typing_env)
+  (f : RTL.function) : option invariants := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' tenv (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+
+End OPERATIONS.
+
+Record analysis_hints := (* data from which context_from_hints rebuilds an eq_context *)
+  mkanalysis_hints
+    { hint_eq_catalog :  PTree.t equation; (* the catalog as a finite map *)
+      hint_eq_find_oracle : node -> equation -> option eq_id; (* becomes eq_find_oracle *)
+      hint_eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t }. (* becomes eq_rhs_oracle *)
+
+Definition context_from_hints (hints : analysis_hints) := (* eq_context whose kill and move tables are recomputed from the hinted catalog *)
+  let eqs := hint_eq_catalog hints in
+  let reg_kills := get_reg_kills eqs in (* computed once, outside the record closures *)
+  let mem_kills := get_mem_kills eqs in
+  let moves := get_moves eqs in
+  {|
+    eq_catalog := fun eq_id => PTree.get eq_id eqs;
+    eq_find_oracle := hint_eq_find_oracle hints ;
+    eq_rhs_oracle  := hint_eq_rhs_oracle hints;
+    eq_kill_reg := fun reg => PMap.get reg reg_kills;
+    eq_kill_mem := fun _ => mem_kills;
+    eq_moves    := fun reg => PMap.get reg moves
+  |}.
diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml
new file mode 100644
index 00000000..3f7d5bb9
--- /dev/null
+++ b/backend/CSE3analysisaux.ml
@@ -0,0 +1,130 @@
+open CSE3analysis
+open Maps
+open HashedSet
+open Camlcoq
+
+let flatten_eq eq = (* triple (lhs, op, args) of [eq] with its registers converted by P.to_int *)
+  (P.to_int eq.eq_lhs, eq.eq_op, List.map (fun r -> P.to_int r) eq.eq_args);;
+
+let imp_add_i_j s i j = (* in-place update of the reference [s]: insert [j] into the set stored at index [i] *)
+  let updated = PSet.add j (PMap.get i !s) in s := PMap.set i updated !s;;
+
+let string_of_chunk chunk = match chunk with (* textual name of a memory chunk *)
+  | AST.Mint8signed    -> "int8signed"
+  | AST.Mint8unsigned  -> "int8unsigned"
+  | AST.Mint16signed   -> "int16signed"
+  | AST.Mint16unsigned -> "int16unsigned"
+  | AST.Mint32         -> "int32"
+  | AST.Mint64         -> "int64"
+  | AST.Mfloat32       -> "float32"
+  | AST.Mfloat64       -> "float64"
+  | AST.Many32         -> "any32"
+  | AST.Many64         -> "any64";;
+
+let print_reg channel i = (* writes the integer register [i] as r<i> on [channel] *)
+  output_string channel ("r" ^ string_of_int i);;
+
+let print_eq channel (lhs, sop, args) = (* prints a flattened equation on [channel]; it used to ignore [channel] and write to stdout *)
+  match sop with
+  | SOp op ->
+     Printf.fprintf channel "%a = %a\n" print_reg lhs (PrintOp.print_operation print_reg) (op, args)
+  | SLoad(chunk, addr) ->
+     Printf.fprintf channel "%a = %s @ %a\n" print_reg lhs (string_of_chunk chunk)
+       (PrintOp.print_addressing print_reg) (addr, args);;
+
+let pp_set oc s = (* writes the elements of [s] between braces, each followed by a semicolon *)
+  output_string oc "{ ";
+  PSet.elements s |> List.iter (fun i -> Printf.fprintf oc "%d; " (P.to_int i));
+  output_string oc "}";;
+
+let pp_rhs oc = function (* pretty-prints a right-hand side: a symbolic operation paired with its arguments *)
+  | SOp op, args ->
+     PrintOp.print_operation PrintRTL.reg oc (op, args)
+  | SLoad(chunk, addr), args ->
+     let chunk_name = PrintAST.name_of_chunk chunk in
+     Printf.fprintf oc "%s[%a]" chunk_name
+         (PrintOp.print_addressing PrintRTL.reg) (addr, args);;
+
+let pp_eq oc eq = (* prints [eq] as x<lhs> = <rhs> *)
+  let lhs = P.to_int eq.eq_lhs in
+  Printf.fprintf oc "x%d = %a" lhs pp_rhs (eq.eq_op, eq.eq_args);;
+
+let pp_P oc x = output_string oc (string_of_int (P.to_int x)) (* decimal rendering of a positive *)
+              
+let pp_option pp oc opt = (* prints none for None, otherwise delegates to [pp] *)
+  match opt with
+  | Some x -> pp oc x | None -> output_string oc "none";;
+
+let preanalysis (tenv : typing_env) (f : RTL.coq_function) = (* runs internal_analysis with oracles that build the equation catalog on the fly *)
+  let cur_eq_id = ref 0
+  and cur_catalog = ref PTree.empty
+  and eq_table = Hashtbl.create 100
+  and rhs_table = Hashtbl.create 100
+  and cur_kill_reg = ref (PMap.init PSet.empty)
+  and cur_kill_mem = ref PSet.empty
+  and cur_moves = ref (PMap.init PSet.empty) in
+  let eq_find_oracle node eq = (* read-only lookup; this is the oracle returned in the hints *)
+    let o = Hashtbl.find_opt eq_table (flatten_eq eq) in
+    (if !Clflags.option_debug_compcert > 1
+     then Printf.printf "@%d: eq_find %a -> %a\n" (P.to_int node)
+            pp_eq eq (pp_option pp_P) o);
+    o
+  and rhs_find_oracle node sop args =
+    let o =
+      match Hashtbl.find_opt rhs_table (sop, List.map P.to_int args) with
+      | None -> PSet.empty
+      | Some s -> s in
+    (if !Clflags.option_debug_compcert > 1
+     then Printf.printf "@%d: rhs_find %a = %a\n"
+            (P.to_int node) pp_rhs (sop, args) pp_set o);
+    o in
+  let mutating_eq_find_oracle node eq : P.t option = (* lookup that allocates a fresh id for an unknown equation *)
+    let (flat_eq_lhs, flat_eq_op, flat_eq_args) as flat_eq = flatten_eq eq in
+    let o =
+    match Hashtbl.find_opt eq_table flat_eq with
+    | Some x ->
+       Some x
+    | None ->
+       (* TODO print_eq stderr flat_eq; *)
+       incr cur_eq_id;
+       let id = !cur_eq_id in
+       let coq_id = P.of_int id in
+       begin
+         Hashtbl.add eq_table flat_eq coq_id;
+         (cur_catalog := PTree.set coq_id eq !cur_catalog);
+         Hashtbl.replace rhs_table (flat_eq_op, flat_eq_args) (* replace, not add: keep a single binding per right-hand side *)
+           (PSet.add coq_id
+              (match Hashtbl.find_opt rhs_table (flat_eq_op, flat_eq_args) with
+               | None -> PSet.empty
+               | Some s -> s));
+         List.iter
+           (fun reg -> imp_add_i_j cur_kill_reg reg coq_id)
+           (eq.eq_lhs :: eq.eq_args);
+         (if eq_depends_on_mem eq
+          then cur_kill_mem := PSet.add coq_id !cur_kill_mem);
+         (match eq.eq_op, eq.eq_args with
+          | (SOp Op.Omove), [_] -> imp_add_i_j cur_moves eq.eq_lhs coq_id
+          | _, _ -> ());
+         Some coq_id
+       end
+    in
+    (if !Clflags.option_debug_compcert > 1
+     then Printf.printf "@%d: mutating_eq_find %a -> %a\n" (P.to_int node)
+      pp_eq eq (pp_option pp_P) o);
+    o
+  in
+  match
+    internal_analysis
+      { eq_catalog     = (fun eq_id -> PTree.get eq_id !cur_catalog);
+        eq_find_oracle = mutating_eq_find_oracle;
+        eq_rhs_oracle  = rhs_find_oracle ;
+        eq_kill_reg    = (fun reg -> PMap.get reg !cur_kill_reg);
+        eq_kill_mem    = (fun () -> !cur_kill_mem);
+        eq_moves       = (fun reg -> PMap.get reg !cur_moves)
+      } tenv f
+  with None -> failwith "CSE3analysisaux analysis failed, try re-running with -fno-cse3"
+     | Some invariants ->
+        invariants,
+        { hint_eq_catalog    = !cur_catalog; (* snapshot of the catalog built during the analysis *)
+          hint_eq_find_oracle= eq_find_oracle;
+          hint_eq_rhs_oracle = rhs_find_oracle };;
diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v
new file mode 100644
index 00000000..c65a6d9e
--- /dev/null
+++ b/backend/CSE3analysisproof.v
@@ -0,0 +1,1003 @@
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE3analysis CSE2deps CSE2depsproof HashedSet.
+Require Import RTLtyping.
+Require Import Lia.
+
+Lemma rel_leb_correct: (* rel_leb is the boolean counterpart of RELATION.ge with swapped arguments *)
+  forall x x',
+    rel_leb x x' = true <-> RELATION.ge x' x.
+Proof.
+  unfold rel_leb, RELATION.ge.
+  split; auto.
+Qed.
+
+Hint Resolve rel_leb_correct : cse3.
+
+Lemma relb_leb_correct: (* relb_leb is the boolean counterpart of RB.ge with swapped arguments *)
+  forall x x',
+    relb_leb x x' = true <-> RB.ge x' x.
+Proof.
+  unfold relb_leb, RB.ge.
+  destruct x; destruct x'; split; trivial; try contradiction; discriminate.
+Qed.
+
+Hint Resolve relb_leb_correct : cse3.
+
+Theorem loadv_storev_really_same: (* after a store of a well-typed value with a compatible chunk, loading back yields that very value *)
+  forall chunk: memory_chunk,
+  forall m1: mem,
+  forall addr v: val,
+  forall m2: mem,
+  forall ty : typ,
+  forall TYPE: Val.has_type v ty,
+  forall STORE: Mem.storev chunk m1 addr v = Some m2,
+  forall COMPATIBLE: loadv_storev_compatible_type chunk ty = true,
+    Mem.loadv chunk m2 addr = Some v.
+Proof.
+  intros.
+  rewrite Mem.loadv_storev_same with (m1:=m1) (v:=v) by assumption.
+  f_equal. (* remaining goal: Val.load_result does not change v *)
+  destruct chunk; destruct ty; try discriminate. (* incompatible chunk/type pairs contradict COMPATIBLE *)
+  all: destruct v; trivial; try contradiction.
+  all: unfold Val.load_result, Val.has_type in *.
+  all: destruct Archi.ptr64; trivial; discriminate.
+Qed.
+
+Lemma subst_args_notin : (* assigning a register that is not in [args] does not change the values read for [args] *)
+  forall (rs : regset) dst v args,
+    ~ In dst args ->
+    (rs # dst <- v) ## args = rs ## args.
+Proof.
+  induction args; simpl; trivial.
+  intro NOTIN.
+  destruct (peq a dst).
+  {
+    subst a. (* head equal to dst contradicts NOTIN *)
+    intuition congruence.
+  }
+  rewrite Regmap.gso by congruence.
+  f_equal.
+  apply IHargs.
+  intuition congruence.
+Qed.
+
+Lemma add_i_j_adds : forall i j m, (* after add_i_j i j m, the set stored at i contains j *)
+    PSet.contains (Regmap.get i (add_i_j i j m)) j = true.
+Proof.
+  intros.
+  unfold add_i_j.
+  rewrite Regmap.gss.
+  auto with pset.
+Qed.
+Hint Resolve add_i_j_adds: cse3.
+
+Lemma add_i_j_monotone : forall i j i' j' m, (* add_i_j never removes a member that was already present *)
+    PSet.contains (Regmap.get i' m) j' = true ->
+    PSet.contains (Regmap.get i' (add_i_j i j m)) j' = true.
+Proof.
+  intros.
+  unfold add_i_j.
+  destruct (peq i i').
+  - subst i'. (* same index: the updated set is read back *)
+    rewrite Regmap.gss.
+    destruct (peq j j').
+    + subst j'.
+      apply PSet.gadds.
+    + eauto with pset.
+  - rewrite Regmap.gso. (* other index: the map entry is untouched *)
+    assumption.
+    congruence.
+Qed.
+
+Hint Resolve add_i_j_monotone: cse3.
+
+Lemma add_ilist_j_monotone : forall ilist j i' j' m, (* add_ilist_j never removes a member that was already present *)
+    PSet.contains (Regmap.get i' m) j' = true ->
+    PSet.contains (Regmap.get i' (add_ilist_j ilist j m)) j' = true.
+Proof.
+  induction ilist; simpl; intros until m; intro CONTAINS; auto with cse3.
+Qed.
+Hint Resolve add_ilist_j_monotone: cse3.
+
+Lemma add_ilist_j_adds : forall ilist j m, (* after add_ilist_j, j belongs to the set of every index of the list *)
+    forall i, In i ilist ->
+              PSet.contains (Regmap.get i (add_ilist_j ilist j m)) j = true.
+Proof.
+  induction ilist; simpl; intros until i; intro IN.
+  contradiction.
+  destruct IN as [HEAD | TAIL]; subst; auto with cse3.
+Qed.
+Hint Resolve add_ilist_j_adds: cse3.
+
+Definition xlget_kills (eqs : list (eq_id * equation)) (m :  Regmap.t PSet.t) : (* list-fold form that the proofs below substitute for the fold inside get_reg_kills *)
+  Regmap.t PSet.t :=
+  List.fold_left (fun already (item : eq_id * equation) =>
+    add_i_j (eq_lhs (snd item)) (fst item)
+            (add_ilist_j (eq_args (snd item)) (fst item) already)) eqs m.
+
+
+Definition xlget_mem_kills (eqs : list (positive * equation)) (m : PSet.t) : PSet.t := (* list-fold form that the proofs below substitute for the fold inside get_mem_kills *)
+(fold_left
+       (fun (a : PSet.t) (p : positive * equation) =>
+        if eq_depends_on_mem (snd p) then PSet.add (fst p) a else a)
+       eqs m).
+
+Lemma xlget_kills_monotone : (* xlget_kills only grows the sets of its initial map *)
+  forall eqs m i j,
+    PSet.contains (Regmap.get i m) j = true ->
+    PSet.contains (Regmap.get i (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl; trivial.
+  intros.
+  auto with cse3.
+Qed.
+
+Hint Resolve xlget_kills_monotone : cse3.
+
+Lemma xlget_mem_kills_monotone : (* xlget_mem_kills only grows its initial set *)
+  forall eqs m j,
+    PSet.contains m j = true ->
+    PSet.contains (xlget_mem_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl; trivial.
+  intros.
+  destruct eq_depends_on_mem.
+  - apply IHeqs. (* head equation added: j is still in the enlarged set *)
+    destruct (peq (fst a) j).
+    + subst j. apply PSet.gadds.
+    + rewrite PSet.gaddo by congruence.
+      trivial.
+  - auto.
+Qed.
+
+Hint Resolve xlget_mem_kills_monotone : cse3.
+
+Lemma xlget_kills_has_lhs : (* an equation of the list is registered under its left-hand side *)
+  forall eqs m lhs sop args j,
+    In (j, {| eq_lhs := lhs;
+              eq_op  := sop;
+              eq_args:= args |}) eqs ->
+    PSet.contains (Regmap.get lhs (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros until j.
+  intro HEAD_TAIL.
+  destruct HEAD_TAIL as [HEAD | TAIL]; subst; simpl.
+  - auto with cse3.
+  - eapply IHeqs. eassumption.
+Qed.
+Hint Resolve xlget_kills_has_lhs : cse3.
+
+Lemma xlget_kills_has_arg : (* an equation of the list is registered under each of its arguments *)
+  forall eqs m lhs sop arg args j,
+    In (j, {| eq_lhs := lhs;
+              eq_op  := sop;
+              eq_args:= args |}) eqs ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros until j.
+  intros HEAD_TAIL ARG.
+  destruct HEAD_TAIL as [HEAD | TAIL]; subst; simpl.
+  - auto with cse3.
+  - eapply IHeqs; eassumption.
+Qed.
+
+Hint Resolve xlget_kills_has_arg : cse3.
+
+Lemma get_kills_has_lhs : (* get_reg_kills registers every catalog equation under its left-hand side *)
+  forall eqs lhs sop args j,
+    PTree.get j eqs = Some {| eq_lhs := lhs;
+                              eq_op  := sop;
+                              eq_args:= args |} ->
+    PSet.contains (Regmap.get lhs (get_reg_kills eqs)) j = true.
+Proof.
+  unfold get_reg_kills.
+  intros.
+  rewrite PTree.fold_spec. (* turn the tree fold into a list fold over PTree.elements *)
+  change (fold_left
+       (fun (a : Regmap.t PSet.t) (p : positive * equation) =>
+        add_i_j (eq_lhs (snd p)) (fst p)
+          (add_ilist_j (eq_args (snd p)) (fst p) a))) with xlget_kills.
+  eapply xlget_kills_has_lhs.
+  apply PTree.elements_correct.
+  eassumption.
+Qed.
+
+Hint Resolve get_kills_has_lhs : cse3.
+
+Lemma context_from_hints_get_kills_has_lhs : (* get_kills_has_lhs restated for the context built by context_from_hints *)
+  forall hints lhs sop args j,
+    PTree.get j (hint_eq_catalog hints) = Some {| eq_lhs := lhs;
+                              eq_op  := sop;
+                              eq_args:= args |} ->
+    PSet.contains  (eq_kill_reg (context_from_hints hints) lhs) j = true.
+Proof.
+  intros; simpl.
+  eapply get_kills_has_lhs.
+  eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_lhs : cse3.
+
+Lemma get_kills_has_arg : (* get_reg_kills registers every catalog equation under each of its arguments *)
+  forall eqs lhs sop arg args j,
+    PTree.get j eqs = Some {| eq_lhs := lhs;
+                              eq_op  := sop;
+                              eq_args:= args |} ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (get_reg_kills eqs)) j = true.
+Proof.
+  unfold get_reg_kills.
+  intros.
+  rewrite PTree.fold_spec. (* turn the tree fold into a list fold over PTree.elements *)
+  change (fold_left
+       (fun (a : Regmap.t PSet.t) (p : positive * equation) =>
+        add_i_j (eq_lhs (snd p)) (fst p)
+          (add_ilist_j (eq_args (snd p)) (fst p) a))) with xlget_kills.
+  eapply xlget_kills_has_arg.
+  - apply PTree.elements_correct.
+    eassumption.
+  - assumption.
+Qed.
+
+Hint Resolve get_kills_has_arg : cse3.
+
+Lemma context_from_hints_get_kills_has_arg : (* get_kills_has_arg restated for the context built by context_from_hints *)
+  forall hints lhs sop arg args j,
+    PTree.get j (hint_eq_catalog hints) = Some {| eq_lhs := lhs;
+                              eq_op  := sop;
+                              eq_args:= args |} ->
+    In arg args ->
+    PSet.contains (eq_kill_reg (context_from_hints hints) arg) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_kills_has_arg; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_arg : cse3.
+
+Lemma xlget_kills_has_eq_depends_on_mem : (* a memory-dependent equation of the list ends up in xlget_mem_kills *)
+  forall eqs eq j m,
+    In (j, eq) eqs ->
+    eq_depends_on_mem eq = true ->
+    PSet.contains (xlget_mem_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros.
+  destruct H.
+  { subst a. (* the equation is the head of the list *)
+    simpl.
+    rewrite H0.
+    apply xlget_mem_kills_monotone.
+    apply PSet.gadds.
+  }
+  eauto.
+Qed.
+
+Hint Resolve xlget_kills_has_eq_depends_on_mem : cse3.
+
+Lemma get_kills_has_eq_depends_on_mem : (* get_mem_kills contains every memory-dependent catalog equation *)
+  forall eqs eq j,
+    PTree.get j eqs = Some eq ->
+    eq_depends_on_mem eq = true ->
+    PSet.contains (get_mem_kills eqs) j = true.
+Proof.
+  intros.
+  unfold get_mem_kills.
+  rewrite PTree.fold_spec. (* turn the tree fold into a list fold over PTree.elements *)
+  change (fold_left
+       (fun (a : PSet.t) (p : positive * equation) =>
+        if eq_depends_on_mem (snd p) then PSet.add (fst p) a else a)
+       (PTree.elements eqs) PSet.empty)
+    with (xlget_mem_kills (PTree.elements eqs) PSet.empty).
+  eapply xlget_kills_has_eq_depends_on_mem.
+  apply PTree.elements_correct.
+  eassumption.
+  trivial.
+Qed.
+  
+Lemma context_from_hints_get_kills_has_eq_depends_on_mem : (* get_kills_has_eq_depends_on_mem restated for the context built by context_from_hints *)
+  forall hints eq j,
+    PTree.get j (hint_eq_catalog hints) = Some eq ->
+    eq_depends_on_mem eq = true ->
+    PSet.contains (eq_kill_mem (context_from_hints hints) tt) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_kills_has_eq_depends_on_mem; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_eq_depends_on_mem : cse3.
+
+Definition eq_involves (eq : equation) (i : reg) := (* register [i] is the left-hand side or one of the arguments of [eq] *)
+  i = (eq_lhs eq) \/ In i (eq_args eq).
+
+Section SOUNDNESS.
+  Context {F V : Type}.
+  Context {genv: Genv.t F V}.
+  Context {sp : val}.
+
+  Context {ctx : eq_context}.
+
+  Definition sem_rhs (sop : sym_op) (args : list reg) (* [v'] is the value of the right-hand side (sop, args) in state (rs, m) *)
+             (rs : regset) (m : mem) (v' : val) :=
+    match sop with
+    | SOp op =>
+      match eval_operation genv sp op (rs ## args) m with
+      | Some v => v' = v
+      | None => False (* an operation that fails to evaluate has no value *)
+      end
+    | SLoad chunk addr =>
+      match
+        match eval_addressing genv sp addr (rs ## args) with
+        | Some a => Mem.loadv chunk m a
+        | None => None
+        end
+      with
+      | Some dat => v' = dat
+      | None => v' = default_notrap_load_value chunk (* failed address computation or failed load: the default value *)
+      end
+    end.
+    
+  Definition sem_eq (eq : equation) (rs : regset) (m : mem) := (* [eq] holds: its left-hand side register carries the value of its right-hand side *)
+    sem_rhs (eq_op eq) (eq_args eq) rs m (rs # (eq_lhs eq)).
+
+  Definition sem_rel (rel : RELATION.t) (rs : regset) (m : mem) := (* every id of [rel] that has a catalog entry denotes an equation that holds *)
+    forall i eq,
+      PSet.contains rel i = true ->
+      eq_catalog ctx i = Some eq ->
+      sem_eq eq rs m.
+
+  Lemma sem_rel_glb: (* the glb of two relations holds iff both relations hold *)
+    forall rel1 rel2 rs m,
+      (sem_rel (RELATION.glb rel1 rel2) rs m) <->
+      ((sem_rel rel1 rs m) /\
+       (sem_rel rel2 rs m)).
+  Proof.
+    intros.
+    unfold sem_rel, RELATION.glb.
+    split.
+    - intro IMPLIES.
+      split;
+        intros i eq CONTAINS;
+        specialize IMPLIES with (i:=i) (eq0:=eq);
+        rewrite PSet.gunion in IMPLIES; (* membership in the glb is rewritten with the union lemma *)
+        rewrite orb_true_iff in IMPLIES;
+        intuition.
+    - intros (IMPLIES1 & IMPLIES2) i eq.
+      rewrite PSet.gunion.
+      rewrite orb_true_iff.
+      specialize IMPLIES1 with (i:=i) (eq0:=eq).
+      specialize IMPLIES2 with (i:=i) (eq0:=eq).
+      intuition.
+  Qed.
+
+  Hypothesis ctx_kill_reg_has_lhs : (* assumption on ctx: each catalog equation is in the kill set of its left-hand side *)
+    forall lhs sop args j,
+      eq_catalog ctx j = Some {| eq_lhs := lhs;
+                                 eq_op  := sop;
+                                 eq_args:= args |} ->
+      PSet.contains (eq_kill_reg ctx lhs) j = true.
+
+  Hypothesis ctx_kill_reg_has_arg : (* assumption on ctx: each catalog equation is in the kill set of each of its arguments *)
+    forall lhs sop args j,
+      eq_catalog ctx j = Some {| eq_lhs := lhs;
+                                 eq_op  := sop;
+                                 eq_args:= args |} ->
+      forall arg,
+      In arg args ->
+      PSet.contains (eq_kill_reg ctx arg) j = true.
+
+  Hypothesis ctx_kill_mem_has_depends_on_mem : (* assumption on ctx: each memory-dependent catalog equation is in the memory kill set *)
+    forall eq j,
+      eq_catalog ctx j = Some eq ->
+      eq_depends_on_mem eq = true ->
+      PSet.contains (eq_kill_mem ctx tt) j = true.
+
+  Theorem kill_reg_sound : (* the relation with dst killed still holds after dst is assigned any value *)
+    forall rel rs m dst v,
+      (sem_rel rel rs m) ->
+      (sem_rel (kill_reg (ctx:=ctx) dst rel) (rs#dst <- v) m).
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_reg.
+    intros until v.
+    intros REL i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    destruct eq as [lhs sop args]; simpl.
+    specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
+    specialize ctx_kill_reg_has_arg with (lhs := lhs) (sop := sop) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
+    simpl in *.
+    assert ({In dst args} + {~In dst args}) as IN_ARGS.
+    {
+      apply List.in_dec.
+      apply peq.
+    }
+    destruct IN_ARGS as [IN_ARGS | NOTIN_ARGS].
+    { intuition. (* dst among the arguments: the equation would have been killed *)
+      congruence.
+    }
+    destruct (peq dst lhs).
+    {
+      congruence. (* dst equal to lhs: the equation would have been killed *)
+    }
+    rewrite subst_args_notin by assumption. (* the equation does not mention dst, so the update is invisible to it *)
+    destruct sop.
+    - destruct (eval_operation genv sp o rs ## args m) as [ev | ]; trivial.
+      rewrite Regmap.gso by congruence.
+      assumption.
+    - rewrite Regmap.gso by congruence.
+      assumption.
+  Qed.
+
+  Hint Resolve kill_reg_sound : cse3.
+
+  Theorem kill_reg_sound2 : (* killing a register preserves validity in an unchanged state *)
+    forall rel rs m dst,
+      (sem_rel rel rs m) ->
+      (sem_rel (kill_reg (ctx:=ctx) dst rel) rs m).
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_reg.
+    intros until dst.
+    intros REL i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    destruct eq as [lhs sop args]; simpl.
+    specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
+    specialize ctx_kill_reg_has_arg with (lhs := lhs) (sop := sop) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
+  Qed.
+    
+  Lemma pick_source_sound : (* whatever pick_source returns is a member of its input list *)
+    forall (l : list reg),
+      match pick_source l with
+      | Some x => In x l
+      | None => True
+      end.
+  Proof.
+    unfold pick_source.
+    destruct l; simpl; trivial.
+    left; trivial.
+  Qed.
+    
+  Hint Resolve pick_source_sound : cse3.
+
+  Theorem forward_move_sound : (* when [rel] holds, the register chosen by forward_move carries the same value as [x] *)
+    forall rel rs m x,
+      (sem_rel rel rs m) ->
+      rs # (forward_move (ctx := ctx) rel x) = rs # x.
+  Proof.
+    unfold sem_rel, forward_move.
+    intros until x.
+    intro REL.
+    pose proof (pick_source_sound (PSet.elements (PSet.inter rel (eq_moves ctx x)))) as ELEMENT.
+    destruct (pick_source (PSet.elements (PSet.inter rel (eq_moves ctx x)))).
+    2: reflexivity. (* no candidate: forward_move returns x itself *)
+    destruct (eq_catalog ctx r) as [eq | ] eqn:CATALOG.
+    2: reflexivity.
+    specialize REL with (i := r) (eq0 := eq).
+    destruct (is_smove (eq_op eq)) as [MOVE | ].
+    2: reflexivity.
+    destruct (peq x (eq_lhs eq)).
+    2: reflexivity.
+    simpl.
+    subst x.
+    rewrite PSet.elements_spec in ELEMENT. (* the picked id belongs to the intersection, hence to rel *)
+    rewrite PSet.ginter in ELEMENT.
+    rewrite andb_true_iff in ELEMENT.
+    unfold sem_eq in REL.
+    rewrite MOVE in REL.
+    simpl in REL.
+    destruct (eq_args eq) as [ | h t].
+    reflexivity.
+    destruct t.
+    2: reflexivity.
+    simpl in REL.
+    intuition congruence.
+  Qed.
+
+  Hint Resolve forward_move_sound : cse3.
+
+  Theorem forward_move_l_sound : (* forward_move_sound lifted to lists of registers *)
+    forall rel rs m l,
+      (sem_rel rel rs m) ->
+      rs ## (forward_move_l (ctx := ctx) rel l) = rs ## l.
+  Proof.
+    induction l; simpl; intros; trivial.
+    erewrite forward_move_sound by eassumption.
+    intuition congruence.
+  Qed.
+  
+  Hint Resolve forward_move_l_sound : cse3.
+
+  Theorem kill_mem_sound : (* after kill_mem the relation holds with any other memory and the same registers *)
+    forall rel rs m m',
+      (sem_rel rel rs m) ->
+      (sem_rel (kill_mem (ctx:=ctx) rel) rs m').
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_mem.
+    intros until m'.
+    intros REL i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    intros SUBTRACT CATALOG.
+    rewrite PSet.gsubtract in SUBTRACT.
+    rewrite andb_true_iff in SUBTRACT.
+    intuition.
+    destruct (eq_op eq) as [op | chunk addr] eqn:OP.
+    - specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i). (* operation case: a surviving equation uses a memory-independent operation *)
+      unfold eq_depends_on_mem in ctx_kill_mem_has_depends_on_mem.
+      rewrite OP in ctx_kill_mem_has_depends_on_mem.
+      rewrite (op_depends_on_memory_correct genv sp op) with (m2 := m).
+      assumption.
+      destruct (op_depends_on_memory op) in *; trivial.
+      rewrite ctx_kill_mem_has_depends_on_mem in H0; trivial.
+      discriminate H0.
+    - specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i). (* load case: a load is always in the kill set, so it cannot survive *)
+      destruct eq as [lhs op args]; simpl in *.
+      rewrite OP in ctx_kill_mem_has_depends_on_mem.
+      rewrite negb_true_iff in H0.
+      rewrite OP in CATALOG.
+      intuition.
+      congruence.
+  Qed.
+
+  Hint Resolve kill_mem_sound : cse3.
+
+  Theorem eq_find_sound: (* an id returned by eq_find is mapped by the catalog to the searched equation *)
+    forall no eq id,
+      eq_find (ctx := ctx) no eq = Some id ->
+      eq_catalog ctx id = Some eq.
+  Proof.
+    unfold eq_find.
+    intros.
+    destruct (eq_find_oracle ctx no eq) as [ id' | ].
+    2: discriminate.
+    destruct (eq_catalog ctx id') as [eq' |] eqn:CATALOG.
+    2: discriminate.
+    destruct (eq_dec_equation eq eq').
+    2: discriminate.
+    congruence.
+  Qed.
+
+  Hint Resolve eq_find_sound : cse3.
+
+  Theorem rhs_find_sound: (* when [rel] holds, the register returned by rhs_find carries the value of the right-hand side *)
+    forall no sop args rel src rs m,
+      sem_rel rel rs m ->
+      rhs_find (ctx := ctx) no sop args rel = Some src ->
+      sem_rhs sop args rs m (rs # src).
+  Proof.
+    unfold rhs_find, sem_rel, sem_eq.
+    intros until m.
+    intros REL FIND.
+    pose proof (pick_source_sound (PSet.elements (PSet.inter (eq_rhs_oracle ctx no sop args) rel))) as SOURCE.
+    destruct (pick_source (PSet.elements (PSet.inter (eq_rhs_oracle ctx no sop args) rel))) as [ src' | ].
+    2: discriminate.
+    rewrite PSet.elements_spec in SOURCE. (* the picked id belongs to the intersection, hence to rel *)
+    rewrite PSet.ginter in SOURCE.
+    rewrite andb_true_iff in SOURCE.
+    destruct (eq_catalog ctx src') as [eq | ] eqn:CATALOG.
+    2: discriminate.
+    specialize REL with (i := src') (eq0 := eq).
+    destruct (eq_dec_sym_op sop (eq_op eq)).
+    2: discriminate.
+    destruct (eq_dec_args args (eq_args eq)).
+    2: discriminate.
+    simpl in FIND.
+    intuition congruence.
+  Qed.
+
+  Hint Resolve rhs_find_sound : cse3.
+  
+  Theorem forward_move_rhs_sound : (* forwarding the arguments through forward_move_l does not change the value of a right-hand side *)
+    forall sop args rel rs m v,
+      (sem_rel rel rs m) ->
+      (sem_rhs sop args rs m v) ->
+      (sem_rhs sop (forward_move_l (ctx := ctx) rel args) rs m v).
+  Proof.
+    intros until v.
+    intros REL RHS.
+    destruct sop; simpl in *.
+    all: erewrite forward_move_l_sound by eassumption; assumption.
+  Qed.
+
+  Hint Resolve forward_move_rhs_sound : cse3.
+
+  Lemma arg_not_replaced: (* same statement as subst_args_notin earlier in this file, proved again here *)
+    forall (rs : regset) dst v args,
+      ~ In dst args ->
+      (rs # dst <- v) ## args = rs ## args.
+  Proof.
+    induction args; simpl; trivial.
+    intuition.
+    f_equal; trivial.
+    apply Regmap.gso; congruence.
+  Qed.
+
+  Lemma sem_rhs_depends_on_args_only: (* assigning a register outside [args] keeps the value of the right-hand side *)
+    forall sop args rs dst m v,
+      sem_rhs sop args rs m v ->
+      ~ In dst args ->
+      sem_rhs sop args (rs # dst <- v) m v.
+  Proof.
+    unfold sem_rhs.
+    intros.
+    rewrite arg_not_replaced by assumption.
+    assumption.
+  Qed.
+  
+  Lemma replace_sound: (* After [dst := v], applying [kill_reg dst] to [rel] and adding the equation number found for [dst = sop args] gives a sound relation. *)
+    forall no eqno dst sop args rel rs m v,
+    sem_rel rel rs m ->
+    sem_rhs sop args rs m  v ->
+    ~ In dst args -> (* the right-hand side does not read [dst] *)
+    eq_find (ctx := ctx) no
+            {| eq_lhs := dst;
+               eq_op  := sop;
+               eq_args:= args |} = Some eqno -> (* [eqno] is the number under which this equation is found *)
+    sem_rel (PSet.add eqno (kill_reg (ctx := ctx) dst rel)) (rs # dst <- v) m.
+  Proof.
+    intros until v.
+    intros REL RHS NOTIN FIND i eq CONTAINS CATALOG.
+    destruct (peq i eqno). (* is [i] the equation that was just added? *)
+    - subst i.
+      rewrite eq_find_sound with (no := no) (eq0 := {| eq_lhs := dst; eq_op := sop; eq_args := args |}) in CATALOG by exact FIND.
+      clear FIND.
+      inv CATALOG. (* [eq] is exactly the record built above *)
+      unfold sem_eq.
+      simpl in *.
+      rewrite Regmap.gss. (* [dst] now holds [v] *)
+      apply sem_rhs_depends_on_args_only; auto.
+    - rewrite PSet.gaddo in CONTAINS by congruence. (* other index: it was already in [kill_reg dst rel] *)
+      eapply kill_reg_sound; eauto.
+  Qed.
+
+  Lemma sem_rhs_det: (* [sem_rhs] is deterministic: two values of the same right-hand side in the same state are equal. *)
+    forall {sop} {args} {rs} {m} {v} {v'},
+      sem_rhs sop args rs m v ->
+      sem_rhs sop args rs m v' ->
+      v = v'.
+  Proof.
+    intros until v'. intro SEMv.
+    destruct sop; simpl in *.
+    - destruct eval_operation. (* operation case: split on whether evaluation succeeds *)
+      congruence.
+      contradiction. (* failed evaluation makes SEMv absurd *)
+    - destruct eval_addressing. (* load case: split on the address computation, then on the load *)
+      + destruct Mem.loadv; congruence.
+      + congruence.
+  Qed.
+
+  Theorem oper2_sound: (* [oper2] yields a sound relation after [dst := v], provided [dst] is not one of [args]. *)
+    forall no dst sop args rel rs m v,
+      sem_rel rel rs m ->
+      not (In dst args) ->
+      sem_rhs sop args rs m v ->
+      sem_rel (oper2 (ctx := ctx) no dst sop args rel) (rs # dst <- v) m.
+  Proof.
+    unfold oper2.
+    intros until v.
+    intros REL NOTIN RHS.    
+    pose proof (eq_find_sound no {| eq_lhs := dst; eq_op := sop; eq_args := args |}) as EQ_FIND_SOUND.
+    destruct eq_find. (* was an equation number found for [dst = sop args]? *)
+    2: auto with cse3; fail. (* not found: the goal must be closed by the cse3 hints, otherwise fail *)
+    specialize EQ_FIND_SOUND with (id := e).
+    intuition.
+    intros i eq CONTAINS.
+    destruct (peq i e). (* is [i] the equation that was just added? *)
+    { subst i.
+      rewrite H.
+      clear H.
+      intro Z.
+      inv Z.
+      unfold sem_eq.
+      simpl.
+      rewrite Regmap.gss. (* [dst] now holds [v] *)
+      apply sem_rhs_depends_on_args_only; auto.
+    }
+    rewrite PSet.gaddo in CONTAINS by congruence. (* other index: handled by [kill_reg_sound] *)
+    apply (kill_reg_sound rel rs m dst v REL i eq); auto.
+  Qed.
+
+  Hint Resolve oper2_sound : cse3. (* make the theorem available to [auto with cse3] *)
+  
+  Theorem oper1_sound: (* [oper1] yields a sound relation after [dst := v], with no side condition on [dst]. *)
+    forall no dst sop args rel rs m v,
+      sem_rel rel rs m ->
+      sem_rhs sop args rs m v ->
+      sem_rel (oper1 (ctx := ctx) no dst sop args rel) (rs # dst <- v) m.
+  Proof.
+    intros.
+    unfold oper1.
+    destruct in_dec; auto with cse3. (* split on the [in_dec] test inside [oper1]; both branches follow from cse3 hints *)
+  Qed.
+
+  Hint Resolve oper1_sound : cse3. (* make the theorem available to [auto with cse3] *)
+
+  Lemma move_sound : (* [move no src dst rel] is sound after copying the value of [src] into [dst]. *)
+    forall no : node,
+    forall rel : RELATION.t,
+    forall src dst : reg,
+    forall rs m,
+      sem_rel rel rs m ->
+      sem_rel (move (ctx:=ctx) no src dst rel) (rs # dst <- (rs # src)) m.
+  Proof.
+    unfold move.
+    intros until m.
+    intro REL.
+    pose proof (eq_find_sound no  {| eq_lhs := dst; eq_op := SOp Omove; eq_args := src :: nil |}) as EQ_FIND_SOUND.
+    destruct eq_find. (* was an equation number found for [dst = move src]? *)
+    - intros i eq CONTAINS.
+      destruct (peq i e). (* is [i] the move equation itself? *)
+      + subst i.
+        rewrite (EQ_FIND_SOUND e) by trivial.
+        intro Z.
+        inv Z.
+        unfold sem_eq.
+        simpl.
+        destruct (peq src dst).
+        * subst dst. (* self-move: both sides read the same register *)
+          reflexivity.
+        * rewrite Regmap.gss. (* [dst] holds [rs # src] ... *)
+          rewrite Regmap.gso by congruence. (* ... and [src] is untouched *)
+          reflexivity.
+      + intros.
+        rewrite PSet.gaddo in CONTAINS by congruence. (* other index: handled by [kill_reg_sound] *)
+        apply (kill_reg_sound rel rs m dst (rs # src) REL i); auto.
+    - apply kill_reg_sound; auto. (* no equation number: the result is closed by [kill_reg_sound] *)
+  Qed.
+
+  Hint Resolve move_sound : cse3. (* make the lemma available to [auto with cse3] *)
+  
+  Theorem oper_sound: (* [oper] yields a sound relation after [dst := v] where [v] is a value of [sop args]. *)
+    forall no dst sop args rel rs m v,
+      sem_rel rel rs m ->
+      sem_rhs sop args rs m v ->
+      sem_rel (oper (ctx := ctx) no dst sop args rel) (rs # dst <- v) m.
+  Proof.
+    intros until v.
+    intros REL RHS.
+    unfold oper.
+    destruct (is_smove sop). (* moves are treated separately from other right-hand sides *)
+    - subst.
+      simpl in RHS.
+      destruct args. contradiction. (* a move with no argument makes RHS absurd *)
+      destruct args. 2: contradiction. (* likewise with two or more arguments *)
+      cbn in *.
+      subst.
+      rewrite <- (forward_move_sound rel rs m r) by auto. (* the forwarded register holds the same value as [r] *)
+      apply move_sound; auto.
+    - destruct rhs_find as [src |] eqn:RHS_FIND. (* did [rhs_find] return a register for this right-hand side? *)
+      + (* FIXME apply sem_rel_glb; split. *)
+        * pose proof (rhs_find_sound no sop (forward_move_l (ctx:=ctx) rel args) rel src rs m REL RHS_FIND) as SOUND.
+          eapply forward_move_rhs_sound in RHS.
+          2: eassumption.
+          rewrite <- (sem_rhs_det SOUND RHS). (* [v] equals [rs # src] by determinism *)
+          apply move_sound; auto.
+        (* FIXME * apply oper1_sound; auto. *)
+      + apply oper1_sound; auto. (* not found: [oper1] on the forwarded arguments *)
+        apply forward_move_rhs_sound; auto.
+  Qed.
+
+  Hint Resolve oper_sound : cse3. (* make the theorem available to [auto with cse3] *)
+
+
+  Theorem clever_kill_store_sound: (* The relation returned by [clever_kill_store] is sound for the memory [m'] obtained after the store. *)
+    forall chunk addr args a src rel rs m m',
+      sem_rel rel rs m ->
+      eval_addressing genv sp addr (rs ## args) = Some a -> (* the store address evaluates to [a] *)
+      Mem.storev chunk m a (rs # src) = Some m' -> (* storing [rs # src] at [a] turns [m] into [m'] *)
+      sem_rel (clever_kill_store (ctx:=ctx) chunk addr args src rel) rs m'.
+  Proof.
+    unfold clever_kill_store.
+    intros until m'. intros REL ADDR STORE i eq CONTAINS CATALOG.
+    autorewrite with pset in CONTAINS.
+    destruct (PSet.contains rel i) eqn:RELi; simpl in CONTAINS.
+    2: discriminate. (* an index absent from [rel] is absent from the result *)
+    rewrite CATALOG in CONTAINS.
+    unfold sem_rel in REL.
+    specialize REL with (i := i) (eq0 := eq).
+    destruct eq; simpl in *.
+    unfold sem_eq in *.
+    simpl in *.
+    destruct eq_op as [op' | chunk' addr']; simpl. (* operation equation vs. load equation *)
+    - destruct (op_depends_on_memory op') eqn:DEPENDS.
+      + erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto. (* memory-dependent operation: CONTAINS becomes contradictory *)
+        discriminate.
+      + rewrite op_depends_on_memory_correct with (m2:=m); trivial. (* memory-independent: evaluate in [m] instead of [m'] *)
+        apply REL; auto.
+    - simpl in REL.
+      erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto.
+      simpl in CONTAINS.
+      rewrite negb_true_iff in CONTAINS. (* CONTAINS now says the overlap test is false *)
+      destruct (eval_addressing genv sp addr' rs ## eq_args) as [a'|] eqn:ADDR'.
+      + erewrite may_overlap_sound with (chunk:=chunk) (addr:=addr) (args:=args) (chunk':=chunk') (addr':=addr') (args':=eq_args); try eassumption. (* rewrite the load from [m'] using [may_overlap_sound] *)
+        apply REL; auto.
+      + apply REL; auto. (* address evaluation fails: memory is not consulted *)
+  Qed.
+
+  Hint Resolve clever_kill_store_sound : cse3. (* make the theorem available to [auto with cse3] *)
+
+  Theorem store2_sound: (* The relation returned by [store2] is sound for the memory [m'] obtained after the store. *)
+    forall chunk addr args a src rel rs m m',
+      sem_rel rel rs m ->
+      eval_addressing genv sp addr (rs ## args) = Some a ->
+      Mem.storev chunk m a (rs # src) = Some m' ->
+      sem_rel (store2 (ctx:=ctx) chunk addr args src rel) rs m'.
+  Proof.
+    unfold store2.
+    intros.
+    destruct (Compopts.optim_CSE3_alias_analysis tt); eauto with cse3. (* both settings of the alias-analysis option are closed by cse3 hints *)
+  Qed.
+  
+  Hint Resolve store2_sound : cse3. (* make the theorem available to [auto with cse3] *)
+
+  Theorem store1_sound: (* The relation returned by [store1] is sound after the store, given a well-typed register file. *)
+    forall no chunk addr args a src rel tenv rs m m',
+      sem_rel rel rs m ->
+      wt_regset tenv rs -> (* registers are well typed w.r.t. [tenv] *)
+      eval_addressing genv sp addr (rs ## args) = Some a ->
+      Mem.storev chunk m a (rs#src) = Some m' ->
+      sem_rel (store1 (ctx:=ctx) no chunk addr args src (tenv src) rel) rs m'.
+  Proof.
+    unfold store1.
+    intros until m'.
+    intros REL WT ADDR STORE.
+    assert (sem_rel (store2 (ctx:=ctx) chunk addr args src rel) rs m') as REL' by eauto with cse3. (* the [store2] result is sound; reused in every branch *)
+    destruct loadv_storev_compatible_type eqn:COMPATIBLE.
+    2: auto; fail. (* other case: must be closed by [auto], presumably from REL' *)
+    destruct eq_find as [eq_id | ] eqn:FIND.
+    2: auto; fail. (* no equation number found: same as above *)
+    intros i eq CONTAINS CATALOG.
+    destruct (peq i eq_id). (* is [i] the added equation [src = load chunk addr args]? *)
+    { subst i.
+      rewrite eq_find_sound with (no:=no) (eq0:={| eq_lhs := src; eq_op := SLoad chunk addr; eq_args := args |}) in CATALOG; trivial.
+      inv CATALOG.
+      unfold sem_eq.
+      simpl.
+      rewrite ADDR.
+      rewrite loadv_storev_really_same with (m1:=m) (v:=rs#src) (ty:=(tenv src)); trivial. (* rewrite the load from [m'] at [a] using the store STORE *)
+    }
+    unfold sem_rel in REL'.
+    rewrite PSet.gaddo in CONTAINS by congruence. (* other index: it belongs to the [store2] result *)
+    eauto.
+  Qed.
+  
+  Hint Resolve store1_sound : cse3. (* make the theorem available to [auto with cse3] *)
+    
+  Theorem store_sound: (* The relation returned by [store] is sound after the store; the type passed is that of the forwarded source register. *)
+    forall no chunk addr args a src rel tenv rs m m',
+      sem_rel rel rs m ->
+      wt_regset tenv rs ->
+      eval_addressing genv sp addr (rs ## args) = Some a ->
+      Mem.storev chunk m a (rs#src) = Some m' ->
+      sem_rel (store (ctx:=ctx) no chunk addr args src (tenv (forward_move (ctx:=ctx) rel src)) rel) rs m'.
+  Proof.
+    unfold store.
+    intros until m'.
+    intros REL WT ADDR STORE.
+    rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. (* restate ADDR on the forwarded argument list *)
+    rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. (* restate STORE on the forwarded source register *)
+    apply store1_sound with (a := a) (m := m); trivial.
+    (* rewrite forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
+    assumption. *)
+  Qed.
+
+  Hint Resolve store_sound : cse3. (* make the theorem available to [auto with cse3] *)
+
+  Lemma kill_builtin_res_sound: (* [kill_builtin_res res rel] is sound after the builtin result [vres] is written through [regmap_setres]. *)
+    forall res (m : mem) (rs : regset) vres (rel : RELATION.t)
+           (REL : sem_rel rel rs m),
+      (sem_rel (kill_builtin_res (ctx:=ctx) res rel)
+               (regmap_setres res vres rs) m).
+  Proof.
+    destruct res; simpl; intros; trivial. (* all shapes of [res] but one are immediate *)
+    apply kill_reg_sound; trivial. (* remaining shape: a register is overwritten *)
+  Qed.
+
+  Hint Resolve kill_builtin_res_sound : cse3. (* make the lemma available to [auto with cse3] *)
+
+  Lemma top_sound: (* [RELATION.top] is sound in every state. *)
+    forall rs m, (sem_rel RELATION.top rs m).
+  Proof.
+    unfold RELATION.top, sem_rel.
+    intros.
+    rewrite PSet.gempty in H. (* membership in the empty set is [false] *)
+    discriminate.
+  Qed.
+
+  Hint Resolve top_sound : cse3. (* make the lemma available to [auto with cse3] *)
+
+  Lemma external_call_sound: (* [apply_external_call ef rel] is sound for the memory [m'] left by an external call to [ef]; registers are unchanged. *)
+    forall ge ef (rel : RELATION.t) (m m' : mem) (rs : regset) vargs t vres
+           (REL : sem_rel rel rs m)
+           (CALL : external_call ef ge vargs m t vres m'),
+      sem_rel (apply_external_call (ctx:=ctx) ef rel) rs m'.
+  Proof.
+    destruct ef; intros; simpl in *. (* one goal per kind of external function *)
+    all: eauto using kill_mem_sound.
+    all: unfold builtin_or_external_sem in *.
+    1, 2, 3, 5, 6: destruct (Compopts.optim_CSE3_across_calls tt). (* these goals branch on the across-calls option *)
+    all: eauto using kill_mem_sound, top_sound.
+    1, 2, 3: destruct (Builtins.lookup_builtin_function name sg). (* remaining goals branch on the builtin lookup *)
+    all: eauto using kill_mem_sound, top_sound.
+    all: inv CALL; eauto using kill_mem_sound. (* whatever is left is closed by inverting the call semantics *)
+  Qed.
+
+  Hint Resolve external_call_sound : cse3. (* make the lemma available to [auto with cse3] *)
+
+  Section INDUCTIVENESS. (* What a successful [check_inductiveness] guarantees about an invariant map. *)
+    Variable fn : RTL.function.
+    Variable tenv : typing_env.
+    Variable inv: invariants.
+    
+    Definition is_inductive_step (pc pc' : node) := (* the invariant at [pc'] is [RB.ge] the transfer of the invariant at [pc], whenever [pc'] is a successor of the instruction at [pc] *)
+      forall instr,
+        PTree.get pc (fn_code fn) = Some instr ->
+        In pc' (successors_instr instr) ->
+        RB.ge (PMap.get pc' inv)
+              (apply_instr' (ctx:=ctx) tenv (fn_code fn) pc (PMap.get pc inv)).
+
+    Definition is_inductive_allstep := (* [is_inductive_step] for every pair of nodes *)
+      forall pc pc', is_inductive_step pc pc'.
+
+    Lemma checked_is_inductive_allstep: (* a successful check implies inductiveness over all steps *)
+      (check_inductiveness (ctx:=ctx) fn tenv inv) = true ->
+      is_inductive_allstep.
+    Proof.
+      unfold check_inductiveness, is_inductive_allstep, is_inductive_step.
+      rewrite andb_true_iff. (* the check is a conjunction *)
+      rewrite PTree_Properties.for_all_correct. (* second conjunct: a property of every entry of the code tree *)
+      intros (ENTRYPOINT & ALL).
+      intros until instr.
+      intros INSTR IN_SUCC.
+      specialize ALL with (x := pc) (a := instr).
+      pose proof (ALL INSTR) as AT_PC.
+      destruct (inv # pc).
+      2: apply RB.ge_bot. (* no invariant at [pc]: closed by [RB.ge_bot] *)
+      rewrite List.forallb_forall in AT_PC. (* AT_PC now speaks about each successor *)
+      unfold apply_instr'.
+      rewrite INSTR.
+      apply relb_leb_correct.
+      auto.
+    Qed.
+    
+    Lemma checked_is_inductive_entry: (* a successful check implies the entry point invariant is [Some RELATION.top] *)
+      (check_inductiveness (ctx:=ctx) fn tenv inv) = true ->
+      inv # (fn_entrypoint fn) = Some RELATION.top.
+    Proof.
+      unfold check_inductiveness, is_inductive_allstep, is_inductive_step.
+      rewrite andb_true_iff.
+      intros (ENTRYPOINT & ALL).
+      apply RB.beq_correct in ENTRYPOINT. (* turn the boolean equality test into [RB.eq] *)
+      unfold RB.eq, RELATION.eq in ENTRYPOINT.
+      destruct (inv # (fn_entrypoint fn)) as [rel | ].
+      2: contradiction. (* [None] at the entry point contradicts ENTRYPOINT *)
+      f_equal.
+      symmetry.
+      assumption.
+    Qed.
+  End INDUCTIVENESS.
+
+  Hint Resolve checked_is_inductive_allstep checked_is_inductive_entry : cse3. (* make both lemmas available to [auto with cse3] *)
+End SOUNDNESS.
diff --git a/backend/CSE3proof.v b/backend/CSE3proof.v
new file mode 100644
index 00000000..ccbfd198
--- /dev/null
+++ b/backend/CSE3proof.v
@@ -0,0 +1,880 @@
+(*
+Replace available expressions by the register containing their value.
+
+Proofs.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE3 CSE3analysis CSE3analysisproof.
+Require Import RTLtyping.
+
+
+Definition match_prog (p tp: RTL.program) := (* [p] and [tp] are related by [match_program]: functions through a successful [transf_fundef], the other component through [eq] *)
+  match_program (fun ctx f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match: (* A successful [transf_program] produces a program related to its input by [match_prog]. *)
+  forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+  intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Section SOUNDNESS. (* Lifts soundness facts from relations to optional relations ([RB.t]) and to invariant maps. *)
+Variable sp : val.
+Variable ctx : eq_context.
+
+Definition sem_rel_b (rel : RB.t) (rs : regset) (m : mem) := (* semantics of an optional relation in the state (rs, m) *)
+  match rel with
+  | None => False (* no relation: holds in no state *)
+  | Some rel => sem_rel (ctx:=ctx) (genv:=ge) (sp:=sp) rel rs m
+  end.
+
+Lemma forward_move_b_sound : (* under a satisfied optional relation, [forward_move_b] returns a register with the same value as [x] *)
+  forall rel rs m x,
+    (sem_rel_b rel rs m) ->
+    rs # (forward_move_b (ctx := ctx) rel x) = rs # x.
+Proof.
+    destruct rel as [rel | ]; simpl; intros.
+    2: contradiction. (* [None] case: the hypothesis is [False] *)
+    eapply forward_move_sound; eauto.
+  Qed.
+
+  Lemma forward_move_l_b_sound : (* same statement for a list of registers *)
+    forall rel rs m x,
+      (sem_rel_b rel rs m) ->
+      rs ## (forward_move_l_b (ctx := ctx) rel x) = rs ## x.
+  Proof.
+    destruct rel as [rel | ]; simpl; intros.
+    2: contradiction. (* [None] case: the hypothesis is [False] *)
+    eapply forward_move_l_sound; eauto.
+  Qed.
+
+  Definition fmap_sem (fmap : PMap.t RB.t) (pc : node) (rs : regset) (m : mem) := (* the entry of [fmap] at [pc] holds in the state (rs, m) *)
+    sem_rel_b (PMap.get pc fmap) rs m.
+  
+  Lemma subst_arg_ok: (* [subst_arg] returns a register holding the same value as [arg] when the invariant at [pc] holds *)
+    forall invariants,
+    forall pc,
+    forall rs,
+    forall m,
+    forall arg,
+    forall (SEM : fmap_sem invariants pc rs m),
+      rs # (subst_arg (ctx:=ctx) invariants pc arg) = rs # arg.
+  Proof.
+    intros.
+    apply forward_move_b_sound with (m:=m).
+    assumption.
+  Qed.
+  
+  Lemma subst_args_ok: (* same statement for a list of arguments *)
+    forall invariants,
+    forall pc,
+    forall rs,
+    forall m,
+    forall args,
+    forall (SEM : fmap_sem invariants pc rs m),
+      rs ## (subst_args (ctx:=ctx) invariants pc args) = rs ## args.
+  Proof.
+    intros.
+    apply forward_move_l_b_sound with (m:=m).
+    assumption.
+  Qed.
+End SOUNDNESS.
+
+Lemma functions_translated: (* A function found through value [v] in [ge] has a translated counterpart found through [v] in [tge]. *)
+  forall (v: val) (f: RTL.fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf,
+    Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  apply (Genv.find_funct_transf_partial TRANSF).
+Qed.
+
+Lemma function_ptr_translated: (* A function found at block [b] in [ge] has a translated counterpart at [b] in [tge]. *)
+  forall (b: block) (f: RTL.fundef),
+  Genv.find_funct_ptr ge b = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  apply (Genv.find_funct_ptr_transf_partial TRANSF).
+Qed.
+
+Lemma symbols_preserved: (* Symbol lookup gives the same result in [tge] as in [ge]. *)
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof.
+  apply (Genv.find_symbol_match TRANSF).
+Qed.
+
+Lemma senv_preserved: (* [ge] and [tge] are related by [Senv.equiv]. *)
+  Senv.equiv ge tge.
+Proof.
+  apply (Genv.senv_match TRANSF).
+Qed.
+
+Lemma sig_preserved: (* [transf_fundef] preserves the signature of a function definition. *)
+  forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  destruct f; simpl; intros.
+  - monadInv H. (* first kind of definition: the translation goes through [preanalysis] and the inductiveness check *)
+    monadInv EQ.
+    destruct preanalysis as [invariants hints].
+    destruct check_inductiveness.
+    2: discriminate. (* a failed check cannot produce [OK] *)
+    inv EQ1.
+    reflexivity.
+  - monadInv H. (* second kind of definition: immediate after inversion *)
+    reflexivity.
+Qed.
+
+Lemma stacksize_preserved: (* [transf_function] preserves [fn_stacksize]. *)
+  forall f tf, transf_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate. (* a failed check cannot produce [OK] *)
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma params_preserved: (* [transf_function] preserves [fn_params]. *)
+  forall f tf, transf_function f = OK tf -> fn_params tf = fn_params f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate. (* a failed check cannot produce [OK] *)
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma entrypoint_preserved: (* [transf_function] preserves [fn_entrypoint]. *)
+  forall f tf, transf_function f = OK tf -> fn_entrypoint tf = fn_entrypoint f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate. (* a failed check cannot produce [OK] *)
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma sig_preserved2: (* [transf_function] preserves [fn_sig]; function-level variant of [sig_preserved]. *)
+  forall f tf, transf_function f = OK tf -> fn_sig tf = fn_sig f.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate. (* a failed check cannot produce [OK] *)
+  inv EQ0.
+  reflexivity.
+Qed.
+
+Lemma transf_function_is_typable: (* If [transf_function] succeeds on [f], then [type_function f] succeeds too. *)
+  forall f tf, transf_function f = OK tf ->
+               exists tenv, type_function f = OK tenv.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  exists x. (* [x] is the intermediate result exposed by [monadInv] *)
+  assumption.
+Qed.
+Lemma transf_function_invariants_inductive: (* If [transf_function] succeeds, the invariants from [preanalysis] passed [check_inductiveness]. *)
+  forall f tf tenv, transf_function f = OK tf ->
+    type_function f = OK tenv ->
+    check_inductiveness (ctx:=(context_from_hints (snd (preanalysis tenv f))))
+                        f tenv (fst (preanalysis tenv f)) = true.
+Proof.
+  unfold transf_function; destruct f; simpl; intros.
+  monadInv H.
+  replace x with tenv in * by congruence. (* both are results of the same [type_function] call *)
+  clear x.
+  destruct preanalysis as [invariants hints].
+  destruct check_inductiveness; trivial; discriminate. (* [true] is the goal; [false] contradicts the [OK] result *)
+Qed.
+
+Lemma find_function_translated: (* A callee resolved by [find_function] in [ge] resolves in [tge] to its translation. *)
+  forall ros rs fd,
+    find_function ge ros rs = Some fd ->
+    exists tfd,
+      find_function tge ros rs = Some tfd /\ transf_fundef fd = OK tfd.
+Proof.
+  unfold find_function; intros. destruct ros as [r|id]. (* callee given by a register or by an identifier *)
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence. (* identifier case: same symbol lookup in both environments *)
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Inductive match_stackframes: list stackframe -> list stackframe -> signature -> Prop := (* relates a source call stack, a transformed call stack and a signature *)
+  | match_stackframes_nil: forall sg, (* empty stacks match when the signature result is [Tint] *)
+      sg.(sig_res) = Tint ->
+      match_stackframes nil nil sg
+  | match_stackframes_cons: (* frames agree on [res], [sp], [pc], [rs]; only the function is replaced by its translation *)
+      forall res f sp pc rs s tf ts sg tenv
+        (STACKS: match_stackframes s ts (fn_sig tf))
+        (FUN: transf_function f = OK tf)
+        (WTF: type_function f = OK tenv)
+        (WTRS: wt_regset tenv rs)
+        (WTRES: tenv res = proj_sig_res sg)
+        (REL: forall m vres, (* the invariant at [pc] holds for every memory and every value written to [res] *)
+            sem_rel_b sp (context_from_hints (snd (preanalysis tenv f)))
+                      ((fst (preanalysis tenv f))#pc) (rs#res <- vres) m),
+
+      match_stackframes
+        (Stackframe res f sp pc rs :: s)
+        (Stackframe res tf sp pc rs :: ts)
+        sg.
+
+Inductive match_states: state -> state -> Prop := (* relation between source and transformed execution states *)
+  | match_states_intro: (* regular states: same [sp], [pc], [rs], [m]; the invariant at [pc] holds *)
+      forall s f sp pc rs m ts tf tenv
+        (STACKS: match_stackframes s ts (fn_sig tf))
+        (FUN: transf_function f = OK tf)
+        (WTF: type_function f = OK tenv)
+        (WTRS: wt_regset tenv rs)
+        (REL: sem_rel_b sp (context_from_hints (snd (preanalysis tenv f))) ((fst (preanalysis tenv f))#pc) rs m),
+      match_states (State s f sp pc rs m)
+                   (State ts tf sp pc rs m)
+  | match_states_call: (* call states: same arguments and memory; arguments typed per the callee signature *)
+      forall s f args m ts tf
+        (STACKS: match_stackframes s ts (funsig tf))
+        (FUN: transf_fundef f = OK tf)
+        (WTARGS: Val.has_type_list args (sig_args (funsig tf))),
+      match_states (Callstate s f args m)
+                   (Callstate ts tf args m)
+  | match_states_return: (* return states: same result and memory; result typed per [sg] *)
+      forall s res m ts sg
+        (STACKS: match_stackframes s ts sg)
+        (WTRES: Val.has_type res (proj_sig_res sg)),
+      match_states (Returnstate s res m)
+                   (Returnstate ts res m).
+
+Lemma match_stackframes_change_sig: (* [match_stackframes] depends on the signature only through its result type. *)
+  forall s ts sg sg',
+  match_stackframes s ts sg ->
+  sg'.(sig_res) = sg.(sig_res) ->
+  match_stackframes s ts sg'.
+Proof.
+  intros. inv H.
+  constructor. congruence. (* nil case *)
+  econstructor; eauto. (* cons case: only WTRES mentions the signature *)
+  unfold proj_sig_res in *. rewrite H0; auto.
+Qed.
+
+Lemma transf_function_at: (* The transformed code at [pc] is [transf_instr] applied to the original instruction at [pc]. *)
+  forall f tf pc tenv instr
+    (TF : transf_function f = OK tf)
+    (TYPE : type_function f = OK tenv)
+    (PC : (fn_code f) ! pc = Some instr),
+    (fn_code tf) ! pc = Some (transf_instr
+       (ctx := (context_from_hints (snd (preanalysis tenv f))))
+       (fst (preanalysis tenv f))
+       pc instr).
+Proof.
+  intros.
+  unfold transf_function in TF.
+  monadInv TF.
+  replace x with tenv in * by congruence. (* both are results of the same [type_function] call *)
+  clear EQ.
+  destruct (preanalysis tenv f) as [invariants hints].
+  destruct check_inductiveness.
+  2: discriminate. (* a failed check cannot produce [OK] *)
+  inv EQ0.
+  simpl.
+  rewrite PTree.gmap. (* the new code is a map over the old code tree *)
+  rewrite PC.
+  reflexivity.
+Qed.
+
+Ltac TR_AT := erewrite transf_function_at by eauto. (* rewrite a lookup in the transformed code using [transf_function_at]; side conditions by [eauto] *)
+
+Hint Resolve wt_instrs type_function_correct : wt. (* typing facts for [eauto with wt] *)
+
+Lemma wt_undef : (* Setting a register to [Vundef] keeps the register file well typed. *)
+  forall tenv rs dst,
+    wt_regset tenv rs ->
+    wt_regset tenv rs # dst <- Vundef.
+Proof.
+  unfold wt_regset.
+  intros.
+  destruct (peq r dst). (* is the inspected register the updated one? *)
+  { subst dst.
+    rewrite Regmap.gss. (* it now holds [Vundef] *)
+    constructor.
+  }
+  rewrite Regmap.gso by congruence. (* other registers are unchanged *)
+  auto.
+Qed.
+
+Lemma rel_ge: (* Soundness transfers from [inv] to any [inv'] with [RELATION.ge inv' inv]. *)
+  forall inv inv'
+         (GE : RELATION.ge inv' inv)
+         ctx sp rs m
+         (REL: sem_rel (genv:=ge) (sp:=sp) (ctx:=ctx) inv rs m),
+  sem_rel (genv:=ge) (sp:=sp) (ctx:=ctx) inv' rs m.
+Proof.
+  unfold sem_rel, RELATION.ge.
+  intros.
+  apply (REL i); trivial.
+  eapply HashedSet.PSet.is_subset_spec1; eassumption. (* membership in [inv'] gives membership in [inv] through the subset test *)
+Qed.
+
+Hint Resolve rel_ge : cse3. (* make the lemma available to [auto with cse3] *)
+
+Lemma sem_rhs_sop : (* A successful [eval_operation] gives [sem_rhs] for the corresponding [SOp]. *)
+  forall sp op rs args m v,
+  eval_operation ge sp op rs ## args m = Some v ->
+  sem_rhs (genv:=ge) (sp:=sp) (SOp op) args rs m v.
+Proof.
+  intros. simpl.
+  rewrite H.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sop : cse3. (* make the lemma available to [auto with cse3] *)
+
+Lemma sem_rhs_sload : (* A successful address computation followed by a successful load gives [sem_rhs] for [SLoad]. *)
+  forall sp chunk addr rs args m a v,
+  eval_addressing ge sp addr rs ## args = Some a ->
+  Mem.loadv chunk m a = Some v ->
+  sem_rhs (genv:=ge) (sp:=sp) (SLoad chunk addr) args rs m v.
+Proof.
+  intros. simpl.
+  rewrite H. rewrite H0.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sload : cse3. (* make the lemma available to [auto with cse3] *)
+
+Lemma sem_rhs_sload_notrap1 : (* When the address computation fails, [Vundef] is the value of the [SLoad] right-hand side. *)
+  forall sp chunk addr rs args m,
+  eval_addressing ge sp addr rs ## args = None ->
+  sem_rhs (genv:=ge) (sp:=sp) (SLoad chunk addr) args rs m Vundef.
+Proof.
+  intros. simpl.
+  rewrite H.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sload_notrap1 : cse3. (* make the lemma available to [auto with cse3] *)
+
+Lemma sem_rhs_sload_notrap2 : (* When the address is computed but the load fails, [Vundef] is the value of the [SLoad] right-hand side. *)
+  forall sp chunk addr rs args m a,
+  eval_addressing ge sp addr rs ## args = Some a ->
+  Mem.loadv chunk m a = None ->
+  sem_rhs (genv:=ge) (sp:=sp) (SLoad chunk addr) args rs m Vundef.
+Proof.
+  intros. simpl.
+  rewrite H. rewrite H0.
+  reflexivity.
+Qed.
+
+Hint Resolve sem_rhs_sload_notrap2 : cse3. (* make the lemma available to [auto with cse3] *)
+
+Lemma sem_rel_top: (* [RELATION.top] is sound in every state, for the global environment [ge]. *)
+  forall ctx sp rs m, sem_rel (genv:=ge) (sp:=sp) (ctx:=ctx) RELATION.top rs m.
+Proof.
+  unfold sem_rel, RELATION.top.
+  intros.
+  rewrite HashedSet.PSet.gempty in *. (* membership in the empty set is [false] *)
+  discriminate.
+Qed.
+
+Hint Resolve sem_rel_top : cse3. (* make the lemma available to [auto with cse3] *)
+
+Lemma sem_rel_b_top: (* [Some RELATION.top] holds in every state, as an optional relation. *)
+  forall ctx sp rs m, sem_rel_b sp ctx (Some RELATION.top) rs m.
+Proof.
+  intros. simpl.
+  apply sem_rel_top.
+Qed.
+
+Hint Resolve sem_rel_b_top : cse3. (* make the lemma available to [auto with cse3] *)
+
+Ltac IND_STEP := (* reduces a goal about the invariant at a successor [mpc'] to soundness of the transfer function at [mpc] *)
+        match goal with (* needs a hypothesis giving the instruction at [mpc] and a [sem_rel_b] goal at [mpc'] *)
+        REW: (fn_code ?fn) ! ?mpc = Some ?minstr
+      |-
+        sem_rel_b ?sp (context_from_hints (snd (preanalysis ?tenv ?fn))) ((fst (preanalysis ?tenv ?fn)) # ?mpc') ?rs ?m =>
+        assert (is_inductive_allstep (ctx:= (context_from_hints (snd (preanalysis tenv fn)))) fn tenv (fst  (preanalysis tenv fn))) as IND by
+        (apply checked_is_inductive_allstep;
+          eapply transf_function_invariants_inductive; eassumption); (* inductiveness follows from the successful transformation *)
+        unfold is_inductive_allstep, is_inductive_step, apply_instr' in IND;
+        specialize IND with (pc:=mpc) (pc':=mpc') (instr:=minstr); (* instantiate for this particular step *)
+        simpl in IND;
+        rewrite REW in IND;
+        simpl in IND;
+        destruct ((fst (preanalysis tenv fn)) # mpc') as [zinv' | ];
+        destruct ((fst (preanalysis tenv fn)) # mpc) as [zinv | ];
+        simpl in *;
+        intuition; (* discharge the cases that became trivial or contradictory *)
+        eapply rel_ge; eauto with cse3 (* ; for printing
+        idtac mpc mpc' fn minstr *)
+      end.
+
+Lemma if_same : forall {T : Type} (b : bool) (x : T), (* a conditional with identical branches equals that branch *)
+    (if b then x else x) = x.
+Proof.
+  destruct b; trivial.
+Qed.
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 -> 
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS.
+  - (* Inop *)
+    exists (State ts tf sp pc' rs m). split.
+    + apply exec_Inop; auto.
+      TR_AT. reflexivity.
+    + econstructor; eauto.
+      IND_STEP.
+  - (* Iop *)
+    exists (State ts tf sp pc' (rs # res <- v) m). split.
+    + pose (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iop op args res pc')) as instr'.
+      assert (instr' = (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iop op args res pc'))) by reflexivity.
+      unfold transf_instr, find_op_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SOp op)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct (if is_trivial_op op
+               then None
+               else
+                rhs_find pc (SOp op)
+                  (subst_args (fst (preanalysis tenv f)) pc args) t) eqn:FIND.
+        ** destruct (is_trivial_op op). discriminate.
+           apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND.
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iop with (op := op) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)).
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_operation_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iop with (op := op) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)).
+        TR_AT.
+        { subst instr'.
+          rewrite if_same in H1.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_operation_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * eapply wt_exec_Iop with (f:=f); try eassumption.
+        eauto with wt.
+      * IND_STEP.
+        apply oper_sound; eauto with cse3.
+
+  - (* Iload *)
+    exists (State ts tf sp pc' (rs # dst <- v) m). split.
+    + pose (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iload trap chunk addr args dst pc')) as instr'.
+      assert (instr' = (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iload trap chunk addr args dst pc'))) by reflexivity.
+      unfold transf_instr, find_load_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SLoad chunk addr)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct rhs_find eqn:FIND.
+        ** apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND. (* ADDR *)
+           rewrite H1 in FIND_SOUND. (* LOAD *)
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iload with (trap := trap) (chunk := chunk) (a := a) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iload with (chunk := chunk) (trap := trap) (addr := addr) (a := a) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * eapply wt_exec_Iload with (f:=f); try eassumption.
+        eauto with wt.
+      * IND_STEP.
+        apply oper_sound; eauto with cse3.
+        
+  - (* Iload notrap1 *)
+    exists (State ts tf sp pc' (rs # dst <- Vundef) m). split.
+    + pose (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc')) as instr'.
+      assert (instr' = (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc'))) by reflexivity.
+      unfold transf_instr, find_load_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SLoad chunk addr)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct rhs_find eqn:FIND.
+        ** apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND. (* ADDR *)
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * apply wt_undef; assumption.
+      * IND_STEP.
+        apply oper_sound; eauto with cse3.
+        
+  - (* Iload notrap2 *)
+    exists (State ts tf sp pc' (rs # dst <- Vundef) m). split.
+    + pose (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc')) as instr'.
+      assert (instr' = (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iload NOTRAP chunk addr args dst pc'))) by reflexivity.
+      unfold transf_instr, find_load_in_fmap in instr'.
+      destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
+      pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SLoad chunk addr)
+                (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
+      * destruct rhs_find eqn:FIND.
+        ** apply exec_Iop with (op := Omove) (args := r :: nil).
+           TR_AT.
+           subst instr'.
+           congruence.
+           simpl.
+           specialize FIND_SOUND with (src := r) (rs := rs) (m := m).
+           simpl in FIND_SOUND.
+           rewrite subst_args_ok with (sp:=sp) (m:=m) in FIND_SOUND.
+           rewrite H0 in FIND_SOUND. (* ADDR *)
+           rewrite H1 in FIND_SOUND. (* LOAD *)
+           rewrite FIND_SOUND; auto.
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+        ** apply exec_Iload_notrap2 with (chunk := chunk) (a := a) (addr := addr) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+      * apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (a := a) (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); trivial.
+           TR_AT.
+           { subst instr'.
+           congruence. }
+           rewrite subst_args_ok with (sp:=sp) (m:=m).
+           {
+           rewrite eval_addressing_preserved with (ge1:=ge) by exact symbols_preserved.
+           assumption.
+           }
+           unfold fmap_sem.
+           change ((fst (preanalysis tenv f)) # pc)
+                  with (@PMap.get (option RELATION.t) pc (@fst invariants analysis_hints (preanalysis tenv f))).
+           rewrite INV_PC.
+           assumption.
+    + econstructor; eauto.
+      * apply wt_undef; assumption.
+      * IND_STEP.
+        apply oper_sound; eauto with cse3.
+
+  - (* Istore *)
+    exists (State ts tf sp pc' rs m'). split.
+    + eapply exec_Istore with (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args))
+      (src := (subst_arg (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc src)) ; try eassumption.
+      * TR_AT. reflexivity.
+      * rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
+        rewrite eval_addressing_preserved with (ge1 := ge) by exact symbols_preserved.
+        eassumption.
+      * rewrite subst_arg_ok with (sp:=sp) (m:=m) by trivial.
+        assumption.
+    + econstructor; eauto.
+      IND_STEP.
+      apply store_sound with (a0:=a) (m0:=m); eauto with cse3.
+      
+  - (* Icall *)
+    destruct (find_function_translated ros rs fd H0) as [tfd [HTFD1 HTFD2]].
+    econstructor. split.
+    + eapply exec_Icall; try eassumption.
+      * TR_AT. reflexivity.
+      * apply sig_preserved; auto.
+    + rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
+      assert (wt_instr f tenv (Icall (funsig fd) ros args res pc')) as WTcall by eauto with wt.
+      inv WTcall.
+      constructor; trivial.
+      * econstructor; eauto.
+        ** rewrite sig_preserved with (f:=fd); assumption.
+        ** intros.
+           IND_STEP.
+           apply kill_reg_sound; eauto with cse3.
+           eapply kill_mem_sound; eauto with cse3.
+      * rewrite sig_preserved with (f:=fd) by trivial.
+        rewrite <- H7.
+        apply wt_regset_list; auto.
+  - (* Itailcall *)
+    destruct (find_function_translated ros rs fd H0) as [tfd [HTFD1 HTFD2]].
+    econstructor. split.
+    + eapply exec_Itailcall; try eassumption.
+      * TR_AT. reflexivity.
+      * apply sig_preserved; auto.
+      * rewrite stacksize_preserved with (f:=f); eauto.
+    + rewrite subst_args_ok with (m:=m) (sp := (Vptr stk Ptrofs.zero)) by trivial.
+      assert (wt_instr f tenv (Itailcall (funsig fd) ros args)) as WTcall by eauto with wt.
+      inv WTcall.
+      constructor; trivial.
+      * rewrite sig_preserved with (f:=fd) by trivial.
+        inv STACKS.
+        ** econstructor; eauto.
+           rewrite H7.
+           rewrite <- sig_preserved2 with (tf:=tf) by trivial.
+           assumption.
+        ** econstructor; eauto.
+           unfold proj_sig_res in *.
+           rewrite H7.
+           rewrite WTRES.
+           rewrite sig_preserved2 with (f:=f) by trivial.
+           reflexivity.
+      * rewrite sig_preserved with (f:=fd) by trivial.
+        rewrite <- H6.
+        apply wt_regset_list; auto.
+  - (* Ibuiltin *)
+    econstructor. split.
+    + eapply exec_Ibuiltin; try eassumption.
+      * TR_AT. reflexivity.
+      * eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+      * eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + econstructor; eauto.
+      * eapply wt_exec_Ibuiltin with (f:=f); eauto with wt.
+      * IND_STEP.
+        apply kill_builtin_res_sound; eauto with cse3.
+        eapply external_call_sound; eauto with cse3.
+        
+  - (* Icond *)
+    econstructor. split.
+    + eapply exec_Icond with (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); try eassumption.
+      * TR_AT. reflexivity.
+      * rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
+        eassumption.
+      * reflexivity.
+    + econstructor; eauto.
+      destruct b; IND_STEP.
+      
+  - (* Ijumptable *)
+    econstructor. split.
+    + eapply exec_Ijumptable with (arg := (subst_arg (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc arg)); try eassumption.
+      * TR_AT. reflexivity.
+      * rewrite subst_arg_ok with (sp:=sp) (m:=m) by trivial.
+        assumption.
+    + econstructor; eauto.
+      assert (In pc' tbl) as IN_LIST by (eapply list_nth_z_in; eassumption).
+      IND_STEP.
+
+  - (* Ireturn *)
+    destruct or as [arg | ].
+    -- econstructor. split.
+       + eapply exec_Ireturn with (or := Some (subst_arg (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc arg)).
+         * TR_AT. reflexivity.
+         * rewrite stacksize_preserved with (f:=f); eauto.
+       + simpl.
+         rewrite subst_arg_ok with (sp:=(Vptr stk Ptrofs.zero)) (m:=m) by trivial.
+         econstructor; eauto.
+         apply type_function_correct in WTF.
+         apply wt_instrs with (pc:=pc) (instr:=(Ireturn (Some arg))) in WTF.
+         2: assumption.
+         inv WTF.
+         rewrite sig_preserved2 with (f:=f) by assumption.
+         rewrite <- H3.
+         unfold wt_regset in WTRS.
+         apply WTRS.
+    -- econstructor. split.
+       + eapply exec_Ireturn; try eassumption.
+         * TR_AT; reflexivity.
+         * rewrite stacksize_preserved with (f:=f); eauto.
+       + econstructor; eauto.
+         simpl. trivial.
+  - (* Callstate internal *)
+    monadInv FUN.
+    rename x into tf.
+    destruct (transf_function_is_typable f tf EQ) as [tenv TENV].
+    econstructor; split.
+    + apply exec_function_internal.
+      rewrite stacksize_preserved with (f:=f); eauto.
+    + rewrite params_preserved with (tf:=tf) (f:=f) by assumption.
+      rewrite entrypoint_preserved with (tf:=tf) (f:=f) by assumption.
+      econstructor; eauto.
+      * apply type_function_correct in TENV.
+        inv TENV.
+        simpl in WTARGS.
+        rewrite sig_preserved2 with (f:=f) in WTARGS by assumption.
+        apply wt_init_regs.
+        rewrite <- wt_params in WTARGS.
+        assumption.
+      * rewrite @checked_is_inductive_entry with (tenv:=tenv) (ctx:=(context_from_hints (snd (preanalysis tenv f)))).
+        ** apply sem_rel_b_top.
+        ** apply transf_function_invariants_inductive with (tf:=tf); auto.
+           
+  - (* external *)
+    simpl in FUN.
+    inv FUN.
+    econstructor. split.
+    + eapply exec_function_external.
+      eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + econstructor; eauto.
+      eapply external_call_well_typed; eauto.
+  - (* return *)
+    inv STACKS.
+    econstructor. split.
+    + eapply exec_return.
+    + econstructor; eauto.
+      apply wt_regset_assign; trivial.
+      rewrite WTRES0.
+      exact WTRES.
+Qed.
+
+Lemma transf_initial_states:  (* every initial state of [prog] has a matching initial state of [tprog] *)
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto.
+  intros (tf & A & B).
+  exists (Callstate nil tf nil m0); split.  (* witness: call state on [tf], empty stack, no arguments, memory [m0] *)
+  - econstructor; eauto.  (* goal 1: the witness is an initial state of [tprog] *)
+    + eapply (Genv.init_mem_match TRANSF); eauto.
+    + replace (prog_main tprog) with (prog_main prog).
+      rewrite symbols_preserved. eauto.
+      symmetry. eapply match_program_main; eauto.
+    + rewrite <- H3. eapply sig_preserved; eauto.
+  - constructor; trivial.  (* goal 2: the two call states are related by [match_states] *)
+    + constructor. rewrite sig_preserved with (f:=f) by assumption.
+      rewrite H3. reflexivity.
+    + rewrite sig_preserved with (f:=f) by assumption.
+      rewrite H3. reflexivity.
+Qed.
+
+Lemma transf_final_states:  (* a state matching a final state is final, with the same result [r] *)
+  forall S1 S2 r, match_states S1 S2 -> final_state S1 r -> final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.  (* invert finality, then the match, then the stack relation *)
+Qed.
+
+Theorem transf_program_correct:  (* main result: [tprog] forward-simulates [prog] for the RTL semantics *)
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.  (* built from the four ingredients below *)
+  - apply senv_preserved.
+  - eexact transf_initial_states.
+  - eexact transf_final_states.
+  - intros. eapply step_simulation; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 5bbb7508..a7465cee 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -1318,6 +1318,7 @@ Proof.
   + apply CASE2; inv H1; auto.
   + apply CASE1.
   + apply CASE2; inv H1; auto.
+  + apply CASE2; inv H1; auto.
 * apply set_res_lessdef; auto.
 
 - (* Icond *)
diff --git a/backend/Cminor.v b/backend/Cminor.v
index 91a4c104..dcebbb86 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -77,6 +77,7 @@ Inductive unary_operation : Type :=
   | Osingleoflongu: unary_operation.       (**r unsigned long to float32 *)
 
 Inductive binary_operation : Type :=
+  | Oexpect: typ -> binary_operation       (**r first value, second is expected*)
   | Oadd: binary_operation                 (**r integer addition *)
   | Osub: binary_operation                 (**r integer subtraction *)
   | Omul: binary_operation                 (**r integer multiplication *)
@@ -301,6 +302,7 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val :=
 Definition eval_binop
             (op: binary_operation) (arg1 arg2: val) (m: mem): option val :=
   match op with
+  | Oexpect ty => Some (Val.normalize arg1 ty)
   | Oadd => Some (Val.add arg1 arg2)
   | Osub => Some (Val.sub arg1 arg2)
   | Omul => Some (Val.mul arg1 arg2)
diff --git a/backend/CminorSel.v b/backend/CminorSel.v
index 96cb8ae6..26f47e23 100644
--- a/backend/CminorSel.v
+++ b/backend/CminorSel.v
@@ -50,7 +50,7 @@ with exprlist : Type :=
   | Econs: expr -> exprlist -> exprlist
 
 with condexpr : Type :=
-  | CEcond : condition -> exprlist -> condexpr
+  | CEcond : condition -> option bool -> exprlist -> condexpr
   | CEcondition : condexpr -> condexpr -> condexpr -> condexpr
   | CElet: expr -> condexpr -> condexpr.
 
@@ -207,10 +207,10 @@ with eval_exprlist: letenv -> exprlist -> list val -> Prop :=
       eval_exprlist le (Econs a1 al) (v1 :: vl)
 
 with eval_condexpr: letenv -> condexpr -> bool -> Prop :=
-  | eval_CEcond: forall le cond al vl vb,
+  | eval_CEcond: forall le cond expected al vl vb,
       eval_exprlist le al vl ->
       eval_condition cond vl m = Some vb ->
-      eval_condexpr le (CEcond cond al) vb
+      eval_condexpr le (CEcond cond expected al) vb
   | eval_CEcondition: forall le a b c va v,
       eval_condexpr le a va ->
       eval_condexpr le (if va then b else c) v ->
@@ -495,7 +495,7 @@ with lift_exprlist (p: nat) (a: exprlist) {struct a}: exprlist :=
 
 with lift_condexpr (p: nat) (a: condexpr) {struct a}: condexpr :=
   match a with
-  | CEcond c al => CEcond c (lift_exprlist p al)
+  | CEcond c expected al => CEcond c expected (lift_exprlist p al)
   | CEcondition a b c => CEcondition (lift_condexpr p a) (lift_condexpr p b) (lift_condexpr p c)
   | CElet a b => CElet (lift_expr p a) (lift_condexpr (S p) b)
   end.
diff --git a/backend/Cminortyping.v b/backend/Cminortyping.v
index 92ec45f2..8945cecf 100644
--- a/backend/Cminortyping.v
+++ b/backend/Cminortyping.v
@@ -64,6 +64,7 @@ Definition type_binop (op: binary_operation) : typ * typ * typ :=
   | Ocmpf _ => (Tfloat, Tfloat, Tint)
   | Ocmpfs _ => (Tsingle, Tsingle, Tint)
   | Ocmpl _ | Ocmplu _ => (Tlong, Tlong, Tint)
+  | Oexpect ty => (ty, ty, ty)
   end.
 
 Module RTLtypes <: TYPE_ALGEBRA.
diff --git a/backend/FirstNop.v b/backend/FirstNop.v
new file mode 100644
index 00000000..f7e5261e
--- /dev/null
+++ b/backend/FirstNop.v
@@ -0,0 +1,18 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Definition transf_function (f: function) : function :=  (* give [f] a new entry node holding a single [Inop] *)
+  let start_pc := Pos.succ (max_pc_function f) in  (* node number one past [max_pc_function f] *)
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.set start_pc (Inop f.(fn_entrypoint)) f.(fn_code);  (* the new node jumps to the old entry point *)
+     fn_entrypoint := start_pc |}.  (* signature, parameters and stack size are copied unchanged *)
+
+Definition transf_fundef (fd: fundef) : fundef :=  (* lift [transf_function] to function definitions *)
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=  (* apply [transf_fundef] across the whole program *)
+  transform_program transf_fundef p.
+
diff --git a/backend/FirstNopproof.v b/backend/FirstNopproof.v
new file mode 100644
index 00000000..a5d63c25
--- /dev/null
+++ b/backend/FirstNopproof.v
@@ -0,0 +1,273 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import FirstNop.
+Require Import Lia.
+
+Definition match_prog (p tp: RTL.program) :=  (* [tp] is related to [p] by [match_program], functions being mapped by [transf_fundef] *)
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:  (* the output of [transf_program] always satisfies [match_prog] *)
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:  (* lookup of a function by value commutes with [transf_fundef] *)
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:  (* same as [functions_translated], for [Genv.find_funct_ptr] *)
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:  (* symbol lookup gives the same answer in [tge] and [ge] *)
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:  (* [ge] and [tge] are related by [Senv.equiv] *)
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:  (* the transformation does not change a function signature *)
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.  (* holds by computation for each kind of function definition *)
+Qed.
+
+Lemma find_function_translated:  (* resolving a call target commutes with [transf_fundef] *)
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].  (* target given by register [r] or by symbol [id] *)
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:  (* every instruction of [f] is still present, at the same node, after the transformation *)
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some i.
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  destruct (peq pc (Pos.succ (max_pc_function f))) as [EQ | NEQ].  (* is [pc] the newly added node? *)
+  { assert (pc <= (max_pc_function f))%positive as LE by (eapply max_pc_function_sound; eassumption).
+    subst pc.
+    lia.  (* impossible: [pc] would be both at most the maximum and its successor *)
+  }
+  rewrite PTree.gso by congruence.  (* otherwise the [PTree.set] does not affect node [pc] *)
+  assumption.
+Qed.
+
+Hint Resolve transf_function_at : firstnop.
+
+Ltac TR_AT :=  (* find a hypothesis [(fn_code _)!_ = Some _] and add its [transf_function_at] consequence to the context *)
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=  (* frames agree on everything except that [f] becomes [transf_function f] *)
+| match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=  (* simulation relation between source and transformed states *)
+  | match_regular_states: forall stk f sp pc rs m stk'  (* same sp, pc, registers and memory; function transformed *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'  (* same arguments and memory; callee transformed *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'  (* same return value and memory *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+(*
+Lemma match_pc_refl : forall f pc, match_pc f pc pc.
+Proof.
+  unfold match_pc.
+  left.
+  trivial.
+Qed.
+
+Hint Resolve match_pc_refl : firstnop.
+
+Lemma initial_jump:
+  forall f,
+  (fn_code (transf_function f)) ! (Pos.succ (max_pc_function f)) =
+  Some (Inop (fn_entrypoint f)).
+Proof.
+  intros. unfold transf_function. simpl.
+  apply PTree.gss.
+Qed.
+
+Hint Resolve initial_jump : firstnop.
+ *)
+
+Lemma match_pc_same :  (* NOTE(review): appears to restate [transf_function_at] using [PTree.get] directly -- confirm *)
+  forall f pc i,
+    PTree.get pc (fn_code f) = Some i ->
+    PTree.get pc (fn_code (transf_function f)) = Some i.
+Proof.
+  intros.
+  unfold transf_function. simpl.
+  rewrite <- H.
+  apply PTree.gso.  (* remaining goal: [pc] differs from the newly added node *)
+  pose proof (max_pc_function_sound f pc i H) as LE.
+  unfold Ple in LE.
+  lia.
+Qed.
+
+Hint Resolve match_pc_same : firstnop.
+
+
+Definition measure (S: RTL.state) : nat :=  (* measure used by the stuttering disjunct of [step_simulation] *)
+  match S with
+  | State _ _ _ _ _ _ => 0%nat
+  | Callstate _ _ _ _ => 1%nat  (* call states are the only ones with a nonzero measure *)
+  | Returnstate _ _ _ => 0%nat
+  end.
+
+Lemma step_simulation:  (* simulation diagram: a source step is matched by one or more target steps, or by a stuttering step *)
+  forall S1 t S2,
+  step ge S1 t S2 ->
+  forall S1' (MS: match_states S1 S1'),
+  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
+  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.  (* stuttering alternative; every case below takes the left one *)
+Proof.
+  induction 1; intros; inv MS.  (* one case per rule of [step] *)
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Inop; eauto with firstnop.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iop with (v:=v); eauto with firstnop.
+      rewrite <- H0.
+      apply eval_operation_preserved.
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iload with (v:=v); eauto with firstnop.
+      all: rewrite <- H0.
+      all: auto using eval_addressing_preserved, symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iload_notrap1; eauto with firstnop.
+      all: rewrite <- H0;
+      apply eval_addressing_preserved;
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Iload_notrap2; eauto with firstnop.
+      all: rewrite <- H0;
+      apply eval_addressing_preserved;
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Istore; eauto with firstnop.
+      all: rewrite <- H0;
+      apply eval_addressing_preserved;
+      apply symbols_preserved.
+    + constructor; auto with firstnop.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Icall.
+      apply match_pc_same. exact H.
+      apply find_function_translated.
+      exact H0.
+      apply sig_preserved.
+    + constructor.
+      constructor; auto.
+      constructor.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Itailcall.
+      apply match_pc_same. exact H.
+      apply find_function_translated.
+      exact H0.
+      apply sig_preserved.
+      unfold transf_function; simpl.
+      eassumption.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Ibuiltin; eauto with firstnop.
+      eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+      eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Icond; eauto with firstnop.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Ijumptable; eauto with firstnop.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_Ireturn; eauto with firstnop.
+    + constructor; auto.
+  - left. econstructor. split.  (* internal function call: the only case needing two target steps *)
+    + eapply plus_two.
+      * eapply exec_function_internal; eauto with firstnop.  (* step 1: enter the transformed function *)
+      * eapply exec_Inop.  (* step 2: run the [Inop] stored at the new entry node *)
+        unfold transf_function; simpl.
+        rewrite PTree.gss.
+        reflexivity.
+      * auto.
+    + constructor; auto.
+  - left. econstructor. split.
+    + eapply plus_one. eapply exec_function_external; eauto with firstnop.
+      eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + constructor; auto.
+  - left.
+    inv STACKS. inv H1.  (* expose the top matching frame of the target stack *)
+    econstructor; split.
+    + eapply plus_one. eapply exec_return; eauto.
+    + constructor; auto.
+Qed.
+
+Lemma transf_initial_states:  (* every initial state of [prog] has a matching initial state of [tprog] *)
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.  (* first goal: the target state is initial; the four indented lines discharge its premises *)
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.  (* second goal: the call states match, with empty matching stacks *)
+Qed.
+
+Lemma transf_final_states:  (* a state matching a final state is final, with the same result [r] *)
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.  (* invert finality, then the match, then the stack relation *)
+Qed.
+
+Theorem transf_program_correct:  (* main result: [tprog] forward-simulates [prog] for the RTL semantics *)
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_star.  (* the star variant takes the measure-based diagram of [step_simulation] *)
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Inject.v b/backend/Inject.v
new file mode 100644
index 00000000..971a5423
--- /dev/null
+++ b/backend/Inject.v
@@ -0,0 +1,122 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+
+Inductive inj_instr : Type :=  (* instructions that may be injected; they carry no successor node *)
+  | INJnop
+  | INJop: operation -> list reg -> reg -> inj_instr  (* operation, arguments, destination *)
+  | INJload: memory_chunk -> addressing -> list reg -> reg -> inj_instr.  (* chunk, addressing mode, arguments, destination *)
+
+Definition inject_instr (i : inj_instr) (pc' : node) : instruction :=  (* turn [i] into an RTL instruction whose successor is [pc'] *)
+  match i with
+  | INJnop => Inop pc'
+  | INJop op args dst => Iop op args dst pc'
+  | INJload chunk addr args dst => Iload NOTRAP chunk addr args dst pc'  (* injected loads always use the [NOTRAP] mode *)
+  end.
+
+Fixpoint inject_list (prog : code) (pc : node) (dst : node)  (* write [l] at consecutive nodes starting at [pc], then a jump to [dst] *)
+         (l : list inj_instr) : node * code :=  (* result: (node just after the final jump, updated code) *)
+  let pc' := Pos.succ pc in
+  match l with
+  | nil => (pc', PTree.set pc (Inop dst) prog)  (* end of list: the closing [Inop dst] goes at [pc] *)
+  | h::t =>
+    inject_list (PTree.set pc (inject_instr h pc') prog)  (* [h] goes at [pc] and falls through to [pc'] *)
+                pc' dst t
+  end.
+
+Definition successor (i : instruction) : node :=  (* the successor node of [i] that injection redirects *)
+  match i with
+  | Inop pc' => pc'
+  | Iop _ _ _ pc' => pc'
+  | Iload _ _ _ _ _ pc' => pc'
+  | Istore _ _ _ _ pc' => pc'
+  | Icall _ _ _ _ pc' => pc'
+  | Ibuiltin _ _ _ pc' => pc'
+  | Icond _ _ pc' _ _ => pc'  (* for a conditional, only the first of its two target nodes *)
+  | Itailcall _ _ _
+  | Ijumptable _ _
+  | Ireturn _ => 1  (* these three cases return the constant node 1 *)
+  end.
+
+Definition alter_successor (i : instruction) (pc' : node) : instruction :=  (* [i] with the node read by [successor] replaced by [pc'] *)
+  match i with
+  | Inop _ => Inop pc'
+  | Iop op args dst _ => Iop op args dst pc'
+  | Iload trap chunk addr args dst _ => Iload trap chunk addr args dst pc'
+  | Istore chunk addr args src _ => Istore chunk addr args src pc'
+  | Ibuiltin ef args res _ => Ibuiltin ef args res pc'
+  | Icond cond args _ pc2 expected => Icond cond args pc' pc2 expected  (* only the first target changes; [pc2] is kept *)
+  | Icall sig ros args res _ => Icall sig ros args res pc'
+  | Itailcall _ _ _
+  | Ijumptable _ _
+  | Ireturn _ => i  (* these three cases are returned unchanged *)
+  end.
+
+Definition inject_at (prog : code) (pc extra_pc : node)  (* splice [l] in after the instruction at [pc], using nodes from [extra_pc] on *)
+           (l : list inj_instr) : node * code :=  (* result: (next unused node, updated code) *)
+  match PTree.get pc prog with
+  | Some i =>
+    inject_list (PTree.set pc (alter_successor i extra_pc) prog)  (* redirect [i] to the injected sequence *)
+                extra_pc (successor i) l  (* the sequence ends by jumping to the original successor of [i] *)
+  | None => inject_list prog extra_pc 1 l (* does not happen *)
+  end.
+
+Definition inject_at' (already : node * code) pc l :=  (* [inject_at] taking the (next node, code) pair as one argument *)
+  let (extra_pc, prog) := already in
+  inject_at prog pc extra_pc l.
+
+Definition inject_l (prog : code) extra_pc injections :=  (* apply a list of (node, instructions) injections from left to right *)
+  List.fold_left (fun already (injection : node * (list inj_instr)) =>
+                    inject_at' already (fst injection) (snd injection))  (* thread the (next node, code) pair through *)
+    injections
+    (extra_pc, prog).  (* initial accumulator: first unused node and the original code *)
+(*
+Definition inject' (prog : code) (extra_pc : node) (injections : PTree.t (list inj_instr)) :=
+  PTree.fold inject_at' injections (extra_pc, prog).
+
+Definition inject prog extra_pc injections : code :=
+  snd (inject' prog extra_pc injections).
+*)
+
+Section INJECTOR.  (* the transformation, parameterized by an injection generator *)
+  Variable gen_injections : function -> node -> reg -> PTree.t (list inj_instr).  (* applied below to f, max_pc and max_reg *)
+
+  Definition valid_injection_instr (max_reg : reg) (i : inj_instr) :=  (* boolean validity check for one injected instruction *)
+    match i with
+    | INJnop => true
+    | INJop op args res => (max_reg <? res) && (negb (is_trapping_op op)  (* destination above [max_reg], op not trapping, *)
+       && (Datatypes.length args =? args_of_operation op)%nat) (* and argument count matching the operation *)
+    | INJload _ _ _ res => max_reg <? res  (* destination above [max_reg] *)
+    end.
+  
+  Definition valid_injections1 max_pc max_reg :=  (* check a whole list of (node, instructions) injections *)
+    List.forallb
+         (fun injection =>
+            ((fst injection) <=? max_pc) &&  (* the injection point must be at most [max_pc] *)
+            (List.forallb (valid_injection_instr max_reg) (snd injection))
+         ).
+
+  Definition valid_injections f :=  (* [valid_injections1] with the bounds computed from [f] *)
+    valid_injections1 (max_pc_function f) (max_reg_function f).
+  
+  Definition transf_function (f : function) : res function :=  (* inject into [f], or fail if the injections are invalid *)
+    let max_pc := max_pc_function f in
+    let max_reg := max_reg_function f in
+    let injections := PTree.elements (gen_injections f max_pc max_reg) in
+    if valid_injections1 max_pc max_reg injections
+    then
+      OK {| fn_sig := f.(fn_sig);
+            fn_params := f.(fn_params);
+            fn_stacksize := f.(fn_stacksize);
+            fn_code := snd (inject_l (fn_code f) (Pos.succ max_pc) injections);  (* new nodes start just above [max_pc] *)
+            fn_entrypoint := f.(fn_entrypoint) |}  (* the entry point is unchanged *)
+    else Error (msg "Inject.transf_function: injections at bad locations").
+
+Definition transf_fundef (fd: fundef) : res fundef :=  (* lift [transf_function] to function definitions *)
+  AST.transf_partial_fundef transf_function fd.
+
+Definition transf_program (p: program) : res program :=  (* apply [transf_fundef] across the whole program *)
+  transform_partial_program transf_fundef p.
+End INJECTOR.
diff --git a/backend/Injectproof.v b/backend/Injectproof.v
new file mode 100644
index 00000000..75fed25f
--- /dev/null
+++ b/backend/Injectproof.v
@@ -0,0 +1,1794 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Globalenvs Values Events.
+Require Import Inject.
+Require Import Lia.
+
+Local Open Scope positive.
+
+(** Running [inject_list] from position [pc] does not modify the code at any
+    program point [pc0] strictly below [pc]. *)
+Lemma inject_list_preserves:
+  forall l prog pc dst pc0,
+    pc0 < pc ->
+    PTree.get pc0 (snd (inject_list prog pc dst l)) = PTree.get pc0 prog.
+Proof.
+  induction l; intros; simpl.
+  - apply PTree.gso. lia.
+  - rewrite IHl by lia.
+    apply PTree.gso. lia.
+Qed.
+
+(** [pos_add_nat x n] applies [Pos.succ] to [x] exactly [n] times, by
+    structural recursion on the natural number [n]. *)
+Fixpoint pos_add_nat x n :=
+  match n with
+  | O => x
+  | S n' => Pos.succ (pos_add_nat x n')
+  end.
+
+(** [pos_add_nat x n] is never below [x]. *)
+Lemma pos_add_nat_increases : forall x n, x <= (pos_add_nat x n).
+Proof.
+  induction n; simpl; lia.
+Qed.
+
+(** [Pos.succ] commutes with [pos_add_nat]: taking the successor of the
+    result is the same as taking the successor of the starting point. *)
+Lemma pos_add_nat_succ : forall n x,
+    Pos.succ (pos_add_nat x n) = pos_add_nat (Pos.succ x) n.
+Proof.
+  induction n; simpl; intros; trivial.
+  rewrite IHn.
+  reflexivity.
+Qed.
+
+(** [pos_add_nat x] is strictly monotone in its natural-number argument. *)
+Lemma pos_add_nat_monotone : forall x n1 n2,
+    (n1 < n2) % nat ->
+    (pos_add_nat x n1) < (pos_add_nat x n2).
+Proof.
+  induction n1; destruct n2; intros.
+  - lia.
+  - simpl.
+    pose proof (pos_add_nat_increases x n2).
+    lia.
+  - lia.
+  - simpl.
+    specialize IHn1 with n2.
+    lia.
+Qed.
+
+(** The position returned by [inject_list] (first component of its result) is
+    the starting position [pc] advanced by [S (length l)]. *)
+Lemma inject_list_increases:
+  forall l prog pc dst,
+    (fst (inject_list prog pc dst l)) = pos_add_nat pc (S (List.length l)).
+Proof.
+  induction l; simpl; intros; trivial.
+  rewrite IHl.
+  simpl.
+  rewrite <- pos_add_nat_succ.
+  reflexivity.
+Qed.
+
+(** [bounded_nth k l BOUND] returns the element of [l] at index [k] (counting
+    from 0).  The proof [BOUND] that [k] is below the length of [l] makes the
+    function total: no default value is needed.  The proof obligations
+    generated by [Program] are discharged below from [BOUND] using [lia]. *)
+Program Fixpoint bounded_nth
+  {T : Type} (k : nat) (l : list T) (BOUND : (k < List.length l)%nat) : T :=
+  match k, l with
+  | O, h::_ => h
+  | (S k'), _::l' => bounded_nth k' l' _
+  | _, nil => _
+  end.
+Obligation 1.
+Proof.
+  simpl in BOUND.
+  lia.
+Qed.
+Obligation 2.
+Proof.
+  simpl in BOUND.
+  lia.
+Qed.
+
+(** Statement of the unfolding equation of [bounded_nth] on a successor index
+    and a non-empty list.  It is written as a [Program Definition] so that
+    the bound proof required for [bounded_nth (S k) (h::l)] (the [_] hole) is
+    produced as an obligation, solved here by [lia]. *)
+Program Definition bounded_nth_S_statement : Prop :=
+  forall (T : Type) (k : nat) (h : T) (l : list T) (BOUND : (k < List.length l)%nat),
+    bounded_nth (S k) (h::l) _ = bounded_nth k l BOUND.
+Obligation 1.
+lia.
+Qed.
+
+(** The value of [bounded_nth k l] does not depend on which proof of the
+    bound is supplied. *)
+Lemma bounded_nth_proof_irr :
+  forall {T : Type} (k : nat) (l : list T)
+         (BOUND1 BOUND2 : (k < List.length l)%nat),
+    (bounded_nth k l BOUND1) = (bounded_nth k l BOUND2).
+Proof.
+  induction k; destruct l; simpl; intros; trivial; lia.
+Qed.
+
+(** Proof of [bounded_nth_S_statement]; the final step relies on
+    [bounded_nth_proof_irr] to identify the two bound proofs. *)
+Lemma bounded_nth_S : bounded_nth_S_statement.
+Proof.
+  unfold bounded_nth_S_statement.
+  induction k; destruct l; simpl; intros; trivial.
+  1, 2: lia.
+  apply bounded_nth_proof_irr.
+Qed.
+
+(** After [inject_list prog pc dst l], for every index [k] below the length
+    of [l], the code at position [pos_add_nat pc k] is [inject_instr] applied
+    to the [k]-th element of [l] and to the next position
+    [Pos.succ (pos_add_nat pc k)]. *)
+Lemma inject_list_injected:
+  forall l prog pc dst k (BOUND : (k < (List.length l))%nat),
+    PTree.get (pos_add_nat pc k) (snd (inject_list prog pc dst l)) =
+    Some (inject_instr (bounded_nth k l BOUND) (Pos.succ (pos_add_nat pc k))).
+Proof.
+  induction l; simpl; intros.
+  - lia.
+  - simpl.
+    destruct k as [ | k]; simpl pos_add_nat.
+    + simpl bounded_nth.
+      rewrite inject_list_preserves by lia.
+      apply PTree.gss.
+    + rewrite pos_add_nat_succ.
+      erewrite IHl.
+      f_equal. f_equal.
+      simpl.
+      apply bounded_nth_proof_irr.
+      Unshelve.
+      lia.
+Qed.
+
+(** After [inject_list prog pc dst l], the position just past the injected
+    instructions, [pos_add_nat pc (length l)], holds [Inop dst]. *)
+Lemma inject_list_injected_end:
+  forall l prog pc dst,
+    PTree.get (pos_add_nat pc (List.length l))
+              (snd (inject_list prog pc dst l)) =
+    Some (Inop dst).
+Proof.
+  induction l; simpl; intros.
+  - apply PTree.gss.
+  - rewrite pos_add_nat_succ.
+    apply IHl.
+Qed.
+    
+(** [inject_at prog pc extra_pc l] leaves unchanged the code at any program
+    point [pc0] that is strictly below [extra_pc] and different from [pc]. *)
+Lemma inject_at_preserves :
+  forall prog pc extra_pc l pc0,
+    pc0 < extra_pc ->
+    pc0 <> pc ->
+    PTree.get pc0 (snd (inject_at prog pc extra_pc l)) = PTree.get pc0 prog.
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc prog) eqn:GET.
+  - rewrite inject_list_preserves; trivial.
+    apply PTree.gso; lia.
+  - apply inject_list_preserves; trivial.
+Qed.
+
+(** If [prog] has instruction [i] at [pc] and [pc] is below [extra_pc], then
+    after [inject_at] the code at [pc] is [alter_successor i extra_pc]. *)
+Lemma inject_at_redirects:
+  forall prog pc extra_pc l i,
+    pc < extra_pc ->
+    PTree.get pc prog = Some i ->
+    PTree.get pc (snd (inject_at prog pc extra_pc l)) =
+    Some (alter_successor i extra_pc).
+Proof.
+  intros until i. intros BEFORE GET. unfold inject_at.
+  rewrite GET.
+  rewrite inject_list_preserves by trivial.
+  apply PTree.gss.
+Qed.
+
+(** If [prog] has no instruction at [pc] and [pc] is below [extra_pc], then
+    there is still no instruction at [pc] after [inject_at]. *)
+Lemma inject_at_redirects_none:
+  forall prog pc extra_pc l,
+    pc < extra_pc ->
+    PTree.get pc prog = None ->
+    PTree.get pc (snd (inject_at prog pc extra_pc l)) = None.
+Proof.
+  intros until l. intros BEFORE GET. unfold inject_at.
+  rewrite GET.
+  rewrite inject_list_preserves by trivial.
+  assumption.
+Qed.
+
+(** The position returned by [inject_at] is [extra_pc] advanced by
+    [S (length l)], whether or not [prog] has an instruction at [pc]. *)
+Lemma inject_at_increases:
+  forall prog pc extra_pc l,
+    (fst (inject_at prog pc extra_pc l)) = pos_add_nat extra_pc (S (List.length l)).  
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc prog).
+  all: apply inject_list_increases.
+Qed.
+
+(** Counterpart of [inject_list_injected] for [inject_at]: the [k]-th
+    injected instruction is found at [pos_add_nat extra_pc k]. *)
+Lemma inject_at_injected:
+  forall l prog pc extra_pc k (BOUND : (k < (List.length l))%nat),
+    PTree.get (pos_add_nat extra_pc k) (snd (inject_at prog pc extra_pc l)) =
+    Some (inject_instr (bounded_nth k l BOUND) (Pos.succ (pos_add_nat extra_pc k))).
+Proof.
+  intros. unfold inject_at.
+  destruct (prog ! pc); apply inject_list_injected.
+Qed.
+
+(** If [prog] has instruction [i] at [pc], then after [inject_at] the
+    position just past the injected instructions holds
+    [Inop (successor i)]. *)
+Lemma inject_at_injected_end:
+  forall l prog pc extra_pc i,
+    PTree.get pc prog = Some i ->
+    PTree.get (pos_add_nat extra_pc (List.length l))
+              (snd (inject_at prog pc extra_pc l)) =
+    Some (Inop (successor i)).
+Proof.
+  intros until i. intro REW. unfold inject_at.
+  rewrite REW.
+  apply inject_list_injected_end.
+Qed.
+
+(** Every pair is equal to the pair of its two projections.  Used below to
+    expose [fst] and [snd] of an [inject_at] result before folding
+    [inject_l]. *)
+Lemma pair_expand:
+  forall { A B : Type } (p : A*B),
+    p = ((fst p), (snd p)).
+Proof.
+  destruct p; simpl; trivial.
+Qed.
+
+(** [inject_l_position extra_pc injections k] computes a position by walking
+    down the list [injections]: it returns [extra_pc] when the list is empty
+    or when [k] is 0; otherwise it skips the head injection [(pc, l)] by
+    advancing [extra_pc] by [length l] plus one and recurses on the tail with
+    [k] decremented.  The lemmas below use it as the position where the
+    [k]-th injection of the list is placed by [inject_l]. *)
+Fixpoint inject_l_position extra_pc
+         (injections : list (node * (list inj_instr)))
+         (k : nat) {struct injections} : node :=
+  match injections with
+  | nil => extra_pc
+  | (pc,l)::l' =>
+    match k with
+    | O => extra_pc
+    | S k' =>
+      inject_l_position
+        (Pos.succ (pos_add_nat extra_pc (List.length l))) l' k'
+    end
+  end.
+
+(** [inject_l_position pc injections k] is never below its starting
+    position [pc]. *)
+Lemma inject_l_position_increases : forall injections pc k,
+    pc <= inject_l_position pc injections k.
+Proof.
+  induction injections; simpl; intros.
+  lia.
+  destruct a as [_ l].
+  destruct k.
+  lia.
+  specialize IHinjections with (pc := (Pos.succ (pos_add_nat pc (Datatypes.length l)))) (k := k).
+  assert (pc <= (pos_add_nat pc (Datatypes.length l))) by apply pos_add_nat_increases.
+  lia.
+Qed.
+                    
+(** [inject_l prog extra_pc injections] folds [inject_at'] from left to right
+    over the list of injections, starting from the pair [(extra_pc, prog)]
+    and passing the program point and instruction list of each injection. *)
+Definition inject_l (prog : code) extra_pc injections :=
+  List.fold_left (fun already (injection : node * (list inj_instr)) =>
+                    inject_at' already (fst injection) (snd injection))
+    injections
+    (extra_pc, prog).
+
+(** [inject_l] leaves unchanged the code at any program point [pc0] that is
+    strictly below [extra_pc] and is not the target of any injection in the
+    list (expressed by the boolean [forallb] hypothesis). *)
+Lemma inject_l_preserves :
+  forall injections prog extra_pc pc0,
+    pc0 < extra_pc ->
+    List.forallb (fun injection => if peq (fst injection) pc0 then false else true) injections = true ->
+    PTree.get pc0 (snd (inject_l prog extra_pc injections)) = PTree.get pc0 prog.
+Proof.
+  induction injections;
+    intros until pc0; intros BEFORE ALL; simpl; trivial.
+  unfold inject_l.
+  destruct a as [pc l]. simpl.
+  simpl in ALL.
+  rewrite andb_true_iff in ALL.
+  destruct ALL as [NEQ ALL].
+  rewrite pair_expand with (p := inject_at prog pc extra_pc l).
+  fold (inject_l (snd (inject_at prog pc extra_pc l))
+              (fst (inject_at prog pc extra_pc l))
+              injections).
+  rewrite IHinjections; trivial.
+  - apply inject_at_preserves; trivial.
+    destruct (peq pc pc0); congruence.
+  - rewrite inject_at_increases.
+    pose proof (pos_add_nat_increases extra_pc (S (Datatypes.length l))).
+    lia.
+Qed.
+
+(** [nth_error] on the empty list is [None] at every index. *)
+Lemma nth_error_nil : forall { T : Type} k,
+    nth_error (@nil T) k = None.
+Proof.
+  destruct k; simpl; trivial.
+Qed.
+
+(** Location of injected instructions after [inject_l].  Assume every
+    injection targets a program point strictly below [extra_pc] ([BELOW]) and
+    the injection number [injnum] of the list is [(pc, l)] ([NUMBER]).  Then
+    for every [k] below the length of [l], the code at offset [k] from
+    [inject_l_position extra_pc injections injnum] is [inject_instr] applied
+    to the [k]-th element of [l] and to the next position. *)
+Lemma inject_l_injected:
+  forall injections prog injnum pc l extra_pc k
+         (BELOW : forallb (fun injection => (fst injection) <? extra_pc) injections = true)
+         (NUMBER : nth_error injections injnum = Some (pc, l))
+         (BOUND : (k < (List.length l))%nat),
+    PTree.get (pos_add_nat (inject_l_position extra_pc injections injnum) k)
+              (snd (inject_l prog extra_pc injections)) =
+    Some (inject_instr (bounded_nth k l BOUND)
+       (Pos.succ (pos_add_nat (inject_l_position extra_pc injections injnum) k))).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER.
+  }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  destruct a as [pc' l'].
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at prog pc' extra_pc l').
+  progress fold (inject_l (snd (inject_at prog pc' extra_pc l'))
+              (fst (inject_at prog pc' extra_pc l'))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  (* Case injnum = 0: the instruction was written by the head injection and
+     is preserved by the remaining ones. *)
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - apply inject_at_injected.
+    - rewrite inject_at_increases.
+      apply pos_add_nat_monotone.
+      lia.
+    - rewrite forallb_forall.
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      pose proof (pos_add_nat_increases extra_pc k).
+      lia.
+  }
+  (* Case injnum = S injnum': use the induction hypothesis on the tail. *)
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (pc := pc); trivial.
+  rewrite forallb_forall.
+  rewrite forallb_forall in BELOW2.
+  intros loc IN.
+  specialize BELOW2 with loc.
+  apply BELOW2 in IN.
+  pose proof (pos_add_nat_increases extra_pc (Datatypes.length l')).
+  rewrite Pos.ltb_lt.
+  rewrite Pos.ltb_lt in IN.
+  lia.
+Qed.
+
+(** End of an injected sequence after [inject_l].  Assume [prog] has
+    instruction [i] at [pc] ([BEFORE]), the injection targets are pairwise
+    distinct ([DISTINCT]) and strictly below [extra_pc] ([BELOW]), and the
+    injection number [injnum] is [(pc, l)] ([NUMBER]).  Then the position at
+    offset [length l] from [inject_l_position extra_pc injections injnum]
+    holds [Inop (successor i)]. *)
+Lemma inject_l_injected_end:
+  forall injections prog injnum pc i l extra_pc
+         (BEFORE : PTree.get pc prog = Some i)
+         (DISTINCT : list_norepet (map fst injections))
+         (BELOW : forallb (fun injection => (fst injection) <? extra_pc) injections = true)
+         (NUMBER : nth_error injections injnum = Some (pc, l)),
+    PTree.get (pos_add_nat (inject_l_position extra_pc injections injnum)
+                           (List.length l))
+              (snd (inject_l prog extra_pc injections)) =
+    Some (Inop (successor i)).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER.
+  }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  destruct a as [pc' l'].
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at prog pc' extra_pc l').
+  progress fold (inject_l (snd (inject_at prog pc' extra_pc l'))
+              (fst (inject_at prog pc' extra_pc l'))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  (* Case injnum = 0: the final [Inop] was written by the head injection and
+     is preserved by the remaining ones. *)
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - apply inject_at_injected_end; trivial.
+    - rewrite inject_at_increases.
+      apply pos_add_nat_monotone.
+      lia.
+    - rewrite forallb_forall.
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      pose proof (pos_add_nat_increases extra_pc (Datatypes.length l)).
+      lia.
+  }
+  (* Case injnum = S injnum': use the induction hypothesis on the tail; this
+     requires that the head injection did not touch the instruction at [pc]. *)
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (pc := pc); trivial.
+  {
+    rewrite <- BEFORE.
+    apply inject_at_preserves.
+    {
+      apply nth_error_In in NUMBER.
+      rewrite forallb_forall in BELOW2.
+      specialize BELOW2 with (pc, l).
+      apply BELOW2 in NUMBER.
+      apply Pos.ltb_lt in NUMBER.
+      simpl in NUMBER.
+      assumption.
+    }
+    simpl in DISTINCT.
+    inv DISTINCT.
+    intro SAME.
+    subst pc'.
+    apply nth_error_in in NUMBER.
+    assert (In (fst (pc, l)) (map fst injections)) as Z.
+    { apply in_map. assumption.
+    }
+    simpl in Z.
+    auto.
+  }
+  { inv DISTINCT.
+    assumption.
+  }
+  {
+    rewrite forallb_forall.
+    rewrite forallb_forall in BELOW2.
+    intros loc IN.
+    specialize BELOW2 with loc.
+    apply BELOW2 in IN.
+    pose proof (pos_add_nat_increases extra_pc (Datatypes.length l')).
+    rewrite Pos.ltb_lt.
+    rewrite Pos.ltb_lt in IN.
+    assert (pos_add_nat extra_pc (Datatypes.length l') <
+            pos_add_nat extra_pc (S (Datatypes.length l'))).
+    { apply pos_add_nat_monotone.
+      lia.
+    }
+    lia.
+  }
+Qed.
+
+
+(** Redirection of the original instruction after [inject_l].  Under the same
+    hypotheses as [inject_l_injected_end], the code at the target [pc] of the
+    injection number [injnum] becomes [alter_successor i] applied to
+    [inject_l_position extra_pc injections injnum]. *)
+Lemma inject_l_redirects:
+  forall injections prog injnum pc i l extra_pc
+         (BEFORE : PTree.get pc prog = Some i)
+         (DISTINCT : list_norepet (map fst injections))
+         (BELOW : forallb (fun injection => (fst injection) <? extra_pc) injections = true)
+         (NUMBER : nth_error injections injnum = Some (pc, l)),
+    PTree.get pc (snd (inject_l prog extra_pc injections)) =
+    Some (alter_successor i (inject_l_position extra_pc injections injnum)).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER.
+    discriminate NUMBER.
+  }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  destruct a as [pc' l'].
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at prog pc' extra_pc l').
+  progress fold (inject_l (snd (inject_at prog pc' extra_pc l'))
+              (fst (inject_at prog pc' extra_pc l'))
+              injections).
+  simpl in BELOW1.
+  apply Pos.ltb_lt in BELOW1.
+  inv DISTINCT.
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  (* Case injnum = 0: the head injection rewrites [pc]; distinctness of the
+     targets ensures that no later injection touches it again. *)
+  { inv NUMBER.
+    rewrite inject_l_preserves; simpl.
+    - apply inject_at_redirects; trivial.
+    - rewrite inject_at_increases.
+      pose proof (pos_add_nat_increases extra_pc (S (Datatypes.length l))).
+      lia.
+    - rewrite forallb_forall.
+      intros loc IN.
+      destruct loc as [pc' l'].
+      simpl in *.
+      destruct peq; trivial.
+      subst pc'.
+      apply in_map with (f := fst) in IN.
+      simpl in IN.
+      exfalso.
+      auto.
+  }
+  (* Case injnum = S injnum': the head injection leaves [pc] unchanged, then
+     the induction hypothesis applies to the tail. *)
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (pc := pc) (l := l); trivial.
+  {
+    rewrite <- BEFORE.
+    apply nth_error_In in NUMBER.
+    rewrite forallb_forall in BELOW2.
+    specialize BELOW2 with (pc, l).
+    simpl in BELOW2.
+    rewrite Pos.ltb_lt in BELOW2.
+    apply inject_at_preserves; auto.
+    assert (In (fst (pc, l)) (map fst injections)) as Z.
+    { apply in_map. assumption.
+    }
+    simpl in Z.
+    intro EQ.
+    subst pc'.
+    auto.
+  }
+  {
+    rewrite forallb_forall.
+    rewrite forallb_forall in BELOW2.
+    intros loc IN.
+    specialize BELOW2 with loc.
+    apply BELOW2 in IN.
+    pose proof (pos_add_nat_increases extra_pc (Datatypes.length l')).
+    rewrite Pos.ltb_lt.
+    rewrite Pos.ltb_lt in IN.
+    assert (pos_add_nat extra_pc (Datatypes.length l') <
+            pos_add_nat extra_pc (S (Datatypes.length l'))).
+    { apply pos_add_nat_monotone.
+      lia.
+    }
+    lia.
+  }
+Qed.
+
+(*
+Lemma inject'_preserves :
+  forall injections prog extra_pc pc0,
+    pc0 < extra_pc ->
+    PTree.get pc0 injections = None ->
+    PTree.get pc0 (snd (inject' prog extra_pc injections)) = PTree.get pc0 prog.
+Proof.
+  intros. unfold inject'.
+  rewrite PTree.fold_spec.
+  change (fold_left
+        (fun (a : node * code) (p : positive * list inj_instr) =>
+         inject_at' a (fst p) (snd p)) (PTree.elements injections)
+        (extra_pc, prog)) with (inject_l prog extra_pc (PTree.elements injections)).
+  apply inject_l_preserves; trivial.
+  rewrite List.forallb_forall.
+  intros injection IN.
+  destruct injection as [pc l].
+  simpl.
+  apply PTree.elements_complete in IN.
+  destruct (peq pc pc0); trivial.
+  congruence.
+Qed.
+
+Lemma inject_preserves :
+  forall injections prog extra_pc pc0,
+    pc0 < extra_pc ->
+    PTree.get pc0 injections = None ->
+    PTree.get pc0 (inject prog extra_pc injections) = PTree.get pc0 prog.
+Proof.
+  unfold inject'.
+  apply inject'_preserves.
+Qed.
+*)
+
+Section INJECTOR.
+  Variable gen_injections : function -> node -> reg -> PTree.t (list inj_instr).
+
+  (** Program matching relation: each function definition of [tp] is the
+      result of [transf_fundef gen_injections] on the corresponding one of
+      [p], and variable information is related by equality. *)
+  Definition match_prog (p tp: RTL.program) :=
+    match_program (fun ctx f tf => transf_fundef gen_injections f = OK tf) eq p tp.
+
+  (** A successful run of [transf_program] yields a program related to the
+      input by [match_prog]. *)
+  Lemma transf_program_match:
+    forall p tp, transf_program gen_injections p = OK tp -> match_prog p tp.
+  Proof.
+    intros. eapply match_transform_partial_program; eauto.
+  Qed.
+
+  Section PRESERVATION.
+
+    Variables prog tprog: program.
+    Hypothesis TRANSF: match_prog prog tprog.
+    Let ge := Genv.globalenv prog.
+    Let tge := Genv.globalenv tprog.
+
+    (** [match_regs f rs rs'] holds when [rs'] agrees with [rs] on every
+        register that is at most [max_reg_function f].  Registers above that
+        bound are unconstrained. *)
+    Definition match_regs (f : function) (rs rs' : regset) :=
+      forall r, r <= max_reg_function f -> (rs'#r = rs#r).
+
+    (** [match_regs f] is reflexive. *)
+    Lemma match_regs_refl : forall f rs, match_regs f rs rs.
+    Proof.
+      unfold match_regs. intros. trivial.
+    Qed.
+
+    (** [match_regs f] is transitive. *)
+    Lemma match_regs_trans : forall f rs1 rs2 rs3,
+        match_regs f rs1 rs2 -> match_regs f rs2 rs3 -> match_regs f rs1 rs3.
+    Proof.
+      unfold match_regs. intros until rs3. intros M12 M23 r.
+      specialize M12 with r.
+      specialize M23 with r.
+      intuition congruence.
+    Qed.
+
+    (** Relation between a stack frame of the original program and one of the
+        transformed program.  The frames share the result register and stack
+        pointer; the functions are related by [transf_function]; the register
+        sets are related by [match_regs].  The return points may differ: the
+        transformed frame resumes at [pc'] and [STAR] requires that, from any
+        call stack, memory and register set, the transformed function reaches
+        the original return point [pc] from [pc'] by a silent sequence of
+        steps that leaves memory unchanged and preserves [match_regs]. *)
+    Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+    | match_frames_intro: forall res f tf sp pc pc' rs trs
+        (FUN : transf_function gen_injections f = OK tf)
+        (REGS : match_regs f rs trs)
+        (STAR:
+           forall ts m trs1,
+           exists trs2,
+             (match_regs f trs1 trs2) /\
+             Smallstep.star RTL.step tge
+                                      (State ts tf sp pc' trs1 m) E0
+                                      (State ts tf sp pc trs2 m)),
+        match_frames (Stackframe res f sp pc rs)
+                     (Stackframe res tf sp pc' trs).
+
+    (** Relation between execution states of the original and transformed
+        programs.
+        - Regular states: same stack pointer, program point and memory;
+          functions related by [transf_function]; stacks related pointwise by
+          [match_frames]; registers related by [match_regs].
+        - Call states: same arguments and memory; function definitions
+          related by [transf_fundef]; stacks related by [match_frames].
+        - Return states: same result and memory; stacks related by
+          [match_frames]. *)
+    Inductive match_states: state -> state -> Prop :=
+    | match_states_intro:
+      forall s f tf sp pc rs trs m ts
+        (FUN : transf_function gen_injections f = OK tf)
+        (STACKS: list_forall2 match_frames s ts)
+        (REGS: match_regs f rs trs),
+      match_states (State s f sp pc rs m) (State ts tf sp pc trs m)
+    | match_states_call:
+        forall s fd tfd args m ts
+        (FUN : transf_fundef gen_injections fd = OK tfd)
+        (STACKS: list_forall2 match_frames s ts),
+          match_states (Callstate s fd args m) (Callstate ts tfd args m)
+    | match_states_return:
+        forall s res m ts
+               (STACKS: list_forall2 match_frames s ts),
+          match_states (Returnstate s res m)
+                       (Returnstate ts res m).
+
+    (** A function found in [ge] from a value has a counterpart in [tge],
+        found from the same value, which is its image under
+        [transf_fundef]. *)
+    Lemma functions_translated:
+      forall (v: val) (f: RTL.fundef),
+        Genv.find_funct ge v = Some f ->
+        exists tf,
+          Genv.find_funct tge v = Some tf /\
+          transf_fundef gen_injections f = OK tf.
+    Proof.
+      apply (Genv.find_funct_transf_partial TRANSF).
+    Qed.
+
+    (** A function found in [ge] at block [b] has a counterpart in [tge] at
+        the same block, which is its image under [transf_fundef]. *)
+    Lemma function_ptr_translated:
+      forall (b: block) (f: RTL.fundef),
+        Genv.find_funct_ptr ge b = Some f ->
+        exists tf,
+          Genv.find_funct_ptr tge b = Some tf /\
+          transf_fundef gen_injections f = OK tf.
+    Proof.
+      apply (Genv.find_funct_ptr_transf_partial TRANSF).
+    Qed.
+
+    (** Symbol lookup gives the same result in [tge] as in [ge]. *)
+    Lemma symbols_preserved:
+      forall id,
+        Genv.find_symbol tge id = Genv.find_symbol ge id.
+    Proof.
+      apply (Genv.find_symbol_match TRANSF).
+    Qed.
+
+    (** The symbol environments of [ge] and [tge] are equivalent. *)
+    Lemma senv_preserved:
+      Senv.equiv ge tge.
+    Proof.
+      apply (Genv.senv_match TRANSF).
+    Qed.
+
+    (** [transf_fundef] preserves the signature ([funsig]) of a function
+        definition. *)
+    Lemma sig_preserved:
+      forall f tf, transf_fundef gen_injections f = OK tf
+                   -> funsig tf = funsig f.
+    Proof.
+      destruct f; simpl; intros; monadInv H; trivial.
+      unfold transf_function in *.
+      destruct valid_injections1 in EQ.
+      2: discriminate.
+      inv EQ.
+      reflexivity.
+    Qed.
+
+    (** [transf_function] preserves [fn_stacksize]. *)
+    Lemma stacksize_preserved:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_stacksize tf = fn_stacksize f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    (** [transf_function] preserves [fn_params]. *)
+    Lemma params_preserved:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_params tf = fn_params f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    (** [transf_function] preserves [fn_entrypoint]. *)
+    Lemma entrypoint_preserved:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_entrypoint tf = fn_entrypoint f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    (** [transf_function] preserves [fn_sig].  This is the function-level
+        counterpart of [sig_preserved], which is stated on function
+        definitions. *)
+    Lemma sig_preserved2:
+      forall f tf, transf_function gen_injections f = OK tf ->
+                   fn_sig tf = fn_sig f.
+    Proof.
+      destruct f.
+      unfold transf_function.
+      intros.
+      destruct valid_injections1 in H.
+      2: discriminate.
+      inv H.
+      reflexivity.
+    Qed.
+
+    (** Every initial state of [prog] is matched by an initial state of
+        [tprog]: a call state with an empty stack, no arguments and the same
+        initial memory, calling the translated main function. *)
+    Lemma transf_initial_states:
+      forall S1, RTL.initial_state prog S1 ->
+                 exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+    Proof.
+      intros. inversion H.
+      exploit function_ptr_translated; eauto.
+      intros (tf & A & B).
+      exists (Callstate nil tf nil m0); split.
+      - econstructor; eauto.
+        + eapply (Genv.init_mem_match TRANSF); eauto.
+        + replace (prog_main tprog) with (prog_main prog).
+          rewrite symbols_preserved. eauto.
+          symmetry. eapply match_program_main; eauto.
+        + rewrite <- H3. eapply sig_preserved; eauto.
+      - constructor; trivial.
+        constructor.
+    Qed.
+
+    (** If two states match and the first is final with result [r], then the
+        second is final with the same result. *)
+    Lemma transf_final_states:
+      forall S1 S2 r, match_states S1 S2 ->
+                      final_state S1 r -> final_state S2 r.
+    Proof.
+      intros. inv H0. inv H. inv STACKS. constructor.
+    Qed.
+
+    (** Assigning a register [res] strictly above [max_reg_function f]
+        preserves [match_regs f], since that relation only looks at registers
+        up to the bound. *)
+    Lemma assign_above:
+      forall f trs res v,
+        (max_reg_function f) < res ->
+        match_regs f trs trs # res <- v.
+    Proof.
+      unfold match_regs.
+      intros.
+      apply Regmap.gso.
+      lia.
+    Qed.
+    
+    (** Execution of one injected instruction.  If the transformed function
+        has [inject_instr ii (Pos.succ pc)] at [pc] and [ii] satisfies
+        [valid_injection_instr], then the transformed program takes one
+        silent step from [pc] to [Pos.succ pc], leaving memory unchanged and
+        producing a register set related to the previous one by
+        [match_regs f]. *)
+    Lemma transf_function_inj_step:
+      forall ts f tf sp pc trs m ii
+        (FUN : transf_function gen_injections f = OK tf)
+        (GET : (fn_code tf) ! pc = Some (inject_instr ii (Pos.succ pc)))
+        (VALID : valid_injection_instr (max_reg_function f) ii = true),
+      exists trs',
+        RTL.step tge
+              (State ts tf sp pc trs m) E0
+              (State ts tf sp (Pos.succ pc) trs' m) /\
+        match_regs (f : function) trs trs'.
+    Proof.
+      destruct ii as [ |op args res | chunk addr args res]; simpl; intros.
+      (* Injected nop: registers are unchanged. *)
+      - exists trs.
+        split.
+        * apply exec_Inop; assumption.
+        * apply match_regs_refl.
+      (* Injected operation: validity rules out trapping operations, so the
+         evaluation cannot fail (second sub-case is contradictory). *)
+      - repeat rewrite andb_true_iff in VALID.
+        rewrite negb_true_iff in VALID.
+        destruct VALID as (MAX_REG & NOTRAP & LENGTH).
+        rewrite Pos.ltb_lt in MAX_REG.
+        rewrite Nat.eqb_eq in LENGTH.
+        destruct (eval_operation ge sp op trs ## args m) as [v | ] eqn:EVAL.
+        + exists (trs # res <- v).
+          split.
+          * apply exec_Iop with (op := op) (args := args) (res := res); trivial.
+            rewrite eval_operation_preserved with (ge1 := ge).
+            assumption.
+            exact symbols_preserved.
+          * apply assign_above; auto.
+        + exfalso.
+          generalize EVAL.
+          apply is_trapping_op_sound; trivial.
+          rewrite map_length.
+          assumption.
+      (* Injected load: when address evaluation or the memory load fails, the
+         non-trapping load rules assign [Vundef] to the destination. *)
+      - rewrite Pos.ltb_lt in VALID.
+        destruct (eval_addressing ge sp addr trs ## args) as [a | ] eqn:ADDR.
+        + destruct (Mem.loadv chunk m a) as [v | ] eqn:LOAD.
+          * exists (trs # res <- v).
+            split.
+            ** apply exec_Iload with (trap := NOTRAP) (chunk := chunk) (addr := addr) (args := args) (dst := res) (a := a); trivial.
+               all: try rewrite eval_addressing_preserved with (ge1 := ge).
+               all: auto using symbols_preserved.
+            ** apply assign_above; auto.
+          * exists (trs # res <- Vundef).
+            split.
+            ** apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (args := args) (dst := res) (a := a); trivial.
+               all: rewrite eval_addressing_preserved with (ge1 := ge).
+               all: auto using symbols_preserved.
+            ** apply assign_above; auto.
+        + exists (trs # res <- Vundef).
+          split.
+          * apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := args) (dst := res); trivial.
+            all: rewrite eval_addressing_preserved with (ge1 := ge).
+            all: auto using symbols_preserved.
+          * apply assign_above; auto.
+    Qed.
+
+    (** The element returned by [bounded_nth] belongs to the list. *)
+    Lemma bounded_nth_In: forall {T : Type} (l : list T) k LESS,
+        In (bounded_nth k l LESS) l.
+    Proof.
+      induction l; simpl; intros.
+      lia.
+      destruct k; simpl.
+      - left; trivial.
+      - right. apply IHl.
+    Qed.
+
+    (** Execution of the last [k] instructions of an injected sequence.
+        Let [(src_pc, inj_code)] be injection number [inj_n] of the function
+        and [inj_pc] its position as given by [inject_l_position].  For every
+        [k] at most [length inj_code], the transformed program goes, by a
+        silent sequence of steps that leaves memory unchanged, from offset
+        [length inj_code - k] to offset [length inj_code] of [inj_pc], ending
+        with a register set related to the initial one by [match_regs f].
+        The proof is by induction on [k]: one step is obtained from
+        [transf_function_inj_step], the rest from the induction hypothesis. *)
+    Lemma transf_function_inj_starstep_rec :
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (k : nat)
+        (CUR : (k <= (List.length inj_code))%nat)
+        (trs : regset),
+      exists trs',
+        match_regs (f : function) trs trs' /\
+        Smallstep.star RTL.step tge
+              (State ts tf sp (pos_add_nat inj_pc
+                                    ((List.length inj_code) - k)%nat) trs m) E0
+              (State ts tf sp (pos_add_nat inj_pc (List.length inj_code)) trs' m).
+    Proof.
+      induction k; simpl; intros.
+      { rewrite Nat.sub_0_r.
+        exists trs.
+        split.
+        - apply match_regs_refl.
+        - constructor.
+      }
+      assert (k <= Datatypes.length inj_code)%nat as KK by lia.
+      pose proof (IHk KK) as IH.
+      clear IHk KK.
+      pose proof FUN as VALIDATE.
+      unfold transf_function, valid_injections1 in VALIDATE.
+      destruct forallb eqn:FORALL in VALIDATE.
+      2: discriminate.
+      injection VALIDATE.
+      intro TF.
+      symmetry in TF.
+      pose proof (inject_l_injected (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) (fn_code f) inj_n src_pc inj_code (Pos.succ (max_pc_function f)) ((List.length inj_code) - (S k))%nat) as INJECTED.
+      lapply INJECTED.
+      { clear INJECTED.
+        intro INJECTED.
+        assert ((Datatypes.length inj_code - S k <
+                 Datatypes.length inj_code)%nat) as LESS by lia.
+        pose proof (INJECTED INJ LESS) as INJ'.
+        replace (snd
+            (inject_l (fn_code f) (Pos.succ (max_pc_function f))
+                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))))) with (fn_code tf) in INJ'.
+        2: rewrite TF; simpl; reflexivity.
+        apply transf_function_inj_step with (f:=f) (ts:=ts) (sp:=sp) (trs:=trs) (m := m) in INJ'.
+        2: assumption.
+        {
+          destruct INJ' as [trs'' [STEP STEPMATCH]].
+          destruct (IH trs'') as [trs' [STARSTEPMATCH STARSTEP]].
+          exists trs'.
+          split.
+          { apply match_regs_trans with (rs2 := trs''); assumption. }
+          eapply Smallstep.star_step with (t1:=E0) (t2:=E0).
+          {
+            rewrite POSITION in STEP.
+            exact STEP.
+          }
+          {
+            replace (Datatypes.length inj_code - k)%nat
+              with (S (Datatypes.length inj_code - (S k)))%nat in STARSTEP by lia.
+            simpl pos_add_nat in STARSTEP.
+            exact STARSTEP.
+          }
+          constructor.
+        }
+        (* Validity of the instruction being executed, extracted from the
+           check performed by [transf_function]. *)
+        rewrite forallb_forall in FORALL.
+        specialize FORALL with  (src_pc, inj_code).
+        lapply FORALL.
+        {
+          simpl.
+          rewrite andb_true_iff.
+          intros (SRC & ALL_VALID).
+          rewrite forallb_forall in ALL_VALID.
+          apply ALL_VALID.
+          apply bounded_nth_In.
+        }
+        apply nth_error_In with (n := inj_n).
+        assumption.
+      }
+      (* Premise of [inject_l_injected]: every injection target is strictly
+         below the first fresh program point. *)
+      rewrite forallb_forall in FORALL.
+      rewrite forallb_forall.
+      intros x INx.
+      rewrite Pos.ltb_lt.
+      pose proof (FORALL x INx) as ALLx.
+      rewrite andb_true_iff in ALLx.
+      destruct ALLx as [ALLx1 ALLx2].
+      rewrite Pos.leb_le in ALLx1.
+      lia.
+    Qed.
+    
+    (** Execution of a whole injected sequence.  With [(src_pc, inj_code)]
+        the injection number [inj_n] and [inj_pc] its position, the
+        transformed program goes from [inj_pc] to offset [length inj_code] of
+        [inj_pc] by a silent sequence of steps that leaves memory unchanged
+        and preserves [match_regs f].  This is
+        [transf_function_inj_starstep_rec] taken at [k = length inj_code]. *)
+    Lemma transf_function_inj_starstep :
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (trs : regset),
+      exists trs',
+        match_regs (f : function) trs trs' /\
+        Smallstep.star RTL.step tge
+              (State ts tf sp inj_pc trs m) E0
+              (State ts tf sp (pos_add_nat inj_pc (List.length inj_code)) trs' m).
+    Proof.
+      intros.
+      replace (State ts tf sp inj_pc trs m) with (State ts tf sp (pos_add_nat inj_pc ((List.length inj_code) - (List.length inj_code))%nat) trs m).
+      eapply transf_function_inj_starstep_rec; eauto.
+      f_equal.
+      (* [Nat.sub_diag : n - n = 0] replaces the legacy Arith lemma
+         [minus_n_n], which is deprecated in recent versions of Coq. *)
+      rewrite Nat.sub_diag.
+      reflexivity.
+    Qed.
+
+    (** Leaving an injected sequence.  With [(src_pc, inj_code)] the
+        injection number [inj_n], [inj_pc] its position, and [i] the original
+        instruction at [src_pc], the transformed program takes one silent
+        step from offset [length inj_code] of [inj_pc] to [successor i],
+        leaving registers and memory unchanged. *)
+    Lemma transf_function_inj_end :
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code i
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (SRC: (fn_code f) ! src_pc = Some i)
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (trs : regset),
+        RTL.step tge
+          (State ts tf sp (pos_add_nat inj_pc (List.length inj_code)) trs m) E0
+          (State ts tf sp (successor i) trs m).
+    Proof.
+      intros.
+      pose proof FUN as VALIDATE.
+      unfold transf_function, valid_injections1 in VALIDATE.
+      destruct forallb eqn:FORALL in VALIDATE.
+      2: discriminate.
+      injection VALIDATE.
+      intro TF.
+      symmetry in TF.
+      pose proof (inject_l_injected_end (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) (fn_code f) inj_n src_pc i inj_code (Pos.succ (max_pc_function f))) as INJECTED.
+      lapply INJECTED.
+      2: assumption.
+      clear INJECTED.
+      intro INJECTED.
+      lapply INJECTED.
+      (* The keys of [PTree.elements] are pairwise distinct; the trailing
+         [fail] makes this line fail unless [apply] closes the goal. *)
+      2: apply (PTree.elements_keys_norepet (gen_injections f (max_pc_function f) (max_reg_function f))); fail.
+      clear INJECTED.
+      intro INJECTED.
+      lapply INJECTED.
+      { clear INJECTED.
+        intro INJECTED.
+        pose proof (INJECTED INJ) as INJ'.
+        clear INJECTED.
+        replace (snd
+                   (inject_l (fn_code f) (Pos.succ (max_pc_function f))
+                             (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))))) with (fn_code tf) in INJ'.
+        2: rewrite TF; simpl; reflexivity.
+        rewrite POSITION in INJ'.
+        apply exec_Inop.
+        assumption.
+      }
+      clear INJECTED.
+      (* Every injection target is strictly below the first fresh program
+         point, by the validity check of [transf_function]. *)
+      rewrite forallb_forall in FORALL.
+      rewrite forallb_forall.
+      intros x INx.
+      rewrite Pos.ltb_lt.
+      pose proof (FORALL x INx) as ALLx.
+      rewrite andb_true_iff in ALLx.
+      destruct ALLx as [ALLx1 ALLx2].
+      rewrite Pos.leb_le in ALLx1.
+      lia.
+    Qed.
+
+    Lemma transf_function_inj_plusstep : (* Whole injected sequence: from [inj_pc], [tf] reaches [successor i] in at least one silent step, ending in registers [trs'] related to [trs] by [match_regs f]. *)
+      forall ts f tf sp m inj_n src_pc inj_pc inj_code i
+        (FUN : transf_function gen_injections f = OK tf)
+        (INJ : nth_error (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n =
+               Some (src_pc, inj_code))
+        (SRC: (fn_code f) ! src_pc = Some i)
+        (POSITION : inject_l_position (Pos.succ (max_pc_function f))
+                                      (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) inj_n = inj_pc)
+        (trs : regset),
+      exists trs',
+        match_regs (f : function) trs trs' /\
+        Smallstep.plus RTL.step tge
+              (State ts tf sp inj_pc trs m) E0
+              (State ts tf sp (successor i) trs' m).
+    Proof.
+      intros.
+      destruct (transf_function_inj_starstep ts f tf sp m inj_n src_pc inj_pc inj_code FUN INJ POSITION trs) as [trs' [MATCH PLUS]]. (* execution starting at inj_pc given by transf_function_inj_starstep; despite its name, PLUS is used below as the leading-steps premise of plus_right *)
+      exists trs'.
+      split. assumption.
+      eapply Smallstep.plus_right. (* leading steps followed by one final step *)
+      exact PLUS.
+      eapply transf_function_inj_end; eassumption. (* the final step that leaves the injected code *)
+      reflexivity. (* trace side condition of plus_right *)
+    Qed.
+    
+    Lemma transf_function_preserves: (* A pc not above max_pc_function f and with no injection attached has the same code entry in [tf] as in [f]. *)
+      forall f tf pc 
+        (FUN : transf_function gen_injections f = OK tf)
+        (LESS : pc <= max_pc_function f)
+        (NOCHANGE : (gen_injections f (max_pc_function f) (max_reg_function f)) ! pc = None),
+        (fn_code tf) ! pc = (fn_code f) ! pc.
+    Proof.
+      intros.
+      unfold transf_function in FUN.
+      destruct valid_injections1 in FUN. (* case split on the validity check *)
+      2: discriminate. (* check failed: FUN cannot be an equation with OK tf *)
+      inv FUN. (* tf is now replaced by its definition *)
+      simpl.
+      apply inject_l_preserves. (* leaves two side conditions, handled in turn below *)
+      lia.
+      rewrite forallb_forall. (* second side condition: a boolean test over every element of the injection list *)
+      intros x INx.
+      destruct peq; trivial. (* only the case where the element is keyed by pc needs work *)
+      subst pc.
+      exfalso.
+      destruct x as [pc ii].
+      simpl in *.
+      apply PTree.elements_complete in INx. (* list membership becomes a successful lookup at pc *)
+      congruence. (* which contradicts NOCHANGE *)
+    Qed.
+    
+    Lemma transf_function_redirects: (* A pc carrying an injection [injl] keeps its instruction [ii] in [tf] but with successor altered to some [pc']; from [pc'], [tf] reaches the original [successor ii] in at least one silent step, with registers related by [match_regs f]. *)
+      forall f tf pc injl ii
+        (FUN : transf_function gen_injections f = OK tf)
+        (LESS : pc <= max_pc_function f)
+        (INJECTION : (gen_injections f (max_pc_function f) (max_reg_function f)) ! pc = Some injl)
+        (INSTR: (fn_code f) ! pc = Some ii),
+      exists pc' : node,
+        (fn_code tf) ! pc = Some (alter_successor ii pc') /\
+        (forall ts sp m trs,
+            exists trs',
+            match_regs f trs trs' /\
+            Smallstep.plus RTL.step tge
+              (State ts tf sp pc' trs m) E0
+              (State ts tf sp (successor ii) trs' m)).        
+    Proof.
+      intros.
+      apply PTree.elements_correct in INJECTION. (* the lookup becomes membership in the element list *)
+      apply In_nth_error in INJECTION. (* and membership becomes an index injn into that list *)
+      destruct INJECTION as [injn INJECTION].
+      exists (inject_l_position (Pos.succ (max_pc_function f))
+                                (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) injn). (* witness pc': the position computed for the injn-th injection *)
+      split.
+      { unfold transf_function in FUN. (* first conjunct: the code of tf at pc *)
+        destruct (valid_injections1) eqn:VALID in FUN.
+        2: discriminate. (* check failed: FUN cannot be an equation with OK tf *)
+        inv FUN.
+        simpl.
+        apply inject_l_redirects with (l := injl); auto. (* remaining side conditions are proved below *)
+        apply PTree.elements_keys_norepet.
+        unfold valid_injections1 in VALID.
+        rewrite forallb_forall in VALID. (* turn both boolean forallb statements into quantified ones *)
+        rewrite forallb_forall.
+        intros x INx.
+        pose proof (VALID x INx) as VALIDx.
+        clear VALID.
+        rewrite andb_true_iff in VALIDx.
+        rewrite Pos.leb_le in VALIDx.
+        destruct VALIDx as [VALIDx1 VALIDx2].
+        rewrite Pos.ltb_lt.
+        lia. (* the non-strict bound checked by validation yields the strict bound required *)
+      }
+      intros. (* second conjunct: execution of the injected code from pc' *)
+      pose proof (transf_function_inj_plusstep ts f tf sp m injn pc
+                 (inject_l_position (Pos.succ (max_pc_function f))
+                                    (PTree.elements (gen_injections f (max_pc_function f) (max_reg_function f))) injn)
+                 injl ii FUN INJECTION INSTR) as TRANS.
+      lapply TRANS. (* the POSITION premise holds by reflexivity for this choice of pc' *)
+      2: reflexivity.
+      clear TRANS.
+      intro TRANS.
+      exact (TRANS trs).
+    Qed.
+
+    Lemma transf_function_preserves_uses: (* Register files related by [match_regs f] give the same values to the registers used by any instruction of [f]. *)
+      forall f tf pc rs trs ii
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some ii),
+        trs ## (instr_uses ii) = rs ## (instr_uses ii).
+    Proof.
+      intros.
+      assert (forall r, In r (instr_uses ii) ->
+                        trs # r = rs # r) as SAME. (* pointwise agreement on each used register *)
+      {
+        intros r INr.
+        apply MATCH. (* leaves the side condition of match_regs for r *)
+        apply (max_reg_function_use f pc ii); auto. (* discharged because r is used by an instruction of f *)
+      }
+      induction (instr_uses ii); simpl; trivial. (* lift pointwise agreement to the list of values *)
+      f_equal.
+      - apply SAME. constructor; trivial. (* head register *)
+      - apply IHl. intros. (* tail: SAME restricted to the remaining registers *)
+        apply SAME. right. assumption.
+    Qed.
+
+    (*
+    Lemma transf_function_preserves_builtin_arg:
+      forall rs trs ef res sp m pc'
+        (arg : builtin_arg reg)
+        (SAME : (forall r,
+                    In r (instr_uses (Ibuiltin ef args res pc')) ->
+                    trs # r = rs # r) )
+        varg
+        (EVAL : eval_builtin_arg ge (fun r => rs#r) sp m arg varg),
+        eval_builtin_arg ge (fun r => trs#r) sp m arg varg.
+    Proof.
+     *)
+    
+    Lemma transf_function_preserves_builtin_args_rec: (* If [trs] and [rs] agree on every register used by a builtin with arguments [args], then any evaluation of [args] under [rs] is also an evaluation under [trs]. *)
+      forall rs trs ef res sp m pc'
+        (args : list (builtin_arg reg))
+        (SAME : (forall r,
+                    In r (instr_uses (Ibuiltin ef args res pc')) ->
+                       trs # r = rs # r) )
+        (vargs : list val)
+        (EVAL : eval_builtin_args ge (fun r => rs#r) sp m args vargs),
+        eval_builtin_args ge (fun r => trs#r) sp m args vargs.
+    Proof.
+      unfold eval_builtin_args.
+      induction args; intros; inv EVAL. (* induction on the argument list, inverting the evaluation each time *)
+      - constructor. (* empty argument list *)
+      - constructor.
+        + induction H1. (* head argument: induction on its evaluation derivation *)
+          all: try (constructor; auto; fail). (* cases closed directly by a constructor *)
+          * rewrite <- SAME. (* register argument: restate the value through trs so that eval_BA applies *)
+            apply eval_BA.
+            simpl. (* side condition of the rewrite: the register is among the uses *)
+            left. reflexivity.
+          * constructor. (* first case with two sub-arguments *)
+            ** apply IHeval_builtin_arg1.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr. (* membership in appended use lists, on both sides *)
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+            ** apply IHeval_builtin_arg2.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr.
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+          * constructor. (* second case with two sub-arguments; same script as the previous one *)
+            ** apply IHeval_builtin_arg1.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr.
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+            ** apply IHeval_builtin_arg2.
+               intros r INr.
+               apply SAME.
+               simpl.
+               simpl in INr.
+               rewrite in_app in INr.
+               rewrite in_app.
+               rewrite in_app.
+               tauto.
+        + apply IHargs. (* remaining arguments: outer induction hypothesis *)
+          2: assumption.
+          intros r INr.
+          apply SAME. (* registers of the tail are also registers of the whole list *)
+          simpl.
+          apply in_or_app.
+          right.
+          exact INr.
+    Qed.
+    
+    Lemma transf_function_preserves_builtin_args: (* For a builtin instruction of [f], argument evaluation under [rs] carries over to any [trs] related to [rs] by [match_regs f]. *)
+      forall f tf pc rs trs ef res sp m pc'
+        (args : list (builtin_arg reg))
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some (Ibuiltin ef args res pc'))
+        (vargs : list val)
+        (EVAL : eval_builtin_args ge (fun r => rs#r) sp m args vargs),
+        eval_builtin_args ge (fun r => trs#r) sp m args vargs.
+    Proof.
+      intros.
+      apply transf_function_preserves_builtin_args_rec with (rs := rs) (ef := ef) (res := res) (pc' := pc'). (* reduce to agreement on the registers used by the builtin *)
+      intros r INr.
+      apply MATCH. (* leaves the side condition of match_regs for r *)
+      apply (max_reg_function_use f pc (Ibuiltin ef args res pc')). (* discharged because r is used by an instruction of f *)
+      all: auto.
+    Qed.
+    
+    Lemma match_regs_write: (* [match_regs f] is preserved when the same value is written to the same register on both sides. *)
+      forall f rs trs res v
+             (MATCH : match_regs f rs trs),
+        match_regs f (rs # res <- v) (trs # res <- v).
+    Proof.
+      intros.
+      intros r LESS. (* unfold match_regs: a register r together with its side condition LESS *)
+      destruct (peq r res). (* is r the register that was written? *)
+      {
+        subst r.
+        rewrite Regmap.gss. (* written register: both sides read back v *)
+        symmetry.
+        apply Regmap.gss.
+      }
+      rewrite Regmap.gso. (* other register: both updates are irrelevant *)
+      rewrite Regmap.gso.
+      all: trivial.
+      apply MATCH. (* so the goal reduces to the original relation *)
+      trivial.
+    Qed.
+
+    Lemma match_regs_setres: (* [match_regs f] is preserved when the same builtin result is stored with [regmap_setres] on both sides. *)
+      forall f res rs trs vres
+             (MATCH : match_regs f rs trs),
+        match_regs f (regmap_setres res vres rs) (regmap_setres res vres trs).
+    Proof.
+      induction res; simpl; intros; trivial. (* cases where simplification leaves the hypothesis itself are closed here *)
+      apply match_regs_write; auto. (* remaining case: an actual register write *)
+    Qed.
+    
+    Lemma transf_function_preserves_ros: (* Call target resolution is preserved: if an [Icall] of [f] resolves to [fd] under [rs] in [ge], it resolves under [trs] in [tge] to some [tfd] that is the translation of [fd]. *)
+      forall f tf pc rs trs ros args res fd pc' sig
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some (Icall sig ros args res pc'))
+        (FIND : find_function ge ros rs = Some fd),
+      exists tfd, find_function tge ros trs = Some tfd
+                  /\ transf_fundef gen_injections fd = OK tfd.
+    Proof.
+      intros; destruct ros as  [r|id]. (* target given by a register r or by a symbol id *)
+      - apply functions_translated; auto.
+        replace (trs # r) with (hd Vundef (trs ## (instr_uses (Icall sig (inl r) args res pc')))) by reflexivity. (* r is the head of the use list of this call *)
+        rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial. (* so its value is the same in trs and rs *)
+      - simpl. rewrite symbols_preserved. (* symbol lookup gives the same result in tge and ge *)
+        simpl in FIND.
+        destruct (Genv.find_symbol ge id); try congruence. (* an unresolved symbol contradicts FIND *)
+        eapply function_ptr_translated; eauto.
+    Qed.
+
+    Lemma transf_function_preserves_ros_tail: (* Same statement as transf_function_preserves_ros, for an [Itailcall] instruction of [f]. *)
+      forall f tf pc rs trs ros args fd sig
+        (FUN : transf_function gen_injections f = OK tf)
+        (MATCH : match_regs f rs trs)
+        (INSTR : (fn_code f) ! pc = Some (Itailcall sig ros args))
+        (FIND : find_function ge ros rs = Some fd),
+      exists tfd, find_function tge ros trs = Some tfd
+                  /\ transf_fundef gen_injections fd = OK tfd.
+    Proof.
+      intros; destruct ros as  [r|id]. (* target given by a register r or by a symbol id *)
+      - apply functions_translated; auto.
+        replace (trs # r) with (hd Vundef (trs ## (instr_uses (Itailcall sig (inl r) args)))) by reflexivity. (* r is the head of the use list of this tail call *)
+        rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial. (* so its value is the same in trs and rs *)
+      - simpl. rewrite symbols_preserved. (* symbol lookup gives the same result in tge and ge *)
+        simpl in FIND.
+        destruct (Genv.find_symbol ge id); try congruence. (* an unresolved symbol contradicts FIND *)
+        eapply function_ptr_translated; eauto.
+    Qed.
+      
+    Theorem transf_step_correct:
+      forall s1 t s2, step ge s1 t s2 ->
+         forall ts1 (MS: match_states s1 ts1),
+         exists ts2, Smallstep.plus step tge ts1 t ts2 /\ match_states s2 ts2.
+    Proof.
+      induction 1; intros ts1 MS; inv MS; try (inv TRC).
+      - (* nop *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Inop.
+               exact ALTER.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs); assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Inop.
+            rewrite transf_function_preserves with (f:=f); eauto.
+            eapply max_pc_function_sound; eauto.
+          * constructor; trivial.
+            
+      - (* op *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # res <- v).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iop with (op := op) (args := args).
+               exact ALTER.
+               rewrite eval_operation_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iop op args res pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+                 simpl.
+                 eassumption.
+               }
+               exact symbols_preserved.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # res <- v); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iop with (op := op) (args := args).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_operation_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iop op args res pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+                 simpl.
+                 eassumption.
+               }
+               exact symbols_preserved.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+
+      - (* load *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # dst <- v).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iload with (trap := trap) (chunk := chunk) (addr := addr) (args := args) (a := a).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload trap chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+               eassumption.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # dst <- v); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iload with (trap := trap) (chunk := chunk) (addr := addr) (args := args) (a := a).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload trap chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** eassumption.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+        
+      - (* load notrap1 *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # dst <- Vundef).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := args).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # dst <- Vundef); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iload_notrap1 with (chunk := chunk) (addr := addr) (args := args).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+
+      - (* load notrap2 *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m)
+             (trs := trs # dst <- Vundef).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (args := args) (a := a).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+               eassumption.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs # dst <- Vundef); trivial.
+            apply match_regs_write.
+            assumption.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Iload_notrap2 with (chunk := chunk) (addr := addr) (args := args) (a := a).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace args with (instr_uses (Iload NOTRAP chunk addr args dst pc')) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** eassumption.
+          * constructor; trivial.
+            apply match_regs_write.
+            assumption.
+        
+      - (* store *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m') (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Istore with (chunk := chunk) (addr := addr) (args := args) (a := a) (src := src).
+               exact ALTER.
+               rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace (trs ## args) with (tl (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+               replace (trs # src) with (hd Vundef (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               simpl.
+               eassumption.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** reflexivity.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := trs); trivial.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Istore with (chunk := chunk) (addr := addr) (args := args) (a := a) (src := src).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** rewrite eval_addressing_preserved with (ge1 := ge).
+               {
+                 replace (trs ## args) with (tl (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+                 rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               }
+               exact symbols_preserved.
+            ** replace (trs # src) with (hd Vundef (trs ## (instr_uses (Istore chunk addr args src pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               simpl.
+               eassumption.
+          * constructor; trivial.
+      - (* call *)
+        destruct (transf_function_preserves_ros f tf pc rs trs ros args res fd pc' (funsig fd) FUN REGS H H0) as [tfd [TFD1 TFD2]].
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          simpl in ALTER.
+          econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Icall with (args := args) (sig := (funsig fd)) (ros := ros).
+            exact ALTER.
+            exact TFD1.
+            apply sig_preserved; auto.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Icall (funsig fd) (inl r) args res pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros.
+               destruct (SKIP ts0 sp m0 trs1) as [trs2 [MATCH PLUS]].
+               exists trs2. split. assumption.
+               apply Smallstep.plus_star. exact PLUS.
+               
+            ** replace (trs ## args) with (trs ## (instr_uses (Icall (funsig fd) (inr id) args res pc'))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros.
+               destruct (SKIP ts0 sp m0 trs1) as [trs2 [MATCH PLUS]].
+               exists trs2. split. assumption.
+               apply Smallstep.plus_star. exact PLUS.
+               
+        + econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Icall with (args := args) (sig := (funsig fd)) (ros := ros).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** exact TFD1.
+            ** apply sig_preserved; auto.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Icall (funsig fd) (inl r) args res pc')))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros. exists trs1. split.
+               apply match_regs_refl. constructor.
+               
+            ** replace (trs ## args) with (trs ## (instr_uses (Icall (funsig fd) (inr id) args res pc'))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+               constructor; auto.
+               constructor; auto.
+
+               intros. exists trs1. split.
+               apply match_regs_refl. constructor.
+        
+      -  (* tailcall *)
+        destruct (transf_function_preserves_ros_tail f tf pc rs trs ros args fd (funsig fd) FUN REGS H H0) as [tfd [TFD1 TFD2]].
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          simpl in ALTER.
+          econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Itailcall with (args := args) (sig := (funsig fd)) (ros := ros).
+            exact ALTER.
+            exact TFD1.
+            apply sig_preserved; auto.
+            rewrite stacksize_preserved with (f:=f) by trivial.
+            eassumption.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Itailcall (funsig fd) (inl r) args)))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+            ** replace (trs ## args) with (trs ## (instr_uses (Itailcall (funsig fd) (inr id) args))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+        + econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Itailcall with (args := args) (sig := (funsig fd)) (ros := ros).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** exact TFD1.
+            ** apply sig_preserved; auto.
+            ** rewrite stacksize_preserved with (f:=f) by trivial.
+               eassumption.
+          * destruct ros as [r | id].
+            ** replace (trs ## args) with (tl (trs ## (instr_uses (Itailcall (funsig fd) (inl r) args)))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+            ** replace (trs ## args) with (trs ## (instr_uses (Itailcall (funsig fd) (inr id) args))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               apply match_states_call; auto.
+
+      - (* builtin *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m')
+             (trs := (regmap_setres res vres trs)).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * eapply Smallstep.plus_left.
+            ** apply exec_Ibuiltin with (ef := ef) (args := args) (res := res) (vargs := vargs).
+               *** exact ALTER.
+               *** apply eval_builtin_args_preserved with (ge1 := ge); eauto.
+                   exact symbols_preserved.
+                   apply transf_function_preserves_builtin_args with (f:=f) (tf:=tf) (pc:=pc) (rs:=rs) (ef:=ef) (res0:=res) (pc':=pc'); auto.
+               *** eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+            ** apply Smallstep.plus_star.
+               exact PLUS.
+            ** symmetry. apply E0_right.
+          * constructor; trivial.
+            apply match_regs_trans with (rs2 := (regmap_setres res vres trs)); trivial.
+            apply match_regs_setres.
+            assumption.
+        + econstructor; split.
+          * eapply Smallstep.plus_one.
+            apply exec_Ibuiltin with (ef := ef) (args := args) (res := res) (vargs := vargs).
+            ** rewrite transf_function_preserves with (f:=f); eauto.
+               eapply max_pc_function_sound; eauto.
+            ** apply eval_builtin_args_preserved with (ge1 := ge); eauto.
+               exact symbols_preserved.
+               apply transf_function_preserves_builtin_args with (f:=f) (tf:=tf) (pc:=pc) (rs:=rs) (ef:=ef) (res0:=res) (pc':=pc'); auto.
+            ** eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+          * constructor; auto.
+            apply match_regs_setres.
+            assumption.
+            
+      - (* cond *)
+        destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + destruct b eqn:B.
+          ++ exploit transf_function_redirects; eauto.
+             { eapply max_pc_function_sound; eauto. }
+             intros [pc_inj [ALTER SKIP]].
+             specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+             destruct SKIP as [trs' [MATCH PLUS]].
+             econstructor; split.
+             * eapply Smallstep.plus_left.
+               ** apply exec_Icond with (b := true) (cond := cond) (args := args) (ifso := pc_inj) (ifnot := ifnot) (predb := predb).
+                   exact ALTER.
+                   replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+                   rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+                   simpl. reflexivity.
+               ** apply Smallstep.plus_star.
+                  exact PLUS.
+               ** reflexivity.
+             * simpl. constructor; auto.
+               apply match_regs_trans with (rs2:=trs); auto.
+
+          ++ exploit transf_function_redirects; eauto.
+             { eapply max_pc_function_sound; eauto. }
+             intros [pc_inj [ALTER SKIP]].
+             specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+             destruct SKIP as [trs' [MATCH PLUS]].
+             econstructor; split.
+             * eapply Smallstep.plus_one.
+               apply exec_Icond with (b := false) (cond := cond) (args := args) (ifso := pc_inj) (ifnot := ifnot) (predb := predb).
+               exact ALTER.
+               replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               simpl. reflexivity.
+             * simpl. constructor; auto.
+        + destruct b eqn:B.
+          * econstructor; split.
+            ** eapply Smallstep.plus_one.
+               apply exec_Icond with (b := true) (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (predb := predb).
+               *** rewrite transf_function_preserves with (f:=f); eauto.
+                   eapply max_pc_function_sound; eauto.
+               *** replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+                   rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               *** reflexivity.
+            ** constructor; auto.
+          *  econstructor; split.
+            ** eapply Smallstep.plus_one.
+               apply exec_Icond with (b := false) (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (predb := predb).
+               *** rewrite transf_function_preserves with (f:=f); eauto.
+                   eapply max_pc_function_sound; eauto.
+               *** replace args with (instr_uses (Icond cond args ifso ifnot predb)) by reflexivity.
+                   rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               *** reflexivity.
+            ** constructor; auto.
+                       
+      - destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := sp) (m := m) (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ijumptable with (arg := arg) (tbl := tbl) (n := n); trivial.
+            replace (trs # arg) with (hd Vundef (trs ## (instr_uses (Ijumptable arg tbl)))) by reflexivity.
+            rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+            eassumption.
+          * constructor; trivial.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ijumptable with (arg := arg) (tbl := tbl) (n := n); trivial.
+            rewrite transf_function_preserves with (f:=f); eauto.
+            eapply max_pc_function_sound; eauto.
+            replace (trs # arg) with (hd Vundef (trs ## (instr_uses (Ijumptable arg tbl)))) by reflexivity.
+            rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+            eassumption.
+          * constructor; trivial.
+      - (* return *)
+         destruct ((gen_injections f (max_pc_function f) (max_reg_function f)) ! pc) eqn:INJECTION.
+        + exploit transf_function_redirects; eauto.
+          { eapply max_pc_function_sound; eauto. }
+          intros [pc_inj [ALTER SKIP]].
+          specialize SKIP with (ts := ts) (sp := (Vptr stk Ptrofs.zero)) (m := m) (trs := trs).
+          destruct SKIP as [trs' [MATCH PLUS]].
+          econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ireturn.
+            exact ALTER.
+            rewrite stacksize_preserved with (f:=f); eassumption.
+          * destruct or as [r | ]; simpl.
+            ** replace (trs # r) with (hd Vundef (trs ## (instr_uses (Ireturn (Some r))))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               constructor; auto.
+            ** constructor; auto.
+        + econstructor; split.
+          * apply Smallstep.plus_one.
+            apply exec_Ireturn.
+            rewrite transf_function_preserves with (f:=f); eauto.
+            eapply max_pc_function_sound; eauto.
+            rewrite stacksize_preserved with (f:=f); eassumption.
+          * destruct or as [r | ]; simpl.
+            ** replace (trs # r) with (hd Vundef (trs ## (instr_uses (Ireturn (Some r))))) by reflexivity.
+               rewrite transf_function_preserves_uses with (f := f) (tf := tf) (pc := pc) (rs := rs); trivial.
+               constructor; auto.
+            ** constructor; auto.
+
+      - (* internal call *)
+        monadInv FUN.
+        econstructor; split.
+        + apply Smallstep.plus_one.
+          apply exec_function_internal.
+          rewrite stacksize_preserved with (f:=f) by assumption.
+          eassumption.
+        + rewrite entrypoint_preserved with (f:=f)(tf:=x) by assumption.
+          constructor; auto.
+          rewrite params_preserved with (f:=f)(tf:=x) by assumption.
+          apply match_regs_refl.
+      - (* external call *)
+        monadInv FUN.
+        econstructor; split.
+        + apply Smallstep.plus_one.
+          apply exec_function_external.
+          eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+        + constructor; auto.
+        
+      - (* return *)
+        inv STACKS. inv H1.
+        destruct (STAR bl m (trs # res <- vres)) as [trs2 [MATCH' STAR']].
+        econstructor; split.
+        + eapply Smallstep.plus_left.
+          * apply exec_return.
+          * exact STAR'.
+          * reflexivity.
+        + constructor; trivial.
+          apply match_regs_trans with (rs2 := (trs # res <- vres)).
+          apply match_regs_write.
+          assumption.
+          assumption.
+    Qed.
+
+    Theorem transf_program_correct: (* semantic preservation of the injection pass, as a forward simulation *)
+      Smallstep.forward_simulation (semantics prog) (semantics tprog).
+    Proof.
+      eapply Smallstep.forward_simulation_plus. (* remaining goals are discharged by the four lemmas below, in order *)
+      apply senv_preserved.
+      eexact transf_initial_states.
+      eexact transf_final_states.
+      eexact transf_step_correct.
+    Qed.
+    
+End PRESERVATION.
+End INJECTOR.
diff --git a/backend/JsonAST.ml b/backend/JsonAST.ml
index 8905e252..c73bf30d 100644
--- a/backend/JsonAST.ml
+++ b/backend/JsonAST.ml
@@ -31,7 +31,7 @@ let pp_section pp sec =
     pp_jobject_end pp in
   match sec with
   | Section_text -> pp_simple "Text"
-  | Section_data init -> pp_complex "Data" init
+  | Section_data(init, thread_local) -> pp_complex "Data" init (* FIXME *)
   | Section_small_data init -> pp_complex "Small Data" init
   | Section_const init -> pp_complex "Const" init
   | Section_small_const init -> pp_complex "Small Const" init
diff --git a/backend/LICM.v b/backend/LICM.v
new file mode 100644
index 00000000..0a0a1c7d
--- /dev/null
+++ b/backend/LICM.v
@@ -0,0 +1,9 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+Require Inject.
+(* Axiomatized oracle mapping nodes to lists of instructions to inject; presumably realized by LICMaux.gen_injections at extraction -- TODO confirm. *)
+Axiom gen_injections : function -> node -> reg -> PTree.t (list Inject.inj_instr).
+(* The LICM pass is Inject.transf_program instantiated with the gen_injections oracle. *)
+Definition transf_program : program -> res program :=
+  Inject.transf_program gen_injections.
diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml
new file mode 100644
index 00000000..4ebc7844
--- /dev/null
+++ b/backend/LICMaux.ml
@@ -0,0 +1,252 @@
+open RTL;;
+open Camlcoq;;
+open Maps;;
+open Kildall;;
+open HashedSet;;
+open Inject;;
+
+type reg = P.t;;
+
+module Dominator =
+  struct
+    (* Flat lattice: Unreachable below every [Dominated h], Multiple above. *)
+    type t = Unreachable | Dominated of int | Multiple
+    let bot = Unreachable
+    let top = Multiple
+    (* Structural equality on lattice elements. *)
+    let beq (u : t) (v : t) : bool =
+      match u, v with
+      | Dominated h1, Dominated h2 -> h1 = h2
+      | Unreachable, Unreachable | Multiple, Multiple -> true
+      | _, _ -> false
+    (* Least upper bound: two distinct headers join to Multiple. *)
+    let lub (u : t) (v : t) : t =
+      match u, v with
+      | Unreachable, w | w, Unreachable -> w
+      | Multiple, _ | _, Multiple -> Multiple
+      | Dominated h1, Dominated h2 -> if h1 = h2 then u else Multiple
+    let pp oc (d : t) =
+      match d with
+      | Dominated h -> Printf.fprintf oc "%d" h
+      | Unreachable -> output_string oc "unreachable"
+      | Multiple -> output_string oc "multiple"
+  end
+
+module Dominator_Solver = Dataflow_Solver(Dominator)(NodeSetForward)
+
+let apply_dominator (is_marked : node -> bool) (pc : node)
+      (before : Dominator.t) : Dominator.t =
+  (* Transfer function of the analysis: unreachable stays unreachable; a
+     reachable marked node is tagged with its own number; any other node
+     just forwards the incoming value. *)
+  if before = Dominator.Unreachable then before
+  else if is_marked pc then Dominator.Dominated (P.to_int pc)
+  else before;;
+
+let dominated_parts1 (f : coq_function) :
+      (bool PTree.t) * (Dominator.t PMap.t option) =
+  (* Loop headers as computed by Duplicateaux; nodes absent from the map count as unmarked. *)
+  let headers = Duplicateaux.get_loop_headers f.fn_code f.fn_entrypoint in
+  let is_header pc =
+    match PTree.get pc headers with Some flag -> flag | None -> false in
+  let solution = Dominator_Solver.fixpoint f.fn_code RTL.successors_instr
+    (apply_dominator is_header) f.fn_entrypoint Dominator.top in
+  (headers, solution);;
+
+let dominated_parts (f : coq_function) : Dominator.t PMap.t * PSet.t PTree.t =
+  (* Returns the solved dominator map together with, for each loop header,
+     the set of nodes that the analysis tagged with that header. *)
+  match dominated_parts1 f with
+  | (_, None) -> failwith "dominated_parts 1"
+  | (headers, Some dominated) ->
+     (* Each real header starts with the singleton set containing itself. *)
+     let add_header acc pc flag =
+       if flag then PTree.set pc (PSet.add pc PSet.empty) acc else acc in
+     let singletons = PTree.fold add_header headers PTree.empty in
+     (* Insert [pc] into the set of the header recorded for it, if any. *)
+     let add_node acc pc _instr =
+       match PMap.get pc dominated with
+       | Dominator.Dominated h ->
+          let header = P.of_int h in
+          (match PTree.get header acc with
+           | Some members -> PTree.set header (PSet.add pc members) acc
+           | None -> failwith "dominated_parts 2")
+       | _ -> acc in
+     (dominated, PTree.fold add_node f.fn_code singletons);;
+
+let graph_traversal (initial_node : P.t)
+  (successor_iterator : P.t -> (P.t -> unit) -> unit) : PSet.t =
+  (* Exploration with an explicit work stack; returns the set of all
+     vertices reachable from [initial_node], itself included. *)
+  let visited = ref PSet.empty and pending = Stack.create () in
+  let schedule v = Stack.push v pending in
+  schedule initial_node;
+  while not (Stack.is_empty pending) do
+    let v = Stack.pop pending in
+    if PSet.contains !visited v then ()
+    else begin
+      visited := PSet.add v !visited;
+      successor_iterator v schedule
+    end
+  done;
+  !visited;;
+
+let filter_dominated_part (predecessors : P.t list PTree.t)
+      (header : P.t) (dominated_part : PSet.t) =
+  (* Walk the graph backwards from [header], only following predecessors
+     that belong to [dominated_part]; the set of visited nodes is returned. *)
+  let visit_predecessors (vertex : P.t) (push : P.t -> unit) =
+    let preds =
+      match PTree.get vertex predecessors with
+      | Some l -> l
+      | None -> [] in
+    List.iter (fun p -> if PSet.contains dominated_part p then push p) preds
+  in
+  graph_traversal header visit_predecessors;;
+
+let inner_loops (f : coq_function) = (* (dominator map, predecessor map, header -> filtered part) *)
+  let predecessors = Kildall.make_predecessors f.fn_code RTL.successors_instr in
+  let (dominated, parts) = dominated_parts f in
+  (dominated, predecessors, PTree.map (filter_dominated_part predecessors) parts);;
+
+let map_reg mapper r =
+  (* Renamed register for [r] if [mapper] has one, else [r] itself. *)
+  let renamed = PTree.get r mapper in
+  match renamed with Some r' -> r' | None -> r;;
+
+let rewrite_loop_body (last_alloc : reg ref) (* counter of the last fresh register handed out; updated in place *)
+      (insns : RTL.code) (header : P.t) (loop_body : PSet.t) =
+  let seen = ref PSet.empty
+  and stack = Stack.create ()
+  and rewritten = ref [] in (* injected instructions, most recent first *)
+  let add_inj ii = rewritten := ii::!rewritten in
+  Stack.push (header, PTree.empty) stack; (* start at the header with an empty renaming *)
+  while not (Stack.is_empty stack)
+  do
+    let (pc, mapper) = Stack.pop stack in
+    if not (PSet.contains !seen pc) (* each node is processed once, with the renaming of the first path popped for it *)
+    then
+      begin
+        seen := PSet.add pc !seen;
+        match PTree.get pc insns with
+        | None -> ()
+        | Some ii ->
+           let mapper' = (* renaming in force after instruction [ii] *)
+             match ii with
+             | Iop(op, args, res, pc') when not (Op.is_trapping_op op) -> (* only operations reported as non-trapping are copied *)
+                let new_res = P.succ !last_alloc in
+                last_alloc := new_res;
+                add_inj (INJop(op,
+                               (List.map (map_reg mapper) args), (* arguments read the renamed registers *)
+                               new_res));
+                PTree.set res new_res mapper
+             | Iload(trap, chunk, addr, args, res, pc')
+                  when Archi.has_notrap_loads && (* loads are copied only when both the target and the option allow it *)
+                       !Clflags.option_fnontrap_loads ->
+                let new_res = P.succ !last_alloc in
+                last_alloc := new_res;
+                add_inj (INJload(chunk, addr,
+                                 (List.map (map_reg mapper) args),
+                                 new_res));
+                PTree.set res new_res mapper
+             | _ -> mapper in (* any other instruction leaves the renaming unchanged *)
+           List.iter (fun x ->
+               if PSet.contains loop_body x (* successors outside the loop body are not followed *)
+               then Stack.push (x, mapper') stack)
+             (successors_instr ii)
+      end
+  done;
+  List.rev !rewritten;; (* back to the order in which the instructions were generated *)
+
+let pp_inj_instr (oc : out_channel) (ii : inj_instr) =
+  (* Pretty-print one injected instruction on [oc]. *)
+  match ii with
+  | INJload(chunk, addr, args, dst) ->
+     Printf.fprintf oc "%a = %s[%a]" PrintRTL.reg dst
+       (PrintAST.name_of_chunk chunk)
+       (PrintOp.print_addressing PrintRTL.reg) (addr, args)
+  | INJop(op, args, res) ->
+     Printf.fprintf oc "%a = %a" PrintRTL.reg res (PrintOp.print_operation PrintRTL.reg) (op, args)
+  | INJnop -> output_string oc "nop";;
+
+let pp_inj_list (oc : out_channel) (l : inj_instr list) = (* each instruction is followed by "; " *)
+  List.iter (fun ii -> Printf.fprintf oc "%a; " pp_inj_instr ii) l;;
+
+let pp_injections (oc : out_channel) (injections : inj_instr list PTree.t) =
+  (* One line per injection point: node number, " : ", then its instructions. *)
+  let print_entry (pc, injl) =
+    Printf.fprintf oc "%d : %a\n" (P.to_int pc) pp_inj_list injl in
+  List.iter print_entry (PTree.elements injections);;
+
+let compute_injections1 (f : coq_function) =
+  (* One register counter, starting at the highest register of [f], is shared by all loops. *)
+  let last_alloc = ref (max_reg_function f) in
+  let (dominated, predecessors, loop_bodies) = inner_loops f in
+  let rewrite header body = (body, rewrite_loop_body last_alloc f.fn_code header body) in
+  (dominated, predecessors, PTree.map rewrite loop_bodies);;
+
+let compute_injections (f : coq_function) : inj_instr list PTree.t =
+  (* Attach each loop's injections to the reachable predecessors of its header lying outside the body. *)
+  let (dominated, predecessors, injections) = compute_injections1 f in
+  let output_map = ref PTree.empty in
+  let is_loop_entry body p =
+    PMap.get p dominated <> Dominator.Unreachable && not (PSet.contains body p) in
+  let process (header, (body, inj)) =
+    match PTree.get header predecessors with
+    | Some l ->
+       List.iter (fun p ->
+         if is_loop_entry body p then output_map := PTree.set p inj !output_map) l
+    | None -> failwith "compute_injections" in
+  List.iter process (PTree.elements injections);
+  !output_map;;
+  
+let pp_list pp_item oc l =
+  (* Print [l] between "{ " and " }": items are rendered by [pp_item]
+     and separated by ", ". *)
+  let emit idx item =
+    if idx > 0 then output_string oc ", ";
+    pp_item oc item in
+  output_string oc "{ ";
+  List.iteri emit l;
+  output_string oc " }";;
+
+let pp_pset oc s = (* elements are printed as integers, largest first *)
+  let descending = List.sort (fun x y -> y - x) (List.map P.to_int (PSet.elements s)) in
+  pp_list (fun oc -> Printf.fprintf oc "%d") oc descending;;
+
+let print_dominated_parts oc f =
+  let (_, parts) = dominated_parts f in
+  let print_part (header, nodes) = Printf.fprintf oc "%d : %a\n" (P.to_int header) pp_pset nodes in
+  List.iter print_part (PTree.elements parts);;
+
+let print_inner_loops oc f =
+  let (_, _, loops) = inner_loops f in
+  let print_loop (header, nodes) = Printf.fprintf oc "%d : %a\n" (P.to_int header) pp_pset nodes in
+  List.iter print_loop (PTree.elements loops);;
+
+let print_dominated_parts1 oc f =
+  (* Dump, for every instruction of [f], the dominator value computed for
+     its node, or "error" when the dataflow solver returned no result. *)
+  let (_, solution) = dominated_parts1 f in
+  match solution with
+  | Some parts ->
+     let print_node (pc, _) =
+       Printf.fprintf oc "%d : %a\n" (P.to_int pc) Dominator.pp (PMap.get pc parts) in
+     List.iter print_node (PTree.elements f.fn_code)
+  | None -> output_string oc "error\n";;
+  
+let loop_headers (f : coq_function) : RTL.node list = (* nodes whose flag is true in Duplicateaux.get_loop_headers *)
+  PTree.elements (Duplicateaux.get_loop_headers f.fn_code f.fn_entrypoint) |> List.filter snd |> List.map fst;;
+
+let print_loop_headers f =
+  (* Print a title line, then the loop headers of [f] on one line of standard output. *)
+  let print_header pc = Printf.printf "%d " (P.to_int pc) in
+  print_endline "Loop headers";
+  List.iter print_header (loop_headers f);
+  print_newline ();;
+
+let gen_injections (f : coq_function) (coq_max_pc : node) (coq_max_reg : reg):
+      (Inject.inj_instr list) PTree.t =
+  (* [coq_max_pc] and [coq_max_reg] are not used: [compute_injections] works
+     from [f] alone. Debugging aid: pp_injections stdout (compute_injections f) *)
+  compute_injections f;;
diff --git a/backend/LICMproof.v b/backend/LICMproof.v
new file mode 100644
index 00000000..2b76b668
--- /dev/null
+++ b/backend/LICMproof.v
@@ -0,0 +1,27 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+Require Import LICM.
+Require Injectproof.
+(* Relation between a program and its LICM transform: Injectproof.match_prog for the gen_injections oracle. *)
+Definition match_prog : program -> program -> Prop :=
+  Injectproof.match_prog gen_injections.
+
+Section PRESERVATION.
+  (* Semantic preservation for LICM, obtained from the generic Injectproof development. *)
+  Variables prog tprog: program.
+  Hypothesis TRANSF: match_prog prog tprog.
+  (* A successful transf_program run yields related programs; the bound prog/tprog shadow the section variables. *)
+  Lemma transf_program_match:
+    forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+  Proof.
+    intros. eapply match_transform_partial_program_contextual; eauto.
+  Qed.
+  (* Instance of Injectproof.transf_program_correct for the LICM oracle. *)
+  Theorem transf_program_correct :
+    Smallstep.forward_simulation (semantics prog) (semantics tprog).
+  Proof.
+    apply Injectproof.transf_program_correct with (gen_injections := gen_injections).
+    exact TRANSF.
+  Qed.
+End PRESERVATION.
diff --git a/backend/PrintAsm.ml b/backend/PrintAsm.ml
index 155f5e55..0635e32d 100644
--- a/backend/PrintAsm.ml
+++ b/backend/PrintAsm.ml
@@ -121,7 +121,7 @@ module Printer(Target:TARGET) =
           let sec =
             match C2C.atom_sections name with
             | [s] -> s
-            |  _  -> Section_data true
+            |  _  -> Section_data (true, false)
           and align =
             match C2C.atom_alignof name with
             | Some a -> a
diff --git a/backend/PrintAsmaux.ml b/backend/PrintAsmaux.ml
index d82e6f84..7fa10aee 100644
--- a/backend/PrintAsmaux.ml
+++ b/backend/PrintAsmaux.ml
@@ -111,6 +111,10 @@ let elf_symbol_offset oc (symb, ofs) =
   if ofs <> 0L then fprintf oc " + %Ld" ofs
 
 (* Functions for fun and var info *)
+let elf_text_print_fun_info oc name = (* .type/.size directives for a function whose [name] is a plain string, printed with %s *)
+  fprintf oc "	.type	%s, @function\n" name;
+  fprintf oc "	.size	%s, . - %s\n" name name
+
 let elf_print_fun_info oc name =
   fprintf oc "	.type	%a, @function\n" elf_symbol name;
   fprintf oc "	.size	%a, . - %a\n" elf_symbol name elf_symbol name
@@ -303,6 +307,7 @@ let print_version_and_options oc comment =
     fprintf oc " %s" Commandline.argv.(i)
   done;
   fprintf oc "\n"
+  
 (** Get the name of the common section if it is used otherwise the given section
     name, with bss as default *)
 
@@ -310,4 +315,84 @@ let common_section ?(sec = ".bss") () =
   if !Clflags.option_fcommon then
     "COMM"
   else
-    sec
+    sec;;
+
+(* Profiling: positions are handed out to identifiers in order of first request. *)
+let profiling_table : (Digest.t, int) Hashtbl.t = Hashtbl.create 1000;;
+let next_profiling_position = ref 0;;
+let profiling_position (x : Digest.t) : int =
+  try Hashtbl.find profiling_table x
+  with Not_found ->
+    let slot = !next_profiling_position in
+    incr next_profiling_position;
+    Hashtbl.replace profiling_table x slot;
+    slot;;
+
+let profiling_ids () =
+  (* Invert the table into an array: element [i] is the identifier that was
+     assigned position [i]. Provided the table is only filled through
+     [profiling_position], every cell is overwritten and no "" remains. *)
+  let ids = Array.make !next_profiling_position "" in
+  Hashtbl.iter (fun id slot -> ids.(slot) <- id) profiling_table;
+  ids;;
+
+let print_profiling_id oc id =
+  (* Emit the 16 bytes of [id] as a single comma-separated .byte directive. *)
+  assert (String.length id = 16);
+  output_string oc "	.byte";
+  String.iteri (fun i c ->
+      fprintf oc " 0x%02x" (Char.code c);
+      if i < 15 then output_char oc ',') id;
+  output_char oc '\n';;
+
+let profiling_counter_table_name = ".compcert_profiling_counters" (* symbol of the per-object counter table *)
+and profiling_id_table_name = ".compcert_profiling_ids" (* symbol of the table of 16-byte identifiers *)
+and profiling_write_table = ".compcert_profiling_save_for_this_object" (* label of the generated stub *)
+and profiling_init = ".compcert_profiling_init" (* label of the initializer used by Init_atexit *)
+and profiling_write_table_helper = "_compcert_write_profiling_table" (* presumably a runtime helper called by the stub -- TODO confirm *)
+and dtor_section = ".dtors.65435,\"aw\",@progbits" (* argument of .section for the Dtors method *)
+(* and fini_section = ".fini_array_00100,\"aw\"" *)
+and init_section = ".init_array,\"aw\"";; (* argument of .section for the Init_atexit method *)
+
+type finalizer_call_method = (* how the table-writing stub is scheduled; consumed by print_profiling_epilogue *)
+  | Dtors (* put a pointer to the stub in the dtor section *)
+  | Init_atexit of (out_channel -> string -> unit);; (* put a pointer to an initializer in the init section; the callback prints its body given the stub's name *)
+
+let write_symbol_pointer oc sym = (* data directive holding [sym], sized by Archi.ptr64 *)
+  match Archi.ptr64 with
+  | true -> fprintf oc "	.8byte	%s\n" sym
+  | false -> fprintf oc "	.4byte	%s\n" sym;;
+
+let print_profiling_epilogue declare_function finalizer_call_method print_profiling_stub oc = (* emits the profiling tables and stub at the end of an assembly file *)
+  if !Clflags.option_profile_arcs (* nothing is printed unless arc profiling is enabled *)
+  then
+    let nr_items = !next_profiling_position in
+    if nr_items > 0 (* ... and at least one position was allocated *)
+    then
+      begin
+        fprintf oc "	.lcomm	%s, %d\n"
+          profiling_counter_table_name (nr_items * 16); (* 16 bytes reserved per position *)
+        fprintf oc "	.section	.rodata\n";
+        fprintf oc "%s:\n" profiling_id_table_name;
+        Array.iter (print_profiling_id oc) (profiling_ids ()); (* identifiers, in position order *)
+        fprintf oc "	.text\n";
+        fprintf oc "%s:\n" profiling_write_table;
+        print_profiling_stub oc nr_items (* target-specific callback printing the stub body *)
+          profiling_id_table_name
+          profiling_counter_table_name;
+        declare_function oc profiling_write_table;
+        match finalizer_call_method with
+        | Dtors ->
+           fprintf oc "	.section	%s\n" dtor_section;
+           write_symbol_pointer oc profiling_write_table
+        | Init_atexit(atexit_call) ->
+           fprintf oc "	.section	%s\n" init_section;
+           write_symbol_pointer oc profiling_init;
+           fprintf oc "	.text\n";
+           fprintf oc "%s:\n" profiling_init;
+           atexit_call oc profiling_write_table; (* callback printing the initializer body, given the stub's name *)
+           declare_function oc profiling_init
+      end;;
+
+let profiling_offset id kind = (* 8 bytes per counter, two counters per position, [kind] selecting which *)
+  8 * (2 * profiling_position id + kind);;
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index c9a6d399..051225a4 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -34,6 +34,7 @@ let precedence = function
   | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddfs|Osubfs|Oaddl|Osubl), _, _) -> (12, LtoR)
   | Ebinop((Oshl|Oshr|Oshru|Oshll|Oshrl|Oshrlu), _, _) -> (11, LtoR)
   | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpfs _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR)
+  | Ebinop((Oexpect _), _, _) -> (9, LtoR)
   | Ebinop((Oand|Oandl), _, _) -> (8, LtoR)
   | Ebinop((Oxor|Oxorl), _, _) -> (7, LtoR)
   | Ebinop((Oor|Oorl), _, _) -> (6, LtoR)
@@ -89,6 +90,7 @@ let comparison_name = function
   | Cge -> ">="
 
 let name_of_binop = function
+  | Oexpect _ -> "expect"
   | Oadd -> "+"
   | Osub -> "-"
   | Omul -> "*"
diff --git a/backend/Profiling.v b/backend/Profiling.v
new file mode 100644
index 00000000..4cba49ee
--- /dev/null
+++ b/backend/Profiling.v
@@ -0,0 +1,65 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+(* Identifier oracles left abstract in Coq; presumably realized by Profilingaux.function_id / branch_id at extraction -- TODO confirm. *)
+Parameter function_id : function -> AST.profiling_id.
+Parameter branch_id : AST.profiling_id -> node -> AST.profiling_id.
+
+Section PER_FUNCTION_ID.
+  Variable f_id : AST.profiling_id.
+  (* Adds two profiling builtins for the branch at pc: kind 0 at extra_pc going to ifnot, kind 1 at extra_pc+1 going to ifso; returns (extra_pc+2, new code). *)
+  Definition inject_profiling_call (prog : code)
+             (pc extra_pc ifso ifnot : node) : node * code :=
+    let id := branch_id f_id pc in
+    let extra_pc' := Pos.succ extra_pc in
+    let prog' := PTree.set extra_pc
+                           (Ibuiltin (EF_profiling id 0%Z) nil BR_none ifnot) prog in
+    let prog'':= PTree.set extra_pc'
+                           (Ibuiltin (EF_profiling id 1%Z) nil BR_none ifso) prog' in
+    (Pos.succ extra_pc', prog'').
+  (* Redirects the Icond at pc to the two builtins created at extra_pc (ifnot side) and extra_pc+1 (ifso side), which then continue to the original targets. *)
+  Definition inject_at (prog : code) (pc extra_pc : node) : node * code :=
+    match PTree.get pc prog with
+    | Some (Icond cond args ifso ifnot expected) =>
+      inject_profiling_call
+        (PTree.set pc
+                   (Icond cond args (Pos.succ extra_pc) extra_pc expected) prog)
+        pc extra_pc ifso ifnot
+    | _ => inject_profiling_call prog pc extra_pc 1 1 (* does not happen *)
+    end.
+  (* Variant of inject_at taking the (next free node, code) pair as a single accumulator, as needed by the fold in inject_l. *)
+  Definition inject_at' (already : node * code) pc :=
+    let (extra_pc, prog) := already in
+    inject_at prog pc extra_pc.
+  (* Applies inject_at' to every node of injections, left to right, starting from (extra_pc, prog). *)
+  Definition inject_l (prog : code) extra_pc injections :=
+    List.fold_left (fun already (inject_pc : node) =>
+                      inject_at' already inject_pc)
+                   injections
+                   (extra_pc, prog).
+  (* Nodes of prog that hold an Icond instruction. *)
+  Definition gen_conditions (prog : code) :=
+    List.map fst (PTree.elements (PTree.filter1
+                                    (fun instr =>
+                                       match instr with
+                                       | Icond cond args ifso ifnot expected => true
+                                       | _ => false
+                                       end) prog)).
+End PER_FUNCTION_ID.
+(* Instruments every conditional branch of f; new nodes are allocated from max_pc+1 upwards, and only fn_code changes. *)
+Definition transf_function (f : function) : function :=
+  let max_pc := max_pc_function f in
+  let conditions := gen_conditions (fn_code f) in
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := snd (inject_l (function_id f) (fn_code f) (Pos.succ max_pc) conditions);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/ProfilingExploit.v b/backend/ProfilingExploit.v
new file mode 100644
index 00000000..cfca1a12
--- /dev/null
+++ b/backend/ProfilingExploit.v
@@ -0,0 +1,30 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+Local Open Scope positive.
+(* Abstract oracles: identifiers for functions and branches, and an optional predicted outcome per branch identifier. *)
+Parameter function_id : function -> AST.profiling_id.
+Parameter branch_id : AST.profiling_id -> node -> AST.profiling_id.
+Parameter condition_oracle : AST.profiling_id -> option bool.
+(* An Icond whose last field is None receives the oracle's answer for that branch; every other instruction is returned unchanged. *)
+Definition transf_instr (f_id : AST.profiling_id)
+           (pc : node) (i : instruction) : instruction :=
+  match i with
+  | Icond cond args ifso ifnot None =>
+    Icond cond args ifso ifnot (condition_oracle (branch_id f_id pc))
+  | _ => i
+  end.
+(* Maps transf_instr over the code of f, using the identifier of f itself; all other fields are copied. *)
+Definition transf_function (f : function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (function_id f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/ProfilingExploitproof.v b/backend/ProfilingExploitproof.v
new file mode 100644
index 00000000..bc68c38e
--- /dev/null
+++ b/backend/ProfilingExploitproof.v
@@ -0,0 +1,224 @@
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import ProfilingExploit.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+(* The transformed code at pc is the image by transf_instr of the original instruction at pc. *)
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some(transf_instr (function_id f) pc i).
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite Hcode.
+  reflexivity.
+Qed.
+(* Finds a hypothesis of the form (fn_code _)!_ = Some _ and adds the corresponding instance of transf_function_at to the context. *)
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+(* Two stack frames match when they differ only by the function being replaced with its transform. *)
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+  | match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+(* Simulation invariant: same registers, memory and program point; functions replaced by their transforms; stacks related frame by frame. *)
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. 
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  constructor; auto. 
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. constructor; auto. constructor.
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. auto.
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* cond *)
+- destruct predb.
+  + econstructor; split.
+    eapply exec_Icond; eauto.
+    constructor; auto.
+  + simpl transf_instr in H1.
+    destruct condition_oracle in H1.
+    * econstructor; split.
+      eapply exec_Icond; eauto.
+      constructor; auto.
+    * econstructor; split.
+      eapply exec_Icond; eauto.
+      constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  constructor; auto.
+(* return *)
+- econstructor; split.
+  eapply exec_Ireturn; eauto.
+  constructor; auto.
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+(* Semantic preservation: a lock-step forward simulation, each source step being matched by one target step (step_simulation). *)
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Profilingaux.ml b/backend/Profilingaux.ml
new file mode 100644
index 00000000..ec0ae304
--- /dev/null
+++ b/backend/Profilingaux.ml
@@ -0,0 +1,73 @@
+open Camlcoq
+open RTL
+open Maps
+   
+type identifier = Digest.t
+
+let pp_id channel (x : identifier) =
+  assert(String.length x = 16);
+  String.iter (* print each of the 16 bytes as two lowercase hexadecimal digits *)
+    (fun c -> Printf.fprintf channel "%02x" (Char.code c))
+    x
+
+let print_anonymous_function pp f =
+  (* Pair each instruction with its integer pc, then sort by decreasing pc. *)
+  let by_decreasing_pc (pc1, _) (pc2, _) = compare pc2 pc1 in
+  let numbered =
+    List.rev_map (fun (pc, i) -> (P.to_int pc, i)) (PTree.elements f.fn_code) in
+  let instrs = List.sort by_decreasing_pc numbered in
+  let first_pc = match instrs with (pc1, _) :: _ -> pc1 | [] -> -1 in
+  (* print_succ receives the entry point and the first listed pc (-1 when there is no code). *)
+  PrintRTL.print_succ pp f.fn_entrypoint first_pc;
+  List.iter (PrintRTL.print_instruction pp) instrs;
+  Printf.fprintf pp "}\n\n"
+  
+let function_id (f : coq_function) : identifier = (* digest of the marshalled function, used as its profiling id *)
+  let digest = Digest.string (Marshal.to_string f [Marshal.No_sharing]) in (* No_sharing: the bytes depend on structure only, not on physical sharing *)
+  (*
+  Printf.fprintf stderr "FUNCTION hash = %a\n" pp_id digest;
+  print_anonymous_function stderr f;
+   *)
+  digest
+
+let branch_id (f_id : identifier) (node : P.t) : identifier =
+  Digest.string (Printf.sprintf "%s%Ld" f_id (P.to_int64 node));; (* digest of f_id followed by the decimal node number *)
+
+let profiling_counts : (identifier, (Int64.t*Int64.t)) Hashtbl.t = Hashtbl.create 1000;; (* global table: identifier -> pair of 64-bit counters *)
+
+let get_counts id =
+  (* Counter pair recorded for id; an id absent from the table reads as (0, 0). *)
+  try Hashtbl.find profiling_counts id
+  with Not_found -> (0L, 0L);;
+  
+let add_profiling_counts id counter0 counter1 =
+  let (seen0, seen1) = get_counts id in
+  let summed = (Int64.add seen0 counter0, Int64.add seen1 counter1) in
+  Hashtbl.replace profiling_counts id summed;; (* accumulate onto whatever was already recorded for id *)
+
+let input_counter (ic : in_channel) : Int64.t =
+  (* Read eight bytes in order; byte number i contributes its value shifted left by 8*i bits. *)
+  let rec read_from i acc =
+    if i > 7 then acc else
+      let byte = Int64.of_int (input_byte ic) in
+      read_from (i + 1) (Int64.add acc (Int64.shift_left byte (8*i))) in
+  read_from 0 Int64.zero;;
+  
+let load_profiling_info (filename : string) : unit = (* read records (16-byte id, two 8-byte counters) until end of file, adding them to the table *)
+  let ic = open_in_bin filename in (* binary mode: the records are raw bytes, not text *)
+  try
+    while true do (* only left through an exception *)
+      let id : identifier = really_input_string ic 16 in
+      let counter0 = input_counter ic in
+      let counter1 = input_counter ic in
+      (* Printf.fprintf stderr "%a : %Ld %Ld\n" pp_id id counter0 counter1 *)
+      add_profiling_counts id counter0 counter1
+    done
+  with End_of_file -> close_in ic | e -> close_in_noerr ic; raise e;; (* a truncated last record is dropped; other errors close the channel and propagate *)
+
+let condition_oracle (id : identifier) : bool option =
+  (* Debug aid: (if count0 <> 0L || count1 <> 0L then
+    Printf.fprintf stderr "%a : %Ld %Ld\n" pp_id id count0 count1); *)
+  match get_counts id with
+  | (count0, count1) when count0 = count1 -> None
+  | (count0, count1) -> Some (count1 > count0);;
diff --git a/backend/Profilingproof.v b/backend/Profilingproof.v
new file mode 100644
index 00000000..fc04c77e
--- /dev/null
+++ b/backend/Profilingproof.v
@@ -0,0 +1,687 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import Profiling.
+Require Import Lia.
+
+Local Open Scope positive.
+
+Definition match_prog (p tp: RTL.program) := (* program matching: function definitions related by tf = transf_fundef f, the other relation being eq *)
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match: (* the output of transf_program is related to its input by match_prog *)
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated: (* a function found at value v in ge is found, transformed, at v in tge *)
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated: (* same as functions_translated, for lookup through Genv.find_funct_ptr *)
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved: (* symbol lookup gives the same result in tge as in ge *)
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved: (* ge and tge are related by Senv.equiv *)
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  intros [fn | ef]; reflexivity. (* internal or external definition: both sides compute to the same signature *)
+Qed.
+
+Lemma find_function_translated: (* find_function commutes with the transformation, for both forms of ros *)
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto. (* callee designated by register r *)
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence. (* callee designated by symbol id *)
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma pair_expand:
+  forall { A B : Type } (p : A*B),
+    p = ((fst p), (snd p)).
+Proof.
+  intros A B [a b]. reflexivity. (* on a literal pair, fst and snd compute *)
+Qed.
+
+Lemma inject_profiling_call_preserves: (* nodes strictly below extra_pc are unchanged by inject_profiling_call *)
+  forall id body pc extra_pc ifso ifnot pc0,
+    pc0 < extra_pc ->
+    PTree.get pc0 (snd (inject_profiling_call id body pc extra_pc ifso ifnot)) = PTree.get pc0 body.
+Proof.
+  intros. simpl.
+  rewrite PTree.gso by lia. (* pc0 differs from the outer updated key *)
+  apply PTree.gso. (* ... and from the inner one *)
+  lia.
+Qed.
+    
+Lemma inject_at_preserves : (* inject_at leaves every node below extra_pc, other than pc itself, unchanged *)
+  forall id body pc extra_pc pc0,
+    pc0 < extra_pc ->
+    pc0 <> pc ->
+    PTree.get pc0 (snd (inject_at id body pc extra_pc)) = PTree.get pc0 body.
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc body) eqn:GET.
+  - destruct i. (* case analysis on the instruction found at pc *)
+    all: try (rewrite inject_profiling_call_preserves; trivial; fail). (* closes every case this rewrite solves outright *)
+    rewrite inject_profiling_call_preserves by trivial. (* remaining case: one more update, at a key different from pc0 *)
+    apply PTree.gso; lia.
+  - apply inject_profiling_call_preserves; trivial. (* no instruction at pc *)
+Qed.
+
+Lemma inject_profiling_call_increases: (* the next free node returned by inject_profiling_call is extra_pc + 2 *)
+  forall id body pc extra_pc ifso ifnot,
+    fst (inject_profiling_call id body pc extra_pc ifso ifnot) = extra_pc + 2.
+Proof.
+  intros.
+  simpl.
+  lia.
+Qed.
+
+Lemma inject_at_increases: (* inject_at returns extra_pc + 2 as first component, whatever is found at pc *)
+  forall id body pc extra_pc,
+    (fst (inject_at id body pc extra_pc)) = extra_pc + 2.
+Proof.
+  intros. unfold inject_at.
+  destruct (PTree.get pc body).
+  - destruct i; apply inject_profiling_call_increases. (* some instruction at pc, of any kind *)
+  - apply inject_profiling_call_increases. (* no instruction at pc *)
+Qed.
+
+Lemma inject_l_preserves : (* inject_l leaves a node pc0 unchanged when pc0 < extra_pc and pc0 is not in the injection list *)
+  forall id injections body extra_pc pc0,
+    pc0 < extra_pc ->
+    List.forallb (fun injection => if peq injection pc0 then false else true) injections = true ->
+    PTree.get pc0 (snd (inject_l id body extra_pc injections)) = PTree.get pc0 body.
+Proof.
+  induction injections; (* induction on the injection list, generalized over body and extra_pc *)
+    intros until pc0; intros BEFORE ALL; simpl; trivial.
+  unfold inject_l.
+  simpl in ALL.
+  rewrite andb_true_iff in ALL.
+  destruct ALL as [NEQ ALL]. (* NEQ: head a differs from pc0; ALL: so does the tail *)
+  simpl.
+  rewrite pair_expand with (p := inject_at id body a extra_pc).
+  progress fold (inject_l id (snd (inject_at id body a extra_pc)) (* expose inject_l on the tail so the induction hypothesis applies *)
+              (fst (inject_at id body a extra_pc))
+              injections).
+  rewrite IHinjections; trivial.
+  - apply inject_at_preserves; trivial. (* the first injection, at a, does not touch pc0 *)
+    destruct (peq a pc0); congruence.
+  - rewrite inject_at_increases. (* pc0 is still below the new extra_pc *)
+    lia.
+Qed.
+
+Fixpoint inject_l_position extra_pc (* extra_pc advanced by 2 for each list element consumed, consuming at most k elements *)
+         (injections : list node)
+         (k : nat) {struct injections} : node :=
+  match injections with
+  | nil => extra_pc (* list exhausted *)
+  | pc::l' =>
+    match k with
+    | O => extra_pc (* k elements consumed *)
+    | S k' => inject_l_position (extra_pc + 2) l' k'
+    end
+  end.
+
+Lemma inject_l_position_increases : forall injections pc k, (* inject_l_position never returns less than its starting pc *)
+    pc <= inject_l_position pc injections k.
+Proof.
+  induction injections; simpl; intros.
+  lia.
+  destruct k.
+  lia.
+  specialize IHinjections with (pc := pc + 2) (k := k). (* recursive call starts from pc + 2 *)
+  lia.
+Qed.
+
+Lemma inject_l_injected_pc: (* after inject_l, the Icond at an injected pc branches to the position node (ifnot side) and its successor (ifso side) *)
+  forall f_id injections cond args ifso ifnot expected body injnum pc extra_pc
+         (INSTR : body ! pc = Some  (Icond cond args ifso ifnot expected))
+         (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+         (NOREPET : list_norepet injections)
+         (NUMBER : nth_error injections injnum = Some pc),
+    PTree.get pc (snd (inject_l f_id body extra_pc injections)) =
+    Some (Icond cond args
+                (Pos.succ (inject_l_position extra_pc injections injnum))
+                (inject_l_position extra_pc injections injnum) expected).
+Proof.
+  induction injections; simpl; intros.
+  { rewrite nth_error_nil in NUMBER. (* empty list: NUMBER is contradictory *)
+    discriminate NUMBER. }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2]. (* BELOW1: head below extra_pc; BELOW2: tail below extra_pc *)
+  rewrite Pos.ltb_lt in BELOW1.
+  unfold inject_l.
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+  progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+              (fst (inject_at f_id body a extra_pc))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER. (* injnum = 0: pc is the head of the list *)
+    rewrite inject_l_preserves; simpl. (* the remaining injections do not touch pc *)
+    - unfold inject_at.
+      rewrite INSTR.
+      unfold inject_profiling_call. simpl.
+      rewrite PTree.gso by lia.
+      rewrite PTree.gso by lia.
+      apply PTree.gss.
+    - rewrite inject_at_increases.
+      lia.
+    - inv NOREPET. (* pc cannot occur again in the tail *)
+      rewrite forallb_forall.
+      intros x IN.
+      destruct peq as [EQ | ]; trivial.
+      subst x.
+      contradiction.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  apply IHinjections with (ifso := ifso) (ifnot := ifnot). (* injnum > 0: pc lies in the tail *)
+  - rewrite inject_at_preserves; trivial. (* the instruction at pc survives the injection at a *)
+    + rewrite forallb_forall in BELOW2.
+      rewrite <- Pos.ltb_lt.
+      apply nth_error_In in NUMBER.
+      auto.
+    + inv NOREPET.
+      intro ZZZ.
+      subst a.
+      apply nth_error_In in NUMBER.
+      auto.
+
+  - rewrite forallb_forall in BELOW2. (* the tail is also below extra_pc + 2 *)
+    rewrite forallb_forall.
+    intros.
+    specialize BELOW2 with x.
+    rewrite Pos.ltb_lt in *.
+    intuition lia.
+  - inv NOREPET. trivial.
+  - trivial.
+Qed.
+ 
+Lemma inject_l_injected0: (* the position node of an injected Icond holds the profiling builtin with argument 0, continuing at ifnot *)
+  forall f_id  cond args ifso ifnot expected injections body injnum pc extra_pc
+         (INSTR : body ! pc = Some  (Icond cond args ifso ifnot expected))
+         (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+         (NOREPET : list_norepet injections)
+         (NUMBER : nth_error injections injnum = Some pc),
+    PTree.get (inject_l_position extra_pc injections injnum)
+              (snd (inject_l f_id body extra_pc injections)) =
+    Some (Ibuiltin (EF_profiling (branch_id f_id pc) 0%Z) nil BR_none ifnot).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER. (* empty list: NUMBER is contradictory *)
+    discriminate NUMBER. }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+  progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+              (fst (inject_at f_id body a extra_pc))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER. (* injnum = 0: the node in question is extra_pc itself *)
+    rewrite inject_l_preserves; simpl. (* later injections leave that node alone *)
+    - unfold inject_at.
+      rewrite INSTR.
+      unfold inject_profiling_call. simpl.
+      rewrite PTree.gso by lia.
+      apply PTree.gss.
+    - rewrite inject_at_increases.
+      lia.
+    -  rewrite forallb_forall. (* every remaining injection point is below extra_pc, hence different from it *)
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      lia.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  
+  apply IHinjections. (* injnum > 0: pc lies in the tail *)
+  - rewrite inject_at_preserves; trivial.
+    + rewrite forallb_forall in BELOW2.
+      rewrite <- Pos.ltb_lt.
+      apply nth_error_In in NUMBER.
+      auto.
+    + inv NOREPET.
+      intro ZZZ.
+      subst a.
+      apply nth_error_In in NUMBER.
+      auto.
+
+  - rewrite forallb_forall in BELOW2.
+    rewrite forallb_forall.
+    intros.
+    specialize BELOW2 with x.
+    rewrite Pos.ltb_lt in *.
+    intuition lia.
+  - inv NOREPET. trivial.
+  - trivial.
+Qed.
+
+Lemma inject_l_injected1: (* the successor of the position node holds the profiling builtin with argument 1, continuing at ifso *)
+  forall f_id  cond args ifso ifnot expected injections body injnum pc extra_pc
+         (INSTR : body ! pc = Some  (Icond cond args ifso ifnot expected))
+         (BELOW : forallb (fun pc => pc <? extra_pc) injections = true)
+         (NOREPET : list_norepet injections)
+         (NUMBER : nth_error injections injnum = Some pc),
+    PTree.get (Pos.succ (inject_l_position extra_pc injections injnum))
+              (snd (inject_l f_id body extra_pc injections)) =
+    Some (Ibuiltin (EF_profiling (branch_id f_id pc) 1%Z) nil BR_none ifso).
+Proof.
+  induction injections; intros.
+  { rewrite nth_error_nil in NUMBER. (* empty list: NUMBER is contradictory *)
+    discriminate NUMBER. }
+  simpl in BELOW.
+  rewrite andb_true_iff in BELOW.
+  destruct BELOW as [BELOW1 BELOW2].
+  unfold inject_l.
+  simpl fold_left.
+  rewrite pair_expand with (p := inject_at f_id body a extra_pc).
+  progress fold (inject_l f_id (snd (inject_at f_id body a extra_pc))
+              (fst (inject_at f_id body a extra_pc))
+              injections).
+  destruct injnum as [ | injnum']; simpl in NUMBER.
+  { inv NUMBER. (* injnum = 0: the node in question is the successor of extra_pc *)
+    rewrite inject_l_preserves; simpl. (* later injections leave that node alone *)
+    - unfold inject_at.
+      rewrite INSTR.
+      unfold inject_profiling_call. simpl.
+      apply PTree.gss.
+    - rewrite inject_at_increases.
+      lia.
+    -  rewrite forallb_forall. (* every remaining injection point is below extra_pc, hence different from its successor *)
+      rewrite forallb_forall in BELOW2.
+      intros loc IN.
+      specialize BELOW2 with loc.
+      apply BELOW2 in IN.
+      destruct peq as [EQ | ]; trivial.
+      rewrite EQ in IN.
+      rewrite Pos.ltb_lt in IN.
+      lia.
+  }
+  simpl.
+  rewrite inject_at_increases.
+  
+  apply IHinjections. (* injnum > 0: pc lies in the tail *)
+  - rewrite inject_at_preserves; trivial.
+    + rewrite forallb_forall in BELOW2.
+      rewrite <- Pos.ltb_lt.
+      apply nth_error_In in NUMBER.
+      auto.
+    + inv NOREPET.
+      intro ZZZ.
+      subst a.
+      apply nth_error_In in NUMBER.
+      auto.
+
+  - rewrite forallb_forall in BELOW2.
+    rewrite forallb_forall.
+    intros.
+    specialize BELOW2 with x.
+    rewrite Pos.ltb_lt in *.
+    intuition lia.
+  - inv NOREPET. trivial.
+  - trivial.
+Qed.
+  
+Lemma transf_function_at: (* any instruction other than Icond is found unchanged, at the same pc, in the transformed function *)
+  forall f pc i
+    (CODE : f.(fn_code)!pc = Some i)
+    (INSTR : match i with
+             | Icond _ _ _ _ _ => False
+             | _ => True
+             end),
+    (transf_function f).(fn_code)!pc = Some i.
+Proof.
+  intros.
+  unfold transf_function; simpl.
+  rewrite inject_l_preserves.
+  assumption.
+  - pose proof (max_pc_function_sound f pc i CODE) as LE. (* pc is at most max_pc_function f, hence below the first fresh node *)
+    unfold Ple in LE.
+    lia.
+  - rewrite forallb_forall. (* pc is not among the injection points produced by gen_conditions *)
+    intros x IN.
+    destruct peq; trivial.
+    subst x.
+    unfold gen_conditions in IN.
+    rewrite in_map_iff in IN.
+    destruct IN as [[pc' i'] [EQ IN]].
+    simpl in EQ.
+    subst pc'.
+    apply PTree.elements_complete in IN.
+    rewrite PTree.gfilter1 in IN.
+    rewrite CODE in IN.
+    destruct i; try discriminate; contradiction. (* non-Icond cases are filtered out; the Icond case contradicts INSTR *)
+Qed.
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop := (* stack frames agree except that the function is replaced by transf_function f *)
+| match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop := (* states agree up to transformed code and frame-wise matching stacks *)
+  | match_regular_states: forall stk f sp pc rs m stk' (* same sp, pc, rs and m; function transformed *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk' (* same args and m; callee transformed by transf_fundef *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk' (* same returned value v and memory m *)
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma funsig_preserved:
+  forall fd,
+    funsig (transf_fundef fd) = funsig fd.
+Proof.
+  exact sig_preserved. (* same statement as sig_preserved, up to the name of the bound variable *)
+Qed.
+
+Lemma stacksize_preserved:
+  forall f,
+    fn_stacksize (transf_function f) = fn_stacksize f.
+Proof.
+  intro f. destruct f. reflexivity. (* both sides compute to the same stack size field *)
+Qed.
+
+Hint Resolve symbols_preserved funsig_preserved external_call_symbols_preserved senv_preserved stacksize_preserved : profiling. (* hint database used by eauto with profiling in step_simulation *)
+
+Lemma step_simulation: (* each source step is simulated by one or more target steps, preserving match_states *)
+  forall s1 t s2 (STEP : step ge s1 t s2)
+  s1' (MS: match_states s1 s1'),
+  exists s2', plus step tge s1' t s2' /\ match_states s2 s2'.
+Proof.
+  induction 1; intros; inv MS. (* one bullet per rule of the source step relation *)
+  - econstructor; split. (* Inop *)
+    + apply plus_one. apply exec_Inop.
+      erewrite transf_function_at; eauto. apply I.
+    + constructor; auto.
+  - econstructor; split. (* Iop *)
+    + apply plus_one. apply exec_Iop with (op:=op) (args:=args).
+      * erewrite transf_function_at; eauto. apply I.
+      * rewrite eval_operation_preserved with (ge1:=ge);
+          eauto with profiling.
+    + constructor; auto.
+  - econstructor; split. (* Iload *)
+    + apply plus_one. apply exec_Iload with (trap:=trap) (chunk:=chunk)
+                                            (addr:=addr) (args:=args) (a:=a).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split. (* Iload, rule exec_Iload_notrap1 *)
+    + apply plus_one. apply exec_Iload_notrap1 with (chunk:=chunk)
+                                            (addr:=addr) (args:=args).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  -  econstructor; split. (* Iload, rule exec_Iload_notrap2 *)
+    + apply plus_one. apply exec_Iload_notrap2 with (chunk:=chunk)
+                                            (addr:=addr) (args:=args) (a:=a).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  -  econstructor; split. (* Istore *)
+    + apply plus_one. apply exec_Istore with (chunk:=chunk) (src := src)
+                                            (addr:=addr) (args:=args) (a:=a).
+      erewrite transf_function_at; eauto. apply I.
+      rewrite eval_addressing_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split. (* Icall *)
+    + apply plus_one. apply exec_Icall with (sig:=(funsig fd)) (ros:=ros).
+      erewrite transf_function_at; eauto. apply I.
+      apply find_function_translated with (fd := fd).
+      all: eauto with profiling.
+    + constructor; auto.
+      constructor; auto.
+      constructor.
+  - econstructor; split. (* Itailcall *)
+    + apply plus_one. apply exec_Itailcall with (sig:=(funsig fd)) (ros:=ros).
+      erewrite transf_function_at; eauto. apply I.
+      apply find_function_translated with (fd := fd).
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split. (* Ibuiltin *)
+    + apply plus_one.
+      apply exec_Ibuiltin with (ef:=ef) (args:=args) (vargs:=vargs).
+      erewrite transf_function_at; eauto. apply I.
+      apply eval_builtin_args_preserved with (ge1:=ge).
+      all: eauto with profiling.
+    + constructor; auto.
+  - destruct b. (* Icond: two target steps, the rewritten Icond and then the injected profiling builtin *)
+    + assert (In pc (gen_conditions (fn_code f))) as IN. (* condition true: pc is one of the injection points *)
+              { unfold gen_conditions.
+                rewrite in_map_iff.
+                exists (pc,  (Icond cond args ifso ifnot predb)).
+                split; simpl; trivial.
+                apply PTree.elements_correct.
+                rewrite PTree.gfilter1.
+                rewrite H.
+                reflexivity.
+              }
+      apply In_nth_error in IN.
+      destruct IN as [n IN]. (* n: index of pc in the injection list *)
+      econstructor; split.
+      * eapply plus_two.
+        ++ eapply exec_Icond with (cond := cond) (args := args) (predb := predb) (b := true).
+           unfold transf_function. simpl.
+           erewrite inject_l_injected_pc with (cond := cond) (args := args).
+           ** reflexivity.
+           ** eassumption.
+           ** unfold gen_conditions. (* BELOW: every injection point is at most max_pc_function f *)
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions. (* NOREPET: the keys listed by PTree.elements are pairwise distinct *)
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** assumption.
+           ** reflexivity.
+        ++ apply exec_Ibuiltin with (ef :=  (EF_profiling (branch_id (function_id f) pc) 1%Z)) (args := nil) (vargs := nil). (* second step: profiling builtin with argument 1 *)
+           apply inject_l_injected1 with (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (expected := predb).
+           ** exact H.
+           ** unfold gen_conditions. (* same BELOW argument as above *)
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions.
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** constructor.
+           ** constructor.
+        ++ reflexivity.
+      * simpl. constructor; auto.
+        
+    + assert (In pc (gen_conditions (fn_code f))) as IN. (* condition false: same structure, using inject_l_injected0 *)
+              { unfold gen_conditions.
+                rewrite in_map_iff.
+                exists (pc,  (Icond cond args ifso ifnot predb)).
+                split; simpl; trivial.
+                apply PTree.elements_correct.
+                rewrite PTree.gfilter1.
+                rewrite H.
+                reflexivity.
+              }
+      apply In_nth_error in IN.
+      destruct IN as [n IN].
+      econstructor; split.
+      * eapply plus_two.
+        ++ eapply exec_Icond with (cond := cond) (args := args) (predb := predb) (b := false).
+           unfold transf_function. simpl.
+           erewrite inject_l_injected_pc with (cond := cond) (args := args).
+           ** reflexivity.
+           ** eassumption.
+           ** unfold gen_conditions.
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions.
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** assumption.
+           ** reflexivity.
+        ++ apply exec_Ibuiltin with (ef :=  (EF_profiling (branch_id (function_id f) pc) 0%Z)) (args := nil) (vargs := nil). (* second step: profiling builtin with argument 0 *)
+           apply inject_l_injected0 with (cond := cond) (args := args) (ifso := ifso) (ifnot := ifnot) (expected := predb).
+           ** exact H.
+           ** unfold gen_conditions.
+              rewrite forallb_forall.
+              intros x INx.
+              rewrite in_map_iff in INx.
+              destruct INx as [[x' i'] [EQ INx]].
+              simpl in EQ.
+              subst x'.
+              apply PTree.elements_complete in INx.
+              rewrite PTree.gfilter1 in INx.
+              assert (x <= max_pc_function f) as MAX.
+              { destruct ((fn_code f) ! x) eqn:CODEx.
+                2: discriminate.
+                apply max_pc_function_sound with (i:=i).
+                assumption.
+              }
+              rewrite Pos.ltb_lt.
+              lia.
+           ** unfold gen_conditions.
+              apply PTree.elements_keys_norepet.
+           ** exact IN.
+           ** constructor.
+           ** constructor.
+        ++ reflexivity.
+      * simpl. constructor; auto.
+        
+  - econstructor; split. (* Ijumptable *)
+    + apply plus_one.
+      apply exec_Ijumptable with (arg:=arg) (tbl:=tbl) (n:=n).
+      erewrite transf_function_at; eauto. apply I.
+      all: eauto with profiling.
+    + constructor; auto.
+  - econstructor; split. (* Ireturn *)
+    + apply plus_one.
+      apply exec_Ireturn.
+      erewrite transf_function_at; eauto. apply I.
+      rewrite stacksize_preserved. eassumption.
+    + constructor; auto.
+  - econstructor; split. (* internal function *)
+    + apply plus_one. apply exec_function_internal.
+      rewrite stacksize_preserved. eassumption.
+    + constructor; auto.
+  - econstructor; split. (* external function *)
+    + apply plus_one. apply exec_function_external.
+      eauto with profiling.
+    + constructor; auto.
+  - inv STACKS. inv H1. (* return state: pop the matching top frames *)
+    econstructor; split.
+    + apply plus_one. apply exec_return.
+    + constructor; auto.
+Qed.
+
+Lemma transf_initial_states: (* every initial state of prog has a matching initial state of tprog *)
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor. (* build the initial state of tprog; its premises are discharged by the indented lines *)
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto. (* same main symbol in both programs *)
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved. (* the signature hypothesis H3 carries over *)
+  constructor. constructor. (* match_states, then its STACKS premise *)
+Qed.
+
+Lemma transf_final_states: (* a state matching a final state is final, with the same result r *)
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor. (* invert final_state, then the match and its STACKS premise *)
+Qed.
+
+Theorem transf_program_correct: (* forward simulation from the semantics of prog to the semantics of tprog *)
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_plus. (* plus variant: step_simulation answers each source step with plus step in tge *)
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index ac98f3a1..243d7b7c 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -477,9 +477,9 @@ with transl_exprlist (map: mapping) (al: exprlist) (rl: list reg) (nd: node)
 with transl_condexpr (map: mapping) (a: condexpr) (ntrue nfalse: node)
                      {struct a} : mon node :=
   match a with
-  | CEcond c al =>
+  | CEcond c expected al =>
       do rl <- alloc_regs map al;
-      do nt <- add_instr (Icond c rl ntrue nfalse None);
+      do nt <- add_instr (Icond c rl ntrue nfalse expected);
          transl_exprlist map al rl nt
   | CEcondition a b c =>
       do nc <- transl_condexpr map c ntrue nfalse;
diff --git a/backend/RTLgenaux.ml b/backend/RTLgenaux.ml
index e39d3b56..26688e23 100644
--- a/backend/RTLgenaux.ml
+++ b/backend/RTLgenaux.ml
@@ -41,7 +41,7 @@ and size_exprs = function
   | Econs(e1, el) -> size_expr e1 + size_exprs el
 
 and size_condexpr = function
-  | CEcond(c, args) -> size_exprs args
+  | CEcond(c, expected, args) -> size_exprs args
   | CEcondition(c1, c2, c3) ->
       1 + size_condexpr c1 + size_condexpr c2 + size_condexpr c3
   | CElet(a, c) ->
diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v
index b94ec22f..e62aff22 100644
--- a/backend/RTLgenproof.v
+++ b/backend/RTLgenproof.v
@@ -799,11 +799,11 @@ Proof.
 Qed.
 
 Lemma transl_condexpr_CEcond_correct:
-  forall le cond al vl vb,
+  forall le cond expected al vl vb,
   eval_exprlist ge sp e m le al vl ->
   transl_exprlist_prop le al vl ->
   eval_condition cond vl m = Some vb ->
-  transl_condexpr_prop le (CEcond cond al) vb.
+  transl_condexpr_prop le (CEcond cond expected al) vb.
 Proof.
   intros; red; intros. inv TE.
   exploit H0; eauto. intros [rs1 [tm1 [EX1 [ME1 [RES1 [OTHER1 EXT1]]]]]].
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index 30ad7d82..36b8409d 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -744,10 +744,10 @@ Inductive tr_expr (c: code):
 
 with tr_condition (c: code):
        mapping -> list reg -> condexpr -> node -> node -> node -> Prop :=
-  | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl i,
+  | tr_CEcond: forall map pr cond expected bl ns ntrue nfalse n1 rl i,
       tr_exprlist c map pr bl ns n1 rl ->
       c!n1 = Some (Icond cond rl ntrue nfalse i) ->
-      tr_condition c map pr (CEcond cond bl) ns ntrue nfalse
+      tr_condition c map pr (CEcond cond expected bl) ns ntrue nfalse
   | tr_CEcondition: forall map pr a1 a2 a3 ns ntrue nfalse n2 n3,
       tr_condition c map pr a1 ns n2 n3 ->
       tr_condition c map pr a2 n2 ntrue nfalse ->
diff --git a/backend/Selection.v b/backend/Selection.v
index 4ab3331e..342bd8ca 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -35,12 +35,13 @@ Local Open Scope error_monad_scope.
 
 (** Conversion of conditions *)
 
-Function condexpr_of_expr (e: expr) : condexpr :=
+Function condexpr_of_expr (e: expr) (expected : option bool) : condexpr :=
   match e with
-  | Eop (Ocmp c) el => CEcond c el
-  | Econdition a b c => CEcondition a (condexpr_of_expr b) (condexpr_of_expr c)
-  | Elet a b => CElet a (condexpr_of_expr b)
-  | _ => CEcond (Ccompuimm Cne Int.zero) (e ::: Enil)
+  | Eop (Ocmp c) el => CEcond c expected el
+  | Econdition a b c => CEcondition a (condexpr_of_expr b expected)
+                                      (condexpr_of_expr c expected)
+  | Elet a b => CElet a (condexpr_of_expr b expected)
+  | _ => CEcond (Ccompuimm Cne Int.zero) expected (e ::: Enil)
   end.
 
 Function condition_of_expr (e: expr) : condition * exprlist :=
@@ -120,6 +121,7 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
 
 Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
   match op with
+  | Cminor.Oexpect ty => arg1
   | Cminor.Oadd => add arg1 arg2
   | Cminor.Osub => sub arg1 arg2
   | Cminor.Omul => mul arg1 arg2
@@ -166,7 +168,7 @@ Definition sel_select (ty: typ) (cnd ifso ifnot: expr) : expr :=
    let (cond, args) := condition_of_expr cnd in
    match SelectOp.select ty cond args ifso ifnot with
    | Some a => a
-   | None => Econdition (condexpr_of_expr cnd) ifso ifnot
+   | None => Econdition (condexpr_of_expr cnd None) ifso ifnot
    end.
 
 (** Conversion from Cminor expression to Cminorsel expressions *)
@@ -243,7 +245,8 @@ Definition sel_builtin_res (optid: option ident) : builtin_res ident :=
 Function sel_known_builtin (bf: builtin_function) (args: exprlist) :=
   match bf, args with
   | BI_platform b, _ =>
-      SelectOp.platform_builtin b args
+    SelectOp.platform_builtin b args
+(*  | BI_standard BI_expect, a1 ::: a2 ::: Enil => Some a1 *)
   | BI_standard (BI_select ty), a1 ::: a2 ::: a3 ::: Enil =>
       Some (sel_select ty a1 a2 a3)
   | BI_standard BI_fabs, a1 ::: Enil =>
@@ -291,16 +294,16 @@ Fixpoint sel_switch (arg: nat) (t: comptree): exitexpr :=
   | CTaction act =>
       XEexit act
   | CTifeq key act t' =>
-      XEcondition (condexpr_of_expr (make_cmp_eq (Eletvar arg) key))
+      XEcondition (condexpr_of_expr (make_cmp_eq (Eletvar arg) key) None)
                   (XEexit act)
                   (sel_switch arg t')
   | CTiflt key t1 t2 =>
-      XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar arg) key))
+      XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar arg) key) None)
                   (sel_switch arg t1)
                   (sel_switch arg t2)
   | CTjumptable ofs sz tbl t' =>
       XElet (make_sub (Eletvar arg) ofs)
-        (XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar O) sz))
+        (XEcondition (condexpr_of_expr (make_cmp_ltu (Eletvar O) sz) None)
                      (XEjumptable (make_to_int (Eletvar O)) tbl)
                      (sel_switch (S arg) t'))
   end.
@@ -375,6 +378,22 @@ Definition if_conversion
   | _, _ => None
   end.
 
+Definition extract_expect1 (e : Cminor.expr) : option bool := (* static branch hint read off an [Oexpect] node, if [e] is one *)
+  match e with
+  | Cminor.Ebinop (Cminor.Oexpect ty) e1 (Cminor.Econst (Cminor.Ointconst c)) =>
+    Some (if Int.eq_dec c Int.zero then false else true) (* 32-bit expected value: hint is [true] iff [c <> 0] *)
+  | Cminor.Ebinop (Cminor.Oexpect ty) e1 (Cminor.Econst (Cminor.Olongconst c)) =>
+    Some (if Int64.eq_dec c Int64.zero then false else true) (* 64-bit expected value: same rule *)
+  | _ => None (* not an [Oexpect] whose second operand is a literal integer: no hint *)
+  end.
+
+Definition extract_expect (e : Cminor.expr) : option bool := (* hint for a branch condition [e]; [sel_stmt] hands the result to [condexpr_of_expr] *)
+  match e with
+  | Cminor.Ebinop (Cminor.Ocmpu Cne) e1 (Cminor.Econst (Cminor.Ointconst c)) =>
+    if Int.eq_dec c Int.zero then extract_expect1 e1 else None (* look through an unsigned [e1 <> 0] test; any other constant gives no hint *)
+  | _ => extract_expect1 e (* NOTE(review): a 64-bit [Ocmplu Cne] test against zero is not looked through here -- confirm this is intended *)
+  end.
+
 (** Conversion from Cminor statements to Cminorsel statements. *)
 
 Fixpoint sel_stmt (ki: known_idents) (env: typenv) (s: Cminor.stmt) : res stmt :=
@@ -402,8 +421,10 @@ Fixpoint sel_stmt (ki: known_idents) (env: typenv) (s: Cminor.stmt) : res stmt :
       match if_conversion ki env e ifso ifnot with
       | Some s => OK s
       | None =>
-          do ifso' <- sel_stmt ki env ifso; do ifnot' <- sel_stmt ki env ifnot;
-          OK (Sifthenelse (condexpr_of_expr (sel_expr e)) ifso' ifnot')
+        do ifso' <- sel_stmt ki env ifso;
+        do ifnot' <- sel_stmt ki env ifnot;
+        OK (Sifthenelse (condexpr_of_expr (sel_expr e)
+                          (extract_expect e)) ifso' ifnot')
       end
   | Cminor.Sloop body =>
       do body' <- sel_stmt ki env body; OK (Sloop body')
diff --git a/backend/Selectionaux.ml b/backend/Selectionaux.ml
index 26a79fd7..5a8bde8c 100644
--- a/backend/Selectionaux.ml
+++ b/backend/Selectionaux.ml
@@ -39,6 +39,7 @@ let cost_unop = function
   | Osingleoflong | Osingleoflongu -> assert false
 
 let cost_binop = function
+  | Oexpect _ -> 0
   | Oadd  | Osub -> 1
   | Omul -> 2
   | Odiv  | Odivu | Omod  | Omodu -> assert false
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index aa53c9cb..955c45a4 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -196,12 +196,12 @@ Variable e: env.
 Variable m: mem.
 
 Lemma eval_condexpr_of_expr:
-  forall a le v b,
+  forall expected a le v b,
   eval_expr tge sp e m le a v ->
   Val.bool_of_val v b ->
-  eval_condexpr tge sp e m le (condexpr_of_expr a) b.
+  eval_condexpr tge sp e m le (condexpr_of_expr a expected) b.
 Proof.
-  intros until a. functional induction (condexpr_of_expr a); intros.
+  intros until a. functional induction (condexpr_of_expr a expected); intros.
 (* compare *)
   inv H. econstructor; eauto.
   simpl in H6. inv H6. apply Val.bool_of_val_of_optbool. auto.
@@ -310,46 +310,47 @@ Lemma eval_sel_binop:
   exists v', eval_expr tge sp e m le (sel_binop op a1 a2) v' /\ Val.lessdef v v'.
 Proof.
   destruct op; simpl; intros; FuncInv; try subst v.
-  apply eval_add; auto.
-  apply eval_sub; auto.
-  apply eval_mul; auto.
-  eapply eval_divs; eauto.
-  eapply eval_divu; eauto.
-  eapply eval_mods; eauto.
-  eapply eval_modu; eauto.
-  apply eval_and; auto.
-  apply eval_or; auto.
-  apply eval_xor; auto.
-  apply eval_shl; auto.
-  apply eval_shr; auto.
-  apply eval_shru; auto.
-  apply eval_addf; auto.
-  apply eval_subf; auto.
-  apply eval_mulf; auto.
-  apply eval_divf; auto.
-  apply eval_addfs; auto.
-  apply eval_subfs; auto.
-  apply eval_mulfs; auto.
-  apply eval_divfs; auto.
-  eapply eval_addl; eauto.
-  eapply eval_subl; eauto.
-  eapply eval_mull; eauto.
-  eapply eval_divls; eauto.
-  eapply eval_divlu; eauto.
-  eapply eval_modls; eauto.
-  eapply eval_modlu; eauto.
-  eapply eval_andl; eauto.
-  eapply eval_orl; eauto.
-  eapply eval_xorl; eauto.
-  eapply eval_shll; eauto.
-  eapply eval_shrl; eauto.
-  eapply eval_shrlu; eauto.
-  apply eval_comp; auto.
-  apply eval_compu; auto.
-  apply eval_compf; auto.
-  apply eval_compfs; auto.
-  exists v; split; auto. eapply eval_cmpl; eauto.
-  exists v; split; auto. eapply eval_cmplu; eauto.
+  - exists v1; split; trivial. apply Val.lessdef_normalize.
+  - apply eval_add; auto.
+  - apply eval_sub; auto.
+  - apply eval_mul; auto.
+  - eapply eval_divs; eauto.
+  - eapply eval_divu; eauto.
+  - eapply eval_mods; eauto.
+  - eapply eval_modu; eauto.
+  - apply eval_and; auto.
+  - apply eval_or; auto.
+  - apply eval_xor; auto.
+  - apply eval_shl; auto.
+  - apply eval_shr; auto.
+  - apply eval_shru; auto.
+  - apply eval_addf; auto.
+  - apply eval_subf; auto.
+  - apply eval_mulf; auto.
+  - apply eval_divf; auto.
+  - apply eval_addfs; auto.
+  - apply eval_subfs; auto.
+  - apply eval_mulfs; auto.
+  - apply eval_divfs; auto.
+  - eapply eval_addl; eauto.
+  - eapply eval_subl; eauto.
+  - eapply eval_mull; eauto.
+  - eapply eval_divls; eauto.
+  - eapply eval_divlu; eauto.
+  - eapply eval_modls; eauto.
+  - eapply eval_modlu; eauto.
+  - eapply eval_andl; eauto.
+  - eapply eval_orl; eauto.
+  - eapply eval_xorl; eauto.
+  - eapply eval_shll; eauto.
+  - eapply eval_shrl; eauto.
+  - eapply eval_shrlu; eauto.
+  - apply eval_comp; auto.
+  - apply eval_compu; auto.
+  - apply eval_compf; auto.
+  - apply eval_compfs; auto.
+  - exists v; split; auto. eapply eval_cmpl; eauto.
+  - exists v; split; auto. eapply eval_cmplu; eauto.
 Qed.
 
 Lemma eval_sel_select:
@@ -395,6 +396,13 @@ Proof.
   inv ARGS; try discriminate. inv H0; try discriminate.
   inv SEL.  
   simpl in SEM; inv SEM. apply eval_absf; auto.
+  (* + (* expect *)
+  inv ARGS; try discriminate.
+  inv H0; try discriminate.
+  inv H2; try discriminate.
+  simpl in SEM. inv SEM. inv SEL.
+  destruct v1; destruct v0.
+  all: econstructor; split; eauto. *)
 - eapply eval_platform_builtin; eauto.
 Qed.
 
diff --git a/backend/SplitLong.vp b/backend/SplitLong.vp
index dfe42df0..0f240602 100644
--- a/backend/SplitLong.vp
+++ b/backend/SplitLong.vp
@@ -10,6 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
+(* FIXME: expected branching information not propagated *)
 (** Instruction selection for 64-bit integer operations *)
 
 Require String.
@@ -256,7 +257,7 @@ Definition cmpl_ne_zero (e: expr) :=
 
 Definition cmplu_gen (ch cl: comparison) (e1 e2: expr) :=
   splitlong2 e1 e2 (fun h1 l1 h2 l2 =>
-    Econdition (CEcond (Ccomp Ceq) (h1:::h2:::Enil))
+    Econdition (CEcond (Ccomp Ceq) None (h1:::h2:::Enil))
                (Eop (Ocmp (Ccompu cl)) (l1:::l2:::Enil))
                (Eop (Ocmp (Ccompu ch)) (h1:::h2:::Enil))).
 
@@ -278,7 +279,7 @@ Definition cmplu (c: comparison) (e1 e2: expr) :=
 
 Definition cmpl_gen (ch cl: comparison) (e1 e2: expr) :=
   splitlong2 e1 e2 (fun h1 l1 h2 l2 =>
-    Econdition (CEcond (Ccomp Ceq) (h1:::h2:::Enil))
+    Econdition (CEcond (Ccomp Ceq) None (h1:::h2:::Enil))
                (Eop (Ocmp (Ccompu cl)) (l1:::l2:::Enil))
                (Eop (Ocmp (Ccomp ch)) (h1:::h2:::Enil))).
 
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index a4c4a195..78458582 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -101,5 +101,5 @@ Definition tunnel_function (f: LTL.function) : LTL.function :=
 Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
   transf_fundef tunnel_function f.
 
-Definition tunnel_program (p: LTL.program) : LTL.program :=
+Definition transf_program (p: LTL.program) : LTL.program :=
   transform_program tunnel_fundef p.
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index d3b8a9f0..cdf6c800 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -22,7 +22,7 @@ Definition match_prog (p tp: program) :=
   match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp.
 
 Lemma transf_program_match:
-  forall p, match_prog p (tunnel_program p).
+  forall p, match_prog p (transf_program p).
 Proof.
   intros. eapply match_transform_program; eauto.
 Qed.
diff --git a/backend/Unusedglob.v b/backend/Unusedglob.v
index 93ca7af4..3b8e19ad 100644
--- a/backend/Unusedglob.v
+++ b/backend/Unusedglob.v
@@ -126,7 +126,7 @@ Fixpoint filter_globdefs (used: IS.t) (accu defs: list (ident * globdef fundef u
 Definition global_defined (p: program) (pm: prog_map) (id: ident) : bool :=
   match pm!id with Some _ => true | None => ident_eq id (prog_main p) end.
 
-Definition transform_program (p: program) : res program :=
+Definition transf_program (p: program) : res program :=
   let pm := prog_defmap p in
   match used_globals p pm with
   | None => Error (msg "Unusedglob: analysis failed")
diff --git a/backend/Unusedglobproof.v b/backend/Unusedglobproof.v
index fa120b6d..160c0b18 100644
--- a/backend/Unusedglobproof.v
+++ b/backend/Unusedglobproof.v
@@ -428,9 +428,9 @@ Qed.
 End TRANSFORMATION.
 
 Theorem transf_program_match:
-  forall p tp, transform_program p = OK tp -> match_prog p tp.
+  forall p tp, transf_program p = OK tp -> match_prog p tp.
 Proof.
-  unfold transform_program; intros p tp TR. set (pm := prog_defmap p) in *.
+  unfold transf_program; intros p tp TR. set (pm := prog_defmap p) in *.
   destruct (used_globals p pm) as [u|] eqn:U; try discriminate.
   destruct (IS.for_all (global_defined p pm) u) eqn:DEF; inv TR.
   exists u; split.
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index bc5173ca..75f5eb3e 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -46,16 +46,29 @@ let decl_atom : (AST.ident, atom_info) Hashtbl.t = Hashtbl.create 103
 
 let atom_is_static a =
   try
-    (Hashtbl.find decl_atom a).a_storage = C.Storage_static
+    match (Hashtbl.find decl_atom a).a_storage with
+    | C.Storage_static | C.Storage_thread_local_static -> true
+    | _ -> false
   with Not_found ->
     false
 
 let atom_is_extern a =
   try
-    (Hashtbl.find decl_atom a).a_storage = C.Storage_extern
+    match (Hashtbl.find decl_atom a).a_storage with
+    | C.Storage_extern| C.Storage_thread_local_extern -> true
+    | _ -> false
   with Not_found ->
     false
 
+let atom_is_thread_local a = (* true iff [a] is recorded in [decl_atom] with one of the thread-local storage classes *)
+  try
+    match (Hashtbl.find decl_atom a).a_storage with
+    | C.Storage_thread_local_extern| C.Storage_thread_local_static
+    | C.Storage_thread_local -> true
+    | _ -> false
+  with Not_found -> (* [a] has no entry in [decl_atom] *)
+    false
+  
 let atom_alignof a =
   try
     (Hashtbl.find decl_atom a).a_alignment
@@ -168,9 +181,10 @@ let ais_annot_functions =
 let builtins_generic = {
   builtin_typedefs = [];
   builtin_functions =
-    ais_annot_functions
-      @
+    ais_annot_functions @
     [
+    "__builtin_expect",
+    (TInt(ILong, []), [TInt(ILong, []); TInt(ILong, [])], false);
     (* Integer arithmetic *)
     "__builtin_bswap64",
     (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
@@ -899,6 +913,14 @@ let rec convertExpr env e =
   | C.ECompound(ty1, ie) ->
       unsupported "compound literals"; ezero
 
+  | C.ECall({edesc = C.EVar {name = "__builtin_expect"}}, args) ->
+     (match args with
+      | [e1; e2] ->
+         ewrap (Ctyping.ebinop Cop.Oexpect (convertExpr env e1) (convertExpr env e2))
+      | _ -> 
+       error "__builtin_expect wants two arguments";
+       ezero)
+
   | C.ECall({edesc = C.EVar {name = "__builtin_debug"}}, args) when List.length args < 2 ->
       error "too few arguments to function call, expected at least 2, have 0";
       ezero
@@ -1236,7 +1258,8 @@ let convertFundef loc env fd =
   let vars =
     List.map
       (fun (sto, id, ty, init) ->
-        if sto = Storage_extern || sto = Storage_static then
+        if   sto = Storage_extern || sto = Storage_thread_local_extern
+          || sto = Storage_static || sto = Storage_thread_local_static then
           unsupported "'static' or 'extern' local variable";
         if init <> None then
           unsupported "initialized local variable";
@@ -1339,15 +1362,21 @@ let convertGlobvar loc env (sto, id, ty, optinit) =
   let init' =
     match optinit with
     | None ->
-        if sto = C.Storage_extern then [] else [AST.Init_space sz]
+       if sto = C.Storage_extern || sto = C.Storage_thread_local_extern
+       then [] else [AST.Init_space sz]
     | Some i ->
         convertInitializer env ty i in
   let (section, access) =
-    Sections.for_variable env loc id' ty (optinit <> None) in
+    Sections.for_variable env loc id' ty (optinit <> None)
+      (match sto with
+       | Storage_thread_local | Storage_thread_local_extern
+       | Storage_thread_local_static -> true
+       | _ -> false) in
   if Z.gt sz (Z.of_uint64 0xFFFF_FFFFL) then
     error "'%s' is too big (%s bytes)"
                    id.name (Z.to_string sz);
-  if sto <> C.Storage_extern && Cutil.incomplete_type env ty then
+  if sto <> C.Storage_extern && sto <> C.Storage_thread_local_extern
+     && Cutil.incomplete_type env ty then
     error "'%s' has incomplete type" id.name;
   Hashtbl.add decl_atom id'
     { a_storage = sto;
@@ -1446,7 +1475,7 @@ let cleanupGlobals p =
         if IdentSet.mem fd.fd_name !strong then
           error "multiple definitions of %s" fd.fd_name.name;
         strong := IdentSet.add fd.fd_name !strong
-    | C.Gdecl(Storage_extern, id, ty, init) ->
+    | C.Gdecl((Storage_extern|Storage_thread_local_extern), id, ty, init) ->
         extern := IdentSet.add id !extern
     | C.Gdecl(sto, id, ty, Some i) ->
         if IdentSet.mem id !strong then
@@ -1465,7 +1494,7 @@ let cleanupGlobals p =
         match g.gdesc with
         | C.Gdecl(sto, id, ty, init) ->
             let better_def_exists =
-              if sto = Storage_extern then
+              if sto = Storage_extern || sto = Storage_thread_local_extern then
                 IdentSet.mem id !strong || IdentSet.mem id !weak
               else if init = None then
                 IdentSet.mem id !strong
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index b08c3ad7..fbf9bbeb 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -509,6 +509,10 @@ Definition do_ef_debug (kind: positive) (text: ident) (targs: list typ)
        (w: world) (vargs: list val) (m: mem) : option (world * trace * val * mem) :=
   Some(w, E0, Vundef, m).
 
+Definition do_ef_profiling (id : profiling_id) (* executable semantics used for [EF_profiling]; [id] is not inspected *)
+       (w: world) (vargs: list val) (m: mem) : option (world * trace * val * mem) :=
+  Some(w, E0, Vundef, m). (* always succeeds: same world, empty trace, [Vundef] result, same memory *)
+
 Definition do_builtin_or_external (name: string) (sg: signature)
        (w: world) (vargs: list val) (m: mem) : option (world * trace * val * mem) :=
   match lookup_builtin_function name sg with
@@ -531,6 +535,7 @@ Definition do_external (ef: external_function):
   | EF_annot_val kind text targ => do_ef_annot_val text targ
   | EF_inline_asm text sg clob => do_inline_assembly text sg ge
   | EF_debug kind text targs => do_ef_debug kind text targs
+  | EF_profiling id kind => do_ef_profiling id
   end.
 
 Lemma do_ef_external_sound:
@@ -598,6 +603,8 @@ Proof with try congruence.
   eapply do_inline_assembly_sound; eauto.
 - (* EF_debug *)
   unfold do_ef_debug. mydestr. split; constructor.
+- (* EF_profiling *)
+  unfold do_ef_profiling. mydestr. split; constructor.
 Qed.
 
 Lemma do_ef_external_complete:
@@ -652,6 +659,8 @@ Proof.
   eapply do_inline_assembly_complete; eauto.
 - (* EF_debug *)
   inv H. inv H0. reflexivity.
+- (* EF_profiling *)
+  inv H. inv H0. reflexivity.
 Qed.
 
 (** * Reduction of expressions *)
diff --git a/cfrontend/Cminorgenproof.v b/cfrontend/Cminorgenproof.v
index 5acb996d..744df818 100644
--- a/cfrontend/Cminorgenproof.v
+++ b/cfrontend/Cminorgenproof.v
@@ -1335,6 +1335,7 @@ Lemma eval_binop_compat:
   /\ Val.inject f v tv.
 Proof.
   destruct op; simpl; intros; inv H.
+- TrivialExists. apply Val.normalize_inject; auto.
 - TrivialExists. apply Val.add_inject; auto.
 - TrivialExists. apply Val.sub_inject; auto.
 - TrivialExists. inv H0; inv H1; constructor.
diff --git a/cfrontend/Cop.v b/cfrontend/Cop.v
index 143e87a3..47a02851 100644
--- a/cfrontend/Cop.v
+++ b/cfrontend/Cop.v
@@ -33,6 +33,7 @@ Inductive unary_operation : Type :=
   | Oabsfloat : unary_operation.        (**r floating-point absolute value *)
 
 Inductive binary_operation : Type :=
+  | Oexpect : binary_operation          (**r return first argument *)
   | Oadd : binary_operation             (**r addition (binary [+]) *)
   | Osub : binary_operation             (**r subtraction (binary [-]) *)
   | Omul : binary_operation             (**r multiplication (binary [*]) *)
@@ -763,6 +764,14 @@ Definition sem_mul (v1:val) (t1:type) (v2: val) (t2:type) (m:mem) : option val :
     (fun n1 n2 => Some(Vsingle(Float32.mul n1 n2)))
     v1 t1 v2 t2 m.
 
+Definition sem_expect (v1:val) (t1:type) (v2: val) (t2:type) (m:mem) : option val := (* semantics of [Oexpect]: yields the first operand *)
+  sem_binarith (* NOTE(review): presumably brings both operands to a common arithmetic type first -- confirm against [sem_binarith] *)
+    (fun sg n1 n2 => Some(Vint n1)) (* every callback keeps [n1] and ignores [n2] *)
+    (fun sg n1 n2 => Some(Vlong n1))
+    (fun n1 n2 => Some(Vfloat n1))
+    (fun n1 n2 => Some(Vsingle n1))
+    v1 t1 v2 t2 m.
+
 Definition sem_div (v1:val) (t1:type) (v2: val) (t2:type) (m:mem) : option val :=
   sem_binarith
     (fun sg n1 n2 =>
@@ -1050,6 +1059,7 @@ Definition sem_binary_operation
     (v1: val) (t1: type) (v2: val) (t2:type)
     (m: mem): option val :=
   match op with
+  | Oexpect => sem_expect v1 t1 v2 t2 m
   | Oadd => sem_add cenv v1 t1 v2 t2 m
   | Osub => sem_sub cenv v1 t1 v2 t2 m
   | Omul => sem_mul v1 t1 v2 t2 m
@@ -1290,6 +1300,9 @@ Lemma sem_binary_operation_inj:
   exists tv, sem_binary_operation cenv op tv1 ty1 tv2 ty2 m' = Some tv /\ Val.inject f v tv.
 Proof.
   unfold sem_binary_operation; intros; destruct op.
+- (* expect *)
+  unfold sem_expect in *.
+  eapply sem_binarith_inject; eauto; intros; exact I.
 - (* add *)
   assert (A: forall cenv ty si v1' v2' tv1' tv2',
              Val.inject f v1' tv1' -> Val.inject f v2' tv2' ->
diff --git a/cfrontend/Cshmgen.v b/cfrontend/Cshmgen.v
index 5bd12d00..f78b52ae 100644
--- a/cfrontend/Cshmgen.v
+++ b/cfrontend/Cshmgen.v
@@ -259,6 +259,11 @@ Definition make_add_ptr_long (ce: composite_env) (ty: type) (e1 e2: expr) :=
     let n := make_intconst (Int.repr sz) in
     OK (Ebinop Oadd e1 (Ebinop Omul n (Eunop Ointoflong e2))).
 
+Definition make_expect (e1: expr) (ty1: type) (e2: expr) (ty2: type) := (* builds the [Oexpect] expression for [e1] and [e2] through [make_binarith] *)
+  make_binarith (Oexpect AST.Tint) (Oexpect AST.Tint) (* NOTE(review): slots look like signed/unsigned int, float, single, signed/unsigned long -- confirm against [make_binarith] *)
+                (Oexpect AST.Tfloat) (Oexpect AST.Tsingle)
+                (Oexpect AST.Tlong) (Oexpect AST.Tlong) e1 ty1 e2 ty2.
+
 Definition make_add (ce: composite_env) (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
   match classify_add ty1 ty2 with
   | add_case_pi ty si => make_add_ptr_int ce ty si e1 e2
@@ -421,6 +426,7 @@ Definition transl_binop (ce: composite_env)
                         (a: expr) (ta: type)
                         (b: expr) (tb: type) : res expr :=
   match op with
+  | Cop.Oexpect => make_expect a ta b tb
   | Cop.Oadd => make_add ce a ta b tb
   | Cop.Osub => make_sub ce a ta b tb
   | Cop.Omul => make_mul a ta b tb
diff --git a/cfrontend/Cshmgenproof.v b/cfrontend/Cshmgenproof.v
index 1ceb8e4d..c5ba19d5 100644
--- a/cfrontend/Cshmgenproof.v
+++ b/cfrontend/Cshmgenproof.v
@@ -619,6 +619,11 @@ End MAKE_BIN.
 
 Hint Extern 2 (@eq (option val) _ _) => (simpl; reflexivity) : cshm.
 
+Lemma make_expect_correct: binary_constructor_correct make_expect sem_expect. (* [make_expect] is a correct constructor with respect to [sem_expect] *)
+Proof.
+  apply make_binarith_correct; intros; auto. (* reduces to the generic lemma about [make_binarith] *)
+Qed.
+
 Lemma make_add_correct: binary_constructor_correct (make_add cunit.(prog_comp_env)) (sem_add prog.(prog_comp_env)).
 Proof.
   assert (A: forall ty si a b c e le m va vb v,
@@ -922,22 +927,23 @@ Lemma transl_binop_correct:
   eval_expr ge e le m c v.
 Proof.
   intros. destruct op; simpl in *.
-  eapply make_add_correct; eauto.
-  eapply make_sub_correct; eauto.
-  eapply make_mul_correct; eauto.
-  eapply make_div_correct; eauto.
-  eapply make_mod_correct; eauto.
-  eapply make_and_correct; eauto.
-  eapply make_or_correct; eauto.
-  eapply make_xor_correct; eauto.
-  eapply make_shl_correct; eauto.
-  eapply make_shr_correct; eauto.
-  eapply make_cmp_correct; eauto.
-  eapply make_cmp_correct; eauto.
-  eapply make_cmp_correct; eauto.
-  eapply make_cmp_correct; eauto.
-  eapply make_cmp_correct; eauto.
-  eapply make_cmp_correct; eauto.
+- eapply make_expect_correct; eauto.
+- eapply make_add_correct; eauto.
+- eapply make_sub_correct; eauto.
+- eapply make_mul_correct; eauto.
+- eapply make_div_correct; eauto.
+- eapply make_mod_correct; eauto.
+- eapply make_and_correct; eauto.
+- eapply make_or_correct; eauto.
+- eapply make_xor_correct; eauto.
+- eapply make_shl_correct; eauto.
+- eapply make_shr_correct; eauto.
+- eapply make_cmp_correct; eauto.
+- eapply make_cmp_correct; eauto.
+- eapply make_cmp_correct; eauto.
+- eapply make_cmp_correct; eauto.
+- eapply make_cmp_correct; eauto.
+- eapply make_cmp_correct; eauto.
 Qed.
 
 Lemma make_load_correct:
diff --git a/cfrontend/Ctyping.v b/cfrontend/Ctyping.v
index 00fcf8ab..bde4001f 100644
--- a/cfrontend/Ctyping.v
+++ b/cfrontend/Ctyping.v
@@ -111,6 +111,7 @@ Definition comparison_type (ty1 ty2: type) (m: string): res type :=
 
 Definition type_binop (op: binary_operation) (ty1 ty2: type) : res type :=
   match op with
+  | Oexpect => binarith_type ty1 ty2 "__builtin_expect"
   | Oadd =>
       match classify_add ty1 ty2 with
       | add_case_pi ty _ | add_case_ip _ ty
@@ -1546,6 +1547,8 @@ Lemma pres_sem_binop:
 Proof.
   intros until m; intros TY SEM WT1 WT2.
   destruct op; simpl in TY; simpl in SEM.
+- (* expect *)
+  unfold sem_expect in SEM. eapply pres_sem_binarith; eauto; intros; exact I.    
 - (* add *)
   unfold sem_add, sem_add_ptr_int, sem_add_ptr_long in SEM; DestructCases; auto with ty.
   eapply pres_sem_binarith; eauto; intros; exact I.
diff --git a/cfrontend/PrintClight.ml b/cfrontend/PrintClight.ml
index 0e735d2d..0aefde31 100644
--- a/cfrontend/PrintClight.ml
+++ b/cfrontend/PrintClight.ml
@@ -62,6 +62,7 @@ let precedence = function
   | Ebinop(Oand, _, _, _) -> (8, LtoR)
   | Ebinop(Oxor, _, _, _) -> (7, LtoR)
   | Ebinop(Oor, _, _, _) -> (6, LtoR)
+  | Ebinop(Oexpect, _, _, _) -> (5, LtoR)
 
 (* Expressions *)
 
diff --git a/cfrontend/PrintCsyntax.ml b/cfrontend/PrintCsyntax.ml
index 03dc5837..beca056f 100644
--- a/cfrontend/PrintCsyntax.ml
+++ b/cfrontend/PrintCsyntax.ml
@@ -30,6 +30,7 @@ let name_unop = function
   | Oabsfloat -> "__builtin_fabs"
 
 let name_binop = function
+  | Oexpect -> "expect"
   | Oadd -> "+"
   | Osub -> "-"
   | Omul -> "*"
@@ -158,6 +159,7 @@ let rec precedence = function
   | Ebinop(Oand, _, _, _) -> (8, LtoR)
   | Ebinop(Oxor, _, _, _) -> (7, LtoR)
   | Ebinop(Oor, _, _, _) -> (6, LtoR)
+  | Ebinop(Oexpect, _, _, _) -> (5, LtoR) (* fixme *)
   | Eseqand _ -> (5, LtoR)
   | Eseqor _ -> (4, LtoR)
   | Econdition _ -> (3, RtoL)
diff --git a/cfrontend/SimplExprspec.v b/cfrontend/SimplExprspec.v
index e7d57a1c..95e3957c 100644
--- a/cfrontend/SimplExprspec.v
+++ b/cfrontend/SimplExprspec.v
@@ -770,53 +770,53 @@ Proof.
 (* val *)
   simpl in H. destruct v; monadInv H; exists (@nil ident); split; auto with gensym.
 Opaque makeif.
-  intros. destruct dst; simpl in *; inv H2.
+- intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
-  intros. destruct dst; simpl in *; inv H2.
+- intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
-  intros. destruct dst; simpl in *; inv H2.
+- intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
-  intros. destruct dst; simpl in *; inv H2.
+- intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
 (* var *)
-  monadInv H; econstructor; split; auto with gensym. UseFinish. constructor.
+- monadInv H; econstructor; split; auto with gensym. UseFinish. constructor.
 (* field *)
-  monadInv H0. exploit H; eauto. auto. intros [tmp [A B]]. UseFinish.
+- monadInv H0. exploit H; eauto. auto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
 (* valof *)
-  monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
   exploit transl_valof_meets_spec; eauto. intros [tmp2 [Csyntax D]]. UseFinish.
   exists (tmp1 ++ tmp2); split.
   intros; apply tr_expr_add_dest. econstructor; eauto with gensym.
   eauto with gensym.
 (* deref *)
-  monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
+- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
 (* addrof *)
-  monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
+- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. econstructor; eauto.
 (* unop *)
-  monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
+- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
 (* binop *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]]. UseFinish.
   exists (tmp1 ++ tmp2); split.
   intros; apply tr_expr_add_dest. econstructor; eauto with gensym.
   eauto with gensym.
 (* cast *)
-  monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
+- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
 (* seqand *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0.
   (* for value *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
@@ -840,7 +840,7 @@ Opaque makeif.
   apply list_disjoint_cons_r; eauto with gensym.
   apply contained_app; eauto with gensym.
 (* seqor *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0.
   (* for value *)
   exploit H0; eauto with gensym. intros [tmp2 [Csyntax D]].
@@ -864,7 +864,7 @@ Opaque makeif.
   apply list_disjoint_cons_r; eauto with gensym.
   apply contained_app; eauto with gensym.
 (* condition *)
-  monadInv H2. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H2. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0.
   (* for value *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
@@ -896,13 +896,13 @@ Opaque makeif.
   apply contained_app; eauto with gensym.
   apply contained_app; eauto with gensym.
 (* sizeof *)
-  monadInv H. UseFinish.
+- monadInv H. UseFinish.
   exists (@nil ident); split; auto with gensym. constructor.
 (* alignof *)
-  monadInv H. UseFinish.
+- monadInv H. UseFinish.
   exists (@nil ident); split; auto with gensym. constructor.
 (* assign *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   destruct dst; monadInv EQ2; simpl add_dest in *.
   (* for value *)
@@ -921,7 +921,7 @@ Opaque makeif.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
 (* assignop *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   exploit transl_valof_meets_spec; eauto. intros [tmp3 [E F]].
   destruct dst; monadInv EQ3; simpl add_dest in *.
@@ -941,7 +941,7 @@ Opaque makeif.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
 (* postincr *)
-  monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0; simpl add_dest in *.
   (* for value *)
   exists (x0 :: tmp1); split.
@@ -958,7 +958,7 @@ Opaque makeif.
   econstructor; eauto with gensym.
   apply contained_cons; eauto with gensym.
 (* comma *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto with gensym. intros [tmp2 [Csyntax D]].
   exists (tmp1 ++ tmp2); split.
   econstructor; eauto with gensym.
@@ -968,7 +968,7 @@ Opaque makeif.
   destruct dst; simpl; auto with gensym.
   apply contained_app; eauto with gensym.
 (* call *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   destruct dst; monadInv EQ2; simpl add_dest in *.
   (* for value *)
@@ -986,7 +986,7 @@ Opaque makeif.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
 (* builtin *)
-  monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0; simpl add_dest in *.
   (* for value *)
   exists (x0 :: tmp1); split.
@@ -1001,13 +1001,13 @@ Opaque makeif.
   repeat rewrite app_ass. econstructor; eauto with gensym. congruence.
   apply contained_cons; eauto with gensym.
 (* loc *)
-  monadInv H.
+- monadInv H.
 (* paren *)
-  monadInv H0.
+- monadInv H0.
 (* nil *)
-  monadInv H; exists (@nil ident); split; auto with gensym. constructor.
+- monadInv H; exists (@nil ident); split; auto with gensym. constructor.
 (* cons *)
-  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   exists (tmp1 ++ tmp2); split.
   econstructor; eauto with gensym.
diff --git a/common/AST.v b/common/AST.v
index eb34d675..268e13d5 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -464,6 +464,11 @@ Qed.
 
 (** * External functions *)
 
+(* Identifiers for profiling information *)
+Parameter profiling_id : Type. (* left abstract: no Coq definition is given *)
+Axiom profiling_id_eq : forall (x y : profiling_id), {x=y} + {x<>y}. (* decidable equality, assumed as an axiom; fed to [decide equality] in [external_function_eq] *)
+Definition profiling_kind := Z.t. (* type of the [kind] argument of [EF_profiling]: an integer *)
+
 (** For most languages, the functions composing the program are either
   internal functions, defined within the language, or external functions,
   defined outside.  External functions include system calls but also
@@ -514,10 +519,13 @@ Inductive external_function : Type :=
          used with caution, as it can invalidate the semantic
          preservation theorem.  Generated only if [-finline-asm] is
          given. *)
-  | EF_debug (kind: positive) (text: ident) (targs: list typ).
+  | EF_debug (kind: positive) (text: ident) (targs: list typ)
      (** Transport debugging information from the front-end to the generated
          assembly.  Takes zero, one or several arguments like [EF_annot].
          Unlike [EF_annot], produces no observable event. *)
+  | EF_profiling (id: profiling_id) (kind : profiling_kind).
+     (** Count one profiling event for this identifier and kind.
+         Takes no argument. Produces no observable event. *)
 
 (** The type signature of an external function. *)
 
@@ -535,6 +543,7 @@ Definition ef_sig (ef: external_function): signature :=
   | EF_annot_val kind text targ => mksignature (targ :: nil) targ cc_default
   | EF_inline_asm text sg clob => sg
   | EF_debug kind text targs => mksignature targs Tvoid cc_default
+  | EF_profiling id kind => mksignature nil Tvoid cc_default
   end.
 
 (** Whether an external function should be inlined by the compiler. *)
@@ -553,6 +562,7 @@ Definition ef_inline (ef: external_function) : bool :=
   | EF_annot_val kind Text rg => true
   | EF_inline_asm text sg clob => true
   | EF_debug kind text targs => true
+  | EF_profiling id kind => true
   end.
 
 (** Whether an external function must reload its arguments. *)
@@ -568,7 +578,7 @@ Definition ef_reloads (ef: external_function) : bool :=
 
 Definition external_function_eq: forall (ef1 ef2: external_function), {ef1=ef2} + {ef1<>ef2}.
 Proof.
-  generalize ident_eq string_dec signature_eq chunk_eq typ_eq list_eq_dec zeq Int.eq_dec; intros.
+  generalize profiling_id_eq ident_eq string_dec signature_eq chunk_eq typ_eq list_eq_dec zeq Int.eq_dec; intros.
   decide equality.
 Defined.
 Global Opaque external_function_eq.
diff --git a/common/Events.v b/common/Events.v
index 28bb992a..033e2e03 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -1378,6 +1378,11 @@ Inductive extcall_debug_sem (ge: Senv.t):
   | extcall_debug_sem_intro: forall vargs m,
       extcall_debug_sem ge vargs m E0 Vundef m.
 
+Inductive extcall_profiling_sem (ge: Senv.t):
+              list val -> mem -> trace -> val -> mem -> Prop :=
+  | extcall_profiling_sem_intro: forall vargs m,
+      extcall_profiling_sem ge vargs m E0 Vundef m.
+
 Lemma extcall_debug_ok:
   forall targs,
   extcall_properties extcall_debug_sem
@@ -1412,6 +1417,40 @@ Proof.
   split. constructor. auto.
 Qed.
 
+Lemma extcall_profiling_ok:
+  forall targs,
+  extcall_properties extcall_profiling_sem
+                     (mksignature targs Tvoid cc_default).
+Proof.
+  intros; constructor; intros.
+(* well typed *)
+- inv H. simpl. auto.
+(* symbols *)
+- inv H0. econstructor; eauto.
+(* valid blocks *)
+- inv H; auto.
+(* perms *)
+- inv H; auto.
+(* readonly *)
+- inv H; auto.
+(* mem extends *)
+- inv H.
+  exists Vundef; exists m1'; intuition.
+  econstructor; eauto.
+(* mem injects *)
+- inv H0.
+  exists f; exists Vundef; exists m1'; intuition.
+  econstructor; eauto.
+  red; intros; congruence.
+(* trace length *)
+- inv H; simpl; omega.
+(* receptive *)
+- inv H; inv H0. exists Vundef, m1; constructor.
+(* determ *)
+- inv H; inv H0.
+  split. constructor. auto.
+Qed.
+
 (** ** Semantics of known built-in functions. *)
 
 (** Some built-in functions and runtime support functions have known semantics
@@ -1530,6 +1569,7 @@ Definition external_call (ef: external_function): extcall_sem :=
   | EF_annot_val kind txt targ => extcall_annot_val_sem txt targ
   | EF_inline_asm txt sg clb => inline_assembly_sem txt sg
   | EF_debug kind txt targs => extcall_debug_sem
+  | EF_profiling id kind  => extcall_profiling_sem
   end.
 
 Theorem external_call_spec:
@@ -1537,18 +1577,19 @@ Theorem external_call_spec:
   extcall_properties (external_call ef) (ef_sig ef).
 Proof.
   intros. unfold external_call, ef_sig; destruct ef.
-  apply external_functions_properties.
-  apply builtin_or_external_sem_ok.
-  apply builtin_or_external_sem_ok.
-  apply volatile_load_ok.
-  apply volatile_store_ok.
-  apply extcall_malloc_ok.
-  apply extcall_free_ok.
-  apply extcall_memcpy_ok.
-  apply extcall_annot_ok.
-  apply extcall_annot_val_ok.
-  apply inline_assembly_properties.
-  apply extcall_debug_ok.
+- apply external_functions_properties.
+- apply builtin_or_external_sem_ok.
+- apply builtin_or_external_sem_ok.
+- apply volatile_load_ok.
+- apply volatile_store_ok.
+- apply extcall_malloc_ok.
+- apply extcall_free_ok.
+- apply extcall_memcpy_ok.
+- apply extcall_annot_ok.
+- apply extcall_annot_val_ok.
+- apply inline_assembly_properties.
+- apply extcall_debug_ok.
+- apply extcall_profiling_ok.
 Qed.
 
 Definition external_call_well_typed_gen ef := ec_well_typed (external_call_spec ef).
diff --git a/common/PrintAST.ml b/common/PrintAST.ml
index 3f718428..38bbfa47 100644
--- a/common/PrintAST.ml
+++ b/common/PrintAST.ml
@@ -47,6 +47,13 @@ let name_of_chunk = function
   | Many32 -> "any32"
   | Many64 -> "any64"
 
+(* Hexadecimal rendering (32 lowercase digits) of a 16-byte profiling
+   identifier.  The leading unit argument lets the function be used
+   with the "%a" directive of [sprintf], as [name_of_external] does.
+   [Digest.to_hex] raises [Invalid_argument] unless [x] is 16 bytes long. *)
+let spp_profiling_id () (x : Digest.t) : string =
+  Digest.to_hex x
+
 let name_of_external = function
   | EF_external(name, sg) -> sprintf "extern %S" (camlstring_of_coqstring name)
   | EF_builtin(name, sg) -> sprintf "builtin %S" (camlstring_of_coqstring name)
@@ -61,7 +68,9 @@ let name_of_external = function
   | EF_annot_val(kind,text, targ) ->  sprintf "annot_val %S" (camlstring_of_coqstring text)
   | EF_inline_asm(text, sg, clob) -> sprintf "inline_asm %S" (camlstring_of_coqstring text)
   | EF_debug(kind, text, targs) ->
-      sprintf "debug%d %S" (P.to_int kind) (extern_atom text)
+     sprintf "debug%d %S" (P.to_int kind) (extern_atom text)
+  | EF_profiling(id, kind) ->
+     sprintf "profiling %a %d" spp_profiling_id id (Z.to_int kind)
 
 let rec print_builtin_arg px oc = function
   | BA x -> px oc x
diff --git a/common/Sections.ml b/common/Sections.ml
index 839128a5..ea0b6dbc 100644
--- a/common/Sections.ml
+++ b/common/Sections.ml
@@ -17,7 +17,8 @@
 
 type section_name =
   | Section_text
-  | Section_data of bool          (* true = init data, false = uninit data *)
+  | Section_data of bool (* true = init data, false = uninit data *)
+                  * bool (* thread local? *)
   | Section_small_data of bool
   | Section_const of bool
   | Section_small_const of bool
@@ -47,8 +48,8 @@ type section_info = {
 }
 
 let default_section_info = {
-  sec_name_init = Section_data true;
-  sec_name_uninit = Section_data false;
+  sec_name_init = Section_data (true, false);
+  sec_name_uninit = Section_data (false, false);
   sec_writable = true;
   sec_executable = false;
   sec_access = Access_default
@@ -63,8 +64,13 @@ let builtin_sections = [
       sec_writable = false; sec_executable = true;
       sec_access = Access_default};
   "DATA",
-     {sec_name_init = Section_data true;
-      sec_name_uninit = Section_data false;
+     {sec_name_init = Section_data (true, false);
+      sec_name_uninit = Section_data (false, false);
+      sec_writable = true; sec_executable = false;
+      sec_access = Access_default};
+  "TDATA",
+     {sec_name_init = Section_data (true, true);
+      sec_name_uninit = Section_data (false, true);
       sec_writable = true; sec_executable = false;
       sec_access = Access_default};
   "SDATA",
@@ -175,7 +181,7 @@ let get_attr_section loc attr =
 
 (* Determine section for a variable definition *)
 
-let for_variable env loc id ty init =
+let for_variable env loc id ty init thrl =
   let attr = Cutil.attributes_of_type env ty in
   let readonly = List.mem C.AConst attr && not(List.mem C.AVolatile attr) in
   let si =
@@ -194,7 +200,8 @@ let for_variable env loc id ty init =
         let name =
           if readonly
           then if size <= !Clflags.option_small_const then "SCONST" else "CONST"
-          else if size <= !Clflags.option_small_data then "SDATA" else "DATA" in
+          else if thrl then "TDATA" (* thread-local data must not be diverted to the small data area *)
+          else if size <= !Clflags.option_small_data then "SDATA" else "DATA" in
         try
           Hashtbl.find current_section_table name
         with Not_found ->
diff --git a/common/Sections.mli b/common/Sections.mli
index d9fd9239..00c06c20 100644
--- a/common/Sections.mli
+++ b/common/Sections.mli
@@ -18,7 +18,8 @@
 
 type section_name =
   | Section_text
-  | Section_data of bool          (* true = init data, false = uninit data *)
+  | Section_data of bool (* true = init data, false = uninit data *)
+                  * bool (* thread local? *)
   | Section_small_data of bool
   | Section_const of bool
   | Section_small_const of bool
@@ -46,7 +47,7 @@ val define_section:
          -> ?writable:bool -> ?executable:bool -> ?access:access_mode -> unit -> unit
 val use_section_for: AST.ident -> string -> bool
 
-val for_variable: Env.t -> C.location -> AST.ident -> C.typ -> bool ->
+val for_variable: Env.t -> C.location -> AST.ident -> C.typ -> bool -> bool ->
                                           section_name * access_mode
 val for_function: Env.t -> C.location -> AST.ident -> C.attributes -> section_name list
 val for_stringlit: unit -> section_name
diff --git a/cparser/C.mli b/cparser/C.mli
index 15717565..3c271f3f 100644
--- a/cparser/C.mli
+++ b/cparser/C.mli
@@ -86,8 +86,11 @@ type attributes = attribute list
 
 type storage =
   | Storage_default (* used for toplevel names without explicit storage *)
+  | Storage_thread_local
   | Storage_extern
   | Storage_static
+  | Storage_thread_local_extern
+  | Storage_thread_local_static
   | Storage_auto    (* used for block-scoped names without explicit storage *)
   | Storage_register
 
diff --git a/cparser/Cabs.v b/cparser/Cabs.v
index 5f12e8a1..2dae061a 100644
--- a/cparser/Cabs.v
+++ b/cparser/Cabs.v
@@ -54,7 +54,7 @@ Inductive typeSpecifier := (* Merge all specifiers into one type *)
   | Tenum : option string -> option (list (string * option expression * loc)) -> list attribute -> typeSpecifier
 
 with storage :=
-  AUTO | STATIC | EXTERN | REGISTER | TYPEDEF
+  AUTO | STATIC | EXTERN | REGISTER | TYPEDEF | THREAD_LOCAL
 
 with cvspec :=
 | CV_CONST | CV_VOLATILE | CV_RESTRICT
diff --git a/cparser/Ceval.ml b/cparser/Ceval.ml
index ecf83779..7bae2fe2 100644
--- a/cparser/Ceval.ml
+++ b/cparser/Ceval.ml
@@ -354,7 +354,9 @@ and is_constant_lval env e =
       begin match Env.find_ident env id with
       | Env.II_ident(sto, _) ->
           begin match sto with
-          | Storage_default | Storage_extern | Storage_static -> true
+          | Storage_default | Storage_extern | Storage_static
+          | Storage_thread_local | Storage_thread_local_extern | Storage_thread_local_static
+            -> true
           | Storage_auto | Storage_register -> false
           end
       | Env.II_enum _ -> false   (* should not happen *)
diff --git a/cparser/Cleanup.ml b/cparser/Cleanup.ml
index 63ac8ac1..9f19395a 100644
--- a/cparser/Cleanup.ml
+++ b/cparser/Cleanup.ml
@@ -126,14 +126,14 @@ let add_enum e =
 *)
 
 let visible_decl (sto, id, ty, init) =
-  sto = Storage_default &&
+  (sto = Storage_default || sto = Storage_thread_local) &&
   match ty with TFun _ -> false | _ -> true
 
 let visible_fundef f =
   match f.fd_storage with
-  | Storage_default -> not f.fd_inline
-  | Storage_extern -> true
-  | Storage_static -> false
+  | Storage_default | Storage_thread_local -> not f.fd_inline
+  | Storage_extern | Storage_thread_local_extern -> true
+  | Storage_static | Storage_thread_local_static -> false
   | Storage_auto | Storage_register -> assert false
 
 let rec add_init_globdecls accu = function
diff --git a/cparser/Cprint.ml b/cparser/Cprint.ml
index 9aeec421..78970990 100644
--- a/cparser/Cprint.ml
+++ b/cparser/Cprint.ml
@@ -361,6 +361,9 @@ let storage pp = function
   | Storage_default -> ()
   | Storage_extern -> fprintf pp "extern "
   | Storage_static -> fprintf pp "static "
+  | Storage_thread_local -> fprintf pp "_Thread_local " (* trailing space, as for "extern " / "static ", separates the specifier from what follows *)
+  | Storage_thread_local_extern -> fprintf pp "extern _Thread_local "
+  | Storage_thread_local_static -> fprintf pp "static _Thread_local "
   | Storage_auto -> ()   (* used only in blocks, where it can be omitted *)
   | Storage_register -> fprintf pp "register "
 
diff --git a/cparser/Elab.ml b/cparser/Elab.ml
index 9e17cb7e..0504ad0b 100644
--- a/cparser/Elab.ml
+++ b/cparser/Elab.ml
@@ -152,6 +152,9 @@ let name_of_storage_class = function
   | Storage_default -> "<default>"
   | Storage_extern -> "'extern'"
   | Storage_static -> "'static'"
+  | Storage_thread_local -> "'_Thread_local'"
+  | Storage_thread_local_extern -> "'_Thread_local extern'"
+  | Storage_thread_local_static -> "'_Thread_local static'"
   | Storage_auto -> "'auto'"
   | Storage_register -> "'register'"
 
@@ -177,15 +180,29 @@ let combine_toplevel_definitions loc env s old_sto old_ty sto ty =
     | Storage_static,Storage_static
     | Storage_extern,Storage_extern
     | Storage_default,Storage_default -> sto
-    | _,Storage_static ->
+    | Storage_thread_local_static,Storage_thread_local_static
+    | Storage_thread_local_extern,Storage_thread_local_extern
+    | Storage_thread_local,Storage_thread_local -> sto
+    | _,Storage_static | _,Storage_thread_local_static ->
 	error loc "static declaration of '%s' follows non-static declaration" s;
         sto
     | Storage_static,_ -> Storage_static (* Static stays static *)
-    | Storage_extern,_ -> if is_function_type env new_ty then Storage_extern else sto
+    | Storage_thread_local_static,_ -> Storage_thread_local_static (* Thread-local static stays static *)
+    | (Storage_extern|Storage_thread_local_extern),_ -> if is_function_type env new_ty then Storage_extern else sto
     | Storage_default,Storage_extern ->
       if is_global_defined s && is_function_type env ty then
         warning loc Extern_after_definition "this extern declaration follows a non-extern definition and is ignored";
       Storage_extern
+    | Storage_thread_local,Storage_thread_local_extern ->
+      if is_global_defined s && is_function_type env ty then
+        warning loc Extern_after_definition "this extern declaration follows a non-extern definition and is ignored";
+      Storage_thread_local_extern (* keep the thread-local qualifier; plain Storage_extern would drop it *)
+    | Storage_thread_local, Storage_default ->
+       error loc "Non thread-local declaration follows thread-local";
+       sto
+    | Storage_default, (Storage_thread_local|Storage_thread_local_extern) ->
+       error loc "Thread-local declaration follows non thread-local";
+       sto
     | _,Storage_extern -> old_sto
     (* "auto" and "register" don't appear in toplevel definitions.
        Normally this was checked earlier.  Generate error message
@@ -639,13 +656,26 @@ let rec elab_specifier ?(only = false) loc env specifier =
       restrict := cv = CV_RESTRICT;
       attr := add_attributes (elab_cvspec env cv) !attr
   | SpecStorage st ->
-      if !sto <> Storage_default && st <> TYPEDEF then
+      if !sto <> Storage_default && st <> TYPEDEF && st <> THREAD_LOCAL then
         error loc "multiple storage classes in declaration specifier";
       begin match st with
       | AUTO -> sto := Storage_auto
       | STATIC -> sto := Storage_static
       | EXTERN -> sto := Storage_extern
       | REGISTER -> sto := Storage_register
+      | THREAD_LOCAL ->
+         sto := (match !sto with
+                 | Storage_static | Storage_thread_local_static ->
+                    Storage_thread_local_static
+                 | Storage_extern | Storage_thread_local_extern ->
+                    Storage_thread_local_extern
+                 | Storage_default | Storage_thread_local ->
+                    Storage_thread_local
+                 | Storage_auto|Storage_register ->
+                    error loc "_Thread_local on auto or register variable";
+                    !sto
+                )
+
       | TYPEDEF ->
           if !typedef then
             error loc "multiple uses of 'typedef'";
diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll
index e44a330f..b36b3e81 100644
--- a/cparser/Lexer.mll
+++ b/cparser/Lexer.mll
@@ -72,6 +72,7 @@ let () =
       ("goto", fun loc -> GOTO loc);
       ("if", fun loc -> IF loc);
       ("inline", fun loc -> INLINE loc);
+      ("_Thread_local", fun loc -> THREAD_LOCAL loc);
       ("_Noreturn", fun loc -> NORETURN loc);
       ("int", fun loc -> INT loc);
       ("long", fun loc -> LONG loc);
@@ -542,6 +543,7 @@ and singleline_comment = parse
       | Pre_parser.IF loc -> loop (Parser.IF_ loc)
       | Pre_parser.INC loc -> loop (Parser.INC loc)
       | Pre_parser.INLINE loc -> loop (Parser.INLINE loc)
+      | Pre_parser.THREAD_LOCAL loc -> loop (Parser.THREAD_LOCAL loc)
       | Pre_parser.INT loc -> loop (Parser.INT loc)
       | Pre_parser.LBRACE loc -> loop (Parser.LBRACE loc)
       | Pre_parser.LBRACK loc -> loop (Parser.LBRACK loc)
diff --git a/cparser/Parser.vy b/cparser/Parser.vy
index 03bfa590..4f3b9789 100644
--- a/cparser/Parser.vy
+++ b/cparser/Parser.vy
@@ -32,7 +32,7 @@ Require Cabs.
   LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN
 
 %token<Cabs.loc> LPAREN RPAREN LBRACK RBRACK LBRACE RBRACE DOT COMMA
-  SEMICOLON ELLIPSIS TYPEDEF EXTERN STATIC RESTRICT AUTO REGISTER INLINE
+  SEMICOLON ELLIPSIS TYPEDEF EXTERN STATIC RESTRICT AUTO REGISTER INLINE THREAD_LOCAL
   NORETURN CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
   STRUCT UNION ENUM UNDERSCORE_BOOL PACKED ALIGNAS ATTRIBUTE ASM
 
@@ -397,6 +397,8 @@ storage_class_specifier:
     { (Cabs.AUTO, loc) }
 | loc = REGISTER
     { (Cabs.REGISTER, loc) }
+| loc = THREAD_LOCAL
+    { (Cabs.THREAD_LOCAL, loc) }
 
 (* 6.7.2 *)
 type_specifier:
diff --git a/cparser/Rename.ml b/cparser/Rename.ml
index 64412194..aeeb9326 100644
--- a/cparser/Rename.ml
+++ b/cparser/Rename.ml
@@ -257,13 +257,16 @@ let rec reserve_public env = function
         match dcl.gdesc with
         | Gdecl(sto, id, _, _) ->
             begin match sto with
-            | Storage_default | Storage_extern -> enter_public env id
+            | Storage_default | Storage_thread_local
+            | Storage_extern | Storage_thread_local_extern -> enter_public env id
+            | Storage_thread_local_static -> env (* not public, like Storage_static; previously fell into [assert false] *)
             | Storage_static -> env
             | _ -> assert false
             end
         | Gfundef f ->
             begin match f.fd_storage with
-            | Storage_default | Storage_extern -> enter_public env f.fd_name
+            | Storage_default | Storage_extern
+              -> enter_public env f.fd_name
             | Storage_static -> env
             | _ -> assert false
             end
diff --git a/cparser/deLexer.ml b/cparser/deLexer.ml
index de0e9b6e..43c1a679 100644
--- a/cparser/deLexer.ml
+++ b/cparser/deLexer.ml
@@ -30,6 +30,7 @@ let delex (symbol : string) : string =
   | "BUILTIN_VA_ARG" -> "__builtin_va_arg"
   | "CONST" -> "const"
   | "INLINE" -> "inline"
+  | "THREAD_LOCAL" -> "_Thread_local"
   | "PACKED" -> "__packed__"
   | "RESTRICT" -> "restrict"
   | "SIGNED" -> "signed"
diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly
index 669ecf5e..e21a3519 100644
--- a/cparser/pre_parser.mly
+++ b/cparser/pre_parser.mly
@@ -54,7 +54,7 @@
   COLON AND MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN LEFT_ASSIGN
   RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN LPAREN RPAREN LBRACK RBRACK
   LBRACE RBRACE DOT COMMA SEMICOLON ELLIPSIS TYPEDEF EXTERN STATIC RESTRICT
-  AUTO REGISTER INLINE NORETURN CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE
+  AUTO REGISTER INLINE THREAD_LOCAL NORETURN CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE
   UNDERSCORE_BOOL CONST VOLATILE VOID STRUCT UNION ENUM CASE DEFAULT IF ELSE
   SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN BUILTIN_VA_ARG ALIGNOF
   ATTRIBUTE ALIGNAS PACKED ASM BUILTIN_OFFSETOF
@@ -430,6 +430,7 @@ storage_class_specifier_no_typedef:
 | STATIC
 | AUTO
 | REGISTER
+| THREAD_LOCAL
     {}
 
 (* [declaration_specifier_no_type] matches declaration specifiers
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 467d41aa..14d15ba6 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -26,7 +26,10 @@ let option_ffloatconstprop = ref 2
 let option_ftailcalls = ref true
 let option_fconstprop = ref true
 let option_fcse = ref true
-let option_fcse2 = ref true
+let option_fcse2 = ref false
+let option_fcse3 = ref true
+let option_fcse3_alias_analysis = ref true
+let option_fcse3_across_calls = ref false
 let option_fredundancy = ref true
 let option_fduplicate = ref (-1)
 let option_finvertcond = ref true
@@ -77,8 +80,16 @@ let use_standard_headers =  ref Configuration.has_standard_headers
 let option_fglobaladdrtmp = ref false
 let option_fglobaladdroffset = ref false
 let option_fxsaddr = ref true  
-let option_faddx = ref false  
+let option_faddx = ref false
+let option_fmadd = ref true
+let option_div_i32 = ref "stsud"
+let option_div_i64 = ref "stsud" 
 let option_fcoalesce_mem = ref true
 let option_fforward_moves = ref false
+let option_fmove_loop_invariants = ref true
+let option_fnontrap_loads = ref true
 let option_all_loads_nontrap = ref false
 let option_inline_auto_threshold = ref 0
+let option_profile_arcs = ref false
+let option_fbranch_probabilities = ref true
+let option_debug_compcert = ref 0
diff --git a/driver/Compiler.v b/driver/Compiler.vexpand
index 002c55fe..0f59aab7 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.vexpand
@@ -35,23 +35,7 @@ Require Cshmgen.
 Require Cminorgen.
 Require Selection.
 Require RTLgen.
-Require Tailcall.
-Require Inlining.
-Require Renumber.
-Require Duplicate.
-Require Constprop.
-Require CSE.
-Require ForwardMoves.
-Require CSE2.
-Require Deadcode.
-Require Unusedglob.
-Require Allnontrap.
-Require Allocation.
-Require Tunneling.
-Require Linearize.
-Require CleanupLabels.
-Require Debugvar.
-Require Stacking.
+EXPAND_RTL_REQUIRE
 Require Asmgen.
 (** Proofs of semantic preservation. *)
 Require SimplExprproof.
@@ -60,23 +44,7 @@ Require Cshmgenproof.
 Require Cminorgenproof.
 Require Selectionproof.
 Require RTLgenproof.
-Require Tailcallproof.
-Require Inliningproof.
-Require Renumberproof.
-Require Duplicateproof.
-Require Constpropproof.
-Require CSEproof.
-Require ForwardMovesproof.
-Require CSE2proof.
-Require Deadcodeproof.
-Require Unusedglobproof.
-Require Allnontrapproof.
-Require Allocproof.
-Require Tunnelingproof.
-Require Linearizeproof.
-Require CleanupLabelsproof.
-Require Debugvarproof.
-Require Stackingproof.
+EXPAND_RTL_REQUIRE_PROOF
 Require Import Asmgenproof.
 (** Command-line flags. *)
 Require Import Compopts.
@@ -128,40 +96,9 @@ Definition partial_if {A: Type}
 Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
    OK f
    @@ print (print_RTL 0)
-   @@ total_if Compopts.optim_tailcalls (time "Tail calls" Tailcall.transf_program)
-   @@ print (print_RTL 1)
-  @@@ time "Inlining" Inlining.transf_program
-   @@ print (print_RTL 2)
-   @@ time "Renumbering" Renumber.transf_program
-   @@ print (print_RTL 3)
-  @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program)
-   @@ print (print_RTL 4)
-   @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
-   @@ print (print_RTL 5)
-   @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program)
-   @@ print (print_RTL 6)
-  @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program)
-   @@ print (print_RTL 7)
-   @@ total_if Compopts.optim_CSE2 (time "CSE2" CSE2.transf_program)
-   @@ print (print_RTL 8)
-   @@ total_if Compopts.optim_forward_moves ForwardMoves.transf_program
-   @@ print (print_RTL 9)
-  @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program)
-   @@ print (print_RTL 10)
-   @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program
-   @@ print (print_RTL 11)
-  @@@ time "Unused globals" Unusedglob.transform_program
-   @@ print (print_RTL 12)
-  @@@ time "Register allocation" Allocation.transf_program
-   @@ print print_LTL
-   @@ time "Branch tunneling" Tunneling.tunnel_program
-  @@@ time "CFG linearization" Linearize.transf_program
-   @@ time "Label cleanup" CleanupLabels.transf_program
-  @@@ partial_if Compopts.debug (time "Debugging info for local variables" Debugvar.transf_program)
-  @@@ time "Mach generation" Stacking.transf_program
-   @@ print print_Mach
+EXPAND_RTL_TRANSF_PROGRAM
   @@@ time "Total Mach->Asm generation" Asmgen.transf_program.
-
+   
 Definition transf_cminor_program (p: Cminor.program) : res Asm.program :=
    OK p
    @@ print print_Cminor
@@ -251,24 +188,7 @@ Definition CompCert's_passes :=
   ::: mkpass Cminorgenproof.match_prog
   ::: mkpass Selectionproof.match_prog
   ::: mkpass RTLgenproof.match_prog
-  ::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog)
-  ::: mkpass Inliningproof.match_prog
-  ::: mkpass Renumberproof.match_prog
-  ::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog)
-  ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog)
-  ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog)
-  ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog)
-  ::: mkpass (match_if Compopts.optim_CSE2 CSE2proof.match_prog)
-  ::: mkpass (match_if Compopts.optim_forward_moves ForwardMovesproof.match_prog)
-  ::: mkpass (match_if Compopts.optim_redundancy Deadcodeproof.match_prog)
-  ::: mkpass (match_if Compopts.all_loads_nontrap Allnontrapproof.match_prog)
-  ::: mkpass Unusedglobproof.match_prog
-  ::: mkpass Allocproof.match_prog
-  ::: mkpass Tunnelingproof.match_prog
-  ::: mkpass Linearizeproof.match_prog
-  ::: mkpass CleanupLabelsproof.match_prog
-  ::: mkpass (match_if Compopts.debug Debugvarproof.match_prog)
-  ::: mkpass Stackingproof.match_prog
+EXPAND_RTL_MKPASS
   ::: mkpass Asmgenproof.match_prog
   ::: pass_nil _.
 
@@ -299,24 +219,7 @@ Proof.
   destruct (RTLgen.transl_program p5) as [p6|e] eqn:P6; cbn in T; try discriminate.
   unfold transf_rtl_program, time in T. rewrite ! compose_print_identity in T.
   cbn in T.
-  set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *.
-  destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; cbn in T; try discriminate.
-  set (p9 := Renumber.transf_program p8) in *.
-  destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; cbn in T; try discriminate.
-  set (p11 := total_if optim_constprop Constprop.transf_program p10) in *.
-  set (p12 := total_if optim_constprop Renumber.transf_program p11) in *.
-  destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; cbn in T; try discriminate.
-  set (p13bis := total_if optim_CSE2 CSE2.transf_program p13) in *.
-  set (p13ter := total_if optim_forward_moves ForwardMoves.transf_program p13bis) in *.
-  destruct (partial_if optim_redundancy Deadcode.transf_program p13ter) as [p14|e] eqn:P14; cbn in T; try discriminate.
-  set (p14bis := total_if all_loads_nontrap Allnontrap.transf_program p14) in *.
-  destruct (Unusedglob.transform_program p14bis) as [p15|e] eqn:P15; cbn in T; try discriminate.
-  destruct (Allocation.transf_program p15) as [p16|e] eqn:P16; cbn in T; try discriminate.
-  set (p17 := Tunneling.tunnel_program p16) in *.
-  destruct (Linearize.transf_program p17) as [p18|e] eqn:P18; cbn in T; try discriminate.
-  set (p19 := CleanupLabels.transf_program p18) in *.
-  destruct (partial_if debug Debugvar.transf_program p19) as [p20|e] eqn:P20; cbn in T; try discriminate.
-  destruct (Stacking.transf_program p20) as [p21|e] eqn:P21; cbn in T; try discriminate.
+EXPAND_RTL_PROOF
   unfold match_prog; simpl.
   exists p1; split. apply SimplExprproof.transf_program_match; auto.
   exists p2; split. apply SimplLocalsproof.match_transf_program; auto.
@@ -324,24 +227,7 @@ Proof.
   exists p4; split. apply Cminorgenproof.transf_program_match; auto.
   exists p5; split. apply Selectionproof.transf_program_match; auto.
   exists p6; split. apply RTLgenproof.transf_program_match; auto.
-  exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match.
-  exists p8; split. apply Inliningproof.transf_program_match; auto.
-  exists p9; split. apply Renumberproof.transf_program_match; auto.
-  exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto.
-  exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match.
-  exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match.
-  exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match.
-  exists p13bis; split. apply total_if_match. apply CSE2proof.transf_program_match.
-  exists p13ter; split. eapply total_if_match; eauto. apply ForwardMovesproof.transf_program_match.
-  exists p14; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match.
-  exists p14bis; split. eapply total_if_match; eauto. apply Allnontrapproof.transf_program_match.
-  exists p15; split. apply Unusedglobproof.transf_program_match; auto.
-  exists p16; split. apply Allocproof.transf_program_match; auto.
-  exists p17; split. apply Tunnelingproof.transf_program_match.
-  exists p18; split. apply Linearizeproof.transf_program_match; auto.
-  exists p19; split. apply CleanupLabelsproof.transf_program_match; auto.
-  exists p20; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match.
-  exists p21; split. apply Stackingproof.transf_program_match; auto.
+EXPAND_RTL_PROOF2
   exists tp; split. apply Asmgenproof.transf_program_match; auto.
   reflexivity.
 Qed.
@@ -393,7 +279,9 @@ Ltac DestructM :=
       destruct H as (p & M & MM); clear H
   end.
   repeat DestructM. subst tp.
-  assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p25)).
+  assert (F: forward_simulation (Cstrategy.semantics p)
+EXPAND_ASM_SEMANTICS
+         ).
   {
   eapply compose_forward_simulations.
     eapply SimplExprproof.transl_program_correct; eassumption.
@@ -407,31 +295,9 @@ Ltac DestructM :=
     eapply Selectionproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply RTLgenproof.transf_program_correct; eassumption.
+EXPAND_RTL_FORWARD_SIMULATIONS
   eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact Tailcallproof.transf_program_correct.
-  eapply compose_forward_simulations.
-    eapply Inliningproof.transf_program_correct; eassumption.
-  eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption.
-  eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct.
-  eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct.
-  eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact Renumberproof.transf_program_correct.
-  eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact CSEproof.transf_program_correct.
-  eapply compose_forward_simulations.
-  eapply match_if_simulation. eassumption. exact CSE2proof.transf_program_correct.
-  eapply compose_forward_simulations.
-  eapply match_if_simulation. eassumption. exact ForwardMovesproof.transf_program_correct; eassumption.
-  eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact Deadcodeproof.transf_program_correct; eassumption.
-  eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact Allnontrapproof.transf_program_correct.
-  eapply compose_forward_simulations.
-    eapply Unusedglobproof.transf_program_correct; eassumption.
-  eapply compose_forward_simulations.
-    eapply Allocproof.transf_program_correct; eassumption.
+    eapply Allocationproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply Tunnelingproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 848657e5..3c5ccf36 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -42,6 +42,18 @@ Parameter optim_CSE: unit -> bool.
 (** Flag -fcse2.  For DMonniaux's common subexpression elimination. *)
 Parameter optim_CSE2: unit -> bool.
 
+(** Flag -fcse3.  For DMonniaux's common subexpression elimination. *)
+Parameter optim_CSE3: unit -> bool.
+
+(** Flag -fcse3-alias-analysis.  For DMonniaux's common subexpression elimination. Perform a simple alias analysis. *)
+Parameter optim_CSE3_alias_analysis: unit -> bool.
+
+(** Flag -fcse3-across-calls. For DMonniaux's common subexpression elimination. Propagate information across function calls (may increase register pressure). *)
+Parameter optim_CSE3_across_calls: unit -> bool.
+
+(** Flag -fmove-loop-invariants. *)
+Parameter optim_move_loop_invariants: unit -> bool.
+
 (** Flag -fredundancy.  For dead code elimination. *)
 Parameter optim_redundancy: unit -> bool.
 
@@ -60,6 +72,9 @@ Parameter optim_xsaddr: unit -> bool.
 (** FIXME TEMPORARY Flag -fcoaelesce-mem. Fuse (default true) *)
 Parameter optim_coalesce_mem: unit -> bool.
 
+(** FIXME TEMPORARY Flag -fmadd. Fuse (default true) *)
+Parameter optim_madd: unit -> bool.
+
 (** FIXME TEMPORARY Flag -faddx. Fuse (default false) *)
 Parameter optim_addx: unit -> bool.
 
@@ -75,6 +90,12 @@ Parameter all_loads_nontrap: unit -> bool.
 (** Flag -fforward-moves. Forward moves after CSE. *)
 Parameter optim_forward_moves: unit -> bool.
 
+(** Flag -fprofile-arcs. Add profiling logger. *)
+Parameter profile_arcs : unit -> bool.
+
+(** Flag -fbranch-probabilities. Use profiling information if available. *)
+Parameter branch_probabilities : unit -> bool.
+
 (* TODO is there a more appropriate place? *)
 Require Import Coqlib.
 Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 388482a0..b9060ca7 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -196,7 +196,11 @@ Processing options:
   -ffloat-const-prop <n>  Control constant propagation of floats
                    (<n>=0: none, <n>=1: limited, <n>=2: full; default is full)
   -fcse          Perform common subexpression elimination [on]
-  -fcse2         Perform inter-loop common subexpression elimination [on]
+  -fcse2         Perform inter-loop common subexpression elimination [off]
+  -fcse3         Perform inter-loop common subexpression elimination [on]
+  -fcse3-alias-analysis Perform inter-loop common subexpression elimination with alias analysis [on]
+  -fcse3-across-calls   Propagate CSE3 information across function calls [off]
+  -fmove-loop-invariants Perform loop-invariant code motion [off]
   -fredundancy   Perform redundancy elimination [on]
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on]
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
@@ -222,7 +226,10 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>)
   -falign-functions <n>  Set alignment (in bytes) of function entry points
   -falign-branch-targets <n>  Set alignment (in bytes) of branch targets
   -falign-cond-branches <n>  Set alignment (in bytes) of conditional branches
-  -fcommon       Put uninitialized globals in the common section [on].
+  -fcommon       Put uninitialized globals in the common section [on]
+  -fprofile-arcs  Profile branches [off].
+  -fprofile-use= filename  Use profiling information in filename
+  -fbranch-probabilities Use profiling information (if available) for branches [on]
 |} ^
  target_help ^
  toolchain_help ^
@@ -270,7 +277,7 @@ let dump_mnemonics destfile =
 
 let optimization_options = [
     option_ftailcalls; option_fifconversion; option_fconstprop;
-    option_fcse; option_fcse2;
+    option_fcse; option_fcse2; option_fcse3;
     option_fpostpass;
     option_fredundancy; option_finline; option_finline_functions_called_once;
 ]
@@ -289,6 +296,10 @@ let cmdline_actions =
     [Exact("-f" ^ name ^ "="), String 
       (fun s -> (strref := (if s == "" then "list" else s)); ref := true)
      ] in
+  let f_str name strref default = (* option -f<name>=<s>: stores <s> into strref, or default when <s> is empty *)
+    [Exact("-f" ^ name ^ "="), String 
+      (fun s -> (strref := (if s = "" then default else s))) (* structural [=]: physical [==] is not guaranteed to hold between two equal strings, so the empty case could be missed *)
+     ] in
   let check_align n =
     if n <= 0 || ((n land (n - 1)) <> 0) then
       error no_loc "requested alignment %d is not a power of 2" n
@@ -327,7 +338,9 @@ let cmdline_actions =
   _Regexp "-O[123]$", Unit (set_all optimization_options);
   Exact "-Os", Set option_Osize;
   Exact "-Obranchless", Set option_Obranchless;
+  Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s);
   Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n);
+  Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n);
   Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
   Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
   Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); 
@@ -397,6 +410,10 @@ let cmdline_actions =
   @ f_opt "const-prop" option_fconstprop
   @ f_opt "cse" option_fcse
   @ f_opt "cse2" option_fcse2
+  @ f_opt "cse3" option_fcse3
+  @ f_opt "cse3-alias-analysis" option_fcse3_alias_analysis
+  @ f_opt "cse3-across-calls" option_fcse3_across_calls
+  @ f_opt "move-loop-invariants" option_fmove_loop_invariants
   @ f_opt "redundancy" option_fredundancy
   @ f_opt "postpass" option_fpostpass
   @ [ Exact "-fduplicate", Integer (fun n -> option_fduplicate := n) ]
@@ -408,11 +425,17 @@ let cmdline_actions =
   @ f_opt "globaladdrtmp" option_fglobaladdrtmp
   @ f_opt "globaladdroffset" option_fglobaladdroffset
   @ f_opt "xsaddr" option_fxsaddr
+  @ f_str "div-i32" option_div_i32 "stsud"
+  @ f_str "div-i64" option_div_i64 "stsud"
   @ f_opt "addx" option_faddx
+  @ f_opt "madd" option_fmadd
+  @ f_opt "nontrap-loads" option_fnontrap_loads
   @ f_opt "coalesce-mem" option_fcoalesce_mem
   @ f_opt "all-loads-nontrap" option_all_loads_nontrap
   @ f_opt "forward-moves" option_fforward_moves
-(* Code generation options *)
+ (* Code generation options *)
+  @ f_opt "profile-arcs" option_profile_arcs
+  @ f_opt "branch-probabilities" option_fbranch_probabilities
   @ f_opt "fpu" option_ffpu
   @ f_opt "sse" option_ffpu (* backward compatibility *)
   @ [
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 9b568951..b40d444a 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -36,6 +36,9 @@ Require Parser.
 Require Initializers.
 Require Asmaux.
 
+Require CSE3.
+Require CSE3analysis.
+
 (* Standard lib *)
 Require Import ExtrOcamlBasic.
 Require Import ExtrOcamlString.
@@ -84,6 +87,9 @@ Extract Inlined Constant Inlining.inlining_info => "Inliningaux.inlining_info".
 Extract Inlined Constant Inlining.inlining_analysis => "Inliningaux.inlining_analysis".
 Extraction Inline Inlining.ret Inlining.bind.
 
+(* Loop invariant code motion *)
+Extract Inlined Constant LICM.gen_injections => "LICMaux.gen_injections".
+
 (* Allocation *)
 Extract Constant Allocation.regalloc => "Regalloc.regalloc".
 
@@ -113,6 +119,15 @@ Extract Constant Compopts.optim_CSE =>
   "fun _ -> !Clflags.option_fcse".
 Extract Constant Compopts.optim_CSE2 =>
   "fun _ -> !Clflags.option_fcse2".
+Extract Constant Compopts.optim_CSE3 =>
+  "fun _ -> !Clflags.option_fcse3".
+Extract Constant Compopts.optim_CSE3_alias_analysis =>
+  "fun _ -> !Clflags.option_fcse3_alias_analysis".
+Extract Constant Compopts.optim_CSE3_across_calls =>
+  "fun _ -> !Clflags.option_fcse3_across_calls".
+Extract Constant Compopts.optim_move_loop_invariants =>
+  "fun _ -> !Clflags.option_fmove_loop_invariants".
+
 Extract Constant Compopts.optim_redundancy =>
   "fun _ -> !Clflags.option_fredundancy".
 Extract Constant Compopts.optim_postpass =>
@@ -129,6 +144,8 @@ Extract Constant Compopts.optim_xsaddr =>
   "fun _ -> !Clflags.option_fxsaddr".
 Extract Constant Compopts.optim_addx =>
   "fun _ -> !Clflags.option_faddx".
+Extract Constant Compopts.optim_madd =>
+  "fun _ -> !Clflags.option_fmadd".
 Extract Constant Compopts.optim_coalesce_mem =>
   "fun _ -> !Clflags.option_fcoalesce_mem".
 Extract Constant Compopts.optim_forward_moves =>
@@ -137,6 +154,10 @@ Extract Constant Compopts.va_strict =>
   "fun _ -> false".
 Extract Constant Compopts.all_loads_nontrap =>
   "fun _ -> !Clflags.option_all_loads_nontrap".
+Extract Constant Compopts.profile_arcs =>
+"fun _ -> !Clflags.option_profile_arcs".
+Extract Constant Compopts.branch_probabilities =>
+  "fun _ -> !Clflags.option_fbranch_probabilities".
 
 (* Compiler *)
 Extract Constant Compiler.print_Clight => "PrintClight.print_if".
@@ -147,9 +168,17 @@ Extract Constant Compiler.print_Mach => "PrintMach.print_if".
 Extract Constant Compiler.print => "fun (f: 'a -> unit) (x: 'a) -> f x; x".
 Extract Constant Compiler.time  => "Timing.time_coq".
 Extract Constant Compopts.time  => "Timing.time_coq".
-
 (*Extraction Inline Compiler.apply_total Compiler.apply_partial.*)
 
+(* Profiling *)
+Extract Constant AST.profiling_id => "Digest.t".
+Extract Constant AST.profiling_id_eq => "Digest.equal".
+Extract Constant Profiling.function_id => "Profilingaux.function_id".
+Extract Constant Profiling.branch_id => "Profilingaux.branch_id".
+Extract Constant ProfilingExploit.function_id => "Profilingaux.function_id".
+Extract Constant ProfilingExploit.branch_id => "Profilingaux.branch_id".
+Extract Constant ProfilingExploit.condition_oracle => "Profilingaux.condition_oracle".
+
 (* Cabs *)
 Extract Constant Cabs.loc =>
 "{ lineno : int;
@@ -160,6 +189,12 @@ Extract Constant Cabs.loc =>
 Extract Inlined Constant Cabs.string => "String.t".
 Extract Constant Cabs.char_code => "int64".
 
+Extract Inlined Constant CSE3.preanalysis => "CSE3analysisaux.preanalysis".
+
+Extract Inductive HashedSet.PSet_internals.pset => "HashedSetaux.pset" [ "HashedSetaux.empty" "HashedSetaux.node" ] "HashedSetaux.pset_match".
+
+Extract Inlined Constant HashedSet.PSet_internals.pset_eq => "(==)" (* "HashedSetaux.eq" *).
+
 (* Processor-specific extraction directives *)
 
 Load extractionMachdep.
@@ -182,6 +217,7 @@ Set Extraction AccessOpaque.
 Cd "extraction".
 
 Separate Extraction
+   CSE3analysis.internal_analysis CSE3analysis.eq_depends_on_mem
    Compiler.transf_c_program Compiler.transf_cminor_program
    Cexec.do_initial_state Cexec.do_step Cexec.at_final_state
    Ctypes.merge_attributes Ctypes.remove_attributes Ctypes.build_composite_env
@@ -204,4 +240,5 @@ Separate Extraction
    Floats.Float32.from_parsed Floats.Float.from_parsed
    Globalenvs.Senv.invert_symbol
    Parser.translation_unit_file
-   Compopts.optim_postpass.
+   Compopts.optim_postpass
+   Archi.has_notrap_loads.
diff --git a/lib/HashedSet.v b/lib/HashedSet.v
new file mode 100644
index 00000000..00e01612
--- /dev/null
+++ b/lib/HashedSet.v
@@ -0,0 +1,1402 @@
+Require Import ZArith.
+Require Import Bool.
+Require Import List.
+Require Coq.Logic.Eqdep_dec.
+
+(* begin from Maps *)
+Fixpoint prev_append (i j: positive) {struct i} : positive := (* peels the constructors of [i], outermost first, and wraps each one around the accumulator [j] *)
+  match i with
+  | xH => j (* [i] is exhausted: the accumulator is the result *)
+  | xI i' => prev_append i' (xI j)
+  | xO i' => prev_append i' (xO j)
+  end.
+
+Definition prev (i: positive) : positive := (* [i] with its sequence of [xO]/[xI] constructors reversed; an involution, see [prev_involutive] *)
+  prev_append i xH.
+
+Lemma prev_append_prev i j:
+  prev (prev_append i j) = prev_append j i.
+Proof.
+  revert j. unfold prev.
+  induction i as [i IH|i IH|]. 3: reflexivity.
+  intros j. simpl. rewrite IH. reflexivity.
+  intros j. simpl. rewrite IH. reflexivity.
+Qed.
+
+Lemma prev_involutive i :
+  prev (prev i) = i.
+Proof (prev_append_prev i xH).
+
+Lemma prev_append_inj i j j' :
+  prev_append i j = prev_append i j' -> j = j'.
+Proof.
+  revert j j'.
+  induction i as [i Hi|i Hi|]; intros j j' H; auto;
+    specialize (Hi _ _ H); congruence.
+Qed.
+
+(* end from Maps *)
+
+Lemma orb_idem: forall b, orb b b = b.
+Proof.
+  destruct b; reflexivity.
+Qed.
+
+Lemma andb_idem: forall b, andb b b = b.
+Proof.
+  destruct b; reflexivity.
+Qed.
+
+Lemma andb_negb_false: forall b, andb b (negb b) = false.
+Proof.
+  destruct b; reflexivity.
+Qed.
+
+Hint Rewrite orb_false_r andb_false_r andb_true_r orb_true_r orb_idem andb_idem  andb_negb_false : pset.
+
+Module PSet_internals.
+Inductive pset : Type := (* sets of [positive], stored as a binary trie indexed by the constructors of the element *)
+| Empty : pset (* contains no element, see [gempty] *)
+| Node : pset -> bool -> pset -> pset. (* [Node b0 f b1]: [f] tells whether [xH] is a member; [b0] holds the [xO _] elements and [b1] the [xI _] elements, see [contains] *)
+
+Definition empty := Empty.
+
+Definition is_empty x :=
+  match x with
+  | Empty => true
+  | Node _ _ _ => false
+  end.
+
+Fixpoint wf x := (* canonical-form check: rejects any trie containing [Node Empty false Empty] *)
+  match x with
+  | Empty => true
+  | Node b0 f b1 =>
+    (wf b0) && (wf b1) &&
+    ((negb (is_empty b0)) || f || (negb (is_empty b1))) (* at least one child is a [Node], or the flag is set *)
+  end.
+
+Definition iswf x := (wf x)=true.
+  
+Lemma empty_wf : iswf empty.
+Proof.
+  reflexivity.
+Qed.
+
+Definition pset_eq : (* decidable structural equality on tries; extraction.v extracts this constant as OCaml physical equality [(==)] *)
+  forall s s': pset, { s=s' } + { s <> s' }.
+Proof.
+  induction s; destruct s'; repeat decide equality.
+Qed.
+
+Fixpoint contains (s : pset) (i : positive) {struct i} : bool := (* membership test: follows the constructors of [i] down the trie *)
+  match s with
+  | Empty => false
+  | Node b0 f b1 =>
+    match i with
+    | xH => f (* end of the path: the flag of this node is the answer *)
+    | xO ii => contains b0 ii
+    | xI ii => contains b1 ii
+    end
+  end.
+
+Lemma gempty :
+  forall i : positive,
+    contains Empty i = false.
+Proof.
+  destruct i; simpl; reflexivity.
+Qed.
+
+Hint Resolve gempty : pset.
+Hint Rewrite gempty : pset.
+
+Definition node (b0 : pset) (f : bool) (b1 : pset) : pset := (* smart constructor: same members as [Node b0 f b1] (see [gnode]) and preserves [iswf] (see [wf_node]) *)
+  match b0, f, b1 with
+  | Empty, false, Empty => Empty (* collapse the only non-canonical shape *)
+  | _, _, _ => Node b0 f b1
+  end.
+
+Lemma wf_node :
+  forall b0 f b1,
+    iswf b0 -> iswf b1 -> iswf (node b0 f b1).
+Proof.
+  destruct b0; destruct f; destruct b1; simpl.
+  all: unfold iswf; simpl; intros; trivial.
+  all: autorewrite with pset; trivial.
+  all: rewrite H.
+  all: rewrite H0.
+  all: reflexivity.
+Qed.
+
+Hint Resolve wf_node: pset.
+
+Lemma gnode :
+  forall b0 f b1 i,
+    contains (node b0 f b1) i =
+    contains (Node b0 f b1) i.
+Proof.
+  destruct b0; simpl; trivial.
+  destruct f; simpl; trivial.
+  destruct b1; simpl; trivial.
+  intro.
+  rewrite gempty.
+  destruct i; simpl; trivial.
+  all: symmetry; apply gempty.
+Qed.
+
+Hint Rewrite gnode : pset.
+
+Fixpoint add (i : positive) (s : pset) {struct i} : pset := (* inserts [i] into [s]; the result is always a [Node], see [add_nonempty] *)
+  match s with
+  | Empty => (* builds a fresh branch leading to [i] *)
+    match i with
+    | xH => Node Empty true Empty
+    | xO ii => Node (add ii Empty) false Empty
+    | xI ii => Node Empty false (add ii Empty)
+    end
+  | Node b0 f b1 => (* rebuilds only the child on the path of [i] *)
+    match i with
+    | xH => Node b0 true b1
+    | xO ii => Node (add ii b0) f b1
+    | xI ii => Node b0 f (add ii b1)
+    end
+  end.
+
+Lemma add_nonempty:
+  forall i s, is_empty (add i s) = false.
+Proof.
+  induction i; destruct s; simpl; trivial.
+Qed.
+
+Hint Rewrite add_nonempty : pset.
+Hint Resolve add_nonempty : pset.
+
+Lemma wf_add:
+  forall i s, (iswf s) -> (iswf (add i s)).
+Proof.
+  induction i; destruct s; simpl; trivial.
+  all: unfold iswf in *; simpl.
+  all: autorewrite with pset; simpl; trivial.
+  1,3: auto with pset.
+  all: intro Z.
+  all: repeat rewrite andb_true_iff in Z.
+  all: intuition.
+Qed.
+
+Hint Resolve wf_add : pset.
+
+Theorem gadds :
+  forall i : positive,
+  forall s : pset,
+    contains (add i s) i = true.
+Proof.
+  induction i; destruct s; simpl; auto.
+Qed.
+
+Hint Resolve gadds : pset.
+Hint Rewrite gadds : pset.
+
+Theorem gaddo :
+  forall i j : positive,
+  forall s : pset,
+    i <> j ->
+    contains (add i s) j = contains s j.
+Proof.
+  induction i; destruct j; destruct s; simpl; intro; auto with pset.
+  5, 6: congruence.
+  all: rewrite IHi by congruence.
+  all: trivial.
+  all: apply gempty.
+Qed.
+
+Hint Resolve gaddo : pset.
+
+Fixpoint remove (i : positive) (s : pset) { struct i } : pset := (* deletes [i] from [s]; rebuilds with [node] so that [iswf] is preserved, see [wf_remove] *)
+  match i with
+  | xH =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => node b0 false b1 (* clears the flag of the node reached by [i] *)
+    end
+  | xO ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => node (remove ii b0) f b1
+    end
+  | xI ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => node b0 f (remove ii b1)
+    end
+  end.
+
+Lemma wf_remove :
+  forall i s, (iswf s) -> (iswf (remove i s)).
+Proof.
+  induction i; destruct s; simpl; trivial.
+  all: unfold iswf in *; simpl.
+  all: intro Z.
+  all: repeat rewrite andb_true_iff in Z.
+  all: apply wf_node.
+  all: intuition.
+  all: apply IHi.
+  all: assumption.
+Qed.
+  
+
+Fixpoint remove_noncanon (i : positive) (s : pset) { struct i } : pset := (* like [remove] but rebuilds with the raw [Node]; has the same members as [remove], see [remove_noncanon_same] *)
+  match i with
+  | xH =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => Node b0 false b1 (* may produce [Node Empty false Empty] *)
+    end
+  | xO ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => Node (remove_noncanon ii b0) f b1
+    end
+  | xI ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => Node b0 f (remove_noncanon ii b1)
+    end
+  end.
+
+Lemma remove_noncanon_same:
+  forall i j s, (contains (remove i s) j) = (contains (remove_noncanon i s) j).
+Proof.
+  induction i; destruct s; simpl; trivial.
+  all: rewrite gnode.
+  3: reflexivity.
+  all: destruct j; simpl; trivial.
+Qed.
+
+Lemma remove_empty :
+  forall i, remove i Empty = Empty.
+Proof.
+  induction i; simpl; trivial.
+Qed.
+
+Hint Rewrite remove_empty : pset.
+Hint Resolve remove_empty : pset.
+
+Lemma gremove_noncanon_s :
+  forall i : positive,
+  forall s : pset,
+    contains (remove_noncanon i s) i = false.
+Proof.
+  induction i; destruct s; simpl; trivial.
+Qed.
+
+Theorem gremoves :
+  forall i : positive,
+  forall s : pset,
+    contains (remove i s) i = false.
+Proof.
+  intros.
+  rewrite remove_noncanon_same.
+  apply gremove_noncanon_s.
+Qed.
+
+Hint Resolve gremoves : pset.
+Hint Rewrite gremoves : pset.
+
+Lemma gremove_noncanon_o :
+  forall i j : positive,
+  forall s : pset,
+    i<>j ->
+    contains (remove_noncanon i s) j = contains s j.
+Proof.
+  induction i; destruct j; destruct s; simpl; intro; trivial.
+  1, 2: rewrite IHi by congruence.
+  1, 2: reflexivity.
+  congruence.
+Qed.
+
+Theorem gremoveo :
+  forall i j : positive,
+  forall s : pset,
+    i<>j ->
+    contains (remove i s) j = contains s j.
+Proof.
+  intros.
+  rewrite remove_noncanon_same.
+  apply gremove_noncanon_o.
+  assumption.
+Qed.
+
+Hint Resolve gremoveo : pset.
+
+Fixpoint union_nonopt (s s' : pset) : pset := (* set union without the [pset_eq] shortcut used by [union]; membership is given by [gunion_nonopt] *)
+  match s, s' with
+  | Empty, _ => s'
+  | _, Empty => s
+  | (Node b0 f b1), (Node b0' f' b1') =>
+    Node (union_nonopt b0 b0') (orb f f') (union_nonopt b1 b1')
+  end.
+
+Theorem gunion_nonopt:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (union_nonopt s s')) j = orb (contains s j) (contains s' j).
+Proof.
+  induction s; destruct s'; intro; simpl; autorewrite with pset; simpl; trivial.
+  destruct j; simpl; trivial.
+Qed.
+
+
+Fixpoint union (s s' : pset) : pset := (* set union, see [gunion]; preserves [iswf], see [wf_union] *)
+  if pset_eq s s' then s else (* equal arguments: returns [s] without descending *)
+  match s, s' with
+  | Empty, _ => s'
+  | _, Empty => s
+  | (Node b0 f b1), (Node b0' f' b1') =>
+    Node (union b0 b0') (orb f f') (union b1 b1')
+  end.
+
+Lemma union_nonempty1:
+  forall s s',
+    (is_empty s) = false -> is_empty (union s s')= false.
+Proof.
+  induction s; destruct s'; simpl; try discriminate.
+  all: destruct pset_eq; simpl; trivial.
+Qed.
+
+Lemma union_nonempty2:
+  forall s s',
+    (is_empty s') = false -> is_empty (union s s')= false.
+Proof.
+  induction s; destruct s'; simpl; try discriminate.
+  all: destruct pset_eq; simpl; trivial; discriminate.
+Qed.
+
+Hint Resolve union_nonempty1 union_nonempty2 : pset.
+
+Lemma wf_union :
+  forall s s', (iswf s) -> (iswf s') -> (iswf (union s s')).
+Proof.
+  induction s; destruct s'; intros; simpl.
+  all: destruct pset_eq; trivial.
+  unfold iswf in *. simpl in *.
+  repeat rewrite andb_true_iff in H.
+  repeat rewrite andb_true_iff in H0.
+  rewrite IHs1.
+  rewrite IHs2.
+  simpl.
+  all: intuition.
+  repeat rewrite orb_true_iff in H2, H3.
+  repeat rewrite negb_true_iff in H2, H3.
+  repeat rewrite orb_true_iff.
+  repeat rewrite negb_true_iff.
+  intuition auto with pset.
+Qed.
+
+Hint Resolve wf_union : pset.
+
+Theorem gunion:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (union s s')) j = orb (contains s j) (contains s' j).
+Proof.
+  induction s; destruct s'; intro; simpl.
+  all: destruct pset_eq as [EQ | NEQ]; try congruence.
+  all: autorewrite with pset; simpl; trivial.
+  - rewrite <- EQ.
+    symmetry.
+    apply orb_idem.
+  - destruct j; simpl; trivial.
+Qed.
+
+Fixpoint inter_noncanon (s s' : pset) : pset := (* like [inter] but rebuilds with the raw [Node]; has the same members as [inter], see [inter_noncanon_same] *)
+  if pset_eq s s' then s else
+  match s, s' with
+  | Empty, _ | _, Empty => Empty
+  | (Node b0 f b1), (Node b0' f' b1') =>
+    Node (inter_noncanon b0 b0') (andb f f') (inter_noncanon b1 b1')
+  end.
+
+Lemma ginter_noncanon:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (inter_noncanon s s')) j = andb (contains s j) (contains s' j).
+Proof.
+  induction s; destruct s'; intro; simpl.
+  all: destruct pset_eq as [EQ | NEQ]; try congruence.
+  all: autorewrite with pset; simpl; trivial.
+  - rewrite <- EQ.
+    symmetry.
+    apply andb_idem.
+  - destruct j; simpl; trivial.
+Qed.
+
+Fixpoint inter (s s' : pset) : pset := (* set intersection, see [ginter]; preserves [iswf], see [wf_inter] *)
+  if pset_eq s s' then s else (* equal arguments: returns [s] without descending *)
+  match s, s' with
+  | Empty, _ | _, Empty => Empty
+  | (Node b0 f b1), (Node b0' f' b1') =>
+    node (inter b0 b0') (andb f f') (inter b1 b1') (* [node], not [Node]: the combined node may have to collapse to [Empty] *)
+  end.
+
+Lemma wf_inter :
+  forall s s', (iswf s) -> (iswf s') -> (iswf (inter s s')).
+Proof.
+  induction s; destruct s'; intros; simpl.
+  all: destruct pset_eq; trivial.
+  unfold iswf in H, H0.
+  simpl in H, H0.
+  repeat rewrite andb_true_iff in H.
+  repeat rewrite andb_true_iff in H0.
+  fold (iswf s1) in *.
+  fold (iswf s2) in *.
+  intuition.
+Qed.
+
+Hint Resolve wf_inter : pset.
+
+Lemma inter_noncanon_same:
+  forall s s' j, (contains (inter s s') j) = (contains (inter_noncanon s s') j).
+Proof.
+  induction s; destruct s'; simpl; trivial.
+  destruct pset_eq; trivial.
+  destruct j; rewrite gnode; simpl; auto.
+Qed.
+
+Theorem ginter:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (inter s s')) j = andb (contains s j) (contains s' j).
+Proof.
+  intros.
+  rewrite inter_noncanon_same.
+  apply ginter_noncanon.
+Qed.
+
+Hint Resolve ginter gunion : pset.
+Hint Rewrite ginter gunion : pset.
+
+Fixpoint subtract_noncanon (s s' : pset) : pset := (* like [subtract] but rebuilds with the raw [Node]; has the same members as [subtract], see [subtract_noncanon_same] *)
+  if pset_eq s s' then Empty else
+  match s, s' with
+  | Empty, _ => Empty
+  | _, Empty => s
+  | (Node b0 f b1), (Node b0' f' b1') =>
+    Node (subtract_noncanon b0 b0') (andb f (negb f')) (subtract_noncanon b1 b1')
+  end.
+
+Lemma gsubtract_noncanon:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (subtract_noncanon s s')) j = andb (contains s j) (negb (contains s' j)).
+Proof.
+  induction s; destruct s'; intro; simpl.
+  all: destruct pset_eq as [EQ | NEQ]; try congruence.
+  all: autorewrite with pset; simpl; trivial.
+  - rewrite <- EQ.
+    symmetry.
+    apply andb_negb_false.
+  - destruct j; simpl; trivial.
+Qed.
+
+Fixpoint subtract (s s' : pset) : pset := (* set difference: members of [s] that are not in [s'], see [gsubtract]; preserves [iswf], see [wf_subtract] *)
+  if pset_eq s s' then Empty else (* equal arguments: the difference is empty *)
+  match s, s' with
+  | Empty, _ => Empty
+  | _, Empty => s
+  | (Node b0 f b1), (Node b0' f' b1') =>
+    node (subtract b0 b0') (andb f (negb f')) (subtract b1 b1') (* [node], not [Node]: the combined node may have to collapse to [Empty] *)
+  end.
+
+Lemma wf_subtract :
+  forall s s', (iswf s) -> (iswf s') -> (iswf (subtract s s')).
+Proof.
+  induction s; destruct s'; intros; simpl.
+  all: destruct pset_eq; trivial.
+  reflexivity.
+  
+  unfold iswf in H, H0.
+  simpl in H, H0.
+  
+  repeat rewrite andb_true_iff in H.
+  repeat rewrite andb_true_iff in H0.
+  fold (iswf s1) in *.
+  fold (iswf s2) in *.
+  intuition.
+Qed.
+
+Hint Resolve wf_subtract : pset.
+
+Lemma subtract_noncanon_same:
+  forall s s' j, (contains (subtract s s') j) = (contains (subtract_noncanon s s') j).
+Proof.
+  induction s; destruct s'; simpl; trivial.
+  destruct pset_eq; trivial.
+  destruct j; rewrite gnode; simpl; auto.
+Qed.
+
+Theorem gsubtract:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (subtract s s')) j = andb (contains s j) (negb (contains s' j)).
+Proof.
+  intros.
+  rewrite subtract_noncanon_same.
+  apply gsubtract_noncanon.
+Qed.
+
+Hint Resolve gsubtract : pset.
+Hint Rewrite gsubtract : pset.
+
+Lemma wf_is_nonempty :
+  forall s, iswf s -> is_empty s = false -> exists i, contains s i = true.
+Proof.
+  induction s; simpl; trivial.
+  discriminate.
+  intro WF.
+  unfold iswf in WF.
+  simpl in WF.
+  repeat rewrite andb_true_iff in WF.
+  repeat rewrite orb_true_iff in WF.
+  repeat rewrite negb_true_iff in WF.
+  fold (iswf s1) in WF.
+  fold (iswf s2) in WF.
+  intuition.
+  - destruct H5 as [i K].
+    exists (xO i).
+    simpl.
+    assumption.
+  - exists xH.
+    simpl.
+    assumption.
+  - destruct H5 as [i K].
+    exists (xI i).
+    simpl.
+    assumption.
+Qed.
+
+Hint Resolve wf_is_nonempty : pset.
+
+Lemma wf_is_empty1 :
+  forall s, iswf s -> (forall i, (contains s i) = false) -> is_empty s = true.
+Proof.
+  induction s; trivial.
+  intro WF.
+  unfold iswf in WF.
+  simpl in WF.
+  repeat rewrite andb_true_iff in WF.
+  fold (iswf s1) in WF.
+  fold (iswf s2) in WF.
+  intro ALL.
+  intuition.
+  exfalso.
+  repeat rewrite orb_true_iff in H0.
+  repeat rewrite negb_true_iff in H0.
+  intuition.
+  - rewrite H in H0. discriminate.
+    intro i.
+    specialize ALL with (xO i).
+    simpl in ALL.
+    assumption.
+  - specialize ALL with xH.
+    simpl in ALL.
+    congruence.
+  - rewrite H3 in H4. discriminate.
+    intro i.
+    specialize ALL with (xI i).
+    simpl in ALL.
+    assumption.
+Qed.
+  
+Hint Resolve wf_is_empty1 : pset.
+
+Lemma wf_eq :
+  forall s s', iswf s -> iswf s' -> s <> s' ->
+               exists i, (contains s i) <> (contains s' i).
+Proof.
+  induction s; destruct s'; intros WF WF' DIFF; simpl.
+  - congruence.
+  - assert (exists i, (contains (Node s'1 b s'2) i)= true) as K by auto with pset.
+    destruct K as [i Z].
+    exists i.
+    rewrite Z.
+    rewrite gempty.
+    discriminate.
+  - assert (exists i, (contains (Node s1 b s2) i)= true) as K by auto with pset.
+    destruct K as [i Z].
+    exists i.
+    rewrite Z.
+    rewrite gempty.
+    discriminate.
+  - destruct (pset_eq s1 s'1).
+    + subst s'1.
+      destruct (pset_eq s2 s'2).
+      * subst s'2.
+        exists xH.
+        simpl.
+        congruence.
+      * specialize IHs2 with s'2.
+        unfold iswf in WF.
+        simpl in WF.
+        repeat rewrite andb_true_iff in WF.
+        fold (iswf s1) in WF.
+        fold (iswf s2) in WF.
+        unfold iswf in WF'.
+        simpl in WF'.
+        repeat rewrite andb_true_iff in WF'.
+        fold (iswf s'2) in WF'.
+        intuition.
+        destruct H1 as [i K].
+        exists (xI i).
+        simpl.
+        assumption.
+    + specialize IHs1 with s'1.
+      unfold iswf in WF.
+      simpl in WF.
+      repeat rewrite andb_true_iff in WF.
+      fold (iswf s1) in WF.
+      fold (iswf s2) in WF.
+      unfold iswf in WF'.
+      simpl in WF'.
+      repeat rewrite andb_true_iff in WF'.
+      fold (iswf s'1) in WF'.
+      fold (iswf s'2) in WF'.
+      intuition.
+      destruct H1 as [i K].
+      exists (xO i).
+      simpl.
+      assumption.
+Qed.
+
+Theorem eq_correct:
+  forall s s',
+    (iswf s) -> (iswf s') ->
+    (forall i, (contains s i) = (contains s' i)) <-> s = s'.
+Proof.
+  intros s s' WF WF'.
+  split.
+  {
+    intro ALL.
+    destruct (pset_eq s s') as [ | INEQ]; trivial.
+    exfalso.
+    destruct (wf_eq s s' WF WF' INEQ) as [i K].
+    specialize ALL with i.
+    congruence.
+  }
+  intro EQ.
+  subst s'.
+  trivial.
+Qed.
+
+Lemma wf_irrelevant:
+  forall s,
+  forall WF WF' : iswf s, WF = WF'.
+Proof.
+  unfold iswf.
+  intros.
+  apply Coq.Logic.Eqdep_dec.eq_proofs_unicity_on.
+  decide equality.
+Qed.
+  
+Fixpoint xelements (s : pset) (i : positive) (* [i]: path from the root to [s], stored reversed (emitted through [prev]) *)
+                       (k: list positive) {struct s} (* [k]: list in front of which the members of [s] are placed *)
+                       : list positive :=
+  match s with
+  | Empty => k
+  | Node b0 false b1 =>
+    xelements b0 (xO i) (xelements b1 (xI i) k)
+  | Node b0 true b1 =>
+    xelements b0 (xO i) ((prev i) :: xelements b1 (xI i) k) (* members of [b0], then this node, then members of [b1] *)
+  end.
+
+Definition elements (m : pset) := xelements m xH nil. (* list of the members of [m], see [elements_correct] and [elements_complete] *)
+
+  Remark xelements_append:
+    forall (m: pset) i k1 k2,
+    xelements m i (k1 ++ k2) = xelements m i k1 ++ k2.
+  Proof.
+    induction m; intros; simpl.
+  - auto.
+  - destruct b; rewrite IHm2; rewrite <- IHm1; auto.
+  Qed.
+
+  Remark xelements_empty:
+    forall i, xelements Empty i nil = nil.
+  Proof.
+    intros; reflexivity.
+  Qed.
+
+  Remark xelements_node:
+    forall (m1: pset) o (m2: pset) i,
+    xelements (Node m1 o m2) i nil =
+       xelements m1 (xO i) nil
+    ++ (if o then (prev i) :: nil else nil)
+    ++ xelements m2 (xI i) nil.
+  Proof.
+    intros. simpl. destruct o; simpl;
+    rewrite <- xelements_append; trivial.
+  Qed.
+
+  Lemma xelements_incl:
+    forall (m: pset) (i : positive) k x,
+      In x k -> In x (xelements m i k).
+  Proof.
+    induction m; intros; simpl.
+    auto.
+    destruct b.
+    apply IHm1. simpl; right; auto.
+    auto.
+  Qed.
+
+  Lemma xelements_correct:
+    forall (m: pset) (i j : positive) k,
+      contains m i=true -> In (prev (prev_append i j)) (xelements m j k).
+  Proof.
+    induction m; intros; simpl.
+    - rewrite gempty in H. discriminate.
+    - destruct b; destruct i; simpl; simpl in H; auto.
+      + apply xelements_incl. simpl.
+        right. auto.
+      + apply xelements_incl. simpl.
+        left. trivial.
+      + apply xelements_incl. auto.
+      + discriminate.
+    Qed.
+
+  Theorem elements_correct:
+    forall (m: pset) (i: positive),
+    contains m i = true -> In i (elements m).
+  Proof.
+    intros m i H.
+    generalize (xelements_correct m i xH nil H). rewrite prev_append_prev. exact id.
+  Qed.
+
+  Lemma in_xelements:
+    forall (m: pset) (i k: positive),
+    In k (xelements m i nil) ->
+    exists j, k = prev (prev_append j i) /\ contains m j = true.
+  Proof.
+    induction m; intros.
+  - rewrite xelements_empty in H. contradiction.
+  - rewrite xelements_node in H. rewrite ! in_app_iff in H. destruct H as [P | [P | P]].
+    + specialize IHm1 with (k := k) (i := xO i).
+      intuition.
+      destruct H as [j [Q R]].
+      exists (xO j).
+      auto.
+    + destruct b; simpl in P; intuition auto. subst k. exists xH; auto.
+    + specialize IHm2 with (k := k) (i := xI i).
+      intuition.
+      destruct H as [j [Q R]].
+      exists (xI j).
+      auto.
+  Qed.
+
+  Theorem elements_complete:
+    forall (m: pset) (i: positive),
+    In i (elements m) -> contains m i = true.
+  Proof.
+    unfold elements. intros m i H.
+    destruct (in_xelements m 1 i H) as [j [P Q]].
+    rewrite prev_append_prev in P. change i with (prev_append 1 i) in P.
+    replace j with i in * by (apply prev_append_inj; auto).
+    assumption.
+  Qed.
+
+
+  Fixpoint xfold {B: Type} (f: B -> positive -> B) (* folds [f] over the members of [m], visiting them in the order of [xelements], see [xfold_xelements] *)
+                 (i: positive) (m: pset) (v: B) {struct m} : B := (* [i]: reversed path to [m]; [v]: accumulator *)
+    match m with
+    | Empty => v
+    | Node l false r =>
+        let v1 := xfold f (xO i) l v in
+        xfold f (xI i) r v1
+    | Node l true r =>
+        let v1 := xfold f (xO i) l v in
+        let v2 := f v1 (prev i) in
+        xfold f (xI i) r v2
+    end.
+
+  Definition fold {B : Type} (f: B -> positive -> B) (m: pset) (v: B) := (* equals [List.fold_left f (elements m) v], see [fold_spec] *)
+    xfold f xH m v.
+
+
+  Lemma xfold_xelements:
+    forall {B: Type} (f: B -> positive -> B) m i v l,
+    List.fold_left f l (xfold f i m v) =
+    List.fold_left f (xelements m i l) v.
+  Proof.
+    induction m; intros; simpl; trivial.
+    destruct b; simpl.
+    all: rewrite <- IHm1; simpl; rewrite <- IHm2; trivial.
+  Qed.
+
+  Theorem fold_spec:
+    forall {B: Type} (f: B -> positive -> B) (v: B) (m: pset),
+    fold f m v =
+    List.fold_left f (elements m) v.
+  Proof.
+    intros. unfold fold, elements. rewrite <- xfold_xelements. auto.
+  Qed.
+
+  Fixpoint is_subset (s s' : pset) {struct s} := (* inclusion test: [true] implies every member of [s] is in [s'] ([is_subset_spec1]); the converse is proved for [iswf s] ([is_subset_spec2]) *)
+    if pset_eq s s' then true else
+    match s, s' with
+    | Empty, _ => true
+    | _, Empty => false (* a [Node] is never reported as included in [Empty] *)
+    | (Node b0 f b1), (Node b0' f' b1') =>
+      ((negb f) || f') &&
+      (is_subset b0 b0') &&
+      (is_subset b1 b1')
+    end.
+
+  Theorem is_subset_spec1:
+    forall s s',
+      is_subset s s' = true ->
+      (forall i, contains s i = true -> contains s' i = true).
+  Proof.
+    induction s; destruct s'; simpl; intros; trivial.
+    all: destruct pset_eq.
+    all: try discriminate.
+    all: try rewrite gempty in *.
+    all: try discriminate.
+    { congruence.
+    }
+    repeat rewrite andb_true_iff in H.
+    repeat rewrite orb_true_iff in H.
+    repeat rewrite negb_true_iff in H.
+    specialize IHs1 with (s' := s'1).
+    specialize IHs2 with (s' := s'2).
+    intuition.
+    - destruct i; simpl in *; auto. congruence.
+    - destruct i; simpl in *; auto.
+  Qed.
+  
+  Theorem is_subset_spec2:
+    forall s s',
+      iswf s ->
+      (forall i, contains s i = true -> contains s' i = true) ->
+      is_subset s s' = true.
+  Proof.
+    induction s; destruct s'; simpl.
+    all: intro WF.
+    all: unfold iswf in WF.
+    all: simpl in WF.
+    all: repeat rewrite andb_true_iff in WF.
+    all: destruct pset_eq; trivial.
+    all: fold (iswf s1) in WF.
+    all: fold (iswf s2) in WF.
+    - repeat rewrite orb_true_iff in WF.
+      repeat rewrite negb_true_iff in WF.
+      intuition.
+      + destruct (wf_is_nonempty s1 H2 H1) as [i K].
+        specialize H with (xO i).
+        simpl in H.
+        auto.
+      + specialize H with xH.
+        simpl in H.
+        auto.
+      + destruct (wf_is_nonempty s2 H3 H0) as [i K].
+        specialize H with (xI i).
+        simpl in H.
+        auto.
+    - intro CONTAINS.
+      repeat rewrite andb_true_iff.
+      specialize IHs1 with (s' := s'1).
+      specialize IHs2 with (s' := s'2).
+      intuition.
+      + specialize CONTAINS with xH.
+        simpl in CONTAINS.
+        destruct b; destruct b0; intuition congruence.
+      + apply H.
+        intros.
+        specialize CONTAINS with (xO i).
+        simpl in CONTAINS.
+        auto.
+      + apply H3.
+        intros.
+        specialize CONTAINS with (xI i).
+        simpl in CONTAINS.
+        auto.
+  Qed.
+
+  Fixpoint xfilter (fn : positive -> bool) (* filters [s] by [fn]; [gxfilter] gives the exact membership equation *)
+           (s : pset) (i : positive) {struct s} : pset :=
+  match s with
+  | Empty => Empty
+  | Node b0 f b1 =>
+    node (xfilter fn b0 (xO i)) (* first subtree: recurse with [i] extended by [xO] *)
+         (f && (fn (prev i))) (* the root flag survives only if [fn] accepts [prev i] *)
+         (xfilter fn b1 (xI i)) (* second subtree: recurse with [i] extended by [xI] *)
+  end.
+  
+  Lemma gxfilter:
+    forall fn s j i,
+      contains (xfilter fn s i) j =
+      contains s j &&
+      (fn (prev (prev_append j i))).
+  Proof.
+    induction s; simpl; intros; trivial.
+    {
+      rewrite gempty.
+      trivial.
+    }
+    rewrite gnode.
+    destruct j; simpl; auto.
+  Qed.
+
+  Definition filter (fn : positive -> bool) m := xfilter fn m xH.
+
+  Lemma gfilter: (* membership in [filter fn s]: member of [s] and accepted by [fn] *)
+    forall fn s j,
+      contains (filter fn s) j =
+      contains s j && (fn j).
+  Proof.
+    intros fn s j; unfold filter.
+    (* After [gxfilter] the argument of [fn] is [prev (prev_append j xH)];
+       [prev_append_prev] rewrites it into a form that [reflexivity]
+       identifies with [j]. *)
+    rewrite gxfilter, prev_append_prev; reflexivity.
+  Qed.
+
+  Lemma wf_xfilter:
+    forall fn s j,
+      iswf s -> iswf (xfilter fn s j).
+  Proof.
+    induction s; intros; trivial.
+    simpl.
+    unfold iswf in H.
+    simpl in H.
+    repeat rewrite andb_true_iff in H.
+    fold (iswf s1) in H.
+    fold (iswf s2) in H.
+    intuition.
+  Qed.
+
+  Lemma wf_filter: (* [filter] preserves well-formedness, by [wf_xfilter] *)
+    forall fn s,
+      iswf s -> iswf (filter fn s).
+  Proof.
+    intros fn s WF; unfold filter.
+    apply wf_xfilter; exact WF.
+  Qed.
+End PSet_internals.
+
+Module Type POSITIVE_SET.
+Parameter t : Type.
+Parameter empty : t.
+
+Parameter contains: t -> positive -> bool.
+
+Axiom gempty :
+  forall i : positive,
+    contains empty i = false.
+
+Parameter add : positive -> t -> t.
+
+Axiom gaddo :
+  forall i j : positive,
+  forall s : t,
+    i <> j ->
+    contains (add i s) j = contains s j.
+
+Axiom gadds :
+  forall i : positive,
+  forall s : t,
+    contains (add i s) i = true.
+
+Parameter remove : positive -> t -> t.
+
+Axiom gremoves :
+  forall i : positive,
+  forall s : t,
+    contains (remove i s) i = false.
+
+Axiom gremoveo :
+  forall i j : positive,
+  forall s : t,
+    i<>j ->
+    contains (remove i s) j = contains s j.
+
+Parameter union : t -> t -> t.
+
+Axiom gunion:
+  forall s s' : t,
+  forall j : positive,
+    (contains (union s s')) j = orb (contains s j) (contains s' j).
+
+Parameter inter : t -> t -> t.
+
+Axiom ginter:
+  forall s s' : t,
+  forall j : positive,
+    (contains (inter s s')) j = andb (contains s j) (contains s' j).
+
+Parameter subtract : t -> t -> t.
+
+Axiom gsubtract:
+  forall s s' : t,
+  forall j : positive,
+    (contains (subtract s s')) j = andb (contains s j) (negb (contains s' j)).
+
+Axiom uneq_exists :
+  forall s s', s <> s' ->
+               exists i, (contains s i) <> (contains s' i).
+
+Parameter eq:
+  forall s s' : t, {s = s'} + {s <> s'}.
+
+Axiom eq_spec :
+  forall s s',
+    (forall i, (contains s i) = (contains s' i)) <-> s = s'.
+
+Parameter elements : t -> list positive.
+
+Axiom elements_correct:
+  forall (m: t) (i: positive),
+    contains m i = true -> In i (elements m).
+
+Axiom elements_complete:
+  forall (m: t) (i: positive),
+    In i (elements m) -> contains m i = true.
+
+Axiom elements_spec:
+  forall (m: t) (i: positive),
+    In i (elements m) <-> contains m i = true.
+
+Parameter fold:
+  forall {B : Type},
+    (B -> positive -> B) -> t -> B -> B.
+
+Axiom fold_spec:
+  forall {B: Type} (f: B -> positive -> B) (v: B) (m: t),
+    fold f m v =
+    List.fold_left f (elements m) v.
+
+Parameter is_subset : t -> t -> bool.
+
+Axiom is_subset_spec1:
+  forall s s',
+    is_subset s s' = true ->
+    (forall i, contains s i = true -> contains s' i = true).
+
+Axiom is_subset_spec2:
+  forall s s',
+    (forall i, contains s i = true -> contains s' i = true) ->
+    is_subset s s' = true.
+
+Axiom is_subset_spec:
+  forall s s',
+    is_subset s s' = true <->
+    (forall i, contains s i = true -> contains s' i = true).
+
+Parameter filter: (positive -> bool) -> t -> t.
+
+Axiom gfilter:
+  forall fn s j,
+    contains (filter fn s) j =
+    contains s j && (fn j).
+  
+End POSITIVE_SET.
+
+Module PSet : POSITIVE_SET.
+
+Record pset : Type := mkpset
+{
+  pset_x : PSet_internals.pset ;
+  pset_wf : PSet_internals.wf pset_x = true
+}.
+
+Definition t := pset.
+
+Program Definition empty : t := mkpset PSet_internals.empty _.
+
+Definition contains (s : t) (i : positive) :=
+  PSet_internals.contains (pset_x s) i.
+
+Theorem gempty :
+  forall i : positive,
+    contains empty i = false.
+Proof.
+  intro.
+  unfold empty, contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Program Definition add (i : positive) (s : t) :=
+  mkpset (PSet_internals.add i (pset_x s)) _.
+Obligation 1.
+  destruct s.
+  apply PSet_internals.wf_add.
+  simpl.
+  assumption.
+Qed.
+
+Theorem gaddo :
+  forall i j : positive,
+  forall s : t,
+    i <> j ->
+    contains (add i s) j = contains s j.
+Proof.
+  intros.
+  unfold contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Theorem gadds :
+  forall i : positive,
+  forall s : pset,
+    contains (add i s) i = true.
+Proof.
+  intros.
+  unfold contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Program Definition remove (i : positive) (s : t) :=
+  mkpset (PSet_internals.remove i (pset_x s)) _.
+Obligation 1.
+  destruct s.
+  apply PSet_internals.wf_remove.
+  simpl.
+  assumption.
+Qed.
+
+Theorem gremoves :
+  forall i : positive,
+  forall s : pset,
+    contains (remove i s) i = false.
+Proof.
+  intros.
+  unfold contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Theorem gremoveo :
+  forall i j : positive,
+  forall s : pset,
+    i<>j ->
+    contains (remove i s) j = contains s j.
+Proof.
+  intros.
+  unfold contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Program Definition union (s s' : t) :=
+  mkpset (PSet_internals.union (pset_x s) (pset_x s')) _.
+Obligation 1.
+  destruct s; destruct s'.
+  apply PSet_internals.wf_union; simpl; assumption.
+Qed.
+
+Theorem gunion:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (union s s')) j = orb (contains s j) (contains s' j).
+Proof.
+  intros.
+  unfold contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Program Definition inter (s s' : t) :=
+  mkpset (PSet_internals.inter (pset_x s) (pset_x s')) _.
+Obligation 1.
+  destruct s; destruct s'.
+  apply PSet_internals.wf_inter; simpl; assumption.
+Qed.
+
+Theorem ginter:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (inter s s')) j = andb (contains s j) (contains s' j).
+Proof.
+  intros.
+  unfold contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Program Definition subtract (s s' : t) :=
+  mkpset (PSet_internals.subtract (pset_x s) (pset_x s')) _.
+Obligation 1.
+  destruct s; destruct s'.
+  apply PSet_internals.wf_subtract; simpl; assumption.
+Qed.
+
+Theorem gsubtract:
+  forall s s' : pset,
+  forall j : positive,
+    (contains (subtract s s')) j = andb (contains s j) (negb (contains s' j)).
+Proof.
+  intros.
+  unfold contains.
+  simpl.
+  auto with pset.
+Qed.
+
+Theorem uneq_exists :
+  forall s s', s <> s' ->
+               exists i, (contains s i) <> (contains s' i).
+Proof.
+  destruct s as [s WF]; destruct s' as [s' WF']; simpl.
+  intro UNEQ.
+  destruct (PSet_internals.pset_eq s s').
+  { subst s'.
+    pose proof (PSet_internals.wf_irrelevant s WF WF').
+    subst WF'.
+    congruence.
+  }
+  unfold contains; simpl.
+  apply PSet_internals.wf_eq; trivial.
+Qed.
+
+Definition eq:
+  forall s s' : t, {s = s'} + {s <> s'}.
+Proof.
+  destruct s as [s WF].
+  destruct s' as [s' WF'].
+  destruct (PSet_internals.pset_eq s s'); simpl.
+  {
+    subst s'.
+    left.
+    pose proof (PSet_internals.wf_irrelevant s WF WF').
+    subst WF'.
+    reflexivity.
+  }
+  right.
+  congruence.
+Qed.
+
+Theorem eq_spec :
+  forall s s',
+    (forall i, (contains s i) = (contains s' i)) <-> s = s'.
+Proof.
+  intros.
+  split; intro K.
+  2: subst s'; reflexivity.
+  destruct s as [s WF].
+  destruct s' as [s' WF'].
+  unfold contains in K.
+  simpl in K.
+  fold (PSet_internals.iswf s) in WF.
+  fold (PSet_internals.iswf s') in WF'.
+  assert (s = s').
+  {
+    apply PSet_internals.eq_correct; assumption.
+  }
+  subst s'.
+  pose proof (PSet_internals.wf_irrelevant s WF WF').
+  subst WF'.
+  reflexivity.
+Qed.
+
+
+Definition elements (m : t) := PSet_internals.elements (pset_x m).
+
+
+Theorem elements_correct:
+  forall (m: pset) (i: positive),
+    contains m i = true -> In i (elements m).
+Proof.
+  destruct m; unfold elements, contains; simpl.
+  apply PSet_internals.elements_correct.
+Qed.
+
+Theorem elements_complete:
+  forall (m: pset) (i: positive),
+    In i (elements m) -> contains m i = true.
+Proof.
+  destruct m; unfold elements, contains; simpl.
+  apply PSet_internals.elements_complete.
+Qed.
+
+
+Theorem elements_spec: (* [elements m] lists exactly the members of [m] *)
+  forall (m: pset) (i: positive),
+    In i (elements m) <-> contains m i = true.
+Proof.
+  intros m i; split.
+  - exact (elements_complete m i). (* list membership implies [contains] *)
+  - exact (elements_correct m i).  (* [contains] implies list membership *)
+Qed.
+
+Definition fold {B : Type} (f : B -> positive -> B) (m : t) (v : B) : B :=
+  PSet_internals.fold f (pset_x m) v.
+
+Theorem fold_spec:
+  forall {B: Type} (f: B -> positive -> B) (v: B) (m: pset),
+    fold f m v =
+    List.fold_left f (elements m) v.
+Proof.
+  destruct m; unfold fold, elements; simpl.
+  apply PSet_internals.fold_spec.
+Qed.
+
+Definition is_subset (s s' : t) := PSet_internals.is_subset (pset_x s) (pset_x s').
+
+Theorem is_subset_spec1:
+  forall s s',
+    is_subset s s' = true ->
+    (forall i, contains s i = true -> contains s' i = true).
+Proof.
+  unfold is_subset, contains.
+  intros s s' H.
+  apply PSet_internals.is_subset_spec1.
+  assumption.
+Qed.
+
+Theorem is_subset_spec2:
+  forall s s',
+    (forall i, contains s i = true -> contains s' i = true) ->
+    is_subset s s' = true.
+Proof.
+  destruct s; destruct s';
+    unfold is_subset, contains;
+    intros.
+  apply PSet_internals.is_subset_spec2.
+  all: simpl.
+  all: assumption.
+Qed.
+
+Hint Resolve is_subset_spec1 is_subset_spec2 : pset.
+
+Theorem is_subset_spec:
+  forall s s',
+    is_subset s s' = true <->
+    (forall i, contains s i = true -> contains s' i = true).
+Proof.
+  intros.
+  split;
+  eauto with pset.
+Qed.
+
+Program Definition filter (fn : positive -> bool) (m : t) : t :=
+  (mkpset (PSet_internals.filter fn (pset_x m)) _).
+Obligation 1.
+  apply PSet_internals.wf_filter.
+  unfold PSet_internals.iswf.
+  destruct m.
+  assumption.
+Qed.
+
+Theorem gfilter: (* lifts [PSet_internals.gfilter] to the wrapped set type *)
+  forall fn s j,
+    contains (filter fn s) j =
+    contains s j && (fn j).
+Proof.
+  intros fn s j.
+  unfold contains, filter; simpl.
+  (* what remains is the corresponding statement on [pset_x s] *)
+  apply PSet_internals.gfilter.
+Qed.
+End PSet.
+
+Hint Resolve PSet.gaddo PSet.gadds PSet.gremoveo PSet.gremoves PSet.gunion PSet.ginter PSet.gsubtract PSet.gfilter PSet.is_subset_spec1 PSet.is_subset_spec2 : pset.
+
+Hint Rewrite PSet.gadds PSet.gremoves PSet.gunion PSet.ginter PSet.gsubtract PSet.gfilter : pset.
diff --git a/lib/HashedSetaux.ml b/lib/HashedSetaux.ml
new file mode 100644
index 00000000..8329c249
--- /dev/null
+++ b/lib/HashedSetaux.ml
@@ -0,0 +1,55 @@
+type uid = int
+
+let uid_base = min_int
+let next_uid = ref (uid_base+1)
+
+let incr_uid () = (* advance [next_uid] by one; fails rather than step past [max_int] *)
+  let current = !next_uid in
+  if current <> max_int
+  then next_uid := succ current
+  else failwith "HashedSet: no more uid";;
+
+let cur_uid () = !next_uid;;
+
+type pset =
+  | Empty
+  | Node of uid * pset * bool * pset;;
+
+let get_uid = function
+  | Empty -> uid_base
+  | Node(uid, _, _, _) -> uid;;
+
+module HashedPSet =
+  struct
+    type t = pset
+    (* hash/equal ignore a node's own uid: only its flag and its two children count *)
+    let hash s = match s with
+      | Node(_, l, flag, r) -> Hashtbl.hash (get_uid l, flag, get_uid r)
+      | Empty -> 0;;
+
+    let equal x x' = match x, x' with
+      | Node(_, l, flag, r), Node(_, l', flag', r') ->
+         l == l' && flag == flag' && r == r'
+      | Empty, Empty -> true
+      | _, _ -> false
+  end;;
+
+module PSetHash = Weak.Make(HashedPSet);;
+
+let htable = PSetHash.create 1000;;
+
+let qnode b0 f b1 = (* returns the node already in [htable] when one is equal, else registers a new one *)
+  let candidate = Node(cur_uid(), b0, f, b1) in
+  match PSetHash.find_opt htable candidate with
+  | Some existing -> existing
+  | None -> PSetHash.add htable candidate; incr_uid(); candidate;;
+
+let node (b0, f, b1) = qnode b0 f b1;;
+
+let empty = Empty;;
+
+let pset_match empty_case node_case s = match s with (* case analysis that does not expose the uid *)
+  | Node(_, b0, f, b1) -> node_case b0 f b1
+  | Empty -> empty_case ();;
+
+let eq (x : pset) (y : pset) = (x==y);;
diff --git a/lib/HashedSetaux.mli b/lib/HashedSetaux.mli
new file mode 100644
index 00000000..14beac41
--- /dev/null
+++ b/lib/HashedSetaux.mli
@@ -0,0 +1,6 @@
+type pset (** abstract set representation *)
+val qnode : pset -> bool -> pset -> pset (** node constructor, curried *)
+val node : pset * bool * pset -> pset (** same constructor as [qnode], taking its arguments as a triple *)
+val empty : pset (** the empty set *)
+val pset_match : (unit -> 'a) -> (pset -> bool -> pset -> 'a) -> pset -> 'a (** case analysis: first function for the empty set, second for a node *)
+val eq : pset -> pset -> bool (** implemented as physical equality [==] *)
diff --git a/lib/Lattice.v b/lib/Lattice.v
index 85fc03f3..8ea736ad 100644
--- a/lib/Lattice.v
+++ b/lib/Lattice.v
@@ -30,7 +30,8 @@ Local Unset Case Analysis Schemes.
   [bot], and an upper bound operation [lub].
   Note that we do not demand that [lub] computes the least upper bound. *)
 
-Module Type SEMILATTICE.
+
+Module Type SEMILATTICE_WITHOUT_BOTTOM.
 
   Parameter t: Type.
   Parameter eq: t -> t -> Prop.
@@ -42,25 +43,124 @@ Module Type SEMILATTICE.
   Parameter ge: t -> t -> Prop.
   Axiom ge_refl: forall x y, eq x y -> ge x y.
   Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
-  Parameter bot: t.
-  Axiom ge_bot: forall x, ge x bot.
   Parameter lub: t -> t -> t.
   Axiom ge_lub_left: forall x y, ge (lub x y) x.
   Axiom ge_lub_right: forall x y, ge (lub x y) y.
 
+End SEMILATTICE_WITHOUT_BOTTOM.
+
+Module Type SEMILATTICE.
+  Include SEMILATTICE_WITHOUT_BOTTOM.
+  Parameter bot: t.
+  Axiom ge_bot: forall x, ge x bot.
 End SEMILATTICE.
 
 (** A semi-lattice ``with top'' is similar, but also has a greatest
   element [top]. *)
 
 Module Type SEMILATTICE_WITH_TOP.
-
   Include SEMILATTICE.
   Parameter top: t.
   Axiom ge_top: forall x, ge top x.
-
 End SEMILATTICE_WITH_TOP.
 
+
+Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM) <: SEMILATTICE.
+  Definition t := option L.t.
+  Definition eq (a b : t) :=
+    match a, b with
+    | None, None => True
+    | Some x, Some y => L.eq x y
+    | Some _, None | None, Some _ => False
+    end.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq; destruct x; trivial.
+    apply L.eq_refl.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; destruct x; destruct y; trivial.
+    apply L.eq_sym.
+  Qed.
+  
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; destruct x; destruct y; destruct z; trivial.
+    - apply L.eq_trans.
+    - contradiction.
+  Qed.
+  
+  Definition beq (x y : t) :=
+    match x, y with
+    | None, None => true
+    | Some x, Some y => L.beq x y
+    | Some _, None | None, Some _ => false
+    end.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq.
+    destruct x; destruct y; trivial; try congruence.
+    apply L.beq_correct.
+  Qed.
+  
+  Definition ge (x y : t) := (* order on [option L.t]: [None] lies below every element *)
+    match x, y with
+    | None, Some _ => False
+    | _, None => True
+    | Some a, Some b => L.ge a b
+    end.
+  
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    destruct x; destruct y; trivial.
+    apply L.ge_refl.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    destruct x; destruct y; destruct z; trivial; try contradiction.
+    apply L.ge_trans.
+  Qed.
+  
+  Definition bot: t := None.
+  Lemma ge_bot: forall x, ge x bot.
+  Proof.
+    unfold ge, bot.
+    destruct x; trivial.
+  Qed.
+  
+  Definition lub (a b : t) := (* [None] is neutral; two [Some] values are joined with [L.lub] *)
+    match a, b with
+    | None, _ => b
+    | _, None => a
+    | (Some x), (Some y) => Some (L.lub x y)
+    end.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_left.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_right.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+End ADD_BOTTOM.
+
 (** * Semi-lattice over maps *)
 
 Set Implicit Arguments.
diff --git a/lib/extra/HashedMap.v b/lib/extra/HashedMap.v
new file mode 100644
index 00000000..df724867
--- /dev/null
+++ b/lib/extra/HashedMap.v
@@ -0,0 +1,448 @@
+Require Import ZArith.
+Require Import Bool.
+Require Import List.
+Require Coq.Logic.Eqdep_dec.
+
+(* begin from Maps *)
+Fixpoint prev_append (i j: positive) {struct i} : positive :=
+  match i with
+  | xH => j
+  | xI i' => prev_append i' (xI j)
+  | xO i' => prev_append i' (xO j)
+  end.
+
+Definition prev (i: positive) : positive :=
+  prev_append i xH.
+
+Lemma prev_append_prev i j:
+  prev (prev_append i j) = prev_append j i.
+Proof.
+  revert j. unfold prev.
+  induction i as [i IH|i IH|]. 3: reflexivity.
+  intros j. simpl. rewrite IH. reflexivity.
+  intros j. simpl. rewrite IH. reflexivity.
+Qed.
+
+Lemma prev_involutive i :
+  prev (prev i) = i.
+Proof (prev_append_prev i xH).
+
+Lemma prev_append_inj i j j' :
+  prev_append i j = prev_append i j' -> j = j'.
+Proof.
+  revert j j'.
+  induction i as [i Hi|i Hi|]; intros j j' H; auto;
+    specialize (Hi _ _ H); congruence.
+Qed.
+
+(* end from Maps *)
+
+Lemma orb_idem: forall b, orb b b = b. (* [orb] is idempotent *)
+Proof.
+  intros []; reflexivity.
+Qed.
+
+Lemma andb_idem: forall b, andb b b = b. (* [andb] is idempotent *)
+Proof.
+  intros []; reflexivity.
+Qed.
+
+Lemma andb_negb_false: forall b, andb b (negb b) = false. (* a boolean and its negation never both hold *)
+Proof.
+  intros []; reflexivity.
+Qed.
+
+Hint Rewrite orb_false_r andb_false_r andb_true_r orb_true_r orb_idem andb_idem  andb_negb_false : pmap.
+
+Parameter T : Type.
+Parameter T_eq_dec : forall (x y : T), {x = y} + {x <> y}.
+
+Inductive pmap : Type :=
+| Empty : pmap
+| Node : pmap -> option T -> pmap -> pmap.
+Definition empty := Empty.
+
+Definition is_empty x := (* constructor test: [true] exactly on [Empty] *)
+  match x with
+  | Node _ _ _ => false
+  | Empty => true
+  end.
+
+Definition is_some (x : option T) := (* [true] exactly when [x] carries a value *)
+  match x with
+  | None => false
+  | Some _ => true
+  end.
+
+Fixpoint wf x :=
+  match x with
+  | Empty => true
+  | Node b0 f b1 =>
+    (wf b0) && (wf b1) &&
+    ((negb (is_empty b0)) || (is_some f) || (negb (is_empty b1)))
+  end.
+
+Definition iswf x := (wf x)=true.
+  
+Lemma empty_wf : iswf empty.
+Proof.
+  reflexivity.
+Qed.
+
+Definition pmap_eq :
+  forall s s': pmap, { s=s' } + { s <> s' }.
+Proof.
+  generalize T_eq_dec.
+  induction s; destruct s'; repeat decide equality.
+Qed.
+
+Fixpoint get (i : positive) (s : pmap) {struct i} : option T :=
+  match s with
+  | Empty => None
+  | Node b0 f b1 =>
+    match i with
+    | xH => f
+    | xO ii => get ii b0
+    | xI ii => get ii b1
+    end
+  end.
+
+Lemma gempty : (* [Empty] binds no key *)
+  forall i : positive,
+    get i Empty = None.
+Proof.
+  intros []; reflexivity.
+Qed.
+
+Hint Resolve gempty : pmap.
+Hint Rewrite gempty : pmap.
+
+Definition node (b0 : pmap) (f : option T) (b1 : pmap) : pmap := (* smart constructor around [Node] *)
+  match b0, f, b1 with
+  | Empty, None, Empty => Empty (* nothing stored at the root or below: collapse to [Empty] *)
+  | _, _, _ => Node b0 f b1
+  end.
+
+Lemma wf_node :
+  forall b0 f b1,
+    iswf b0 -> iswf b1 -> iswf (node b0 f b1).
+Proof.
+  destruct b0; destruct f; destruct b1; simpl.
+  all: unfold iswf; simpl; intros; trivial.
+  all: autorewrite with pmap; trivial.
+  all: rewrite H.
+  all: rewrite H0.
+  all: reflexivity.
+Qed.
+
+Hint Resolve wf_node: pmap.
+
+Lemma gnode :
+  forall b0 f b1 i,
+    get i (node b0 f b1) =
+    get i (Node b0 f b1).
+Proof.
+  destruct b0; simpl; trivial.
+  destruct f; simpl; trivial.
+  destruct b1; simpl; trivial.
+  intro.
+  rewrite gempty.
+  destruct i; simpl; trivial.
+  all: symmetry; apply gempty.
+Qed.
+
+Hint Rewrite gnode : pmap.
+
+Fixpoint set (i : positive) (j : T) (s : pmap) {struct i} : pmap :=
+  match s with
+  | Empty =>
+    match i with
+    | xH => Node Empty (Some j) Empty
+    | xO ii => Node (set ii j Empty) None Empty
+    | xI ii => Node Empty None (set ii j Empty)
+    end
+  | Node b0 f b1 =>
+    match i with
+    | xH => Node b0 (Some j) b1
+    | xO ii => Node (set ii j b0) f b1
+    | xI ii => Node b0 f (set ii j b1)
+    end
+  end.
+
+Lemma set_nonempty:
+  forall i j s, is_empty (set i j s) = false.
+Proof.
+  induction i; destruct s; simpl; trivial.
+Qed.
+
+Hint Rewrite set_nonempty : pmap.
+Hint Resolve set_nonempty : pmap.
+
+Lemma wf_set:
+  forall i j s, (iswf s) -> (iswf (set i j s)).
+Proof.
+  induction i; destruct s; simpl; trivial.
+  all: unfold iswf in *; simpl.
+  all: autorewrite with pmap; simpl; trivial.
+  1,3: auto with pmap.
+  all: intro Z.
+  all: repeat rewrite andb_true_iff in Z.
+  all: intuition.
+Qed.
+
+Hint Resolve wf_set : pmap. (* registered in [pmap], the database used by every other hint of this file *)
+
+Theorem gss :
+  forall (i : positive) (j : T) (s : pmap),
+    get i (set i j s) = Some j.
+Proof.
+  induction i; destruct s; simpl; auto.
+Qed.
+
+Hint Resolve gss : pmap.
+Hint Rewrite gss : pmap.
+
+Theorem gso :
+  forall (i j : positive) (k : T) (s : pmap),
+    i <> j ->
+    get j (set i k s) = get j s.
+Proof.
+  induction i; destruct j; destruct s; simpl; intro; auto with pmap.
+  5, 6: congruence.
+  all: rewrite IHi by congruence.
+  all: trivial.
+  all: apply gempty.
+Qed.
+
+Hint Resolve gso : pmap.
+
+Fixpoint remove (i : positive) (s : pmap) { struct i } : pmap :=
+  match i with
+  | xH =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => node b0 None b1
+    end
+  | xO ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => node (remove ii b0) f b1
+    end
+  | xI ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => node b0 f (remove ii b1)
+    end
+  end.
+
+Lemma wf_remove :
+  forall i s, (iswf s) -> (iswf (remove i s)).
+Proof.
+  induction i; destruct s; simpl; trivial.
+  all: unfold iswf in *; simpl.
+  all: intro Z.
+  all: repeat rewrite andb_true_iff in Z.
+  all: apply wf_node.
+  all: intuition.
+  all: apply IHi.
+  all: assumption.
+Qed.
+
+Fixpoint remove_noncanon (i : positive) (s : pmap) { struct i } : pmap :=
+  match i with
+  | xH =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => Node b0 None b1
+    end
+  | xO ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => Node (remove_noncanon ii b0) f b1
+    end
+  | xI ii =>
+    match s with
+    | Empty => Empty
+    | Node b0 f b1 => Node b0 f (remove_noncanon ii b1)
+    end
+  end.
+
+Lemma remove_noncanon_same:
+  forall i j s, (get j (remove i s)) = (get j (remove_noncanon i s)).
+Proof.
+  induction i; destruct s; simpl; trivial.
+  all: rewrite gnode.
+  3: reflexivity.
+  all: destruct j; simpl; trivial.
+Qed.
+
+Lemma remove_empty : (* removing any key from [Empty] yields [Empty] *)
+  forall i, remove i Empty = Empty.
+Proof.
+  intros []; reflexivity.
+Qed.
+
+Hint Rewrite remove_empty : pmap.
+Hint Resolve remove_empty : pmap.
+
+Lemma gremove_noncanon_s :
+  forall i : positive,
+  forall s : pmap,
+    get i (remove_noncanon i s) = None.
+Proof.
+  induction i; destruct s; simpl; trivial.
+Qed.
+
+Theorem grs : (* a key reads as [None] once it has been removed *)
+  forall i : positive,
+  forall s : pmap,
+    get i (remove i s) = None.
+Proof.
+  intros i s.
+  rewrite remove_noncanon_same. (* switch to the variant built with plain [Node] *)
+  exact (gremove_noncanon_s i s).
+Qed.
+
+Hint Resolve grs : pmap.
+Hint Rewrite grs : pmap.
+
+Lemma gremove_noncanon_o :
+  forall i j : positive,
+  forall s : pmap,
+    i<>j ->
+    get j (remove_noncanon i s) = get j s.
+Proof.
+  induction i; destruct j; destruct s; simpl; intro; trivial.
+  1, 2: rewrite IHi by congruence.
+  1, 2: reflexivity.
+  congruence.
+Qed.
+
+Theorem gro :
+  forall (i j : positive) (s : pmap),
+    i<>j ->
+    get j (remove i s) = get j s.
+Proof.
+  intros.
+  rewrite remove_noncanon_same.
+  apply gremove_noncanon_o.
+  assumption.
+Qed.
+
+Hint Resolve gro : pmap.
+
+Section MAP2.
+  
+  Variable f : option T -> option T -> option T.
+
+  Section NONE_NONE.
+    Hypothesis f_none_none : f None None = None.
+
+    Fixpoint map2_Empty (b : pmap) :=
+      match b with
+      | Empty => Empty
+      | Node b0 bf b1 =>
+        node (map2_Empty b0) (f None bf) (map2_Empty b1)
+      end.
+
+    Lemma gmap2_Empty: forall i b,
+        get i (map2_Empty b) = f None (get i b).
+    Proof.
+      induction i; destruct b as [ | b0 bf b1]; intros; simpl in *.
+      all: try congruence.
+      - replace
+          (match node (map2_Empty b0) (f None bf) (map2_Empty b1) with
+           | Empty => None
+           | Node _ _ c1 => get i c1
+           end)
+          with (get (xI i) (node (map2_Empty b0) (f None bf) (map2_Empty b1))).
+        + rewrite gnode.
+          simpl. apply IHi.
+        + destruct node; auto with pmap.
+      - replace
+          (match node (map2_Empty b0) (f None bf) (map2_Empty b1) with
+           | Empty => None
+           | Node c0 _ _ => get i c0
+           end)
+          with (get (xO i) (node (map2_Empty b0) (f None bf) (map2_Empty b1))).
+        + rewrite gnode.
+          simpl. apply IHi.
+        + destruct node; auto with pmap.
+      - change (match node (map2_Empty b0) (f None bf) (map2_Empty b1) with
+                | Empty => None
+                | Node _ cf _ => cf
+                end) with (get xH (node (map2_Empty b0) (f None bf) (map2_Empty b1))).
+        rewrite gnode. reflexivity.
+    Qed.
+    
+    Fixpoint map2 (a b : pmap) :=
+      match a with
+      | Empty => map2_Empty b
+      | (Node a0 af a1) =>
+        match b with
+        | (Node b0 bf b1) =>
+          node (map2 a0 b0) (f af bf) (map2 a1 b1)
+        | Empty =>
+          node (map2 a0 Empty) (f af None) (map2 a1 Empty)
+        end
+      end.
+  
+    Lemma gmap2: forall a b i,
+        get i (map2 a b) = f (get i a) (get i b).
+    Proof.
+      induction a as [ | a0 IHa0 af a1 IHa1]; intros; simpl.
+      { rewrite gmap2_Empty.
+        rewrite gempty.
+        reflexivity. }
+      destruct b as [ | b0 bf b1 ]; simpl; rewrite gnode.
+      - destruct i; simpl.
+        + rewrite IHa1. rewrite gempty.
+          reflexivity.
+        + rewrite IHa0. rewrite gempty.
+          reflexivity.
+        + reflexivity.
+      - destruct i; simpl; congruence.
+    Qed.
+  End NONE_NONE.
+
+  Section IDEM.
+    Hypothesis f_idem : forall x, f x x = x.
+    
+    Fixpoint map2_idem (a b : pmap) :=
+      if pmap_eq a b then a else
+      match a with
+      | Empty => map2_Empty b
+      | (Node a0 af a1) =>
+        match b with
+        | (Node b0 bf b1) =>
+          node (map2_idem a0 b0) (f af bf) (map2_idem a1 b1)
+        | Empty =>
+          node (map2_idem a0 Empty) (f af None) (map2_idem a1 Empty)
+        end
+      end.
+ 
+    Lemma gmap2_idem: forall a b i,
+        get i (map2_idem a b) = f (get i a) (get i b).
+    Proof.
+      induction a as [ | a0 IHa0 af a1 IHa1]; intros; simpl.
+      { destruct pmap_eq.
+        - subst b. rewrite gempty. congruence.
+        - rewrite gempty.
+          rewrite gmap2_Empty by congruence.
+          reflexivity.
+      }
+      destruct pmap_eq.
+      { subst b.
+        congruence.
+      }
+      destruct b as [ | b0 bf b1 ]; simpl; rewrite gnode.
+      - destruct i; simpl.
+        + rewrite IHa1. rewrite gempty.
+          reflexivity.
+        + rewrite IHa0. rewrite gempty.
+          reflexivity.
+        + reflexivity.
+      - destruct i; simpl; congruence.
+    Qed.
+  End IDEM.
+End MAP2.
diff --git a/mppa_k1c/Archi.v b/mppa_k1c/Archi.v
index 69b32c7c..587f768e 100644
--- a/mppa_k1c/Archi.v
+++ b/mppa_k1c/Archi.v
@@ -26,11 +26,11 @@ Definition big_endian := false.
 Definition align_int64 := 8%Z.
 Definition align_float64 := 8%Z.
 
-Definition splitlong := negb ptr64.
+Definition splitlong := false.
 
 Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
 Proof.
-  unfold splitlong. destruct ptr64; simpl; congruence. 
+  unfold splitlong. congruence. 
 Qed.
 
 (** THIS IS NOT CHECKED ! NONE OF THIS ! *)
@@ -77,3 +77,5 @@ Global Opaque ptr64 big_endian splitlong
 (** Whether to generate position-independent code or not *)
 
 Parameter pic_code: unit -> bool.
+
+Definition has_notrap_loads := true.
diff --git a/mppa_k1c/Asmexpand.ml b/mppa_k1c/Asmexpand.ml
index 8ab10bc5..e388d2aa 100644
--- a/mppa_k1c/Asmexpand.ml
+++ b/mppa_k1c/Asmexpand.ml
@@ -591,6 +591,7 @@ let expand_instruction instr =
      | EF_external _ -> failwith "asmexpand: external"
      | EF_inline_asm _ -> emit instr
      | EF_runtime _ -> failwith "asmexpand: runtime"
+     | EF_profiling _ -> emit instr
      end
   | _ ->
      emit instr
diff --git a/mppa_k1c/Machregs.v b/mppa_k1c/Machregs.v
index 8098b5d1..cff1164c 100644
--- a/mppa_k1c/Machregs.v
+++ b/mppa_k1c/Machregs.v
@@ -171,6 +171,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
     if Z.leb sz 15
     then R62 :: R63 :: R61 :: nil
     else R62 :: R63 :: R61 :: R60 :: nil
+  | EF_profiling _ _ => R62 :: R63 ::nil
   | _ => nil
   end.
 
diff --git a/mppa_k1c/Op.v b/mppa_k1c/Op.v
index 4e874ca8..012d67d0 100644
--- a/mppa_k1c/Op.v
+++ b/mppa_k1c/Op.v
@@ -1037,14 +1037,19 @@ Definition is_trapping_op (op : operation) :=
   | _ => false
   end.
 
+Definition args_of_operation op :=
+  if eq_operation op Omove
+  then 1%nat
+  else List.length (fst (type_of_operation op)).
+
 Lemma is_trapping_op_sound:
   forall op vl sp m,
-    op <> Omove ->
     is_trapping_op op = false ->
-    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    (List.length vl) = args_of_operation op ->
     eval_operation genv sp op vl m <> None.
 Proof.
-  destruct op; intros; simpl in *; try congruence.
+  unfold args_of_operation.
+  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
diff --git a/mppa_k1c/SelectOp.vp b/mppa_k1c/SelectOp.vp
index ec3985c5..bd481cbb 100644
--- a/mppa_k1c/SelectOp.vp
+++ b/mppa_k1c/SelectOp.vp
@@ -168,13 +168,21 @@ Nondetfunction add (e1: expr) (e2: expr) :=
   | t1, Eop (Oaddimm n2) (t2:::Enil) =>
       addimm n2 (Eop Oadd (t1:::t2:::Enil))
   | t1, (Eop Omul (t2:::t3:::Enil)) =>
-      Eop Omadd (t1:::t2:::t3:::Enil)
+    if Compopts.optim_madd tt
+    then Eop Omadd (t1:::t2:::t3:::Enil)
+    else Eop Oadd (e1:::e2:::Enil)
   | (Eop Omul (t2:::t3:::Enil)), t1 =>
-      Eop Omadd (t1:::t2:::t3:::Enil)
+    if Compopts.optim_madd tt
+    then Eop Omadd (t1:::t2:::t3:::Enil)
+    else Eop Oadd (e1:::e2:::Enil)
   | t1, (Eop (Omulimm n) (t2:::Enil)) =>
-      Eop (Omaddimm n) (t1:::t2:::Enil)
+    if Compopts.optim_madd tt
+    then Eop (Omaddimm n) (t1:::t2:::Enil)
+    else Eop Oadd (e1:::e2:::Enil)
   | (Eop (Omulimm n) (t2:::Enil)), t1 =>
-    Eop (Omaddimm n) (t1:::t2:::Enil)
+    if Compopts.optim_madd tt
+    then Eop (Omaddimm n) (t1:::t2:::Enil)
+    else Eop Oadd (e1:::e2:::Enil)
   | (Eop (Oshlimm n) (t1:::Enil)), t2 =>
     add_shlimm n t1 t2
   | t2, (Eop (Oshlimm n) (t1:::Enil)) =>
@@ -197,7 +205,9 @@ Nondetfunction sub (e1: expr) (e2: expr) :=
   | t1, (Eop Omul (t2:::t3:::Enil)) =>
       Eop Omsub (t1:::t2:::t3:::Enil)
   | t1, (Eop (Omulimm n) (t2:::Enil)) =>
-      Eop (Omaddimm (Int.neg n)) (t1:::t2:::Enil)
+    if Compopts.optim_madd tt
+    then Eop (Omaddimm (Int.neg n)) (t1:::t2:::Enil)
+    else Eop Osub (e1:::e2:::Enil)
   | _, _ => Eop Osub (e1:::e2:::Enil)
   end.
 
@@ -452,18 +462,10 @@ Definition mods_base (e1: expr) (e2: expr) :=
   Eexternal i32_smod sig_ii_i (e1 ::: e2 ::: Enil).
 
 Definition divu_base (e1: expr) (e2: expr) :=
-  Eop Olowlong
-     ((Eexternal i64_udiv sig_ll_l
-       ((Eop Ocast32unsigned (e1 ::: Enil)):::
-       (Eop Ocast32unsigned (e2 ::: Enil))::: Enil))
-      :::Enil).
+  Eexternal i32_udiv sig_ii_i (e1 ::: e2 ::: Enil).
   
 Definition modu_base (e1: expr) (e2: expr) :=
-  Eop Olowlong
-     ((Eexternal i64_umod sig_ll_l
-       ((Eop Ocast32unsigned (e1 ::: Enil)):::
-       (Eop Ocast32unsigned (e2 ::: Enil))::: Enil))
-      :::Enil).
+  Eexternal i32_umod sig_ii_i (e1 ::: e2 ::: Enil).
 
 Definition shrximm (e1: expr) (n2: int) :=
   if Int.eq n2 Int.zero then e1 else Eop (Oshrximm n2) (e1:::Enil).
@@ -712,4 +714,4 @@ End SELECT.
 
 (* Local Variables: *)
 (* mode: coq *)
-(* End: *)
-\ No newline at end of file
+(* End: *)
diff --git a/mppa_k1c/SelectOpproof.v b/mppa_k1c/SelectOpproof.v
index 23d2d5b7..28294934 100644
--- a/mppa_k1c/SelectOpproof.v
+++ b/mppa_k1c/SelectOpproof.v
@@ -350,13 +350,19 @@ Proof.
     apply eval_addimm. EvalOp.
     repeat rewrite Val.add_assoc. reflexivity.
   - (* Omadd *)
-    subst. TrivialExists.
+    subst. destruct (Compopts.optim_madd tt); TrivialExists;
+    repeat (eauto; econstructor; simpl).
   - (* Omadd rev *)
-    subst. rewrite Val.add_commut. TrivialExists.
+    subst. destruct (Compopts.optim_madd tt); TrivialExists;
+    repeat (eauto; econstructor; simpl).
+    simpl. rewrite Val.add_commut. reflexivity.
   - (* Omaddimm *)
-    subst. TrivialExists.
+    subst. destruct (Compopts.optim_madd tt); TrivialExists;
+    repeat (eauto; econstructor; simpl).
   - (* Omaddimm rev *)
-    subst. rewrite Val.add_commut. TrivialExists.
+    subst. destruct (Compopts.optim_madd tt); TrivialExists;
+    repeat (eauto; econstructor; simpl).
+    simpl. rewrite Val.add_commut. reflexivity.
     (* Oaddx *)
   - subst. pose proof eval_addx as ADDX.
     unfold binary_constructor_sound in ADDX.
@@ -380,11 +386,14 @@ Proof.
   - subst. rewrite Val.sub_add_l. apply eval_addimm; EvalOp.
   - subst. rewrite Val.sub_add_r. apply eval_addimm; EvalOp.
   - TrivialExists. simpl. subst. reflexivity.
-  - TrivialExists. simpl. subst.
-    rewrite sub_add_neg.
-    rewrite neg_mul_distr_r.
-    unfold Val.neg.
-    reflexivity.
+  - destruct (Compopts.optim_madd tt).
+    + TrivialExists. simpl. subst.
+      rewrite sub_add_neg.
+      rewrite neg_mul_distr_r.
+      unfold Val.neg.
+      reflexivity.
+    + TrivialExists. repeat (eauto; econstructor).
+      simpl. subst. reflexivity.
   - TrivialExists.
 Qed.
 
@@ -929,6 +938,12 @@ Theorem eval_divu_base:
     Val.divu x y = Some z ->
     exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
+  intros; unfold divu_base.
+  econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+(* For using 64-bit unsigned division for 32-bit
+
   intros until z.
   intros Hax Hby Hdiv. unfold divu_base.
   pose proof (divu_is_divlu x y) as DIVU.
@@ -948,7 +963,8 @@ Proof.
   }
   congruence.
 Qed.
-
+ *)
+  
 Theorem eval_modu_base:
   forall le a b x y z,
     eval_expr ge sp e m le a x ->
@@ -956,6 +972,12 @@ Theorem eval_modu_base:
     Val.modu x y = Some z ->
     exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
 Proof.
+  intros; unfold modu_base.
+  econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+(* for using 64-bit unsigned modulo for 32-bit
+
   intros until z.
   intros Hax Hby Hmod. unfold modu_base.
   pose proof (modu_is_modlu x y) as MODU.
@@ -975,7 +997,8 @@ Proof.
   }
   congruence.
 Qed.
-
+ *)
+  
 Theorem eval_shrximm:
   forall le a n x z,
     eval_expr ge sp e m le a x ->
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 930b1c51..01751f19 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -34,11 +34,57 @@ module Target (*: TARGET*) =
 
     let comment = "#"
 
+    type idiv_function_kind =
+      | Idiv_system
+      | Idiv_stsud
+      | Idiv_fp;;
+
+    let idiv_function_kind = function
+        "stsud" -> Idiv_stsud
+      | "system" -> Idiv_system
+      | "fp" -> Idiv_fp
+      | _ -> failwith "unknown integer division kind";;
+    
+    let idiv_function_kind_32bit () = idiv_function_kind !Clflags.option_div_i32;;
+    let idiv_function_kind_64bit () = idiv_function_kind !Clflags.option_div_i64;;
+    
     let subst_symbol = function
-        "__compcert_i64_udiv" -> "__udivdi3"
-      | "__compcert_i64_sdiv" -> "__divdi3"
-      | "__compcert_i64_umod" -> "__umoddi3"
-      | "__compcert_i64_smod" -> "__moddi3"
+        "__compcert_i64_udiv" ->
+        (match idiv_function_kind_64bit () with
+         | Idiv_system | Idiv_fp -> "__udivdi3"
+         | Idiv_stsud -> "__compcert_i64_udiv_stsud")
+      | "__compcert_i64_sdiv" ->
+        (match idiv_function_kind_64bit() with
+         | Idiv_system | Idiv_fp -> "__divdi3"
+         | Idiv_stsud -> "__compcert_i64_sdiv_stsud")
+      | "__compcert_i64_umod" ->
+        (match idiv_function_kind_64bit() with
+         | Idiv_system | Idiv_fp -> "__umoddi3"
+         | Idiv_stsud -> "__compcert_i64_umod_stsud")
+      | "__compcert_i64_smod" ->
+        (match idiv_function_kind_64bit() with
+         | Idiv_system | Idiv_fp -> "__moddi3"
+         | Idiv_stsud -> "__compcert_i64_smod_stsud")
+      | "__compcert_i32_sdiv" as s ->
+        (match idiv_function_kind_32bit() with
+         | Idiv_system -> s
+         | Idiv_fp -> "__compcert_i32_sdiv_fp"
+         | Idiv_stsud -> "__compcert_i32_sdiv_stsud")
+      | "__compcert_i32_udiv" as s ->
+        (match idiv_function_kind_32bit() with
+         | Idiv_system -> s
+         | Idiv_fp -> "__compcert_i32_udiv_fp"
+         | Idiv_stsud -> "__compcert_i32_udiv_stsud")
+      | "__compcert_i32_smod" as s ->
+        (match idiv_function_kind_32bit() with
+         | Idiv_system -> s
+         | Idiv_fp -> "__compcert_i32_smod_fp"
+         | Idiv_stsud -> "__compcert_i32_smod_stsud")
+      | "__compcert_i32_umod" as s ->
+        (match idiv_function_kind_32bit() with
+         | Idiv_system -> s
+         | Idiv_fp -> "__compcert_i32_umod_fp"
+         | Idiv_stsud -> "__compcert_i32_umod_stsud")
       | "__compcert_f64_div" -> "__divdf3"
       | "__compcert_f32_div" -> "__divsf3"
       | x -> x;;
@@ -157,8 +203,12 @@ module Target (*: TARGET*) =
 
     let name_of_section = function
       | Section_text         -> ".text"
-      | Section_data i | Section_small_data i ->
-          if i then ".data" else "COMM"
+      | Section_data(true, true) ->
+         ".section .tdata,\"awT\",@progbits"
+      | Section_data(false, true) ->        
+         ".section .tbss,\"awT\",@nobits"
+      | Section_data(i, false) | Section_small_data(i) ->
+         (if i then ".data" else "COMM")
       | Section_const i | Section_small_const i ->
           if i then ".section	.rodata" else "COMM"
       | Section_string       -> ".section	.rodata"
@@ -211,14 +261,20 @@ module Target (*: TARGET*) =
 
 (* Generate code to load the address of id + ofs in register r *)
 
-(* FIXME DMonniaux ugly ugly hack to get at standard __thread data *)
     let loadsymbol oc r id ofs =
       if Archi.pic_code () then begin
         assert (ofs = Integers.Ptrofs.zero);
-        fprintf oc "	make	%a = %s\n" ireg r (extern_atom id)
-      end else begin
-        if (extern_atom id) = "_impure_thread_data" then begin
-            fprintf oc "	addd	%a = $r13, @tprel(%a)\n" ireg r symbol_offset (id, ofs)         
+        if C2C.atom_is_thread_local id then begin
+            (* fprintf oc "	addd	%a = $r13, @tprel(%s)\n" ireg r (extern_atom id) *)
+            fprintf oc "	addd	%a = $r13, @tlsle(%s)\n" ireg r (extern_atom id)
+        end else begin
+            fprintf oc "	make	%a = %s\n" ireg r (extern_atom id)
+        end
+     end else
+     begin
+        if C2C.atom_is_thread_local id then begin
+            (* fprintf oc "	addd	%a = $r13, @tprel(%a)\n" ireg r symbol_offset (id, ofs) *)
+            fprintf oc "	addd	%a = $r13, @tlsle(%a)\n" ireg r symbol_offset (id, ofs)
         end else begin            
             fprintf oc "	make	%a = %a\n" ireg r symbol_offset (id, ofs)
         end
@@ -239,7 +295,20 @@ module Target (*: TARGET*) =
   (*let w oc =
       if Archi.ptr64 then output_string oc "w"
   *)
-(* Offset part of a load or store *)
+
+    (* Profiling *)
+    
+
+    let k1c_profiling_stub oc nr_items
+          profiling_id_table_name
+          profiling_counter_table_name =
+          fprintf oc "	make $r0 = %d\n" nr_items;
+          fprintf oc "	make $r1 = %s\n" profiling_id_table_name;
+          fprintf oc "	make $r2 = %s\n" profiling_counter_table_name;
+          fprintf oc "	goto	%s\n" profiling_write_table_helper;
+          fprintf oc "	;;\n";;
+
+    (* Offset part of a load or store *)
 
     let offset oc n = ptrofs oc n 
 
@@ -328,6 +397,18 @@ module Target (*: TARGET*) =
               fprintf oc "%s begin inline assembly\n\t" comment;
               print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
               fprintf oc "%s end inline assembly\n" comment
+          | EF_profiling(id, coq_kind) ->
+             let kind = Z.to_int coq_kind in
+             assert (kind >= 0);
+             assert (kind <= 1);
+             fprintf oc "%s profiling %a %d\n" comment
+               Profilingaux.pp_id id kind;
+             fprintf oc "	make	$r63 = %s\n" profiling_counter_table_name;
+             fprintf oc "	make	$r62 = 1\n";
+             fprintf oc "	;;\n";
+             fprintf oc "	afaddd	%d[$r63] = $r62\n"
+               (profiling_offset id kind);
+             fprintf oc "	;;\n"
           | _ ->
               assert false
          end
@@ -789,8 +870,9 @@ module Target (*: TARGET*) =
       if !Clflags.option_g then begin
         section oc Section_text;
       end
-
+       
     let print_epilogue oc =
+      print_profiling_epilogue elf_text_print_fun_info Dtors k1c_profiling_stub oc;
       if !Clflags.option_g then begin
         Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
         section oc Section_text;
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index 10f38391..8f96dafc 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -71,3 +71,5 @@ Global Opaque ptr64 big_endian splitlong
               default_nan_32 choose_nan_32
               fma_order fma_invalid_mul_is_nan
               float_of_single_preserves_sNaN.
+
+Definition has_notrap_loads := false.
diff --git a/powerpc/AsmToJSON.ml b/powerpc/AsmToJSON.ml
index f4d4285a..38f4bc75 100644
--- a/powerpc/AsmToJSON.ml
+++ b/powerpc/AsmToJSON.ml
@@ -365,6 +365,7 @@ let pp_instructions pp ic =
       | EF_annot_val _
       | EF_builtin _
       | EF_debug _
+      | EF_profiling _
       | EF_external _
       | EF_free
       | EF_malloc
diff --git a/powerpc/Op.v b/powerpc/Op.v
index b73cb14b..a0ee5bb8 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -592,14 +592,20 @@ Definition is_trapping_op (op : operation) :=
   | _ => false
   end.
 
+Definition args_of_operation op :=
+  if eq_operation op Omove
+  then 1%nat
+  else List.length (fst (type_of_operation op)).
+
+
 Lemma is_trapping_op_sound:
   forall op vl sp m,
-    op <> Omove ->
     is_trapping_op op = false ->
-    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    (List.length vl) = args_of_operation op ->
     eval_operation genv sp op vl m <> None.
 Proof.
-  destruct op; intros; simpl in *; try congruence.
+  unfold args_of_operation.
+  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index 50b1bdd6..52f4f855 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -472,7 +472,7 @@ Definition intuoffloat (e: expr) :=
   else
     Elet e
     (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
-      (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
+      (Econdition (CEcond (Ccompf Clt) None (Eletvar 1 ::: Eletvar 0 ::: Enil))
         (intoffloat (Eletvar 1))
         (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat.
 
diff --git a/powerpc/TargetPrinter.ml b/powerpc/TargetPrinter.ml
index 0f608d25..3ea03786 100644
--- a/powerpc/TargetPrinter.ml
+++ b/powerpc/TargetPrinter.ml
@@ -117,7 +117,9 @@ module Linux_System : SYSTEM =
 
     let name_of_section = function
       | Section_text -> ".text"
-      | Section_data i ->
+      | Section_data(i, true) ->
+         failwith "_Thread_local unsupported on this platform"
+      | Section_data(i, false) ->
           if i then
             ".data"
           else
@@ -218,7 +220,9 @@ module Diab_System : SYSTEM =
 
     let name_of_section = function
       | Section_text -> ".text"
-      | Section_data i -> if i then ".data" else common_section ()
+      | Section_data(i, true) ->
+         failwith "_Thread_local unsupported on this platform"
+      | Section_data (i, false) -> if i then ".data" else common_section ()
       | Section_small_data i -> if i then ".sdata" else ".sbss"
       | Section_const _ -> ".text"
       | Section_small_const _ -> ".sdata2"
diff --git a/riscV/Archi.v b/riscV/Archi.v
index 61d129d0..9bdaad99 100644
--- a/riscV/Archi.v
+++ b/riscV/Archi.v
@@ -72,3 +72,5 @@ Global Opaque ptr64 big_endian splitlong
 (** Whether to generate position-independent code or not *)
 
 Parameter pic_code: unit -> bool.
+
+Definition has_notrap_loads := false.
diff --git a/riscV/Op.v b/riscV/Op.v
index a71696c7..14d07e0b 100644
--- a/riscV/Op.v
+++ b/riscV/Op.v
@@ -682,15 +682,21 @@ Definition is_trapping_op (op : operation) :=
   | Ofloatoflong | Ofloatoflongu => true
   | _ => false
   end.
+                
+
+Definition args_of_operation op :=
+  if eq_operation op Omove
+  then 1%nat
+  else List.length (fst (type_of_operation op)).
 
 Lemma is_trapping_op_sound:
   forall op vl sp m,
-    op <> Omove ->
     is_trapping_op op = false ->
-    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    (List.length vl) = args_of_operation op ->
     eval_operation genv sp op vl m <> None.
 Proof.
-  destruct op; intros; simpl in *; try congruence.
+  unfold args_of_operation.
+  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
diff --git a/riscV/TargetPrinter.ml b/riscV/TargetPrinter.ml
index 64bcea4c..1f02ca71 100644
--- a/riscV/TargetPrinter.ml
+++ b/riscV/TargetPrinter.ml
@@ -107,7 +107,9 @@ module Target : TARGET =
 
     let name_of_section = function
       | Section_text         -> ".text"
-      | Section_data i | Section_small_data i ->
+      | Section_data(i, true) ->
+         failwith "_Thread_local unsupported on this platform"
+      | Section_data(i, false) | Section_small_data i ->
           if i then ".data" else common_section ()
       | Section_const i | Section_small_const i ->
           if i || (not !Clflags.option_fcommon) then ".section	.rodata" else "COMM"
diff --git a/runtime/Makefile b/runtime/Makefile
index e3f008a9..ebce458b 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -1,6 +1,6 @@
 include ../Makefile.config
 
-CFLAGS=-O1 -g -Wall
+CFLAGS=-O1 -Wall
 
 ifeq ($(ARCH),x86)
 ifeq ($(MODEL),64)
@@ -23,9 +23,10 @@ OBJS=i64_dtou.o i64_utod.o i64_utof.o vararg.o
 else ifeq ($(ARCH),powerpc64)
 OBJS=i64_dtou.o i64_stof.o i64_utod.o i64_utof.o vararg.o
 else ifeq ($(ARCH),mppa_k1c)
-OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o vararg.o\
-		 i64_dtos.o i64_dtou.o i64_utod.o i64_utof.o i64_stod.o i64_stof.o\
-		 i64_shl.o i64_shr.o
+OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o \
+     i64_udivmod_stsud.o i32_divmod.o \
+     i64_utod.o i64_utof.o i64_stod.o i64_stof.o \
+     vararg.o
 DOMAKE:=$(shell (cd mppa_k1c && make))
 else ifeq ($(ARCH),aarch64)
 OBJS=vararg.o
@@ -37,6 +38,8 @@ OBJS=i64_dtos.o i64_dtou.o i64_sar.o i64_sdiv.o i64_shl.o \
   vararg.o
 endif
 
+OBJS+=write_profiling_table.o
+
 LIB=libcompcert.a
 
 INCLUDES=include/float.h include/stdarg.h include/stdbool.h \
@@ -70,7 +73,7 @@ $(LIB): $(OBJS)
 # generated assembly
 
 %.o: c/%.c c/i64.h ../ccomp
-	../ccomp -O2 -S -o $*.s -I./c c/$*.c
+	../ccomp -g -O2 -S -o $*.s -I./c c/$*.c
 	sed -i -e 's/i64_/__compcert_i64_/g' $*.s
 	$(CASMRUNTIME) -o $*.o $*.s
 	@rm $*.s
diff --git a/runtime/c/write_profiling_table.c b/runtime/c/write_profiling_table.c
new file mode 100644
index 00000000..0ce7a948
--- /dev/null
+++ b/runtime/c/write_profiling_table.c
@@ -0,0 +1,58 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+typedef uint8_t md5_hash[16];
+typedef uint64_t condition_counters[2];
+
+static void write_id(FILE *fp, md5_hash *hash) {
+  fwrite(hash, 16, 1, fp);
+}
+
+/* Extract byte number i of counter, byte 0 being the least significant. */
+#define BYTE(counter, i) ((counter >> (8*i)) & 0xFF)
+
+/* Emit the 64-bit counter on fp as 8 bytes, least significant byte first,
+   so the layout of the file does not depend on the host byte order. */
+static void write_counter(FILE *fp, uint64_t counter) {
+  int index;
+  for (index = 0; index < 8; index++) {
+    putc(BYTE(counter, index), fp);
+  }
+}
+
+void _compcert_write_profiling_table(unsigned int nr_items,
+				      md5_hash id_table[],
+				      condition_counters counter_table[]) {
+  /* An empty COMPCERT_PROFILING_DATA disables the dump; unset uses the default. */
+  const char *filename = getenv("COMPCERT_PROFILING_DATA");
+  if (filename) {
+    if (!*filename) return;
+  } else {
+    filename = "compcert_profiling.dat";
+  }
+
+  /* Binary append: text mode may rewrite 0x0A bytes and corrupt the records. */
+  FILE *fp = fopen(filename, "ab");
+  if (fp == NULL) {
+    perror("open CompCert profiling data for writing");
+    return;
+  }
+
+  /* One record per item: the 16-byte id, then the two 64-bit counters. */
+  for(unsigned int i=0; i<nr_items; i++) {
+    write_id(fp, &id_table[i]);
+    write_counter(fp, counter_table[i][0]);
+    write_counter(fp, counter_table[i][1]);
+  }
+
+  /* Successful library calls may still modify errno, so test the stream's
+     error indicator and the result of fclose (which flushes) instead. */
+  int failed = ferror(fp);
+  if (fclose(fp) != 0)
+    failed = 1;
+  if (failed) {
+    perror("write CompCert profiling data");
+  }
+}
diff --git a/runtime/include/ccomp_k1c_fixes.h b/runtime/include/ccomp_k1c_fixes.h
index 718ac3e5..7f111742 100644
--- a/runtime/include/ccomp_k1c_fixes.h
+++ b/runtime/include/ccomp_k1c_fixes.h
@@ -6,7 +6,7 @@
 #endif
 
 #undef __GNUC__
-#define __thread
+#define __thread _Thread_local
 
 struct __int128_ccomp { long __int128_ccomp_low; long __int128_ccomp_high; };
 
@@ -25,6 +25,6 @@ extern long long __compcert_afaddd(void *address, unsigned long long incr);
 extern int __compcert_afaddw(void *address, unsigned int incr);
 #endif
 
-#define __builtin_expect(x, y) (x)
+/* #define __builtin_expect(x, y) (x) */
 #define __builtin_ctz(x) __builtin_k1_ctzw(x)
 #define __builtin_clz(x) __builtin_k1_clzw(x)
diff --git a/runtime/mppa_k1c/i32_divmod.s b/runtime/mppa_k1c/i32_divmod.s
new file mode 100644
index 00000000..d2b4e8d5
--- /dev/null
+++ b/runtime/mppa_k1c/i32_divmod.s
@@ -0,0 +1,120 @@
+/* K1C
+32-bit unsigned/signed integer division/modulo (udiv5)
+
+D. Monniaux, CNRS, VERIMAG */
+
+	
+	.globl __compcert_i32_sdiv_fp
+__compcert_i32_sdiv_fp:
+	compw.lt $r2 = $r0, 0
+	compw.lt $r3 = $r1, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	;;
+	xord $r2 = $r2, $r3
+	make $r3 = 0
+	goto __compcert_i32_divmod_fp
+	;;
+	
+	.globl __compcert_i32_smod_fp
+__compcert_i32_smod_fp:
+	compw.lt $r2 = $r0, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	make $r3 = 1
+	goto __compcert_i32_divmod_fp
+	;;
+	
+	.globl __compcert_i32_umod_fp
+__compcert_i32_umod_fp:
+	make $r2 = 0
+	make $r3 = 1
+	goto __compcert_i32_divmod_fp
+	;;
+
+	.globl __compcert_i32_udiv_fp
+__compcert_i32_udiv_fp:
+	make $r2 = 0
+	make $r3 = 0
+	;;
+
+/*
+r0 : a
+r1 : b
+r2 : negate result?
+r3 : return mod?
+*/
+
+	.globl __compcert_i32_divmod_fp
+__compcert_i32_divmod_fp:
+	zxwd $r7 = $r1
+	zxwd $r1 = $r0
+#ifndef NO_SHORTCUT
+	compw.ltu $r8 = $r0, $r1
+	cb.weqz $r1? .ERR # return 0 if divide by 0
+#endif
+	;;
+# a in r1, b in r7
+	floatud.rn.s $r5 = $r7, 0
+#ifndef NO_SHORTCUT
+	compd.eq $r8 = $r7, 1
+	cb.wnez $r8? .LESS # shortcut if a < b
+#endif
+	;;
+# b (double) in r5
+	make $r6 = 0x3ff0000000000000 # 1.0
+	fnarrowdw.rn.s $r11 = $r5
+#	cb.wnez $r8, .RET1 # if b=1
+	;;
+# b (single) in r11
+	floatud.rn.s $r10 = $r1, 0
+	finvw.rn.s $r11 = $r11
+	;;
+	fwidenlwd.s $r11 = $r11
+	;;
+# invb0 in r11
+	copyd $r9 = $r11
+	ffmsd.rn.s $r6 = $r11, $r5
+# alpha in r6
+	;;
+	ffmad.rn.s $r9 = $r11, $r6
+# 1/b in r9
+	;;
+	fmuld.rn.s $r0 = $r10, $r9
+# a/b in r0
+	;;
+	fixedud.rn.s $r0 = $r0, 0
+	;;
+	msbfd $r1 = $r0, $r7
+	;;
+	addd $r6 = $r0, -1
+	addd $r8 = $r1, $r7
+	;;
+	cmoved.dltz $r1? $r0 = $r6
+	cmoved.dltz $r1? $r1 = $r8
+	;;
+	negw $r4 = $r0
+	negw $r5 = $r1
+	;;
+	cmoved.wnez $r2? $r0 = $r4
+	cmoved.wnez $r2? $r1 = $r5
+	;;
+.END:
+	cmoved.wnez $r3? $r0 = $r1
+	ret
+	;;
+#ifndef NO_SHORTCUT
+
+.LESS:
+	make $r0 = 0
+	negw $r5 = $r1
+	;;
+	cmoved.wnez $r2? $r1 = $r5
+	goto .END
+	;;
+	
+.ERR:
+	make $r0 = 0
+	ret
+	;;
+#endif
diff --git a/runtime/mppa_k1c/i64_sdiv.c b/runtime/mppa_k1c/i64_sdiv.c
index 60269cae..b98d9316 100644
--- a/runtime/mppa_k1c/i64_sdiv.c
+++ b/runtime/mppa_k1c/i64_sdiv.c
@@ -1,10 +1,5 @@
 extern long __divdi3 (long a, long b);
 
-long i64_sdiv (long a, long b)
-{
-  return __divdi3 (a, b);
-}
-
 int i32_sdiv (int a, int b)
 {
   return __divdi3 (a, b);
diff --git a/runtime/mppa_k1c/i64_smod.c b/runtime/mppa_k1c/i64_smod.c
index 26ffb39b..3371eecf 100644
--- a/runtime/mppa_k1c/i64_smod.c
+++ b/runtime/mppa_k1c/i64_smod.c
@@ -1,40 +1,5 @@
-#if COMPLIQUE
-unsigned long long
-udivmoddi4(unsigned long long num, unsigned long long den, int modwanted);
-
-long long
-i64_smod (long long a, long long b)
-{
-  int neg = 0;
-  long long res;
-
-  if (a < 0)
-    {
-      a = -a;
-      neg = 1;
-    }
-
-  if (b < 0)
-    b = -b;
-
-  res = udivmoddi4 (a, b, 1);
-
-  if (neg)
-    res = -res;
-
-  return res;
-}
-
-#else
 extern long __moddi3 (long a, long b);
-
-long i64_smod (long a, long b)
-{
-  return __moddi3 (a, b);
-}
-
 int i32_smod (int a, int b)
 {
   return __moddi3 (a, b);
 }
-#endif
diff --git a/runtime/mppa_k1c/i64_udiv.c b/runtime/mppa_k1c/i64_udiv.c
index e69de29b..75f4bbf5 100644
--- a/runtime/mppa_k1c/i64_udiv.c
+++ b/runtime/mppa_k1c/i64_udiv.c
@@ -0,0 +1,6 @@
+extern unsigned long __udivdi3 (unsigned long a, unsigned long b);
+
+unsigned i32_udiv (unsigned a, unsigned b)
+{
+  return __udivdi3 (a, b);
+}
diff --git a/runtime/mppa_k1c/i64_udivmod.c b/runtime/mppa_k1c/i64_udivmod.c
index 74b39874..ca48cd87 100644
--- a/runtime/mppa_k1c/i64_udivmod.c
+++ b/runtime/mppa_k1c/i64_udivmod.c
@@ -1,3 +1,4 @@
+#if 0
 /* THIS IS THE PREVIOUS VERSION, USED ON BOSTAN AND ANDEY */
 unsigned long long
 udivmoddi4(unsigned long long num, unsigned long long den, int modwanted)
@@ -26,3 +27,4 @@ udivmoddi4(unsigned long long num, unsigned long long den, int modwanted)
 
     return modwanted ? r : q;
 }
+#endif
diff --git a/runtime/mppa_k1c/i64_udivmod_stsud.s b/runtime/mppa_k1c/i64_udivmod_stsud.s
new file mode 100644
index 00000000..50d0a767
--- /dev/null
+++ b/runtime/mppa_k1c/i64_udivmod_stsud.s
@@ -0,0 +1,218 @@
+/*
+Integer division for K1c
+
+David Monniaux, CNRS / Verimag
+	*/
+	
+	.globl dm_udivmoddi4
+dm_udivmoddi4:
+	sxwd $r2 = $r2
+	make $r5 = 0
+	compd.ltu $r3 = $r0, $r1
+	;;
+
+	clzd $r3 = $r1
+	clzd $r4 = $r0
+	cb.dnez $r3? .L74
+	;;
+
+	sbfw $r4 = $r4, $r3
+	;;
+
+	zxwd $r3 = $r4
+	slld $r1 = $r1, $r4
+	;;
+
+	compd.ltu $r6 = $r0, $r1
+	;;
+
+	cb.dnez $r6? .L4C
+	;;
+
+	make $r5 = 1
+	sbfd $r0 = $r1, $r0
+	;;
+
+	slld $r5 = $r5, $r4
+	;;
+
+.L4C:
+	cb.deqz $r3? .L74
+	;;
+
+	srld $r1 = $r1, 1
+	zxwd $r3 = $r4
+	;;
+
+	loopdo $r3, .LOOP
+	;;
+
+	stsud $r0 = $r1, $r0
+	;;
+
+.LOOP:
+	addd $r5 = $r0, $r5
+	srld $r0 = $r0, $r4
+	;;
+
+	slld $r4 = $r0, $r4
+	;;
+
+	sbfd $r5 = $r4, $r5
+	;;
+
+.L74:
+	cmoved.deqz $r2? $r0 = $r5
+	ret
+	;;
+
+/*
+r0 : a
+r1 : b
+r2 : negate result?
+r3 : return mod?
+*/
+
+	.globl __compcert_i32_sdiv_stsud
+__compcert_i32_sdiv_stsud:
+	compw.lt $r2 = $r0, 0
+	compw.lt $r3 = $r1, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	;;
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	xord $r2 = $r2, $r3
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+	
+	.globl __compcert_i32_smod_stsud
+__compcert_i32_smod_stsud:
+	compw.lt $r2 = $r0, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	make $r3 = 1
+	;;
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	goto __compcert_i64_divmod_stsud
+	;;
+	
+	.globl __compcert_i32_umod_stsud
+__compcert_i32_umod_stsud:
+	make $r2 = 0
+	make $r3 = 1
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i32_udiv_stsud
+__compcert_i32_udiv_stsud:
+	make $r2 = 0
+	make $r3 = 0
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	goto __compcert_i64_divmod_stsud
+	;;
+	
+	.globl __compcert_i64_umod_stsud
+__compcert_i64_umod_stsud:
+	make $r2 = 0
+	make $r3 = 1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_udiv_stsud
+__compcert_i64_udiv_stsud:
+	make $r2 = 0
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_sdiv_stsud
+__compcert_i64_sdiv_stsud:
+	compd.lt $r2 = $r0, 0
+	compd.lt $r3 = $r1, 0
+	absd $r0 = $r0
+	absd $r1 = $r1
+	;;
+	xord $r2 = $r2, $r3
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+	
+	.globl __compcert_i64_smod_stsud
+__compcert_i64_smod_stsud:
+	compd.lt $r2 = $r0, 0
+	absd $r0 = $r0
+	absd $r1 = $r1
+	make $r3 = 1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_divmod_stsud
+__compcert_i64_divmod_stsud:
+	make $r5 = 0
+	compd.ltu $r7 = $r0, $r1
+	;;
+
+	clzd $r7 = $r1
+	clzd $r4 = $r0
+	cb.dnez $r7? .ZL74
+	;;
+
+	sbfw $r4 = $r4, $r7
+	;;
+
+	zxwd $r7 = $r4
+	slld $r1 = $r1, $r4
+	;;
+
+	compd.ltu $r6 = $r0, $r1
+	;;
+
+	cb.dnez $r6? .ZL4C
+	;;
+
+	make $r5 = 1
+	sbfd $r0 = $r1, $r0
+	;;
+
+	slld $r5 = $r5, $r4
+	;;
+
+.ZL4C:
+	cb.deqz $r7? .ZL74
+	;;
+
+	srld $r1 = $r1, 1
+	zxwd $r7 = $r4
+	;;
+
+	loopdo $r7, .ZLOOP
+	;;
+
+	stsud $r0 = $r1, $r0
+	;;
+
+.ZLOOP:
+	addd $r5 = $r0, $r5
+	srld $r0 = $r0, $r4
+	;;
+
+	slld $r4 = $r0, $r4
+	;;
+
+	sbfd $r5 = $r4, $r5
+	;;
+
+.ZL74:
+	cmoved.weqz $r3? $r0 = $r5
+	;;
+	negd $r5 = $r0
+	;;
+	cmoved.wnez $r2? $r0 = $r5
+	ret
+	;;
diff --git a/runtime/mppa_k1c/i64_umod.c b/runtime/mppa_k1c/i64_umod.c
index e69de29b..59e35960 100644
--- a/runtime/mppa_k1c/i64_umod.c
+++ b/runtime/mppa_k1c/i64_umod.c
@@ -0,0 +1,6 @@
+extern unsigned long __umoddi3 (unsigned long a, unsigned long b);
+
+unsigned i32_umod (unsigned a, unsigned b)
+{
+  return __umoddi3 (a, b);
+}
diff --git a/test/monniaux/cse2/noloopinvariant.c b/test/monniaux/cse2/noloopinvariant.c
new file mode 100644
index 00000000..5c7789bf
--- /dev/null
+++ b/test/monniaux/cse2/noloopinvariant.c
@@ -0,0 +1,6 @@
+int toto(int *t, int n) {
+  for(int i=1; i<n; i++) {
+    if (t[i] > t[0]) return i;
+  }
+  return 0;
+}
diff --git a/test/monniaux/cse2/storeload.c b/test/monniaux/cse2/storeload.c
new file mode 100644
index 00000000..028fb835
--- /dev/null
+++ b/test/monniaux/cse2/storeload.c
@@ -0,0 +1,5 @@
+int toto(int *p, int x) {
+  p[0] = x;
+  p[1] = 3;
+  return *p;
+}
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index c7dc582b..5011b18c 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -45,11 +45,16 @@ static inline cycle_t get_cycle(void) {
   return cycles;
 }
 
-#elif defined (__ARM_ARCH) && (__ARM_ARCH >= 6)
+#elif defined (__ARM_ARCH) // && (__ARM_ARCH >= 6)
 #if (__ARM_ARCH < 8)
 typedef uint32_t cycle_t;
 #define PRcycle PRId32
 
+#ifdef ARM_NO_PRIVILEGE
+static inline cycle_t get_cycle(void) {
+  return 0;
+}
+#else
 /* need this kernel module
 https://github.com/zertyz/MTL/tree/master/cpp/time/kernel/arm */
 static inline cycle_t get_cycle(void) {
@@ -57,14 +62,20 @@ static inline cycle_t get_cycle(void) {
   __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0":"=r" (cycles));
   return cycles;
 }
+#endif
 #else
 #define PRcycle PRId64
 typedef uint64_t cycle_t;
+
+#ifdef ARM_NO_PRIVILEGE
+static inline cycle_t get_cycle(void) {
+  return 0;
+}
+#else
 /* need this kernel module:
 https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0
 
 on 5+ kernels, remove first argument of access_ok macro */
-
 static inline cycle_t get_cycle(void)
 {
   uint64_t val;
@@ -72,6 +83,7 @@ static inline cycle_t get_cycle(void)
   return val;
 }
 #endif
+#endif
 
 #else
 #define PRcycle PRId32
diff --git a/test/monniaux/division/sum_div.c b/test/monniaux/division/sum_div.c
deleted file mode 100644
index 87256922..00000000
--- a/test/monniaux/division/sum_div.c
+++ /dev/null
@@ -1,18 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include "../clock.h"
- 
-int main(int argc, char **argv) {
-  unsigned modulus = argc < 2 ? 3371 : atoi(argv[1]);
-  clock_prepare();
-  clock_start();
-  unsigned total=0, total_mod=0;
-  for(int i=0; i<1000; i++) {
-    total += i;
-    total_mod = (total_mod + i)%modulus;
-  }
-  clock_stop();
-  print_total_clock();
-  printf("%u %u %d\n", total, total_mod, total%modulus == total_mod);
-  return 0;
-}
diff --git a/test/monniaux/expect/expect.c b/test/monniaux/expect/expect.c
new file mode 100644
index 00000000..30e0742a
--- /dev/null
+++ b/test/monniaux/expect/expect.c
@@ -0,0 +1,7 @@
+#ifndef PREDICTED
+#define PREDICTED 0
+#endif
+
+int expect(int x, int *y, int *z) {
+  return __builtin_expect(x, PREDICTED) ? *y : *z;
+}
diff --git a/test/monniaux/licm/addv.c b/test/monniaux/licm/addv.c
new file mode 100644
index 00000000..bb0098d0
--- /dev/null
+++ b/test/monniaux/licm/addv.c
@@ -0,0 +1,6 @@
+void addv(double x, double y, int n, int *z)
+{
+  for(int i=0; i<n; i++) {
+    z[i] += (int) (x*y);
+  }
+}
diff --git a/test/monniaux/minisat/Makefile.on_marte b/test/monniaux/minisat/Makefile.on_marte
new file mode 100644
index 00000000..af7b9145
--- /dev/null
+++ b/test/monniaux/minisat/Makefile.on_marte
@@ -0,0 +1,16 @@
+EXE=minisat.ccomp.exe minisat.ccomp.trace-linearize.exe \
+  minisat.gcc-O3.exe \
+  minisat.ccomp.profiled.exe      minisat.gcc-O3.profiled.exe
+
+LOG=$(EXE:.exe=.dat)
+
+all: $(LOG)
+
+%.log : %.exe
+	rm -f $@
+	for i in `seq 1 1000` ; do ./$< sudoku.sat >> $@; done
+
+%.dat : %.log
+	grep 'time cycles: ' $< | sed -e 's/time cycles: //' | awk '{ total += $$1; count++ } END { print total/count }' > $@
+
+.SECONDARY:
diff --git a/test/monniaux/minisat/Makefile.profiled b/test/monniaux/minisat/Makefile.profiled
new file mode 100644
index 00000000..77ba8b43
--- /dev/null
+++ b/test/monniaux/minisat/Makefile.profiled
@@ -0,0 +1,64 @@
+# -*- mode: makefile; -*-
+
+CFILES=main.c solver.c clock.c
+CCOMP=../../../ccomp
+
+#GCC=aarch64-linux-gnu-gcc
+GCC=k1-cos-gcc
+#EXECUTE=qemu-aarch64
+#EXECUTE=qemu-arm
+#EXECUTE=k1-cluster --
+#EXECUTE_CYCLES=k1-cluster --cycle-based --
+
+LIBS=-lm
+PROFILING_DAT=compcert_profiling.dat
+EXAMPLE=sudoku.sat
+CCOMPFLAGS=-static -finline-asm -finline-auto-threshold 50 
+GCCFLAGS=-static
+ALL=minisat.ccomp.log minisat.ccomp.trace-linearize.log minisat.ccomp.profiled.log minisat.gcc-O3.log minisat.gcc-O3.profiled.log
+
+all: $(ALL)
+exe: $(ALL:.log=.exe)
+
+minisat.ccomp.exe: $(CFILES)
+	$(CCOMP) $(CCOMPFLAGS) $(CFILES) -o $@ $(LIBS)
+
+minisat.ccomp.profile-arcs.exe: $(CFILES)
+	$(CCOMP) -DARM_NO_PRIVILEGE $(CCOMPFLAGS) -fprofile-arcs $(CFILES) -o $@ $(LIBS)
+
+minisat.gcc-O3.exe: $(CFILES)
+	$(GCC) $(GCCFLAGS) -O3 $(CFILES) -o $@ $(LIBS)
+
+clock.gcc-O3.noprofile.o : clock.c # clock.c compiled without -fprofile-arcs
+	$(GCC) -DARM_NO_PRIVILEGE $(GCCFLAGS) -O3 -c $< -o $@
+
+minisat.gcc-O3.profile-arcs.exe: main.c solver.c clock.gcc-O3.noprofile.o
+	$(GCC) -DARM_NO_PRIVILEGE $(GCCFLAGS) -fprofile-arcs -O3 $+ -o $@ $(LIBS)
+
+gcda: minisat.gcc-O3.profile-arcs.exe
+	$(EXECUTE) ./$< $(EXAMPLE)
+
+main.gcda solver.gcda: gcda
+
+minisat.gcc-O3.profiled.exe: $(CFILES) $(GCDAFILES)
+	$(GCC) $(GCCFLAGS) -O3 -fprofile-use $(CFILES) -o $@ $(LIBS)
+
+minisat.ccomp.trace-linearize.exe: $(CFILES)
+	$(CCOMP) $(CCOMPFLAGS) -fduplicate 0 -ftracelinearize $(CFILES) -o $@ $(LIBS)
+
+$(PROFILING_DAT): minisat.ccomp.profile-arcs.exe
+	-rm -f $(PROFILING_DAT)
+	$(EXECUTE) ./$< $(EXAMPLE)
+
+minisat.ccomp.profiled.exe: $(CFILES) $(PROFILING_DAT)
+	$(CCOMP) $(CCOMPFLAGS) -fprofile-use= $(PROFILING_DAT) -ftracelinearize $(CFILES) -o $@ $(LIBS)
+
+%.log : %.exe # run the benchmark on $(EXAMPLE), capturing stdout+stderr in the log
+	$(EXECUTE_CYCLES) ./$< $(EXAMPLE) 2>&1 | tee $@
+
+clean:
+	-rm -f *.log *.exe $(PROFILING_DAT) $(GCDAFILES)
+
+.PHONY: clean gcda exe all
+
+.SECONDARY:
diff --git a/test/monniaux/minisat/clock.c b/test/monniaux/minisat/clock.c
new file mode 120000
index 00000000..d6bade99
--- /dev/null
+++ b/test/monniaux/minisat/clock.c
@@ -0,0 +1 @@
+../clock.c
+\ No newline at end of file
diff --git a/test/monniaux/minisat/cycles.h b/test/monniaux/minisat/cycles.h
new file mode 120000
index 00000000..84e54d21
--- /dev/null
+++ b/test/monniaux/minisat/cycles.h
@@ -0,0 +1 @@
+../cycles.h
+\ No newline at end of file
diff --git a/test/monniaux/minisat/k1c.inline_50.log b/test/monniaux/minisat/k1c.inline_50.log
new file mode 100644
index 00000000..438a06b4
--- /dev/null
+++ b/test/monniaux/minisat/k1c.inline_50.log
@@ -0,0 +1,14 @@
+==> minisat.ccomp.log <==
+time cycles: 3252345
+
+==> minisat.ccomp.profiled.log <==
+time cycles: 3150170
+
+==> minisat.ccomp.trace-linearize.log <==
+time cycles: 3192299
+
+==> minisat.gcc-O3.log <==
+time cycles: 2780324
+
+==> minisat.gcc-O3.profiled.log <==
+time cycles: 2487533
diff --git a/test/monniaux/minisat/solver.h b/test/monniaux/minisat/solver.h
index c9ce0219..4b96b017 100644
--- a/test/monniaux/minisat/solver.h
+++ b/test/monniaux/minisat/solver.h
@@ -19,6 +19,8 @@ OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWA
 **************************************************************************************************/
 // Modified to compile with MS Visual Studio 6.0 by Alan Mishchenko
 
+#include <stdint.h>
+
 #ifndef solver_h
 #define solver_h
 
@@ -39,11 +41,14 @@ static const bool  false     = 0;
 typedef int                lit;
 typedef char               lbool;
 
+#if 0
 #ifdef _WIN32
 typedef signed __int64     uint64;   // compatible with MS VS 6.0
 #else
 typedef unsigned long long uint64;
 #endif
+#endif
+typedef uint64_t uint64;
 
 static const int   var_Undef = -1;
 static const lit   lit_Undef = -2;
diff --git a/test/monniaux/profiling/profiling_call.c b/test/monniaux/profiling/profiling_call.c
new file mode 100644
index 00000000..ce20241d
--- /dev/null
+++ b/test/monniaux/profiling/profiling_call.c
@@ -0,0 +1,27 @@
+/*
+For knowing how to write assembly profiling stubs.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
+
+typedef uint8_t md5_hash[16];
+typedef uint64_t condition_counters[2];
+
+void _compcert_write_profiling_table(unsigned int nr_items,
+				     md5_hash id_table[],
+				     condition_counters counter_table[]);
+
+static md5_hash id_table[42] = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}};
+static condition_counters counter_table[42];
+
+void write_profile(void) {
+  _compcert_write_profiling_table(42, id_table, counter_table);
+}
+
+static _Atomic uint64_t counter;
+
+void incr_counter(void) {
+  counter++;
+}
diff --git a/test/monniaux/thread_local/thread_local.c b/test/monniaux/thread_local/thread_local.c
new file mode 100644
index 00000000..7a50db0a
--- /dev/null
+++ b/test/monniaux/thread_local/thread_local.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+
+_Thread_local int toto;
+_Thread_local int toto2 = 45;
+
+int foobar(void) {
+  return toto;
+}
+
+int main() {
+  printf("%d %d\n", toto, toto2);
+  return 0;
+}
diff --git a/test/monniaux/thread_local/thread_local2.c b/test/monniaux/thread_local/thread_local2.c
new file mode 100644
index 00000000..ba244ac6
--- /dev/null
+++ b/test/monniaux/thread_local/thread_local2.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include <pthread.h>
+
+_Thread_local int toto;
+_Thread_local int toto2 = 45;
+
+void* poulet(void * dummy) { /* thread body: prints the addresses of toto and toto2 as seen by the caller */
+  printf("%p %p\n", (void *) &toto, (void *) &toto2); /* %p requires void * arguments */
+  return NULL;
+}
+
+int main() {
+  pthread_t thr;
+  poulet(NULL);
+  pthread_create(&thr, NULL, poulet, NULL);
+  pthread_join(thr, NULL);
+  return 0;
+}
diff --git a/test/mppa/hardcheck.sh b/test/mppa/hardcheck.sh
index 82b63182..b6538f0e 100755
--- a/test/mppa/hardcheck.sh
+++ b/test/mppa/hardcheck.sh
@@ -3,4 +3,4 @@
 
 source do_test.sh
 
-do_test hardcheck
+do_test hardcheck 1
diff --git a/test/mppa/hardtest.sh b/test/mppa/hardtest.sh
index 09511da6..6321bc7d 100755
--- a/test/mppa/hardtest.sh
+++ b/test/mppa/hardtest.sh
@@ -3,4 +3,4 @@
 
 source do_test.sh
 
-do_test hardtest
+do_test hardtest 1
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
new file mode 100644
index 00000000..025dbacc
--- /dev/null
+++ b/tools/compiler_expand.ml
@@ -0,0 +1,169 @@
+(*
+The Compcert verified compiler
+
+Compiler.vexpand -> Compiler.v
+
+Expand the list of RTL compiler passes into Compiler.v
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+type is_partial = TOTAL | PARTIAL;;
+type print_result = Noprint | Print of string;;
+type when_triggered = Always | Option of string;;
+
+let rtl_passes =
+[|
+TOTAL, (Option "optim_tailcalls"), (Some "Tail calls"), "Tailcall";
+PARTIAL, Always, (Some "Inlining"), "Inlining";
+TOTAL, (Option "profile_arcs"), (Some "Profiling insertion"), "Profiling";
+TOTAL, (Option "branch_probabilities"), (Some "Profiling use"), "ProfilingExploit";
+TOTAL, (Option "optim_move_loop_invariants"), (Some "Inserting initial nop"), "FirstNop";
+TOTAL, Always, (Some "Renumbering"), "Renumber";
+PARTIAL, (Option "optim_duplicate"),  (Some "Tail-duplicating"), "Duplicate";
+TOTAL, Always, (Some "Renumbering pre constprop"), "Renumber";
+TOTAL, (Option "optim_constprop"), (Some "Constant propagation"), "Constprop";
+PARTIAL, (Option "optim_move_loop_invariants"), (Some "LICM"), "LICM";
+TOTAL, Always, (Some "Renumbering pre CSE"), "Renumber";
+PARTIAL, (Option "optim_CSE"), (Some "CSE"), "CSE";
+TOTAL, (Option "optim_CSE2"), (Some "CSE2"), "CSE2";
+PARTIAL, (Option "optim_CSE3"), (Some "CSE3"), "CSE3";
+TOTAL, (Option "optim_forward_moves"), (Some "Forwarding moves"), "ForwardMoves";
+PARTIAL, (Option "optim_redundancy"), (Some "Redundancy elimination"), "Deadcode";
+TOTAL, (Option "all_loads_nontrap"), None, "Allnontrap";
+PARTIAL, Always, (Some "Unused globals"), "Unusedglob"
+|];;
+
+let post_rtl_passes =
+[|
+  PARTIAL, Always, (Some "Register allocation"), "Allocation", (Print "LTL");
+  TOTAL, Always, (Some "Branch tunneling"), "Tunneling", Noprint;
+  PARTIAL, Always, (Some "CFG linearization"), "Linearize", Noprint;
+  TOTAL, Always, (Some "Label cleanup"), "CleanupLabels", Noprint;
+  PARTIAL, (Option "debug"), (Some "Debugging info for local variables"), "Debugvar", Noprint;
+  PARTIAL, Always, (Some "Mach generation"), "Stacking", (Print "Mach")
+|];;
+
+let all_passes =
+  Array.concat
+    [Array.mapi
+       (fun i (a,b,c,d) -> (a,b,c,d, Print (Printf.sprintf "RTL %d" (i+1))))
+       rtl_passes;
+     post_rtl_passes];;
+
+let totality = function TOTAL -> "total" | PARTIAL -> "partial";;
+
+let print_rtl_require oc =
+  Array.iter (fun (partial, trigger, time_label, pass_name, printing) ->
+      Printf.fprintf oc "Require %s.\n" pass_name)
+    all_passes;;
+
+let print_rtl_require_proof oc =
+  Array.iter (fun (partial, trigger, time_label, pass_name, printing) ->
+      Printf.fprintf oc "Require %sproof.\n" pass_name)
+    all_passes;;
+
+let print_rtl_transf oc =
+  Array.iteri
+    (fun i (partial, trigger, time_label, pass_name, printing) ->
+      output_string oc (match partial with
+                        | TOTAL -> "   @@ "
+                        | PARTIAL -> "  @@@ ");
+      (match trigger with
+       | Always -> ()
+       | Option s ->
+          Printf.fprintf oc "%s_if Compopts.%s " (totality partial) s);
+      output_char oc '(';
+      (match time_label with
+       | None -> ()
+       | Some s ->
+          Printf.fprintf oc "time \"%s\" " s);
+      Printf.fprintf oc "%s.transf_program)\n" pass_name;
+      (match printing with
+       | Noprint -> ()
+       | Print s ->
+           Printf.fprintf oc "   @@ print (print_%s)\n" s)
+    ) all_passes;;
+
+let print_rtl_mkpass oc =
+  Array.iter (fun (partial, trigger, time_label, pass_name, printing) ->
+      output_string oc "  ::: mkpass (";
+      (match trigger with
+       | Always -> ()
+       | Option s ->
+          Printf.fprintf oc "match_if Compopts.%s " s);
+      Printf.fprintf oc "%sproof.match_prog)\n" pass_name)
+    all_passes;;
+
+let print_if kind oc = function
+  | Always -> ()
+  | Option s -> Printf.fprintf oc "%s_if %s " kind s;;
+
+let numbering_base = 7
+                   
+let print_rtl_proof oc =
+  Array.iteri (fun i (partial, trigger, time_label, pass_name, printing) ->
+      let j = i+numbering_base in
+      match partial with
+      | TOTAL ->
+         Printf.fprintf oc "set (p%d := %a%s.transf_program p%d) in *.\n"
+           j (print_if "total") trigger pass_name (pred j)
+      | PARTIAL ->
+         Printf.fprintf oc "destruct (%a%s.transf_program p%d) as [p%d|e] eqn:P%d; cbn in T; try discriminate.\n"
+           (print_if "partial") trigger pass_name (pred j) j j)
+    all_passes;;
+
+let print_rtl_proof2 oc =
+  Array.iteri (fun i (partial, trigger, time_label, pass_name, printing) ->
+      let j = i+numbering_base in
+      Printf.fprintf oc "  exists p%d; split. " j;
+      (match trigger with
+       | Always -> ()
+       | Option _ ->
+          (match partial with
+           | TOTAL -> output_string oc "apply total_if_match. "
+           | PARTIAL -> output_string oc "eapply partial_if_match; eauto. "));
+      Printf.fprintf oc "apply %sproof.transf_program_match; auto.\n" pass_name)
+    all_passes;;
+
+let print_rtl_forward_simulations oc = (* one composed forward-simulation step per RTL pass, written to [oc] *)
+  Array.iter (fun (partial, trigger, time_label, pass_name) ->
+      output_string oc "  eapply compose_forward_simulations.\n    ";
+      (match trigger with
+       | Always -> ()
+       | Option _ -> output_string oc "eapply match_if_simulation. eassumption. ");
+      Printf.fprintf oc "eapply %sproof.transf_program_correct; eassumption.\n" pass_name (* newline keeps the next template line separate *)
+    )
+    rtl_passes;;
+
+if (Array.length Sys.argv)<>3
+then (prerr_endline "Usage: compiler_expand <input file> <output file>"; exit 1);;
+
+let filename_in = Sys.argv.(1) and filename_out = Sys.argv.(2) in (* copy input to output, expanding EXPAND_* marker lines *)
+    let ic = open_in filename_in and oc = open_out filename_out in
+    try
+      while true
+      do
+        match input_line ic with
+        | "EXPAND_RTL_TRANSF_PROGRAM" ->
+           print_rtl_transf oc
+        | "EXPAND_RTL_REQUIRE" ->
+           print_rtl_require oc
+        | "EXPAND_RTL_REQUIRE_PROOF" ->
+           print_rtl_require_proof oc
+        | "EXPAND_RTL_MKPASS" ->
+           print_rtl_mkpass oc
+        | "EXPAND_RTL_PROOF" ->
+           print_rtl_proof oc
+        | "EXPAND_RTL_PROOF2" ->
+           print_rtl_proof2 oc
+        | "EXPAND_ASM_SEMANTICS" ->
+           Printf.fprintf oc "    (Asm.semantics p%d)\n"
+             ((Array.length all_passes) + numbering_base) (* first index after the passes numbered from numbering_base *)
+        | "EXPAND_RTL_FORWARD_SIMULATIONS" ->
+           print_rtl_forward_simulations oc
+        | line -> (output_string oc line;
+                   output_char oc '\n')
+      done
+    with End_of_file ->
+      (close_in ic; close_out oc);;
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index b8353046..ad667e3d 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -552,7 +552,7 @@ let expand_instruction instr =
           expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
        | EF_annot_val(kind,txt, targ) ->
           expand_annot_val kind txt targ args res
-       | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+       | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
           emit instr
        | _ ->
           assert false
diff --git a/x86/Op.v b/x86/Op.v
index 15672bbe..28e6dbd8 100644
--- a/x86/Op.v
+++ b/x86/Op.v
@@ -760,14 +760,19 @@ Definition is_trapping_op (op : operation) :=
   | _ => false
   end.
 
+Definition args_of_operation op :=
+  if eq_operation op Omove
+  then 1%nat
+  else List.length (fst (type_of_operation op)).
+
 Lemma is_trapping_op_sound:
   forall op vl sp m,
-    op <> Omove ->
     is_trapping_op op = false ->
-    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    (List.length vl) = args_of_operation op ->
     eval_operation genv sp op vl m <> None.
 Proof.
-  destruct op; intros; simpl in *; try congruence.
+  unfold args_of_operation.
+  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp
index a23c37d5..2a09207b 100644
--- a/x86/SelectOp.vp
+++ b/x86/SelectOp.vp
@@ -503,7 +503,7 @@ Definition intuoffloat (e: expr) :=
   if Archi.splitlong then
     Elet e
       (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
-        (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
+        (Econdition (CEcond (Ccompf Clt) None (Eletvar 1 ::: Eletvar 0 ::: Enil))
           (intoffloat (Eletvar 1))
           (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat
   else
@@ -516,7 +516,7 @@ Nondetfunction floatofintu (e: expr) :=
     if Archi.splitlong then
       let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
       Elet e
-        (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
+        (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) None (Eletvar O ::: Enil))
           (floatofint (Eletvar O))
           (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
     else
diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml
index 6159437e..38eff731 100644
--- a/x86/TargetPrinter.ml
+++ b/x86/TargetPrinter.ml
@@ -133,7 +133,9 @@ module ELF_System : SYSTEM =
 
     let name_of_section = function
       | Section_text -> ".text"
-      | Section_data i | Section_small_data i ->
+      | Section_data(i, true) ->
+         failwith "_Thread_local unsupported on this platform"
+      | Section_data(i, false) | Section_small_data i ->
           if i then ".data" else common_section ()
       | Section_const i | Section_small_const i ->
           if i || (not !Clflags.option_fcommon) then ".section	.rodata" else "COMM"
@@ -165,7 +167,44 @@ module ELF_System : SYSTEM =
 
     let print_var_info = elf_print_var_info
 
-    let print_epilogue _ = ()
+    let print_atexit oc to_be_called =
+      if Archi.ptr64
+      then
+        begin
+          fprintf oc "	leaq	%s(%%rip), %%rdi\n" to_be_called;
+	  fprintf oc "	jmp	atexit\n"
+        end
+      else
+        begin
+          fprintf oc "	pushl	$%s\n" to_be_called;
+          fprintf oc "	call	atexit\n";
+          fprintf oc "	addl	$4, %%esp\n";
+          fprintf oc "	ret\n"
+        end
+
+    let x86_profiling_stub oc nr_items
+          profiling_id_table_name
+          profiling_counter_table_name =
+      if Archi.ptr64
+      then
+        begin
+	  fprintf oc "  leaq	%s(%%rip), %%rdx\n" profiling_counter_table_name;
+	  fprintf oc "  leaq	%s(%%rip), %%rsi\n" profiling_id_table_name;
+          fprintf oc "  movl	$%d, %%edi\n" nr_items;
+          fprintf oc "  jmp	%s\n" profiling_write_table_helper
+        end
+      else
+        begin
+          fprintf oc "	pushl	$%s\n" profiling_counter_table_name;
+	  fprintf oc "	pushl	$%s\n" profiling_id_table_name;
+	  fprintf oc "	pushl	$%d\n" nr_items;
+          fprintf oc "	call	%s\n" profiling_write_table_helper ;
+          fprintf oc "	addl	$12, %%esp\n";
+          fprintf oc "	ret\n"
+        end;;
+    
+    let print_epilogue oc =
+      print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) x86_profiling_stub oc;;
 
     let print_comm_decl oc name sz al =
       fprintf oc "	.comm	%a, %s, %d\n" symbol name (Z.to_string sz) al
@@ -191,7 +230,9 @@ module MacOS_System : SYSTEM =
 
     let name_of_section = function
       | Section_text -> ".text"
-      | Section_data i | Section_small_data i ->
+      | Section_data(i, true) ->
+         failwith "_Thread_local unsupported on this platform"
+      | Section_data(i, false) | Section_small_data i ->
           if i || (not !Clflags.option_fcommon) then ".data" else "COMM"
       | Section_const i  | Section_small_const i ->
           if i || (not !Clflags.option_fcommon) then ".const" else "COMM"
@@ -268,7 +309,9 @@ module Cygwin_System : SYSTEM =
 
     let name_of_section = function
       | Section_text -> ".text"
-      | Section_data i | Section_small_data i ->
+      | Section_data(i, true) ->
+        failwith "_Thread_local unsupported on this platform"
+      | Section_data(i, false) | Section_small_data i ->
           if i then ".data" else common_section ()
       | Section_const i | Section_small_const i ->
           if i || (not !Clflags.option_fcommon) then ".section	.rdata,\"dr\"" else "COMM"
@@ -395,8 +438,28 @@ module Target(System: SYSTEM):TARGET =
         fprintf oc "%a(%%rip)" label lbl
       end
 
-
-
+    let print_profiling_logger oc id kind = (* emit the increment of the 64-bit counter at [profiling_offset id kind] *)
+      assert (kind >= 0);
+      assert (kind <= 1);
+      let ofs = profiling_offset id kind in
+      if Archi.ptr64
+      then
+        begin
+          fprintf oc "%s profiling %a %d: atomic increment\n" comment
+            Profilingaux.pp_id id kind;
+          fprintf oc "	lock addq	$1, %s+%d(%%rip)\n"
+            profiling_counter_table_name ofs
+        end
+      else
+        begin
+          fprintf oc "%s begin profiling %a %d: increment\n" comment
+            Profilingaux.pp_id id kind;
+          fprintf oc "	addl	$1, %s+%d\n" profiling_counter_table_name ofs;
+          fprintf oc "	adcl	$0, %s+%d\n" profiling_counter_table_name (ofs+4); (* high word takes the carry only; $1 would add 2^32 per hit *)
+          fprintf oc "%s end profiling %a %d: increment\n" comment
+            Profilingaux.pp_id id kind;
+        end
+      
 (* Printing of instructions *)
 
 (* Reminder on X86 assembly syntaxes:
@@ -834,6 +897,8 @@ module Target(System: SYSTEM):TARGET =
               fprintf oc "%s begin inline assembly\n\t" comment;
               print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
               fprintf oc "%s end inline assembly\n" comment
+          | EF_profiling(id, coq_kind) ->
+            print_profiling_logger oc id (Z.to_int coq_kind)          
           | _ ->
               assert false
           end
diff --git a/x86_32/Archi.v b/x86_32/Archi.v
index e9d05c14..4681784d 100644
--- a/x86_32/Archi.v
+++ b/x86_32/Archi.v
@@ -64,3 +64,5 @@ Global Opaque ptr64 big_endian splitlong
               default_nan_32 choose_nan_32
               fma_order fma_invalid_mul_is_nan
               float_of_single_preserves_sNaN.
+
+Definition has_notrap_loads := false.
diff --git a/x86_64/Archi.v b/x86_64/Archi.v
index 959d8dc1..0e3c55f8 100644
--- a/x86_64/Archi.v
+++ b/x86_64/Archi.v
@@ -64,3 +64,5 @@ Global Opaque ptr64 big_endian splitlong
               default_nan_32 choose_nan_32
               fma_order fma_invalid_mul_is_nan
               float_of_single_preserves_sNaN.
+
+Definition has_notrap_loads := false.
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-23 19:59:21 +0200
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-23 19:59:21 +0200
commit	433c67f9a05e8cab773d1755aa3eb16f8af588e6 (patch)
tree	dadda4120b592e3900854133b3222cf4fcf3b4af
parent	7a30a72809448535785cd47d26d9415f6ada93e3 (diff)
parent	63f425b8ca329820e629c109fb0a2454ea7e2f27 (diff)
download	compcert-kvx-433c67f9a05e8cab773d1755aa3eb16f8af588e6.tar.gz compcert-kvx-433c67f9a05e8cab773d1755aa3eb16f8af588e6.zip