Merge remote-tracking branch 'origin/mppa-work' into mppa-expect

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-06 22:45:05 +0200
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-06 22:45:05 +0200
commit: 5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633 (patch)
tree: 299bdd3c6068f121ca243d8602addcd27d690fd2
parent: c420bc8d3b87d71c38209b5ab8bca22875466362 (diff)
parent: c6356cdc5f567a317afcb99cb004354cf7dcce0f (diff)
download: compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.tar.gz
compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.zip
364 files changed, 34874 insertions, 5860 deletions
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..02ab53c1
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,3 @@
+# Files that should be ignored by Github linguist
+test/* linguist-vendored
+doc/* linguist-documentation
diff --git a/.gitignore b/.gitignore
index 302382b8..e886bc10 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,8 @@
 **.out
 **.tok
 *.vo
+*.vok
+*.vos
 *.glob
 *.o
 *.a
@@ -48,6 +50,9 @@
 /mppa_k1c/ConstpropOp.v
 /mppa_k1c/SelectOp.v
 /mppa_k1c/SelectLong.v
+/aarch64/ConstpropOp.v
+/aarch64/SelectOp.v
+/aarch64/SelectLong.v
 /backend/SelectDiv.v
 /backend/SplitLong.v
 /cparser/Parser.v
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 00000000..1f854fc3
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,240 @@
+stages:
+  - build
+
+check-admitted:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_x86_64.sh
+    - make check-admitted
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+    
+build_x86_64:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_x86_64.sh
+    - make -j "$NJOBS"
+    - make -C test all test
+    - ulimit -s65536 && make -C test/monniaux/yarpgen
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_ia32:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-multilib
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_ia32.sh
+    - make -j "$NJOBS"
+    - make -C test all test
+    - ulimit -s65536 && make -C test/monniaux/yarpgen BITS=32 TARGET_CC='gcc -m32'
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_aarch64:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-aarch64-linux-gnu qemu-user
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_aarch64.sh
+    - make -j "$NJOBS"
+    - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64' all test
+    - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_arm:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-arm-linux-gnueabi qemu-user
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_arm.sh
+    - make -j "$NJOBS"
+    - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test
+    - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+
+build_armhf:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-arm-linux-gnueabihf qemu-user
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_armhf.sh
+    - make -j "$NJOBS"
+    - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test
+    - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_ppc:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_ppc.sh
+    - make -j "$NJOBS"
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_ppc64:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-powerpc64-linux-gnu
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_ppc64.sh
+    - make -j "$NJOBS"
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_rv64:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_rv64.sh
+    - make -j "$NJOBS"
+    - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64' all test
+    - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_rv32:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+    - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_rv32.sh -no-runtime-lib
+    - make -j "$NJOBS"
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
+
+build_k1c:
+  stage: build
+  image: "coqorg/coq"
+  before_script:
+    - opam switch 4.07.1+flambda
+    - eval `opam config env`
+    - opam install -y menhir
+  script:
+    - ./config_k1c.sh -no-runtime-lib
+    - make -j "$NJOBS"
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+      when: always
+    - if: '$CI_COMMIT_BRANCH == "master"'
+      when: always
+    - when: manual
diff --git a/Changelog b/Changelog
index e5e701d0..8cf4e548 100644
--- a/Changelog
+++ b/Changelog
@@ -1,3 +1,103 @@
+Release 3.7, 2020-03-31
+=======================
+
+ISO C conformance:
+- Functions declared `extern` then implemented `inline` remain `extern`
+- The type of a wide char constant is `wchar_t`, not `int`
+- Support vertical tabs and treat them as whitespace
+- Define the semantics of `free(NULL)`
+
+Bug fixing:
+- Take sign into account for conversions from 32-bit integers to 64-bit pointers
+- PowerPC: more precise determination of small data accesses
+- AArch64: when addressing global variables, check for correct alignment
+- PowerPC, ARM: double rounding error in int64->float32 conversions
+
+ABI conformance:
+- x86, AArch64: re-normalize values of small integer types returned by
+  function calls
+- PowerPC: `float` arguments passed on stack are passed in 64-bit format
+- RISC-V: use the new ELF psABI instead of the old ABI from ISA 2.1
+
+Usability and diagnostics:
+- Unknown builtin functions trigger a specific error message
+- Improved error messages
+
+Coq formalization:
+- Revised modeling of the PowerPC/EREF `isel` instruction
+- Weaker `ec_readonly` condition over external calls
+  (permissions can be dropped on read-only locations)
+
+Coq and OCaml development:
+- Compatibility with Coq version 8.10.1, 8.10.2, 8.11.0
+- Compatibility with OCaml 4.10 and up
+- Compatibility with Menhir 20200123 and up
+- Coq versions prior to 8.8.0 are no longer supported
+- OCaml versions prior to 4.05.0 are no longer supported
+
+
+Release 3.6, 2019-09-17
+=======================
+
+New features and optimizations:
+- New port targeting the AArch64 architecture: ARMv8 in 64-bit mode.
+- New optimization: if-conversion.  Some `if`/`else` statements
+  and `a ? b : c` conditional expressions are compiled to branchless
+  conditional move instructions, when supported by the target processor
+- New optimization flag: `-Obranchless`, to favor the generation of
+  branchless instruction sequences, even if probably slower than branches.
+- Built-in functions can now be given a formal semantics within
+  CompCert, instead of being treated as I/O interactions.
+  Currently, `__builtin_fsqrt` and `__builtin_bswap*` have semantics.
+- Extend constant propagation and CSE optimizations to built-in
+  functions that have known semantics.
+- New "polymorphic" built-in function: `__builtin_sel(a,b,c)`.
+  Similar to `a ? b : c` but `b` and `c` are always evaluated,
+  and a branchless conditional move instruction is produced if possible.
+- x86 64 bits: faster, branchless instruction sequences are produced
+  for conversions between `double` and `unsigned int`.
+- `__builtin_bswap64` is now available for all platforms.
+
+Usability and diagnostics:
+- Improved the DWARF debug information generated in -g mode.
+- Added options -fcommon and -fno-common to control the generation
+  of "common" declarations for uninitialized global.
+- Check for reserved keywords `_Complex` and `_Imaginary`.
+- Reject function declarations with multiple `void` parameters.
+- Define macros `__COMPCERT_MAJOR__`, `__COMPCERT_MINOR__`, and
+  `__COMPCERT_VERSION__` with CompCert's version number. (#284)
+- Prepend `$(DESTDIR)` to the installation target. (#169)
+- Extended inline asm: print register names according to the
+  types of the corresponding arguments (e.g. for x86_64,
+  `%eax` if int and `%rax` if long).
+
+Bug fixing:
+- Introduce distinct scopes for iteration and selection statements,
+  as required by ISO C99.
+- Handle dependencies in sequences of declarations
+  (e.g. `int * x, sz = sizeof(x);`).  (#267)
+- Corrected the check for overflow in integer literals.
+- On x86, __builtin_fma was producing wrong code in some cases.
+- `float` arguments to `__builtin_annot` and `__builtin_ais_annot`
+  were uselessly promoted to type `double`.
+
+Coq formalization and development:
+- Improved C parser based on Menhir version 20190626:
+  fewer run-time checks, faster validation, no axioms. (#276)
+- Compatibility with Coq versions 8.9.1 and 8.10.0.
+- Compatibility with OCaml versions 4.08.0 and 4.08.1.
+- Updated to Flocq version 3.1.
+- Revised the construction of NaN payloads in processor descriptions
+  so as to accommodate FMA.
+- Removed some definitions and lemmas from lib/Coqlib.v, using Coq's
+  standard library instead.
+
+The clightgen tool:
+- Fix normalization of Clight `switch` statements. (#285)
+- Add more tracing options: `-dprepro`, `-dall`. (#298)
+- Fix the output of `-dclight`. (#314)
+
+
 Release 3.5, 2019-02-27
 =======================
 
diff --git a/Makefile b/Makefile
index aa99786b..2cd40800 100644
--- a/Makefile
+++ b/Makefile
@@ -80,14 +80,19 @@ BACKEND=\
   Tailcall.v Tailcallproof.v \
   Inlining.v Inliningspec.v Inliningproof.v \
   Renumber.v Renumberproof.v \
+  Duplicate.v Duplicateproof.v \
   RTLtyping.v \
   Kildall.v Liveness.v \
   ValueDomain.v ValueAOp.v ValueAnalysis.v \
   ConstpropOp.v Constprop.v ConstpropOpproof.v Constpropproof.v \
   CSEdomain.v CombineOp.v CSE.v CombineOpproof.v CSEproof.v \
+  CSE2deps.v CSE2depsproof.v \
+  CSE2.v CSE2proof.v \
   NeedDomain.v NeedOp.v Deadcode.v Deadcodeproof.v \
   Unusedglob.v Unusedglobproof.v \
   Machregs.v Locations.v Conventions1.v Conventions.v LTL.v \
+  ForwardMoves.v ForwardMovesproof.v \
+  Allnontrap.v Allnontrapproof.v \
   Allocation.v Allocproof.v \
   Tunneling.v Tunnelingproof.v \
   Linear.v Lineartyping.v \
@@ -262,7 +267,7 @@ endif
 
 
 clean:
-	rm -f $(patsubst %, %/*.vo, $(DIRS))
+	rm -f $(patsubst %, %/*.vo*, $(DIRS))
 	rm -f $(patsubst %, %/.*.aux, $(DIRS))
 	rm -rf doc/html doc/*.glob
 	rm -f driver/Version.ml
diff --git a/Makefile.extr b/Makefile.extr
index 51e9be59..f2d06def 100644
--- a/Makefile.extr
+++ b/Makefile.extr
@@ -50,13 +50,12 @@ INCLUDES=$(patsubst %,-I %, $(DIRS))
 # Control of warnings:
 
 WARNINGS=-w +a-4-9-27-42 -strict-sequence -safe-string -warn-error +a #Deprication returns with ocaml 4.03
-extraction/%.cmx: WARNINGS +=-w -20-27-32..34-39-41-44..45-60
-extraction/%.cmo: WARNINGS +=-w -20-27-32..34-39-41-44..45-60
-
+extraction/%.cmx: WARNINGS +=-w -20-27-32..34-39-41-44..45-60-67
+extraction/%.cmo: WARNINGS +=-w -20-27-32..34-39-41-44..45-60-67
 cparser/pre_parser.cmx: WARNINGS += -w -41
 cparser/pre_parser.cmo: WARNINGS += -w -41
 
-COMPFLAGS+=-g $(INCLUDES) $(MENHIR_INCLUDES) $(WARNINGS)
+COMPFLAGS+=-g $(INCLUDES) -I "$(MENHIR_DIR)" $(WARNINGS)
 
 # Using .opt compilers if available
 
diff --git a/Makefile.menhir b/Makefile.menhir
index 98bfc750..7909b2f6 100644
--- a/Makefile.menhir
+++ b/Makefile.menhir
@@ -41,7 +41,11 @@ MENHIR_FLAGS = -v --no-stdlib -la 1
 # Using Menhir in --table mode requires MenhirLib.
 
 ifeq ($(MENHIR_TABLE),true)
-  MENHIR_LIBS = menhirLib.cmx
+  ifeq ($(wildcard $(MENHIR_DIR)/menhirLib.cmxa),)
+    MENHIR_LIBS = menhirLib.cmx
+  else
+    MENHIR_LIBS = menhirLib.cmxa
+  endif
 else
   MENHIR_LIBS =
 endif
diff --git a/VERSION b/VERSION
index d4563a62..b60e8d9b 100644
--- a/VERSION
+++ b/VERSION
@@ -1,3 +1,3 @@
-version=3.5
+version=3.7
 buildnr=
 tag=
diff --git a/aarch64/Archi.v b/aarch64/Archi.v
new file mode 100644
index 00000000..aef4ab77
--- /dev/null
+++ b/aarch64/Archi.v
@@ -0,0 +1,88 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Architecture-dependent parameters for AArch64 *)
+
+Require Import ZArith List.
+(*From Flocq*)
+Require Import Binary Bits.
+
+Definition ptr64 := true.
+
+Definition big_endian := false.
+
+Definition align_int64 := 8%Z.
+Definition align_float64 := 8%Z.
+
+Definition splitlong := false.
+
+Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
+Proof.
+  unfold splitlong, ptr64; congruence.
+Qed.
+
+Definition default_nan_64 := (false, iter_nat 51 _ xO xH).
+Definition default_nan_32 := (false, iter_nat 22 _ xO xH).
+
+(** Choose the first signaling NaN, if any;
+    otherwise choose the first NaN;
+    otherwise use default. *)
+
+Definition choose_nan (is_signaling: positive -> bool) 
+                      (default: bool * positive)
+                      (l0: list (bool * positive)) : bool * positive :=
+  let fix choose_snan (l1: list (bool * positive)) :=
+    match l1 with
+    | nil =>
+        match l0 with nil => default | n :: _ => n end
+    | ((s, p) as n) :: l1 =>
+        if is_signaling p then n else choose_snan l1
+    end
+  in choose_snan l0.
+
+Lemma choose_nan_idem: forall is_signaling default n,
+  choose_nan is_signaling default (n :: n :: nil) =
+  choose_nan is_signaling default (n :: nil).
+Proof.
+  intros. destruct n as [s p]; unfold choose_nan; simpl.
+  destruct (is_signaling p); auto. 
+Qed.
+
+Definition choose_nan_64 :=
+  choose_nan (fun p => negb (Pos.testbit p 51)) default_nan_64.
+
+Definition choose_nan_32 :=
+  choose_nan (fun p => negb (Pos.testbit p 22)) default_nan_32.
+
+Lemma choose_nan_64_idem: forall n,
+  choose_nan_64 (n :: n :: nil) = choose_nan_64 (n :: nil).
+Proof. intros; apply choose_nan_idem. Qed.
+
+Lemma choose_nan_32_idem: forall n,
+  choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
+Proof. intros; apply choose_nan_idem. Qed.
+
+Definition fma_order {A: Type} (x y z: A) := (z, x, y).
+
+Definition fma_invalid_mul_is_nan := true.
+
+Definition float_of_single_preserves_sNaN := false.
+
+Global Opaque ptr64 big_endian splitlong
+              default_nan_64 choose_nan_64
+              default_nan_32 choose_nan_32
+              fma_order fma_invalid_mul_is_nan
+              float_of_single_preserves_sNaN.
+
+(** Whether to generate position-independent code or not *)
+
+Parameter pic_code: unit -> bool.
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
new file mode 100644
index 00000000..47cd3051
--- /dev/null
+++ b/aarch64/Asm.v
@@ -0,0 +1,1312 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Abstract syntax and semantics for AArch64 assembly language *)
+
+Require Import Coqlib Zbits Maps.
+Require Import AST Integers Floats.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Locations Conventions.
+Require Stacklayout.
+
+(** * Abstract syntax *)
+
+(** Integer registers, floating-point registers. *)
+
+(** In assembly files, [Xn] denotes the full 64-bit register
+    and [Wn] the low 32 bits of [Xn]. *)
+
+Inductive ireg: Type :=
+  | X0  | X1  | X2  | X3  | X4  | X5  |  X6  | X7
+  | X8  | X9  | X10 | X11 | X12 | X13 |  X14 | X15
+  | X16 | X17 | X18 | X19 | X20 | X21 |  X22 | X23
+  | X24 | X25 | X26 | X27 | X28 | X29 |  X30.
+
+Inductive ireg0: Type :=
+  | RR0 (r: ireg) | XZR.
+
+Inductive iregsp: Type :=
+  | RR1 (r: ireg) | XSP.
+
+Coercion RR0: ireg >-> ireg0.
+Coercion RR1: ireg >-> iregsp.
+
+Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+(** In assembly files, [Dn] denotes the low 64-bit of a vector register,
+    and [Sn] the low 32 bits. *)
+
+Inductive freg: Type :=
+  | D0  | D1  | D2  | D3  | D4  | D5  |  D6  | D7
+  | D8  | D9  | D10 | D11 | D12 | D13 |  D14 | D15
+  | D16 | D17 | D18 | D19 | D20 | D21 |  D22 | D23
+  | D24 | D25 | D26 | D27 | D28 | D29 |  D30 | D31.
+
+Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+(** Bits in the condition register. *)
+
+Inductive crbit: Type :=
+  | CN: crbit    (**r negative *)
+  | CZ: crbit    (**r zero *)
+  | CC: crbit    (**r carry *)
+  | CV: crbit.   (**r overflow *)
+
+Lemma crbit_eq: forall (x y: crbit), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+(** We model the following registers of the ARM architecture. *)
+
+Inductive preg: Type :=
+  | IR: ireg -> preg   (**r 64- or 32-bit integer registers *)
+  | FR: freg -> preg   (**r double- or single-precision float registers *)
+  | CR: crbit -> preg  (**r bits in the condition register *)
+  | SP: preg           (**r register X31 used as stack pointer *)
+  | PC: preg.          (**r program counter *)
+
+Coercion IR: ireg >-> preg.
+Coercion FR: freg >-> preg.
+Coercion CR: crbit >-> preg.
+
+Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}.
+Proof. decide equality. apply ireg_eq. apply freg_eq. apply crbit_eq. Defined.
+
+Module PregEq.
+  Definition t := preg.
+  Definition eq := preg_eq.
+End PregEq.
+
+Module Pregmap := EMap(PregEq).
+
+Definition preg_of_iregsp (r: iregsp) : preg :=
+  match r with RR1 r => IR r | XSP => SP end.
+
+Coercion preg_of_iregsp: iregsp >-> preg.
+
+(** Conventional name for return address ([RA]) *)
+
+Notation "'RA'" := X30 (only parsing) : asm.
+
+(** The instruction set.  Most instructions correspond exactly to
+  actual AArch64 instructions. See the ARM reference manuals for more
+  details.  Some instructions, described below, are
+  pseudo-instructions: they expand to canned instruction sequences
+  during the printing of the assembly code.  *)
+
+Definition label := positive.
+
+Inductive isize: Type :=
+  | W                                                (**r 32-bit integer operation *)
+  | X.                                               (**r 64-bit integer operation *)
+
+Inductive fsize: Type :=
+  | S                                                (**r 32-bit, single-precision FP operation *)
+  | D.                                               (**r 64-bit, double-precision FP operation *)
+
+Inductive testcond : Type :=
+  | TCeq: testcond    (**r equal *)
+  | TCne: testcond    (**r not equal *)
+  | TChs: testcond    (**r unsigned higher or same *)
+  | TClo: testcond    (**r unsigned lower *)
+  | TCmi: testcond    (**r negative *)
+  | TCpl: testcond    (**r positive *)
+  | TChi: testcond    (**r unsigned higher *)
+  | TCls: testcond    (**r unsigned lower or same *)
+  | TCge: testcond    (**r signed greater or equal *)
+  | TClt: testcond    (**r signed less than *)
+  | TCgt: testcond    (**r signed greater *)
+  | TCle: testcond.   (**r signed less than or equal *)
+
+Inductive addressing: Type :=
+  | ADimm (base: iregsp) (n: int64)                  (**r base plus immediate offset *)
+  | ADreg (base: iregsp) (r: ireg)                   (**r base plus reg *)
+  | ADlsl (base: iregsp) (r: ireg) (n: int)          (**r base plus reg LSL n *)
+  | ADsxt (base: iregsp) (r: ireg) (n: int)          (**r base plus SIGN-EXT(reg) LSL n *)
+  | ADuxt (base: iregsp) (r: ireg) (n: int)          (**r base plus ZERO-EXT(reg) LSL n *)
+  | ADadr (base: iregsp) (id: ident) (ofs: ptrofs)   (**r base plus low address of [id + ofs] *)
+  | ADpostincr (base: iregsp) (n: int64).            (**r base plus offset; base is updated after *)
+
+Inductive shift_op: Type :=
+  | SOnone
+  | SOlsl (n: int)
+  | SOlsr (n: int)
+  | SOasr (n: int)
+  | SOror (n: int).
+
+Inductive extend_op: Type :=
+  | EOsxtb (n: int)
+  | EOsxth (n: int)
+  | EOsxtw (n: int)
+  | EOuxtb (n: int)
+  | EOuxth (n: int)
+  | EOuxtw (n: int)
+  | EOuxtx (n: int).
+
+Inductive instruction: Type :=
+  (** Branches *)
+  | Pb (lbl: label)                                                   (**r branch *)
+  | Pbc (c: testcond) (lbl: label)                                    (**r conditional branch *)
+  | Pbl (id: ident) (sg: signature)                                   (**r jump to function and link *)
+  | Pbs (id: ident) (sg: signature)                                   (**r jump to function *)
+  | Pblr (r: ireg) (sg: signature)                                    (**r indirect jump and link *)
+  | Pbr (r: ireg) (sg: signature)                                     (**r indirect jump *)
+  | Pret (r: ireg)                                                    (**r return *)
+  | Pcbnz (sz: isize) (r: ireg) (lbl: label)                          (**r branch if not zero *)
+  | Pcbz (sz: isize) (r: ireg) (lbl: label)                           (**r branch if zero *)
+  | Ptbnz (sz: isize) (r: ireg) (n: int) (lbl: label)                 (**r branch if bit n is not zero *)
+  | Ptbz (sz: isize) (r: ireg) (n: int) (lbl: label)                  (**r branch if bit n is zero *)
+  (** Memory loads and stores *)
+  | Pldrw (rd: ireg) (a: addressing)                                  (**r load int32 *)
+  | Pldrw_a (rd: ireg) (a: addressing)                                (**r load int32 as any32 *)
+  | Pldrx (rd: ireg) (a: addressing)                                  (**r load int64 *)
+  | Pldrx_a (rd: ireg) (a: addressing)                                (**r load int64 as any64 *)
+  | Pldrb (sz: isize) (rd: ireg) (a: addressing)                      (**r load int8, zero-extend *)
+  | Pldrsb (sz: isize) (rd: ireg) (a: addressing)                     (**r load int8, sign-extend *)
+  | Pldrh (sz: isize) (rd: ireg) (a: addressing)                      (**r load int16, zero-extend *)
+  | Pldrsh (sz: isize) (rd: ireg) (a: addressing)                     (**r load int16, sign-extend *)
+  | Pldrzw (rd: ireg) (a: addressing)                                 (**r load int32, zero-extend to int64 *)
+  | Pldrsw (rd: ireg) (a: addressing)                                 (**r load int32, sign-extend to int64 *)
+  | Pldp (rd1 rd2: ireg) (a: addressing)                               (**r load two int64 *)
+  | Pstrw (rs: ireg) (a: addressing)                                  (**r store int32 *)
+  | Pstrw_a (rs: ireg) (a: addressing)                                (**r store int32 as any32 *)
+  | Pstrx (rs: ireg) (a: addressing)                                  (**r store int64 *)
+  | Pstrx_a (rs: ireg) (a: addressing)                                (**r store int64 as any64 *)
+  | Pstrb (rs: ireg) (a: addressing)                                  (**r store int8 *)
+  | Pstrh (rs: ireg) (a: addressing)                                  (**r store int16 *)
+  | Pstp (rs1 rs2: ireg) (a: addressing)                              (**r store two int64 *)
+  (** Integer arithmetic, immediate *)
+  | Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z)              (**r addition *)
+  | Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z)              (**r subtraction *)
+  | Pcmpimm (sz: isize) (r1: ireg) (n: Z)                             (**r compare *)
+  | Pcmnimm (sz: isize) (r1: ireg) (n: Z)                             (**r compare negative *)
+  (** Move integer register *)
+  | Pmov (rd: iregsp) (r1: iregsp)
+  (** Logical, immediate *)
+  | Pandimm (sz: isize) (rd: ireg) (r1: ireg0) (n: Z)                 (**r and *)
+  | Peorimm (sz: isize) (rd: ireg) (r1: ireg0) (n: Z)                 (**r xor *)
+  | Porrimm (sz: isize) (rd: ireg) (r1: ireg0) (n: Z)                 (**r or *)
+  | Ptstimm (sz: isize) (r1: ireg) (n: Z)                             (**r and, then set flags *)
+  (** Move wide immediate *)
+  | Pmovz (sz: isize) (rd: ireg) (n: Z) (pos: Z)                      (**r move [n << pos] to [rd] *)
+  | Pmovn (sz: isize) (rd: ireg) (n: Z) (pos: Z)                      (**r move [NOT(n << pos)] to [rd] *)
+  | Pmovk (sz: isize) (rd: ireg) (n: Z) (pos: Z)                      (**r insert 16 bits of [n] at [pos] in rd *)
+  (** PC-relative addressing *)
+  | Padrp (rd: ireg) (id: ident) (ofs: ptrofs)                        (**r set [rd] to high address of [id + ofs] *)
+  | Paddadr (rd: ireg) (r1: ireg) (id: ident) (ofs: ptrofs)           (**r add the low address of [id + ofs] *)
+  (** Bit-field operations *)
+  | Psbfiz (sz: isize) (rd: ireg) (r1: ireg) (r: int) (s: Z)          (**r sign extend and shift left *)
+  | Psbfx (sz: isize) (rd: ireg) (r1: ireg) (r: int) (s: Z)           (**r shift right and sign extend *)
+  | Pubfiz (sz: isize) (rd: ireg) (r1: ireg) (r: int) (s: Z)          (**r zero extend and shift left *)
+  | Pubfx (sz: isize) (rd: ireg) (r1: ireg) (r: int) (s: Z)           (**r shift right and zero extend *)
+  (** Integer arithmetic, shifted register *)
+  | Padd (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r addition *)
+  | Psub (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r subtraction *)
+  | Pcmp (sz: isize) (r1: ireg0) (r2: ireg) (s: shift_op)             (**r compare *)
+  | Pcmn (sz: isize) (r1: ireg0) (r2: ireg) (s: shift_op)             (**r compare negative *)
+  (** Integer arithmetic, extending register *)
+  | Paddext (rd: iregsp) (r1: iregsp) (r2: ireg) (x: extend_op)       (**r int64-int32 add *)
+  | Psubext (rd: iregsp) (r1: iregsp) (r2: ireg) (x: extend_op)       (**r int64-int32 sub *)
+  | Pcmpext (r1: ireg) (r2: ireg) (x: extend_op)                      (**r int64-int32 cmp *)
+  | Pcmnext (r1: ireg) (r2: ireg) (x: extend_op)                      (**r int64-int32 cmn *)
+  (** Logical, shifted register *)
+  | Pand (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r and *)
+  | Pbic (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r and-not *)
+  | Peon (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r xor-not *)
+  | Peor (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r xor *)
+  | Porr (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r or *)
+  | Porn (sz: isize) (rd: ireg) (r1: ireg0) (r2: ireg) (s: shift_op)  (**r or-not *)
+  | Ptst (sz: isize) (r1: ireg0) (r2: ireg) (s: shift_op)             (**r and, then set flags *)
+  (** Variable shifts *)
+  | Pasrv (sz: isize) (rd: ireg) (r1 r2: ireg)                        (**r arithmetic right shift *)
+  | Plslv (sz: isize) (rd: ireg) (r1 r2: ireg)                        (**r left shift *)
+  | Plsrv (sz: isize) (rd: ireg) (r1 r2: ireg)                        (**r logical right shift *)
+  | Prorv (sz: isize) (rd: ireg) (r1 r2: ireg)                        (**r rotate right *)
+  (** Bit operations *)
+  | Pcls (sz: isize) (rd r1: ireg)                                    (**r count leading sign bits *)
+  | Pclz (sz: isize) (rd r1: ireg)                                    (**r count leading zero bits *)
+  | Prev (sz: isize) (rd r1: ireg)                                    (**r reverse bytes *)
+  | Prev16 (sz: isize) (rd r1: ireg)                                  (**r reverse bytes in each 16-bit word *)
+  (** Conditional data processing *)
+  | Pcsel (rd: ireg) (r1 r2: ireg) (c: testcond)                      (**r int conditional move *)
+  | Pcset (rd: ireg) (c: testcond)                                    (**r set to 1/0 if cond is true/false *)
+(*
+  | Pcsetm (rd: ireg) (c: testcond)                                   (**r set to -1/0 if cond is true/false *)
+*)
+  (** Integer multiply/divide *)
+  | Pmadd (sz: isize) (rd: ireg) (r1 r2: ireg) (r3: ireg0)            (**r multiply-add *)
+  | Pmsub (sz: isize) (rd: ireg) (r1 r2: ireg) (r3: ireg0)            (**r multiply-sub *)
+  | Psmulh (rd: ireg) (r1 r2: ireg)                                   (**r signed multiply high *)
+  | Pumulh (rd: ireg) (r1 r2: ireg)                                   (**r unsigned multiply high *)
+  | Psdiv (sz: isize) (rd: ireg) (r1 r2: ireg)                        (**r signed division *)
+  | Pudiv (sz: isize) (rd: ireg) (r1 r2: ireg)                        (**r unsigned division *)
+  (** Floating-point loads and stores *)
+  | Pldrs (rd: freg) (a: addressing)                                  (**r load float32 (single precision) *)
+  | Pldrd (rd: freg) (a: addressing)                                  (**r load float64 (double precision) *)
+  | Pldrd_a (rd: freg) (a: addressing)                                (**r load float64 as any64 *)
+  | Pstrs (rs: freg) (a: addressing)                                  (**r store float32 *)
+  | Pstrd (rs: freg) (a: addressing)                                  (**r store float64 *)
+  | Pstrd_a (rs: freg) (a: addressing)                                (**r store float64 as any64 *)
+  (** Floating-point move *)
+  | Pfmov (rd r1: freg)
+  | Pfmovimms (rd: freg) (f: float32)                                 (**r load float32 constant *)
+  | Pfmovimmd (rd: freg) (f: float)                                   (**r load float64 constant *)
+  | Pfmovi (fsz: fsize) (rd: freg) (r1: ireg0)                        (**r copy int reg to FP reg *)
+  (** Floating-point conversions *)
+  | Pfcvtds (rd r1: freg)                                             (**r convert float32 to float64 *)
+  | Pfcvtsd (rd r1: freg)                                             (**r convert float64 to float32 *)
+  | Pfcvtzs (isz: isize) (fsz: fsize) (rd: ireg) (r1: freg)           (**r convert float to signed int *)
+  | Pfcvtzu (isz: isize) (fsz: fsize) (rd: ireg) (r1: freg)           (**r convert float to unsigned int *)
+  | Pscvtf (fsz: fsize) (isz: isize) (rd: freg) (r1: ireg)            (**r convert signed int to float *)
+  | Pucvtf (fsz: fsize) (isz: isize) (rd: freg) (r1: ireg)            (**r convert unsigned int to float *)
+  (** Floating-point arithmetic *)
+  | Pfabs (sz: fsize) (rd r1: freg)                                   (**r absolute value *)
+  | Pfneg (sz: fsize) (rd r1: freg)                                   (**r negation *)
+  | Pfsqrt (sz: fsize) (rd r1: freg)                                  (**r square root *)
+  | Pfadd (sz: fsize) (rd r1 r2: freg)                                (**r addition *)
+  | Pfdiv (sz: fsize) (rd r1 r2: freg)                                (**r division *)
+  | Pfmul (sz: fsize) (rd r1 r2: freg)                                (**r multiplication *)
+  | Pfnmul (sz: fsize) (rd r1 r2: freg)                               (**r multiply-negate *)
+  | Pfsub (sz: fsize) (rd r1 r2: freg)                                (**r subtraction *)
+  | Pfmadd (sz: fsize) (rd r1 r2 r3: freg)                            (**r [rd = r3 + r1 * r2] *)
+  | Pfmsub (sz: fsize) (rd r1 r2 r3: freg)                            (**r [rd = r3 - r1 * r2] *)
+  | Pfnmadd (sz: fsize) (rd r1 r2 r3: freg)                           (**r [rd = - r3 - r1 * r2] *)
+  | Pfnmsub (sz: fsize) (rd r1 r2 r3: freg)                           (**r [rd = - r3 + r1 * r2] *)
+  (** Floating-point comparison *)
+  | Pfcmp (sz: fsize) (r1 r2: freg)                                   (**r compare [r1] and [r2] *)
+  | Pfcmp0 (sz: fsize) (r1: freg)                                     (**r compare [r1] and [+0.0] *)
+  (** Floating-point conditional select *)
+  | Pfsel (rd r1 r2: freg) (cond: testcond)
+  (** Pseudo-instructions *)
+  | Pallocframe (sz: Z) (linkofs: ptrofs)                             (**r allocate new stack frame *)
+  | Pfreeframe (sz: Z) (linkofs: ptrofs)                              (**r deallocate stack frame and restore previous frame *)
+  | Plabel (lbl: label)                                               (**r define a code label *)
+  | Ploadsymbol (rd: ireg) (id: ident)                                (**r load the address of [id] *)
+  | Pcvtsw2x (rd: ireg) (r1: ireg)                                    (**r sign-extend 32-bit int to 64-bit *)
+  | Pcvtuw2x (rd: ireg) (r1: ireg)                                    (**r zero-extend 32-bit int to 64-bit *)
+  | Pcvtx2w (rd: ireg)                                                (**r retype a 64-bit int as a 32-bit int *)
+  | Pbtbl (r1: ireg) (tbl: list label)                                (**r N-way branch through a jump table *)
+  | Pbuiltin (ef: external_function)
+             (args: list (builtin_arg preg)) (res: builtin_res preg)  (**r built-in function (pseudo) *)
+  | Pnop                                                              (**r no operation *)
+  | Pcfi_adjust (ofs: int)                                            (**r .cfi_adjust debug directive *)
+  | Pcfi_rel_offset (ofs: int)                                        (**r .cfi_rel_offset debug directive *)
+.
+
+Definition code := list instruction.
+Record function : Type := mkfunction { fn_sig: signature; fn_code: code }.
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+
+(** * Operational semantics *)
+
+(** The semantics operates over a single mapping from registers
+  (type [preg]) to values.  We maintain (but do not enforce)
+  the convention that integer registers are mapped to values of
+  type [Tint], float registers to values of type [Tfloat],
+  and condition bits to either [Vzero] or [Vone]. *)
+
+Definition regset := Pregmap.t val.
+Definition genv := Genv.t fundef unit.
+
+(** The value of an [ireg0] is either the value of the integer register,
+    or 0. *)
+
+Definition ir0w (rs: regset) (r: ireg0) : val :=
+  match r with RR0 r => rs (IR r) | XZR => Vint Int.zero end.
+Definition ir0x (rs: regset) (r: ireg0) : val :=
+  match r with RR0 r => rs (IR r) | XZR => Vlong Int64.zero end.
+
+(** Concise notations for accessing and updating the values of registers. *)
+
+Notation "a # b" := (a b) (at level 1, only parsing) : asm.
+Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level) : asm.
+Notation "a ## b" := (ir0w a b) (at level 1, only parsing) : asm.
+Notation "a ### b" := (ir0x a b) (at level 1, only parsing) : asm.
+
+Open Scope asm.
+
+(** Undefining some registers *)
+
+Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
+  match l with
+  | nil => rs
+  | r :: l' => undef_regs l' (rs#r <- Vundef)
+  end.
+
+(** Undefining the condition codes *)
+
+Definition undef_flags (rs: regset) : regset :=
+  fun r => match r with CR _ => Vundef | _ => rs r end.
+
+(** Assigning a register pair *)
+
+Definition set_pair (p: rpair preg) (v: val) (rs: regset) : regset :=
+  match p with
+  | One r => rs#r <- v
+  | Twolong rhi rlo => rs#rhi <- (Val.hiword v) #rlo <- (Val.loword v)
+  end.
+
+(** Assigning the result of a builtin *)
+
+Fixpoint set_res (res: builtin_res preg) (v: val) (rs: regset) : regset :=
+  match res with
+  | BR r => rs#r <- v
+  | BR_none => rs
+  | BR_splitlong hi lo => set_res lo (Val.loword v) (set_res hi (Val.hiword v) rs)
+  end.
+
+(** The two functions below axiomatize how the linker processes
+  symbolic references [symbol + offset].  It computes the
+  difference between the address and the PC, and splits it into:
+  - 12 low bits usable as an offset in an addressing mode;
+  - 21 high bits usable as argument to the ADRP instruction.
+
+  In CompCert's model, we cannot really describe PC-relative addressing,
+  but we can claim that the address of [symbol + offset] decomposes
+  as the sum of
+  - a low part, usable as an offset in an addressing mode;
+  - a high part, usable as argument to the ADRP instruction. *)
+
+Parameter symbol_low: genv -> ident -> ptrofs -> val.
+Parameter symbol_high: genv -> ident -> ptrofs -> val.
+
+Axiom symbol_high_low:
+  forall (ge: genv) (id: ident) (ofs: ptrofs),
+  Val.addl (symbol_high ge id ofs) (symbol_low ge id ofs) = Genv.symbol_address ge id ofs.
+
+Section RELSEM.
+
+Variable ge: genv.
+
+(** Looking up instructions in a code sequence by position. *)
+
+Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction :=
+  match c with
+  | nil => None
+  | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il
+  end.
+
+(** Position corresponding to a label *)
+
+Definition is_label (lbl: label) (instr: instruction) : bool :=
+  match instr with
+  | Plabel lbl' => if peq lbl lbl' then true else false
+  | _ => false
+  end.
+
+Lemma is_label_correct:
+  forall lbl instr,
+  if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl.
+Proof.
+  intros.  destruct instr; simpl; try discriminate. destruct (peq lbl lbl0); congruence.
+Qed.
+
+Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
+  match c with
+  | nil => None
+  | instr :: c' =>
+      if is_label lbl instr then Some (pos + 1) else label_pos lbl (pos + 1) c'
+  end.
+
+(** The semantics is purely small-step and defined as a function
+  from the current state (a register set + a memory state)
+  to either [Next rs' m'] where [rs'] and [m'] are the updated register
+  set and memory state after execution of the instruction at [rs#PC],
+  or [Stuck] if the processor is stuck. *)
+
+Inductive outcome: Type :=
+  | Next: regset -> mem -> outcome
+  | Stuck: outcome.
+
+(** Manipulations over the [PC] register: continuing with the next
+  instruction ([nextinstr]) or branching to a label ([goto_label]). *)
+
+Definition nextinstr (rs: regset) :=
+  rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one).
+
+Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
+  match label_pos lbl 0 (fn_code f) with
+  | None => Stuck
+  | Some pos =>
+      match rs#PC with
+      | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
+      | _ => Stuck
+    end
+  end.
+
+(** Testing a condition *)
+
+Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
+  match c with
+  | TCeq =>                             (**r equal *)
+      match rs#CZ with
+      | Vint n => Some (Int.eq n Int.one)
+      | _ => None
+      end
+  | TCne =>                             (**r not equal *)
+      match rs#CZ with
+      | Vint n => Some (Int.eq n Int.zero)
+      | _ => None
+      end
+  | TClo =>                             (**r unsigned less than  *)
+      match rs#CC with
+      | Vint n => Some (Int.eq n Int.zero)
+      | _ => None
+      end
+  | TCls =>                             (**r unsigned less or equal *)
+      match rs#CC, rs#CZ with
+      | Vint c, Vint z => Some (Int.eq c Int.zero || Int.eq z Int.one)
+      | _, _ => None
+      end
+  | TChs =>                             (**r unsigned greater or equal *)
+      match rs#CC with
+      | Vint n => Some (Int.eq n Int.one)
+      | _ => None
+      end
+  | TChi =>                             (**r unsigned greater *)
+      match rs#CC, rs#CZ with
+      | Vint c, Vint z => Some (Int.eq c Int.one && Int.eq z Int.zero)
+      | _, _ => None
+      end
+  | TClt =>                             (**r signed less than *)
+      match rs#CV, rs#CN with
+      | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.one)
+      | _, _ => None
+      end
+  | TCle =>                             (**r signed less or equal *)
+      match rs#CV, rs#CN, rs#CZ with
+      | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.one || Int.eq z Int.one)
+      | _, _, _ => None
+      end
+  | TCge =>                             (**r signed greater or equal *)
+      match rs#CV, rs#CN with
+      | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.zero)
+      | _, _ => None
+      end
+  | TCgt =>                             (**r signed greater *)
+      match rs#CV, rs#CN, rs#CZ with
+      | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.zero && Int.eq z Int.zero)
+      | _, _, _ => None
+      end
+  | TCpl =>                             (**r positive *)
+      match rs#CN with
+      | Vint n => Some (Int.eq n Int.zero)
+      | _ => None
+      end
+  | TCmi =>                             (**r negative *)
+      match rs#CN with
+      | Vint n => Some (Int.eq n Int.one)
+      | _ => None
+      end
+  end.
+
+(** Integer "is zero?" test *)
+
+Definition eval_testzero (sz: isize) (v: val) (m: mem): option bool :=
+  match sz with
+  | W => Val.cmpu_bool (Mem.valid_pointer m) Ceq v (Vint Int.zero)
+  | X => Val.cmplu_bool (Mem.valid_pointer m) Ceq v (Vlong Int64.zero)
+  end.
+
+(** Integer "bit is set?" test *)
+
+Definition eval_testbit (sz: isize) (v: val) (n: int): option bool :=
+  match sz with
+  | W => Val.cmp_bool Cne (Val.and v (Vint (Int.shl Int.one n))) (Vint Int.zero)
+  | X => Val.cmpl_bool Cne (Val.andl v (Vlong (Int64.shl' Int64.one n))) (Vlong Int64.zero)
+  end.
+
+(** Evaluating an addressing mode *)
+
+Definition eval_addressing (a: addressing) (rs: regset): val :=
+  match a with
+  | ADimm base n => Val.addl rs#base (Vlong n)
+  | ADreg base r => Val.addl rs#base rs#r
+  | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n))
+  | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
+  | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
+  | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs)
+  | ADpostincr base n => Vundef (* not modeled yet *)
+  end.
+
+(** Auxiliaries for memory accesses *)
+
+Definition exec_load (chunk: memory_chunk) (transf: val -> val)
+                     (a: addressing) (r: preg) (rs: regset) (m: mem) :=
+  match Mem.loadv chunk m (eval_addressing a rs) with
+  | None => Stuck
+  | Some v => Next (nextinstr (rs#r <- (transf v))) m
+  end.
+
+Definition exec_store (chunk: memory_chunk)
+                      (a: addressing) (v: val)
+                      (rs: regset) (m: mem) :=
+  match Mem.storev chunk m (eval_addressing a rs) v with
+  | None => Stuck
+  | Some m' => Next (nextinstr rs) m'
+  end.
+
+(** Comparisons *)
+
+Definition compare_int (rs: regset) (v1 v2: val) (m: mem) :=
+  rs#CN <- (Val.negative (Val.sub v1 v2))
+    #CZ <- (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2)
+    #CC <- (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
+    #CV <- (Val.sub_overflow v1 v2).
+
+Definition compare_long (rs: regset) (v1 v2: val) (m: mem) :=
+  rs#CN <- (Val.negativel (Val.subl v1 v2))
+    #CZ <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2))
+    #CC <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2))
+    #CV <- (Val.subl_overflow v1 v2).
+
+(** Semantics of [fcmp] instructions:
+<<
+==	N=0 Z=1 C=1 V=0
+<	N=1 Z=0 C=0 V=0
+>	N=0 Z=0 C=1 V=0
+unord	N=0 Z=0 C=1 V=1
+>>
+*)
+
+Definition compare_float (rs: regset) (v1 v2: val) :=
+  match v1, v2 with
+  | Vfloat f1, Vfloat f2 =>
+      rs#CN <- (Val.of_bool (Float.cmp Clt f1 f2))
+        #CZ <- (Val.of_bool (Float.cmp Ceq f1 f2))
+        #CC <- (Val.of_bool (negb (Float.cmp Clt f1 f2)))
+        #CV <- (Val.of_bool (negb (Float.ordered f1 f2)))
+  | _, _ =>
+      rs#CN <- Vundef
+        #CZ <- Vundef
+        #CC <- Vundef
+        #CV <- Vundef
+  end.
+
+Definition compare_single (rs: regset) (v1 v2: val) :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 =>
+      rs#CN <- (Val.of_bool (Float32.cmp Clt f1 f2))
+        #CZ <- (Val.of_bool (Float32.cmp Ceq f1 f2))
+        #CC <- (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
+        #CV <- (Val.of_bool (negb (Float32.ordered f1 f2)))
+  | _, _ =>
+      rs#CN <- Vundef
+        #CZ <- Vundef
+        #CC <- Vundef
+        #CV <- Vundef
+  end.
+
+(** Insertion of bits into an integer *)
+
+Definition insert_in_int (x: val) (y: Z) (pos: Z) (len: Z) : val :=
+  match x with
+  | Vint n => Vint (Int.repr (Zinsert (Int.unsigned n) y pos len))
+  | _ => Vundef
+  end.
+
+Definition insert_in_long (x: val) (y: Z) (pos: Z) (len: Z) : val :=
+  match x with
+  | Vlong n => Vlong (Int64.repr (Zinsert (Int64.unsigned n) y pos len))
+  | _ => Vundef
+  end.
+
+(** Evaluation of shifted operands *)
+
+Definition eval_shift_op_int (v: val) (s: shift_op): val :=
+  match s with
+  | SOnone => v
+  | SOlsl n => Val.shl v (Vint n)
+  | SOlsr n => Val.shru v (Vint n)
+  | SOasr n => Val.shr v (Vint n)
+  | SOror n => Val.ror v (Vint n)
+  end.
+
+Definition eval_shift_op_long (v: val) (s: shift_op): val :=
+  match s with
+  | SOnone => v
+  | SOlsl n => Val.shll v (Vint n)
+  | SOlsr n => Val.shrlu v (Vint n)
+  | SOasr n => Val.shrl v (Vint n)
+  | SOror n => Val.rorl v (Vint n)
+  end.
+
+(** Evaluation of sign- or zero- extended operands *)
+
+Definition eval_extend (v: val) (x: extend_op): val :=
+  match x with
+  | EOsxtb n => Val.shll (Val.longofint (Val.sign_ext 8 v)) (Vint n)
+  | EOsxth n => Val.shll (Val.longofint (Val.sign_ext 16 v)) (Vint n)
+  | EOsxtw n => Val.shll (Val.longofint v) (Vint n)
+  | EOuxtb n => Val.shll (Val.longofintu (Val.zero_ext 8 v)) (Vint n)
+  | EOuxth n => Val.shll (Val.longofintu (Val.zero_ext 16 v)) (Vint n)
+  | EOuxtw n => Val.shll (Val.longofintu v) (Vint n)
+  | EOuxtx n => Val.shll v (Vint n)
+  end.
+
+(** Bit-level conversion from integers to FP numbers *)
+
+Definition float32_of_bits (v: val): val :=
+  match v with
+  | Vint n => Vsingle (Float32.of_bits n)
+  | _ => Vundef
+  end.
+
+Definition float64_of_bits (v: val): val :=
+  match v with
+  | Vlong n => Vfloat (Float.of_bits n)
+  | _ => Vundef
+  end.
+
+(** Execution of a single instruction [i] in initial state
+    [rs] and [m].  Return updated state.  For instructions
+    that correspond to actual AArch64 instructions, the cases are
+    straightforward transliterations of the informal descriptions
+    given in the ARMv8 reference manuals.  For pseudo-instructions,
+    refer to the informal descriptions given above.
+
+    Note that we set to [Vundef] the registers used as temporaries by
+    the expansions of the pseudo-instructions, so that the code we
+    generate cannot use those registers to hold values that must
+    survive the execution of the pseudo-instruction.
+*)
+
+Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome :=
+  match i with
+  (** Branches *)
+  | Pb lbl =>
+      goto_label f lbl rs m
+  | Pbc cond lbl =>
+      match eval_testcond cond rs with
+      | Some true => goto_label f lbl rs m
+      | Some false => Next (nextinstr rs) m
+      | None => Stuck
+      end
+  | Pbl id sg =>
+      Next (rs#RA <- (Val.offset_ptr rs#PC Ptrofs.one) #PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
+  | Pbs id sg =>
+      Next (rs#PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
+  | Pblr r sg =>
+      Next (rs#RA <- (Val.offset_ptr rs#PC Ptrofs.one) #PC <- (rs#r)) m
+  | Pbr r sg =>
+      Next (rs#PC <- (rs#r)) m
+  | Pret r =>
+      Next (rs#PC <- (rs#r)) m
+  | Pcbnz sz r lbl =>
+      match eval_testzero sz rs#r m with
+      | Some true => Next (nextinstr rs) m
+      | Some false => goto_label f lbl rs m
+      | None => Stuck
+      end
+  | Pcbz sz r lbl =>
+      match eval_testzero sz rs#r m with
+      | Some true => goto_label f lbl rs m
+      | Some false => Next (nextinstr rs) m
+      | None => Stuck
+      end
+  | Ptbnz sz r n lbl =>
+      match eval_testbit sz rs#r n with
+      | Some true => goto_label f lbl rs m
+      | Some false => Next (nextinstr rs) m
+      | None => Stuck
+      end
+  | Ptbz sz r n lbl =>
+      match eval_testbit sz rs#r n with
+      | Some true => Next (nextinstr rs) m
+      | Some false => goto_label f lbl rs m
+      | None => Stuck
+      end
+  (** Memory loads and stores *)
+  | Pldrw rd a =>
+      exec_load Mint32 (fun v => v) a rd rs m
+  | Pldrw_a rd a =>
+      exec_load Many32 (fun v => v) a rd rs m
+  | Pldrx rd a =>
+      exec_load Mint64 (fun v => v) a rd rs m
+  | Pldrx_a rd a =>
+      exec_load Many64 (fun v => v) a rd rs m
+  | Pldrb W rd a =>
+      exec_load Mint8unsigned (fun v => v) a rd rs m
+  | Pldrb X rd a =>
+      exec_load Mint8unsigned Val.longofintu a rd rs m
+  | Pldrsb W rd a =>
+      exec_load Mint8signed (fun v => v) a rd rs m
+  | Pldrsb X rd a =>
+      exec_load Mint8signed Val.longofint a rd rs m
+  | Pldrh W rd a =>
+      exec_load Mint16unsigned (fun v => v) a rd rs m
+  | Pldrh X rd a =>
+      exec_load Mint16unsigned Val.longofintu a rd rs m
+  | Pldrsh W rd a =>
+      exec_load Mint16signed (fun v => v) a rd rs m
+  | Pldrsh X rd a =>
+      exec_load Mint16signed Val.longofint a rd rs m
+  | Pldrzw rd a =>
+      exec_load Mint32 Val.longofintu a rd rs m
+  | Pldrsw rd a =>
+      exec_load Mint32 Val.longofint a rd rs m
+  | Pstrw r a =>
+      exec_store Mint32 a rs#r rs m
+  | Pstrw_a r a =>
+      exec_store Many32 a rs#r rs m
+  | Pstrx r a =>
+      exec_store Mint64 a rs#r rs m
+  | Pstrx_a r a =>
+      exec_store Many64 a rs#r rs m
+  | Pstrb r a =>
+      exec_store Mint8unsigned a rs#r rs m
+  | Pstrh r a =>
+      exec_store Mint16unsigned a rs#r rs m
+  (** Integer arithmetic, immediate *)
+  | Paddimm W rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.add rs#r1 (Vint (Int.repr n))))) m
+  | Paddimm X rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.addl rs#r1 (Vlong (Int64.repr n))))) m
+  | Psubimm W rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.sub rs#r1 (Vint (Int.repr n))))) m
+  | Psubimm X rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.subl rs#r1 (Vlong (Int64.repr n))))) m
+  | Pcmpimm W r1 n =>
+      Next (nextinstr (compare_int rs rs#r1 (Vint (Int.repr n)) m)) m
+  | Pcmpimm X r1 n =>
+      Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.repr n)) m)) m
+  | Pcmnimm W r1 n =>
+      Next (nextinstr (compare_int rs rs#r1 (Vint (Int.neg (Int.repr n))) m)) m
+  | Pcmnimm X r1 n =>
+      Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.neg (Int64.repr n))) m)) m
+  (** Move integer register *)
+  | Pmov rd r1 =>
+      Next (nextinstr (rs#rd <- (rs#r1))) m
+  (** Logical, immediate *)
+  | Pandimm W rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.and rs##r1 (Vint (Int.repr n))))) m
+  | Pandimm X rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.andl rs###r1 (Vlong (Int64.repr n))))) m
+  | Peorimm W rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.xor rs##r1 (Vint (Int.repr n))))) m
+  | Peorimm X rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.xorl rs###r1 (Vlong (Int64.repr n))))) m
+  | Porrimm W rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.or rs##r1 (Vint (Int.repr n))))) m
+  | Porrimm X rd r1 n =>
+      Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Vlong (Int64.repr n))))) m
+  | Ptstimm W r1 n =>
+      Next (nextinstr (compare_int rs (Val.and rs#r1 (Vint (Int.repr n))) (Vint Int.zero) m)) m
+  | Ptstimm X r1 n =>
+      Next (nextinstr (compare_long rs (Val.andl rs#r1 (Vlong (Int64.repr n))) (Vlong Int64.zero) m)) m
+  (** Move wide immediate *)
+  | Pmovz W rd n pos =>
+      Next (nextinstr (rs#rd <- (Vint (Int.repr (Z.shiftl n pos))))) m
+  | Pmovz X rd n pos =>
+      Next (nextinstr (rs#rd <- (Vlong (Int64.repr (Z.shiftl n pos))))) m
+  | Pmovn W rd n pos =>
+      Next (nextinstr (rs#rd <- (Vint (Int.repr (Z.lnot (Z.shiftl n pos)))))) m
+  | Pmovn X rd n pos =>
+      Next (nextinstr (rs#rd <- (Vlong (Int64.repr (Z.lnot (Z.shiftl n pos)))))) m
+  | Pmovk W rd n pos =>
+      Next (nextinstr (rs#rd <- (insert_in_int rs#rd n pos 16))) m
+  | Pmovk X rd n pos =>
+      Next (nextinstr (rs#rd <- (insert_in_long rs#rd n pos 16))) m
+  (** PC-relative addressing *)
+  | Padrp rd id ofs =>
+      Next (nextinstr (rs#rd <- (symbol_high ge id ofs))) m
+  | Paddadr rd r1 id ofs =>
+      Next (nextinstr (rs#rd <- (Val.addl rs#r1 (symbol_low ge id ofs)))) m
+  (** Bit-field operations *)
+  | Psbfiz W rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.shl (Val.sign_ext s rs#r1) (Vint r)))) m
+  | Psbfiz X rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.shll (Val.sign_ext_l s rs#r1) (Vint r)))) m
+  | Psbfx W rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.sign_ext s (Val.shr rs#r1 (Vint r))))) m
+  | Psbfx X rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.sign_ext_l s (Val.shrl rs#r1 (Vint r))))) m
+  | Pubfiz W rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.shl (Val.zero_ext s rs#r1) (Vint r)))) m
+  | Pubfiz X rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.shll (Val.zero_ext_l s rs#r1) (Vint r)))) m
+  | Pubfx W rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.zero_ext s (Val.shru rs#r1 (Vint r))))) m
+  | Pubfx X rd r1 r s =>
+      Next (nextinstr (rs#rd <- (Val.zero_ext_l s (Val.shrlu rs#r1 (Vint r))))) m
+  (** Integer arithmetic, shifted register *)
+  | Padd W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.add rs##r1 (eval_shift_op_int rs#r2 s)))) m
+  | Padd X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.addl rs###r1 (eval_shift_op_long rs#r2 s)))) m
+  | Psub W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.sub rs##r1 (eval_shift_op_int rs#r2 s)))) m
+  | Psub X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.subl rs###r1 (eval_shift_op_long rs#r2 s)))) m
+  | Pcmp W r1 r2 s =>
+      Next (nextinstr (compare_int rs rs##r1 (eval_shift_op_int rs#r2 s) m)) m
+  | Pcmp X r1 r2 s =>
+      Next (nextinstr (compare_long rs rs###r1 (eval_shift_op_long rs#r2 s) m)) m
+  | Pcmn W r1 r2 s =>
+      Next (nextinstr (compare_int rs rs##r1 (Val.neg (eval_shift_op_int rs#r2 s)) m)) m
+  | Pcmn X r1 r2 s =>
+      Next (nextinstr (compare_long rs rs###r1 (Val.negl (eval_shift_op_long rs#r2 s)) m)) m
+  (** Integer arithmetic, extending register *)
+  | Paddext rd r1 r2 x =>
+      Next (nextinstr (rs#rd <- (Val.addl rs#r1 (eval_extend rs#r2 x)))) m
+  | Psubext rd r1 r2 x =>
+      Next (nextinstr (rs#rd <- (Val.subl rs#r1 (eval_extend rs#r2 x)))) m
+  | Pcmpext r1 r2 x =>
+      Next (nextinstr (compare_long rs rs#r1 (eval_extend rs#r2 x) m)) m
+  | Pcmnext r1 r2 x =>
+      Next (nextinstr (compare_long rs rs#r1 (Val.negl (eval_extend rs#r2 x)) m)) m
+  (** Logical, shifted register *)
+  | Pand W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.and rs##r1 (eval_shift_op_int rs#r2 s)))) m
+  | Pand X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)))) m
+  | Pbic W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.and rs##r1 (Val.notint (eval_shift_op_int rs#r2 s))))) m
+  | Pbic X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.andl rs###r1 (Val.notl (eval_shift_op_long rs#r2 s))))) m
+  | Peon W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.xor rs##r1 (Val.notint (eval_shift_op_int rs#r2 s))))) m
+  | Peon X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.xorl rs###r1 (Val.notl (eval_shift_op_long rs#r2 s))))) m
+  | Peor W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.xor rs##r1 (eval_shift_op_int rs#r2 s)))) m
+  | Peor X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.xorl rs###r1 (eval_shift_op_long rs#r2 s)))) m
+  | Porr W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.or rs##r1 (eval_shift_op_int rs#r2 s)))) m
+  | Porr X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.orl rs###r1 (eval_shift_op_long rs#r2 s)))) m
+  | Porn W rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.or rs##r1 (Val.notint (eval_shift_op_int rs#r2 s))))) m
+  | Porn X rd r1 r2 s =>
+      Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Val.notl (eval_shift_op_long rs#r2 s))))) m
+  | Ptst W r1 r2 s =>
+      Next (nextinstr (compare_int rs (Val.and rs##r1 (eval_shift_op_int rs#r2 s)) (Vint Int.zero) m)) m
+  | Ptst X r1 r2 s =>
+      Next (nextinstr (compare_long rs (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)) (Vlong Int64.zero) m)) m
+  (** Variable shifts *)
+  | Pasrv W rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shr rs#r1 rs#r2))) m
+  | Pasrv X rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shrl rs#r1 rs#r2))) m
+  | Plslv W rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shl rs#r1 rs#r2))) m
+  | Plslv X rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shll rs#r1 rs#r2))) m
+  | Plsrv W rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shru rs#r1 rs#r2))) m
+  | Plsrv X rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.shrlu rs#r1 rs#r2))) m
+  | Prorv W rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.ror rs#r1 rs#r2))) m
+  | Prorv X rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.rorl rs#r1 rs#r2))) m
+  (** Conditional data processing *)
+  | Pcsel rd r1 r2 cond =>
+      let v :=
+        match eval_testcond cond rs with
+        | Some true => rs#r1
+        | Some false => rs#r2
+        | None => Vundef
+        end in
+      Next (nextinstr (rs#rd <- v)) m
+  | Pcset rd cond =>
+      let v :=
+        match eval_testcond cond rs with
+        | Some true => Vint Int.one
+        | Some false => Vint Int.zero
+        | None => Vundef
+        end in
+      Next (nextinstr (rs#rd <- v)) m
+  (** Integer multiply/divide *)
+  | Pmadd W rd r1 r2 r3 =>
+      Next (nextinstr (rs#rd <- (Val.add rs##r3 (Val.mul rs#r1 rs#r2)))) m
+  | Pmadd X rd r1 r2 r3 =>
+      Next (nextinstr (rs#rd <- (Val.addl rs###r3 (Val.mull rs#r1 rs#r2)))) m
+  | Pmsub W rd r1 r2 r3 =>
+      Next (nextinstr (rs#rd <- (Val.sub rs##r3 (Val.mul rs#r1 rs#r2)))) m
+  | Pmsub X rd r1 r2 r3 =>
+      Next (nextinstr (rs#rd <- (Val.subl rs###r3 (Val.mull rs#r1 rs#r2)))) m
+  | Psmulh rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mullhs rs#r1 rs#r2))) m
+  | Pumulh rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mullhu rs#r1 rs#r2))) m
+  | Psdiv W rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.divs rs#r1 rs#r2)))) m
+  | Psdiv X rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.divls rs#r1 rs#r2)))) m
+  | Pudiv W rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.divu rs#r1 rs#r2)))) m
+  | Pudiv X rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.divlu rs#r1 rs#r2)))) m
+  (** Floating-point loads and stores *)
+  | Pldrs rd a =>
+      exec_load Mfloat32 (fun v => v) a rd rs m
+  | Pldrd rd a =>
+      exec_load Mfloat64 (fun v => v) a rd rs m
+  | Pldrd_a rd a =>
+      exec_load Many64 (fun v => v) a rd rs m
+  | Pstrs r a =>
+      exec_store Mfloat32 a rs#r rs m
+  | Pstrd r a =>
+      exec_store Mfloat64 a rs#r rs m
+  | Pstrd_a r a =>
+      exec_store Many64 a rs#r rs m
+  (** Floating-point move *)
+  | Pfmov rd r1 =>
+      Next (nextinstr (rs#rd <- (rs#r1))) m
+  | Pfmovimms rd f =>
+      Next (nextinstr (rs#rd <- (Vsingle f))) m
+  | Pfmovimmd rd f =>
+      Next (nextinstr (rs#rd <- (Vfloat f))) m
+  | Pfmovi S rd r1 =>
+      Next (nextinstr (rs#rd <- (float32_of_bits rs##r1))) m
+  | Pfmovi D rd r1 =>
+      Next (nextinstr (rs#rd <- (float64_of_bits rs###r1))) m
+  (** Floating-point conversions *)
+  | Pfcvtds rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.floatofsingle rs#r1))) m
+  | Pfcvtsd rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m
+  | Pfcvtzs W S rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intofsingle rs#r1)))) m
+  | Pfcvtzs W D rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
+  | Pfcvtzs X S rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.longofsingle rs#r1)))) m
+  | Pfcvtzs X D rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.longoffloat rs#r1)))) m
+  | Pfcvtzu W S rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intuofsingle rs#r1)))) m
+  | Pfcvtzu W D rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intuoffloat rs#r1)))) m
+  | Pfcvtzu X S rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.longuofsingle rs#r1)))) m
+  | Pfcvtzu X D rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.longuoffloat rs#r1)))) m
+  | Pscvtf S W rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleofint rs#r1)))) m
+  | Pscvtf D W rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofint rs#r1)))) m
+  | Pscvtf S X rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleoflong rs#r1)))) m
+  | Pscvtf D X rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatoflong rs#r1)))) m
+  | Pucvtf S W rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleofintu rs#r1)))) m
+  | Pucvtf D W rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofintu rs#r1)))) m
+  | Pucvtf S X rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleoflongu rs#r1)))) m
+  | Pucvtf D X rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatoflongu rs#r1)))) m
+  (** Floating-point arithmetic *)
+  | Pfabs S rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.absfs rs#r1))) m
+  | Pfabs D rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.absf rs#r1))) m
+  | Pfneg S rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.negfs rs#r1))) m
+  | Pfneg D rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.negf rs#r1))) m
+  | Pfadd S rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.addfs rs#r1 rs#r2))) m
+  | Pfadd D rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.addf rs#r1 rs#r2))) m
+  | Pfdiv S rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.divfs rs#r1 rs#r2))) m
+  | Pfdiv D rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.divf rs#r1 rs#r2))) m
+  | Pfmul S rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mulfs rs#r1 rs#r2))) m
+  | Pfmul D rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mulf rs#r1 rs#r2))) m
+  | Pfnmul S rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.negfs (Val.mulfs rs#r1 rs#r2)))) m
+  | Pfnmul D rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.negf (Val.mulf rs#r1 rs#r2)))) m
+  | Pfsub S rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.subfs rs#r1 rs#r2))) m
+  | Pfsub D rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.subf rs#r1 rs#r2))) m
+  (** Floating-point comparison *)
+  | Pfcmp S r1 r2 =>
+      Next (nextinstr (compare_single rs rs#r1 rs#r2)) m
+  | Pfcmp D r1 r2 =>
+      Next (nextinstr (compare_float rs rs#r1 rs#r2)) m
+  | Pfcmp0 S r1 =>
+      Next (nextinstr (compare_single rs rs#r1 (Vsingle Float32.zero))) m
+  | Pfcmp0 D r1 =>
+      Next (nextinstr (compare_float rs rs#r1 (Vfloat Float.zero))) m
+  (** Floating-point conditional select *)
+  | Pfsel rd r1 r2 cond =>
+      let v :=
+        match eval_testcond cond rs with
+        | Some true => rs#r1
+        | Some false => rs#r2
+        | None => Vundef
+        end in
+      Next (nextinstr (rs#rd <- v)) m
+  (** Pseudo-instructions *)
+  | Pallocframe sz pos =>
+      let (m1, stk) := Mem.alloc m 0 sz in
+      let sp := (Vptr stk Ptrofs.zero) in
+      match Mem.storev Mint64 m1 (Val.offset_ptr sp pos) rs#SP with
+      | None => Stuck
+      | Some m2 => Next (nextinstr (rs #X29 <- (rs#SP) #SP <- sp #X16 <- Vundef)) m2
+      end
+  | Pfreeframe sz pos =>
+      match Mem.loadv Mint64 m (Val.offset_ptr rs#SP pos) with
+      | None => Stuck
+      | Some v =>
+          match rs#SP with
+          | Vptr stk ofs =>
+              match Mem.free m stk 0 sz with
+              | None => Stuck
+              | Some m' => Next (nextinstr (rs#SP <- v #X16 <- Vundef)) m'
+              end
+          | _ => Stuck
+          end
+      end
+  | Plabel lbl =>
+      Next (nextinstr rs) m
+  | Ploadsymbol rd id =>
+      Next (nextinstr (rs#rd <- (Genv.symbol_address ge id Ptrofs.zero))) m
+  | Pcvtsw2x rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.longofint rs#r1))) m
+  | Pcvtuw2x rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.longofintu rs#r1))) m
+  | Pcvtx2w rd =>
+      Next (nextinstr (rs#rd <- (Val.loword rs#rd))) m
+  | Pbtbl r tbl =>
+      match (rs#X16 <- Vundef)#r with
+      | Vint n =>
+          match list_nth_z tbl (Int.unsigned n) with
+          | None => Stuck
+          | Some lbl => goto_label f lbl (rs#X16 <- Vundef #X17 <- Vundef) m
+          end
+      | _ => Stuck
+      end
+  | Pbuiltin ef args res => Stuck    (**r treated specially below *)
+  (** The following instructions and directives are not generated directly
+      by Asmgen, so we do not model them. *)
+  | Pldp _ _ _
+  | Pstp _ _ _
+  | Pcls _ _ _
+  | Pclz _ _ _
+  | Prev _ _ _
+  | Prev16 _ _ _
+  | Pfsqrt _ _ _
+  | Pfmadd _ _ _ _ _
+  | Pfmsub _ _ _ _ _
+  | Pfnmadd _ _ _ _ _
+  | Pfnmsub _ _ _ _ _
+  | Pnop
+  | Pcfi_adjust _ 
+  | Pcfi_rel_offset _ =>
+      Stuck
+  end.
+
+(** Translation of the LTL/Linear/Mach view of machine registers
+  to the AArch64 view.  Note that no LTL register maps to [X16],
+  [X18], nor [X30].
+  [X18] is reserved as the platform register and never used by the
+  code generated by CompCert.
+  [X30] is used for the return address, and can also be used as temporary.
+  [X16] can be used as temporary. *)
+
+Definition preg_of (r: mreg) : preg :=
+  match r with
+  | R0 => X0   | R1 => X1   | R2 => X2   | R3 => X3
+  | R4 => X4   | R5 => X5   | R6 => X6   | R7 => X7
+  | R8 => X8   | R9 => X9   | R10 => X10 | R11 => X11
+  | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15
+  | R17 => X17 | R19 => X19
+  | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23
+  | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27
+  | R28 => X28 | R29 => X29
+  | F0 => D0   | F1 => D1   | F2 => D2   | F3 => D3
+  | F4 => D4   | F5 => D5   | F6 => D6   | F7 => D7
+  | F8 => D8   | F9 => D9   | F10 => D10 | F11 => D11
+  | F12 => D12 | F13 => D13 | F14 => D14 | F15 => D15
+  | F16 => D16 | F17 => D17 | F18 => D18 | F19 => D19
+  | F20 => D20 | F21 => D21 | F22 => D22 | F23 => D23
+  | F24 => D24 | F25 => D25 | F26 => D26 | F27 => D27
+  | F28 => D28 | F29 => D29 | F30 => D30 | F31 => D31
+  end.
+
+(** Undefine all registers except SP and callee-save registers *)
+
+Definition undef_caller_save_regs (rs: regset) : regset :=
+  fun r =>
+    if preg_eq r SP
+    || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs))
+    then rs r
+    else Vundef.
+
+(** Extract the values of the arguments of an external call.
+    We exploit the calling conventions from module [Conventions], except that
+    we use AArch64 registers instead of locations. *)
+
+Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
+  | extcall_arg_reg: forall r,
+      extcall_arg rs m (R r) (rs (preg_of r))
+  | extcall_arg_stack: forall ofs ty bofs v,
+      bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
+      Mem.loadv (chunk_of_type ty) m
+                (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v ->
+      extcall_arg rs m (Locations.S Outgoing ofs ty) v.
+
+Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop :=
+  | extcall_arg_one: forall l v,
+      extcall_arg rs m l v ->
+      extcall_arg_pair rs m (One l) v
+  | extcall_arg_twolong: forall hi lo vhi vlo,
+      extcall_arg rs m hi vhi ->
+      extcall_arg rs m lo vlo ->
+      extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo).
+
+Definition extcall_arguments
+    (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
+  list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args.
+
+Definition loc_external_result (sg: signature) : rpair preg :=
+  map_rpair preg_of (loc_result sg).
+
+(** Execution of the instruction at [rs#PC]. *)
+
+Inductive state: Type :=
+  | State: regset -> mem -> state.
+
+Inductive step: state -> trace -> state -> Prop :=
+  | exec_step_internal:
+      forall b ofs f i rs m rs' m',
+      rs PC = Vptr b ofs ->
+      Genv.find_funct_ptr ge b = Some (Internal f) ->
+      find_instr (Ptrofs.unsigned ofs) f.(fn_code) = Some i ->
+      exec_instr f i rs m = Next rs' m' ->
+      step (State rs m) E0 (State rs' m')
+  | exec_step_builtin:
+      forall b ofs f ef args res rs m vargs t vres rs' m',
+      rs PC = Vptr b ofs ->
+      Genv.find_funct_ptr ge b = Some (Internal f) ->
+      find_instr (Ptrofs.unsigned ofs) f.(fn_code) = Some (Pbuiltin ef args res) ->
+      eval_builtin_args ge rs rs#SP m args vargs ->
+      external_call ef ge vargs m t vres m' ->
+      rs' = nextinstr
+              (set_res res vres
+                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+      step (State rs m) t (State rs' m')
+  | exec_step_external:
+      forall b ef args res rs m t rs' m',
+      rs PC = Vptr b Ptrofs.zero ->
+      Genv.find_funct_ptr ge b = Some (External ef) ->
+      external_call ef ge args m t res m' ->
+      extcall_arguments rs m (ef_sig ef) args ->
+      rs' = (set_pair (loc_external_result (ef_sig ef)) res (undef_caller_save_regs rs)) #PC <- (rs RA) ->
+      step (State rs m) t (State rs' m').
+
+End RELSEM.
+
+(** Execution of whole programs. *)
+
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall m0,
+      Genv.init_mem p = Some m0 ->
+      let ge := Genv.globalenv p in
+      let rs0 :=
+        (Pregmap.init Vundef)
+        # PC <- (Genv.symbol_address ge p.(prog_main) Ptrofs.zero)
+        # RA <- Vnullptr
+        # SP <- Vnullptr in
+      initial_state p (State rs0 m0).
+
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall rs m r,
+      rs#PC = Vnullptr ->
+      rs#X0 = Vint r ->
+      final_state (State rs m) r.
+
+Definition semantics (p: program) :=
+  Semantics step (initial_state p) final_state (Genv.globalenv p).
+
+(** Determinacy of the [Asm] semantics. *)
+
+Remark extcall_arguments_determ:
+  forall rs m sg args1 args2,
+  extcall_arguments rs m sg args1 -> extcall_arguments rs m sg args2 -> args1 = args2.
+Proof.
+  intros until m.
+  assert (A: forall l v1 v2,
+             extcall_arg rs m l v1 -> extcall_arg rs m l v2 -> v1 = v2).
+  { intros. inv H; inv H0; congruence. }
+  assert (B: forall p v1 v2,
+             extcall_arg_pair rs m p v1 -> extcall_arg_pair rs m p v2 -> v1 = v2).
+  { intros. inv H; inv H0.
+    eapply A; eauto.
+    f_equal; eapply A; eauto. }
+  assert (C: forall ll vl1, list_forall2 (extcall_arg_pair rs m) ll vl1 ->
+             forall vl2, list_forall2 (extcall_arg_pair rs m) ll vl2 -> vl1 = vl2).
+  {
+    induction 1; intros vl2 EA; inv EA.
+    auto.
+    f_equal; eauto. }
+  intros. eapply C; eauto.
+Qed.
+
+Lemma semantics_determinate: forall p, determinate (semantics p).
+Proof.
+Ltac Equalities :=
+  match goal with
+  | [ H1: ?a = ?b, H2: ?a = ?c |- _ ] =>
+      rewrite H1 in H2; inv H2; Equalities
+  | _ => idtac
+  end.
+  intros; constructor; simpl; intros.
+- (* determ *)
+  inv H; inv H0; Equalities.
+  split. constructor. auto.
+  discriminate.
+  discriminate.
+  assert (vargs0 = vargs) by (eapply eval_builtin_args_determ; eauto). subst vargs0.
+  exploit external_call_determ. eexact H5. eexact H11. intros [A B].
+  split. auto. intros. destruct B; auto. subst. auto.
+  assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
+  exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+  split. auto. intros. destruct B; auto. subst. auto.
+- (* trace length *)
+  red; intros. inv H; simpl.
+  omega.
+  eapply external_call_trace_length; eauto.
+  eapply external_call_trace_length; eauto.
+- (* initial states *)
+  inv H; inv H0. f_equal. congruence.
+- (* final no step *)
+  inv H. red; intros; red; intros. inv H; rewrite H0 in *; discriminate.
+- (* final states *)
+  inv H; inv H0. congruence.
+Qed.
+
+(** Classification functions for processor registers (used in Asmgenproof). *)
+
+Definition data_preg (r: preg) : bool :=
+  match r with
+  | IR X16 => false
+  | IR X30 => false
+  | IR _ => true
+  | FR _ => true
+  | CR _ => false
+  | SP => true
+  | PC => false
+  end.
diff --git a/aarch64/AsmToJSON.ml b/aarch64/AsmToJSON.ml
new file mode 100644
index 00000000..b7cfc152
--- /dev/null
+++ b/aarch64/AsmToJSON.ml
@@ -0,0 +1,24 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Functions to serialize AArch64 Asm to JSON *)
+
+(* Dummy function *)
+
+let destination: string option ref = ref None
+
+let sdump_folder = ref ""
+
+let print_if prog sourcename =
+  ()
+
+let pp_mnemonics pp = ()
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
new file mode 100644
index 00000000..471ad501
--- /dev/null
+++ b/aarch64/Asmexpand.ml
@@ -0,0 +1,453 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Expanding built-ins and some pseudo-instructions by rewriting
+   of the AArch64 assembly code. *)
+
+open Asm
+open Asmexpandaux
+open AST
+open Camlcoq
+module Ptrofs = Integers.Ptrofs
+
+exception Error of string
+
+(* Useful constants *)
+
+let _0  = Z.zero
+let _1  = Z.one
+let _2  = Z.of_sint 2
+let _4  = Z.of_sint 4
+let _8  = Z.of_sint 8
+let _16  = Z.of_sint 16
+let _m1 = Z.of_sint (-1)
+
+(* Emit instruction sequences that set or offset a register by a constant. *)
+
+let expand_loadimm32 (dst: ireg) n =
+  List.iter emit (Asmgen.loadimm32 dst n [])
+
+let expand_addimm64 (dst: iregsp) (src: iregsp) n =
+  List.iter emit (Asmgen.addimm64 dst src n [])
+
+let expand_storeptr (src: ireg) (base: iregsp) ofs =
+  List.iter emit (Asmgen.storeptr src base ofs [])
+
+(* Handling of varargs *)
+
+(* Determine the number of int registers, FP registers, and stack locations
+   used to pass the fixed parameters. *)
+
+let rec next_arg_locations ir fr stk = function
+  | [] ->
+      (ir, fr, stk)
+  | (Tint | Tlong | Tany32 | Tany64) :: l ->
+      if ir < 8
+      then next_arg_locations (ir + 1) fr stk l
+      else next_arg_locations ir fr (stk + 8) l
+  | (Tfloat | Tsingle) :: l ->
+      if fr < 8
+      then next_arg_locations ir (fr + 1) stk l
+      else next_arg_locations ir fr (stk + 8) l
+
+(* Allocate memory on the stack and use it to save the registers
+   used for parameter passing.  As an optimization, do not save
+   the registers used to pass the fixed parameters. *)
+
+let int_param_regs = [| X0; X1; X2; X3; X4; X5; X6; X7 |]
+let float_param_regs = [| D0; D1; D2; D3; D4; D5; D6; D7 |]
+let size_save_register_area = 8*8 + 8*16
+
+let save_parameter_registers ir fr =
+  emit (Psubimm(X, XSP, XSP, Z.of_uint size_save_register_area));
+  let i = ref ir in
+  while !i < 8 do
+    let pos = 8*16 + !i*8 in
+    if !i land 1 = 0 then begin
+      emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1),
+                 ADimm(XSP, Z.of_uint pos)));
+      i := !i + 2
+    end else begin
+      emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos)));
+      i := !i + 1
+    end
+  done;
+  for i = fr to 7 do
+    let pos = i*16 in
+    emit (Pstrd(float_param_regs.(i), ADimm(XSP, Z.of_uint pos)))
+  done
+
+(* Initialize a va_list as per va_start.
+   Register r points to the following struct:
+
+   typedef struct __va_list {
+     void *__stack;             // next stack parameter
+     void *__gr_top;            // top of the save area for int regs
+     void *__vr_top;            // top of the save area for float regs
+     int__gr_offs;              // offset from gr_top to next int reg
+     int__vr_offs;              // offset from gr_top to next FP reg
+   }
+*)
+
+let current_function_stacksize = ref 0L
+
+let expand_builtin_va_start r =
+  if not (is_current_function_variadic ()) then
+    invalid_arg "Fatal error: va_start used in non-vararg function";
+  let (ir, fr, stk) =
+    next_arg_locations 0 0 0 (get_current_function_args ()) in
+  let stack_ofs = Int64.(add !current_function_stacksize (of_int stk))
+  and gr_top_ofs = !current_function_stacksize
+  and vr_top_ofs = Int64.(sub !current_function_stacksize 64L)
+  and gr_offs = - ((8 - ir) * 8)
+  and vr_offs = - ((8 - fr) * 16) in
+  (* va->__stack = sp + stack_ofs *)
+  expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 stack_ofs);
+  emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 0L)));
+  (* va->__gr_top = sp + gr_top_ofs *)
+  if gr_top_ofs <> stack_ofs then
+    expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 gr_top_ofs);
+  emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 8L)));
+  (* va->__vr_top = sp + vr_top_ofs *)
+  expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 vr_top_ofs);
+  emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 16L)));
+  (* va->__gr_offs = gr_offs *)
+  expand_loadimm32 X16 (coqint_of_camlint (Int32.of_int gr_offs));
+  emit (Pstrw(X16, ADimm(RR1 r, coqint_of_camlint64 24L)));
+  (* va->__vr_offs = vr_offs *)
+  expand_loadimm32 X16 (coqint_of_camlint (Int32.of_int vr_offs));
+  emit (Pstrw(X16, ADimm(RR1 r, coqint_of_camlint64 28L)))
+
+(* Handling of annotations *)
+
+let expand_annot_val kind txt targ args res =
+  emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
+  match args, res with
+  | [BA(IR src)], BR(IR dst) ->
+     if dst <> src then emit (Pmov (RR1 dst, RR1 src))
+  | [BA(FR src)], BR(FR dst) ->
+     if dst <> src then emit (Pfmov (dst, src))
+  | _, _ ->
+     raise (Error "ill-formed __builtin_annot_val")
+
+(* Handling of memcpy *)
+
+(* We assume unaligned memory accesses are efficient.  Hence we use
+   memory accesses as wide as we can, up to 16 bytes.
+   Temporary registers used: x15 x16 x17 x29 x30. *)
+
+let offset_in_range ofs =
+  (* The 512 upper bound comes from ldp/stp.  Single-register load/store
+     instructions support bigger offsets. *)
+  let ofs = Z.to_int64 ofs in 0L <= ofs && ofs < 512L
+  
+let memcpy_small_arg sz arg tmp =
+  match arg with
+  | BA (IR r) ->
+      (RR1 r, _0)
+  | BA_addrstack ofs ->
+      if offset_in_range ofs
+      && offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz)))
+      then (XSP, ofs)
+      else begin expand_addimm64 (RR1 tmp) XSP ofs; (RR1 tmp, _0) end
+  | _ ->
+      assert false
+
+let expand_builtin_memcpy_small sz al src dst =
+  let (tsrc, tdst) =
+    if dst <> BA (IR X17) then (X17, X29) else (X29, X17) in
+  let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
+  let (rdst, odst) = memcpy_small_arg sz dst tdst in
+  let rec copy osrc odst sz =
+    if sz >= 16 then begin
+      emit (Pldp(X16, X30, ADimm(rsrc, osrc)));
+      emit (Pstp(X16, X30, ADimm(rdst, odst)));
+      copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16)
+    end
+    else if sz >= 8 then begin
+      emit (Pldrx(X16, ADimm(rsrc, osrc)));
+      emit (Pstrx(X16, ADimm(rdst, odst)));
+      copy (Ptrofs.add osrc _8) (Ptrofs.add odst _8) (sz - 8)
+    end
+    else if sz >= 4 then begin
+      emit (Pldrw(X16, ADimm(rsrc, osrc)));
+      emit (Pstrw(X16, ADimm(rdst, odst)));
+      copy (Ptrofs.add osrc _4) (Ptrofs.add odst _4) (sz - 4)
+    end
+    else if sz >= 2 then begin
+      emit (Pldrh(W, X16, ADimm(rsrc, osrc)));
+      emit (Pstrh(X16, ADimm(rdst, odst)));
+      copy (Ptrofs.add osrc _2) (Ptrofs.add odst _2) (sz - 2)
+    end
+    else if sz >= 1 then begin
+      emit (Pldrb(W, X16, ADimm(rsrc, osrc)));
+      emit (Pstrb(X16, ADimm(rdst, odst)));
+      copy (Ptrofs.add osrc _1) (Ptrofs.add odst _1) (sz - 1)
+    end
+  in copy osrc odst sz
+
+let memcpy_big_arg arg tmp =
+  match arg with
+  | BA (IR r) -> emit (Pmov(RR1 tmp, RR1 r))
+  | BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs
+  | _ -> assert false
+
+let expand_builtin_memcpy_big sz al src dst =
+  assert (sz >= 16);
+  memcpy_big_arg src X30;
+  memcpy_big_arg dst X29;
+  let lbl = new_label () in
+  expand_loadimm32 X15 (Z.of_uint (sz / 16));
+  emit (Plabel lbl);
+  emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16)));
+  emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16)));
+  emit (Psubimm(W, RR1 X15, RR1 X15, _1));
+  emit (Pcbnz(W, X15, lbl));
+  if sz mod 16 >= 8 then begin
+    emit (Pldrx(X16, ADpostincr(RR1 X30, _8)));
+    emit (Pstrx(X16, ADpostincr(RR1 X29, _8)))
+  end;
+  if sz mod 8 >= 4 then begin
+    emit (Pldrw(X16, ADpostincr(RR1 X30, _4)));
+    emit (Pstrw(X16, ADpostincr(RR1 X29, _4)))
+  end;
+  if sz mod 4 >= 2 then begin
+    emit (Pldrh(W, X16, ADpostincr(RR1 X30, _2)));
+    emit (Pstrh(X16, ADpostincr(RR1 X29, _2)))
+  end;
+  if sz mod 2 >= 1 then begin
+    emit (Pldrb(W, X16, ADpostincr(RR1 X30, _1)));
+    emit (Pstrb(X16, ADpostincr(RR1 X29, _1)))
+  end
+
+let expand_builtin_memcpy  sz al args =
+  let (dst, src) =
+    match args with [d; s] -> (d, s) | _ -> assert false in
+  if sz < 64
+  then expand_builtin_memcpy_small sz al src dst
+  else expand_builtin_memcpy_big sz al src dst
+
+(* Handling of volatile reads and writes *)
+
+let expand_builtin_vload_common chunk base ofs res =
+  let addr = ADimm(base, ofs) in
+  match chunk, res with
+  | Mint8unsigned, BR(IR res) ->
+     emit (Pldrb(W, res, addr))
+  | Mint8signed, BR(IR res) ->
+     emit (Pldrsb(W, res, addr))
+  | Mint16unsigned, BR(IR res) ->
+     emit (Pldrh(W, res, addr))
+  | Mint16signed, BR(IR res) ->
+     emit (Pldrsh(W, res, addr))
+  | Mint32, BR(IR res) ->
+     emit (Pldrw(res, addr))
+  | Mint64, BR(IR res) ->
+     emit (Pldrx(res, addr))
+  | Mfloat32, BR(FR res) ->
+     emit (Pldrs(res, addr))
+  | Mfloat64, BR(FR res) ->
+     emit (Pldrd(res, addr))
+  | _ ->
+     assert false
+
+let expand_builtin_vload chunk args res =
+  match args with
+  | [BA(IR addr)] ->
+      expand_builtin_vload_common chunk (RR1 addr) _0 res
+  | [BA_addrstack ofs] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vload_common chunk XSP ofs res
+      else begin
+        expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
+        expand_builtin_vload_common chunk (RR1 X16) _0 res
+      end
+  | [BA_addptr(BA(IR addr), BA_long ofs)] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vload_common chunk (RR1 addr) ofs res
+      else begin
+        expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+        expand_builtin_vload_common chunk (RR1 X16) _0 res
+      end
+  | _ ->
+      assert false
+
+let expand_builtin_vstore_common chunk base ofs src =
+  let addr = ADimm(base, ofs) in
+  match chunk, src with
+  | (Mint8signed | Mint8unsigned), BA(IR src) ->
+     emit (Pstrb(src, addr))
+  | (Mint16signed | Mint16unsigned), BA(IR src) ->
+     emit (Pstrh(src, addr))
+  | Mint32, BA(IR src) ->
+     emit (Pstrw(src, addr))
+  | Mint64, BA(IR src) ->
+     emit (Pstrx(src, addr))
+  | Mfloat32, BA(FR src) ->
+     emit (Pstrs(src, addr))
+  | Mfloat64, BA(FR src) ->
+     emit (Pstrd(src, addr))
+  | _ ->
+     assert false
+
+let expand_builtin_vstore chunk args =
+  match args with
+  | [BA(IR addr); src] ->
+      expand_builtin_vstore_common chunk (RR1 addr) _0 src
+  | [BA_addrstack ofs; src] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vstore_common chunk XSP ofs src
+      else begin
+        expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
+        expand_builtin_vstore_common chunk (RR1 X16) _0 src
+      end
+  | [BA_addptr(BA(IR addr), BA_long ofs); src] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vstore_common chunk (RR1 addr) ofs src
+      else begin
+        expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+        expand_builtin_vstore_common chunk (RR1 X16) _0 src
+      end
+  | _ ->
+      assert false
+
+(* Handling of compiler-inlined builtins *)
+
+let expand_builtin_inline name args res =
+  match name, args, res with
+  (* Synchronization *)
+  | "__builtin_membar", [], _ ->
+     ()
+  | "__builtin_nop", [], _ ->
+     emit Pnop
+  (* Byte swap *)
+  | ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
+     emit (Prev(W, res, a1))
+  | "__builtin_bswap64", [BA(IR a1)], BR(IR res) ->
+     emit (Prev(X, res, a1))
+  | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
+     emit (Prev16(W, res, a1));
+     emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
+  (* Count leading zeros and leading sign bits *)
+  | "__builtin_clz",  [BA(IR a1)], BR(IR res) ->
+     emit (Pclz(W, res, a1))
+  | ("__builtin_clzl" | "__builtin_clzll"),  [BA(IR a1)], BR(IR res) ->
+     emit (Pclz(X, res, a1)) 
+  | "__builtin_cls",  [BA(IR a1)], BR(IR res) ->
+     emit (Pcls(W, res, a1))
+  | ("__builtin_clsl" | "__builtin_clsll"),  [BA(IR a1)], BR(IR res) ->
+     emit (Pcls(X, res, a1))
+ (* Float arithmetic *)
+  | "__builtin_fabs",  [BA(FR a1)], BR(FR res) ->
+     emit (Pfabs(D, res, a1))
+  | "__builtin_fsqrt",  [BA(FR a1)], BR(FR res) ->
+     emit (Pfsqrt(D, res, a1))
+  | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfmadd(D, res, a1, a2, a3))
+  | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfmsub(D, res, a1, a2, a3))
+  | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfnmadd(D, res, a1, a2, a3))
+  | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfnmsub(D, res, a1, a2, a3))
+  (* Vararg *)
+  | "__builtin_va_start", [BA(IR a)], _ ->
+      expand_builtin_va_start a
+  (* Catch-all *)
+  | _ ->
+     raise (Error ("unrecognized builtin " ^ name))
+
+(* Expansion of instructions *)
+
+let expand_instruction instr =
+  match instr with
+  | Pallocframe (sz, ofs) ->
+      emit (Pmov (RR1 X29, XSP));
+      if is_current_function_variadic() then begin
+        let (ir, fr, _) =
+          next_arg_locations 0 0 0 (get_current_function_args ()) in
+        save_parameter_registers ir fr;
+        current_function_stacksize :=
+          Int64.(add (Z.to_int64 sz) (of_int size_save_register_area))
+      end else begin
+        current_function_stacksize := Z.to_int64 sz
+      end;
+      expand_addimm64 XSP XSP (Ptrofs.repr (Z.neg sz));
+      expand_storeptr X29 XSP ofs
+  | Pfreeframe (sz, ofs) ->
+      expand_addimm64 XSP XSP (coqint_of_camlint64 !current_function_stacksize)
+  | Pcvtx2w rd ->
+      (* no code generated, the upper 32 bits of rd will be ignored *)
+      ()
+  | Pbuiltin (ef,args,res) ->
+     begin match ef with
+     | EF_builtin (name,sg) ->
+        expand_builtin_inline (camlstring_of_coqstring name) args res
+     | EF_vload chunk ->
+        expand_builtin_vload chunk args res
+     | EF_vstore chunk ->
+        expand_builtin_vstore chunk args
+     | EF_annot_val (kind,txt,targ) ->
+        expand_annot_val kind txt targ args res
+     | EF_memcpy(sz, al) ->
+        expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
+     | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+        emit instr
+     | _ ->
+        assert false
+     end
+  | _ ->
+     emit instr
+
+let int_reg_to_dwarf = function
+  | X0 -> 0 | X1 -> 1 | X2 -> 2 | X3 -> 3 | X4 -> 4
+  | X5 -> 5 | X6 -> 6 | X7 -> 7 | X8 -> 8 | X9 -> 9
+  | X10 -> 10 | X11 -> 11 | X12 -> 12 | X13 -> 13 | X14 -> 14
+  | X15 -> 15 | X16 -> 16 | X17 -> 17 | X18 -> 18 | X19 -> 19
+  | X20 -> 20 | X21 -> 21 | X22 -> 22 | X23 -> 23 | X24 -> 24
+  | X25 -> 25 | X26 -> 26 | X27 -> 27 | X28 -> 28 | X29 -> 29
+  | X30 -> 30
+
+let float_reg_to_dwarf = function
+  | D0 -> 64 | D1 -> 65 | D2 -> 66 | D3 -> 67 | D4 -> 68
+  | D5 -> 69 | D6 -> 70 | D7 -> 71 | D8 -> 72 | D9 -> 73
+  | D10 -> 74 | D11 -> 75 | D12 -> 76 | D13 -> 77 | D14 -> 78
+  | D15 -> 79 | D16 -> 80 | D17 -> 81 | D18 -> 82 | D19 -> 83
+  | D20 -> 84 | D21 -> 85 | D22 -> 86 | D23 -> 87 | D24 -> 88
+  | D25 -> 89 | D26 -> 90 | D27 -> 91 | D28 -> 92 | D29 -> 93
+  | D30 -> 94 | D31 -> 95
+
+let preg_to_dwarf = function
+   | IR r -> int_reg_to_dwarf r
+   | FR r -> float_reg_to_dwarf r
+   | SP -> 31
+   | _ -> assert false
+
+let expand_function id fn =
+  try
+    set_current_function fn;
+    expand id (* sp= *) 31 preg_to_dwarf expand_instruction fn.fn_code;
+    Errors.OK (get_current_function ())
+  with Error s ->
+    Errors.Error (Errors.msg (coqstring_of_camlstring s))
+
+let expand_fundef id = function
+  | Internal f ->
+      begin match expand_function id f with
+      | Errors.OK tf -> Errors.OK (Internal tf)
+      | Errors.Error msg -> Errors.Error msg
+      end
+  | External ef ->
+      Errors.OK (External ef)
+
+let expand_program (p: Asm.program) : Asm.program Errors.res =
+  AST.transform_partial_program2 expand_fundef (fun id v -> Errors.OK v) p
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
new file mode 100644
index 00000000..024c9a17
--- /dev/null
+++ b/aarch64/Asmgen.v
@@ -0,0 +1,1172 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Translation from Mach to AArch64. *)
+
+Require Import Recdef Coqlib Zwf Zbits.
+Require Import Errors AST Integers Floats Op.
+Require Import Locations Mach Asm.
+
+Local Open Scope string_scope.
+Local Open Scope list_scope.
+Local Open Scope error_monad_scope.
+
+(** Alignment check for symbols *)
+
+Parameter symbol_is_aligned : ident -> Z -> bool.
+(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *)
+
+(** Extracting integer or float registers. *)
+
+Definition ireg_of (r: mreg) : res ireg :=
+  match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgen.ireg_of") end.
+
+Definition freg_of (r: mreg) : res freg :=
+  match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end.
+
+(** Recognition of immediate arguments for logical integer operations.*)
+
+(** Valid immediate arguments are repetitions of a bit pattern [B]
+  of length [e] = 2, 4, 8, 16, 32 or 64.
+  The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*]
+  but must not be all zeros or all ones. *)
+
+(** The following automaton recognizes [0*1*0*|1*0*1*].
+<<
+               0          1          0
+              / \        / \        / \
+              \ /        \ /        \ /
+        -0--> [B] --1--> [D] --0--> [F]
+       /
+     [A]
+       \
+        -1--> [C] --0--> [E] --1--> [G]
+              / \        / \        / \
+              \ /        \ /        \ /
+               1          0          1
+>>
+*)
+
+Module Automaton.
+
+Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad.
+
+Definition start := SA.
+
+Definition next (s: state) (b: bool) :=
+  match s, b with
+    | SA,false => SB      | SA,true => SC
+    | SB,false => SB      | SB,true => SD
+    | SC,false => SE      | SC,true => SC
+    | SD,false => SF      | SD,true => SD
+    | SE,false => SE      | SE,true => SG
+    | SF,false => SF      | SF,true => Sbad
+    | SG,false => Sbad    | SG,true => SG
+    | Sbad,_ => Sbad
+  end.
+
+Definition accepting (s: state) :=
+  match s with
+  | SA | SB | SC | SD | SE | SF | SG => true
+  | Sbad => false
+  end.
+
+Fixpoint run (len: nat) (s: state) (x: Z) : bool :=
+  match len with
+  | Datatypes.O => accepting s
+  | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x)
+  end.
+
+End Automaton.
+
+(** The following function determines the candidate length [e],
+    ensuring that [x] is a repetition [BB...B] 
+    of a bit pattern [B] of length [e]. *)
+
+Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat :=
+  (** [test n] checks that the low [2n] bits of [x] are of the
+      form [BB], that is, two occurrences of the same [n] bits *)
+  let test (n: Z) : bool :=
+    Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in
+  (** If [test n] fails, we know that the candidate length [e] is
+      at least [2n].  Hence we test with decreasing values of [n]:
+      32, 16, 8, 4, 2. *)
+  if sixtyfour && negb (test 32) then 64%nat
+  else if negb (test 16) then 32%nat
+  else if negb (test 8) then 16%nat
+  else if negb (test 4) then 8%nat
+  else if negb (test 2) then 4%nat
+  else 2%nat.
+
+(** A valid logical immediate is 
+- neither [0] nor [-1];
+- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e]
+- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*].
+*)
+
+Definition is_logical_imm32 (x: int) : bool :=
+  negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) &&
+  Automaton.run (logical_imm_length (Int.unsigned x) false)
+                Automaton.start (Int.unsigned x).
+
+Definition is_logical_imm64 (x: int64) : bool :=
+  negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) &&
+  Automaton.run (logical_imm_length (Int64.unsigned x) true)
+                Automaton.start (Int64.unsigned x).
+
+(** Arithmetic immediates are 12-bit unsigned numbers, possibly shifted left 12 bits *)
+
+Definition is_arith_imm32 (x: int) : bool :=
+  Int.eq x (Int.zero_ext 12 x)
+  || Int.eq x (Int.shl (Int.zero_ext 12 (Int.shru x (Int.repr 12))) (Int.repr 12)).
+
+Definition is_arith_imm64 (x: int64) : bool :=
+  Int64.eq x (Int64.zero_ext 12 x)
+  || Int64.eq x (Int64.shl (Int64.zero_ext 12 (Int64.shru x (Int64.repr 12))) (Int64.repr 12)).
+
+(** Decompose integer literals into 16-bit fragments *)
+
+Fixpoint decompose_int (N: nat) (n p: Z) {struct N} : list (Z * Z) :=
+  match N with
+  | Datatypes.O => nil
+  | Datatypes.S N =>
+    let frag := Zzero_ext 16 (Z.shiftr n p) in
+    if Z.eqb frag 0 then
+      decompose_int N n (p + 16)
+    else
+      (frag, p) :: decompose_int N (Z.ldiff n (Z.shiftl 65535 p)) (p + 16)
+  end.
+
+Definition negate_decomposition (l: list (Z * Z)) :=
+  List.map (fun np => (Z.lxor (fst np) 65535, snd np)) l.
+
+Definition loadimm_k (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
+  List.fold_right (fun np k => Pmovk sz rd (fst np) (snd np) :: k) k l.
+
+Definition loadimm_z (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
+  match l with
+  | nil => Pmovz sz rd 0 0 :: k
+  | (n1, p1) :: l => Pmovz sz rd n1 p1 :: loadimm_k sz rd l k
+  end.
+
+Definition loadimm_n (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
+  match l with
+  | nil => Pmovn sz rd 0 0 :: k
+  | (n1, p1) :: l => Pmovn sz rd n1 p1 :: loadimm_k sz rd (negate_decomposition l) k
+  end.
+
+Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: code) : code :=
+  let N := match sz with W => 2%nat | X => 4%nat end in
+  let dz := decompose_int N n 0 in
+  let dn := decompose_int N (Z.lnot n) 0 in
+  if Nat.leb (List.length dz) (List.length dn)
+  then loadimm_z sz rd dz k
+  else loadimm_n sz rd dn k.
+
+Definition loadimm32 (rd: ireg) (n: int) (k: code) : code :=
+  if is_logical_imm32 n
+  then Porrimm W rd XZR (Int.unsigned n) :: k
+  else loadimm W rd (Int.unsigned n) k.
+
+Definition loadimm64 (rd: ireg) (n: int64) (k: code) : code :=
+  if is_logical_imm64 n
+  then Porrimm X rd XZR (Int64.unsigned n) :: k
+  else loadimm X rd (Int64.unsigned n) k.
+
+(** Add immediate *)
+
+Definition addimm_aux (insn: iregsp -> iregsp -> Z -> instruction)
+                        (rd r1: iregsp) (n: Z) (k: code) :=
+  let nlo := Zzero_ext 12 n in
+  let nhi := n - nlo in
+  if Z.eqb nhi 0 then
+    insn rd r1 nlo :: k
+  else if Z.eqb nlo 0 then
+    insn rd r1 nhi :: k
+  else
+    insn rd r1 nhi :: insn rd rd nlo :: k.
+
+Definition addimm32 (rd r1: ireg) (n: int) (k: code) : code :=
+  let m := Int.neg n in
+  if Int.eq n (Int.zero_ext 24 n) then
+    addimm_aux (Paddimm W) rd r1 (Int.unsigned n) k
+  else if Int.eq m (Int.zero_ext 24 m) then
+    addimm_aux (Psubimm W) rd r1 (Int.unsigned m) k
+  else if Int.lt n Int.zero then
+    loadimm32 X16 m (Psub W rd r1 X16 SOnone :: k)
+  else
+    loadimm32 X16 n (Padd W rd r1 X16 SOnone :: k).
+
+Definition addimm64 (rd r1: iregsp) (n: int64) (k: code) : code :=
+  let m := Int64.neg n in
+  if Int64.eq n (Int64.zero_ext 24 n) then
+    addimm_aux (Paddimm X) rd r1 (Int64.unsigned n) k
+  else if Int64.eq m (Int64.zero_ext 24 m) then
+    addimm_aux (Psubimm X) rd r1 (Int64.unsigned m) k
+  else if Int64.lt n Int64.zero then
+    loadimm64 X16 m (Psubext rd r1 X16 (EOuxtx Int.zero) :: k)
+  else
+    loadimm64 X16 n (Paddext rd r1 X16 (EOuxtx Int.zero) :: k).
+
+(** Logical immediate *)
+
+Definition logicalimm32
+              (insn1: ireg -> ireg0 -> Z -> instruction)
+              (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+              (rd r1: ireg) (n: int) (k: code) : code :=
+  if is_logical_imm32 n
+  then insn1 rd r1 (Int.unsigned n) :: k
+  else loadimm32 X16 n (insn2 rd r1 X16 SOnone :: k).
+
+Definition logicalimm64
+              (insn1: ireg -> ireg0 -> Z -> instruction)
+              (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+              (rd r1: ireg) (n: int64) (k: code) : code :=
+  if is_logical_imm64 n
+  then insn1 rd r1 (Int64.unsigned n) :: k
+  else loadimm64 X16 n (insn2 rd r1 X16 SOnone :: k).
+
+(** Sign- or zero-extended arithmetic *)
+
+Definition transl_extension (ex: extension) (a: int) : extend_op :=
+  match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end.
+
+Definition move_extended_base
+              (rd: ireg) (r1: ireg) (ex: extension) (k: code) : code :=
+  match ex with
+  | Xsgn32 => Pcvtsw2x rd r1 :: k
+  | Xuns32 => Pcvtuw2x rd r1 :: k
+  end.
+
+Definition move_extended
+              (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: code) : code :=
+  if Int.eq a Int.zero then
+    move_extended_base rd r1 ex k
+  else
+    move_extended_base rd r1 ex (Padd X rd XZR rd (SOlsl a) :: k).
+
+Definition arith_extended 
+              (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction)
+              (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+              (rd r1 r2: ireg) (ex: extension) (a: int) (k: code) : code :=
+  if Int.ltu a (Int.repr 5) then
+    insnX rd r1 r2 (transl_extension ex a) :: k
+  else
+    move_extended_base X16 r2 ex (insnS rd r1 X16 (SOlsl a) :: k).
+
+(** Extended right shift *)
+
+Definition shrx32 (rd r1: ireg) (n: int) (k: code) : code :=
+  if Int.eq n Int.zero then
+    Pmov rd r1 :: k
+  else if Int.eq n Int.one then
+         Padd W X16 r1 r1 (SOlsr (Int.repr 31)) ::
+         Porr W rd XZR X16 (SOasr n) :: k
+       else
+         Porr W X16 XZR r1 (SOasr (Int.repr 31)) ::
+         Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) ::
+         Porr W rd XZR X16 (SOasr n) :: k.
+
+Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code :=
+  if Int.eq n Int.zero then
+    Pmov rd r1 :: k
+  else if Int.eq n Int.one then
+         Padd X X16 r1 r1 (SOlsr (Int.repr 63)) ::
+         Porr X rd XZR X16 (SOasr n) :: k
+       else
+         Porr X X16 XZR r1 (SOasr (Int.repr 63)) ::
+         Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) ::
+         Porr X rd XZR X16 (SOasr n) :: k.
+
+(** Load the address [id + ofs] in [rd] *)
+
+Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: code) : code :=
+  if Archi.pic_code tt then
+    if Ptrofs.eq ofs Ptrofs.zero then
+      Ploadsymbol rd id :: k
+    else
+      Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k
+  else
+    Padrp rd id ofs :: Paddadr rd rd id ofs :: k.
+
+(** Translate a shifted operand *)
+
+Definition transl_shift (s: Op.shift) (a: int): Asm.shift_op :=
+  match s with
+  | Slsl => SOlsl a
+  | Slsr => SOlsr a
+  | Sasr => SOasr a
+  | Sror => SOror a
+  end.
+
+(** Translation of a condition.  Prepends to [k] the instructions
+  that evaluate the condition and leave its boolean result in one of
+  the bits of the condition register.  The bit in question is
+  determined by the [crbit_for_cond] function. *)
+
+Definition transl_cond
+              (cond: condition) (args: list mreg) (k: code) :=
+  match cond, args with
+  | (Ccomp c | Ccompu c), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp W r1 r2 SOnone :: k)
+  | (Ccompshift c s a | Ccompushift c s a), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp W r1 r2 (transl_shift s a) :: k)
+  | (Ccompimm c n | Ccompuimm c n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_arith_imm32 n then
+            Pcmpimm W r1 (Int.unsigned n) :: k
+          else if is_arith_imm32 (Int.neg n) then
+            Pcmnimm W r1 (Int.unsigned (Int.neg n)) :: k
+          else
+            loadimm32 X16 n (Pcmp W r1 X16 SOnone :: k))
+  | (Cmaskzero n | Cmasknotzero n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_logical_imm32 n then
+            Ptstimm W r1 (Int.unsigned n) :: k
+          else
+            loadimm32 X16 n (Ptst W r1 X16 SOnone :: k))
+  | (Ccompl c | Ccomplu c), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp X r1 r2 SOnone :: k)
+  | (Ccomplshift c s a | Ccomplushift c s a), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp X r1 r2 (transl_shift s a) :: k)
+  | (Ccomplimm c n | Ccompluimm c n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_arith_imm64 n then
+            Pcmpimm X r1 (Int64.unsigned n) :: k
+          else if is_arith_imm64 (Int64.neg n) then
+            Pcmnimm X r1 (Int64.unsigned (Int64.neg n)) :: k
+          else
+            loadimm64 X16 n (Pcmp X r1 X16 SOnone :: k))
+  | (Cmasklzero n | Cmasklnotzero n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_logical_imm64 n then
+            Ptstimm X r1 (Int64.unsigned n) :: k
+          else
+            loadimm64 X16 n (Ptst X r1 X16 SOnone :: k))
+  | Ccompf cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp D r1 r2 :: k)
+  | Cnotcompf cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp D r1 r2 :: k)
+  | Ccompfzero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 D r1 :: k)
+  | Cnotcompfzero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 D r1 :: k)
+  | Ccompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp S r1 r2 :: k)
+  | Cnotcompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp S r1 r2 :: k)
+  | Ccompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 S r1 :: k)
+  | Cnotcompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 S r1 :: k)
+  | _, _ =>
+      Error(msg "Asmgen.transl_cond")
+  end.
+
+Definition cond_for_signed_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCeq
+  | Cne => TCne
+  | Clt => TClt
+  | Cle => TCle
+  | Cgt => TCgt
+  | Cge => TCge
+  end.
+
+Definition cond_for_unsigned_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCeq
+  | Cne => TCne
+  | Clt => TClo
+  | Cle => TCls
+  | Cgt => TChi
+  | Cge => TChs
+  end.
+
+Definition cond_for_float_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCeq
+  | Cne => TCne
+  | Clt => TCmi
+  | Cle => TCls
+  | Cgt => TCgt
+  | Cge => TCge
+  end.
+
+Definition cond_for_float_not_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCne
+  | Cne => TCeq
+  | Clt => TCpl
+  | Cle => TChi
+  | Cgt => TCle
+  | Cge => TClt
+  end.
+
+Definition cond_for_cond (cond: condition) :=
+  match cond with
+  | Ccomp cmp => cond_for_signed_cmp cmp
+  | Ccompu cmp => cond_for_unsigned_cmp cmp
+  | Ccompshift cmp s a => cond_for_signed_cmp cmp
+  | Ccompushift cmp s a => cond_for_unsigned_cmp cmp
+  | Ccompimm cmp n => cond_for_signed_cmp cmp
+  | Ccompuimm cmp n => cond_for_unsigned_cmp cmp
+  | Cmaskzero n => TCeq
+  | Cmasknotzero n => TCne
+  | Ccompl cmp => cond_for_signed_cmp cmp
+  | Ccomplu cmp => cond_for_unsigned_cmp cmp
+  | Ccomplshift cmp s a => cond_for_signed_cmp cmp
+  | Ccomplushift cmp s a => cond_for_unsigned_cmp cmp
+  | Ccomplimm cmp n => cond_for_signed_cmp cmp
+  | Ccompluimm cmp n => cond_for_unsigned_cmp cmp
+  | Cmasklzero n => TCeq
+  | Cmasklnotzero n => TCne
+  | Ccompf cmp => cond_for_float_cmp cmp
+  | Cnotcompf cmp => cond_for_float_not_cmp cmp
+  | Ccompfzero cmp => cond_for_float_cmp cmp
+  | Cnotcompfzero cmp => cond_for_float_not_cmp cmp
+  | Ccompfs cmp => cond_for_float_cmp cmp
+  | Cnotcompfs cmp => cond_for_float_not_cmp cmp
+  | Ccompfszero cmp => cond_for_float_cmp cmp
+  | Cnotcompfszero cmp => cond_for_float_not_cmp cmp
+  end.
+
+(** Translation of a conditional branch.  Prepends to [k] the instructions
+  that evaluate the condition and ranch to [lbl] if it holds.
+  We recognize some conditional branches that can be implemented
+  without setting then testing condition flags.  *)
+
+Definition transl_cond_branch_default
+              (c: condition) (args: list mreg) (lbl: label) (k: code) :=
+  transl_cond c args (Pbc (cond_for_cond c) lbl :: k).
+ 
+Definition transl_cond_branch
+              (c: condition) (args: list mreg) (lbl: label) (k: code) :=
+  match args, c with
+  | a1 :: nil, (Ccompimm Cne n | Ccompuimm Cne n) =>
+      if Int.eq n Int.zero
+      then (do r1 <- ireg_of a1; OK (Pcbnz W r1 lbl :: k))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, (Ccompimm Ceq n | Ccompuimm Ceq n) =>
+      if Int.eq n Int.zero
+      then (do r1 <- ireg_of a1; OK (Pcbz W r1 lbl :: k))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, (Ccomplimm Cne n | Ccompluimm Cne n) =>
+      if Int64.eq n Int64.zero
+      then (do r1 <- ireg_of a1; OK (Pcbnz X r1 lbl :: k))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, (Ccomplimm Ceq n | Ccompluimm Ceq n) =>
+      if Int64.eq n Int64.zero
+      then (do r1 <- ireg_of a1; OK (Pcbz X r1 lbl :: k))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, Cmaskzero n =>
+      match Int.is_power2 n with
+      | Some bit => do r1 <- ireg_of a1; OK (Ptbz W r1 bit lbl :: k)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | a1 :: nil, Cmasknotzero n =>
+      match Int.is_power2 n with
+      | Some bit => do r1 <- ireg_of a1; OK (Ptbnz W r1 bit lbl :: k)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | a1 :: nil, Cmasklzero n =>
+      match Int64.is_power2' n with
+      | Some bit => do r1 <- ireg_of a1; OK (Ptbz X r1 bit lbl :: k)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | a1 :: nil, Cmasklnotzero n =>
+      match Int64.is_power2' n with
+      | Some bit => do r1 <- ireg_of a1; OK (Ptbnz X r1 bit lbl :: k)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | _, _ =>
+      transl_cond_branch_default c args lbl k
+  end.
+  
+(** Translation of the arithmetic operation [res <- op(args)].
+    The corresponding instructions are prepended to [k]. *)
+
+Definition transl_op
+              (op: operation) (args: list mreg) (res: mreg) (k: code) :=
+  match op, args with
+  | Omove, a1 :: nil =>
+      match preg_of res, preg_of a1 with
+      | IR r, IR a => OK (Pmov r a :: k)
+      | FR r, FR a => OK (Pfmov r a :: k)
+      |  _  ,  _   => Error(msg "Asmgen.Omove")
+      end
+  | Ointconst n, nil =>
+      do rd <- ireg_of res;
+      OK (loadimm32 rd n k)
+  | Olongconst n, nil =>
+      do rd <- ireg_of res;
+      OK (loadimm64 rd n k)
+  | Ofloatconst f, nil =>
+      do rd <- freg_of res;
+      OK (if Float.eq_dec f Float.zero
+          then Pfmovi D rd XZR :: k
+          else Pfmovimmd rd f :: k)
+  | Osingleconst f, nil =>
+      do rd <- freg_of res;
+      OK (if Float32.eq_dec f Float32.zero
+          then Pfmovi S rd XZR :: k
+          else Pfmovimms rd f :: k)
+  | Oaddrsymbol id ofs, nil =>
+      do rd <- ireg_of res;
+      OK (loadsymbol rd id ofs k)
+  | Oaddrstack ofs, nil =>
+      do rd <- ireg_of res;
+      OK (addimm64 rd XSP (Ptrofs.to_int64 ofs) k)
+(** 32-bit integer arithmetic *)
+  | Oshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porr W rd XZR r1 (transl_shift s a) :: k)
+  | Oadd, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd W rd r1 r2 SOnone :: k)
+  | Oaddshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd W rd r1 r2 (transl_shift s a) :: k)
+  | Oaddimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (addimm32 rd r1 n k)
+  | Oneg, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub W rd XZR r1 SOnone :: k)
+  | Onegshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub W rd XZR r1 (transl_shift s a) :: k)
+  | Osub, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub W rd r1 r2 SOnone :: k)
+  | Osubshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub W rd r1 r2 (transl_shift s a) :: k)
+  | Omul, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pmadd W rd r1 r2 XZR :: k)
+  | Omuladd, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmadd W rd r2 r3 r1 :: k)
+  | Omulsub, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmsub W rd r2 r3 r1 :: k)
+  | Odiv, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psdiv W rd r1 r2 :: k)
+  | Odivu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pudiv W rd r1 r2 :: k)
+  | Oand, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand W rd r1 r2 SOnone :: k)
+  | Oandshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand W rd r1 r2 (transl_shift s a) :: k)
+  | Oandimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm32 (Pandimm W) (Pand W) rd r1 n k)      
+  | Oor, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr W rd r1 r2 SOnone :: k)
+  | Oorshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr W rd r1 r2 (transl_shift s a) :: k)
+  | Oorimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm32 (Porrimm W) (Porr W) rd r1 n k)      
+  | Oxor, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor W rd r1 r2 SOnone :: k)
+  | Oxorshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor W rd r1 r2 (transl_shift s a) :: k)
+  | Oxorimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm32 (Peorimm W) (Peor W) rd r1 n k)      
+  | Onot, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn W rd XZR r1 SOnone :: k)
+  | Onotshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn W rd XZR r1 (transl_shift s a) :: k)
+  | Obic, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic W rd r1 r2 SOnone :: k)
+  | Obicshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic W rd r1 r2 (transl_shift s a) :: k)
+  | Oorn, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn W rd r1 r2 SOnone :: k)
+  | Oornshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn W rd r1 r2 (transl_shift s a) :: k)
+  | Oeqv, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon W rd r1 r2 SOnone :: k)
+  | Oeqvshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon W rd r1 r2 (transl_shift s a) :: k)
+  | Oshl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plslv W rd r1 r2 :: k)
+  | Oshr, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pasrv W rd r1 r2 :: k)
+  | Oshru, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plsrv W rd r1 r2 :: k)
+  | Oshrximm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (shrx32 rd r1 n k)
+  | Ozext s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz W rd r1 Int.zero s :: k)
+  | Osext s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz W rd r1 Int.zero s :: k)
+  | Oshlzext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
+  | Oshlsext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
+  | Ozextshr a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
+  | Osextshr a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
+(** 64-bit integer arithmetic *)
+  | Oshiftl s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porr X rd XZR r1 (transl_shift s a) :: k)
+  | Oextend x a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (move_extended rd r1 x a k)
+  (* [Omakelong] and [Ohighlong] should not occur *)
+  | Olowlong, a1 :: nil => 
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      assertion (ireg_eq rd r1);
+      OK (Pcvtx2w rd :: k)
+  | Oaddl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd X rd r1 r2 SOnone :: k)
+  | Oaddlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd X rd r1 r2 (transl_shift s a) :: k)
+  | Oaddlext x a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (arith_extended Paddext (Padd X) rd r1 r2 x a k)
+  | Oaddlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (addimm64 rd r1 n k)
+  | Onegl, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub X rd XZR r1 SOnone :: k)
+  | Oneglshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub X rd XZR r1 (transl_shift s a) :: k)
+  | Osubl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub X rd r1 r2 SOnone :: k)
+  | Osublshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub X rd r1 r2 (transl_shift s a) :: k)
+  | Osublext x a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (arith_extended Psubext (Psub X) rd r1 r2 x a k)
+  | Omull, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pmadd X rd r1 r2 XZR :: k)
+  | Omulladd, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmadd X rd r2 r3 r1 :: k)
+  | Omullsub, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmsub X rd r2 r3 r1 :: k)
+  | Omullhs, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psmulh rd r1 r2 :: k)
+  | Omullhu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pumulh rd r1 r2 :: k)
+  | Odivl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psdiv X rd r1 r2 :: k)
+  | Odivlu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pudiv X rd r1 r2 :: k)
+  | Oandl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand X rd r1 r2 SOnone :: k)
+  | Oandlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand X rd r1 r2 (transl_shift s a) :: k)
+  | Oandlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm64 (Pandimm X) (Pand X) rd r1 n k)      
+  | Oorl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr X rd r1 r2 SOnone :: k)
+  | Oorlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr X rd r1 r2 (transl_shift s a) :: k)
+  | Oorlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm64 (Porrimm X) (Porr X) rd r1 n k)      
+  | Oxorl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor X rd r1 r2 SOnone :: k)
+  | Oxorlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor X rd r1 r2 (transl_shift s a) :: k)
+  | Oxorlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm64 (Peorimm X) (Peor X) rd r1 n k)      
+  | Onotl, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn X rd XZR r1 SOnone :: k)
+  | Onotlshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn X rd XZR r1 (transl_shift s a) :: k)
+  | Obicl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic X rd r1 r2 SOnone :: k)
+  | Obiclshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic X rd r1 r2 (transl_shift s a) :: k)
+  | Oornl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn X rd r1 r2 SOnone :: k)
+  | Oornlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn X rd r1 r2 (transl_shift s a) :: k)
+  | Oeqvl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon X rd r1 r2 SOnone :: k)
+  | Oeqvlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon X rd r1 r2 (transl_shift s a) :: k)
+  | Oshll, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plslv X rd r1 r2 :: k)
+  | Oshrl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pasrv X rd r1 r2 :: k)
+  | Oshrlu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plsrv X rd r1 r2 :: k)
+  | Oshrlximm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (shrx64 rd r1 n k)
+  | Ozextl s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz X rd r1 Int.zero s :: k)
+  | Osextl s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz X rd r1 Int.zero s :: k)
+  | Oshllzext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
+  | Oshllsext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
+  | Ozextshrl a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
+  | Osextshrl a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
+(** 64-bit floating-point arithmetic *)
+  | Onegf, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfneg D rd rs :: k)
+  | Oabsf, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfabs D rd rs :: k)
+  | Oaddf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfadd D rd rs1 rs2 :: k)
+  | Osubf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfsub D rd rs1 rs2 :: k)
+  | Omulf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfmul D rd rs1 rs2 :: k)
+  | Odivf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfdiv D rd rs1 rs2 :: k)
+(** 32-bit floating-point arithmetic *)
+  | Onegfs, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfneg S rd rs :: k)
+  | Oabsfs, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfabs S rd rs :: k)
+  | Oaddfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfadd S rd rs1 rs2 :: k)
+  | Osubfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfsub S rd rs1 rs2 :: k)
+  | Omulfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfmul S rd rs1 rs2 :: k)
+  | Odivfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfdiv S rd rs1 rs2 :: k)
+  | Osingleoffloat, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfcvtsd rd rs :: k)
+  | Ofloatofsingle, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfcvtds rd rs :: k)
+(** Conversions between int and float *)
+  | Ointoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs W D rd rs :: k)
+  | Ointuoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu W D rd rs :: k)
+  | Ofloatofint, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf D W rd rs :: k)
+  | Ofloatofintu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf D W rd rs :: k)
+  | Ointofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs W S rd rs :: k)
+  | Ointuofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu W S rd rs :: k)
+  | Osingleofint, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf S W rd rs :: k)
+  | Osingleofintu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf S W rd rs :: k)
+  | Olongoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs X D rd rs :: k)
+  | Olonguoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu X D rd rs :: k)
+  | Ofloatoflong, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf D X rd rs :: k)
+  | Ofloatoflongu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf D X rd rs :: k)
+  | Olongofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs X S rd rs :: k)
+  | Olonguofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu X S rd rs :: k)
+  | Osingleoflong, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf S X rd rs :: k)
+  | Osingleoflongu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf S X rd rs :: k)
+(** Boolean tests *)
+  | Ocmp c, _ =>
+      do rd <- ireg_of res;
+      transl_cond c args (Pcset rd (cond_for_cond c) :: k)
+(** Conditional move *)
+  | Osel cmp ty, a1 :: a2 :: args =>
+      match preg_of res with
+      | IR r => 
+          do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+          transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) :: k)
+      | FR r =>
+          do r1 <- freg_of a1; do r2 <- freg_of a2;
+          transl_cond cmp args (Pfsel r r1 r2 (cond_for_cond cmp) :: k)
+      | _ =>
+          Error(msg "Asmgen.Osel")
+      end
+  | _, _ =>
+      Error(msg "Asmgen.transl_op")
+  end.
+
+(** Translation of addressing modes *)
+
+Definition offset_representable (sz: Z) (ofs: int64) : bool :=
+  let isz := Int64.repr sz in
+  (** either unscaled 9-bit signed *)
+  Int64.eq ofs (Int64.sign_ext 9 ofs) ||
+  (** or scaled 12-bit unsigned *)
+  (Int64.eq (Int64.modu ofs isz) Int64.zero
+   && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))).
+ 
+Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg)
+                             (insn: Asm.addressing -> instruction) (k: code) : res code :=
+  match addr, args with
+  | Aindexed ofs, a1 :: nil =>
+      do r1 <- ireg_of a1;
+       if offset_representable sz ofs then
+        OK (insn (ADimm r1 ofs) :: k)
+      else
+        OK (loadimm64 X16 ofs (insn (ADreg r1 X16) :: k))
+  | Aindexed2, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (insn (ADreg r1 r2) :: k)
+  | Aindexed2shift a, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      if Int.eq a Int.zero then
+        OK (insn (ADreg r1 r2) :: k)
+      else if Int.eq (Int.shl Int.one a) (Int.repr sz) then
+        OK (insn (ADlsl r1 r2 a) :: k)
+      else
+        OK (Padd X X16 r1 r2 (SOlsl a) :: insn (ADimm X16 Int64.zero) :: k)
+  | Aindexed2ext x a, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then
+        OK (insn (match x with Xsgn32 => ADsxt r1 r2 a
+                             | Xuns32 => ADuxt r1 r2 a end) :: k)
+      else
+        OK (arith_extended Paddext (Padd X) X16 r1 r2 x a
+                           (insn (ADimm X16 Int64.zero) :: k))
+  | Aglobal id ofs, nil =>
+      assertion (negb (Archi.pic_code tt));
+      if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz
+      then OK (Padrp X16 id ofs :: insn (ADadr X16 id ofs) :: k)
+      else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) :: k))
+  | Ainstack ofs, nil =>
+      let ofs := Ptrofs.to_int64 ofs in
+      if offset_representable sz ofs then
+        OK (insn (ADimm XSP ofs) :: k)
+      else
+        OK (loadimm64 X16 ofs (insn (ADreg XSP X16) :: k))
+  | _, _ =>
+      Error(msg "Asmgen.transl_addressing")
+  end.
+
+(** Translation of loads and stores *)
+
+Definition transl_load (trap: trapping_mode)
+                       (chunk: memory_chunk) (addr: Op.addressing)
+                       (args: list mreg) (dst: mreg) (k: code) : res code :=
+  match trap with
+  | NOTRAP => Error (msg "Asmgen.transl_load non-trapping loads unsupported on aarch64")
+  | TRAP =>
+  match chunk with
+  | Mint8unsigned =>
+      do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrb W rd) k
+  | Mint8signed =>
+      do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrsb W rd) k
+  | Mint16unsigned =>
+      do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrh W rd) k
+  | Mint16signed =>
+      do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrsh W rd) k
+  | Mint32 =>
+      do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw rd) k
+  | Mint64 =>
+      do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx rd) k
+  | Mfloat32 =>
+      do rd <- freg_of dst; transl_addressing 4 addr args (Pldrs rd) k
+  | Mfloat64 =>
+      do rd <- freg_of dst; transl_addressing 8 addr args (Pldrd rd) k
+  | Many32 =>
+      do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw_a rd) k
+  | Many64 =>
+      do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx_a rd) k
+  end
+  end.
+
+Definition transl_store (chunk: memory_chunk) (addr: Op.addressing)
+                        (args: list mreg) (src: mreg) (k: code) : res code :=
+  match chunk with
+  | Mint8unsigned | Mint8signed =>
+      do r1 <- ireg_of src; transl_addressing 1 addr args (Pstrb r1) k
+  | Mint16unsigned | Mint16signed =>
+      do r1 <- ireg_of src; transl_addressing 2 addr args (Pstrh r1) k
+  | Mint32 =>
+      do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw r1) k
+  | Mint64 =>
+      do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx r1) k
+  | Mfloat32 =>
+      do r1 <- freg_of src; transl_addressing 4 addr args (Pstrs r1) k
+  | Mfloat64 =>
+      do r1 <- freg_of src; transl_addressing 8 addr args (Pstrd r1) k
+  | Many32 =>
+      do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw_a r1) k
+  | Many64 =>
+      do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx_a r1) k
+  end.
+
+(** Register-indexed loads and stores *)
+
+Definition indexed_memory_access (insn: Asm.addressing -> instruction)
+                                 (sz: Z) (base: iregsp) (ofs: ptrofs) (k: code) :=
+  let ofs := Ptrofs.to_int64 ofs in
+  if offset_representable sz ofs
+  then insn (ADimm base ofs) :: k
+  else loadimm64 X16 ofs (insn (ADreg base X16) :: k).
+
+Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: code) :=
+  match ty, preg_of dst with
+  | Tint,    IR rd => OK (indexed_memory_access (Pldrw rd) 4 base ofs k)
+  | Tlong,   IR rd => OK (indexed_memory_access (Pldrx rd) 8 base ofs k)
+  | Tsingle, FR rd => OK (indexed_memory_access (Pldrs rd) 4 base ofs k)
+  | Tfloat,  FR rd => OK (indexed_memory_access (Pldrd rd) 8 base ofs k)
+  | Tany32,  IR rd => OK (indexed_memory_access (Pldrw_a rd) 4 base ofs k)
+  | Tany64,  IR rd => OK (indexed_memory_access (Pldrx_a rd) 8 base ofs k)
+  | Tany64,  FR rd => OK (indexed_memory_access (Pldrd_a rd) 8 base ofs k)
+  | _, _           => Error (msg "Asmgen.loadind")
+  end.
+
+Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: code) :=
+  match ty, preg_of src with
+  | Tint,    IR rd => OK (indexed_memory_access (Pstrw rd) 4 base ofs k)
+  | Tlong,   IR rd => OK (indexed_memory_access (Pstrx rd) 8 base ofs k)
+  | Tsingle, FR rd => OK (indexed_memory_access (Pstrs rd) 4 base ofs k)
+  | Tfloat,  FR rd => OK (indexed_memory_access (Pstrd rd) 8 base ofs k)
+  | Tany32,  IR rd => OK (indexed_memory_access (Pstrw_a rd) 4 base ofs k)
+  | Tany64,  IR rd => OK (indexed_memory_access (Pstrx_a rd) 8 base ofs k)
+  | Tany64,  FR rd => OK (indexed_memory_access (Pstrd_a rd) 8 base ofs k)
+  | _, _           => Error (msg "Asmgen.storeind")
+  end.
+
+Definition loadptr (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: code) :=
+  indexed_memory_access (Pldrx dst) 8 base ofs k.
+
+Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) :=
+  indexed_memory_access (Pstrx src) 8 base ofs k.
+
+(** Function epilogue *)
+
+Definition make_epilogue (f: Mach.function) (k: code) :=
+  (* FIXME
+     Cannot be used because memcpy destroys X30;
+     issue being discussed with X. Leroy *)
+  (* if is_leaf_function f
+  then Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k
+  else*) loadptr XSP f.(fn_retaddr_ofs) RA
+         (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k).
+  
+(** Translation of a Mach instruction. *)
+
+Definition transl_instr (f: Mach.function) (i: Mach.instruction)
+                        (r29_is_parent: bool) (k: code) : res code :=
+  match i with
+  | Mgetstack ofs ty dst =>
+      loadind XSP ofs ty dst k
+  | Msetstack src ofs ty =>
+      storeind src XSP ofs ty k
+  | Mgetparam ofs ty dst =>
+      (* load via the frame pointer if it is valid *)
+      do c <- loadind X29 ofs ty dst k;
+      OK (if r29_is_parent then c else loadptr XSP f.(fn_link_ofs) X29 c)
+  | Mop op args res =>
+      transl_op op args res k
+  | Mload trap chunk addr args dst =>
+      transl_load trap chunk addr args dst k
+  | Mstore chunk addr args src =>
+      transl_store chunk addr args src k
+  | Mcall sig (inl r) =>
+      do r1 <- ireg_of r; OK (Pblr r1 sig :: k)
+  | Mcall sig (inr symb) =>
+      OK (Pbl symb sig :: k)
+  | Mtailcall sig (inl r) =>
+      do r1 <- ireg_of r;
+      OK (make_epilogue f (Pbr r1 sig :: k))
+  | Mtailcall sig (inr symb) =>
+      OK (make_epilogue f (Pbs symb sig :: k))
+  | Mbuiltin ef args res =>
+      OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) :: k)
+  | Mlabel lbl =>
+      OK (Plabel lbl :: k)
+  | Mgoto lbl =>
+      OK (Pb lbl :: k)
+  | Mcond cond args lbl =>
+      transl_cond_branch cond args lbl k
+  | Mjumptable arg tbl =>
+      do r <- ireg_of arg;
+      OK (Pbtbl r tbl :: k)
+  | Mreturn =>
+      OK (make_epilogue f (Pret RA :: k))
+  end.
+
+(** Translation of a code sequence *)
+
+Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
+  match i with
+  | Msetstack src ofs ty => before
+  | Mgetparam ofs ty dst => negb (mreg_eq dst R29)
+  | Mop op args res => before && negb (mreg_eq res R29)
+  | _ => false
+  end.
+
+(** This is the naive definition that we no longer use because it
+  is not tail-recursive.  It is kept as specification. *)
+
+Fixpoint transl_code (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
+  match il with
+  | nil => OK nil
+  | i1 :: il' =>
+      do k <- transl_code f il' (it1_is_parent it1p i1);
+      transl_instr f i1 it1p k
+  end.
+
+(** This is an equivalent definition in continuation-passing style
+  that runs in constant stack space. *)
+
+Fixpoint transl_code_rec (f: Mach.function) (il: list Mach.instruction)
+                         (it1p: bool) (k: code -> res code) :=
+  match il with
+  | nil => k nil
+  | i1 :: il' =>
+      transl_code_rec f il' (it1_is_parent it1p i1)
+        (fun c1 => do c2 <- transl_instr f i1 it1p c1; k c2)
+  end.
+
+Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
+  transl_code_rec f il it1p (fun c => OK c).
+
+(** Translation of a whole function.  Note that we must check
+  that the generated code contains less than [2^32] instructions,
+  otherwise the offset part of the [PC] code pointer could wrap
+  around, leading to incorrect executions. *)
+
+Definition transl_function (f: Mach.function) :=
+  do c <- transl_code' f f.(Mach.fn_code) true;
+  OK (mkfunction f.(Mach.fn_sig)
+        (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
+         storeptr RA XSP f.(fn_retaddr_ofs) c)).
+
+Definition transf_function (f: Mach.function) : res Asm.function :=
+  do tf <- transl_function f;
+  if zlt Ptrofs.max_unsigned (list_length_z tf.(fn_code))
+  then Error (msg "code size exceeded")
+  else OK tf.
+
+Definition transf_fundef (f: Mach.fundef) : res Asm.fundef :=
+  transf_partial_fundef transf_function f.
+
+Definition transf_program (p: Mach.program) : res Asm.program :=
+  transform_partial_program transf_fundef p.
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
new file mode 100644
index 00000000..6831509f
--- /dev/null
+++ b/aarch64/Asmgenproof.v
@@ -0,0 +1,1101 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for AArch64 code generation. *)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Mach Conventions Asm.
+Require Import Asmgen Asmgenproof0 Asmgenproof1.
+
+Definition match_prog (p: Mach.program) (tp: Asm.program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+  intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: Mach.program.
+Variable tprog: Asm.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_match TRANSF).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSF).
+
+Lemma functions_translated:
+  forall b f,
+  Genv.find_funct_ptr ge b = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSF).
+
+Lemma functions_transl:
+  forall fb f tf,
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  transf_function f = OK tf ->
+  Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+  intros. exploit functions_translated; eauto. intros [tf' [A B]].
+  monadInv B. rewrite H0 in EQ; inv EQ; auto.
+Qed.
+
+(** * Properties of control flow *)
+
+Lemma transf_function_no_overflow:
+  forall f tf,
+  transf_function f = OK tf -> list_length_z tf.(fn_code) <= Ptrofs.max_unsigned.
+Proof.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
+  omega.
+Qed.
+
+Lemma exec_straight_exec:
+  forall fb f c ep tf tc c' rs m rs' m',
+  transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
+  exec_straight tge tf tc rs m c' rs' m' ->
+  plus step tge (State rs m) E0 (State rs' m').
+Proof.
+  intros. inv H.
+  eapply exec_straight_steps_1; eauto.
+  eapply transf_function_no_overflow; eauto.
+  eapply functions_transl; eauto.
+Qed.
+
+Lemma exec_straight_at:
+  forall fb f c ep tf tc c' ep' tc' rs m rs' m',
+  transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
+  transl_code f c' ep' = OK tc' ->
+  exec_straight tge tf tc rs m tc' rs' m' ->
+  transl_code_at_pc ge (rs' PC) fb f c' ep' tf tc'.
+Proof.
+  intros. inv H.
+  exploit exec_straight_steps_2; eauto.
+  eapply transf_function_no_overflow; eauto.
+  eapply functions_transl; eauto.
+  intros [ofs' [PC' CT']].
+  rewrite PC'. constructor; auto.
+Qed.
+
+(** The following lemmas show that the translation from Mach to Asm
+  preserves labels, in the sense that the following diagram commutes:
+<<
+                          translation
+        Mach code ------------------------ Asm instr sequence
+            |                                          |
+            | Mach.find_label lbl       find_label lbl |
+            |                                          |
+            v                                          v
+        Mach code tail ------------------- Asm instr seq tail
+                          translation
+>>
+  The proof demands many boring lemmas showing that Asm constructor
+  functions do not introduce new labels.
+*)
+
+Section TRANSL_LABEL.
+
+Remark loadimm_z_label: forall sz rd l k, tail_nolabel k (loadimm_z sz rd l k).
+Proof. 
+  intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel.
+  induction l as [ | [n p] l]; simpl; TailNoLabel.
+Qed.
+
+Remark loadimm_n_label: forall sz rd l k, tail_nolabel k (loadimm_n sz rd l k).
+Proof. 
+  intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel.
+  induction l as [ | [n p] l]; simpl; TailNoLabel.
+Qed.
+
+Remark loadimm_label: forall sz rd n k, tail_nolabel k (loadimm sz rd n k).
+Proof.
+  unfold loadimm; intros. destruct Nat.leb; [apply loadimm_z_label|apply loadimm_n_label].
+Qed.
+Hint Resolve loadimm_label: labels.
+
+Remark loadimm32_label: forall r n k, tail_nolabel k (loadimm32 r n k).
+Proof.
+  unfold loadimm32; intros. destruct (is_logical_imm32 n); TailNoLabel.
+Qed.
+Hint Resolve loadimm32_label: labels.
+
+Remark loadimm64_label: forall r n k, tail_nolabel k (loadimm64 r n k).
+Proof.
+  unfold loadimm64; intros. destruct (is_logical_imm64 n); TailNoLabel.
+Qed.
+Hint Resolve loadimm64_label: labels.
+
+Remark addimm_aux: forall insn rd r1 n k,
+  (forall rd r1 n, nolabel (insn rd r1 n)) ->
+  tail_nolabel k (addimm_aux insn rd r1 n k).
+Proof.
+  unfold addimm_aux; intros. 
+  destruct Z.eqb. TailNoLabel. destruct Z.eqb; TailNoLabel.
+Qed.
+
+Remark addimm32_label: forall rd r1 n k, tail_nolabel k (addimm32 rd r1 n k).
+Proof.
+  unfold addimm32; intros. 
+  destruct Int.eq. apply addimm_aux; intros; red; auto.
+  destruct Int.eq. apply addimm_aux; intros; red; auto.
+  destruct Int.lt; eapply tail_nolabel_trans; TailNoLabel.
+Qed.
+Hint Resolve addimm32_label: labels.
+
+Remark addimm64_label: forall rd r1 n k, tail_nolabel k (addimm64 rd r1 n k).
+Proof.
+  unfold addimm64; intros. 
+  destruct Int64.eq. apply addimm_aux; intros; red; auto.
+  destruct Int64.eq. apply addimm_aux; intros; red; auto.
+  destruct Int64.lt; eapply tail_nolabel_trans; TailNoLabel.
+Qed.
+Hint Resolve addimm64_label: labels.
+
+Remark logicalimm32_label: forall insn1 insn2 rd r1 n k,
+  (forall rd r1 n, nolabel (insn1 rd r1 n)) ->
+  (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) ->
+  tail_nolabel k (logicalimm32 insn1 insn2 rd r1 n k).
+Proof.
+  unfold logicalimm32; intros.
+  destruct (is_logical_imm32 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+Qed.
+
+Remark logicalimm64_label: forall insn1 insn2 rd r1 n k,
+  (forall rd r1 n, nolabel (insn1 rd r1 n)) ->
+  (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) ->
+  tail_nolabel k (logicalimm64 insn1 insn2 rd r1 n k).
+Proof.
+  unfold logicalimm64; intros.
+  destruct (is_logical_imm64 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+Qed.
+
+Remark move_extended_label: forall rd r1 ex a k, tail_nolabel k (move_extended rd r1 ex a k).
+Proof.
+  unfold move_extended, move_extended_base; intros. destruct Int.eq, ex; TailNoLabel.
+Qed.
+Hint Resolve move_extended_label: labels.
+
+Remark arith_extended_label: forall insnX insnS rd r1 r2 ex a k,
+  (forall rd r1 r2 x, nolabel (insnX rd r1 r2 x)) ->
+  (forall rd r1 r2 s, nolabel (insnS rd r1 r2 s)) ->
+  tail_nolabel k (arith_extended insnX insnS rd r1 r2 ex a k).
+Proof.
+  unfold arith_extended; intros. destruct Int.ltu.
+  TailNoLabel.
+  destruct ex; simpl; TailNoLabel.
+Qed.
+
+Remark loadsymbol_label: forall r id ofs k, tail_nolabel k (loadsymbol r id ofs k).
+Proof.
+  intros; unfold loadsymbol.
+  destruct (Archi.pic_code tt); TailNoLabel. destruct Ptrofs.eq; TailNoLabel.
+Qed. 
+Hint Resolve loadsymbol_label: labels.
+
+Remark transl_cond_label: forall cond args k c,
+  transl_cond cond args k = OK c -> tail_nolabel k c.
+Proof.
+  unfold transl_cond; intros; destruct cond; TailNoLabel.
+- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+Qed.
+
+Remark transl_cond_branch_default_label: forall cond args lbl k c,
+  transl_cond_branch_default cond args lbl k = OK c -> tail_nolabel k c.
+Proof.
+  unfold transl_cond_branch_default; intros. 
+  eapply tail_nolabel_trans; [eapply transl_cond_label;eauto|TailNoLabel].
+Qed.
+Hint Resolve transl_cond_branch_default_label: labels.
+
+Remark transl_cond_branch_label: forall cond args lbl k c,
+  transl_cond_branch cond args lbl k = OK c -> tail_nolabel k c.
+Proof.
+  unfold transl_cond_branch; intros; destruct args; TailNoLabel; destruct cond; TailNoLabel.
+- destruct c0; TailNoLabel.
+- destruct c0; TailNoLabel.
+- destruct (Int.is_power2 n); TailNoLabel.
+- destruct (Int.is_power2 n); TailNoLabel.
+- destruct c0; TailNoLabel.
+- destruct c0; TailNoLabel.
+- destruct (Int64.is_power2' n); TailNoLabel.
+- destruct (Int64.is_power2' n); TailNoLabel.
+Qed.
+
+Remark transl_op_label:
+  forall op args r k c,
+  transl_op op args r k = OK c -> tail_nolabel k c.
+Proof.
+  unfold transl_op; intros; destruct op; TailNoLabel.
+- destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
+- destruct (Float.eq_dec n Float.zero); TailNoLabel.
+- destruct (Float32.eq_dec n Float32.zero); TailNoLabel.
+- apply logicalimm32_label; unfold nolabel; auto.
+- apply logicalimm32_label; unfold nolabel; auto.
+- apply logicalimm32_label; unfold nolabel; auto.
+- unfold shrx32. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel.
+- apply arith_extended_label; unfold nolabel; auto.
+- apply arith_extended_label; unfold nolabel; auto.
+- apply logicalimm64_label; unfold nolabel; auto.
+- apply logicalimm64_label; unfold nolabel; auto.
+- apply logicalimm64_label; unfold nolabel; auto.
+- unfold shrx64. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel.
+- eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
+- destruct (preg_of r); try discriminate; TailNoLabel;
+  (eapply tail_nolabel_trans; [eapply transl_cond_label; eauto | TailNoLabel]).
+Qed.
+
+Remark transl_addressing_label:
+  forall sz addr args insn k c,
+  transl_addressing sz addr args insn k = OK c ->
+  (forall ad, nolabel (insn ad)) ->
+  tail_nolabel k c.
+Proof.
+  unfold transl_addressing; intros; destruct addr; TailNoLabel;
+  eapply tail_nolabel_trans; TailNoLabel.
+  eapply tail_nolabel_trans. apply arith_extended_label; unfold nolabel; auto. TailNoLabel.
+Qed.
+
+Remark transl_load_label:
+  forall trap chunk addr args dst k c,
+  transl_load trap chunk addr args dst k = OK c -> tail_nolabel k c.
+Proof.
+  unfold transl_load; intros; destruct trap; try discriminate; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto.
+Qed.
+
+Remark transl_store_label:
+  forall chunk addr args src k c,
+  transl_store chunk addr args src k = OK c -> tail_nolabel k c.
+Proof.
+  unfold transl_store; intros; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto.
+Qed.
+
+Remark indexed_memory_access_label:
+  forall insn sz base ofs k,
+  (forall ad, nolabel (insn ad)) ->
+  tail_nolabel k (indexed_memory_access insn sz base ofs k).
+Proof.
+  unfold indexed_memory_access; intros. destruct offset_representable.
+  TailNoLabel.
+  eapply tail_nolabel_trans; TailNoLabel.
+Qed.
+
+Remark loadind_label:
+  forall base ofs ty dst k c,
+  loadind base ofs ty dst k = OK c -> tail_nolabel k c.
+Proof.
+  unfold loadind; intros.
+  destruct ty, (preg_of dst); inv H; apply indexed_memory_access_label; intros; exact I.
+Qed.
+
+Remark storeind_label:
+  forall src base ofs ty k c,
+  storeind src base ofs ty k = OK c -> tail_nolabel k c.
+Proof.
+  unfold storeind; intros.
+  destruct ty, (preg_of src); inv H; apply indexed_memory_access_label; intros; exact I.
+Qed.
+
+Remark loadptr_label:
+  forall base ofs dst k, tail_nolabel k (loadptr base ofs dst k).
+Proof.
+  intros. apply indexed_memory_access_label. unfold nolabel; auto.
+Qed.
+
+Remark storeptr_label:
+  forall src base ofs k, tail_nolabel k (storeptr src base ofs k).
+Proof.
+  intros. apply indexed_memory_access_label. unfold nolabel; auto.
+Qed.
+
+Remark make_epilogue_label:
+  forall f k, tail_nolabel k (make_epilogue f k).
+Proof.
+  unfold make_epilogue; intros.
+  (* FIXME destruct is_leaf_function.
+  { TailNoLabel. } *)
+  eapply tail_nolabel_trans.
+  apply loadptr_label.
+  TailNoLabel.
+Qed.
+
+Lemma transl_instr_label:
+  forall f i ep k c,
+  transl_instr f i ep k = OK c ->
+  match i with Mlabel lbl => c = Plabel lbl :: k | _ => tail_nolabel k c end.
+Proof.
+  unfold transl_instr; intros; destruct i; TailNoLabel.
+- eapply loadind_label; eauto.
+- eapply storeind_label; eauto.
+- destruct ep. eapply loadind_label; eauto.
+  eapply tail_nolabel_trans. apply loadptr_label. eapply loadind_label; eauto. 
+- eapply transl_op_label; eauto.
+- eapply transl_load_label; eauto. 
+- eapply transl_store_label; eauto.
+- destruct s0; monadInv H; TailNoLabel.
+- destruct s0; monadInv H; (eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel]).
+- eapply transl_cond_branch_label; eauto.
+- eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel].
+Qed.
+
+Lemma transl_instr_label':
+  forall lbl f i ep k c,
+  transl_instr f i ep k = OK c ->
+  find_label lbl c = if Mach.is_label lbl i then Some k else find_label lbl k.
+Proof.
+  intros. exploit transl_instr_label; eauto.
+  destruct i; try (intros [A B]; apply B).
+  intros. subst c. simpl. auto.
+Qed.
+
+Lemma transl_code_label:
+  forall lbl f c ep tc,
+  transl_code f c ep = OK tc ->
+  match Mach.find_label lbl c with
+  | None => find_label lbl tc = None
+  | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_code f c' false = OK tc'
+  end.
+Proof.
+  induction c; simpl; intros.
+  inv H. auto.
+  monadInv H. rewrite (transl_instr_label' lbl _ _ _ _ _ EQ0).
+  generalize (Mach.is_label_correct lbl a).
+  destruct (Mach.is_label lbl a); intros.
+  subst a. simpl in EQ. exists x; auto.
+  eapply IHc; eauto.
+Qed.
+
+Lemma transl_find_label:
+  forall lbl f tf,
+  transf_function f = OK tf ->
+  match Mach.find_label lbl f.(Mach.fn_code) with
+  | None => find_label lbl tf.(fn_code) = None
+  | Some c => exists tc, find_label lbl tf.(fn_code) = Some tc /\ transl_code f c false = OK tc
+  end.
+Proof.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
+  monadInv EQ. rewrite transl_code'_transl_code in EQ0. unfold fn_code. 
+  simpl. destruct (storeptr_label X30 XSP (fn_retaddr_ofs f) x) as [A B]; rewrite B. 
+  eapply transl_code_label; eauto.
+Qed.
+
+End TRANSL_LABEL.
+
+(** A valid branch in a piece of Mach code translates to a valid ``go to''
+  transition in the generated Asm code. *)
+
+Lemma find_label_goto_label:
+  forall f tf lbl rs m c' b ofs,
+  Genv.find_funct_ptr ge b = Some (Internal f) ->
+  transf_function f = OK tf ->
+  rs PC = Vptr b ofs ->
+  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
+  exists tc', exists rs',
+     goto_label tf lbl rs m = Next rs' m
+  /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
+  /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+  intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
+  intros [tc [A B]].
+  exploit label_pos_code_tail; eauto. instantiate (1 := 0).
+  intros [pos' [P [Q R]]].
+  exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
+  split. unfold goto_label. rewrite P. rewrite H1. auto.
+  split. rewrite Pregmap.gss. constructor; auto.
+  rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
+  auto. omega.
+  generalize (transf_function_no_overflow _ _ H0). omega.
+  intros. apply Pregmap.gso; auto.
+Qed.
+
+(** Existence of return addresses *)
+
+Lemma return_address_exists:
+  forall f sg ros c, is_tail (Mcall sg ros :: c) f.(Mach.fn_code) ->
+  exists ra, return_address_offset f c ra.
+Proof.
+  intros. eapply Asmgenproof0.return_address_exists; eauto.
+- intros. exploit transl_instr_label; eauto.
+  destruct i; try (intros [A B]; apply A). intros. subst c0. repeat constructor.
+- intros. monadInv H0.
+  destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0. monadInv EQ.
+  rewrite transl_code'_transl_code in EQ0.
+  exists x; exists true; split; auto. unfold fn_code.
+  constructor. apply (storeptr_label X30 XSP (fn_retaddr_ofs f0) x).
+- exact transf_function_no_overflow.
+Qed.
+
+(** * Proof of semantic preservation *)
+
+(** Semantic preservation is proved using simulation diagrams
+  of the following form.
+<<
+           st1 --------------- st2
+            |                   |
+           t|                  *|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  The invariant is the [match_states] predicate below, which includes:
+- The Asm code pointed by the PC register is the translation of
+  the current Mach code sequence.
+- Mach register values and Asm register values agree.
+*)
+
+Inductive match_states: Mach.state -> Asm.state -> Prop :=
+  | match_states_intro:
+      forall s fb sp c ep ms m m' rs f tf tc
+        (STACKS: match_stack ge s)
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (MEXT: Mem.extends m m')
+        (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
+        (AG: agree ms sp rs)
+        (DXP: ep = true -> rs#X29 = parent_sp s)
+        (LEAF: is_leaf_function f = true -> rs#RA = parent_ra s),
+      match_states (Mach.State s fb sp c ms m)
+                   (Asm.State rs m')
+  | match_states_call:
+      forall s fb ms m m' rs
+        (STACKS: match_stack ge s)
+        (MEXT: Mem.extends m m')
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = Vptr fb Ptrofs.zero)
+        (ATLR: rs RA = parent_ra s),
+      match_states (Mach.Callstate s fb ms m)
+                   (Asm.State rs m')
+  | match_states_return:
+      forall s ms m m' rs
+        (STACKS: match_stack ge s)
+        (MEXT: Mem.extends m m')
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = parent_ra s),
+      match_states (Mach.Returnstate s ms m)
+                   (Asm.State rs m').
+
+Lemma exec_straight_steps:
+  forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2,
+  match_stack ge s ->
+  Mem.extends m2 m2' ->
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
+  (forall k c (TR: transl_instr f i ep k = OK c),
+   exists rs2,
+       exec_straight tge tf c rs1 m1' k rs2 m2'
+    /\ agree ms2 sp rs2
+    /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)
+    /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
+  exists st',
+  plus step tge (State rs1 m1') E0 st' /\
+  match_states (Mach.State s fb sp c ms2 m2) st'.
+Proof.
+  intros. inversion H2. subst. monadInv H7.
+  exploit H3; eauto. intros [rs2 [A [B [C D]]]].
+  exists (State rs2 m2'); split.
+  - eapply exec_straight_exec; eauto.
+  - econstructor; eauto. eapply exec_straight_at; eauto.
+Qed.
+
+Lemma exec_straight_steps_goto:
+  forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c',
+  match_stack ge s ->
+  Mem.extends m2 m2' ->
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
+  transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
+  it1_is_parent ep i = false ->
+  (forall k c (TR: transl_instr f i ep k = OK c),
+   exists jmp, exists k', exists rs2,
+       exec_straight tge tf c rs1 m1' (jmp :: k') rs2 m2'
+    /\ agree ms2 sp rs2
+    /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2'
+    /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
+  exists st',
+  plus step tge (State rs1 m1') E0 st' /\
+  match_states (Mach.State s fb sp c' ms2 m2) st'.
+Proof.
+  intros. inversion H3. subst. monadInv H9.
+  exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]].
+  generalize (functions_transl _ _ _ H7 H8); intro FN.
+  generalize (transf_function_no_overflow _ _ H8); intro NOOV.
+  exploit exec_straight_steps_2; eauto.
+  intros [ofs' [PC2 CT2]].
+  exploit find_label_goto_label; eauto.
+  intros [tc' [rs3 [GOTO [AT' OTH]]]].
+  exists (State rs3 m2'); split.
+  eapply plus_right'.
+  eapply exec_straight_steps_1; eauto.
+  econstructor; eauto.
+  eapply find_instr_tail. eauto.
+  rewrite C. eexact GOTO.
+  traceEq.
+  econstructor; eauto.
+  apply agree_exten with rs2; auto with asmgen.
+  congruence.
+  rewrite OTH by congruence; auto.
+Qed.
+
+Lemma exec_straight_opt_steps_goto:
+  forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c',
+  match_stack ge s ->
+  Mem.extends m2 m2' ->
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
+  transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
+  it1_is_parent ep i = false ->
+  (forall k c (TR: transl_instr f i ep k = OK c),
+   exists jmp, exists k', exists rs2,
+       exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2'
+    /\ agree ms2 sp rs2
+    /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2'
+    /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
+  exists st',
+  plus step tge (State rs1 m1') E0 st' /\
+  match_states (Mach.State s fb sp c' ms2 m2) st'.
+Proof.
+  intros. inversion H3. subst. monadInv H9.
+  exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]].
+  generalize (functions_transl _ _ _ H7 H8); intro FN.
+  generalize (transf_function_no_overflow _ _ H8); intro NOOV.
+  inv A.
+- exploit find_label_goto_label; eauto.
+  intros [tc' [rs3 [GOTO [AT' OTH]]]].
+  exists (State rs3 m2'); split.
+  apply plus_one. econstructor; eauto.
+  eapply find_instr_tail. eauto.
+  rewrite C. eexact GOTO.
+  econstructor; eauto.
+  apply agree_exten with rs2; auto with asmgen.
+  congruence.
+  rewrite OTH by congruence; auto.
+- exploit exec_straight_steps_2; eauto.
+  intros [ofs' [PC2 CT2]].
+  exploit find_label_goto_label; eauto.
+  intros [tc' [rs3 [GOTO [AT' OTH]]]].
+  exists (State rs3 m2'); split.
+  eapply plus_right'.
+  eapply exec_straight_steps_1; eauto.
+  econstructor; eauto.
+  eapply find_instr_tail. eauto.
+  rewrite C. eexact GOTO.
+  traceEq.
+  econstructor; eauto.
+  apply agree_exten with rs2; auto with asmgen.
+  congruence.
+  rewrite OTH by congruence; auto.
+Qed.
+
+(** We need to show that, in the simulation diagram, we cannot
+  take infinitely many Mach transitions that correspond to zero
+  transitions on the Asm side.  Actually, all Mach transitions
+  correspond to at least one Asm transition, except the
+  transition from [Machsem.Returnstate] to [Machsem.State].
+  So, the following integer measure will suffice to rule out
+  the unwanted behaviour. *)
+
+Definition measure (s: Mach.state) : nat :=
+  match s with
+  | Mach.State _ _ _ _ _ _ => 0%nat
+  | Mach.Callstate _ _ _ _ => 0%nat
+  | Mach.Returnstate _ _ _ => 1%nat
+  end.
+
+Remark preg_of_not_X29: forall r, negb (mreg_eq r R29) = true -> IR X29 <> preg_of r.
+Proof.
+  intros. change (IR X29) with (preg_of R29). red; intros.
+  exploit preg_of_injective; eauto. intros; subst r; discriminate.
+Qed.
+
+Lemma sp_val': forall ms sp rs, agree ms sp rs -> sp = rs XSP.
+Proof.
+  intros. eapply sp_val; eauto. 
+Qed. 
+
+(** This is the simulation diagram.  We prove it by case analysis on the Mach transition. *)
+
+Theorem step_simulation:
+  forall S1 t S2, Mach.step return_address_offset ge S1 t S2 ->
+  forall S1' (MS: match_states S1 S1')  (WF: wf_state ge S1),
+  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
+  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
+Proof.
+  induction 1; intros; inv MS.
+
+- (* Mlabel *)
+  left; eapply exec_straight_steps; eauto; intros.
+  monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split. { apply agree_nextinstr; auto. }
+  split. { simpl; congruence. }
+  rewrite nextinstr_inv by congruence; assumption.
+
+- (* Mgetstack *)
+  unfold load_stack in H.
+  exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+  rewrite (sp_val _ _ _ AG) in A.
+  left; eapply exec_straight_steps; eauto. intros. simpl in TR.
+  exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q [R S]]]].
+  exists rs'; split. eauto.
+  split. { eapply agree_set_mreg; eauto with asmgen. congruence. }
+  split. { simpl; congruence. }
+  rewrite S. assumption.
+
+- (* Msetstack *)
+  unfold store_stack in H.
+  assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
+  exploit Mem.storev_extends; eauto. intros [m2' [A B]].
+  left; eapply exec_straight_steps; eauto.
+  rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
+  exploit storeind_correct; eauto with asmgen. intros [rs' [P [Q R]]].
+  exists rs'; split. eauto.
+  split. eapply agree_undef_regs; eauto with asmgen.
+  simpl; intros.
+  split. rewrite Q; auto with asmgen.
+  rewrite R. assumption.
+
+- (* Mgetparam *)
+  assert (f0 = f) by congruence; subst f0.
+  unfold load_stack in *.
+  exploit Mem.loadv_extends. eauto. eexact H0. auto.
+  intros [parent' [A B]]. rewrite (sp_val' _ _ _ AG) in A.
+  exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
+  exploit Mem.loadv_extends. eauto. eexact H1. auto.
+  intros [v' [C D]].
+Opaque loadind.
+  left; eapply exec_straight_steps; eauto; intros. monadInv TR. 
+  destruct ep.
+(* X30 contains parent *)
+  exploit loadind_correct. eexact EQ.
+  instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence.
+  intros [rs1 [P [Q [R S]]]].
+  exists rs1; split. eauto.
+  split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
+  simpl; split; intros.
+  { rewrite R; auto with asmgen.
+    apply preg_of_not_X29; auto.
+  }
+  { rewrite S; auto. }
+    
+(* X30 does not contain parent *)
+  exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]].
+  exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence.
+  intros [rs2 [S [T [U V]]]].
+  exists rs2; split. eapply exec_straight_trans; eauto.
+  split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
+  instantiate (1 := rs1#X29 <- (rs2#X29)). intros.
+  rewrite Pregmap.gso; auto with asmgen.
+  congruence.
+  intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen.
+  split; simpl; intros. rewrite U; auto with asmgen.
+  apply preg_of_not_X29; auto.
+  rewrite V. rewrite R by congruence. auto.
+  
+- (* Mop *)
+  assert (eval_operation tge sp op (map rs args) m = Some v).
+  { rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. }
+  exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0.
+  intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+  left; eapply exec_straight_steps; eauto; intros. simpl in TR.
+  exploit transl_op_correct; eauto. intros [rs2 [P [Q [R S]]]].
+  exists rs2; split. eauto. split.
+  apply agree_set_undef_mreg with rs0; auto. 
+  apply Val.lessdef_trans with v'; auto.
+  split; simpl; intros. InvBooleans. 
+  rewrite R; auto. apply preg_of_not_X29; auto.
+Local Transparent destroyed_by_op.
+  destruct op; try exact I; simpl; congruence.
+  rewrite S.
+  auto.
+- (* Mload *)
+  destruct trap.
+  {
+  assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
+  { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
+  exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+  intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+  exploit Mem.loadv_extends; eauto. intros [v' [C D]].
+  left; eapply exec_straight_steps; eauto; intros. simpl in TR.
+  exploit transl_load_correct; eauto. intros [rs2 [P [Q [R S]]]].
+  exists rs2; split. eauto.
+  split. eapply agree_set_undef_mreg; eauto. congruence.
+  split. simpl; congruence.
+  rewrite S. assumption.
+  }
+  
+  (* Mload notrap1 *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
+- (* Mload notrap *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
+- (* Mload notrap *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
+- (* Mstore *)
+  assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
+  { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
+  exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+  intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+  assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
+  exploit Mem.storev_extends; eauto. intros [m2' [C D]].
+  left; eapply exec_straight_steps; eauto.
+  intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P [Q R]]].
+  exists rs2; split. eauto.
+  split. eapply agree_undef_regs; eauto with asmgen.
+  split. simpl; congruence.
+  rewrite R. assumption.
+
+- (* Mcall *)
+  assert (f0 = f) by congruence.  subst f0.
+  inv AT.
+  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
+  { eapply transf_function_no_overflow; eauto. }
+  destruct ros as [rf|fid]; simpl in H; monadInv H5.
++ (* Indirect call *)
+  assert (rs rf = Vptr f' Ptrofs.zero).
+  { destruct (rs rf); try discriminate.
+    revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+  assert (rs0 x0 = Vptr f' Ptrofs.zero).
+  { exploit ireg_val; eauto. rewrite H5; intros LD; inv LD; auto. }
+  generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
+  assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
+  {  econstructor; eauto. }
+  exploit return_address_offset_correct; eauto. intros; subst ra.
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_internal. Simpl. rewrite <- H2; simpl; eauto.
+  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
+  simpl. eauto.
+  econstructor; eauto.
+  econstructor; eauto.
+  eapply agree_sp_def; eauto.
+  simpl. eapply agree_exten; eauto. intros. Simpl.
+  Simpl. rewrite <- H2. auto.
++ (* Direct call *)
+  generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
+  assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
+    econstructor; eauto.
+  exploit return_address_offset_correct; eauto. intros; subst ra.
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_internal. eauto.
+  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
+  simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. eauto.
+  econstructor; eauto.
+  econstructor; eauto.
+  eapply agree_sp_def; eauto.
+  simpl. eapply agree_exten; eauto. intros. Simpl.
+  Simpl. rewrite <- H2. auto.
+
+- (* Mtailcall *)
+  assert (f0 = f) by congruence.  subst f0.
+  inversion AT; subst.
+  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
+  { eapply transf_function_no_overflow; eauto. }
+  exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [parent' [A B]].
+  destruct ros as [rf|fid]; simpl in H; monadInv H7.
++ (* Indirect call *)
+  assert (rs rf = Vptr f' Ptrofs.zero).
+  { destruct (rs rf); try discriminate.
+    revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+  assert (rs0 x0 = Vptr f' Ptrofs.zero).
+  { exploit ireg_val; eauto. rewrite H7; intros LD; inv LD; auto. }
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
+  left; econstructor; split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
+  econstructor; eauto.
+  apply agree_set_other; auto with asmgen.
+  Simpl. rewrite Z by (rewrite <- (ireg_of_eq _ _ EQ1); eauto with asmgen). assumption. 
++ (* Direct call *)
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
+  left; econstructor; split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
+  econstructor; eauto.
+  apply agree_set_other; auto with asmgen.
+  Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. auto.
+
+- (* Mbuiltin *)
+  inv AT. monadInv H4.
+  exploit functions_transl; eauto. intro FN.
+  generalize (transf_function_no_overflow _ _ H3); intro NOOV.
+  exploit builtin_args_match; eauto. intros [vargs' [P Q]].
+  exploit external_call_mem_extends; eauto.
+  intros [vres' [m2' [A [B [C D]]]]].
+  left. econstructor; split. apply plus_one.
+  eapply exec_step_builtin. eauto. eauto.
+  eapply find_instr_tail; eauto.
+  erewrite <- sp_val by eauto.
+  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  eauto.
+  econstructor; eauto.
+  instantiate (2 := tf); instantiate (1 := x).
+  unfold nextinstr. rewrite Pregmap.gss.
+  rewrite set_res_other. rewrite undef_regs_other_2.
+  rewrite <- H1. simpl. econstructor; eauto.
+  eapply code_tail_next_int; eauto.
+  rewrite preg_notin_charact. intros. auto with asmgen.
+  auto with asmgen.
+  apply agree_nextinstr. eapply agree_set_res; auto.
+  eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto.
+  congruence.
+
+  Simpl.
+  rewrite set_res_other by trivial.
+  rewrite undef_regs_other.
+  assumption.
+  intro.
+  rewrite in_map_iff.
+  intros (x0 & PREG & IN).
+  subst r'.
+  intro.
+  apply (preg_of_not_RA x0).
+  congruence.
+  
+- (* Mgoto *)
+  assert (f0 = f) by congruence. subst f0.
+  inv AT. monadInv H4.
+  exploit find_label_goto_label; eauto. intros [tc' [rs' [GOTO [AT2 INV]]]].
+  left; exists (State rs' m'); split.
+  apply plus_one. econstructor; eauto.
+  eapply functions_transl; eauto.
+  eapply find_instr_tail; eauto.
+  simpl; eauto.
+  econstructor; eauto.
+  eapply agree_exten; eauto with asmgen.
+  congruence.
+
+  rewrite INV by congruence.
+  assumption.
+  
+- (* Mcond true *)
+  assert (f0 = f) by congruence. subst f0.
+  exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
+  left; eapply exec_straight_opt_steps_goto; eauto.
+  intros. simpl in TR.
+  exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D).
+  exists jmp; exists k; exists rs'.
+  split. eexact A. 
+  split. apply agree_exten with rs0; auto with asmgen.
+  split.
+  exact B.
+  rewrite D. exact LEAF.
+
+- (* Mcond false *)
+  exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
+  left; eapply exec_straight_steps; eauto. intros. simpl in TR.
+  exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D).
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto.
+  split. apply agree_exten with rs0; auto. intros. Simpl.
+  split.
+  simpl; congruence.
+  Simpl. rewrite D.
+  exact LEAF.
+
+- (* Mjumptable *)
+  assert (f0 = f) by congruence. subst f0.
+  inv AT. monadInv H6.
+  exploit functions_transl; eauto. intro FN.
+  generalize (transf_function_no_overflow _ _ H5); intro NOOV.
+  exploit find_label_goto_label. eauto. eauto.
+  instantiate (2 := rs0#X16 <- Vundef #X17 <- Vundef).
+  Simpl. eauto.
+  eauto.
+  intros [tc' [rs' [A [B C]]]].
+  exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
+  left; econstructor; split.
+  apply plus_one. econstructor; eauto.
+  eapply find_instr_tail; eauto.
+  simpl. Simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
+  econstructor; eauto.
+  eapply agree_undef_regs; eauto.
+  simpl. intros. rewrite C; auto with asmgen. Simpl.
+  congruence.
+
+  rewrite C by congruence.
+  repeat rewrite Pregmap.gso by congruence.
+  assumption.
+  
+- (* Mreturn *)
+  assert (f0 = f) by congruence. subst f0.
+  inversion AT; subst. simpl in H6; monadInv H6.
+  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
+    eapply transf_function_no_overflow; eauto.
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
+  left; econstructor; split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
+  econstructor; eauto.
+  apply agree_set_other; auto with asmgen.
+
+- (* internal function *)
+
+  exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
+  generalize EQ; intros EQ'. monadInv EQ'.
+  destruct (zlt Ptrofs.max_unsigned (list_length_z x0.(fn_code))); inversion EQ1. clear EQ1. subst x0.
+  unfold store_stack in *.
+  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+  intros [m1' [C D]].
+  exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
+  intros [m2' [F G]].
+  simpl chunk_of_type in F.
+  exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
+  intros [m3' [P Q]].
+  change (chunk_of_type Tptr) with Mint64 in *.
+  (* Execution of function prologue *)
+  monadInv EQ0. rewrite transl_code'_transl_code in EQ1.
+  set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) ::
+                 storeptr RA XSP (fn_retaddr_ofs f) x0) in *.
+  set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
+  set (rs2 := nextinstr (rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef)).
+  exploit (storeptr_correct tge tf XSP (fn_retaddr_ofs f) RA x0 m2' m3' rs2).
+    simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR. 
+    change (rs2 X2) with sp. eexact P. 
+    simpl; congruence. congruence.
+  intros (rs3 & U & V & W).
+  assert (EXEC_PROLOGUE:
+            exec_straight tge tf
+              tf.(fn_code) rs0 m'
+              x0 rs3 m3').
+  { change (fn_code tf) with tfbody; unfold tfbody.
+    apply exec_straight_step with rs2 m2'.
+    unfold exec_instr. rewrite C. fold sp.
+    rewrite <- (sp_val _ _ _ AG). rewrite F. reflexivity.
+    reflexivity. 
+    eexact U. }
+  exploit exec_straight_steps_2; eauto using functions_transl. omega. constructor.
+  intros (ofs' & X & Y).                    
+  left; exists (State rs3 m3'); split.
+  eapply exec_straight_steps_1; eauto. omega. constructor.
+  econstructor; eauto.
+  rewrite X; econstructor; eauto. 
+  apply agree_exten with rs2; eauto with asmgen.
+  unfold rs2. 
+  apply agree_nextinstr. apply agree_set_other; auto with asmgen.
+  apply agree_change_sp with (parent_sp s). 
+  apply agree_undef_regs with rs0. auto.
+Local Transparent destroyed_at_function_entry. simpl.
+  simpl; intros; Simpl.
+  unfold sp; congruence.
+  intros. rewrite V by auto with asmgen. reflexivity.
+
+  rewrite W.
+  unfold rs2.
+  Simpl.
+  
+- (* external function *)
+  exploit functions_translated; eauto.
+  intros [tf [A B]]. simpl in B. inv B.
+  exploit extcall_arguments_match; eauto.
+  intros [args' [C D]].
+  exploit external_call_mem_extends; eauto.
+  intros [res' [m2' [P [Q [R S]]]]].
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_external; eauto.
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  econstructor; eauto.
+  unfold loc_external_result. apply agree_set_other; auto. apply agree_set_pair; auto.
+  apply agree_undef_caller_save_regs; auto. 
+
+- (* return *)
+  inv STACKS. simpl in *.
+  right. split. omega. split. auto.
+  rewrite <- ATPC in H5.
+  econstructor; eauto. congruence.
+  inv WF.
+  inv STACK.
+  inv H1.
+  congruence.
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, Mach.initial_state prog st1 ->
+  exists st2, Asm.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H. unfold ge0 in *.
+  econstructor; split.
+  econstructor.
+  eapply (Genv.init_mem_transf_partial TRANSF); eauto.
+  replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
+     with (Vptr fb Ptrofs.zero).
+  econstructor; eauto.
+  constructor.
+  apply Mem.extends_refl.
+  split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
+  intros. rewrite Regmap.gi. auto.
+  unfold Genv.symbol_address.
+  rewrite (match_program_main TRANSF).
+  rewrite symbols_preserved.
+  unfold ge; rewrite H1. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r,
+  match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. constructor. assumption.
+  compute in H1. inv H1.
+  generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
+Proof.
+  eapply forward_simulation_star with (measure := measure)
+         (match_states := fun S1 S2 => match_states S1 S2 /\ wf_state ge S1).
+  - apply senv_preserved.
+  - simpl; intros. exploit transf_initial_states; eauto.
+    intros (s2 & A & B).
+    exists s2; intuition auto. apply wf_initial; auto.
+  - simpl; intros. destruct H as [MS WF]. eapply transf_final_states; eauto.
+  - simpl; intros. destruct H0 as [MS WF]. 
+    exploit step_simulation; eauto. intros [ (s2' & A & B) | (A & B & C) ].
+    + left; exists s2'; intuition auto. eapply wf_step; eauto. 
+    + right; intuition auto. eapply wf_step; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
new file mode 100644
index 00000000..0e36bd05
--- /dev/null
+++ b/aarch64/Asmgenproof1.v
@@ -0,0 +1,2138 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for AArch64 code generation: auxiliary results. *)
+
+Require Import Recdef Coqlib Zwf Zbits.
+Require Import Maps Errors AST Integers Floats Values Memory Globalenvs.
+Require Import Op Locations Mach Asm Conventions.
+Require Import Asmgen.
+Require Import Asmgenproof0.
+
+Local Transparent Archi.ptr64.
+
+(** Properties of registers *)
+
+Lemma preg_of_not_RA:
+  forall r, (preg_of r) <> RA.
+Proof.
+  destruct r; discriminate.
+Qed.
+
+Lemma RA_not_written:
+  forall (rs : regset) dst v,
+    rs # (preg_of dst) <- v RA = rs RA.
+Proof.
+  intros.
+  apply Pregmap.gso.
+  intro.
+  symmetry in H.
+  exact (preg_of_not_RA dst H).
+Qed.
+
+Hint Resolve RA_not_written : asmgen.
+
+Lemma RA_not_written2:
+  forall (rs : regset) dst v i,
+    preg_of dst = i ->
+    rs # i <- v RA = rs RA.
+Proof.
+  intros.
+  subst i.
+  apply RA_not_written.
+Qed.
+
+Hint Resolve RA_not_written2 : asmgen.
+
+Lemma RA_not_written3:
+  forall (rs : regset) dst v i,
+    ireg_of dst = OK i ->
+    rs # i <- v RA = rs RA.
+Proof.
+  intros.
+  unfold ireg_of in H.
+  destruct preg_of eqn:PREG; try discriminate.
+  replace i0 with i in * by congruence.
+  eapply RA_not_written2; eassumption.
+Qed.
+
+Hint Resolve RA_not_written3 : asmgen.
+
+Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC.
+Proof.
+  destruct r; simpl; congruence.
+Qed.
+Hint Resolve preg_of_iregsp_not_PC: asmgen.
+
+Lemma preg_of_not_X16: forall r, preg_of r <> X16.
+Proof.
+  destruct r; simpl; congruence.
+Qed.
+
+Lemma ireg_of_not_X16: forall r x, ireg_of r = OK x -> x <> X16.
+Proof.
+  unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+  red; intros; subst x. elim (preg_of_not_X16 r); auto.
+Qed.
+
+Lemma ireg_of_not_RA: forall r x, ireg_of r = OK x -> x <> RA.
+Proof.
+  unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+  red; intros; subst x. elim (preg_of_not_RA r); auto.
+Qed.
+
+Lemma ireg_of_not_RA': forall r x, ireg_of r = OK x -> RA <> x.
+Proof.
+  intros. intro.
+  apply (ireg_of_not_RA r x); auto.
+Qed.
+
+Lemma ireg_of_not_RA'': forall r x, ireg_of r = OK x -> IR RA <> IR x.
+Proof.
+  intros. intro.
+  apply (ireg_of_not_RA' r x); auto. congruence.
+Qed.
+
+Hint Resolve ireg_of_not_RA ireg_of_not_RA' ireg_of_not_RA'' : asmgen.
+
+Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16.
+Proof.
+  intros. apply ireg_of_not_X16 in H. congruence.
+Qed.
+
+Hint Resolve preg_of_not_X16 ireg_of_not_X16 ireg_of_not_X16': asmgen.
+
+(** Useful simplification tactic *)
+
+
+Ltac Simplif :=
+  ((rewrite nextinstr_inv by eauto with asmgen)
+  || (rewrite nextinstr_inv1 by eauto with asmgen)
+  || (rewrite Pregmap.gss)
+  || (rewrite nextinstr_pc)
+  || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+(** * Correctness of ARM constructor functions *)
+
+Section CONSTRUCTORS.
+
+Variable ge: genv.
+Variable fn: function.
+
+(** Decomposition of integer literals *)
+
+Inductive wf_decomposition: list (Z * Z) -> Prop :=
+  | wf_decomp_nil:
+      wf_decomposition nil
+  | wf_decomp_cons: forall m n p l,
+      n = Zzero_ext 16 m -> 0 <= p -> wf_decomposition l ->
+      wf_decomposition ((n, p) :: l).
+
+Lemma decompose_int_wf:
+  forall N n p, 0 <= p -> wf_decomposition (decompose_int N n p).
+Proof.
+Local Opaque Zzero_ext.
+  induction N as [ | N]; simpl; intros.
+- constructor.
+- set (frag := Zzero_ext 16 (Z.shiftr n p)) in *. destruct (Z.eqb frag 0).
++ apply IHN. omega.
++ econstructor. reflexivity. omega. apply IHN; omega. 
+Qed.
+
+Fixpoint recompose_int (accu: Z) (l: list (Z * Z)) : Z :=
+  match l with
+  | nil => accu
+  | (n, p) :: l => recompose_int (Zinsert accu n p 16) l
+  end.
+
+Lemma decompose_int_correct:
+  forall N n p accu,
+  0 <= p ->
+  (forall i, p <= i -> Z.testbit accu i = false) ->
+  (forall i, 0 <= i < p + Z.of_nat N * 16 ->
+   Z.testbit (recompose_int accu (decompose_int N n p)) i =
+   if zlt i p then Z.testbit accu i else Z.testbit n i).
+Proof.
+  induction N as [ | N]; intros until accu; intros PPOS ABOVE i RANGE.
+- simpl. rewrite zlt_true; auto. xomega.
+- rewrite inj_S in RANGE. simpl.
+  set (frag := Zzero_ext 16 (Z.shiftr n p)).
+  assert (FRAG: forall i, p <= i < p + 16 -> Z.testbit n i = Z.testbit frag (i - p)).
+  { unfold frag; intros. rewrite Zzero_ext_spec by omega. rewrite zlt_true by omega.
+    rewrite Z.shiftr_spec by omega. f_equal; omega. }
+  destruct (Z.eqb_spec frag 0).
++ rewrite IHN.
+* destruct (zlt i p). rewrite zlt_true by omega. auto.
+  destruct (zlt i (p + 16)); auto.
+  rewrite ABOVE by omega. rewrite FRAG by omega. rewrite e, Z.testbit_0_l. auto.
+* omega.
+* intros; apply ABOVE; omega.
+* xomega.
++ simpl. rewrite IHN.
+* destruct (zlt i (p + 16)).
+** rewrite Zinsert_spec by omega. unfold proj_sumbool.
+   rewrite zlt_true by omega.
+   destruct (zlt i p).
+   rewrite zle_false by omega. auto.
+   rewrite zle_true by omega. simpl. symmetry; apply FRAG; omega.
+** rewrite Z.ldiff_spec, Z.shiftl_spec by omega.
+   change 65535 with (two_p 16 - 1). rewrite Ztestbit_two_p_m1 by omega.
+   rewrite zlt_false by omega. rewrite zlt_false by omega. apply andb_true_r. 
+* omega.
+* intros. rewrite Zinsert_spec by omega. unfold proj_sumbool.
+  rewrite zle_true by omega. rewrite zlt_false by omega. simpl.
+  apply ABOVE. omega.
+* xomega.
+Qed.
+
+Corollary decompose_int_eqmod: forall N n,
+  eqmod (two_power_nat (N * 16)%nat) (recompose_int 0 (decompose_int N n 0)) n.
+Proof.
+  intros; apply eqmod_same_bits; intros.
+  rewrite decompose_int_correct. apply zlt_false; omega. 
+  omega. intros; apply Z.testbit_0_l. xomega.
+Qed.
+
+Corollary decompose_notint_eqmod: forall N n,
+  eqmod (two_power_nat (N * 16)%nat)
+        (Z.lnot (recompose_int 0 (decompose_int N (Z.lnot n) 0))) n.
+Proof.
+  intros; apply eqmod_same_bits; intros.
+  rewrite Z.lnot_spec, decompose_int_correct.
+  rewrite zlt_false by omega. rewrite Z.lnot_spec by omega. apply negb_involutive.
+  omega. intros; apply Z.testbit_0_l. xomega. omega.
+Qed.
+
+Lemma negate_decomposition_wf:
+  forall l, wf_decomposition l -> wf_decomposition (negate_decomposition l).
+Proof.
+  induction 1; simpl; econstructor; auto.
+  instantiate (1 := (Z.lnot m)).
+  apply equal_same_bits; intros.
+  rewrite H. change 65535 with (two_p 16 - 1).
+  rewrite Z.lxor_spec, !Zzero_ext_spec, Z.lnot_spec, Ztestbit_two_p_m1 by omega.
+  destruct (zlt i 16).
+  apply xorb_true_r.
+  auto.
+Qed.
+
+Lemma Zinsert_eqmod:
+  forall n x1 x2 y p l, 0 <= p -> 0 <= l ->
+  eqmod (two_power_nat n) x1 x2 ->
+  eqmod (two_power_nat n) (Zinsert x1 y p l) (Zinsert x2 y p l).
+Proof.
+  intros. apply eqmod_same_bits; intros. rewrite ! Zinsert_spec by omega.
+  destruct (zle p i && zlt i (p + l)); auto.
+  apply same_bits_eqmod with n; auto.
+Qed.
+
+Lemma Zinsert_0_l:
+  forall y p l,
+  0 <= p -> 0 <= l ->
+  Z.shiftl (Zzero_ext l y) p = Zinsert 0 (Zzero_ext l y) p l.
+Proof.
+  intros. apply equal_same_bits; intros.
+  rewrite Zinsert_spec by omega. unfold proj_sumbool.
+  destruct (zlt i p); [rewrite zle_false by omega|rewrite zle_true by omega]; simpl.
+- rewrite Z.testbit_0_l, Z.shiftl_spec_low by auto. auto.
+- rewrite Z.shiftl_spec by omega. 
+  destruct (zlt i (p + l)); auto.
+  rewrite Zzero_ext_spec, zlt_false, Z.testbit_0_l by omega. auto.
+Qed.
+
+Lemma recompose_int_negated:
+  forall l, wf_decomposition l ->
+  forall accu, recompose_int (Z.lnot accu) (negate_decomposition l) = Z.lnot (recompose_int accu l).
+Proof.
+  induction 1; intros accu; simpl.
+- auto.
+- rewrite <- IHwf_decomposition. f_equal. apply equal_same_bits; intros. 
+  rewrite Z.lnot_spec, ! Zinsert_spec, Z.lxor_spec, Z.lnot_spec by omega.
+  unfold proj_sumbool.
+  destruct (zle p i); simpl; auto.
+  destruct (zlt i (p + 16)); simpl; auto.
+  change 65535 with (two_p 16 - 1).
+  rewrite Ztestbit_two_p_m1 by omega. rewrite zlt_true by omega.
+  apply xorb_true_r. 
+Qed.
+
+Lemma exec_loadimm_k_w:
+  forall (rd: ireg) k m l,
+  wf_decomposition l ->
+  rd <> RA ->
+  forall (rs: regset) accu,
+  rs#rd = Vint (Int.repr accu) ->
+  exists rs',
+     exec_straight_opt ge fn (loadimm_k W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (recompose_int accu l))
+  /\ (forall r, r <> PC -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  induction 1; intros RD_NOT_RA rs accu ACCU; simpl.                          
+- exists rs; split. apply exec_straight_opt_refl. auto.
+- destruct (IHwf_decomposition RD_NOT_RA
+                (nextinstr (rs#rd <- (insert_in_int rs#rd n p 16)))
+                (Zinsert accu n p 16))
+  as (rs' & P & Q & R & S).
+  Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. 
+  apply Zinsert_eqmod. auto. omega. apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
+  exists rs'; split.
+  eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P.
+  split. exact Q.
+  split.
+  { intros; Simpl.
+    rewrite R by auto. Simpl. }
+  { rewrite S. Simpl. }
+Qed.
+
+Lemma exec_loadimm_z_w:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  rd <> RA ->
+  exists rs',
+     exec_straight ge fn (loadimm_z W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (recompose_int 0 l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_z; destruct 1; intro RD_NOT_RA.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl.
+  intros; Simpl.
+- set (accu0 := Zinsert 0 n p 16).
+  set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
+  destruct (exec_loadimm_k_w rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R & S); auto.
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; omega.
+  reflexivity.
+  split. exact Q. 
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_w:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  rd <> RA ->
+  exists rs',
+     exec_straight ge fn (loadimm_n W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l)))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_n; destruct 1; intro RD_NOT_RA.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. 
+  intros; Simpl.
+- set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+  set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
+  destruct (exec_loadimm_k_w rd k m (negate_decomposition l) 
+                                    (negate_decomposition_wf l H1)
+                                    RD_NOT_RA rs1 accu0)
+    as (rs2 & P & Q & R & S).
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal.
+  unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; omega.
+  reflexivity.  
+  split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm32:
+  forall rd n k rs m
+  (RD_NOT_RA : rd <> RA),
+  exists rs',
+     exec_straight ge fn (loadimm32 rd n k) rs m k rs' m
+  /\ rs'#rd = Vint n
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm32, loadimm; intros.
+  destruct (is_logical_imm32 n).
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite Int.repr_unsigned, Int.or_zero_l; auto.
+  intros; Simpl.
+- set (dz := decompose_int 2%nat (Int.unsigned n) 0).
+  set (dn := decompose_int 2%nat (Z.lnot (Int.unsigned n)) 0).
+  assert (A: Int.repr (recompose_int 0 dz) = n).
+  { transitivity (Int.repr (Int.unsigned n)).
+    apply Int.eqm_samerepr. apply decompose_int_eqmod. 
+    apply Int.repr_unsigned. }
+  assert (B: Int.repr (Z.lnot (recompose_int 0 dn)) = n).
+  { transitivity (Int.repr (Int.unsigned n)).
+    apply Int.eqm_samerepr. apply decompose_notint_eqmod. 
+    apply Int.repr_unsigned. }
+  destruct Nat.leb.
++ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega. trivial.
++ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega. trivial.
+Qed.
+
+Lemma exec_loadimm_k_x:
+  forall (rd: ireg) k m l,
+    wf_decomposition l ->
+    rd <> RA ->
+  forall (rs: regset) accu,
+  rs#rd = Vlong (Int64.repr accu) ->
+  exists rs',
+     exec_straight_opt ge fn (loadimm_k X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  induction 1; intros RD_NOT_RA rs accu ACCU; simpl.
+- exists rs; split. apply exec_straight_opt_refl. auto.
+- destruct (IHwf_decomposition RD_NOT_RA
+                (nextinstr (rs#rd <- (insert_in_long rs#rd n p 16)))
+                (Zinsert accu n p 16))
+  as (rs' & P & Q & R).
+  Simpl. rewrite ACCU. simpl. f_equal. apply Int64.eqm_samerepr. 
+  apply Zinsert_eqmod. auto. omega. apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr.
+  exists rs'; split.
+  eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P.
+  split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+Qed.
+
+Lemma exec_loadimm_z_x:
+  forall rd l k rs m,
+  wf_decomposition l ->
+    rd <> RA ->
+  exists rs',
+     exec_straight ge fn (loadimm_z X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_z; destruct 1; intro RD_NOT_RA.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl.
+  intros; Simpl.
+- set (accu0 := Zinsert 0 n p 16).
+  set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
+  destruct (exec_loadimm_k_x rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R); auto.
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; omega.
+  reflexivity.
+  split. exact Q. 
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_x:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  rd <> RA ->
+  exists rs',
+     exec_straight ge fn (loadimm_n X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l)))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_n; destruct 1; intro RD_NOT_RA.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. 
+  intros; Simpl.
+- set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+  set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
+  destruct (exec_loadimm_k_x rd k m (negate_decomposition l) 
+                                    (negate_decomposition_wf l H1)
+                                    RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R).
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal.
+  unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; omega.
+  reflexivity.  
+  split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm64:
+  forall rd n k rs m,
+    rd <> RA ->
+  exists rs',
+     exec_straight ge fn (loadimm64 rd n k) rs m k rs' m
+  /\ rs'#rd = Vlong n
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm64, loadimm; intros until m; intro RD_NOT_RA.
+  destruct (is_logical_imm64 n).
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite Int64.repr_unsigned, Int64.or_zero_l; auto.
+  intros; Simpl.
+- set (dz := decompose_int 4%nat (Int64.unsigned n) 0).
+  set (dn := decompose_int 4%nat (Z.lnot (Int64.unsigned n)) 0).
+  assert (A: Int64.repr (recompose_int 0 dz) = n).
+  { transitivity (Int64.repr (Int64.unsigned n)).
+    apply Int64.eqm_samerepr. apply decompose_int_eqmod. 
+    apply Int64.repr_unsigned. }
+  assert (B: Int64.repr (Z.lnot (recompose_int 0 dn)) = n).
+  { transitivity (Int64.repr (Int64.unsigned n)).
+    apply Int64.eqm_samerepr. apply decompose_notint_eqmod. 
+    apply Int64.repr_unsigned. }
+  destruct Nat.leb.
++ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega. trivial.
++ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega. trivial.
+Qed.
+
+(** Add immediate *)
+
+Lemma exec_addimm_aux_32:
+  forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs#r1 (Vint (Int.repr n))))) m) ->
+  (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) ->
+  forall rd r1 n k rs m,
+    (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vint n)
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros insn sem SEM ASSOC; intros until m; intro RD_NOT_RA. unfold addimm_aux.
+  set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo).
+  assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; omega).
+  rewrite <- (Int.repr_unsigned n).
+  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; omega.
+  split; intros; Simpl.
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; omega.
+  split; intros; Simpl.
+- econstructor; split. eapply exec_straight_two.
+  apply SEM. apply SEM. Simpl. Simpl.
+  split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr.
+  rewrite E. auto with ints.
+  split; intros; Simpl.
+Qed.
+
+Lemma exec_addimm32:
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (addimm32 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = Val.add rs#r1 (Vint n)
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros. unfold addimm32. set (nn := Int.neg n).
+  destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))].
+- apply exec_addimm_aux_32 with (sem := Val.add); auto. intros; apply Val.add_assoc. 
+- rewrite <- Val.sub_opp_add.
+  apply exec_addimm_aux_32 with (sem := Val.sub); auto.
+  intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto.
+- destruct (Int.lt n Int.zero).
++ rewrite <- Val.sub_opp_add; fold nn.
+  edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). congruence.
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite B, C; eauto with asmgen.
+  split; intros; Simpl.
++ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence.
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite B, C; eauto with asmgen.
+  split; intros; Simpl.
+Qed.
+
+Lemma exec_addimm_aux_64:
+  forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n))))) m) ->
+  (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) ->
+  forall rd r1 n k rs m,
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vlong n)
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+  set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo).
+  assert (E: Int64.unsigned n = nhi + nlo) by (unfold nhi; omega).
+  rewrite <- (Int64.repr_unsigned n).
+  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; omega.
+  split; intros; Simpl.
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; omega.
+  split; intros; Simpl.
+- econstructor; split. eapply exec_straight_two.
+  apply SEM. apply SEM. Simpl. Simpl.
+  split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr.
+  rewrite E. auto with ints.
+  split; intros; Simpl.
+Qed.
+
+Lemma exec_addimm64:
+  forall rd r1 n k rs m,
+  preg_of_iregsp r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (addimm64 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = Val.addl rs#r1 (Vlong n)
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros. 
+  unfold addimm64. set (nn := Int64.neg n).
+  destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))].
+- apply exec_addimm_aux_64 with (sem := Val.addl); auto. intros; apply Val.addl_assoc. 
+- rewrite <- Val.subl_opp_addl.
+  apply exec_addimm_aux_64 with (sem := Val.subl); auto.
+  intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto.
+- destruct (Int64.lt n Int64.zero).
++ rewrite <- Val.subl_opp_addl; fold nn.
+  edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). congruence.
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
+  split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+  split; intros; Simpl.
++ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence.
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
+  split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+  split; intros; Simpl.
+Qed.
+
+(** Logical immediate *)
+
+Lemma exec_logicalimm32:
+  forall (insn1: ireg -> ireg0 -> Z -> instruction)
+         (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+         (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn1 rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs##r1 (Vint (Int.repr n))))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_instr ge fn (insn2 rd r1 r2 s) rs m =
+      Next (nextinstr (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s)))) m) ->
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vint n)
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32.
+  destruct (is_logical_imm32 n).
+- econstructor; split. 
+  apply exec_straight_one. apply SEM1. reflexivity. 
+  split. Simpl. rewrite Int.repr_unsigned; auto.
+  split; intros; Simpl.
+- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence.
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.  
+  apply exec_straight_one. apply SEM2. reflexivity.
+  split. Simpl. f_equal; auto. apply C; auto with asmgen.
+  split; intros; Simpl. 
+Qed.
+
+Lemma exec_logicalimm64:
+  forall (insn1: ireg -> ireg0 -> Z -> instruction)
+         (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+         (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn1 rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs###r1 (Vlong (Int64.repr n))))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_instr ge fn (insn2 rd r1 r2 s) rs m =
+      Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vlong n)
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64.
+  destruct (is_logical_imm64 n).
+- econstructor; split. 
+  apply exec_straight_one. apply SEM1. reflexivity. 
+  split. Simpl. rewrite Int64.repr_unsigned. auto.
+  split; intros; Simpl.
+- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence.
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.  
+  apply exec_straight_one. apply SEM2. reflexivity.
+  split. Simpl. f_equal; auto. apply C; auto with asmgen.
+  split; intros; Simpl. 
+Qed.
+
+(** Load address of symbol *)
+
+Lemma exec_loadsymbol: forall rd s ofs k rs m,
+    rd <> X16 \/ Archi.pic_code tt = false ->
+    (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m
+  /\ rs'#rd = Genv.symbol_address ge s ofs
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs'#RA = rs#RA.
+Proof.
+  unfold loadsymbol; intros. destruct (Archi.pic_code tt).
+- predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero.
++ subst ofs. econstructor; split.
+  apply exec_straight_one; [simpl; eauto | reflexivity].
+  split. Simpl. split; intros; Simpl.
+  
++ exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence.
+  instantiate (1 := rd). assumption.
+  intros (rs1 & A & B & C & D).
+  econstructor; split.
+  econstructor. simpl; eauto. auto. eexact A. 
+  split. simpl in B; rewrite B. Simpl. 
+  rewrite <- Genv.shift_symbol_address_64 by auto.
+  rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto.
+  split; intros. rewrite C by auto; Simpl.
+  rewrite D. Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
+  split. Simpl. rewrite symbol_high_low; auto. 
+  split; intros; Simpl.
+Qed.
+
+(** Shifted operands *)
+
+Remark transl_shift_not_none:
+  forall s a, transl_shift s a <> SOnone.
+Proof.
+  destruct s; intros; simpl; congruence.
+Qed.
+
+Remark or_zero_eval_shift_op_int:
+  forall v s, s <> SOnone -> Val.or (Vint Int.zero) (eval_shift_op_int v s) = eval_shift_op_int v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int.iwordsize); auto; rewrite Int.or_zero_l; auto.
+Qed.
+
+Remark or_zero_eval_shift_op_long:
+  forall v s, s <> SOnone -> Val.orl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.or_zero_l; auto.
+Qed.
+
+Remark add_zero_eval_shift_op_long:
+  forall v s, s <> SOnone -> Val.addl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.add_zero_l; auto.
+Qed.
+
+Lemma transl_eval_shift: forall s v (a: amount32),
+  eval_shift_op_int v (transl_shift s a) = eval_shift s v a.
+Proof.
+  intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shift': forall s v (a: amount32),
+  Val.or (Vint Int.zero) (eval_shift_op_int v (transl_shift s a)) = eval_shift s v a.
+Proof.
+  intros. rewrite or_zero_eval_shift_op_int by (apply transl_shift_not_none).
+  apply transl_eval_shift.
+Qed.
+
+Lemma transl_eval_shiftl: forall s v (a: amount64),
+  eval_shift_op_long v (transl_shift s a) = eval_shiftl s v a.
+Proof.
+  intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shiftl': forall s v (a: amount64),
+  Val.orl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+  intros. rewrite or_zero_eval_shift_op_long by (apply transl_shift_not_none).
+  apply transl_eval_shiftl.
+Qed.
+
+Lemma transl_eval_shiftl'': forall s v (a: amount64),
+  Val.addl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+  intros. rewrite add_zero_eval_shift_op_long by (apply transl_shift_not_none).
+  apply transl_eval_shiftl.
+Qed.
+
+(** Zero- and Sign- extensions *)
+
+Lemma exec_move_extended_base: forall rd r1 ex k rs m,
+  exists rs',
+     exec_straight ge fn (move_extended_base rd r1 ex k) rs m k rs' m
+  /\ rs' rd = match ex with Xsgn32 => Val.longofint rs#r1 | Xuns32 => Val.longofintu rs#r1 end
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold move_extended_base; destruct ex; econstructor;
+  (split; [apply exec_straight_one; [simpl;eauto|auto] | split; [Simpl|intros;Simpl]]).
+Qed.
+
+Lemma exec_move_extended: forall rd r1 ex (a: amount64) k rs m,
+  exists rs',
+     exec_straight ge fn (move_extended rd r1 ex a k) rs m k rs' m
+  /\ rs' rd = Op.eval_extend ex rs#r1 a
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold move_extended; intros. predSpec Int.eq Int.eq_spec a Int.zero.
+- exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. unfold Op.eval_extend. rewrite H. rewrite B.
+  destruct ex, (rs r1); simpl; auto; rewrite Int64.shl'_zero; auto.
+  auto.
+- Local Opaque Val.addl.
+  exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  unfold exec_instr. change (SOlsl a) with (transl_shift Slsl a). rewrite transl_eval_shiftl''. eauto. auto.
+  split. Simpl. rewrite B. auto. 
+  intros; Simpl.
+Qed.
+
+Lemma exec_arith_extended:
+  forall (sem: val -> val -> val)
+         (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction)
+         (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction),
+  (forall rd r1 r2 x rs m,
+    exec_instr ge fn (insnX rd r1 r2 x) rs m =
+      Next (nextinstr (rs#rd <- (sem rs#r1 (eval_extend rs#r2 x)))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_instr ge fn (insnS rd r1 r2 s) rs m =
+      Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
+  forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: code) rs m,
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a)
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)).
+- econstructor; split. 
+  apply exec_straight_one. rewrite EX; eauto. auto.
+  split. Simpl. f_equal. destruct ex; auto.
+  split; intros; Simpl.
+- exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. 
+  rewrite ES. eauto. auto.
+  split. Simpl. unfold ir0x. rewrite C by eauto with asmgen. f_equal. 
+  rewrite B. destruct ex; auto.
+  split; intros; Simpl.
+Qed. 
+
+(** Extended right shift *)
+
+Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+  Val.shrx rs#r1 (Vint n) = Some v ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = v
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx32; intros. apply Val.shrx_shr_3 in H.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split. Simpl. subst v; auto.
+  split; intros; Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: simpl; auto.
+    split.
+    ** subst v; Simpl.
+       destruct (Val.add _ _); simpl; trivial.
+       change (Int.ltu Int.one Int.iwordsize) with true; simpl.
+       rewrite Int.or_zero_l.
+       reflexivity.
+    ** split; intros; Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    simpl; eauto.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    auto. auto. auto.
+    split. subst v; Simpl.
+    split; intros; Simpl.
+Qed.
+ 
+Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+  Val.shrxl rs#r1 (Vint n) = Some v ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = v
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx64; intros. apply Val.shrxl_shrl_3 in H.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split. Simpl. subst v; auto.
+  split; intros; Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: simpl; auto.
+    split.
+    ** subst v; Simpl.
+       destruct (Val.addl _ _); simpl; trivial.
+       change (Int.ltu Int.one Int64.iwordsize') with true; simpl.
+       rewrite Int64.or_zero_l.
+       reflexivity.
+    ** split; intros; Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    simpl; eauto.
+    unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    auto. auto. auto.
+    split. subst v; Simpl.
+    split; intros; Simpl.
+Qed.
+
+(** Condition bits *)
+
+Lemma compare_int_spec: forall rs v1 v2 m,
+  let rs' := compare_int rs v1 v2 m in
+     rs'#CN = (Val.negative (Val.sub v1 v2))
+  /\ rs'#CZ = (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2)
+  /\ rs'#CC = (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
+  /\ rs'#CV = (Val.sub_overflow v1 v2).
+Proof.
+  intros; unfold rs'; auto.
+Qed.
+
+Lemma eval_testcond_compare_sint: forall c v1 v2 b rs m,
+  Val.cmp_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_signed_cmp c) (compare_int rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_int_spec rs v1 v2 m). 
+  set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.cmpu; simpl. destruct c; simpl.
+- destruct (Int.eq i i0); auto.
+- destruct (Int.eq i i0); auto.
+- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+- rewrite Int.lt_sub_overflow, Int.not_lt.
+  destruct (Int.eq i i0), (Int.lt i i0); auto.
+- rewrite Int.lt_sub_overflow, (Int.lt_not i). 
+  destruct (Int.eq i i0), (Int.lt i i0); auto.
+- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_uint: forall c v1 v2 b rs m,
+  Val.cmpu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
+  eval_testcond (cond_for_unsigned_cmp c) (compare_int rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_int_spec rs v1 v2 m). 
+  set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.cmpu; simpl. destruct c; simpl.
+- destruct (Int.eq i i0); auto.
+- destruct (Int.eq i i0); auto.
+- destruct (Int.ltu i i0); auto.
+- rewrite (Int.not_ltu i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+- rewrite (Int.ltu_not i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+- destruct (Int.ltu i i0); auto.
+Qed.
+
+Lemma compare_long_spec: forall rs v1 v2 m,
+  let rs' := compare_long rs v1 v2 m in
+     rs'#CN = (Val.negativel (Val.subl v1 v2))
+  /\ rs'#CZ = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2))
+  /\ rs'#CC = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2))
+  /\ rs'#CV = (Val.subl_overflow v1 v2).
+Proof.
+  intros; unfold rs'; auto.
+Qed.
+
+Remark int64_sub_overflow:
+  forall x y,
+  Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero)))
+          (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) =
+  (if Int64.lt x y then Int.one else Int.zero).
+Proof.
+  intros.
+  transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))).
+  rewrite <- (Int64.lt_sub_overflow x y).
+  unfold Int64.sub_overflow, Int64.negative.
+  set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero).
+  destruct (zle Int64.min_signed s && zle s Int64.max_signed);
+  destruct (Int64.lt (Int64.sub x y) Int64.zero);
+  auto.
+  destruct (Int64.lt x y); auto.
+Qed.
+
+Lemma eval_testcond_compare_slong: forall c v1 v2 b rs m,
+  Val.cmpl_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_signed_cmp c) (compare_long rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_long_spec rs v1 v2 m). 
+  set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.cmplu; simpl. destruct c; simpl.
+- destruct (Int64.eq i i0); auto.
+- destruct (Int64.eq i i0); auto.
+- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+- rewrite int64_sub_overflow, Int64.not_lt.
+  destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+- rewrite int64_sub_overflow, (Int64.lt_not i). 
+  destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_ulong: forall c v1 v2 b rs m,
+  Val.cmplu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
+  eval_testcond (cond_for_unsigned_cmp c) (compare_long rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_long_spec rs v1 v2 m). 
+  set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E; unfold Val.cmplu.
+  destruct v1; try discriminate; destruct v2; try discriminate; simpl in H.
+- (* int-int *)
+  inv H. destruct c; simpl.
++ destruct (Int64.eq i i0); auto.
++ destruct (Int64.eq i i0); auto.
++ destruct (Int64.ltu i i0); auto.
++ rewrite (Int64.not_ltu i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
++ rewrite (Int64.ltu_not i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
++ destruct (Int64.ltu i i0); auto.
+- (* int-ptr *)
+  simpl.
+  destruct (Int64.eq i Int64.zero &&
+            (Mem.valid_pointer m b0 (Ptrofs.unsigned i0)
+              || Mem.valid_pointer m b0 (Ptrofs.unsigned i0 - 1))); try discriminate.
+  destruct c; simpl in H; inv H; reflexivity.
+- (* ptr-int *)
+  simpl.
+  destruct (Int64.eq i0 Int64.zero &&
+            (Mem.valid_pointer m b0 (Ptrofs.unsigned i)
+              || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); try discriminate.
+  destruct c; simpl in H; inv H; reflexivity.
+- (* ptr-ptr *)
+  simpl. 
+  destruct (eq_block b0 b1).
++ destruct ((Mem.valid_pointer m b0 (Ptrofs.unsigned i)
+             || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1)) &&
+            (Mem.valid_pointer m b1 (Ptrofs.unsigned i0)
+             || Mem.valid_pointer m b1 (Ptrofs.unsigned i0 - 1)));
+  inv H.
+  destruct c; simpl.
+* destruct (Ptrofs.eq i i0); auto.
+* destruct (Ptrofs.eq i i0); auto.
+* destruct (Ptrofs.ltu i i0); auto.
+* rewrite (Ptrofs.not_ltu i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+* rewrite (Ptrofs.ltu_not i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+* destruct (Ptrofs.ltu i i0); auto.
++ destruct (Mem.valid_pointer m b0 (Ptrofs.unsigned i) &&
+            Mem.valid_pointer m b1 (Ptrofs.unsigned i0)); try discriminate.
+  destruct c; simpl in H; inv H; reflexivity.
+Qed.
+
+Lemma compare_float_spec: forall rs f1 f2,
+  let rs' := compare_float rs (Vfloat f1) (Vfloat f2) in
+     rs'#CN = (Val.of_bool (Float.cmp Clt f1 f2))
+  /\ rs'#CZ = (Val.of_bool (Float.cmp Ceq f1 f2))
+  /\ rs'#CC = (Val.of_bool (negb (Float.cmp Clt f1 f2)))
+  /\ rs'#CV = (Val.of_bool (negb (Float.ordered f1 f2))).
+Proof.
+  intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_float: forall c v1 v2 b rs,
+  Val.cmpf_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_float_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
+  generalize (compare_float_spec rs f f0). 
+  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+  unfold Float.cmp, Float.ordered;
+  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_float: forall c v1 v2 b rs,
+  option_map negb (Val.cmpf_bool c v1 v2) = Some b ->
+  eval_testcond (cond_for_float_not_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+  generalize (compare_float_spec rs f f0). 
+  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+  unfold Float.cmp, Float.ordered;
+  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma compare_single_spec: forall rs f1 f2,
+  let rs' := compare_single rs (Vsingle f1) (Vsingle f2) in
+     rs'#CN = (Val.of_bool (Float32.cmp Clt f1 f2))
+  /\ rs'#CZ = (Val.of_bool (Float32.cmp Ceq f1 f2))
+  /\ rs'#CC = (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
+  /\ rs'#CV = (Val.of_bool (negb (Float32.ordered f1 f2))).
+Proof.
+  intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_single: forall c v1 v2 b rs,
+  Val.cmpfs_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_float_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
+  generalize (compare_single_spec rs f f0). 
+  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+  unfold Float32.cmp, Float32.ordered;
+  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_single: forall c v1 v2 b rs,
+  option_map negb (Val.cmpfs_bool c v1 v2) = Some b ->
+  eval_testcond (cond_for_float_not_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+  generalize (compare_single_spec rs f f0). 
+  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+  unfold Float32.cmp, Float32.ordered;
+  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Remark compare_float_inv: forall rs v1 v2 r,
+  match r with CR _ => False | _ => True end ->
+  (nextinstr (compare_float rs v1 v2))#r = (nextinstr rs)#r.
+Proof.
+  intros; unfold compare_float.
+  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+Remark compare_single_inv: forall rs v1 v2 r,
+  match r with CR _ => False | _ => True end ->
+  (nextinstr (compare_single rs v1 v2))#r = (nextinstr rs)#r.
+Proof.
+  intros; unfold compare_single.
+  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+(** Translation of conditionals *)
+
+Ltac ArgsInv :=
+  repeat (match goal with
+  | [ H: Error _ = OK _ |- _ ] => discriminate
+  | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
+  | [ H: bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  end);
+  subst;
+  repeat (match goal with
+  | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *
+  | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
+  end).
+
+Lemma compare_int_RA:
+  forall rs a b m,
+    compare_int rs a b m X30 = rs X30.
+Proof.
+  unfold compare_int.
+  intros.
+  repeat rewrite Pregmap.gso by congruence.
+  trivial.
+Qed.
+
+Hint Resolve compare_int_RA : asmgen.
+
+Lemma compare_long_RA:
+  forall rs a b m,
+    compare_long rs a b m X30 = rs X30.
+Proof.
+  unfold compare_long.
+  intros.
+  repeat rewrite Pregmap.gso by congruence.
+  trivial.
+Qed.
+
+Hint Resolve compare_long_RA : asmgen.
+
+Lemma compare_float_RA:
+  forall rs a b,
+    compare_float rs a b X30 = rs X30.
+Proof.
+  unfold compare_float.
+  intros.
+  destruct a; destruct b.
+  all: repeat rewrite Pregmap.gso by congruence; trivial.
+Qed.
+
+Hint Resolve compare_float_RA : asmgen.
+  
+
+Lemma compare_single_RA:
+  forall rs a b,
+    compare_single rs a b X30 = rs X30.
+Proof.
+  unfold compare_single.
+  intros.
+  destruct a; destruct b.
+  all: repeat rewrite Pregmap.gso by congruence; trivial.
+Qed.
+
+Hint Resolve compare_single_RA : asmgen.
+  
+                                         
+Lemma transl_cond_correct:
+  forall cond args k c rs m,
+  transl_cond cond args k = OK c ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ (forall b,
+      eval_condition cond (map rs (map preg_of args)) m = Some b ->
+      eval_testcond (cond_for_cond cond) rs' = Some b)
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros until m; intros TR. destruct cond; simpl in TR; ArgsInv.
+- (* Ccomp *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccompu *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccompimm *)
+  destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_sint; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate.
+  auto with asmgen.
+  Simpl. rewrite compare_int_RA.
+  apply C; congruence.
+- (* Ccompuimm *)
+  destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_uint; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  Simpl. rewrite compare_int_RA.
+  apply C; congruence.  
+- (* Ccompshift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccompushift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Cmaskzero *)
+  destruct (is_logical_imm32 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto.
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply (eval_testcond_compare_sint Ceq); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  Simpl. rewrite compare_int_RA.
+  apply C; congruence.
+
+- (* Cmasknotzero *)
+  destruct (is_logical_imm32 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto.
+  destruct r; reflexivity || discriminate.
+
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply (eval_testcond_compare_sint Cne); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  Simpl. rewrite compare_int_RA.
+  apply C; congruence.
+  
+- (* Ccompl *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccomplu *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccomplimm *)
+  destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_slong; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  Simpl. rewrite compare_long_RA.
+  apply C; congruence.
+
+- (* Ccompluimm *)
+  destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply eval_testcond_compare_ulong; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  Simpl. rewrite compare_long_RA.
+  apply C; congruence.
+  
+- (* Ccomplshift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccomplushift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Cmasklzero *)
+  destruct (is_logical_imm64 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto.
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply (eval_testcond_compare_slong Ceq); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  Simpl. rewrite compare_long_RA.
+  apply C; congruence.
+
+- (* Cmasknotzero *)
+  destruct (is_logical_imm64 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto.
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  repeat split; intros. apply (eval_testcond_compare_slong Cne); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+    Simpl. rewrite compare_long_RA.
+  apply C; congruence.
+
+- (* Ccompf *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+  Simpl. 
+- (* Cnotcompf *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_not_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+  Simpl. 
+- (* Ccompfzero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+  Simpl.
+- (* Cnotcompfzero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_not_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+  Simpl.
+- (* Ccompfs *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+  Simpl.
+- (* Cnotcompfs *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_not_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+  Simpl.
+- (* Ccompfszero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+  Simpl.
+- (* Cnotcompfszero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  repeat split; intros. apply eval_testcond_compare_not_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+  Simpl.
+Qed.
+
+(** Translation of conditional branches *)
+
+Lemma transl_cond_branch_correct:
+  forall cond args lbl k c rs m b,
+  transl_cond_branch cond args lbl k = OK c ->
+  eval_condition cond (map rs (map preg_of args)) m = Some b ->
+  exists rs' insn,
+     exec_straight_opt ge fn c rs m (insn :: k) rs' m
+  /\ exec_instr ge fn insn rs' m =
+         (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros until b; intros TR EV.
+  assert (DFL:
+    transl_cond_branch_default cond args lbl k = OK c ->
+    exists rs' insn,
+       exec_straight_opt ge fn c rs m (insn :: k) rs' m
+    /\ exec_instr ge fn insn rs' m =
+         (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
+    /\ (forall r, data_preg r = true -> rs'#r = rs#r)
+     /\ rs' # RA = rs # RA ).
+  {
+    unfold transl_cond_branch_default; intros.
+    exploit transl_cond_correct; eauto. intros (rs' & A & B & C & D).
+    exists rs', (Pbc (cond_for_cond cond) lbl); split.
+    apply exec_straight_opt_intro. eexact A.
+    repeat split; auto. simpl. rewrite (B b) by auto. auto.
+  }
+Local Opaque transl_cond transl_cond_branch_default.
+  destruct args as [ | a1 args]; simpl in TR; auto.
+  destruct args as [ | a2 args]; simpl in TR; auto.
+  destruct cond; simpl in TR; auto.
+- (* Ccompimm *)
+  destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto; 
+  apply Int.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccompimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto.
++ (* Ccompimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int.eq i Int.zero); auto.
+- (* Ccompuimm *)
+  destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto;
+  apply Int.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccompuimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite EV. auto.
++ (* Ccompuimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto.
+- (* Cmaskzero *)
+  destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+  rewrite (Val.negate_cmp_bool Ceq), EV. destruct b; auto.
+- (* Cmasknotzero *)
+  destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+  rewrite EV. auto.
+- (* Ccomplimm *)
+  destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; 
+  apply Int64.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccomplimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto.
++ (* Ccomplimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int64.eq i Int64.zero); auto.
+- (* Ccompluimm *)
+  destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto;
+  apply Int64.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccompluimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite EV. auto.
++ (* Ccompluimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto.
+- (* Cmasklzero *)
+  destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+  rewrite (Val.negate_cmpl_bool Ceq), EV. destruct b; auto.
+- (* Cmasklnotzero *)
+  destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+  rewrite EV. auto.
+Qed.
+
+(** Translation of arithmetic operations *)
+
+Ltac SimplEval H :=
+  match type of H with
+  | Some _ = None _ => discriminate
+  | Some _ = Some _ => inv H
+  | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity)
+end.
+
+Ltac TranslOpSimpl :=
+  econstructor; split;
+  [ apply exec_straight_one; [simpl; eauto | reflexivity]
+  | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl;
+             apply Val.lessdef_same; Simpl; fail
+           | split; [ intros; Simpl; fail
+                    | intros; Simpl; eauto with asmgen; fail] ]].
+
+Ltac TranslOpBase :=
+  econstructor; split;
+  [ apply exec_straight_one; [simpl; eauto | reflexivity]
+  | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl
+           | split; [ intros; Simpl; fail
+                    | intros; Simpl; eapply RA_not_written2; eauto] ]].
+
+Lemma transl_op_correct:
+  forall op args res k (rs: regset) m v c,
+  transl_op op args res k = OK c ->
+  eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ Val.lessdef v rs'#(preg_of res)
+  /\ (forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r)
+  /\ rs' RA = rs RA.
+Proof.
+Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize.
+  intros until c; intros TR EV.
+  unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl.
+- (* move *)
+  destruct (preg_of res) eqn:RR; try discriminate; destruct (preg_of m0) eqn:R1; inv TR.
+  all: TranslOpSimpl.
+- (* intconst *)
+  exploit exec_loadimm32. apply (ireg_of_not_RA res); eassumption.
+  intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. rewrite B; auto.
+  split. intros; auto with asmgen.
+  apply C. congruence.
+  eapply ireg_of_not_RA''; eauto.
+- (* longconst *)
+  exploit exec_loadimm64. apply (ireg_of_not_RA res); eassumption.
+  intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. rewrite B; auto.
+  split. intros; auto with asmgen.
+  apply C. congruence.
+  eapply ireg_of_not_RA''; eauto.
+- (* floatconst *)
+  destruct (Float.eq_dec n Float.zero).
++ subst n. TranslOpSimpl. 
++ TranslOpSimpl.
+- (* singleconst *)
+  destruct (Float32.eq_dec n Float32.zero).
++ subst n. TranslOpSimpl. 
++ TranslOpSimpl.
+- (* loadsymbol *)
+  exploit (exec_loadsymbol x id ofs). eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption. 
+  intros (rs' & A & B & C & D).
+  exists rs'; split. eexact A. split. rewrite B; auto.
+  split; auto.
+- (* addrstack *)
+  exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)). simpl; eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption. 
+  intros (rs' & A & B & C & D).
+  exists rs'; split. eexact A. split. simpl in B; rewrite B.
+Local Transparent Val.addl.
+  destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
+  auto.
+- (* shift *)
+  rewrite <- transl_eval_shift'. TranslOpSimpl.
+- (* addimm *)
+  exploit (exec_addimm32 x x0 n). eauto with asmgen. eapply ireg_of_not_RA''; eassumption.
+  intros (rs' & A & B & C & D).
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* mul *)
+  TranslOpBase.
+Local Transparent Val.add.
+  destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto.
+- (* andimm *)
+  exploit (exec_logicalimm32 (Pandimm W) (Pand W)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D). 
+  exists rs'; split. eexact A. split. rewrite B; auto.
+  split; auto.
+- (* orimm *)
+  exploit (exec_logicalimm32 (Porrimm W) (Porr W)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.  apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D). 
+  exists rs'; split. eexact A. split. rewrite B; auto.
+  split; auto.
+- (* xorimm *)
+  exploit (exec_logicalimm32 (Peorimm W) (Peor W)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* not *)
+  TranslOpBase.
+  destruct (rs x0); auto. simpl. rewrite Int.or_zero_l; auto.
+- (* notshift *)
+  TranslOpBase.
+  destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
+- (* shrx *)
+  exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D).
+  econstructor; split. eexact A. split. rewrite B; auto.
+  split; auto.
+- (* zero-ext *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+- (* sign-ext *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+- (* shlzext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int.shl_zero_ext_min; auto using a32_range.
+- (* shlsext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int.shl_sign_ext_min; auto using a32_range.
+- (* zextshr *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.zero_ext_shru_min; auto using a32_range.
+- (* sextshr *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.sign_ext_shr_min; auto using a32_range.
+- (* shiftl *)
+  rewrite <- transl_eval_shiftl'. TranslOpSimpl.
+- (* extend *)
+  exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C).
+  econstructor; split. eexact A. 
+  split. rewrite B; auto.
+  split; eauto with asmgen.
+- (* addext *)
+  exploit (exec_arith_extended Val.addl Paddext (Padd X)).
+  auto. auto. instantiate (1 := x1). eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D).
+  econstructor; split. eexact A. split. rewrite B; auto.
+  split; auto.
+- (* addlimm *)
+  exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence.
+  apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D).
+  exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto.
+- (* subext *)
+  exploit (exec_arith_extended Val.subl Psubext (Psub X)).
+  auto. auto. instantiate (1 := x1). eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D).
+  econstructor; split. eexact A. split. rewrite B; auto.
+  split; auto.
+- (* mull *)
+  TranslOpBase.
+  destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto.
+- (* andlimm *)
+  exploit (exec_logicalimm64 (Pandimm X) (Pand X)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* orlimm *)
+  exploit (exec_logicalimm64 (Porrimm X) (Porr X)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* xorlimm *)
+  exploit (exec_logicalimm64 (Peorimm X) (Peor X)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C & D). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* notl *)
+  TranslOpBase.
+  destruct (rs x0); auto. simpl. rewrite Int64.or_zero_l; auto.
+- (* notlshift *)
+  TranslOpBase.
+  destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
+- (* shrx *)
+  exploit (exec_shrx64 x x0 n); eauto with asmgen.
+  apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ).
+  econstructor; split. eexact A. split. rewrite B; auto. auto.
+- (* zero-ext-l *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+- (* sign-ext-l *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+- (* shllzext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_zero_ext_min; auto using a64_range.
+- (* shllsext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_sign_ext_min; auto using a64_range.
+- (* zextshrl *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.zero_ext_shru'_min; auto using a64_range.
+- (* sextshrl *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range.
+- (* condition *)
+  exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+  rewrite (B b) by auto. auto. 
+  auto.
+  split; intros; Simpl.
+- (* select *)
+  destruct (preg_of res) eqn:RES; monadInv TR.
+  + (* integer *)
+    generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+    exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+    rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+    rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+    auto.
+    split; intros; Simpl.
+    rewrite <- D.
+    eapply RA_not_written2; eassumption.
+  + (* FP *)
+    generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+    exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+    rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+    rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+    auto.
+    split; intros; Simpl.
+Qed.
+
+(** Translation of addressing modes, loads, stores *)
+
+Lemma transl_addressing_correct:
+  forall sz addr args (insn: Asm.addressing -> instruction) k (rs: regset) m c b o,
+  transl_addressing sz addr args insn k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some (Vptr b o) ->
+  exists ad rs',
+     exec_straight_opt ge fn c rs m (insn ad :: k) rs' m
+  /\ Asm.eval_addressing ge ad rs' = Vptr b o
+  /\ (forall r, data_preg r = true -> rs' r = rs r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros until o; intros TR EV.
+  unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV.
+- (* Aindexed *)
+  destruct (offset_representable sz ofs); inv EQ0.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto.
++ exploit (exec_loadimm64 X16 ofs). congruence. intros (rs' & A & B & C).
+  econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A.
+  split. simpl. rewrite B, C by eauto with asmgen. auto.
+  split; eauto with asmgen.
+- (* Aindexed2 *)
+  econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto.
+- (* Aindexed2shift *)
+  destruct (Int.eq a Int.zero) eqn:E; [|destruct (Int.eq (Int.shl Int.one a) (Int.repr sz))]; inv EQ2.
++ apply Int.same_if_eq in E. rewrite E.
+  econstructor; econstructor; split. apply exec_straight_opt_refl.
+  split; auto. simpl.
+  rewrite Val.addl_commut in H0. destruct (rs x0); try discriminate.
+  unfold Val.shll. rewrite Int64.shl'_zero. auto.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto. 
++ econstructor; econstructor; split.
+  apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
+  split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto.
+  split; intros; Simpl.
+- (* Aindexed2ext *)
+  destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  split; auto. destruct x; auto.
++ exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto.
+  instantiate (1 := x0). eauto with asmgen.
+  instantiate (1 := X16). simpl. congruence.
+  intros (rs' & A & B & C & D).
+  econstructor; exists rs'; split.
+  apply exec_straight_opt_intro. eexact A. 
+  split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal.
+  unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range;
+  simpl; rewrite Int64.add_zero; auto.
+  split; intros.
+  apply C; eauto with asmgen.
+  trivial.
+- (* Aglobal *)
+  destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR.
++ econstructor; econstructor; split.
+  apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
+  split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence.
+  split; intros; Simpl.
++ exploit (exec_loadsymbol X16 id ofs). auto.
+  simpl. congruence.
+  intros (rs' & A & B & C & D).
+  econstructor; exists rs'; split.
+  apply exec_straight_opt_intro. eexact A.
+  split. simpl. 
+  rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. 
+  simpl in EV. congruence. 
+  split; auto with asmgen.
+- (* Ainstrack *)
+  assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o).
+  { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. 
+    rewrite Ptrofs.of_int64_to_int64 by auto. auto. }   
+  destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto.
++ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)).
+  simpl. congruence.
+  intros (rs' & A & B & C).
+  econstructor; exists rs'; split.
+  apply exec_straight_opt_intro. eexact A.
+  split. simpl. rewrite B, C by eauto with asmgen. auto.
+  auto with asmgen.
+Qed.
+
+Lemma transl_load_correct:
+  forall chunk addr args dst k c (rs: regset) m vaddr v,
+  transl_load TRAP chunk addr args dst k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+  Mem.loadv chunk m vaddr = Some v ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ rs'#(preg_of dst) = v
+  /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  intros. destruct vaddr; try discriminate. 
+  assert (A: exists sz insn,
+                transl_addressing sz addr args insn k = OK c
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m)).
+  {
+    destruct chunk; monadInv H;
+    try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
+    do 2 econstructor; (split; [eassumption|auto]).
+  }
+  destruct A as (sz & insn & B & C).
+  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S).
+  assert (X: exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m =
+             Next (nextinstr (rs'#(preg_of dst) <- v)) m).
+  { unfold exec_load. rewrite Q, H1. auto. }
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact P.
+  apply exec_straight_one. rewrite C, X; eauto. Simpl. 
+  split. Simpl.
+  split; intros; Simpl.
+  rewrite <- S.
+  apply RA_not_written.
+Qed.
+
+Lemma transl_store_correct:
+  forall chunk addr args src k c (rs: regset) m vaddr m',
+  transl_store chunk addr args src k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+  Mem.storev chunk m vaddr rs#(preg_of src) = Some m' ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m'
+     /\ (forall r, data_preg r = true -> rs' r = rs r)
+     /\ rs' # RA = rs # RA.
+Proof.
+  intros. destruct vaddr; try discriminate. 
+  set (chunk' := match chunk with Mint8signed => Mint8unsigned
+                                | Mint16signed => Mint16unsigned
+                                | _ => chunk end).
+  assert (A: exists sz insn,
+                transl_addressing sz addr args insn k = OK c
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_store ge chunk' ad rs'#(preg_of src) rs' m)).
+  {
+    unfold chunk'; destruct chunk; monadInv H;
+    try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
+    do 2 econstructor; (split; [eassumption|auto]).
+  }
+  destruct A as (sz & insn & B & C).
+  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S).                                                  
+  assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m').
+  { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry.
+    apply Mem.store_signed_unsigned_8.
+    apply Mem.store_signed_unsigned_16. }
+  assert (Y: exec_store ge chunk' ad rs'#(preg_of src) rs' m =
+             Next (nextinstr rs') m').
+  { unfold exec_store. rewrite Q, R, X by auto with asmgen. auto. }
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact P.
+  apply exec_straight_one. rewrite C, Y; eauto. Simpl. 
+  split; intros; Simpl.
+Qed.
+
+(** Translation of indexed memory accesses *)
+
+Lemma indexed_memory_access_correct: forall insn sz (base: iregsp) ofs k (rs: regset) m b i,
+  preg_of_iregsp base <> IR X16 ->
+  Val.offset_ptr rs#base ofs = Vptr b i ->
+  exists ad rs',
+     exec_straight_opt ge fn (indexed_memory_access insn sz base ofs k) rs m (insn ad :: k) rs' m
+  /\ Asm.eval_addressing ge ad rs' = Vptr b i
+  /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+Proof.
+  unfold indexed_memory_access; intros.
+  assert (Val.addl rs#base (Vlong (Ptrofs.to_int64 ofs)) = Vptr b i).
+  { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. }
+  destruct offset_representable.
+- econstructor; econstructor; split. apply exec_straight_opt_refl. auto. 
+- exploit (exec_loadimm64 X16); eauto.
+  simpl. congruence.
+  intros (rs' & A & B & C).
+  econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A.
+  split. simpl. rewrite B, C by eauto with asmgen. auto. auto.
+Qed.
+
+Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset),
+  Mem.loadv Mint64 m (Val.offset_ptr rs#base ofs) = Some v ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge fn (loadptr base ofs dst k) rs m k rs' m
+  /\ rs'#dst = v
+  /\ (forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r).
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. simpl. unfold exec_load. rewrite B, H. eauto. auto.
+  split. Simpl.
+  intros; Simpl.
+Qed.
+
+Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset),
+  Mem.storev Mint64 m (Val.offset_ptr rs#base ofs) rs#src = Some m' ->
+  preg_of_iregsp base <> IR X16 ->
+  src <> X16 ->
+  exists rs',
+     exec_straight ge fn (storeptr src base ofs k) rs m k rs' m'
+  /\ (forall r, r <> PC -> r <> X16 -> rs' r = rs r)
+  /\ rs' RA = rs RA.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. simpl. unfold exec_store. rewrite B, C, H by eauto with asmgen. eauto. auto.
+  split; intros; Simpl.
+Qed.
+
+Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v,
+  loadind base ofs ty dst k = OK c ->
+  Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ rs'#(preg_of dst) = v
+  /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r)
+  /\ rs' RA = rs RA.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  assert (X: exists sz insn,
+                c = indexed_memory_access insn sz base ofs k
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_load ge (chunk_of_type ty) (fun v => v) ad (preg_of dst) rs' m)).
+  {
+    unfold loadind in H; destruct ty; destruct (preg_of dst); inv H; do 2 econstructor; eauto.
+  }
+  destruct X as (sz & insn & EQ & SEM). subst c.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. rewrite SEM. unfold exec_load. rewrite B, H0. eauto. Simpl.
+  split. Simpl.
+  split. intros; Simpl.
+  Simpl. rewrite RA_not_written.
+  apply C; congruence.
+Qed.
+
+Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m',
+  storeind src base ofs ty k = OK c ->
+  Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m'
+     /\ (forall r, data_preg r = true -> rs' r = rs r)
+     /\ rs' RA = rs RA.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  assert (X: exists sz insn,
+                c = indexed_memory_access insn sz base ofs k
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_store ge (chunk_of_type ty) ad rs'#(preg_of src) rs' m)).
+  {
+    unfold storeind in H; destruct ty; destruct (preg_of src); inv H; do 2 econstructor; eauto.
+  }
+  destruct X as (sz & insn & EQ & SEM). subst c.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. rewrite SEM.
+  unfold exec_store. rewrite B, C, H0 by eauto with asmgen. eauto.
+  Simpl.
+  split. intros; Simpl.
+  Simpl. 
+Qed.
+
+Lemma make_epilogue_correct:
+  forall ge0 f m stk soff cs m' ms rs k tm,
+  (is_leaf_function f = true -> rs # (IR RA) = parent_ra cs) ->
+  load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
+  ((* FIXME is_leaf_function f = false -> *) load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs)) ->
+  Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+  agree ms (Vptr stk soff) rs ->
+  Mem.extends m tm ->
+  match_stack ge0 cs ->
+  exists rs', exists tm',
+     exec_straight ge fn (make_epilogue f k) rs tm k rs' tm'
+  /\ agree ms (parent_sp cs) rs'
+  /\ Mem.extends m' tm'
+  /\ rs'#RA = parent_ra cs
+  /\ rs'#SP = parent_sp cs
+  /\ (forall r, r <> PC -> r <> SP -> r <> RA -> r <> X16 -> rs'#r = rs#r).
+Proof.
+  intros until tm; intros LEAF_RA LP LRA FREE AG MEXT MCS.
+
+  (* FIXME
+  Cannot be used at this point
+  destruct (is_leaf_function f) eqn:IS_LEAF.
+  {
+  exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+  exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+  exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+  unfold make_epilogue.
+  rewrite IS_LEAF.
+
+  econstructor; econstructor; split.
+  apply exec_straight_one. simpl.
+  rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. 
+    rewrite FREE'. eauto. auto. 
+  split. apply agree_nextinstr. apply agree_set_other; auto.
+  apply agree_change_sp with (Vptr stk soff).
+  apply agree_exten with rs; auto.
+  eapply parent_sp_def; eauto.
+  split. auto.
+  split. Simpl.
+  split. Simpl.
+  intros. Simpl.
+  }
+  lapply LRA. 2: reflexivity.
+  clear LRA. intro LRA. *)
+  exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+  exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
+  exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+  exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
+  exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+  unfold make_epilogue.
+  (* FIXME rewrite IS_LEAF. *)
+  exploit (loadptr_correct XSP (fn_retaddr_ofs f)).
+    instantiate (2 := rs). simpl. rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; congruence.
+  intros (rs1 & A1 & B1 & C1).
+
+  econstructor; econstructor; split.
+  eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. 
+    simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. 
+    rewrite FREE'. eauto. auto. 
+  split. apply agree_nextinstr. apply agree_set_other; auto.
+  apply agree_change_sp with (Vptr stk soff).
+  apply agree_exten with rs; auto. intros; apply C1; auto with asmgen.
+  eapply parent_sp_def; eauto.
+  split. auto.
+  split. Simpl. 
+  split. Simpl. 
+  intros. Simpl.
+Qed.
+
+End CONSTRUCTORS.
diff --git a/aarch64/Builtins1.v b/aarch64/Builtins1.v
new file mode 100644
index 00000000..53c83d7e
--- /dev/null
+++ b/aarch64/Builtins1.v
@@ -0,0 +1,33 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, Collège de France and Inria Paris            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Platform-specific built-in functions *)
+
+Require Import String Coqlib.
+Require Import AST Integers Floats Values.
+Require Import Builtins0.
+
+Inductive platform_builtin : Type := .
+
+Local Open Scope string_scope.
+
+Definition platform_builtin_table : list (string * platform_builtin) :=
+  nil.
+
+Definition platform_builtin_sig (b: platform_builtin) : signature :=
+  match b with end.
+
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
+  match b with end.
diff --git a/aarch64/CBuiltins.ml b/aarch64/CBuiltins.ml
new file mode 100644
index 00000000..fdc1372d
--- /dev/null
+++ b/aarch64/CBuiltins.ml
@@ -0,0 +1,72 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Processor-dependent builtin C functions *)
+
+open C
+
+(* va_list is a struct of size 32 and alignment 8, passed by reference *)
+
+let va_list_type = TArray(TInt(IULong, []), Some 4L, [])
+let size_va_list = 32
+let va_list_scalar = false
+
+let builtins = {
+  builtin_typedefs = [
+    "__builtin_va_list", va_list_type
+  ];
+  builtin_functions = [
+    (* Synchronization *)
+    "__builtin_fence",
+      (TVoid [], [], false);
+    (* Integer arithmetic *)
+    "__builtin_bswap64",
+      (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
+    "__builtin_clz",
+      (TInt(IInt, []), [TInt(IUInt, [])], false);
+    "__builtin_clzl",
+      (TInt(IInt, []), [TInt(IULong, [])], false);
+    "__builtin_clzll",
+      (TInt(IInt, []), [TInt(IULongLong, [])], false);
+    "__builtin_cls",
+      (TInt(IInt, []), [TInt(IInt, [])], false);
+    "__builtin_clsl",
+      (TInt(IInt, []), [TInt(ILong, [])], false);
+    "__builtin_clsll",
+      (TInt(IInt, []), [TInt(ILongLong, [])], false);
+    (* Float arithmetic *)
+    "__builtin_fmadd",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fmsub",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fnmadd",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fnmsub",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fmax",
+      (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+    "__builtin_fmin",
+      (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+  ]
+}
+
+(* Expand memory references inside extended asm statements.  Used in C2C. *)
+
+let asm_mem_argument arg = Printf.sprintf "[%s]" arg
diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v
new file mode 100644
index 00000000..90b514a2
--- /dev/null
+++ b/aarch64/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+     (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+  && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+  && ((ofsw + size_chunk chunkw <=? ofsr) ||
+      (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+  match addr, addr', args, args' with
+  | (Aindexed ofs), (Aindexed ofs'),
+    (base :: nil), (base' :: nil) =>
+    if peq base base'
+    then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk)
+    else true  | _, _, _, _ => true
+  end.
diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v
new file mode 100644
index 00000000..4aac23af
--- /dev/null
+++ b/aarch64/CSE2depsproof.v
@@ -0,0 +1,128 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+  Ptrofs.wordsize = 64%nat.
+Proof.
+  unfold Ptrofs.wordsize.
+  unfold Wordsize_Ptrofs.wordsize.
+  trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+  Ptrofs.modulus = 18446744073709551616.
+Proof.
+  reflexivity.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section MEMORY_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  Variable base : val.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : int64.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Aindexed ofsw) (base :: nil) = Some addrw. 
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Aindexed ofsr) (base :: nil) = Some addrr.
+
+  Lemma load_store_away1 :
+    forall RANGEW : 0 <= Int64.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Int64.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Int64.unsigned ofsw + size_chunk chunkw <= Int64.unsigned ofsr
+                       \/ Int64.unsigned ofsr + size_chunk chunkr <= Int64.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct base; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: unfold Ptrofs.of_int64.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsr))) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsw))) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia).
+
+    all: try rewrite ptrofs_modulus in *.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem load_store_away :
+    can_swap_accesses_ofs (Int64.unsigned ofsr) chunkr (Int64.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Lemma may_overlap_sound:
+  forall m m' : mem,
+  forall chunk addr args chunk' addr' args' v a a' rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+    (may_overlap chunk addr args chunk' addr' args') = false ->
+    (Mem.storev chunk m a v) = Some m' ->
+    (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m  a').
+Proof.
+  intros until rs.
+  intros ADDR ADDR' OVERLAP STORE.
+  destruct addr; destruct addr'; try discriminate.
+  { (* Aindexed / Aindexed *)
+  destruct args as [ | base [ | ]]. 1,3: discriminate.
+  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+  simpl in OVERLAP.
+  destruct (peq base base'). 2: discriminate.
+  subst base'.
+  destruct (can_swap_accesses_ofs (Int64.unsigned ofs0) chunk' (Int64.unsigned ofs) chunk) eqn:SWAP.
+  2: discriminate.
+  simpl in *.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  }
+Qed.
+
+End SOUNDNESS.
diff --git a/aarch64/CombineOp.v b/aarch64/CombineOp.v
new file mode 100644
index 00000000..4d78c9a0
--- /dev/null
+++ b/aarch64/CombineOp.v
@@ -0,0 +1,137 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Recognition of combined operations, addressing modes and conditions
+  during the [CSE] phase. *)
+
+Require Import Coqlib.
+Require Import AST Integers.
+Require Import Op.
+Require Import CSEdomain.
+
+Section COMBINE.
+
+Variable get: valnum -> option rhs.
+
+Function combine_compimm_ne_0 (x: valnum) : option(condition * list valnum) :=
+  match get x with
+  | Some(Op (Ocmp c) ys) => Some (c, ys)
+  | _ => None
+  end.
+
+Function combine_compimm_eq_0 (x: valnum) : option(condition * list valnum) :=
+  match get x with
+  | Some(Op (Ocmp c) ys) => Some (negate_condition c, ys)
+  | _ => None
+  end.
+
+Function combine_compimm_eq_1 (x: valnum) : option(condition * list valnum) :=
+  match get x with
+  | Some(Op (Ocmp c) ys) => Some (c, ys)
+  | _ => None
+  end.
+
+Function combine_compimm_ne_1 (x: valnum) : option(condition * list valnum) :=
+  match get x with
+  | Some(Op (Ocmp c) ys) => Some (negate_condition c, ys)
+  | _ => None
+  end.
+
+Function combine_cond (cond: condition) (args: list valnum) : option(condition * list valnum) :=
+  match cond, args with
+  | Ccompimm Cne n, x::nil =>
+      if Int.eq_dec n Int.zero then combine_compimm_ne_0 x
+      else if Int.eq_dec n Int.one then combine_compimm_ne_1 x
+      else None
+  | Ccompimm Ceq n, x::nil =>
+      if Int.eq_dec n Int.zero then combine_compimm_eq_0 x
+      else if Int.eq_dec n Int.one then combine_compimm_eq_1 x
+      else None
+  | Ccompuimm Cne n, x::nil =>
+      if Int.eq_dec n Int.zero then combine_compimm_ne_0 x
+      else if Int.eq_dec n Int.one then combine_compimm_ne_1 x
+      else None
+  | Ccompuimm Ceq n, x::nil =>
+      if Int.eq_dec n Int.zero then combine_compimm_eq_0 x
+      else if Int.eq_dec n Int.one then combine_compimm_eq_1 x
+      else None
+  | _, _ => None
+  end.
+
+Function combine_addr (addr: addressing) (args: list valnum) : option(addressing * list valnum) :=
+  match addr, args with
+  | Aindexed n, x::nil =>
+      match get x with
+      | Some(Op (Oaddlimm m) ys) =>
+          Some(Aindexed (Int64.add m n), ys)
+      | _ => None
+      end
+  | _, _ => None
+  end.
+
+Function combine_op (op: operation) (args: list valnum) : option(operation * list valnum) :=
+  match op, args with
+  | Oaddimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oaddimm m) ys) => Some(Oaddimm (Int.add m n), ys)
+      | _ => None
+      end
+  | Oandimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oandimm m) ys) =>
+          Some(let p := Int.and m n in
+               if Int.eq p m then (Omove, x :: nil) else (Oandimm p, ys))
+      | _ => None
+      end
+  | Oorimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oorimm m) ys) => Some(Oorimm (Int.or m n), ys)
+      | _ => None
+      end
+  | Oxorimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oxorimm m) ys) => Some(Oxorimm (Int.xor m n), ys)
+      | _ => None
+      end
+  | Oaddlimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oaddlimm m) ys) => Some(Oaddlimm (Int64.add m n), ys)
+      | _ => None
+      end
+  | Oandlimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oandlimm m) ys) =>
+          Some(let p := Int64.and m n in
+               if Int64.eq p m then (Omove, x :: nil) else (Oandlimm p, ys))
+      | _ => None
+      end
+  | Oorlimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oorlimm m) ys) => Some(Oorlimm (Int64.or m n), ys)
+      | _ => None
+      end
+  | Oxorlimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oxorlimm m) ys) => Some(Oxorlimm (Int64.xor m n), ys)
+      | _ => None
+      end
+  | Ocmp cond, _ =>
+      match combine_cond cond args with
+      | Some(cond', args') => Some(Ocmp cond', args')
+      | None => None
+      end
+  | _, _ => None
+  end.
+
+End COMBINE.
+
+
diff --git a/aarch64/CombineOpproof.v b/aarch64/CombineOpproof.v
new file mode 100644
index 00000000..7d13b964
--- /dev/null
+++ b/aarch64/CombineOpproof.v
@@ -0,0 +1,161 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+Require Import FunInd.
+Require Import Coqlib.
+Require Import AST Integers Values Memory.
+Require Import Op Registers RTL.
+Require Import CSEdomain.
+Require Import CombineOp.
+
+Section COMBINE.
+
+Variable ge: genv.
+Variable sp: val.
+Variable m: mem.
+Variable get: valnum -> option rhs.
+Variable valu: valnum -> val.
+Hypothesis get_sound: forall v rhs, get v = Some rhs -> rhs_eval_to valu ge sp m rhs (valu v).
+
+Lemma get_op_sound:
+  forall v op vl, get v = Some (Op op vl) -> eval_operation ge sp op (map valu vl) m = Some (valu v).
+Proof.
+  intros. exploit get_sound; eauto. intros REV; inv REV; auto.
+Qed.
+
+Ltac UseGetSound :=
+  match goal with
+  | [ H: get _ = Some _ |- _ ] =>
+      let x := fresh "EQ" in (generalize (get_op_sound _ _ _ H); intros x; simpl in x; FuncInv)
+  end.
+
+Lemma combine_compimm_ne_0_sound:
+  forall x cond args,
+  combine_compimm_ne_0 get x = Some(cond, args) ->
+  eval_condition cond (map valu args) m = Val.cmp_bool Cne (valu x) (Vint Int.zero) /\
+  eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Cne (valu x) (Vint Int.zero).
+Proof.
+  intros until args. functional induction (combine_compimm_ne_0 get x); intros EQ; inv EQ.
+  (* of cmp *)
+  UseGetSound. rewrite <- H.
+  destruct (eval_condition cond (map valu args) m); simpl; auto. destruct b; auto.
+Qed.
+
+Lemma combine_compimm_eq_0_sound:
+  forall x cond args,
+  combine_compimm_eq_0 get x = Some(cond, args) ->
+  eval_condition cond (map valu args) m = Val.cmp_bool Ceq (valu x) (Vint Int.zero) /\
+  eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Ceq (valu x) (Vint Int.zero).
+Proof.
+  intros until args. functional induction (combine_compimm_eq_0 get x); intros EQ; inv EQ.
+  (* of cmp *)
+  UseGetSound. rewrite <- H.
+  rewrite eval_negate_condition.
+  destruct (eval_condition c (map valu args) m); simpl; auto. destruct b; auto.
+Qed.
+
+Lemma combine_compimm_eq_1_sound:
+  forall x cond args,
+  combine_compimm_eq_1 get x = Some(cond, args) ->
+  eval_condition cond (map valu args) m = Val.cmp_bool Ceq (valu x) (Vint Int.one) /\
+  eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Ceq (valu x) (Vint Int.one).
+Proof.
+  intros until args. functional induction (combine_compimm_eq_1 get x); intros EQ; inv EQ.
+  (* of cmp *)
+  UseGetSound. rewrite <- H.
+  destruct (eval_condition cond (map valu args) m); simpl; auto. destruct b; auto.
+Qed.
+
+Lemma combine_compimm_ne_1_sound:
+  forall x cond args,
+  combine_compimm_ne_1 get x = Some(cond, args) ->
+  eval_condition cond (map valu args) m = Val.cmp_bool Cne (valu x) (Vint Int.one) /\
+  eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Cne (valu x) (Vint Int.one).
+Proof.
+  intros until args. functional induction (combine_compimm_ne_1 get x); intros EQ; inv EQ.
+  (* of cmp *)
+  UseGetSound. rewrite <- H.
+  rewrite eval_negate_condition.
+  destruct (eval_condition c (map valu args) m); simpl; auto. destruct b; auto.
+Qed.
+
+Theorem combine_cond_sound:
+  forall cond args cond' args',
+  combine_cond get cond args = Some(cond', args') ->
+  eval_condition cond' (map valu args') m = eval_condition cond (map valu args) m.
+Proof.
+  intros. functional inversion H; subst.
+  (* compimm ne zero *)
+  - simpl; eapply combine_compimm_ne_0_sound; eauto.
+  (* compimm ne one *)
+  - simpl; eapply combine_compimm_ne_1_sound; eauto.
+  (* compimm eq zero *)
+  - simpl; eapply combine_compimm_eq_0_sound; eauto.
+  (* compimm eq one *)
+  - simpl; eapply combine_compimm_eq_1_sound; eauto.
+  (* compuimm ne zero *)
+  - simpl; eapply combine_compimm_ne_0_sound; eauto.
+  (* compuimm ne one *)
+  - simpl; eapply combine_compimm_ne_1_sound; eauto.
+  (* compuimm eq zero *)
+  - simpl; eapply combine_compimm_eq_0_sound; eauto.
+  (* compuimm eq one *)
+  - simpl; eapply combine_compimm_eq_1_sound; eauto.
+Qed.
+
+Theorem combine_addr_sound:
+  forall addr args addr' args',
+  combine_addr get addr args = Some(addr', args') ->
+  eval_addressing ge sp addr' (map valu args') = eval_addressing ge sp addr (map valu args).
+Proof.
+  intros. functional inversion H; subst.
+- (* indexed - addimml *)
+  UseGetSound. simpl. rewrite <- H0. rewrite Val.addl_assoc. auto. 
+Qed.
+
+Theorem combine_op_sound:
+  forall op args op' args',
+  combine_op get op args = Some(op', args') ->
+  eval_operation ge sp op' (map valu args') m = eval_operation ge sp op (map valu args) m.
+Proof.
+  intros. functional inversion H; subst.
+  (* addimm - addimm *)
+  - UseGetSound. FuncInv. simpl.
+    rewrite <- H0. rewrite Val.add_assoc. auto.
+  (* andimm - andimm *)
+  - UseGetSound; simpl.
+    generalize (Int.eq_spec p m0); rewrite H7; intros.
+    rewrite <- H0. rewrite Val.and_assoc. simpl. fold p. rewrite H1. auto.
+  - UseGetSound; simpl.
+    rewrite <- H0. rewrite Val.and_assoc. auto.
+  (* orimm - orimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.or_assoc. auto.
+  (* xorimm - xorimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.xor_assoc. auto.
+  (* addlimm - addlimm *)
+  - UseGetSound. FuncInv. simpl.
+    rewrite <- H0. rewrite Val.addl_assoc. auto.
+  (* andlimm - andlimm *)
+  - UseGetSound; simpl.
+    generalize (Int64.eq_spec p m0); rewrite H7; intros.
+    rewrite <- H0. rewrite Val.andl_assoc. simpl. fold p. rewrite H1. auto.
+  - UseGetSound; simpl.
+    rewrite <- H0. rewrite Val.andl_assoc. auto.
+  (* orlimm - orlimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.orl_assoc. auto.
+  (* xorlimm - xorlimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.xorl_assoc. auto.
+  (* cmp *)
+  - simpl. decEq; decEq. eapply combine_cond_sound; eauto.
+Qed.
+
+End COMBINE.
diff --git a/aarch64/ConstpropOp.vp b/aarch64/ConstpropOp.vp
new file mode 100644
index 00000000..c0a2c6bf
--- /dev/null
+++ b/aarch64/ConstpropOp.vp
@@ -0,0 +1,401 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Strength reduction for operators and conditions.
+    This is the machine-dependent part of [Constprop]. *)
+
+Require Archi.
+Require Import Coqlib Compopts.
+Require Import AST Integers Floats.
+Require Import Op Registers.
+Require Import ValueDomain ValueAOp.
+
+(** * Converting known values to constants *)
+
+Definition const_for_result (a: aval) : option operation :=
+  match a with
+  | I n => Some(Ointconst n)
+  | L n => Some(Olongconst n)
+  | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None
+  | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None
+  | Ptr(Gl id ofs) => Some(Oaddrsymbol id ofs)
+  | Ptr(Stk ofs) => Some(Oaddrstack ofs)
+  | _ => None
+  end.
+
+(** * Operator strength reduction *)
+
+Definition eval_static_shift (s: shift) (v: int) (n: amount32) : int :=
+  match s with
+  | Slsl => Int.shl v n
+  | Slsr => Int.shru v n
+  | Sasr => Int.shr v n
+  | Sror => Int.ror v n
+  end.
+
+Definition eval_static_shiftl (s: shift) (v: int64) (n: amount64) : int64 :=
+  match s with
+  | Slsl => Int64.shl' v n
+  | Slsr => Int64.shru' v n
+  | Sasr => Int64.shr' v n
+  | Sror => Int64.ror v (Int64.repr (Int.unsigned n))
+  end.
+
+Definition eval_static_extend (x: extension) (v: int) (n: amount64) : int64 :=
+  Int64.shl' (match x with Xsgn32 => Int64.repr (Int.signed v)
+                         | Xuns32 => Int64.repr (Int.unsigned v) end)
+             n.
+
+Nondetfunction cond_strength_reduction 
+              (cond: condition) (args: list reg) (vl: list aval) :=
+  match cond, args, vl with
+  | Ccomp c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
+      (Ccompimm (swap_comparison c) n1, r2 :: nil)
+  | Ccomp c, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
+      (Ccompimm c n2, r1 :: nil)
+  | Ccompu c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
+      (Ccompuimm (swap_comparison c) n1, r2 :: nil)
+  | Ccompu c, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
+      (Ccompuimm c n2, r1 :: nil)
+  | Ccompshift c s a, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
+      (Ccompimm c (eval_static_shift s n2 a), r1 :: nil)
+  | Ccompushift c s a, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
+      (Ccompuimm c (eval_static_shift s n2 a), r1 :: nil)
+  | Ccompl c, r1 :: r2 :: nil, L n1 :: v2 :: nil =>
+      (Ccomplimm (swap_comparison c) n1, r2 :: nil)
+  | Ccompl c, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+      (Ccomplimm c n2, r1 :: nil)
+  | Ccomplu c, r1 :: r2 :: nil, L n1 :: v2 :: nil =>
+      (Ccompluimm (swap_comparison c) n1, r2 :: nil)
+  | Ccomplu c, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+      (Ccompluimm c n2, r1 :: nil)
+  | Ccomplshift c s a, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+      (Ccomplimm c (eval_static_shiftl s n2 a), r1 :: nil)
+  | Ccomplushift c s a, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+      (Ccompluimm c (eval_static_shiftl s n2 a), r1 :: nil)
+  | Ccompf c, r1 :: r2 :: nil, F n1 :: v2 :: nil =>
+      if Float.eq_dec n1 Float.zero
+      then (Ccompfzero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Ccompf c, r1 :: r2 :: nil, v1 :: F n2 :: nil =>
+      if Float.eq_dec n2 Float.zero
+      then (Ccompfzero c, r1 :: nil)
+      else (cond, args)
+  | Cnotcompf c, r1 :: r2 :: nil, F n1 :: v2 :: nil =>
+      if Float.eq_dec n1 Float.zero
+      then (Cnotcompfzero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Cnotcompf c, r1 :: r2 :: nil, v1 :: F n2 :: nil =>
+      if Float.eq_dec n2 Float.zero
+      then (Cnotcompfzero c, r1 :: nil)
+      else (cond, args)
+  | Ccompfs c, r1 :: r2 :: nil, FS n1 :: v2 :: nil =>
+      if Float32.eq_dec n1 Float32.zero
+      then (Ccompfszero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Ccompfs c, r1 :: r2 :: nil, v1 :: FS n2 :: nil =>
+      if Float32.eq_dec n2 Float32.zero
+      then (Ccompfszero c, r1 :: nil)
+      else (cond, args)
+  | Cnotcompfs c, r1 :: r2 :: nil, FS n1 :: v2 :: nil =>
+      if Float32.eq_dec n1 Float32.zero
+      then (Cnotcompfszero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Cnotcompfs c, r1 :: r2 :: nil, v1 :: FS n2 :: nil =>
+      if Float32.eq_dec n2 Float32.zero
+      then (Cnotcompfszero c, r1 :: nil)
+      else (cond, args)
+  | _, _, _ => 
+      (cond, args)
+  end.
+
+Definition make_cmp_base (c: condition) (args: list reg) (vl: list aval) :=
+  let (c', args') := cond_strength_reduction c args vl in (Ocmp c', args').
+
+Definition make_cmp_imm_eq (c: condition) (args: list reg) (vl: list aval) 
+                           (n: int) (r1: reg) (v1: aval) :=
+  if Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1) then (Omove, r1 :: nil)
+  else if Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1) then (Oxorimm Int.one, r1 :: nil)
+  else make_cmp_base c args vl.
+
+Definition make_cmp_imm_ne (c: condition) (args: list reg) (vl: list aval) 
+                           (n: int) (r1: reg) (v1: aval) :=
+  if Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1) then (Omove, r1 :: nil)
+  else if Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1) then (Oxorimm Int.one, r1 :: nil)
+  else make_cmp_base c args vl.
+
+Nondetfunction make_cmp (c: condition) (args: list reg) (vl: list aval) :=
+  match c, args, vl with
+  | Ccompimm Ceq n, r1 :: nil, v1 :: nil =>
+      make_cmp_imm_eq c args vl n r1 v1
+  | Ccompimm Cne n, r1 :: nil, v1 :: nil =>
+      make_cmp_imm_ne c args vl n r1 v1
+  | Ccompuimm Ceq n, r1 :: nil, v1 :: nil =>
+      make_cmp_imm_eq c args vl n r1 v1
+  | Ccompuimm Cne n, r1 :: nil, v1 :: nil =>
+      make_cmp_imm_ne c args vl n r1 v1
+  | _, _, _ =>
+      make_cmp_base c args vl
+  end.
+
+Definition make_select (c: condition) (ty: typ)
+                       (r1 r2: reg) (args: list reg) (vl: list aval) :=
+  match resolve_branch (eval_static_condition c vl) with
+  | Some b => (Omove, (if b then r1 else r2) :: nil)
+  | None =>
+      let (c', args') := cond_strength_reduction c args vl in
+      (Osel c' ty, r1 :: r2 :: args')
+  end.
+
+Definition make_addimm (n: int) (r: reg) :=
+  if Int.eq n Int.zero
+  then (Omove, r :: nil)
+  else (Oaddimm n, r :: nil).
+
+Definition make_shlimm (n: int) (r1 r2: reg) :=
+  if Int.eq n Int.zero then (Omove, r1 :: nil)
+  else if Int.ltu n Int.iwordsize then (Oshift Slsl (mk_amount32 n), r1 :: nil)
+  else (Oshl, r1 :: r2 :: nil).
+
+Definition make_shrimm (n: int) (r1 r2: reg) :=
+  if Int.eq n Int.zero then (Omove, r1 :: nil)
+  else if Int.ltu n Int.iwordsize then (Oshift Sasr (mk_amount32 n), r1 :: nil)
+  else (Oshr, r1 :: r2 :: nil).
+
+Definition make_shruimm (n: int) (r1 r2: reg) :=
+  if Int.eq n Int.zero then (Omove, r1 :: nil)
+  else if Int.ltu n Int.iwordsize then (Oshift Slsr (mk_amount32 n), r1 :: nil)
+  else (Oshru, r1 :: r2 :: nil).
+
+Definition make_mulimm (n: int) (r1 r2: reg) :=
+  if Int.eq n Int.zero then
+    (Ointconst Int.zero, nil)
+  else if Int.eq n Int.one then
+    (Omove, r1 :: nil)
+  else
+    match Int.is_power2 n with
+    | Some l => (Oshift Slsl (mk_amount32 l), r1 :: nil)
+    | None => (Omul, r1 :: r2 :: nil)
+    end.
+
+Definition make_andimm (n: int) (r: reg) (a: aval) :=
+  if Int.eq n Int.zero then (Ointconst Int.zero, nil)
+  else if Int.eq n Int.mone then (Omove, r :: nil)
+  else if match a with Uns _ m => Int.eq (Int.zero_ext m (Int.not n)) Int.zero
+                     | _ => false end
+  then (Omove, r :: nil)
+  else (Oandimm n, r :: nil).
+
+Definition make_orimm (n: int) (r: reg) :=
+  if Int.eq n Int.zero then (Omove, r :: nil)
+  else if Int.eq n Int.mone then (Ointconst Int.mone, nil)
+  else (Oorimm n, r :: nil).
+
+Definition make_xorimm (n: int) (r: reg) :=
+  if Int.eq n Int.zero then (Omove, r :: nil)
+  else (Oxorimm n, r :: nil).
+
+Definition make_divimm n (r1 r2: reg) :=
+  if Int.eq n Int.one then
+    (Omove, r1 :: nil)
+  else
+    match Int.is_power2 n with
+    | Some l => if Int.ltu l (Int.repr 31)
+                then (Oshrximm l, r1 :: nil)
+                else (Odiv, r1 :: r2 :: nil)
+    | None   => (Odiv, r1 :: r2 :: nil)
+    end.
+
+Definition make_divuimm n (r1 r2: reg) :=
+  if Int.eq n Int.one then
+    (Omove, r1 :: nil)
+  else
+    match Int.is_power2 n with
+    | Some l => (Oshift Slsr (mk_amount32 l), r1 :: nil)
+    | None   => (Odivu, r1 :: r2 :: nil)
+    end.
+
+Definition make_addlimm (n: int64) (r: reg) :=
+  if Int64.eq n Int64.zero
+  then (Omove, r :: nil)
+  else (Oaddlimm n, r :: nil).
+
+Definition make_shllimm (n: int) (r1 r2: reg) :=
+  if Int.eq n Int.zero then (Omove, r1 :: nil)
+  else if Int.ltu n Int64.iwordsize' then (Oshiftl Slsl (mk_amount64 n), r1 :: nil)
+  else (Oshll, r1 :: r2 :: nil).
+
+Definition make_shrlimm (n: int) (r1 r2: reg) :=
+  if Int.eq n Int.zero then (Omove, r1 :: nil)
+  else if Int.ltu n Int64.iwordsize' then (Oshiftl Sasr (mk_amount64 n), r1 :: nil)
+  else (Oshrl, r1 :: r2 :: nil).
+
+Definition make_shrluimm (n: int) (r1 r2: reg) :=
+  if Int.eq n Int.zero then (Omove, r1 :: nil)
+  else if Int.ltu n Int64.iwordsize' then (Oshiftl Slsr (mk_amount64 n), r1 :: nil)
+  else (Oshrlu, r1 :: r2 :: nil).
+
+Definition make_mullimm (n: int64) (r1 r2: reg) :=
+  if Int64.eq n Int64.zero then
+    (Olongconst Int64.zero, nil)
+  else if Int64.eq n Int64.one then
+    (Omove, r1 :: nil)
+  else
+    match Int64.is_power2' n with
+    | Some l => (Oshiftl Slsl (mk_amount64 l), r1 :: nil)
+    | None => (Omull, r1 :: r2 :: nil)
+    end.
+
+Definition make_andlimm (n: int64) (r: reg) (a: aval) :=
+  if Int64.eq n Int64.zero then (Olongconst Int64.zero, nil)
+  else if Int64.eq n Int64.mone then (Omove, r :: nil)
+  else (Oandlimm n, r :: nil).
+
+Definition make_orlimm (n: int64) (r: reg) :=
+  if Int64.eq n Int64.zero then (Omove, r :: nil)
+  else if Int64.eq n Int64.mone then (Olongconst Int64.mone, nil)
+  else (Oorlimm n, r :: nil).
+
+Definition make_xorlimm (n: int64) (r: reg) :=
+  if Int64.eq n Int64.zero then (Omove, r :: nil)
+  else (Oxorlimm n, r :: nil).
+
+Definition make_divlimm n (r1 r2: reg) :=
+  match Int64.is_power2' n with
+  | Some l => if Int.ltu l (Int.repr 63)
+              then (Oshrlximm l, r1 :: nil)
+              else (Odivl, r1 :: r2 :: nil)
+  | None   => (Odivl, r1 :: r2 :: nil)
+  end.
+
+Definition make_divluimm n (r1 r2: reg) :=
+  match Int64.is_power2' n with
+  | Some l => (Oshiftl Slsr (mk_amount64 l), r1 :: nil)
+  | None   => (Odivlu, r1 :: r2 :: nil)
+  end.
+
+Definition make_mulfimm (n: float) (r r1 r2: reg) :=
+  if Float.eq_dec n (Float.of_int (Int.repr 2))
+  then (Oaddf, r :: r :: nil)
+  else (Omulf, r1 :: r2 :: nil).
+
+Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
+  if Float32.eq_dec n (Float32.of_int (Int.repr 2))
+  then (Oaddfs, r :: r :: nil)
+  else (Omulfs, r1 :: r2 :: nil).
+
+Definition make_zext (s: Z) (r: reg) (a: aval) :=
+  if vincl a (Uns Ptop s) then (Omove, r :: nil) else (Ozext s, r :: nil).
+
+Definition make_sext (s: Z) (r: reg) (a: aval) :=
+  if vincl a (Sgn Ptop s) then (Omove, r :: nil) else (Osext s, r :: nil).
+
+Nondetfunction op_strength_reduction 
+              (op: operation) (args: list reg) (vl: list aval) :=
+  match op, args, vl with
+  | Oadd, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_addimm n1 r2
+  | Oadd, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm n2 r1
+  | Oaddshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm (eval_static_shift s n2 a) r1
+  | Osub, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm (Int.neg n2) r1
+  | Osubshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm (Int.neg (eval_static_shift s n2 a)) r1
+  | Omul, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_mulimm n1 r2 r1
+  | Omul, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_mulimm n2 r1 r2
+  | Odiv, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divimm n2 r1 r2
+  | Odivu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divuimm n2 r1 r2
+  | Oand, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_andimm n1 r2 v2
+  | Oand, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_andimm n2 r1 v1
+  | Oandshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_andimm (eval_static_shift s n2 a) r1 v1
+  | Oandimm n, r1 :: nil, v1 :: nil => make_andimm n r1 v1
+  | Oor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_orimm n1 r2
+  | Oor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_orimm n2 r1
+  | Oorshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_orimm (eval_static_shift s n2 a) r1
+  | Oxor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_xorimm n1 r2
+  | Oxor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_xorimm n2 r1
+  | Oxorshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_xorimm (eval_static_shift s n2 a) r1
+  | Obic, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_andimm (Int.not n2) r1 v1
+  | Obicshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_andimm (Int.not (eval_static_shift s n2 a)) r1 v1
+  | Oorn, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_orimm (Int.not n2) r1
+  | Oornshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_orimm (Int.not (eval_static_shift s n2 a)) r1
+  | Oeqv, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_xorimm (Int.not n2) r1
+  | Oeqvshift s a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_xorimm (Int.not (eval_static_shift s n2 a)) r1
+  | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1 r2
+  | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2
+  | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
+  | Ozext s, r1 :: nil, v1 :: nil => make_zext s r1 v1
+  | Osext s, r1 :: nil, v1 :: nil => make_sext s r1 v1
+
+  | Oaddl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_addlimm n1 r2
+  | Oaddl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm n2 r1
+  | Oaddlshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm (eval_static_shiftl s n2 a) r1
+  | Oaddlext x a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addlimm (eval_static_extend x n2 a) r1
+  | Osubl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm (Int64.neg n2) r1
+  | Osublshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm (Int64.neg (eval_static_shiftl s n2 a)) r1
+  | Osublext x a, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addlimm (Int64.neg (eval_static_extend x n2 a)) r1
+  | Omull, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_mullimm n1 r2 r1
+  | Omull, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_mullimm n2 r1 r2
+  | Odivl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divlimm n2 r1 r2
+  | Odivlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divluimm n2 r1 r2
+  | Oandl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_andlimm n1 r2 v2
+  | Oandl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_andlimm n2 r1 v1
+  | Oandlshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_andlimm (eval_static_shiftl s n2 a) r1 v1
+  | Oandlimm n, r1 :: nil, v1 :: nil => make_andlimm n r1 v1
+  | Oorl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_orlimm n1 r2
+  | Oorl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_orlimm n2 r1
+  | Oorlshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_orlimm (eval_static_shiftl s n2 a) r1
+  | Oxorl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_xorlimm n1 r2
+  | Oxorl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_xorlimm n2 r1
+  | Oxorlshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_xorlimm (eval_static_shiftl s n2 a) r1
+  | Obicl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_andlimm (Int64.not n2) r1 v1
+  | Obiclshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_andlimm (Int64.not (eval_static_shiftl s n2 a)) r1 v1
+  | Oornl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_orlimm (Int64.not n2) r1
+  | Oornlshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_orlimm (Int64.not (eval_static_shiftl s n2 a)) r1
+  | Oeqvl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_xorlimm (Int64.not n2) r1
+  | Oeqvlshift s a, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_xorlimm (Int64.not (eval_static_shiftl s n2 a)) r1
+  | Oshll, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shllimm n2 r1 r2
+  | Oshrl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrlimm n2 r1 r2
+  | Oshrlu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrluimm n2 r1 r2
+  | Ocmp c, args, vl => make_cmp c args vl
+  | Osel c ty, r1 :: r2 :: args, v1 :: v2 :: vl => make_select c ty r1 r2 args vl
+  | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
+  | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
+  | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
+  | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
+  | _, _, _ => (op, args)
+  end.
+
+Nondetfunction addr_strength_reduction
+                (addr: addressing) (args: list reg) (vl: list aval) :=
+  match addr, args, vl with
+  | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil =>
+      (Aglobal symb (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil)
+  | Aindexed n, r1 :: nil, Ptr(Stk n1) :: nil =>
+      (Ainstack (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil)
+  | Aindexed2, r1 :: r2 :: nil, Ptr(Stk n1) :: L n2 :: nil =>
+      (Ainstack (Ptrofs.add n1 (Ptrofs.of_int64 n2)), nil)
+  | Aindexed2, r1 :: r2 :: nil, L n1 :: Ptr(Stk n2) :: nil =>
+      (Ainstack (Ptrofs.add (Ptrofs.of_int64 n1) n2), nil)
+  | Aindexed2, r1 :: r2 :: nil, L n1 :: v2 :: nil =>
+      (Aindexed n1, r2 :: nil)
+  | Aindexed2, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+      (Aindexed n2, r1 :: nil)
+  | Aindexed2shift a, r1 :: r2 :: nil, Ptr(Stk n1) :: L n2 :: nil =>
+      (Ainstack (Ptrofs.add n1 (Ptrofs.of_int64 (Int64.shl' n2 a))), nil)
+  | Aindexed2shift a, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+      (Aindexed (Int64.shl' n2 a), r1 :: nil)
+  | Aindexed2ext x a, r1 :: r2 :: nil, Ptr(Stk n1) :: I n2 :: nil =>
+      (Ainstack (Ptrofs.add n1 (Ptrofs.of_int64 (eval_static_extend x n2 a))), nil)
+  | Aindexed2ext x a, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
+      (Aindexed (eval_static_extend x n2 a), r1 :: nil)
+  | _, _, _ =>
+      (addr, args)
+  end.
+
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
new file mode 100644
index 00000000..deab7cd4
--- /dev/null
+++ b/aarch64/ConstpropOpproof.v
@@ -0,0 +1,838 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for operator strength reduction. *)
+
+Require Import Coqlib Compopts.
+Require Import Integers Floats Values Memory Globalenvs Events.
+Require Import Op Registers RTL ValueDomain ValueAOp ValueAnalysis.
+Require Import ConstpropOp.
+
+Local Transparent Archi.ptr64.
+
+Section STRENGTH_REDUCTION.
+
+Variable bc: block_classification.
+Variable ge: genv.
+Hypothesis GENV: genv_match bc ge.
+Variable sp: block.
+Hypothesis STACK: bc sp = BCstack.
+Variable ae: AE.t.
+Variable e: regset.
+Variable m: mem.
+Hypothesis MATCH: ematch bc e ae.
+
+Lemma match_G:
+  forall r id ofs,
+  AE.get r ae = Ptr(Gl id ofs) -> Val.lessdef e#r (Genv.symbol_address ge id ofs).
+Proof.
+  intros. apply vmatch_ptr_gl with bc; auto. rewrite <- H. apply MATCH.
+Qed.
+
+Lemma match_S:
+  forall r ofs,
+  AE.get r ae = Ptr(Stk ofs) -> Val.lessdef e#r (Vptr sp ofs).
+Proof.
+  intros. apply vmatch_ptr_stk with bc; auto. rewrite <- H. apply MATCH.
+Qed.
+
+Ltac InvApproxRegs :=
+  match goal with
+  | [ H: _ :: _ = _ :: _ |- _ ] =>
+        injection H; clear H; intros; InvApproxRegs
+  | [ H: ?v = AE.get ?r ae |- _ ] =>
+        generalize (MATCH r); rewrite <- H; clear H; intro; InvApproxRegs
+  | _ => idtac
+  end.
+
+Ltac SimplVM :=
+  match goal with
+  | [ H: vmatch _ ?v (I ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vint n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (L ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vlong n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (F ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vfloat n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (FS ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vsingle n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] =>
+      let E := fresh in
+      assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto);
+      clear H; SimplVM
+  | [ H: vmatch _ ?v (Ptr(Stk ?ofs)) |- _ ] =>
+      let E := fresh in
+      assert (E: Val.lessdef v (Vptr sp ofs)) by (eapply vmatch_ptr_stk; eauto);
+      clear H; SimplVM
+  | _ => idtac
+  end.
+
+Lemma const_for_result_correct:
+  forall a op v,
+  const_for_result a = Some op ->
+  vmatch bc v a ->
+  exists v', eval_operation ge (Vptr sp Ptrofs.zero) op nil m = Some v' /\ Val.lessdef v v'.
+Proof.
+  unfold const_for_result; intros; destruct a; inv H; SimplVM.
+- (* integer *)
+  exists (Vint n); auto.
+- (* long *)
+  exists (Vlong n); auto.
+- (* float *)
+  destruct (Compopts.generate_float_constants tt); inv H2. exists (Vfloat f); auto.
+- (* single *)
+  destruct (Compopts.generate_float_constants tt); inv H2. exists (Vsingle f); auto.
+- (* pointer *)
+  destruct p; try discriminate; SimplVM.
+  + (* global *)
+    inv H2. exists (Genv.symbol_address ge id ofs); auto.
+  + (* stack *)
+    inv H2. exists (Vptr sp ofs); split; auto. simpl. rewrite Ptrofs.add_zero_l; auto.
+Qed.
+
+Lemma eval_static_shift_correct: forall s v a,
+  eval_shift s (Vint v) a = Vint (eval_static_shift s v a).
+Proof.
+  intros; destruct s; simpl; rewrite ? a32_range; auto.
+Qed.
+
+Lemma eval_static_shiftl_correct: forall s v a,
+  eval_shiftl s (Vlong v) a = Vlong (eval_static_shiftl s v a).
+Proof.
+  intros; destruct s; simpl; rewrite ? a64_range; auto.
+Qed.
+
+Lemma eval_static_extend_correct: forall x v a,
+  eval_extend x (Vint v) a = Vlong (eval_static_extend x v a).
+Proof.
+  unfold eval_extend, eval_static_extend; intros; destruct x; simpl; rewrite ? a64_range; auto.
+Qed.
+
+Lemma cond_strength_reduction_correct:
+  forall cond args vl,
+  vl = map (fun r => AE.get r ae) args ->
+  let (cond', args') := cond_strength_reduction cond args vl in
+  eval_condition cond' e##args' m = eval_condition cond e##args m.
+Proof.
+  intros until vl. unfold cond_strength_reduction.
+  case (cond_strength_reduction_match cond args vl); simpl; intros; InvApproxRegs; SimplVM.
+- apply Val.swap_cmp_bool.
+- auto.
+- apply Val.swap_cmpu_bool.
+- auto.
+- rewrite eval_static_shift_correct; auto.
+- rewrite eval_static_shift_correct; auto.
+- apply Val.swap_cmpl_bool.
+- auto.
+- apply Val.swap_cmplu_bool.
+- auto.
+- rewrite eval_static_shiftl_correct; auto.
+- rewrite eval_static_shiftl_correct; auto.
+- destruct (Float.eq_dec n1 Float.zero).
+  subst n1. simpl. destruct (e#r2); simpl; auto. rewrite Float.cmp_swap. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float.eq_dec n2 Float.zero).
+  subst n2. simpl. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float.eq_dec n1 Float.zero).
+  subst n1. simpl. destruct (e#r2); simpl; auto. rewrite Float.cmp_swap. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float.eq_dec n2 Float.zero); simpl; auto.
+  subst n2; auto.
+  rewrite H1; auto.
+- destruct (Float32.eq_dec n1 Float32.zero).
+  subst n1. simpl. destruct (e#r2); simpl; auto. rewrite Float32.cmp_swap. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float32.eq_dec n2 Float32.zero).
+  subst n2. simpl. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float32.eq_dec n1 Float32.zero).
+  subst n1. simpl. destruct (e#r2); simpl; auto. rewrite Float32.cmp_swap. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float32.eq_dec n2 Float32.zero); simpl; auto.
+  subst n2; auto.
+  rewrite H1; auto.
+- auto.
+Qed.
+
+Lemma make_cmp_base_correct:
+  forall c args vl,
+  vl = map (fun r => AE.get r ae) args ->
+  let (op', args') := make_cmp_base c args vl in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v
+         /\ Val.lessdef (Val.of_optbool (eval_condition c e##args m)) v.
+Proof.
+  intros. unfold make_cmp_base.
+  generalize (cond_strength_reduction_correct c args vl H).
+  destruct (cond_strength_reduction c args vl) as [c' args']. intros EQ.
+  econstructor; split. simpl; eauto. rewrite EQ. auto.
+Qed.
+
+Lemma make_cmp_correct:
+  forall c args vl,
+  vl = map (fun r => AE.get r ae) args ->
+  let (op', args') := make_cmp c args vl in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v
+         /\ Val.lessdef (Val.of_optbool (eval_condition c e##args m)) v.
+Proof.
+  intros c args vl.
+  assert (Y: forall r, vincl (AE.get r ae) (Uns Ptop 1) = true ->
+             e#r = Vundef \/ e#r = Vint Int.zero \/ e#r = Vint Int.one).
+  { intros. apply vmatch_Uns_1 with bc Ptop. eapply vmatch_ge. eapply vincl_ge; eauto. apply MATCH. }
+  unfold make_cmp. case (make_cmp_match c args vl); intros.
+- unfold make_cmp_imm_eq.
+  destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
++ simpl in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
+* simpl in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* apply make_cmp_base_correct; auto.
+- unfold make_cmp_imm_ne.
+  destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
++ simpl in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
+* simpl in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* apply make_cmp_base_correct; auto.
+- unfold make_cmp_imm_eq.
+  destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
++ simpl in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
+* simpl in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* apply make_cmp_base_correct; auto.
+- unfold make_cmp_imm_ne.
+  destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
++ simpl in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
+* simpl in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* apply make_cmp_base_correct; auto.
+- apply make_cmp_base_correct; auto.
+Qed.
+
+Lemma make_select_correct:
+  forall c ty r1 r2 args vl,
+  vl = map (fun r => AE.get r ae) args ->
+  let (op', args') := make_select c ty r1 r2 args vl in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v
+         /\ Val.lessdef (Val.select (eval_condition c e##args m) e#r1 e#r2 ty) v.
+Proof.
+  unfold make_select; intros.
+  destruct (resolve_branch (eval_static_condition c vl)) as [b|] eqn:RB.
+- exists (if b then e#r1 else e#r2); split.
++ simpl. destruct b; auto.
++ destruct (eval_condition c e##args m) as [b'|] eqn:EC; simpl; auto.
+  assert (b = b').
+  { eapply resolve_branch_sound; eauto. 
+    rewrite <- EC. apply eval_static_condition_sound with bc. 
+    subst vl. exact (aregs_sound _ _ _ args MATCH). }
+  subst b'. apply Val.lessdef_normalize.
+- generalize (cond_strength_reduction_correct c args vl H).
+  destruct (cond_strength_reduction c args vl) as [cond' args']; intros EQ.
+  econstructor; split. simpl; eauto. rewrite EQ; auto.
+Qed.
+
+Lemma make_addimm_correct:
+  forall n r,
+  let (op, args) := make_addimm n r in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.add e#r (Vint n)) v.
+Proof.
+  intros. unfold make_addimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros.
+  subst. exists (e#r); split; auto.
+  destruct (e#r); simpl; rewrite ?Int.add_zero; auto.
+  exists (Val.add e#r (Vint n)); split; auto.
+Qed.
+
+Lemma make_shlimm_correct:
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_shlimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shl e#r1 (Vint n)) v.
+Proof.
+Local Opaque mk_amount32.
+  intros; unfold make_shlimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.shl_zero. auto.
+  destruct (Int.ltu n Int.iwordsize) eqn:L.
+  econstructor; split. simpl. eauto. rewrite mk_amount32_eq; auto.
+  econstructor; split. simpl. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shrimm_correct:
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_shrimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shr e#r1 (Vint n)) v.
+Proof.
+  intros; unfold make_shrimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.shr_zero. auto.
+  destruct (Int.ltu n Int.iwordsize) eqn:L.
+  econstructor; split. simpl. eauto. rewrite mk_amount32_eq; auto.
+  econstructor; split. simpl. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shruimm_correct:
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_shruimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shru e#r1 (Vint n)) v.
+Proof.
+  intros; unfold make_shruimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.shru_zero. auto.
+  destruct (Int.ltu n Int.iwordsize) eqn:L.
+  econstructor; split. simpl. eauto. rewrite mk_amount32_eq; auto.
+  econstructor; split. simpl. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_mulimm_correct:
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_mulimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mul e#r1 (Vint n)) v.
+Proof.
+  intros; unfold make_mulimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+  exists (Vint Int.zero); split; auto. destruct (e#r1); simpl; auto. rewrite Int.mul_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.mul_one; auto.
+  destruct (Int.is_power2 n) eqn:?; intros.
+  rewrite (Val.mul_pow2 e#r1 _ _ Heqo). econstructor; split. simpl; eauto.
+  rewrite mk_amount32_eq; auto. eapply Int.is_power2_range; eauto.
+  econstructor; split; eauto. simpl. rewrite H; auto.
+Qed.
+
+Lemma make_divimm_correct:
+  forall n r1 r2 v,
+  Val.divs e#r1 e#r2 = Some v ->
+  e#r2 = Vint n ->
+  let (op, args) := make_divimm n r1 r2 in
+  exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+  intros; unfold make_divimm.
+  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
+  destruct (e#r1) eqn:?;
+    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
+    inv H; auto.
+  destruct (Int.is_power2 n) eqn:?.
+  destruct (Int.ltu i (Int.repr 31)) eqn:?.
+  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
+  exists v; auto.
+  exists v; auto.
+Qed.
+
+Lemma make_divuimm_correct:
+  forall n r1 r2 v,
+  Val.divu e#r1 e#r2 = Some v ->
+  e#r2 = Vint n ->
+  let (op, args) := make_divuimm n r1 r2 in
+  exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+  intros; unfold make_divuimm.
+  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
+  destruct (e#r1) eqn:?;
+    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
+    inv H; auto.
+  destruct (Int.is_power2 n) eqn:?.
+  econstructor; split. simpl; eauto.
+  rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
+  exists v; auto.
+Qed.
+
+Lemma make_andimm_correct:
+  forall n r x,
+  vmatch bc e#r x ->
+  let (op, args) := make_andimm n r x in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.and e#r (Vint n)) v.
+Proof.
+  intros; unfold make_andimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros.
+  subst n. exists (Vint Int.zero); split; auto. destruct (e#r); simpl; auto. rewrite Int.and_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.mone; intros.
+  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int.and_mone; auto.
+  destruct (match x with Uns _ k => Int.eq (Int.zero_ext k (Int.not n)) Int.zero
+                       | _ => false end) eqn:UNS.
+  destruct x; try congruence.
+  exists (e#r); split; auto.
+  inv H; auto. simpl. replace (Int.and i n) with i; auto.
+  generalize (Int.eq_spec (Int.zero_ext n0 (Int.not n)) Int.zero); rewrite UNS; intro EQ.
+  Int.bit_solve. destruct (zlt i0 n0).
+  replace (Int.testbit n i0) with (negb (Int.testbit Int.zero i0)).
+  rewrite Int.bits_zero. simpl. rewrite andb_true_r. auto.
+  rewrite <- EQ. rewrite Int.bits_zero_ext by omega. rewrite zlt_true by auto.
+  rewrite Int.bits_not by auto. apply negb_involutive.
+  rewrite H6 by auto. auto.
+  econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_orimm_correct:
+  forall n r,
+  let (op, args) := make_orimm n r in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.or e#r (Vint n)) v.
+Proof.
+  intros; unfold make_orimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros.
+  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int.or_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.mone; intros.
+  subst n. exists (Vint Int.mone); split; auto. destruct (e#r); simpl; auto. rewrite Int.or_mone; auto.
+  econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_xorimm_correct:
+  forall n r,
+  let (op, args) := make_xorimm n r in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.xor e#r (Vint n)) v.
+Proof.
+  intros; unfold make_xorimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros.
+  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int.xor_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.mone; intros.
+  subst n. exists (Val.notint e#r); split; auto.
+  econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_addlimm_correct:
+  forall n r,
+  let (op, args) := make_addlimm n r in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.addl e#r (Vlong n)) v.
+Proof.
+  intros. unfold make_addlimm.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+  subst. exists (e#r); split; auto.
+  destruct (e#r); simpl; auto; rewrite ? Int64.add_zero, ? Ptrofs.add_zero; auto.
+  exists (Val.addl e#r (Vlong n)); split; auto.
+Qed.
+
+Lemma make_shllimm_correct:
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_shllimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shll e#r1 (Vint n)) v.
+Proof.
+Local Opaque mk_amount64.
+  intros; unfold make_shllimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto.
+  unfold Int64.shl'. rewrite Z.shiftl_0_r, Int64.repr_unsigned. auto.
+  destruct (Int.ltu n Int64.iwordsize') eqn:L.
+  econstructor; split. simpl. eauto. rewrite mk_amount64_eq; auto.
+  econstructor; split. simpl. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shrlimm_correct:
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_shrlimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shrl e#r1 (Vint n)) v.
+Proof.
+  intros; unfold make_shrlimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto.
+  unfold Int64.shr'. rewrite Z.shiftr_0_r, Int64.repr_signed. auto.
+  destruct (Int.ltu n Int64.iwordsize') eqn:L.
+  econstructor; split. simpl. eauto. rewrite mk_amount64_eq; auto.
+  econstructor; split. simpl. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shrluimm_correct:
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_shrluimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shrlu e#r1 (Vint n)) v.
+Proof.
+  intros; unfold make_shrluimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto.
+  unfold Int64.shru'. rewrite Z.shiftr_0_r, Int64.repr_unsigned. auto.
+  destruct (Int.ltu n Int64.iwordsize') eqn:L.
+  econstructor; split. simpl. eauto. rewrite mk_amount64_eq; auto.
+  econstructor; split. simpl. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_mullimm_correct:
+  forall n r1 r2,
+  e#r2 = Vlong n ->
+  let (op, args) := make_mullimm n r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mull e#r1 (Vlong n)) v.
+Proof.
+  intros; unfold make_mullimm.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. subst.
+  exists (Vlong Int64.zero); split; auto. destruct (e#r1); simpl; auto. rewrite Int64.mul_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.one; intros. subst.
+  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int64.mul_one; auto.
+  destruct (Int64.is_power2' n) eqn:?; intros.
+  econstructor; split. simpl; eauto. 
+  rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+  destruct (e#r1); simpl; auto.
+  erewrite Int64.is_power2'_range by eauto.
+  erewrite Int64.mul_pow2' by eauto. auto.
+  econstructor; split; eauto. simpl; rewrite H; auto.
+Qed.
+
+Lemma make_divlimm_correct:
+  forall n r1 r2 v,
+  Val.divls e#r1 e#r2 = Some v ->
+  e#r2 = Vlong n ->
+  let (op, args) := make_divlimm n r1 r2 in
+  exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+  intros; unfold make_divlimm.
+  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
+  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
+  exists v; auto.
+  exists v; auto.
+Qed.
+
+Lemma make_divluimm_correct:
+  forall n r1 r2 v,
+  Val.divlu e#r1 e#r2 = Some v ->
+  e#r2 = Vlong n ->
+  let (op, args) := make_divluimm n r1 r2 in
+  exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+  intros; unfold make_divluimm.
+  destruct (Int64.is_power2' n) eqn:?.
+  econstructor; split. simpl; eauto.
+  rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
+  simpl.
+  erewrite Int64.is_power2'_range by eauto.    
+  erewrite Int64.divu_pow2' by eauto.  auto. 
+  exists v; auto.
+Qed.
+
+Lemma make_andlimm_correct:
+  forall n r x,
+  let (op, args) := make_andlimm n r x in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.andl e#r (Vlong n)) v.
+Proof.
+  intros; unfold make_andlimm.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+  subst n. exists (Vlong Int64.zero); split; auto. destruct (e#r); simpl; auto. rewrite Int64.and_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
+  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int64.and_mone; auto.
+  econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_orlimm_correct:
+  forall n r,
+  let (op, args) := make_orlimm n r in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.orl e#r (Vlong n)) v.
+Proof.
+  intros; unfold make_orlimm.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int64.or_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
+  subst n. exists (Vlong Int64.mone); split; auto. destruct (e#r); simpl; auto. rewrite Int64.or_mone; auto.
+  econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_xorlimm_correct:
+  forall n r,
+  let (op, args) := make_xorlimm n r in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.xorl e#r (Vlong n)) v.
+Proof.
+  intros; unfold make_xorlimm.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int64.xor_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
+  subst n. exists (Val.notl e#r); split; auto.
+  econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_mulfimm_correct:
+  forall n r1 r2,
+  e#r2 = Vfloat n ->
+  let (op, args) := make_mulfimm n r1 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfimm.
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros.
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r1); simpl; auto. rewrite Float.mul2_add; auto.
+  simpl. econstructor; split; eauto.
+Qed.
+
+Lemma make_mulfimm_correct_2:
+  forall n r1 r2,
+  e#r1 = Vfloat n ->
+  let (op, args) := make_mulfimm n r2 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfimm.
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros.
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r2); simpl; auto. rewrite Float.mul2_add; auto.
+  rewrite Float.mul_commut; auto.
+  simpl. econstructor; split; eauto.
+Qed.
+
+Lemma make_mulfsimm_correct:
+  forall n r1 r2,
+  e#r2 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r1 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfsimm.
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros.
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r1); simpl; auto. rewrite Float32.mul2_add; auto.
+  simpl. econstructor; split; eauto.
+Qed.
+
+Lemma make_mulfsimm_correct_2:
+  forall n r1 r2,
+  e#r1 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r2 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfsimm.
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros.
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r2); simpl; auto. rewrite Float32.mul2_add; auto.
+  rewrite Float32.mul_commut; auto.
+  simpl. econstructor; split; eauto.
+Qed.
+
+Lemma make_zext_correct:
+  forall s r x,
+  vmatch bc e#r x ->
+  let (op, args) := make_zext s r x in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.zero_ext s e#r) v.
+Proof.
+  intros; unfold make_zext. destruct (vincl x (Uns Ptop s)) eqn:INCL.
+- exists e#r; split; auto.
+  assert (V: vmatch bc e#r (Uns Ptop s)).
+  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
+  inv V; simpl; auto. rewrite is_uns_zero_ext in H4 by auto. rewrite H4; auto.
+- econstructor; split; simpl; eauto.
+Qed.
+
+Lemma make_sext_correct:
+  forall s r x,
+  vmatch bc e#r x ->
+  let (op, args) := make_sext s r x in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.sign_ext s e#r) v.
+Proof.
+  intros; unfold make_sext. destruct (vincl x (Sgn Ptop s)) eqn:INCL.
+- exists e#r; split; auto.
+  assert (V: vmatch bc e#r (Sgn Ptop s)).
+  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
+  inv V; simpl; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto.
+- econstructor; split; simpl; eauto.
+Qed.
+
+Lemma op_strength_reduction_correct:
+  forall op args vl v,
+  vl = map (fun r => AE.get r ae) args ->
+  eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v ->
+  let (op', args') := op_strength_reduction op args vl in
+  exists w, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some w /\ Val.lessdef v w.
+Proof.
+  intros until v; unfold op_strength_reduction;
+  case (op_strength_reduction_match op args vl); simpl; intros.
+- (* add 1 *)
+  rewrite Val.add_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto.
+- (* add 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto.
+- (* addshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct. apply make_addimm_correct; auto.
+- (* sub *)
+  InvApproxRegs; SimplVM; inv H0. rewrite Val.sub_add_opp. apply make_addimm_correct; auto.
+- (* subshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct, Val.sub_add_opp. apply make_addimm_correct; auto.
+- (* mul 1 *)
+  rewrite Val.mul_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
+- (* mul 2*)
+  InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
+- (* divs *)
+  assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
+  apply make_divimm_correct; auto.
+- (* divu *)
+  assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
+  apply make_divuimm_correct; auto.
+- (* and 1 *)
+  rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
+- (* and 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
+- (* andshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct. apply make_andimm_correct; auto.
+- (* andimm *)
+  inv H; inv H0. apply make_andimm_correct; auto.
+- (* or 1 *)
+  rewrite Val.or_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto.
+- (* or 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto.
+- (* orshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct. apply make_orimm_correct; auto.
+- (* xor 1 *)
+  rewrite Val.xor_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto.
+- (* xor 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto.
+- (* xorshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct. apply make_xorimm_correct; auto.
+- (* bic *)
+  InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
+- (* bicshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct. apply make_andimm_correct; auto.
+- (* orn *)
+  InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto.
+- (* ornshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct. apply make_orimm_correct; auto.
+- (* eor *)
+  InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto.
+- (* eorshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shift_correct. apply make_xorimm_correct; auto.
+- (* shl *)
+  InvApproxRegs; SimplVM; inv H0. apply make_shlimm_correct; auto.
+- (* shr *)
+  InvApproxRegs; SimplVM; inv H0. apply make_shrimm_correct; auto.
+- (* shru *)
+  InvApproxRegs; SimplVM; inv H0. apply make_shruimm_correct; auto.
+- (* zext *)
+  InvApproxRegs; SimplVM; inv H0. apply make_zext_correct; auto.
+- (* sext *)
+  InvApproxRegs; SimplVM; inv H0. apply make_sext_correct; auto.
+- (* addl 1 *)
+  rewrite Val.addl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto.
+- (* addl 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto.
+- (* addshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct. apply make_addlimm_correct; auto.
+- (* addext *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_extend_correct. apply make_addlimm_correct; auto.
+- (* subl *)
+  InvApproxRegs; SimplVM; inv H0. rewrite Val.subl_addl_opp. apply make_addlimm_correct; auto.
+- (* sublshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct, Val.subl_addl_opp. apply make_addlimm_correct; auto.
+- (* sublextend *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_extend_correct, Val.subl_addl_opp. apply make_addlimm_correct; auto.
+- (* mull 1 *)
+  rewrite Val.mull_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
+- (* mull 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
+- (* divl *)
+  assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
+  apply make_divlimm_correct; auto.
+- (* divlu *)
+  assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
+  apply make_divluimm_correct; auto.
+- (* andl 1 *)
+  rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
+- (* andl 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
+- (* andlshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct. apply make_andlimm_correct; auto.
+- (* andlimm *)
+  inv H; inv H0. apply make_andlimm_correct; auto.
+- (* orl 1 *)
+  rewrite Val.orl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto.
+- (* orl 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto.
+- (* orlshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct. apply make_orlimm_correct; auto.
+- (* xorl 1 *)
+  rewrite Val.xorl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto.
+- (* xorl 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto.
+- (* xorlshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct. apply make_xorlimm_correct; auto.
+- (* bicl *)
+  InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
+- (* biclshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct. apply make_andlimm_correct; auto.
+- (* ornl *)
+  InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto.
+- (* ornlshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct. apply make_orlimm_correct; auto.
+- (* eorl *)
+  InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto.
+- (* eorlshift *)
+  InvApproxRegs; SimplVM; inv H0. rewrite eval_static_shiftl_correct. apply make_xorlimm_correct; auto.
+- (* shll *)
+  InvApproxRegs; SimplVM; inv H0. apply make_shllimm_correct; auto.
+- (* shrl *)
+  InvApproxRegs; SimplVM; inv H0. apply make_shrlimm_correct; auto.
+- (* shrlu *)
+  InvApproxRegs; SimplVM; inv H0. apply make_shrluimm_correct; auto.
+- (* cond *)
+  inv H0. apply make_cmp_correct; auto.
+- (* select *)
+  inv H0. apply make_select_correct; congruence.
+- (* mulf 1 *)
+  InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
+- (* mulf 2 *)
+  InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) e#r2).
+  rewrite <- H2. apply make_mulfimm_correct_2; auto.
+- (* mulfs 1 *)
+  InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+- (* mulfs 2 *)
+  InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) e#r2).
+  rewrite <- H2. apply make_mulfsimm_correct_2; auto.
+- (* default *)
+  exists v; auto.
+Qed.
+
+Lemma addr_strength_reduction_correct:
+  forall addr args vl res,
+  vl = map (fun r => AE.get r ae) args ->
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
+  let (addr', args') := addr_strength_reduction addr args vl in
+  exists res', eval_addressing ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
+Proof.
+  intros until res. unfold addr_strength_reduction.
+  destruct (addr_strength_reduction_match addr args vl); simpl;
+  intros VL EA; InvApproxRegs; SimplVM; try (inv EA).
+- econstructor; split; eauto. inv H0; simpl; auto. rewrite H2.
+  unfold Genv.symbol_address. destruct (Genv.find_symbol ge symb); auto.
+- rewrite Ptrofs.add_zero_l. econstructor; split; eauto.
+  inv H0; auto. rewrite H2; auto.
+- rewrite Ptrofs.add_zero_l. econstructor; split; eauto. 
+  inv H; auto. rewrite H3; auto.
+- rewrite Ptrofs.add_zero_l. econstructor; split; eauto. 
+  inv H0; auto. rewrite H3. rewrite Ptrofs.add_commut; auto.
+- econstructor; split; eauto. rewrite Val.addl_commut. auto.
+- econstructor; split; eauto.
+- rewrite Ptrofs.add_zero_l. rewrite a64_range. econstructor; split; eauto.
+  inv H; auto. rewrite H3; auto.
+- rewrite a64_range. econstructor; split; eauto.
+- rewrite Ptrofs.add_zero_l, eval_static_extend_correct.
+  econstructor; split; eauto. inv H; auto. rewrite H3; auto.
+- rewrite eval_static_extend_correct. 
+  econstructor; split; eauto.
+- exists res; auto.
+Qed.
+
+End STRENGTH_REDUCTION.
diff --git a/aarch64/Conventions1.v b/aarch64/Conventions1.v
new file mode 100644
index 00000000..efda835d
--- /dev/null
+++ b/aarch64/Conventions1.v
@@ -0,0 +1,285 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Function calling conventions and other conventions regarding the use of
+    machine registers and stack slots. *)
+
+Require Import Coqlib Decidableplus.
+Require Import AST Events Locations.
+Require Archi.
+
+(** * Classification of machine registers *)
+
+(** Machine registers (type [mreg] in module [Locations]) are divided in:
+- Callee-save registers, whose value is preserved across a function call.
+- Caller-save registers that can be modified during a function call.
+
+  We follow the Procedure Call Standard for the ARM 64-bit Architecture
+  (AArch64) document: R19-R28 and F8-F15 are callee-save. *)
+
+Definition is_callee_save (r: mreg): bool :=
+  match r with
+  | R0  | R1  | R2  | R3  | R4  | R5  | R6  | R7 => false
+  | R8  | R9  | R10 | R11 | R12 | R13 | R14 | R15 => false
+  | R17 => false
+  | R19 | R20 | R21 |  R22 | R23 => true
+  | R24 | R25 | R26 | R27 | R28 => true
+  | R29 => false
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7 => false
+  | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15 => true
+  | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23 => false
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => false
+  end.
+
+Definition int_caller_save_regs :=
+     R0  :: R1  :: R2  :: R3  :: R4  :: R5  :: R6  :: R7
+  :: R8  :: R9  :: R10 :: R11 :: R12 :: R13 :: R14 :: R15
+  :: R17 :: R29 :: nil.
+
+Definition float_caller_save_regs :=
+     F0  :: F1  :: F2  :: F3  :: F4  :: F5  :: F6  :: F7
+  :: F16 :: F17 :: F18 :: F19 :: F20 :: F21 :: F22 :: F23
+  :: F24 :: F25 :: F26 :: F27 :: F28 :: F29 :: F30 :: F31 :: nil.
+
+Definition int_callee_save_regs :=
+     R19 :: R20 :: R21 ::  R22 :: R23
+  :: R24 :: R25 :: R26 :: R27 :: R28 :: nil.
+
+Definition float_callee_save_regs :=
+     F8  :: F9  :: F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: nil.
+
+Definition destroyed_at_call :=
+  List.filter (fun r => negb (is_callee_save r)) all_mregs.
+
+Definition dummy_int_reg := R0.     (**r Used in [Coloring]. *)
+Definition dummy_float_reg := F0.   (**r Used in [Coloring]. *)
+
+Definition callee_save_type := mreg_type.
+  
+Definition is_float_reg (r: mreg): bool :=
+  match r with
+  | R0  | R1  | R2  | R3  | R4  | R5  | R6  | R7
+  | R8  | R9  | R10 | R11 | R12 | R13 | R14 | R15
+  | R17 | R19 | R20 | R21 |  R22 | R23
+  | R24 | R25 | R26 | R27 | R28
+  | R29 => false
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
+  | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15
+  | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => true
+  end.
+
+(** * Function calling conventions *)
+
+(** The functions in this section determine the locations (machine registers
+  and stack slots) used to communicate arguments and results between the
+  caller and the callee during function calls.  These locations are functions
+  of the signature of the function and of the call instruction.
+  Agreement between the caller and the callee on the locations to use
+  is guaranteed by our dynamic semantics for Cminor and RTL, which demand
+  that the signature of the call instruction is identical to that of the
+  called function.
+
+  Calling conventions are largely arbitrary: they must respect the properties
+  proved in this section (such as no overlapping between the locations
+  of function arguments), but this leaves much liberty in choosing actual
+  locations.  *)
+
+(** ** Location of function result *)
+
+(** The result value of a function is passed back to the caller in
+  registers [R0] or [F0], depending on the type of the
+  returned value.  We treat a function without result as a function
+  with one integer result. *)
+
+Definition loc_result (s: signature) : rpair mreg :=
+  match proj_sig_res s with
+  | Tint | Tlong | Tany32 | Tany64 => One R0
+  | Tfloat | Tsingle => One F0
+  end.
+
+(** The result registers have types compatible with that given in the signature. *)
+
+Lemma loc_result_type:
+  forall sig,
+  subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
+Proof.
+  intros. unfold loc_result. destruct (proj_sig_res sig); auto.
+Qed.
+
+(** The result locations are caller-save registers *)
+
+Lemma loc_result_caller_save:
+  forall (s: signature),
+  forall_rpair (fun r => is_callee_save r = false) (loc_result s).
+Proof.
+  intros.
+  unfold loc_result. destruct (proj_sig_res s); simpl; auto.
+Qed.
+
+(** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
+
+Lemma loc_result_pair:
+  forall sg,
+  match loc_result sg with
+  | One _ => True
+  | Twolong r1 r2 =>
+        r1 <> r2 /\ proj_sig_res sg = Tlong
+     /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
+     /\ Archi.ptr64 = false
+  end.
+Proof.
+  intros; unfold loc_result; destruct (proj_sig_res sg); exact I.
+Qed.
+
+(** The location of the result depends only on the result part of the signature *)
+
+Lemma loc_result_exten:
+  forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
+Proof.
+  intros. unfold loc_result, proj_sig_res. rewrite H; auto.
+Qed.
+
+(** ** Location of function arguments *)
+
+(**
+- The first 8 integer arguments are passed in registers [R0...R7].
+- The first 8 FP arguments are passed in registers [F0...F7].
+- Extra arguments are passed on the stack, in [Outgoing] slots of size
+  64 bits (2 words), consecutively assigned, starting at word offset 0.
+**)
+
+Definition int_param_regs :=
+  R0  :: R1  :: R2  :: R3  :: R4  :: R5  :: R6  :: R7 :: nil.
+
+Definition float_param_regs :=
+  F0  :: F1  :: F2  :: F3  :: F4  :: F5  :: F6  :: F7 :: nil.
+
+Fixpoint loc_arguments_rec
+     (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) :=
+  match tyl with
+  | nil => nil
+  | (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
+      match list_nth_z int_param_regs ir with
+      | None =>
+          One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2)
+      | Some ireg =>
+          One (R ireg) :: loc_arguments_rec tys (ir + 1) fr ofs
+      end
+  | (Tfloat | Tsingle) as ty :: tys =>
+      match list_nth_z float_param_regs fr with
+      | None =>
+          One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2)
+      | Some freg =>
+          One (R freg) :: loc_arguments_rec tys ir (fr + 1) ofs
+      end
+  end.
+
+(** [loc_arguments s] returns the list of locations where to store arguments
+  when calling a function with signature [s].  *)
+
+Definition loc_arguments (s: signature) : list (rpair loc) :=
+  loc_arguments_rec s.(sig_args) 0 0 0.
+
+(** Argument locations are either caller-save registers or [Outgoing]
+  stack slots at nonnegative offsets. *)
+
+Definition loc_argument_acceptable (l: loc) : Prop :=
+  match l with
+  | R r => is_callee_save r = false
+  | S Outgoing ofs ty => ofs >= 0 /\ (typealign ty | ofs)
+  | _ => False
+  end.
+
+Definition loc_argument_charact (ofs: Z) (l: loc) : Prop :=
+  match l with
+  | R r => In r int_param_regs \/ In r float_param_regs
+  | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
+  | _ => False
+  end.
+
+Remark loc_arguments_rec_charact:
+  forall tyl ir fr ofs p,
+  In p (loc_arguments_rec tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_charact ofs) p.
+Proof.
+  assert (X: forall ofs1 ofs2 l, loc_argument_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_charact ofs1 l).
+  { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. }
+  assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_charact ofs1) p).
+  { destruct p; simpl; intuition eauto. }
+  assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
+  { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
+Opaque list_nth_z.
+  induction tyl; simpl loc_arguments_rec; intros.
+- contradiction.
+- assert (A: forall ty, In p
+      match list_nth_z int_param_regs ir with
+      | Some ireg => One (R ireg) :: loc_arguments_rec tyl (ir + 1) fr ofs
+      | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2)
+      end ->
+      forall_rpair (loc_argument_charact ofs) p).
+  { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1.
+    subst. left. eapply list_nth_z_in; eauto.
+    eapply IHtyl; eauto.
+    subst. split. omega. assumption.
+    eapply Y; eauto. omega. }
+  assert (B: forall ty, In p
+      match list_nth_z float_param_regs fr with
+      | Some ireg => One (R ireg) :: loc_arguments_rec tyl ir (fr + 1) ofs
+      | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2)
+      end ->
+      forall_rpair (loc_argument_charact ofs) p).
+  { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1.
+    subst. right. eapply list_nth_z_in; eauto.
+    eapply IHtyl; eauto.
+    subst. split. omega. assumption.
+    eapply Y; eauto. omega. }
+  destruct a; eauto.
+Qed.
+
+Lemma loc_arguments_acceptable:
+  forall (s: signature) (p: rpair loc),
+  In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p.
+Proof.
+  unfold loc_arguments; intros.
+  assert (A: forall r, In r int_param_regs -> is_callee_save r = false) by decide_goal.
+  assert (B: forall r, In r float_param_regs -> is_callee_save r = false) by decide_goal.
+  assert (X: forall l, loc_argument_charact 0 l -> loc_argument_acceptable l).
+  { unfold loc_argument_charact, loc_argument_acceptable.
+    destruct l as [r | [] ofs ty]; auto.  intros [C|C]; auto.
+    intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
+    exists (2 / typealign ty); destruct ty; reflexivity.
+  }
+  exploit loc_arguments_rec_charact; eauto using Z.divide_0_r.
+  unfold forall_rpair; destruct p; intuition auto.
+Qed.
+
+Hint Resolve loc_arguments_acceptable: locs.
+
+Lemma loc_arguments_main:
+  loc_arguments signature_main = nil.
+Proof.
+  unfold loc_arguments; reflexivity.
+Qed.
+
+(** ** Normalization of function results *)
+
+(** According to the AAPCS64 ABI specification, "padding bits" in the return
+    value of a function have unpredictable values and must be ignored.
+    Consequently, we force normalization of return values of small integer
+    types (8- and 16-bit integers), so that the top bits (the "padding bits")
+    are proper sign- or zero-extensions of the small integer value. *)
+
+Definition return_value_needs_normalization (t: rettype) : bool :=
+  match t with
+  | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true
+  | _ => false
+  end.
diff --git a/aarch64/DuplicateOpcodeHeuristic.ml b/aarch64/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..5fc2156c
--- /dev/null
+++ b/aarch64/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,27 @@
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+  match cond with
+  | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccompf c | Ccompfs c -> (match c with
+      | Ceq -> Some false
+      | Cne -> Some true
+      | _ -> None
+      )
+  | Cnotcompf c | Cnotcompfs c -> (match c with
+      | Ceq -> Some true
+      | Cne -> Some false
+      | _ -> None
+      )
+  | _ -> None
+
diff --git a/aarch64/Machregs.v b/aarch64/Machregs.v
new file mode 100644
index 00000000..b2a2308e
--- /dev/null
+++ b/aarch64/Machregs.v
@@ -0,0 +1,210 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+Require Import String.
+Require Import Coqlib Decidableplus Maps.
+Require Import AST Op.
+
+(** ** Machine registers *)
+
+(** Integer register 16 is reserved as temporary and for call veeners.
+    Integer register 18 is reserved as the platform register.
+    Integer register 30 is reserved for the return address. *)
+
+Inductive mreg: Type :=
+  (** Allocatable integer regs *)
+  | R0  | R1  | R2  | R3  | R4  | R5  | R6  | R7
+  | R8  | R9  | R10 | R11 | R12 | R13 | R14 | R15
+  | R17 | R19 | R20 | R21 |  R22 | R23
+  | R24 | R25 | R26 | R27 | R28 | R29
+  (** Allocatable floating-point regs *)
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
+  | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15
+  | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31.
+
+Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
+Proof. decide equality. Defined.
+Global Opaque mreg_eq.
+
+Definition all_mregs :=
+     R0  :: R1  :: R2  :: R3  :: R4  :: R5  ::  R6  :: R7
+  :: R8  :: R9  :: R10 :: R11 :: R12 :: R13 ::  R14 :: R15
+  :: R17 :: R19 :: R20 :: R21 ::  R22 :: R23
+  :: R24 :: R25 :: R26 :: R27 :: R28 :: R29
+  :: F0  :: F1  :: F2  :: F3  :: F4  :: F5  ::  F6  :: F7
+  :: F8  :: F9  :: F10 :: F11 :: F12 :: F13 ::  F14 :: F15
+  :: F16 :: F17 :: F18 :: F19 :: F20 :: F21 ::  F22 :: F23
+  :: F24 :: F25 :: F26 :: F27 :: F28 :: F29 ::  F30 :: F31
+  :: nil.
+
+Lemma all_mregs_complete:
+  forall (r: mreg), In r all_mregs.
+Proof.
+  assert (forall r, proj_sumbool (In_dec mreg_eq r all_mregs) = true) by (destruct r; reflexivity).
+  intros. specialize (H r). InvBooleans. auto.
+Qed.
+
+Instance Decidable_eq_mreg : forall (x y: mreg), Decidable (eq x y) := Decidable_eq mreg_eq.
+
+Instance Finite_mreg : Finite mreg := {
+  Finite_elements := all_mregs;
+  Finite_elements_spec := all_mregs_complete
+}.
+
+Definition mreg_type (r: mreg): typ := Tany64.
+
+Open Scope positive_scope.
+
+Module IndexedMreg <: INDEXED_TYPE.
+  Definition t := mreg.
+  Definition eq := mreg_eq.
+  Definition index (r: mreg): positive :=
+    match r with
+    | R0 => 1   | R1 => 2   | R2 => 3   | R3 => 4
+    | R4 => 5   | R5 => 6   | R6 => 7   | R7 => 8
+    | R8 => 9   | R9 => 10  | R10 => 11 | R11 => 12
+    | R12 => 13 | R13 => 14 | R14 => 15 | R15 => 16
+    | R17 => 17 | R19 => 19
+    | R20 => 20 | R21 => 21 | R22 => 22 | R23 => 23
+    | R24 => 24 | R25 => 25 | R26 => 26 | R27 => 27
+    | R28 => 28 | R29 => 29
+    | F0 => 32  | F1 => 33  | F2 => 34  | F3 => 35
+    | F4 => 36  | F5 => 37  | F6 => 38  | F7 => 39
+    | F8 => 40  | F9 => 41  | F10 => 42 | F11 => 43
+    | F12 => 44 | F13 => 45 | F14 => 46 | F15 => 47
+    | F16 => 48 | F17 => 49 | F18 => 50 | F19 => 51
+    | F20 => 52 | F21 => 53 | F22 => 54 | F23 => 55
+    | F24 => 56 | F25 => 57 | F26 => 58 | F27 => 59
+    | F28 => 60 | F29 => 61 | F30 => 62 | F31 => 63
+    end.
+  Lemma index_inj:
+    forall r1 r2, index r1 = index r2 -> r1 = r2.
+  Proof.
+    decide_goal.
+  Qed.
+End IndexedMreg.
+
+Definition is_stack_reg (r: mreg) : bool := false.
+
+(** ** Names of registers *)
+
+Local Open Scope string_scope.
+
+Definition register_names :=
+     ("X0", R0)  :: ("X1", R1)  :: ("X2", R2)  :: ("X3", R3)
+  :: ("X4", R4)  :: ("X5", R5)  :: ("X6", R6)  :: ("X7", R7)
+  :: ("X8", R8)  :: ("X9", R9)  :: ("X10", R10) :: ("X11", R11)
+  :: ("X12", R12) :: ("X13", R13) :: ("X14", R14) :: ("X15", R15)
+  :: ("X17", R17) :: ("X19", R19)
+  :: ("X20", R20) :: ("X21", R21) :: ("X22", R22) :: ("X23", R23)
+  :: ("X24", R24) :: ("X25", R25) :: ("X26", R26) :: ("X27", R27)
+  :: ("X28", R28) :: ("X29", R29)
+  :: ("D0", F0)  :: ("D1", F1)  :: ("D2", F2)  :: ("D3", F3)
+  :: ("D4", F4)  :: ("D5", F5)  :: ("D6", F6)  :: ("D7", F7)
+  :: ("D8", F8)  :: ("D9", F9)  :: ("D10", F10) :: ("D11", F11)
+  :: ("D12", F12) :: ("D13", F13) :: ("D14", F14) :: ("D15", F15)
+  :: ("D16", F16) :: ("D17", F17) :: ("D18", F18) :: ("D19", F19)
+  :: ("D20", F20) :: ("D21", F21) :: ("D22", F22) :: ("D23", F23)
+  :: ("D24", F24) :: ("D25", F25) :: ("D26", F26) :: ("D27", F27)
+  :: ("D28", F28) :: ("D29", F29) :: ("D30", F30) :: ("D31", F31)
+  :: nil.
+
+Definition register_by_name (s: string) : option mreg :=
+  let fix assoc (l: list (string * mreg)) : option mreg :=
+    match l with
+    | nil => None
+    | (s1, r1) :: l' => if string_dec s s1 then Some r1 else assoc l'
+    end
+  in assoc register_names.
+
+(** ** Destroyed registers, preferred registers *)
+
+Definition destroyed_by_op (op: operation): list mreg :=
+  match op with
+  | Oshrximm _ | Oshrlximm _ => R17 :: nil
+  | _ => nil
+  end.
+
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
+  nil.
+
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := nil.
+
+Definition destroyed_by_cond (cond: condition): list mreg := nil.
+
+Definition destroyed_by_jumptable: list mreg := R17 :: nil.
+
+Fixpoint destroyed_by_clobber (cl: list string): list mreg :=
+  match cl with
+  | nil => nil
+  | c1 :: cl =>
+      match register_by_name c1 with
+      | Some r => r :: destroyed_by_clobber cl
+      | None   => destroyed_by_clobber cl
+      end
+  end.
+
+Definition destroyed_by_builtin (ef: external_function): list mreg :=
+  match ef with
+  | EF_memcpy sz al => R15 :: R17 :: R29 :: nil
+  | EF_inline_asm txt sg clob => destroyed_by_clobber clob
+  | _ => nil
+  end.
+
+Definition destroyed_by_setstack (ty: typ): list mreg := nil.
+
+Definition destroyed_at_function_entry: list mreg := R29 :: nil.
+
+Definition destroyed_at_indirect_call: list mreg := nil.
+
+Definition temp_for_parent_frame: mreg := R29.
+
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg :=
+  (nil, None).
+
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(option mreg) :=
+  (nil, nil).
+
+Global Opaque
+    destroyed_by_op destroyed_by_load destroyed_by_store
+    destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
+    destroyed_by_setstack destroyed_at_function_entry temp_for_parent_frame
+    destroyed_at_indirect_call
+    mregs_for_operation mregs_for_builtin.
+
+(** Two-address operations.  Return [true] if the first argument and
+  the result must be in the same location *and* are unconstrained
+  by [mregs_for_operation].  There is one for AArch64: [Olowlong],
+  which is actually a no-operation in the generated asm code. *)
+
+Definition two_address_op (op: operation) : bool := 
+  match op with
+  | Olowlong => true
+  | _ => false
+  end.
+
+Global Opaque two_address_op.
+
+(* Constraints on constant propagation for builtins *)
+
+Definition builtin_constraints (ef: external_function) :
+                                       list builtin_arg_constraint :=
+  match ef with
+  | EF_vload _ => OK_addressing :: nil
+  | EF_vstore _ => OK_addressing :: OK_default :: nil
+  | EF_memcpy _ _ => OK_addrstack :: OK_addrstack :: nil
+  | EF_annot kind txt targs => map (fun _ => OK_all) targs
+  | EF_debug kind txt targs => map (fun _ => OK_all) targs
+  | _ => nil
+  end.
+
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
new file mode 100644
index 00000000..f13a9ff5
--- /dev/null
+++ b/aarch64/Machregsaux.ml
@@ -0,0 +1,40 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Auxiliary functions on machine registers *)
+
+open Camlcoq
+open Machregs
+
+let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
+
+let _ =
+  List.iter
+    (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
+    Machregs.register_names
+
+let is_scratch_register s =
+  s = "X16" || s = "x16" || s = "X30" || s = "x30"
+
+
+let name_of_register r =
+  try Some (Hashtbl.find register_names r) with Not_found -> None
+
+let register_by_name s =
+  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
+
+let can_reserve_register r = Conventions1.is_callee_save r
+                           
+let class_of_type = function
+  | AST.Tint | AST.Tlong -> 0
+  | AST.Tfloat | AST.Tsingle -> 1
+  | AST.Tany32 | AST.Tany64 -> assert false
diff --git a/aarch64/NeedOp.v b/aarch64/NeedOp.v
new file mode 100644
index 00000000..8fcab9e1
--- /dev/null
+++ b/aarch64/NeedOp.v
@@ -0,0 +1,253 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+Require Import Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs.
+Require Import Op RTL.
+Require Import NeedDomain.
+
+(** Neededness analysis for AArch64 operators *)
+
+Definition needs_of_shift (s: shift) (a: amount32) (nv: nval) :=
+  match s with
+  | Slsl => shlimm nv a
+  | Sasr => shrimm nv a
+  | Slsr => shruimm nv a
+  | Sror => ror nv a
+  end.
+
+Definition zero_ext' (s: Z) (nv: nval) :=
+  if zle 0 s then zero_ext s nv else default nv.
+Definition sign_ext' (s: Z) (nv: nval) :=
+  if zlt 0 s then sign_ext s nv else default nv.
+
+Definition op1 (nv: nval) := nv :: nil.
+Definition op2 (nv: nval) := nv :: nv :: nil.
+Definition op1shift (s: shift) (a: amount32) (nv: nval) :=
+  needs_of_shift s a nv :: nil.
+Definition op2shift (s: shift) (a: amount32) (nv: nval) :=
+  nv :: needs_of_shift s a nv :: nil.
+
+Definition needs_of_condition (cond: condition): list nval := nil.
+
+Definition needs_of_operation (op: operation) (nv: nval): list nval :=
+  match op with
+  | Omove => nv :: nil
+  | Ointconst _ => nil
+  | Olongconst _ => nil
+  | Ofloatconst _ => nil
+  | Osingleconst _ => nil
+  | Oaddrsymbol _ _ => nil
+  | Oaddrstack _ => nil
+  | Oshift s a => op1shift s a nv
+  | Oadd | Osub | Omul => op2 (modarith nv)
+  | Oaddshift s a | Osubshift s a => op2shift s a (modarith nv)
+  | Oaddimm _ => op1 (modarith nv)
+  | Oneg => op1 (modarith nv)
+  | Onegshift s a => op1shift s a (modarith nv)
+  | Omuladd | Omulsub => 
+      let n := modarith nv in n :: n :: n :: nil
+  | Odiv | Odivu => op2 (default nv)
+  | Oand | Oor | Oxor => op2 (bitwise nv)
+  | Oandshift s a | Oorshift s a | Oxorshift s a =>  op2shift s a (bitwise nv)
+  | Oandimm  n => op1 (andimm nv n)
+  | Oorimm n => op1 (orimm nv n)
+  | Oxorimm n => op1 (bitwise nv)
+  | Onot => op1 (bitwise nv)
+  | Onotshift s a => needs_of_shift s a (bitwise nv) :: nil
+  | Obic | Oorn | Oeqv => 
+      let n := bitwise nv in n :: bitwise n :: nil
+  | Obicshift s a | Oornshift s a | Oeqvshift s a =>
+      let n := bitwise nv in n :: needs_of_shift s a (bitwise n) :: nil
+  | Oshl | Oshr | Oshru => op2 (default nv)
+  | Oshrximm _ => op1 (default nv)
+  | Ozext s => op1 (zero_ext' s nv)
+  | Osext s => op1 (sign_ext' s nv)
+  | Oshlzext s a => op1 (zero_ext' s (shlimm nv a))
+  | Oshlsext s a => op1 (sign_ext' s (shlimm nv a))
+  | Ozextshr a s => op1 (shruimm (zero_ext' s nv) a)
+  | Osextshr a s => op1 (shrimm (sign_ext' s nv) a)
+
+  | Oshiftl _ _ | Oextend _ _ => op1 (default nv)
+  | Omakelong  | Olowlong  | Ohighlong => op1 (default nv)
+  | Oaddl | Osubl | Omull => op2 (default nv)
+  | Oaddlshift _ _ | Oaddlext _ _ | Osublshift _ _ | Osublext _ _ => op2 (default nv)
+  | Oaddlimm _ => op1 (default nv)
+  | Onegl => op1 (default nv)
+  | Oneglshift _ _ => op1 (default nv)
+  | Omulladd | Omullsub => let n := default nv in n :: n :: n :: nil
+  | Omullhs | Omullhu | Odivl | Odivlu => op2 (default nv)
+  | Oandl | Oorl | Oxorl | Obicl | Oornl | Oeqvl => op2 (default nv)
+  | Oandlshift _ _ | Oorlshift _ _  | Oxorlshift _ _
+  | Obiclshift _ _ | Oornlshift _ _ | Oeqvlshift _ _ => op2 (default nv)
+  | Oandlimm _ | Oorlimm _ | Oxorlimm _ => op1 (default nv)
+  | Onotl => op1 (default nv)
+  | Onotlshift _ _ => op1 (default nv)
+  | Oshll | Oshrl | Oshrlu => op2 (default nv)
+  | Oshrlximm _ => op1 (default nv)
+  | Ozextl _ | Osextl _
+  | Oshllzext _ _ | Oshllsext _ _ | Ozextshrl _ _ | Osextshrl _ _ => op1 (default nv)
+  | Onegf | Oabsf => op1 (default nv)
+  | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
+  | Onegfs | Oabsfs => op1 (default nv)
+  | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
+  | Ofloatofsingle | Osingleoffloat => op1 (default nv)
+  | Ointoffloat | Ointuoffloat | Ofloatofint | Ofloatofintu => op1 (default nv)
+  | Olongoffloat | Olonguoffloat | Ofloatoflong | Ofloatoflongu => op1 (default nv)
+  | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv)
+  | Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv)
+  | Ocmp c => needs_of_condition c
+  | Osel c ty => nv :: nv :: needs_of_condition c
+  end.
+
+Definition operation_is_redundant (op: operation) (nv: nval): bool :=
+  match op with
+  | Ozext s => zle 0 s && zero_ext_redundant s nv
+  | Osext s => zlt 0 s && sign_ext_redundant s nv
+  | Oandimm n => andimm_redundant nv n
+  | Oorimm n => orimm_redundant nv n
+  | _ => false
+  end.
+
+Ltac InvAgree :=
+  match goal with
+  | [H: vagree_list nil _ _ |- _ ] => inv H; InvAgree
+  | [H: vagree_list (_::_) _ _ |- _ ] => inv H; InvAgree
+  | _ => idtac
+  end.
+
+Ltac TrivialExists :=
+  match goal with
+  | [ |- exists v, Some ?x = Some v /\ _ ] => exists x; split; auto
+  | _ => idtac
+  end.
+
+Lemma shift_sound:
+  forall v w s a x,
+  vagree v w (needs_of_shift s a x) ->
+  vagree (eval_shift s v a) (eval_shift s w a) x.
+Proof.
+  intros until x; destruct s; simpl; intros.
+- apply shlimm_sound; auto.
+- apply shruimm_sound; auto.
+- apply shrimm_sound; auto.
+- apply ror_sound; auto.
+Qed.
+
+Lemma zero_ext'_sound:
+  forall v w x n,
+  vagree v w (zero_ext' n x) ->
+  vagree (Val.zero_ext n v) (Val.zero_ext n w) x.
+Proof.
+  unfold zero_ext'; intros. destruct (zle 0 n).
+- apply zero_ext_sound; auto.
+- assert (E: x = Nothing \/ Val.lessdef v w) by (destruct x; auto).
+  destruct E. subst x; simpl; auto. apply vagree_lessdef; apply Val.zero_ext_lessdef; auto.
+Qed.
+
+Lemma sign_ext'_sound:
+  forall v w x n,
+  vagree v w (sign_ext' n x) ->
+  vagree (Val.sign_ext n v) (Val.sign_ext n w) x.
+Proof.
+  unfold sign_ext'; intros. destruct (zlt 0 n).
+- apply sign_ext_sound; auto.
+- assert (E: x = Nothing \/ Val.lessdef v w) by (destruct x; auto).
+  destruct E. subst x; simpl; auto. apply vagree_lessdef; apply Val.sign_ext_lessdef; auto.
+Qed.
+
+Section SOUNDNESS.
+
+Variable ge: genv.
+Variable sp: block.
+Variables m m': mem.
+Hypothesis PERM: forall b ofs k p, Mem.perm m b ofs k p -> Mem.perm m' b ofs k p.
+
+Lemma needs_of_condition_sound:
+  forall cond args b args',
+  eval_condition cond args m = Some b ->
+  vagree_list args args' (needs_of_condition cond) ->
+  eval_condition cond args' m' = Some b.
+Proof.
+  intros. unfold needs_of_condition in H0.
+  eapply default_needs_of_condition_sound; eauto.
+Qed.
+
+Lemma needs_of_operation_sound:
+  forall op args v nv args',
+  eval_operation ge (Vptr sp Ptrofs.zero) op args m = Some v ->
+  vagree_list args args' (needs_of_operation op nv) ->
+  nv <> Nothing ->
+  exists v',
+     eval_operation ge (Vptr sp Ptrofs.zero) op args' m' = Some v'
+  /\ vagree v v' nv.
+Proof.
+  unfold needs_of_operation; intros; destruct op; try (eapply default_needs_of_operation_sound; eauto; fail);
+  simpl in *; FuncInv; InvAgree; TrivialExists.
+- apply shift_sound; auto.
+- apply add_sound; auto.
+- apply add_sound; auto using shift_sound.
+- apply add_sound; auto with na.
+- apply neg_sound; auto.
+- apply neg_sound; auto using shift_sound.
+- apply sub_sound; auto.
+- apply sub_sound; auto using shift_sound.
+- apply mul_sound; auto.
+- apply add_sound; auto. apply mul_sound; rewrite modarith_idem; auto. 
+- apply sub_sound; auto. apply mul_sound; rewrite modarith_idem; auto. 
+- apply and_sound; auto.
+- apply and_sound; auto using shift_sound.
+- apply andimm_sound; auto.
+- apply or_sound; auto.
+- apply or_sound; auto using shift_sound.
+- apply orimm_sound; auto.
+- apply xor_sound; auto.
+- apply xor_sound; auto using shift_sound.
+- apply xor_sound; auto with na.
+- apply notint_sound; auto.
+- apply notint_sound; auto using shift_sound.
+- apply and_sound; auto. apply notint_sound; rewrite bitwise_idem; auto.
+- apply and_sound; auto. apply notint_sound; rewrite bitwise_idem; auto using shift_sound.
+- apply or_sound; auto. apply notint_sound; rewrite bitwise_idem; auto.
+- apply or_sound; auto. apply notint_sound; rewrite bitwise_idem; auto using shift_sound.
+- apply xor_sound; auto. apply notint_sound; rewrite bitwise_idem; auto.
+- apply xor_sound; auto. apply notint_sound; rewrite bitwise_idem; auto using shift_sound.
+- apply zero_ext'_sound; auto.
+- apply sign_ext'_sound; auto.
+- apply shlimm_sound; apply zero_ext'_sound; auto.
+- apply shlimm_sound; apply sign_ext'_sound; auto.
+- apply zero_ext'_sound; apply shruimm_sound; auto.
+- apply sign_ext'_sound; apply shrimm_sound; auto.
+- destruct (eval_condition cond args m) as [b|] eqn:EC.
+  erewrite needs_of_condition_sound by eauto.
+  apply select_sound; auto.
+  simpl; auto with na.
+Qed.
+
+Lemma operation_is_redundant_sound:
+  forall op nv arg1 args v arg1' args',
+  operation_is_redundant op nv = true ->
+  eval_operation ge (Vptr sp Ptrofs.zero) op (arg1 :: args) m = Some v ->
+  vagree_list (arg1 :: args) (arg1' :: args') (needs_of_operation op nv) ->
+  vagree v arg1' nv.
+Proof.
+  intros. destruct op; simpl in *; try discriminate; inv H1; FuncInv; subst.
+- apply andimm_redundant_sound; auto.
+- apply orimm_redundant_sound; auto.
+- InvBooleans. unfold zero_ext' in H5; rewrite zle_true in H5 by auto. 
+  apply zero_ext_redundant_sound; auto.
+- InvBooleans. unfold sign_ext' in H5; rewrite zlt_true in H5 by auto. 
+  apply sign_ext_redundant_sound; auto.
+Qed.
+
+End SOUNDNESS.
diff --git a/aarch64/Op.v b/aarch64/Op.v
new file mode 100644
index 00000000..c0b9d435
--- /dev/null
+++ b/aarch64/Op.v
@@ -0,0 +1,1848 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Operators and addressing modes.  The abstract syntax and dynamic
+  semantics for the CminorSel, RTL, LTL and Mach languages depend on the
+  following types, defined in this library:
+- [condition]:  boolean conditions for conditional branches;
+- [operation]: arithmetic and logical operations;
+- [addressing]: addressing modes for load and store operations.
+
+  These types are processor-specific and correspond roughly to what the
+  processor can compute in one instruction.  In other terms, these
+  types reflect the state of the program after instruction selection.
+  For a processor-independent set of operations, see the abstract
+  syntax and dynamic semantics of the Cminor language.
+*)
+
+Require Import Axioms Coqlib BoolEqual.
+Require Import AST Integers Floats Values Memory Globalenvs Events.
+
+Set Implicit Arguments.
+Local Transparent Archi.ptr64.
+
+(** Shift amounts *)
+
+Record amount32 : Type := {
+  a32_amount :> int;
+  a32_range  : Int.ltu a32_amount Int.iwordsize = true }.
+
+Record amount64 : Type := {
+  a64_amount :> int;
+  a64_range  : Int.ltu a64_amount Int64.iwordsize' = true }.
+
+(** Shifted operands *)
+
+Inductive shift : Type :=
+  | Slsl                                (**r left shift *)
+  | Slsr                                (**r right unsigned shift *)
+  | Sasr                                (**r right signed shift *)
+  | Sror.                               (**r rotate right *)
+
+(** Sign- or zero-extended operands *)
+
+Inductive extension : Type :=
+  | Xsgn32                              (**r from signed 32-bit integer to 64-bit integer *)
+  | Xuns32.                             (**r from unsigned 32-bit integer to 64-bit integer *)
+
+(** Conditions (boolean-valued operators). *)
+
+Inductive condition: Type :=
+(** Tests over 32-bit integers *)
+  | Ccomp (c: comparison)                               (**r signed comparison *)
+  | Ccompu (c: comparison)                              (**r unsigned comparison *)
+  | Ccompimm (c: comparison) (n: int)                   (**r signed comparison with constant *)
+  | Ccompuimm (c: comparison) (n: int)                  (**r unsigned comparison with constant *)
+  | Ccompshift (c: comparison) (s: shift) (a: amount32) (**r signed comparison with shift *)
+  | Ccompushift (c: comparison) (s: shift) (a: amount32)(**r unsigned comparison width shift *)
+  | Cmaskzero (n: int)                                  (**r test [(arg & n) == 0] *)
+  | Cmasknotzero (n: int)                               (**r test [(arg & n) != 0] *)
+(** Tests over 64-bit integers *)
+  | Ccompl (c: comparison)                              (**r signed comparison *)
+  | Ccomplu (c: comparison)                             (**r unsigned comparison *)
+  | Ccomplimm (c: comparison) (n: int64)                (**r signed comparison with constant *)
+  | Ccompluimm (c: comparison) (n: int64)               (**r unsigned comparison with constant *)
+  | Ccomplshift (c: comparison) (s: shift) (a: amount64)(**r signed comparison with shift *)
+  | Ccomplushift (c: comparison) (s: shift) (a: amount64)(**r unsigned comparison width shift *)
+  | Cmasklzero (n: int64)                               (**r test [(arg & n) == 0] *)
+  | Cmasklnotzero (n: int64)                            (**r test [(arg & n) != 0] *)
+(** Tests over 64-bit floating-point numbers *)
+  | Ccompf (c: comparison)                              (**r FP comparison *)
+  | Cnotcompf (c: comparison)                           (**r negation of an FP comparison *)
+  | Ccompfzero (c: comparison)                          (**r comparison with 0.0 *)
+  | Cnotcompfzero (c: comparison)                       (**r negation of comparison with 0.0 *)
+(** Tests over 32-bit floating-point numbers *)
+  | Ccompfs (c: comparison)                             (**r FP comparison *)
+  | Cnotcompfs (c: comparison)                          (**r negation of an FP comparison *)
+  | Ccompfszero (c: comparison)                         (**r equal to 0.0 *)
+  | Cnotcompfszero (c: comparison).                     (**r not equal to 0.0 *)
+
+(** Arithmetic and logical operations.  In the descriptions, [rd] is the
+  result of the operation and [r1], [r2], etc, are the arguments. *)
+
+Inductive operation : Type :=
+  | Omove                                               (**r [rd = r1] *)
+  | Ointconst (n: int)                                  (**r [rd] is set to the given integer constant *)
+  | Olongconst (n: int64)                               (**r [rd] is set to the given integer constant *)
+  | Ofloatconst (n: float)                              (**r [rd] is set to the given float constant *)
+  | Osingleconst (n: float32)                           (**r [rd] is set to the given float constant *)
+  | Oaddrsymbol (id: ident) (ofs: ptrofs)               (**r [rd] is set to the address of the symbol plus the given offset *)
+  | Oaddrstack (ofs: ptrofs)                            (**r [rd] is set to the stack pointer plus the given offset *)
+(** 32-bit integer arithmetic *)
+  | Oshift (s: shift) (a: amount32)                     (**r shift or rotate by immediate quantity *)
+  | Oadd                                                (**r [rd = r1 + r2] *)
+  | Oaddshift (s: shift) (a: amount32)                  (**r [rd = r1 + shifted r2] *)
+  | Oaddimm (n: int)                                    (**r [rd = r1 + n] *)
+  | Oneg                                                (**r [rd = - r1]   *)                     
+  | Onegshift (s: shift) (a: amount32)                  (**r [rd = - shifted r1] *)
+  | Osub                                                (**r [rd = r1 - r2] *)
+  | Osubshift (s: shift) (a: amount32)                  (**r [rd = r1 - shifted r2] *)
+  | Omul                                                (**r [rd = r1 * r2] *)
+  | Omuladd                                             (**r [rd = r1 + r2 * r3] *)
+  | Omulsub                                             (**r [rd = r1 - r2 * r3] *)
+  | Odiv                                                (**r [rd = r1 / r2] (signed) *)
+  | Odivu                                               (**r [rd = r1 / r2] (unsigned) *)
+  | Oand                                                (**r [rd = r1 & r2] *)
+  | Oandshift (s: shift) (a: amount32)                  (**r [rd = r1 & shifted r2] *)
+  | Oandimm (n: int)                                    (**r [rd = r1 & n] *)
+  | Oor                                                 (**r [rd = r1 | r2] *)
+  | Oorshift (s: shift) (a: amount32)                   (**r [rd = r1 | shifted r2] *)
+  | Oorimm (n: int)                                     (**r [rd = r1 | n] *)
+  | Oxor                                                (**r [rd = r1 ^ r2] *)
+  | Oxorshift (s: shift) (a: amount32)                  (**r [rd = r1 ^ shifted r2] *)
+  | Oxorimm (n: int)                                    (**r [rd = r1 ^ n] *)
+  | Onot                                                (**r [rd = ~r1] *)
+  | Onotshift (s: shift) (a: amount32)                  (**r [rd = ~ shifted r1] *)
+  | Obic                                                (**r [rd = r1 & ~r2] *)
+  | Obicshift (s: shift) (a: amount32)                  (**r [rd = r1 ^ ~ shifted r2] *)
+  | Oorn                                                (**r [rd = r1 | ~r2] *)
+  | Oornshift (s: shift) (a: amount32)                  (**r [rd = r1 | ~ shifted r2] *)
+  | Oeqv                                                (**r [rd = r1 ^ ~r2] *)
+  | Oeqvshift (s: shift) (a: amount32)                  (**r [rd = r1 | ~ shifted r2] *)
+  | Oshl                                                (**r [rd = r1 << r2] *)
+  | Oshr                                                (**r [rd = r1 >> r2] (signed) *)
+  | Oshru                                               (**r [rd = r1 >> r2] (unsigned) *)
+  | Oshrximm (n: int)                                   (**r [rd = r1 / 2^n] (signed) *)
+  | Ozext (s: Z)                                        (**r [rd = zero_ext(r1,s)] *)
+  | Osext (s: Z)                                        (**r [rd = sign_ext(r1,s)] *)
+  | Oshlzext (s: Z) (a: amount32)                       (**r [rd = zero_ext(r1,s) << a] *)
+  | Oshlsext (s: Z) (a: amount32)                       (**r [rd = sign_ext(r1,s) << a] *)
+  | Ozextshr (a: amount32) (s: Z)                       (**r [rd = zero_ext(r1 >> a, s)] *)
+  | Osextshr (a: amount32) (s: Z)                       (**r [rd = sign_ext(r1 >> a, s)] *)
+(** 64-bit integer arithmetic *)
+  | Oshiftl (s: shift) (a: amount64)                    (**r shift or rotate by immediate quantity *)
+  | Oextend (x: extension) (a: amount64)                (**r convert from 32 to 64 bits and shift *)
+  | Omakelong                                           (**r [rd = r1 << 32 | r2] *)
+  | Olowlong                                            (**r [rd = low-word(r1)] *)
+  | Ohighlong                                           (**r [rd = high-word(r1)] *)
+  | Oaddl                                               (**r [rd = r1 + r2] *)
+  | Oaddlshift (s: shift) (a: amount64)                 (**r [rd = r1 + shifted r2] *)
+  | Oaddlext (x: extension) (a: amount64)               (**r [rd = r1 + shifted, converted r2] *)
+  | Oaddlimm (n: int64)                                 (**r [rd = r1 + n] *)
+  | Onegl                                               (**r [rd = - r1]   *)                     
+  | Oneglshift (s: shift) (a: amount64)                 (**r [rd = - shifted r1] *)
+  | Osubl                                               (**r [rd = r1 - r2] *)
+  | Osublshift (s: shift) (a: amount64)                 (**r [rd = r1 - shifted r2] *)
+  | Osublext (x: extension) (a: amount64)               (**r [rd = r1 - shifted, converted r2] *)
+  | Omull                                               (**r [rd = r1 * r2] *)
+  | Omulladd                                            (**r [rd = r1 + r2 * r3] *)
+  | Omullsub                                            (**r [rd = r1 - r2 * r3] *)
+  | Omullhs                                             (**r [rd = high part of r1 * r2 (signed)] *)
+  | Omullhu                                             (**r [rd = high part of r1 * r2 (unsigned)] *)
+  | Odivl                                               (**r [rd = r1 / r2] (signed) *)
+  | Odivlu                                              (**r [rd = r1 / r2] (unsigned) *)
+  | Oandl                                               (**r [rd = r1 & r2] *)
+  | Oandlshift (s: shift) (a: amount64)                 (**r [rd = r1 & shifted r2] *)
+  | Oandlimm (n: int64)                                 (**r [rd = r1 & n] *)
+  | Oorl                                                (**r [rd = r1 | r2] *)
+  | Oorlshift (s: shift) (a: amount64)                  (**r [rd = r1 | shifted r2] *)
+  | Oorlimm (n: int64)                                  (**r [rd = r1 | n] *)
+  | Oxorl                                               (**r [rd = r1 ^ r2] *)
+  | Oxorlshift (s: shift) (a: amount64)                 (**r [rd = r1 ^ shifted r2] *)
+  | Oxorlimm (n: int64)                                 (**r [rd = r1 ^ n] *)
+  | Onotl                                               (**r [rd = ~r1] *)
+  | Onotlshift (s: shift) (a: amount64)                 (**r [rd = ~ shifted r1] *)
+  | Obicl                                               (**r [rd = r1 & ~r2] *)
+  | Obiclshift (s: shift) (a: amount64)                 (**r [rd = r1 ^ ~ shifted r2] *)
+  | Oornl                                               (**r [rd = r1 | ~r2] *)
+  | Oornlshift (s: shift) (a: amount64)                 (**r [rd = r1 | ~ shifted r2] *)
+  | Oeqvl                                               (**r [rd = r1 ^ ~r2] *)
+  | Oeqvlshift (s: shift) (a: amount64)                 (**r [rd = r1 | ~ shifted r2] *)
+  | Oshll                                               (**r [rd = r1 << r2] *)
+  | Oshrl                                               (**r [rd = r1 >> r2] (signed) *)
+  | Oshrlu                                              (**r [rd = r1 >> r2] (unsigned) *)
+  | Oshrlximm (n: int)                                  (**r [rd = r1 / 2^n] (signed) *)
+  | Ozextl (s: Z)                                       (**r [rd = zero_ext(r1,s)] *)
+  | Osextl (s: Z)                                       (**r [rd = sign_ext(r1,s)] *)
+  | Oshllzext (s: Z) (a: amount64)                      (**r [rd = zero_ext(r1,s) << a] *)
+  | Oshllsext (s: Z) (a: amount64)                      (**r [rd = sign_ext(r1,s) << a] *)
+  | Ozextshrl (a: amount64) (s: Z)                      (**r [rd = zero_ext(r1 >> a, s)] *)
+  | Osextshrl (a: amount64) (s: Z)                      (**r [rd = sign_ext(r1 >> a, s)] *)
+(** 64-bit floating-point arithmetic *)
+  | Onegf                                               (**r [rd = - r1] *)
+  | Oabsf                                               (**r [rd = abs(r1)] *)
+  | Oaddf                                               (**r [rd = r1 + r2] *)
+  | Osubf                                               (**r [rd = r1 - r2] *)
+  | Omulf                                               (**r [rd = r1 * r2] *)
+  | Odivf                                               (**r [rd = r1 / r2] *)
+(** 32-bit floating-point arithmetic *)
+  | Onegfs                                              (**r [rd = - r1] *)
+  | Oabsfs                                              (**r [rd = abs(r1)] *)
+  | Oaddfs                                              (**r [rd = r1 + r2] *)
+  | Osubfs                                              (**r [rd = r1 - r2] *)
+  | Omulfs                                              (**r [rd = r1 * r2] *)
+  | Odivfs                                              (**r [rd = r1 / r2] *)
+  | Osingleoffloat                                      (**r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle                                      (**r [rd] is [r1] extended to double-precision float *)
+(** Conversions between int and float *)
+  | Ointoffloat                                         (**r [rd = signed_int_of_float64(r1)] *)
+  | Ointuoffloat                                        (**r [rd = unsigned_int_of_float64(r1)] *)
+  | Ofloatofint                                         (**r [rd = float64_of_signed_int(r1)] *)
+  | Ofloatofintu                                        (**r [rd = float64_of_unsigned_int(r1)] *)
+  | Ointofsingle                                        (**r [rd = signed_int_of_float32(r1)] *)
+  | Ointuofsingle                                       (**r [rd = unsigned_int_of_float32(r1)] *)
+  | Osingleofint                                        (**r [rd = float32_of_signed_int(r1)] *)
+  | Osingleofintu                                       (**r [rd = float32_of_unsigned_int(r1)] *)
+  | Olongoffloat                                        (**r [rd = signed_long_of_float64(r1)] *)
+  | Olonguoffloat                                       (**r [rd = unsigned_long_of_float64(r1)] *)
+  | Ofloatoflong                                        (**r [rd = float64_of_signed_long(r1)] *)
+  | Ofloatoflongu                                       (**r [rd = float64_of_unsigned_long(r1)] *)
+  | Olongofsingle                                       (**r [rd = signed_long_of_float32(r1)] *)
+  | Olonguofsingle                                      (**r [rd = unsigned_long_of_float32(r1)] *)
+  | Osingleoflong                                       (**r [rd = float32_of_signed_long(r1)] *)
+  | Osingleoflongu                                      (**r [rd = float32_of_unsigned_int(r1)] *)
+(** Boolean tests *)
+  | Ocmp (cond: condition)                              (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
+  | Osel (cond: condition) (ty: typ).                   (**r [rd = rs1] if condition holds, [rd = rs2] otherwise. *)
+
+(** Addressing modes.  [r1], [r2], etc, are the arguments to the addressing. *)
+
+Inductive addressing: Type :=
+  | Aindexed (ofs: int64)                               (**r Address is [r1 + offset] *)
+  | Aindexed2                                           (**r Address is [r1 + r2] *)
+  | Aindexed2shift (a: amount64)                        (**r Address is [r1 + r2 << a] *)
+  | Aindexed2ext (x: extension) (a: amount64)           (**r Address is [r1 + sign-or-zero-ext(r2) << a] *)
+  | Aglobal (id: ident) (ofs: ptrofs)                   (**r Address is [global + offset] *)
+  | Ainstack (ofs: ptrofs).                             (**r Address is [stack_pointer + offset] *)
+
+(** Comparison functions (used in modules [CSE] and [Allocation]). *)
+
+Definition eq_amount32 (x y: amount32): {x=y} + {x<>y}.
+Proof.
+  destruct x as [x Px], y as [y Py].
+  destruct (Int.eq_dec x y).
+- subst y. assert (Px = Py) by (apply proof_irr). subst Py. left; auto.
+- right; congruence.
+Defined.
+
+Definition eq_amount64 (x y: amount64): {x=y} + {x<>y}.
+Proof.
+  destruct x as [x Px], y as [y Py].
+  destruct (Int.eq_dec x y).
+- subst y. assert (Px = Py) by (apply proof_irr). subst Py. left; auto.
+- right; congruence.
+Defined.
+
+Definition eq_shift (x y: shift): {x=y} + {x<>y}.
+Proof.
+  decide equality.
+Defined.
+
+Definition eq_extension (x y: extension): {x=y} + {x<>y}.
+Proof.
+  decide equality.
+Defined.
+
+Definition eq_condition (x y: condition) : {x=y} + {x<>y}.
+Proof.
+  assert (forall (x y: comparison), {x=y}+{x<>y}) by decide equality.
+  generalize Int.eq_dec Int64.eq_dec eq_shift eq_amount32 eq_amount64; intro.
+  decide equality.
+Defined.
+
+Definition eq_addressing (x y: addressing) : {x=y} + {x<>y}.
+Proof.
+  generalize ident_eq Int64.eq_dec Ptrofs.eq_dec eq_extension eq_amount64; intros.
+  decide equality.
+Defined.
+
+Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}.
+Proof.
+  intros.
+  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec
+             zeq ident_eq eq_shift eq_extension eq_amount32 eq_amount64
+             typ_eq eq_condition; 
+  decide equality.
+Defined.
+
+(** Alternative:
+
+Definition beq_operation: forall (x y: operation), bool.
+Proof.
+  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec
+             zeq ident_eq eq_shift eq_extension eq_amount32 eq_amount64
+             eq_condition typ_eq; boolean_equality.
+Defined.
+
+Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}.
+Proof.
+  decidable_equality_from beq_operation.
+Defined.
+*)
+
+(** * Evaluation functions *)
+
+(** Evaluation of conditions, operators and addressing modes applied
+  to lists of values.  Return [None] when the computation can trigger an
+  error, e.g. integer division by zero.  [eval_condition] returns a boolean,
+  [eval_operation] and [eval_addressing] return a value. *)
+
+Definition eval_shift (s: shift) (v: val) (n: amount32) : val :=
+  match s with
+  | Slsl => Val.shl v (Vint n)
+  | Slsr => Val.shru v (Vint n)
+  | Sasr => Val.shr v (Vint n)
+  | Sror => Val.ror v (Vint n)
+  end.
+
+Definition eval_shiftl (s: shift) (v: val) (n: amount64) : val :=
+  match s with
+  | Slsl => Val.shll v (Vint n)
+  | Slsr => Val.shrlu v (Vint n)
+  | Sasr => Val.shrl v (Vint n)
+  | Sror => Val.rorl v (Vint n)
+  end.
+
+Definition eval_extend (x: extension) (v: val) (n: amount64) : val :=
+  Val.shll
+    (match x with
+      | Xsgn32 => Val.longofint v
+      | Xuns32 => Val.longofintu v
+     end)
+    (Vint n).
+
+Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool :=
+  match cond, vl with
+  | Ccomp c, v1 :: v2 :: nil => Val.cmp_bool c v1 v2
+  | Ccompu c, v1 :: v2 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 v2
+  | Ccompimm c n, v1 :: nil => Val.cmp_bool c v1 (Vint n)
+  | Ccompuimm c n, v1 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 (Vint n)
+  | Ccompshift c s a, v1 :: v2 :: nil => Val.cmp_bool c v1 (eval_shift s v2 a)
+  | Ccompushift c s a, v1 :: v2 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 (eval_shift s v2 a)
+  | Cmaskzero n, v1 :: nil => Val.cmp_bool Ceq (Val.and v1 (Vint n)) (Vint Int.zero)
+  | Cmasknotzero n, v1 :: nil => Val.cmp_bool Cne (Val.and v1 (Vint n)) (Vint Int.zero)
+
+  | Ccompl c, v1 :: v2 :: nil => Val.cmpl_bool c v1 v2
+  | Ccomplu c, v1 :: v2 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 v2
+  | Ccomplimm c n, v1 :: nil => Val.cmpl_bool c v1 (Vlong n)
+  | Ccompluimm c n, v1 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 (Vlong n)
+  | Ccomplshift c s a, v1 :: v2 :: nil => Val.cmpl_bool c v1 (eval_shiftl s v2 a)
+  | Ccomplushift c s a, v1 :: v2 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 (eval_shiftl s v2 a)
+  | Cmasklzero n, v1 :: nil => Val.cmpl_bool Ceq (Val.andl v1 (Vlong n)) (Vlong Int64.zero)
+  | Cmasklnotzero n, v1 :: nil => Val.cmpl_bool Cne (Val.andl v1 (Vlong n)) (Vlong Int64.zero)
+
+  | Ccompf c, v1 :: v2 :: nil => Val.cmpf_bool c v1 v2
+  | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
+  | Ccompfzero c, v1 :: nil => Val.cmpf_bool c v1 (Vfloat Float.zero)
+  | Cnotcompfzero c, v1 :: nil => option_map negb (Val.cmpf_bool c v1 (Vfloat Float.zero))
+
+  | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
+  | Ccompfszero c, v1 :: nil => Val.cmpfs_bool c v1 (Vsingle Float32.zero)
+  | Cnotcompfszero c, v1 :: nil => option_map negb (Val.cmpfs_bool c v1 (Vsingle Float32.zero))
+
+  | _, _ => None
+  end.
+
+Definition eval_operation
+    (F V: Type) (genv: Genv.t F V) (sp: val)
+    (op: operation) (vl: list val) (m: mem): option val :=
+  match op, vl with
+  | Omove, v1::nil => Some v1
+  | Ointconst n, nil => Some (Vint n)
+  | Olongconst n, nil => Some (Vlong n)
+  | Ofloatconst n, nil => Some (Vfloat n)
+  | Osingleconst n, nil => Some (Vsingle n)
+  | Oaddrsymbol s ofs, nil => Some (Genv.symbol_address genv s ofs)
+  | Oaddrstack ofs, nil => Some (Val.offset_ptr sp ofs)
+
+  | Oshift s a, v1 :: nil => Some (eval_shift s v1 a)
+  | Oadd, v1 :: v2 :: nil => Some (Val.add v1 v2)
+  | Oaddshift s a, v1 :: v2 :: nil => Some (Val.add v1 (eval_shift s v2 a))
+  | Oaddimm n, v1 :: nil => Some (Val.add v1 (Vint n))
+  | Oneg, v1 :: nil => Some (Val.neg v1)
+  | Onegshift s a, v1 :: nil => Some (Val.neg (eval_shift s v1 a))
+  | Osub, v1 :: v2 :: nil => Some (Val.sub v1 v2)
+  | Osubshift s a, v1 :: v2 :: nil => Some (Val.sub v1 (eval_shift s v2 a))
+  | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
+  | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
+  | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
+  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
+  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+  | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
+  | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
+  | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
+  | Oor, v1 :: v2 :: nil => Some (Val.or v1 v2)
+  | Oorshift s a, v1 :: v2 :: nil => Some (Val.or v1 (eval_shift s v2 a))
+  | Oorimm n, v1 :: nil => Some (Val.or v1 (Vint n))
+  | Oxor, v1 :: v2 :: nil => Some (Val.xor v1 v2)
+  | Oxorshift s a, v1 :: v2 :: nil => Some (Val.xor v1 (eval_shift s v2 a))
+  | Oxorimm n, v1 :: nil => Some (Val.xor v1 (Vint n))
+  | Onot, v1 :: nil => Some (Val.notint v1)
+  | Onotshift s a, v1 :: nil => Some (Val.notint (eval_shift s v1 a))
+  | Obic, v1 :: v2 :: nil => Some (Val.and v1 (Val.notint v2))
+  | Obicshift s a, v1 :: v2 :: nil => Some (Val.and v1 (Val.notint (eval_shift s v2 a)))
+  | Oorn, v1 :: v2 :: nil => Some (Val.or v1 (Val.notint v2))
+  | Oornshift s a, v1 :: v2 :: nil => Some (Val.or v1 (Val.notint (eval_shift s v2 a)))
+  | Oeqv, v1 :: v2 :: nil => Some (Val.xor v1 (Val.notint v2))
+  | Oeqvshift s a, v1 :: v2 :: nil => Some (Val.xor v1 (Val.notint (eval_shift s v2 a)))
+  | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
+  | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
+  | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
+  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
+  | Osext s, v1 :: nil => Some (Val.sign_ext s v1)
+  | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
+  | Oshlsext s a, v1 :: nil => Some (Val.shl (Val.sign_ext s v1) (Vint a))
+  | Ozextshr a s, v1 :: nil => Some (Val.zero_ext s (Val.shru v1 (Vint a)))
+  | Osextshr a s, v1 :: nil => Some (Val.sign_ext s (Val.shr v1 (Vint a)))
+
+  | Oshiftl s a, v1 :: nil => Some (eval_shiftl s v1 a)
+  | Oextend x a, v1 :: nil => Some (eval_extend x v1 a)
+  | Omakelong, v1::v2::nil => Some (Val.longofwords v1 v2)
+  | Olowlong, v1::nil => Some (Val.loword v1)
+  | Ohighlong, v1::nil => Some (Val.hiword v1)
+  | Oaddl, v1 :: v2 :: nil => Some (Val.addl v1 v2)
+  | Oaddlshift s a, v1 :: v2 :: nil => Some (Val.addl v1 (eval_shiftl s v2 a))
+  | Oaddlext x a, v1 :: v2 :: nil => Some (Val.addl v1 (eval_extend x v2 a))
+  | Oaddlimm n, v1 :: nil => Some (Val.addl v1 (Vlong n))
+  | Onegl, v1 :: nil => Some (Val.negl v1)
+  | Oneglshift s a, v1 :: nil => Some (Val.negl (eval_shiftl s v1 a))
+  | Osubl, v1 :: v2 :: nil => Some (Val.subl v1 v2)
+  | Osublshift s a, v1 :: v2 :: nil => Some (Val.subl v1 (eval_shiftl s v2 a))
+  | Osublext x a, v1 :: v2 :: nil => Some (Val.subl v1 (eval_extend x v2 a))
+  | Omull, v1 :: v2 :: nil => Some (Val.mull v1 v2)
+  | Omulladd, v1 :: v2 :: v3 :: nil => Some (Val.addl v1 (Val.mull v2 v3))
+  | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
+  | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
+  | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
+  | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
+  | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+  | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
+  | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
+  | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
+  | Oorl, v1 :: v2 :: nil => Some (Val.orl v1 v2)
+  | Oorlshift s a, v1 :: v2 :: nil => Some (Val.orl v1 (eval_shiftl s v2 a))
+  | Oorlimm n, v1 :: nil => Some (Val.orl v1 (Vlong n))
+  | Oxorl, v1 :: v2 :: nil => Some (Val.xorl v1 v2)
+  | Oxorlshift s a, v1 :: v2 :: nil => Some (Val.xorl v1 (eval_shiftl s v2 a))
+  | Oxorlimm n, v1 :: nil => Some (Val.xorl v1 (Vlong n))
+  | Onotl, v1 :: nil => Some (Val.notl v1)
+  | Onotlshift s a, v1 :: nil => Some (Val.notl (eval_shiftl s v1 a))
+  | Obicl, v1 :: v2 :: nil => Some (Val.andl v1 (Val.notl v2))
+  | Obiclshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (Val.notl (eval_shiftl s v2 a)))
+  | Oornl, v1 :: v2 :: nil => Some (Val.orl v1 (Val.notl v2))
+  | Oornlshift s a, v1 :: v2 :: nil => Some (Val.orl v1 (Val.notl (eval_shiftl s v2 a)))
+  | Oeqvl, v1 :: v2 :: nil => Some (Val.xorl v1 (Val.notl v2))
+  | Oeqvlshift s a, v1 :: v2 :: nil => Some (Val.xorl v1 (Val.notl (eval_shiftl s v2 a)))
+  | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
+  | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
+  | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
+  | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
+  | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
+  | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
+  | Oshllsext s a, v1 :: nil => Some (Val.shll (Val.sign_ext_l s v1) (Vint a))
+  | Ozextshrl a s, v1 :: nil => Some (Val.zero_ext_l s (Val.shrlu v1 (Vint a)))
+  | Osextshrl a s, v1 :: nil => Some (Val.sign_ext_l s (Val.shrl v1 (Vint a)))
+
+  | Onegf, v1::nil => Some (Val.negf v1)
+  | Oabsf, v1::nil => Some (Val.absf v1)
+  | Oaddf, v1::v2::nil => Some (Val.addf v1 v2)
+  | Osubf, v1::v2::nil => Some (Val.subf v1 v2)
+  | Omulf, v1::v2::nil => Some (Val.mulf v1 v2)
+  | Odivf, v1::v2::nil => Some (Val.divf v1 v2)
+
+  | Onegfs, v1::nil => Some (Val.negfs v1)
+  | Oabsfs, v1::nil => Some (Val.absfs v1)
+  | Oaddfs, v1::v2::nil => Some (Val.addfs v1 v2)
+  | Osubfs, v1::v2::nil => Some (Val.subfs v1 v2)
+  | Omulfs, v1::v2::nil => Some (Val.mulfs v1 v2)
+  | Odivfs, v1::v2::nil => Some (Val.divfs v1 v2)
+
+  | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
+  | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
+  | Ointoffloat, v1::nil => Val.intoffloat v1
+  | Ointuoffloat, v1::nil => Val.intuoffloat v1
+  | Ofloatofint, v1::nil => Val.floatofint v1
+  | Ofloatofintu, v1::nil => Val.floatofintu v1
+  | Ointofsingle, v1::nil => Val.intofsingle v1
+  | Ointuofsingle, v1::nil => Val.intuofsingle v1
+  | Osingleofint, v1::nil => Val.singleofint v1
+  | Osingleofintu, v1::nil => Val.singleofintu v1
+  | Olongoffloat, v1::nil => Val.longoffloat v1
+  | Olonguoffloat, v1::nil => Val.longuoffloat v1
+  | Ofloatoflong, v1::nil => Val.floatoflong v1
+  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
+  | Olongofsingle, v1::nil => Val.longofsingle v1
+  | Olonguofsingle, v1::nil => Val.longuofsingle v1
+  | Osingleoflong, v1::nil => Val.singleoflong v1
+  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+
+  | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
+  | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
+  | _, _ => None
+  end.
+
+Definition eval_addressing
+    (F V: Type) (genv: Genv.t F V) (sp: val)
+    (addr: addressing) (vl: list val) : option val :=
+  match addr, vl with
+  | Aindexed n, v1 :: nil => Some (Val.addl v1 (Vlong n))
+  | Aindexed2, v1 :: v2 :: nil => Some (Val.addl v1 v2)
+  | Aindexed2shift a, v1 :: v2 :: nil => Some (Val.addl v1 (Val.shll v2 (Vint a)))
+  | Aindexed2ext x a, v1 :: v2 :: nil => Some (Val.addl v1 (eval_extend x v2 a))
+  | Aglobal s ofs, nil => Some (Genv.symbol_address genv s ofs)
+  | Ainstack n, nil => Some (Val.offset_ptr sp n)
+  | _, _ => None
+  end.
+
+Remark eval_addressing_Ainstack:
+  forall (F V: Type) (genv: Genv.t F V) sp ofs,
+  eval_addressing genv sp (Ainstack ofs) nil = Some (Val.offset_ptr sp ofs).
+Proof.
+  intros. reflexivity.
+Qed.
+
+Remark eval_addressing_Ainstack_inv:
+  forall (F V: Type) (genv: Genv.t F V) sp ofs vl v,
+  eval_addressing genv sp (Ainstack ofs) vl = Some v -> vl = nil /\ v = Val.offset_ptr sp ofs.
+Proof.
+  unfold eval_addressing; intros; destruct vl; inv H; auto.
+Qed.
+
+Ltac FuncInv :=
+  match goal with
+  | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
+      destruct x; simpl in H; FuncInv
+  | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
+      destruct v; simpl in H; FuncInv
+  | H: (if Archi.ptr64 then _ else _) = Some _ |- _ =>
+      change Archi.ptr64 with true in H; simpl in H; FuncInv
+  | H: (Some _ = Some _) |- _ =>
+      injection H; intros; clear H; FuncInv
+  | H: (None = Some _) |- _ =>
+      discriminate H
+  | _ =>
+      idtac
+  end.
+
+(** * Static typing of conditions, operators and addressing modes. *)
+
+Definition type_of_condition (c: condition) : list typ :=
+  match c with
+  | Ccomp _ => Tint :: Tint :: nil
+  | Ccompu _ => Tint :: Tint :: nil
+  | Ccompimm _ _ => Tint :: nil
+  | Ccompuimm _ _ => Tint :: nil
+  | Ccompshift _ _ _ => Tint :: Tint :: nil
+  | Ccompushift _ _ _ => Tint :: Tint :: nil
+  | Cmaskzero _ => Tint :: nil
+  | Cmasknotzero _ => Tint :: nil
+  | Ccompl _ => Tlong :: Tlong :: nil
+  | Ccomplu _ => Tlong :: Tlong :: nil
+  | Ccomplimm _ _ => Tlong :: nil
+  | Ccompluimm _ _ => Tlong :: nil
+  | Ccomplshift _ _ _ => Tlong :: Tlong :: nil
+  | Ccomplushift _ _ _ => Tlong :: Tlong :: nil
+  | Cmasklzero _ => Tlong :: nil
+  | Cmasklnotzero _ => Tlong :: nil
+  | Ccompf _ => Tfloat :: Tfloat :: nil
+  | Cnotcompf _ => Tfloat :: Tfloat :: nil
+  | Ccompfzero _ => Tfloat :: nil
+  | Cnotcompfzero _ => Tfloat :: nil
+  | Ccompfs _ => Tsingle :: Tsingle :: nil
+  | Cnotcompfs _ => Tsingle :: Tsingle :: nil
+  | Ccompfszero _ => Tsingle :: nil
+  | Cnotcompfszero _ => Tsingle :: nil
+  end.
+
+Definition type_of_operation (op: operation) : list typ * typ :=
+  match op with
+  | Omove => (nil, Tint)   (* treated specially *)
+  | Ointconst _ => (nil, Tint)
+  | Olongconst _ => (nil, Tlong)
+  | Ofloatconst f => (nil, Tfloat)
+  | Osingleconst f => (nil, Tsingle)
+  | Oaddrsymbol _ _ => (nil, Tptr)
+  | Oaddrstack _ => (nil, Tptr)
+
+  | Oshift _ _ => (Tint :: nil, Tint)
+  | Oadd => (Tint :: Tint :: nil, Tint)
+  | Oaddshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Oaddimm _ => (Tint :: nil, Tint)
+  | Oneg => (Tint :: nil, Tint)
+  | Onegshift _ _ => (Tint :: nil, Tint)
+  | Osub => (Tint :: Tint :: nil, Tint)
+  | Osubshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Omul => (Tint :: Tint :: nil, Tint)
+  | Omuladd => (Tint :: Tint :: Tint :: nil, Tint)
+  | Omulsub => (Tint :: Tint :: Tint :: nil, Tint)
+  | Odiv => (Tint :: Tint :: nil, Tint)
+  | Odivu => (Tint :: Tint :: nil, Tint)
+  | Oand => (Tint :: Tint :: nil, Tint)
+  | Oandshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Oandimm _ => (Tint :: nil, Tint)
+  | Oor => (Tint :: Tint :: nil, Tint)
+  | Oorshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Oorimm _ => (Tint :: nil, Tint)
+  | Oxor => (Tint :: Tint :: nil, Tint)
+  | Oxorshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Oxorimm _ => (Tint :: nil, Tint)
+  | Onot => (Tint :: nil, Tint)
+  | Onotshift _ _ => (Tint :: nil, Tint)
+  | Obic => (Tint :: Tint :: nil, Tint)
+  | Obicshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Oorn => (Tint :: Tint :: nil, Tint)
+  | Oornshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Oeqv => (Tint :: Tint :: nil, Tint)
+  | Oeqvshift _ _ => (Tint :: Tint :: nil, Tint)
+  | Oshl => (Tint :: Tint :: nil, Tint)
+  | Oshr => (Tint :: Tint :: nil, Tint)
+  | Oshru => (Tint :: Tint :: nil, Tint)
+  | Oshrximm _ => (Tint :: nil, Tint)
+  | Ozext _ => (Tint :: nil, Tint)
+  | Osext _ => (Tint :: nil, Tint)
+  | Oshlzext _ _ => (Tint :: nil, Tint)
+  | Oshlsext _ _ => (Tint :: nil, Tint)
+  | Ozextshr _ _ => (Tint :: nil, Tint)
+  | Osextshr _ _ => (Tint :: nil, Tint)
+
+  | Oshiftl _ _ => (Tlong :: nil, Tlong)
+  | Oextend _ _ => (Tint :: nil, Tlong)
+  | Omakelong => (Tint :: Tint :: nil, Tlong)
+  | Olowlong => (Tlong :: nil, Tint)
+  | Ohighlong => (Tlong :: nil, Tint)
+  | Oaddl => (Tlong :: Tlong :: nil, Tlong)
+  | Oaddlshift _ _ => (Tlong :: Tlong :: nil, Tlong) 
+  | Oaddlext _ _ => (Tlong :: Tint :: nil, Tlong)
+  | Oaddlimm _ => (Tlong :: nil, Tlong)
+  | Onegl => (Tlong :: nil, Tlong)
+  | Oneglshift _ _ => (Tlong :: nil, Tlong)
+  | Osubl => (Tlong :: Tlong :: nil, Tlong)
+  | Osublshift _ _ => (Tlong :: Tlong :: nil, Tlong)
+  | Osublext _ _ => (Tlong :: Tint :: nil, Tlong)
+  | Omull => (Tlong :: Tlong :: nil, Tlong)
+  | Omulladd => (Tlong :: Tlong :: Tlong :: nil, Tlong)
+  | Omullsub => (Tlong :: Tlong :: Tlong :: nil, Tlong)
+  | Omullhs => (Tlong :: Tlong :: nil, Tlong)
+  | Omullhu => (Tlong :: Tlong :: nil, Tlong)
+  | Odivl => (Tlong :: Tlong :: nil, Tlong)
+  | Odivlu => (Tlong :: Tlong :: nil, Tlong)
+  | Oandl => (Tlong :: Tlong :: nil, Tlong)
+  | Oandlshift _ _ => (Tlong :: Tlong :: nil, Tlong)
+  | Oandlimm _ => (Tlong :: nil, Tlong)
+  | Oorl => (Tlong :: Tlong :: nil, Tlong)
+  | Oorlshift _ _ => (Tlong :: Tlong :: nil, Tlong)
+  | Oorlimm _ => (Tlong :: nil, Tlong)
+  | Oxorl => (Tlong :: Tlong :: nil, Tlong)
+  | Oxorlshift _ _ => (Tlong :: Tlong :: nil, Tlong)
+  | Oxorlimm _ => (Tlong :: nil, Tlong)
+  | Onotl => (Tlong :: nil, Tlong)
+  | Onotlshift _ _ => (Tlong :: nil, Tlong)
+  | Obicl => (Tlong :: Tlong :: nil, Tlong)
+  | Obiclshift _ _ => (Tlong :: Tlong :: nil, Tlong)
+  | Oornl => (Tlong :: Tlong :: nil, Tlong)
+  | Oornlshift _ _ => (Tlong :: Tlong :: nil, Tlong)
+  | Oeqvl => (Tlong :: Tlong :: nil, Tlong)
+  | Oeqvlshift _ _ => (Tlong :: Tlong :: nil, Tlong)
+  | Oshll => (Tlong :: Tint :: nil, Tlong)
+  | Oshrl => (Tlong :: Tint :: nil, Tlong)
+  | Oshrlu => (Tlong :: Tint :: nil, Tlong)
+  | Oshrlximm _ => (Tlong :: nil, Tlong)
+  | Ozextl _ => (Tlong :: nil, Tlong)
+  | Osextl _ => (Tlong :: nil, Tlong)
+  | Oshllzext _ _ => (Tlong :: nil, Tlong)
+  | Oshllsext _ _ => (Tlong :: nil, Tlong)
+  | Ozextshrl _ _ => (Tlong :: nil, Tlong)
+  | Osextshrl _ _ => (Tlong :: nil, Tlong)
+
+  | Onegf => (Tfloat :: nil, Tfloat)
+  | Oabsf => (Tfloat :: nil, Tfloat)
+  | Oaddf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
+
+  | Onegfs => (Tsingle :: nil, Tsingle)
+  | Oabsfs => (Tsingle :: nil, Tsingle)
+  | Oaddfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Osubfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Omulfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Odivfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Osingleoffloat => (Tfloat :: nil, Tsingle)
+  | Ofloatofsingle => (Tsingle :: nil, Tfloat)
+
+  | Ointoffloat => (Tfloat :: nil, Tint)
+  | Ointuoffloat => (Tfloat :: nil, Tint)
+  | Ofloatofint => (Tint :: nil, Tfloat)
+  | Ofloatofintu => (Tint :: nil, Tfloat)
+  | Ointofsingle => (Tsingle :: nil, Tint)
+  | Ointuofsingle => (Tsingle :: nil, Tint)
+  | Osingleofint => (Tint :: nil, Tsingle)
+  | Osingleofintu => (Tint :: nil, Tsingle)
+  | Olongoffloat => (Tfloat :: nil, Tlong)
+  | Olonguoffloat => (Tfloat :: nil, Tlong)
+  | Ofloatoflong => (Tlong :: nil, Tfloat)
+  | Ofloatoflongu => (Tlong :: nil, Tfloat)
+  | Olongofsingle => (Tsingle :: nil, Tlong)
+  | Olonguofsingle => (Tsingle :: nil, Tlong)
+  | Osingleoflong => (Tlong :: nil, Tsingle)
+  | Osingleoflongu => (Tlong :: nil, Tsingle)
+
+  | Ocmp c => (type_of_condition c, Tint)
+  | Osel c ty => (ty :: ty :: type_of_condition c, ty)
+  end.
+
+Definition type_of_addressing (addr: addressing) : list typ :=
+  match addr with
+  | Aindexed _ => Tptr :: nil
+  | Aindexed2 => Tptr :: Tlong :: nil
+  | Aindexed2shift _ => Tptr :: Tlong :: nil
+  | Aindexed2ext _ _ => Tptr :: Tint :: nil
+  | Aglobal _ _ => nil
+  | Ainstack _ => nil
+  end.
+
+(** Weak type soundness results for [eval_operation]:
+  the result values, when defined, are always of the type predicted
+  by [type_of_operation]. *)
+
+Section SOUNDNESS.
+
+Variable A V: Type.
+Variable genv: Genv.t A V.
+
+Remark type_add:
+  forall v1 v2, Val.has_type (Val.add v1 v2) Tint.
+Proof.
+  intros. unfold Val.has_type, Val.add. destruct v1, v2; simpl; auto.
+Qed.
+
+Remark type_sub:
+  forall v1 v2, Val.has_type (Val.sub v1 v2) Tint.
+Proof.
+  intros. unfold Val.has_type, Val.add. destruct v1, v2; simpl; auto.
+Qed.
+
+Remark type_addl:
+  forall v1 v2, Val.has_type (Val.addl v1 v2) Tlong.
+Proof.
+  intros. unfold Val.has_type, Val.addl. destruct v1, v2; simpl; auto.
+Qed.
+
+Remark type_subl:
+  forall v1 v2, Val.has_type (Val.subl v1 v2) Tlong.
+Proof.
+  intros. unfold Val.has_type, Val.addl. destruct v1, v2; simpl; auto.
+  destruct (eq_block b b0); auto.
+Qed.
+
+Lemma type_of_operation_sound:
+  forall op vl sp v m,
+  op <> Omove ->
+  eval_operation genv sp op vl m = Some v ->
+  Val.has_type v (snd (type_of_operation op)).
+Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
+  intros.
+  destruct op; simpl; simpl in H0; FuncInv; subst; simpl.
+  (* move *)
+  - congruence.
+  (* intconst, longconst, floatconst, singleconst *)
+  - exact I.
+  - exact I.
+  - exact I.
+  - exact I.
+  (* addrsymbol *)
+  - unfold Genv.symbol_address. destruct (Genv.find_symbol genv id)...
+  (* addrstack *)
+  - destruct sp...
+  (* 32-bit integer operations *)
+  - destruct s, v0; try exact I; simpl; rewrite a32_range...
+  - apply type_add.
+  - apply type_add.
+  - apply type_add.
+  - destruct v0...
+  - destruct (eval_shift s v0 a)...
+  - apply type_sub.
+  - apply type_sub.
+  - destruct v0... destruct v1...
+  - apply type_add.
+  - apply type_sub.
+  - destruct v0; destruct v1; simpl in *; inv H0.
+    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
+  - destruct v0; destruct v1; simpl in *; inv H0.
+    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shift s v1 a)...
+  - destruct v0...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shift s v1 a)...
+  - destruct v0...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shift s v1 a)...
+  - destruct v0...
+  - destruct v0...
+  - destruct (eval_shift s v0 a)...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shift s v1 a)...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shift s v1 a)...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shift s v1 a)...
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0...
+  - destruct v0...
+  - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... 
+  - destruct (Val.sign_ext s v0)... simpl; rewrite a32_range...
+  - destruct (Val.shru v0 (Vint a))... 
+  - destruct (Val.shr v0 (Vint a))...
+  (* 64-bit integer operations *)
+  - destruct s, v0; try exact I; simpl; rewrite a64_range...
+  - unfold eval_extend. destruct (match x with
+     | Xsgn32 => Val.longofint v0
+     | Xuns32 => Val.longofintu v0
+     end)...
+    simpl; rewrite a64_range...
+  - destruct v0... destruct v1...
+  - destruct v0...
+  - destruct v0...
+  - apply type_addl.
+  - apply type_addl.
+  - apply type_addl.
+  - apply type_addl.
+  - destruct v0...
+  - destruct (eval_shiftl s v0 a)...
+  - apply type_subl.
+  - apply type_subl.
+  - apply type_subl.
+  - destruct v0... destruct v1...
+  - apply type_addl.
+  - apply type_subl.
+  - destruct v0... destruct v1...
+  - destruct v0... destruct v1...
+  - destruct v0; destruct v1; simpl in *; inv H0.
+    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
+  - destruct v0; destruct v1; simpl in *; inv H0.
+    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shiftl s v1 a)...
+  - destruct v0...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shiftl s v1 a)...
+  - destruct v0...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shiftl s v1 a)...
+  - destruct v0...
+  - destruct v0...
+  - destruct (eval_shiftl s v0 a)...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shiftl s v1 a)...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shiftl s v1 a)...
+  - destruct v0... destruct v1...
+  - destruct v0... destruct (eval_shiftl s v1 a)...
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0...
+  - destruct v0...
+  - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... 
+  - destruct (Val.sign_ext_l s v0)... simpl; rewrite a64_range...
+  - destruct (Val.shrlu v0 (Vint a))... 
+  - destruct (Val.shrl v0 (Vint a))...
+
+  (* 64-bit FP *)
+  - destruct v0...
+  - destruct v0...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* 32-bit FP *)
+  - destruct v0...
+  - destruct v0...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* singleoffloat, floatofsingle *)
+  - destruct v0...
+  - destruct v0...
+  (* intoffloat, intuoffloat *)
+  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
+  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  (* floatofint, floatofintu *)
+  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; simpl in H0; inv H0...
+  (* intofsingle, intuofsingle *)
+  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
+  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  (* singleofint, singleofintu *)
+  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; simpl in H0; inv H0...
+  (* longoffloat, longuoffloat *)
+  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
+  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  (* floatoflong, floatoflongu *)
+  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; simpl in H0; inv H0...
+  (* longofsingle, longuofsingle *)
+  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
+  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  (* singleoflong, singleoflongu *)
+  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; simpl in H0; inv H0...
+  (* cmp *)
+  - destruct (eval_condition cond vl m) as [[]|]...
+  - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
+Qed.
+
+
+Definition is_trapping_op (op : operation) :=
+  match op with
+  | Odiv | Odivu | Odivl | Odivlu
+  | Oshrximm _  | Oshrlximm _
+  | Ointoffloat | Ointuoffloat
+  | Ointofsingle | Ointuofsingle
+  | Ofloatofint | Ofloatofintu
+  | Osingleofint | Osingleofintu
+  | Olongoffloat | Olonguoffloat
+  | Olongofsingle | Olonguofsingle
+  | Ofloatoflong | Ofloatoflongu
+  | Osingleoflong | Osingleoflongu => true
+  | _ => false
+  end.
+                
+
+Lemma is_trapping_op_sound:
+  forall op vl sp m,
+    op <> Omove ->
+    is_trapping_op op = false ->
+    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    eval_operation genv sp op vl m <> None.
+Proof.
+  destruct op; intros; simpl in *; try congruence.
+  all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+  all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+  all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+  all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
+End SOUNDNESS.
+
+(** * Manipulating and transforming operations *)
+
+(** Constructing shift amounts *)
+
+Section SHIFT_AMOUNT.
+
+Variable l: Z.
+Hypothesis l_range: 0 <= l < 32.
+Variable N: int.
+Hypothesis N_eq: Int.unsigned N = two_p l.
+
+Remark mk_amount_range:
+  forall n, Int.ltu (Int.zero_ext l n) N = true.
+Proof.
+  intros; unfold Int.ltu. apply zlt_true. rewrite N_eq. apply (Int.zero_ext_range l n). assumption.
+Qed.
+
+Remark mk_amount_eq:
+  forall n, Int.ltu n N = true -> Int.zero_ext l n = n.
+Proof.
+  intros.
+  transitivity (Int.repr (Int.unsigned (Int.zero_ext l n))).
+  symmetry; apply Int.repr_unsigned.
+  transitivity (Int.repr (Int.unsigned n)).
+  f_equal. rewrite Int.zero_ext_mod. apply Int.ltu_inv in H. rewrite N_eq in H. 
+  apply Z.mod_small. assumption. assumption.
+  apply Int.repr_unsigned.
+Qed.
+
+End SHIFT_AMOUNT.
+
+Program Definition mk_amount32 (n: int): amount32 :=
+  {| a32_amount := Int.zero_ext 5 n |}.
+Next Obligation.
+  apply mk_amount_range. omega. reflexivity.
+Qed.
+
+Lemma mk_amount32_eq: forall n,
+  Int.ltu n Int.iwordsize = true -> a32_amount (mk_amount32 n) = n.
+Proof.
+  intros. eapply mk_amount_eq; eauto. omega. reflexivity.
+Qed.
+
+Program Definition mk_amount64 (n: int): amount64 :=
+  {| a64_amount := Int.zero_ext 6 n |}.
+Next Obligation.
+  apply mk_amount_range. omega. reflexivity.
+Qed.
+
+Lemma mk_amount64_eq: forall n,
+  Int.ltu n Int64.iwordsize' = true -> a64_amount (mk_amount64 n) = n.
+Proof.
+  intros. eapply mk_amount_eq; eauto. omega. reflexivity.
+Qed.
+ 
+(** Recognition of move operations. *)
+
+Definition is_move_operation
+    (A: Type) (op: operation) (args: list A) : option A :=
+  match op, args with
+  | Omove, arg :: nil => Some arg
+  | _, _ => None
+  end.
+
+Lemma is_move_operation_correct:
+  forall (A: Type) (op: operation) (args: list A) (a: A),
+  is_move_operation op args = Some a ->
+  op = Omove /\ args = a :: nil.
+Proof.
+  intros until a. unfold is_move_operation; destruct op;
+  try (intros; discriminate).
+  destruct args. intros; discriminate.
+  destruct args. intros. intuition congruence.
+  intros; discriminate.
+Qed.
+
+(** [negate_condition cond] returns a condition that is logically
+  equivalent to the negation of [cond]. *)
+
+Definition negate_condition (cond: condition): condition :=
+  match cond with
+  | Ccomp c => Ccomp (negate_comparison c)
+  | Ccompu c => Ccompu (negate_comparison c)
+  | Ccompimm c n => Ccompimm (negate_comparison c) n
+  | Ccompuimm c n => Ccompuimm (negate_comparison c) n
+  | Ccompshift c s a => Ccompshift (negate_comparison c) s a
+  | Ccompushift c s a => Ccompushift (negate_comparison c) s a
+  | Cmaskzero n => Cmasknotzero n
+  | Cmasknotzero n => Cmaskzero n
+  | Ccompl c => Ccompl (negate_comparison c)
+  | Ccomplu c => Ccomplu (negate_comparison c)
+  | Ccomplimm c n => Ccomplimm (negate_comparison c) n
+  | Ccompluimm c n => Ccompluimm (negate_comparison c) n
+  | Ccomplshift c s a => Ccomplshift (negate_comparison c) s a
+  | Ccomplushift c s a => Ccomplushift (negate_comparison c) s a
+  | Cmasklzero n => Cmasklnotzero n
+  | Cmasklnotzero n => Cmasklzero n
+  | Ccompf c => Cnotcompf c
+  | Cnotcompf c => Ccompf c
+  | Ccompfzero c => Cnotcompfzero c
+  | Cnotcompfzero c => Ccompfzero c
+  | Ccompfs c => Cnotcompfs c
+  | Cnotcompfs c => Ccompfs c
+  | Ccompfszero c => Cnotcompfszero c
+  | Cnotcompfszero c => Ccompfszero c
+  end.
+
+Lemma eval_negate_condition:
+  forall cond vl m,
+  eval_condition (negate_condition cond) vl m = option_map negb (eval_condition cond vl m).
+Proof.
+  intros. destruct cond; simpl.
+  repeat (destruct vl; auto). apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto). apply (Val.negate_cmp_bool Ceq).
+  repeat (destruct vl; auto). apply (Val.negate_cmp_bool Cne).
+  repeat (destruct vl; auto). apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto). apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto). apply (Val.negate_cmpl_bool Ceq).
+  repeat (destruct vl; auto). apply (Val.negate_cmpl_bool Cne).
+  repeat (destruct vl; auto).
+  repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0) as [[]|]; auto.
+  repeat (destruct vl; auto).
+  repeat (destruct vl; auto). destruct (Val.cmpf_bool c v (Vfloat Float.zero)) as [[]|]; auto.
+  repeat (destruct vl; auto).
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0) as [[]|]; auto.
+  repeat (destruct vl; auto).
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v (Vsingle Float32.zero)) as [[]|]; auto.
+Qed.
+
+(** Shifting stack-relative references.  This is used in [Stacking]. *)
+
+Definition shift_stack_addressing (delta: Z) (addr: addressing) :=
+  match addr with
+  | Ainstack ofs => Ainstack (Ptrofs.add ofs (Ptrofs.repr delta))
+  | _ => addr
+  end.
+
+Definition shift_stack_operation (delta: Z) (op: operation) :=
+  match op with
+  | Oaddrstack ofs => Oaddrstack (Ptrofs.add ofs (Ptrofs.repr delta))
+  | _ => op
+  end.
+
+Lemma type_shift_stack_addressing:
+  forall delta addr, type_of_addressing (shift_stack_addressing delta addr) = type_of_addressing addr.
+Proof.
+  intros. destruct addr; auto.
+Qed.
+
+Lemma type_shift_stack_operation:
+  forall delta op, type_of_operation (shift_stack_operation delta op) = type_of_operation op.
+Proof.
+  intros. destruct op; auto.
+Qed.
+
+Lemma eval_shift_stack_addressing:
+  forall F V (ge: Genv.t F V) sp addr vl delta,
+  eval_addressing ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
+  eval_addressing ge (Vptr sp (Ptrofs.repr delta)) addr vl.
+Proof.
+  intros. destruct addr; simpl; auto. destruct vl; auto.
+  rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
+Qed.
+
+Lemma eval_shift_stack_operation:
+  forall F V (ge: Genv.t F V) sp op vl m delta,
+  eval_operation ge (Vptr sp Ptrofs.zero) (shift_stack_operation delta op) vl m =
+  eval_operation ge (Vptr sp (Ptrofs.repr delta)) op vl m.
+Proof.
+  intros. destruct op; simpl; auto. destruct vl; auto.
+  rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
+Qed.
+
+(** Offset an addressing mode [addr] by a quantity [delta], so that
+  it designates the pointer [delta] bytes past the pointer designated
+  by [addr].  May be undefined, in which case [None] is returned. *)
+
+Definition offset_addressing (addr: addressing) (delta: Z) : option addressing :=
+  match addr with
+  | Aindexed n => Some(Aindexed (Int64.add n (Int64.repr delta)))
+  | Aindexed2 => None
+  | Aindexed2shift _ => None
+  | Aindexed2ext _ _ => None
+  | Aglobal id n => Some(Aglobal id (Ptrofs.add n (Ptrofs.repr delta)))
+  | Ainstack n => Some(Ainstack (Ptrofs.add n (Ptrofs.repr delta)))
+  end.
+
+Lemma eval_offset_addressing:
+  forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
+  offset_addressing addr delta = Some addr' ->
+  eval_addressing ge sp addr args = Some v ->
+  Archi.ptr64 = false ->
+  eval_addressing ge sp addr' args = Some(Val.add v (Vint (Int.repr delta))).
+Proof.
+  intros. discriminate. 
+Qed.
+
+(** Operations that are so cheap to recompute that CSE should not factor them out. *)
+
+Definition is_trivial_op (op: operation) : bool :=
+  match op with
+  | Omove => true
+  | Ointconst n => Int.eq (Int.sign_ext 16 n) n
+  | Olongconst n => Int64.eq (Int64.sign_ext 16 n) n
+  | Oaddrstack _ => true
+  | _ => false
+  end.
+
+(** Operations that depend on the memory state. *)
+
+Definition cond_depends_on_memory (c: condition) : bool :=
+  match c with
+  | Ccomplu _ | Ccompluimm _ _ | Ccomplushift _ _ _ => true
+  | _ => false
+  end.
+
+Lemma cond_depends_on_memory_correct:
+  forall c args m1 m2,
+  cond_depends_on_memory c = false ->
+  eval_condition c args m1 = eval_condition c args m2.
+Proof.
+  intros; destruct c; simpl; discriminate || reflexivity.
+Qed.
+
+Definition op_depends_on_memory (op: operation) : bool :=
+  match op with
+  | Ocmp c => cond_depends_on_memory c
+  | Osel c yu => cond_depends_on_memory c
+  | _ => false
+  end.
+
+Lemma op_depends_on_memory_correct:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  op_depends_on_memory op = false ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros. destruct op; auto.
+  simpl. rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
+  simpl. destruct args; auto. destruct args; auto.
+  rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
+Qed.
+
+(** Global variables mentioned in an operation or addressing mode *)
+
+Definition globals_addressing (addr: addressing) : list ident :=
+  match addr with
+  | Aglobal s ofs => s :: nil
+  | _ => nil
+  end.
+
+Definition globals_operation (op: operation) : list ident :=
+  match op with
+  | Oaddrsymbol s ofs => s :: nil
+  | _ => nil
+  end.
+
+(** * Invariance and compatibility properties. *)
+
+(** [eval_operation] and [eval_addressing] depend on a global environment
+  for resolving references to global symbols.  We show that they give
+  the same results if a global environment is replaced by another that
+  assigns the same addresses to the same symbols. *)
+
+Section GENV_TRANSF.
+
+Variable F1 F2 V1 V2: Type.
+Variable ge1: Genv.t F1 V1.
+Variable ge2: Genv.t F2 V2.
+Hypothesis agree_on_symbols:
+  forall (s: ident), Genv.find_symbol ge2 s = Genv.find_symbol ge1 s.
+
+Lemma eval_addressing_preserved:
+  forall sp addr vl,
+  eval_addressing ge2 sp addr vl = eval_addressing ge1 sp addr vl.
+Proof.
+  intros.
+  unfold eval_addressing; destruct addr; auto. destruct vl; auto. 
+  unfold Genv.symbol_address. rewrite agree_on_symbols; auto.
+Qed.
+
+Lemma eval_operation_preserved:
+  forall sp op vl m,
+  eval_operation ge2 sp op vl m = eval_operation ge1 sp op vl m.
+Proof.
+  intros.
+  unfold eval_operation; destruct op; auto. destruct vl; auto.
+  unfold Genv.symbol_address. rewrite agree_on_symbols; auto.
+Qed.
+
+End GENV_TRANSF.
+
+(** Compatibility of the evaluation functions with value injections. *)
+
+Section EVAL_COMPAT.
+
+Variable F1 F2 V1 V2: Type.
+Variable ge1: Genv.t F1 V1.
+Variable ge2: Genv.t F2 V2.
+Variable f: meminj.
+
+Variable m1: mem.
+Variable m2: mem.
+
+Hypothesis valid_pointer_inj:
+  forall b1 ofs b2 delta,
+  f b1 = Some(b2, delta) ->
+  Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+  Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+
+Hypothesis weak_valid_pointer_inj:
+  forall b1 ofs b2 delta,
+  f b1 = Some(b2, delta) ->
+  Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+  Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+
+Hypothesis weak_valid_pointer_no_overflow:
+  forall b1 ofs b2 delta,
+  f b1 = Some(b2, delta) ->
+  Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+  0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned.
+
+Hypothesis valid_different_pointers_inj:
+  forall b1 ofs1 b2 ofs2 b1' delta1 b2' delta2,
+  b1 <> b2 ->
+  Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true ->
+  Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true ->
+  f b1 = Some (b1', delta1) ->
+  f b2 = Some (b2', delta2) ->
+  b1' <> b2' \/
+  Ptrofs.unsigned (Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned (Ptrofs.add ofs2 (Ptrofs.repr delta2)).
+
+Ltac InvInject :=
+  match goal with
+  | [ H: Val.inject _ (Vint _) _ |- _ ] =>
+      inv H; InvInject
+  | [ H: Val.inject _ (Vfloat _) _ |- _ ] =>
+      inv H; InvInject
+  | [ H: Val.inject _ (Vptr _ _) _ |- _ ] =>
+      inv H; InvInject
+  | [ H: Val.inject_list _ nil _ |- _ ] =>
+      inv H; InvInject
+  | [ H: Val.inject_list _ (_ :: _) _ |- _ ] =>
+      inv H; InvInject
+  | _ => idtac
+  end.
+
+Lemma eval_shift_inject:
+  forall v1 v2 s a,
+  Val.inject f v1 v2 -> Val.inject f (eval_shift s v1 a) (eval_shift s v2 a).
+Proof.
+  intros; inv H; destruct s; simpl; auto; rewrite a32_range; auto.
+Qed.
+
+Lemma eval_shiftl_inject:
+  forall v1 v2 s a,
+  Val.inject f v1 v2 -> Val.inject f (eval_shiftl s v1 a) (eval_shiftl s v2 a).
+Proof.
+  intros; inv H; destruct s; simpl; auto; rewrite a64_range; auto.
+Qed.
+
+Lemma eval_extend_inject:
+  forall v1 v2 x a,
+  Val.inject f v1 v2 -> Val.inject f (eval_extend x v1 a) (eval_extend x v2 a).
+Proof.
+  unfold eval_extend; intros; inv H; destruct x; simpl; auto; rewrite a64_range; auto.
+Qed.
+
+Lemma eval_condition_inj:
+  forall cond vl1 vl2 b,
+  Val.inject_list f vl1 vl2 ->
+  eval_condition cond vl1 m1 = Some b ->
+  eval_condition cond vl2 m2 = Some b.
+Proof.
+  intros. destruct cond; simpl in H0; FuncInv; InvInject; simpl; auto.
+(* 32-bit integers *)
+- inv H3; inv H2; simpl in H0; inv H0; auto.
+- eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
+- inv H3; simpl in H0; inv H0; auto.
+- eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
+- revert H0. generalize (eval_shift_inject s a H2); intros J; inv H3; inv J; simpl; congruence.
+- eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies, eval_shift_inject.
+- inv H3; inv H0; auto.
+- inv H3; inv H0; auto.
+(* 64-bit integers *)
+- inv H3; inv H2; simpl in H0; inv H0; auto.
+- eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
+- inv H3; simpl in H0; inv H0; auto.
+- eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
+- revert H0. generalize (eval_shiftl_inject s a H2); intros J; inv H3; inv J; simpl; congruence.
+- eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies, eval_shiftl_inject.
+- inv H3; inv H0; auto.
+- inv H3; inv H0; auto.
+(* 64-bit floats *)
+- inv H3; inv H2; simpl in H0; inv H0; auto.
+- inv H3; inv H2; simpl in H0; inv H0; auto.
+- inv H3; simpl in H0; inv H0; auto.
+- inv H3; simpl in H0; inv H0; auto.
+(* 32-bit floats *)
+- inv H3; inv H2; simpl in H0; inv H0; auto.
+- inv H3; inv H2; simpl in H0; inv H0; auto.
+- inv H3; simpl in H0; inv H0; auto.
+- inv H3; simpl in H0; inv H0; auto.
+Qed.
+
+Ltac TrivialExists :=
+  match goal with
+  | [ |- exists v2, Some ?v1 = Some v2 /\ Val.inject _ _ v2 ] =>
+      exists v1; split; auto
+  | _ => idtac
+  end.
+
+Lemma eval_operation_inj:
+  forall op sp1 vl1 sp2 vl2 v1,
+  (forall id ofs,
+      In id (globals_operation op) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_operation ge1 sp1 op vl1 m1 = Some v1 ->
+  exists v2, eval_operation ge2 sp2 op vl2 m2 = Some v2 /\ Val.inject f v1 v2.
+Proof.
+  intros until v1; intros GL; intros. destruct op; simpl in H1; simpl; FuncInv; InvInject; TrivialExists.
+  (* addrsymbol *)
+  - apply GL; simpl; auto.
+  (* addrstack *)
+  - apply Val.offset_ptr_inject; auto. 
+  (* shift *)
+  - apply eval_shift_inject; auto.
+  (* add *)
+  - apply Val.add_inject; auto.
+  - apply Val.add_inject; auto using eval_shift_inject.
+  - apply Val.add_inject; auto.
+  (* neg, sub *)
+  - inv H4; simpl; auto.
+  - generalize (eval_shift_inject s a H4); intros J; inv J; simpl; auto.
+  - apply Val.sub_inject; auto.
+  - apply Val.sub_inject; auto using eval_shift_inject.
+  (* mul, muladd, mulsub *)
+  - inv H4; inv H2; simpl; auto.
+  - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
+  - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
+  (* div, divu *)
+  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+    destruct (Int.eq i0 Int.zero
+              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
+    TrivialExists.
+  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+  (* and*)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  - inv H4; simpl; auto.
+  (* or *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  - inv H4; simpl; auto.
+  (* xor *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  - inv H4; simpl; auto.
+  (* not *)
+  - inv H4; simpl; auto.
+  - generalize (eval_shift_inject s a H4); intros J; inv J; simpl; auto.
+  (* bic *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  (* nor *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  (* eqv *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  (* shl *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  (* shr *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  (* shru *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  (* shrx *)
+  - inv H4; simpl in H1; try discriminate. simpl.
+    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  (* shift-ext *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto; rewrite a32_range; auto.
+  - inv H4; simpl; auto; rewrite a32_range; auto.
+  - inv H4; simpl; auto; rewrite a32_range; simpl; auto.
+  - inv H4; simpl; auto; rewrite a32_range; simpl; auto.
+
+  (* shiftl *)
+  - apply eval_shiftl_inject; auto.
+  (* extend *)
+  - apply eval_extend_inject; auto.
+  (* makelong, low, high *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* addl *)
+  - apply Val.addl_inject; auto.
+  - apply Val.addl_inject; auto using eval_shiftl_inject.
+  - apply Val.addl_inject; auto using eval_extend_inject.
+  - apply Val.addl_inject; auto.
+  (* negl, subl *)
+  - inv H4; simpl; auto.
+  - generalize (eval_shiftl_inject s a H4); intros J; inv J; simpl; auto.
+  - apply Val.subl_inject; auto.
+  - apply Val.subl_inject; auto using eval_shiftl_inject.
+  - apply Val.subl_inject; auto using eval_extend_inject.
+  (* mull, mulladd, mullsub, mullhs, mullhu *)
+  - inv H4; inv H2; simpl; auto.
+  - apply Val.addl_inject; auto. inv H2; inv H3; simpl; auto.
+  - apply Val.subl_inject; auto. inv H2; inv H3; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* divl, divlu *)
+  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+    destruct (Int64.eq i0 Int64.zero
+              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
+    TrivialExists.
+  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+  (* andl *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  - inv H4; simpl; auto.
+  (* orl *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  - inv H4; simpl; auto.
+  (* xorl *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  - inv H4; simpl; auto.
+  (* notl *)
+  - inv H4; simpl; auto.
+  - generalize (eval_shiftl_inject s a H4); intros J; inv J; simpl; auto.
+  (* bicl *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  (* norl *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  (* eqvl *)
+  - inv H4; inv H2; simpl; auto. 
+  - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
+  (* shll *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  (* shrl *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  (* shrlu *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  (* shrlx *)
+  - inv H4; simpl in H1; try discriminate. simpl.
+    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+  (* shift-ext *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto; rewrite a64_range; auto.
+  - inv H4; simpl; auto; rewrite a64_range; auto.
+  - inv H4; simpl; auto; rewrite a64_range; simpl; auto.
+  - inv H4; simpl; auto; rewrite a64_range; simpl; auto.
+
+  (* negf, absf *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* addf, subf *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* mulf, divf *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* negfs, absfs *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* addfs, subfs *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* mulfs, divfs *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* singleoffloat, floatofsingle *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* intoffloat, intuoffloat *)
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
+    exists (Vint i); auto.
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
+    exists (Vint i); auto.
+  (* floatofint, floatofintu *)
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  (* intofsingle, intuofsingle *)
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
+    exists (Vint i); auto.
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
+    exists (Vint i); auto.
+  (* singleofint, singleofintu *)
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  (* longoffloat, longuoffloat *)
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
+    exists (Vlong i); auto.
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
+    exists (Vlong i); auto.
+  (* floatoflong, floatoflongu *)
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  (* longofsingle, longuofsingle *)
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
+    exists (Vlong i); auto.
+  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
+    exists (Vlong i); auto.
+  (* singleoflong, singleoflongu *)
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  (* cmp, sel *)
+  - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
+    exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
+    destruct b; simpl; constructor.
+    simpl; constructor.
+  - apply Val.select_inject; auto.  
+    destruct (eval_condition cond vl1 m1) eqn:?; auto.
+    right; symmetry; eapply eval_condition_inj; eauto.
+Qed.
+
+Lemma eval_addressing_inj:
+  forall addr sp1 vl1 sp2 vl2 v1,
+  (forall id ofs,
+      In id (globals_addressing addr) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing ge1 sp1 addr vl1 = Some v1 ->
+  exists v2, eval_addressing ge2 sp2 addr vl2 = Some v2 /\ Val.inject f v1 v2.
+Proof.
+  intros. destruct addr; simpl in H2; simpl; FuncInv; InvInject; TrivialExists.
+- apply Val.addl_inject; auto.
+- apply Val.addl_inject; auto.
+- apply Val.addl_inject; auto. inv H3; simpl; auto; rewrite a64_range; auto.
+- apply Val.addl_inject; auto using eval_extend_inject.
+- apply H; simpl; auto.
+- apply Val.offset_ptr_inject; auto. 
+Qed.
+
+
+Lemma eval_addressing_inj_none:
+  forall addr sp1 vl1 sp2 vl2,
+  (forall id ofs,
+      In id (globals_addressing addr) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing ge1 sp1 addr vl1 = None ->
+  eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+  intros until vl2. intros Hglobal Hinjsp Hinjvl.
+  destruct addr; simpl in *;
+  inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate.
+Qed.
+End EVAL_COMPAT.
+
+(** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
+
+Section EVAL_LESSDEF.
+
+Variable F V: Type.
+Variable genv: Genv.t F V.
+
+Remark valid_pointer_extends:
+  forall m1 m2, Mem.extends m1 m2 ->
+  forall b1 ofs b2 delta,
+  Some(b1, 0) = Some(b2, delta) ->
+  Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+  Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+Proof.
+  intros. inv H0. rewrite Ptrofs.add_zero. eapply Mem.valid_pointer_extends; eauto.
+Qed.
+
+Remark weak_valid_pointer_extends:
+  forall m1 m2, Mem.extends m1 m2 ->
+  forall b1 ofs b2 delta,
+  Some(b1, 0) = Some(b2, delta) ->
+  Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+  Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+Proof.
+  intros. inv H0. rewrite Ptrofs.add_zero. eapply Mem.weak_valid_pointer_extends; eauto.
+Qed.
+
+Remark weak_valid_pointer_no_overflow_extends:
+  forall m1 b1 ofs b2 delta,
+  Some(b1, 0) = Some(b2, delta) ->
+  Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+  0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned.
+Proof.
+  intros. inv H. rewrite Z.add_0_r. apply Ptrofs.unsigned_range_2.
+Qed.
+
+Remark valid_different_pointers_extends:
+  forall m1 b1 ofs1 b2 ofs2 b1' delta1 b2' delta2,
+  b1 <> b2 ->
+  Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true ->
+  Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true ->
+  Some(b1, 0) = Some (b1', delta1) ->
+  Some(b2, 0) = Some (b2', delta2) ->
+  b1' <> b2' \/
+  Ptrofs.unsigned(Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned(Ptrofs.add ofs2 (Ptrofs.repr delta2)).
+Proof.
+  intros. inv H2; inv H3. auto.
+Qed.
+
+Lemma eval_condition_lessdef:
+  forall cond vl1 vl2 b m1 m2,
+  Val.lessdef_list vl1 vl2 ->
+  Mem.extends m1 m2 ->
+  eval_condition cond vl1 m1 = Some b ->
+  eval_condition cond vl2 m2 = Some b.
+Proof.
+  intros. eapply eval_condition_inj with (f := fun b => Some(b, 0)) (m1 := m1).
+  apply valid_pointer_extends; auto.
+  apply weak_valid_pointer_extends; auto.
+  apply weak_valid_pointer_no_overflow_extends.
+  apply valid_different_pointers_extends; auto.
+  rewrite <- val_inject_list_lessdef. eauto. auto.
+Qed.
+
+Lemma eval_operation_lessdef:
+  forall sp op vl1 vl2 v1 m1 m2,
+  Val.lessdef_list vl1 vl2 ->
+  Mem.extends m1 m2 ->
+  eval_operation genv sp op vl1 m1 = Some v1 ->
+  exists v2, eval_operation genv sp op vl2 m2 = Some v2 /\ Val.lessdef v1 v2.
+Proof.
+  intros. rewrite val_inject_list_lessdef in H.
+  assert (exists v2 : val,
+          eval_operation genv sp op vl2 m2 = Some v2
+          /\ Val.inject (fun b => Some(b, 0)) v1 v2).
+  eapply eval_operation_inj with (m1 := m1) (sp1 := sp).
+  apply valid_pointer_extends; auto.
+  apply weak_valid_pointer_extends; auto.
+  apply weak_valid_pointer_no_overflow_extends.
+  apply valid_different_pointers_extends; auto.
+  intros. apply val_inject_lessdef. auto.
+  apply val_inject_lessdef; auto.
+  eauto.
+  auto.
+  destruct H2 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
+Qed.
+
+Lemma eval_addressing_lessdef:
+  forall sp addr vl1 vl2 v1,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = Some v1 ->
+  exists v2, eval_addressing genv sp addr vl2 = Some v2 /\ Val.lessdef v1 v2.
+Proof.
+  intros. rewrite val_inject_list_lessdef in H.
+  assert (exists v2 : val,
+          eval_addressing genv sp addr vl2 = Some v2
+          /\ Val.inject (fun b => Some(b, 0)) v1 v2).
+  eapply eval_addressing_inj with (sp1 := sp).
+  intros. rewrite <- val_inject_lessdef; auto.
+  rewrite <- val_inject_lessdef; auto.
+  eauto. auto.
+  destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
+Qed.
+
+Lemma eval_addressing_lessdef_none:
+  forall sp addr vl1 vl2,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = None ->
+  eval_addressing genv sp addr vl2 = None.
+Proof.
+  intros. rewrite val_inject_list_lessdef in H.
+  eapply eval_addressing_inj_none with (sp1 := sp).
+  intros. rewrite <- val_inject_lessdef; auto.
+  rewrite <- val_inject_lessdef; auto.
+  eauto. auto.
+Qed.
+End EVAL_LESSDEF.
+
+(** Compatibility of the evaluation functions with memory injections. *)
+
+Section EVAL_INJECT.
+
+Variable F V: Type.
+Variable genv: Genv.t F V.
+Variable f: meminj.
+Hypothesis globals: meminj_preserves_globals genv f.
+Variable sp1: block.
+Variable sp2: block.
+Variable delta: Z.
+Hypothesis sp_inj: f sp1 = Some(sp2, delta).
+
+Remark symbol_address_inject:
+  forall id ofs, Val.inject f (Genv.symbol_address genv id ofs) (Genv.symbol_address genv id ofs).
+Proof.
+  intros. unfold Genv.symbol_address. destruct (Genv.find_symbol genv id) eqn:?; auto.
+  exploit (proj1 globals); eauto. intros.
+  econstructor; eauto. rewrite Ptrofs.add_zero; auto.
+Qed.
+
+Lemma eval_condition_inject:
+  forall cond vl1 vl2 b m1 m2,
+  Val.inject_list f vl1 vl2 ->
+  Mem.inject f m1 m2 ->
+  eval_condition cond vl1 m1 = Some b ->
+  eval_condition cond vl2 m2 = Some b.
+Proof.
+  intros. eapply eval_condition_inj with (f := f) (m1 := m1); eauto.
+  intros; eapply Mem.valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
+  intros; eapply Mem.different_pointers_inject; eauto.
+Qed.
+
+Lemma eval_addressing_inject:
+  forall addr vl1 vl2 v1,
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = Some v1 ->
+  exists v2,
+     eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = Some v2
+  /\ Val.inject f v1 v2.
+Proof.
+  intros.
+  rewrite eval_shift_stack_addressing.
+  eapply eval_addressing_inj with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+
+Lemma eval_addressing_inject_none:
+  forall addr vl1 vl2,
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+  eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+  intros.
+  rewrite eval_shift_stack_addressing.
+  eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+
+Lemma eval_operation_inject:
+  forall op vl1 vl2 v1 m1 m2,
+  Val.inject_list f vl1 vl2 ->
+  Mem.inject f m1 m2 ->
+  eval_operation genv (Vptr sp1 Ptrofs.zero) op vl1 m1 = Some v1 ->
+  exists v2,
+     eval_operation genv (Vptr sp2 Ptrofs.zero) (shift_stack_operation delta op) vl2 m2 = Some v2
+  /\ Val.inject f v1 v2.
+Proof.
+  intros.
+  rewrite eval_shift_stack_operation. simpl.
+  eapply eval_operation_inj with (sp1 := Vptr sp1 Ptrofs.zero) (m1 := m1); eauto.
+  intros; eapply Mem.valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_val; eauto.
+  intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
+  intros; eapply Mem.different_pointers_inject; eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+
+End EVAL_INJECT.
+
+(** * Handling of builtin arguments *)
+
+Definition builtin_arg_ok_1
+       (A: Type) (ba: builtin_arg A) (c: builtin_arg_constraint) :=
+  match c, ba with
+  | OK_all, _ => true
+  | OK_const, (BA_int _ | BA_long _ | BA_float _ | BA_single _) => true
+  | OK_addrstack, BA_addrstack _ => true
+  | OK_addressing, BA_addrstack _ => true
+  | OK_addressing, BA_addptr (BA _) (BA_int _) => true
+  | OK_addressing, BA_addptr (BA _) (BA_long _) => true
+  | _, _ => false
+  end.
+
+Definition builtin_arg_ok
+       (A: Type) (ba: builtin_arg A) (c: builtin_arg_constraint) :=
+  match ba with
+  | (BA _ | BA_splitlong (BA _) (BA _)) => true
+  | _ => builtin_arg_ok_1 ba c
+  end.  
diff --git a/aarch64/PrintOp.ml b/aarch64/PrintOp.ml
new file mode 100644
index 00000000..2d45e446
--- /dev/null
+++ b/aarch64/PrintOp.ml
@@ -0,0 +1,247 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Pretty-printing of operators, conditions, addressing modes *)
+
+open Printf
+open Camlcoq
+open Integers
+open Op
+
+let comparison_name = function
+  | Ceq -> "=="
+  | Cne -> "!="
+  | Clt -> "<"
+  | Cle -> "<="
+  | Cgt -> ">"
+  | Cge -> ">="
+
+let shift pp (s, a) =
+  match s with
+  | Slsl -> fprintf pp "<< %ld" (camlint_of_coqint a)
+  | Slsr -> fprintf pp ">>u %ld" (camlint_of_coqint a)
+  | Sasr -> fprintf pp ">>s %ld" (camlint_of_coqint a)
+  | Sror -> fprintf pp "ror %ld" (camlint_of_coqint a)
+
+let shiftl pp (s, a) =
+  match s with
+  | Slsl -> fprintf pp "<<l %ld" (camlint_of_coqint a)
+  | Slsr -> fprintf pp ">>lu %ld" (camlint_of_coqint a)
+  | Sasr -> fprintf pp ">>ls %ld" (camlint_of_coqint a)
+  | Sror -> fprintf pp "rorl %ld" (camlint_of_coqint a)
+
+let extend_name = function
+  | Xsgn32 -> "sext"
+  | Xuns32 -> "zext"
+
+let print_condition reg pp = function
+  | (Ccomp c, [r1;r2]) ->
+      fprintf pp "%a %ss %a" reg r1 (comparison_name c) reg r2
+  | (Ccompu c, [r1;r2]) ->
+      fprintf pp "%a %su %a" reg r1 (comparison_name c) reg r2
+  | (Ccompimm(c, n), [r1]) ->
+      fprintf pp "%a %ss %ld" reg r1 (comparison_name c) (camlint_of_coqint n)
+  | (Ccompuimm(c, n), [r1]) ->
+      fprintf pp "%a %su %ld" reg r1 (comparison_name c) (camlint_of_coqint n)
+  | (Ccompshift(c, s, a), [r1;r2]) ->
+      fprintf pp "%a %ss %a %a" reg r1 (comparison_name c) reg r2 shift (s, a)
+  | (Ccompushift(c, s, a), [r1;r2]) ->
+      fprintf pp "%a %su %a %a" reg r1 (comparison_name c) reg r2 shift (s, a)
+  | (Cmaskzero n, [r1]) ->
+      fprintf pp "%a & 0x%lx == 0" reg r1 (camlint_of_coqint n)
+  | (Cmasknotzero n, [r1]) ->
+      fprintf pp "%a & 0x%lx != 0" reg r1 (camlint_of_coqint n)
+  | (Ccompl c, [r1;r2]) ->
+      fprintf pp "%a %sls %a" reg r1 (comparison_name c) reg r2
+  | (Ccomplu c, [r1;r2]) ->
+      fprintf pp "%a %slu %a" reg r1 (comparison_name c) reg r2
+  | (Ccomplimm(c, n), [r1]) ->
+      fprintf pp "%a %sls %Ld" reg r1 (comparison_name c) (camlint64_of_coqint n)
+  | (Ccompluimm(c, n), [r1]) ->
+      fprintf pp "%a %slu %Ld" reg r1 (comparison_name c) (camlint64_of_coqint n)
+  | (Ccomplshift(c, s, a), [r1;r2]) ->
+      fprintf pp "%a %sls %a %a" reg r1 (comparison_name c) reg r2 shift (s, a)
+  | (Ccomplushift(c, s, a), [r1;r2]) ->
+      fprintf pp "%a %slu %a %a" reg r1 (comparison_name c) reg r2 shift (s, a)
+  | (Cmasklzero n, [r1]) ->
+      fprintf pp "%a &l 0x%Lx == 0" reg r1 (camlint64_of_coqint n)
+  | (Cmasklnotzero n, [r1]) ->
+      fprintf pp "%a &l 0x%Lx != 0" reg r1 (camlint64_of_coqint n)
+  | (Ccompf c, [r1;r2]) ->
+      fprintf pp "%a %sf %a" reg r1 (comparison_name c) reg r2
+  | (Cnotcompf c, [r1;r2]) ->
+      fprintf pp "%a not(%sf) %a" reg r1 (comparison_name c) reg r2
+  | (Ccompfzero c, [r1]) ->
+      fprintf pp "%a %sf 0.0" reg r1 (comparison_name c)
+  | (Cnotcompfzero c, [r1]) ->
+      fprintf pp "%a not(%sf) 0.0" reg r1 (comparison_name c)
+  | (Ccompfs c, [r1;r2]) ->
+      fprintf pp "%a %sfs %a" reg r1 (comparison_name c) reg r2
+  | (Cnotcompfs c, [r1;r2]) ->
+      fprintf pp "%a not(%sfs) %a" reg r1 (comparison_name c) reg r2
+  | (Ccompfszero c, [r1]) ->
+      fprintf pp "%a %sfs 0.0" reg r1 (comparison_name c)
+  | (Cnotcompfszero c, [r1]) ->
+      fprintf pp "%a not(%sfs) 0.0" reg r1 (comparison_name c)
+  | _ ->
+      fprintf pp "<bad condition>"
+
+let print_operation reg pp = function
+  | Omove, [r1] -> reg pp r1
+  | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
+  | Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
+  | Ofloatconst n, [] -> fprintf pp "%F" (camlfloat_of_coqfloat n)
+  | Osingleconst n, [] -> fprintf pp "%Ff" (camlfloat_of_coqfloat32 n)
+  | Oaddrsymbol(id, ofs), [] ->
+      fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
+  | Oaddrstack ofs, [] ->
+      fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
+(* 32-bit integer arithmetic *)
+  | Oshift(s, a), [r1] -> fprintf pp "%a %a" reg r1 shift (s,a)
+  | Oadd, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
+  | Oaddshift(s, a), [r1;r2] -> fprintf pp "%a + %a %a" reg r1 reg r2 shift (s,a)
+  | Oaddimm n, [r1] -> fprintf pp "%a + %ld" reg r1 (camlint_of_coqint n)
+  | Oneg, [r1] -> fprintf pp "- %a" reg r1
+  | Onegshift(s, a), [r1] -> fprintf pp "- (%a %a)" reg r1 shift (s,a)
+  | Osub, [r1;r2] -> fprintf pp "%a - %a" reg r1 reg r2
+  | Osubshift(s, a), [r1;r2] -> fprintf pp "%a - %a %a" reg r1 reg r2 shift (s,a)
+  | Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2
+  | Omuladd, [r1;r2;r3] -> fprintf pp "%a + %a * %a" reg r3 reg r1 reg r2
+  | Omulsub, [r1;r2;r3] -> fprintf pp "%a - %a * %a" reg r3 reg r1 reg r2
+  | Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2
+  | Odivu, [r1;r2] -> fprintf pp "%a /u %a" reg r1 reg r2
+  | Oand, [r1;r2] -> fprintf pp "%a & %a" reg r1 reg r2
+  | Oandshift(s, a), [r1;r2] -> fprintf pp "%a & %a %a" reg r1 reg r2 shift (s,a)
+  | Oandimm n, [r1] -> fprintf pp "%a & %ld" reg r1 (camlint_of_coqint n)
+  | Oor, [r1;r2] -> fprintf pp "%a | %a" reg r1 reg r2
+  | Oorshift(s, a), [r1;r2] -> fprintf pp "%a | %a %a" reg r1 reg r2 shift (s,a)
+  | Oorimm n, [r1] ->  fprintf pp "%a | %ld" reg r1 (camlint_of_coqint n)
+  | Oxor, [r1;r2] -> fprintf pp "%a ^ %a" reg r1 reg r2
+  | Oxorshift(s, a), [r1;r2] -> fprintf pp "%a ^ %a %a" reg r1 reg r2 shift (s,a)
+  | Oxorimm n, [r1] -> fprintf pp "%a ^ %ld" reg r1 (camlint_of_coqint n)
+  | Onot, [r1] -> fprintf pp "~ %a" reg r1
+  | Onotshift(s, a), [r1] -> fprintf pp "~ (%a %a)" reg r1 shift (s,a)
+  | Obic, [r1;r2] -> fprintf pp "%a & ~ %a" reg r1 reg r2
+  | Obicshift(s, a), [r1;r2] -> fprintf pp "%a & ~ %a %a" reg r1 reg r2 shift (s,a)
+  | Oorn, [r1;r2] -> fprintf pp "%a | ~ %a" reg r1 reg r2
+  | Oornshift(s, a), [r1;r2] -> fprintf pp "%a | ~ %a %a" reg r1 reg r2 shift (s,a)
+  | Oeqv, [r1;r2] -> fprintf pp "%a ^ ~ %a" reg r1 reg r2
+  | Oeqvshift(s, a), [r1;r2] -> fprintf pp "%a ^ ~ %a %a" reg r1 reg r2 shift (s,a)
+  | Oshl, [r1;r2] -> fprintf pp "%a << %a" reg r1 reg r2
+  | Oshr, [r1;r2] -> fprintf pp "%a >>s %a" reg r1 reg r2
+  | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2
+  | Oshrximm n, [r1] -> fprintf pp "%a >>x %ld" reg r1 (camlint_of_coqint n)
+  | Ozext s, [r1] -> fprintf pp "zext(%d, %a)" (Z.to_int s) reg r1
+  | Osext s, [r1] -> fprintf pp "sext(%d, %a)" (Z.to_int s) reg r1
+  | Oshlzext(s, a), [r1] -> fprintf pp "zext(%d, %a) << %ld" (Z.to_int s) reg r1 (camlint_of_coqint a)
+  | Oshlsext(s, a), [r1] -> fprintf pp "sext(%d, %a) << %ld" (Z.to_int s) reg r1 (camlint_of_coqint a)
+  | Ozextshr(a, s), [r1] -> fprintf pp "zext(%d, %a >>u %ld)" (Z.to_int s) reg r1 (camlint_of_coqint a)
+  | Osextshr(a, s), [r1] -> fprintf pp "sext(%d, %a >>s %ld)" (Z.to_int s) reg r1 (camlint_of_coqint a)
+(* 64-bit integer arithmetic *)
+  | Oshiftl(s, a), [r1] -> fprintf pp "%a %a" reg r1 shiftl (s,a)
+  | Oextend(x, a), [r1] -> fprintf pp "%s(32, %a) <<l %ld" (extend_name x) reg r1 (camlint_of_coqint a)
+  | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
+  | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
+  | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
+  | Oaddl, [r1;r2] -> fprintf pp "%a +l %a" reg r1 reg r2
+  | Oaddlshift(s, a), [r1;r2] -> fprintf pp "%a +l %a %a" reg r1 reg r2 shiftl (s,a)
+  | Oaddlext(x, a), [r1;r2] -> fprintf pp "%a +l %s(%a) << %ld" reg r1 (extend_name x) reg r2 (camlint_of_coqint a)
+  | Oaddlimm n, [r1] -> fprintf pp "%a +l %Ld" reg r1 (camlint64_of_coqint n)
+  | Onegl, [r1] -> fprintf pp "-l %a" reg r1
+  | Oneglshift(s, a), [r1] -> fprintf pp "-l (%a %a)" reg r1 shiftl (s,a)
+  | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2
+  | Osublext(x, a), [r1;r2] -> fprintf pp "%a +l %s(%a) << %ld" reg r1 (extend_name x) reg r2 (camlint_of_coqint a)
+  | Osublshift(s, a), [r1;r2] -> fprintf pp "%a -l %a %a" reg r1 reg r2 shiftl (s,a)
+  | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2
+  | Omulladd, [r1;r2;r3] -> fprintf pp "%a +l %a *l %a" reg r3 reg r1 reg r2
+  | Omullsub, [r1;r2;r3] -> fprintf pp "%a -l %a *l %a" reg r3 reg r1 reg r2
+  | Omullhs, [r1;r2] -> fprintf pp "%a *hls %a" reg r1 reg r2
+  | Omullhu, [r1;r2] -> fprintf pp "%a *hlu %a" reg r1 reg r2
+  | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2
+  | Odivlu, [r1;r2] -> fprintf pp "%a /lu %a" reg r1 reg r2
+  | Oandl, [r1;r2] -> fprintf pp "%a &l %a" reg r1 reg r2
+  | Oandlshift(s, a), [r1;r2] -> fprintf pp "%a &l %a %a" reg r1 reg r2 shiftl (s,a)
+  | Oandlimm n, [r1] -> fprintf pp "%a &l %Ld" reg r1 (camlint64_of_coqint n)
+  | Oorl, [r1;r2] -> fprintf pp "%a |l %a" reg r1 reg r2
+  | Oorlshift(s, a), [r1;r2] -> fprintf pp "%a |l %a %a" reg r1 reg r2 shiftl (s,a)
+  | Oorlimm n, [r1] ->  fprintf pp "%a |l %Ld" reg r1 (camlint64_of_coqint n)
+  | Oxorl, [r1;r2] -> fprintf pp "%a ^l %a" reg r1 reg r2
+  | Oxorlshift(s, a), [r1;r2] -> fprintf pp "%a ^l %a %a" reg r1 reg r2 shiftl (s,a)
+  | Oxorlimm n, [r1] -> fprintf pp "%a ^l %Ld" reg r1 (camlint64_of_coqint n)
+  | Onotl, [r1] -> fprintf pp "~l %a" reg r1
+  | Onotlshift(s, a), [r1] -> fprintf pp "~l (%a %a)" reg r1 shiftl (s,a)
+  | Obicl, [r1;r2] -> fprintf pp "%a &l ~l %a" reg r1 reg r2
+  | Obiclshift(s, a), [r1;r2] -> fprintf pp "%a &l ~l %a %a" reg r1 reg r2 shiftl (s,a)
+  | Oornl, [r1;r2] -> fprintf pp "%a |l ~l %a" reg r1 reg r2
+  | Oornlshift(s, a), [r1;r2] -> fprintf pp "%a |l ~l %a %a" reg r1 reg r2 shiftl (s,a)
+  | Oeqvl, [r1;r2] -> fprintf pp "%a ^l ~l %a" reg r1 reg r2
+  | Oeqvlshift(s, a), [r1;r2] -> fprintf pp "%a ^l ~l %a %a" reg r1 reg r2 shift (s,a)
+  | Oshll, [r1;r2] -> fprintf pp "%a <<l %a" reg r1 reg r2
+  | Oshrl, [r1;r2] -> fprintf pp "%a >>ls %a" reg r1 reg r2
+  | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2
+  | Oshrlximm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n)
+  | Ozextl s, [r1] -> fprintf pp "zextl(%d, %a)" (Z.to_int s) reg r1
+  | Osextl s, [r1] -> fprintf pp "sextl(%d, %a)" (Z.to_int s) reg r1
+  | Oshllzext(s, a), [r1] -> fprintf pp "zextl(%d, %a) <<l %ld" (Z.to_int s) reg r1 (camlint_of_coqint a)
+  | Oshllsext(s, a), [r1] -> fprintf pp "sextl(%d, %a) <<l %ld" (Z.to_int s) reg r1 (camlint_of_coqint a)
+  | Ozextshrl(a, s), [r1] -> fprintf pp "zextl(%d, %a >>lu %ld)" (Z.to_int s) reg r1 (camlint_of_coqint a)
+  | Osextshrl(a, s), [r1] -> fprintf pp "sextl(%d, %a >>ls %ld)" (Z.to_int s) reg r1 (camlint_of_coqint a)
+(* 64-bit floating-point arithmetic *)
+  | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
+  | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
+  | Oaddf, [r1;r2] -> fprintf pp "%a +f %a" reg r1 reg r2
+  | Osubf, [r1;r2] -> fprintf pp "%a -f %a" reg r1 reg r2
+  | Omulf, [r1;r2] -> fprintf pp "%a *f %a" reg r1 reg r2
+  | Odivf, [r1;r2] -> fprintf pp "%a /f %a" reg r1 reg r2
+(* 32-bit floating-point arithmetic *)
+  | Onegfs, [r1] -> fprintf pp "negfs(%a)" reg r1
+  | Oabsfs, [r1] -> fprintf pp "absfs(%a)" reg r1
+  | Oaddfs, [r1;r2] -> fprintf pp "%a +fs %a" reg r1 reg r2
+  | Osubfs, [r1;r2] -> fprintf pp "%a -fs %a" reg r1 reg r2
+  | Omulfs, [r1;r2] -> fprintf pp "%a *fs %a" reg r1 reg r2
+  | Odivfs, [r1;r2] -> fprintf pp "%a /fs %a" reg r1 reg r2
+  | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
+  | Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1
+(* Conversions between int and float *)
+  | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
+  | Ointuoffloat, [r1] -> fprintf pp "intuoffloat(%a)" reg r1
+  | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1
+  | Ofloatofintu, [r1] -> fprintf pp "floatofintu(%a)" reg r1
+  | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
+  | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1
+  | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
+  | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1
+  | Ointofsingle, [r1] -> fprintf pp "intofsingle(%a)" reg r1
+  | Ointuofsingle, [r1] -> fprintf pp "intuofsingle(%a)" reg r1
+  | Osingleofint, [r1] -> fprintf pp "singleofint(%a)" reg r1
+  | Osingleofintu, [r1] -> fprintf pp "singleofintu(%a)" reg r1
+  | Olongofsingle, [r1] -> fprintf pp "longofsingle(%a)" reg r1
+  | Olonguofsingle, [r1] -> fprintf pp "longuofsingle(%a)" reg r1
+  | Osingleoflong, [r1] -> fprintf pp "singleoflong(%a)" reg r1
+  | Osingleoflongu, [r1] -> fprintf pp "singleoflongu(%a)" reg r1
+(* Boolean tests *)
+  | Ocmp c, args -> print_condition reg pp (c, args)
+  | Osel (c, ty), r1::r2::args ->
+      fprintf pp "%a ?%s %a : %a"
+         (print_condition reg) (c, args)
+         (PrintAST.name_of_type ty) reg r1 reg r2
+  | _ -> fprintf pp "<bad operator>"
+
+let print_addressing reg pp = function
+  | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_coqint n)
+  | Aindexed2, [r1; r2] -> fprintf pp "%a + %a" reg r1 reg r2
+  | Aindexed2shift a, [r1; r2] -> fprintf pp "%a + %a << %ld" reg r1 reg r2 (camlint_of_coqint a)
+  | Aindexed2ext(x, a), [r1; r2] -> fprintf pp "%a + %s(%a) << %ld" reg r1 (extend_name x) reg r2 (camlint_of_coqint a)
+  | Aglobal(id, ofs), [] ->
+      fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
+  | Ainstack ofs, [] -> fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
+  | _ -> fprintf pp "<bad addressing>"
diff --git a/aarch64/SelectLong.vp b/aarch64/SelectLong.vp
new file mode 100644
index 00000000..ddf6e212
--- /dev/null
+++ b/aarch64/SelectLong.vp
@@ -0,0 +1,478 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Instruction selection for 64-bit integer operations *)
+
+Require Import Coqlib Zbits.
+Require Import Compopts AST Integers Floats.
+Require Import Op CminorSel SelectOp.
+
+Local Open Scope cminorsel_scope.
+
+(** ** Constants **)
+
+Definition longconst (n: int64) : expr :=
+  Eop (Olongconst n) Enil.
+
+(** ** Conversions *)
+
+Nondetfunction intoflong (e: expr) :=
+  match e with
+  | Eop (Olongconst n) Enil => Eop (Ointconst (Int.repr (Int64.unsigned n))) Enil
+  | _ =>  Eop Olowlong (e ::: Enil)
+  end.
+
+Nondetfunction longofint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => longconst (Int64.repr (Int.signed n))
+  | _ =>  Eop (Oextend Xsgn32 (mk_amount64 Int.zero)) (e ::: Enil)
+  end.
+
+Nondetfunction longofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => longconst (Int64.repr (Int.unsigned n))
+  | _ =>  Eop (Oextend Xuns32 (mk_amount64 Int.zero)) (e ::: Enil)
+  end.
+
+(** ** Integer addition and pointer addition *)
+
+Nondetfunction addlimm (n: int64) (e: expr) :=
+  if Int64.eq n Int64.zero then e else
+  match e with
+  | Eop (Olongconst m) Enil => longconst (Int64.add n m)
+  | Eop (Oaddrsymbol s m) Enil => Eop (Oaddrsymbol s (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil
+  | Eop (Oaddrstack m) Enil => Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil
+  | Eop (Oaddlimm m) (t ::: Enil) => Eop (Oaddlimm(Int64.add n m)) (t ::: Enil)
+  | _ => Eop (Oaddlimm n) (e ::: Enil)
+  end.
+
+Nondetfunction addl (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => addlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => addlimm n2 t1
+  | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm (Int64.add n1 n2) (Eop Oaddl (t1:::t2:::Enil))
+  | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddrstack n2) Enil =>
+      Eop Oaddl (Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n1) n2)) Enil ::: t1 ::: Enil)
+  | Eop (Oaddrstack n1) Enil, Eop (Oaddlimm n2) (t2:::Enil) =>
+      Eop Oaddl (Eop (Oaddrstack (Ptrofs.add n1 (Ptrofs.of_int64 n2))) Enil ::: t2 ::: Enil)
+  | Eop (Oaddlimm n1) (t1:::Enil), t2 =>
+      addlimm n1 (Eop Oaddl (t1:::t2:::Enil))
+  | t1, Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm n2 (Eop Oaddl (t1:::t2:::Enil))
+  | Eop (Oshiftl s a) (t1:::Enil), t2 ?? arith_shift s =>
+      Eop (Oaddlshift s a) (t2 ::: t1 ::: Enil)
+  | t1, Eop (Oshiftl s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Oaddlshift s a) (t1 ::: t2 ::: Enil)
+  | Eop (Oextend x a) (t1:::Enil), t2 =>
+      Eop (Oaddlext x a) (t2 ::: t1 ::: Enil)
+  | t1, Eop (Oextend x a) (t2:::Enil) =>
+      Eop (Oaddlext x a) (t1 ::: t2 ::: Enil)
+  | Eop Omull (t1:::t2:::Enil), t3 =>
+      Eop Omulladd (t3:::t1:::t2:::Enil)
+  | t1, Eop Omull (t2:::t3:::Enil) =>
+      Eop Omulladd (t1:::t2:::t3:::Enil)
+  | _, _ => Eop Oaddl (e1:::e2:::Enil)
+  end.
+
+(** ** Opposite *)
+
+Nondetfunction negl (e: expr) :=
+  match e with
+  | Eop (Olongconst n) Enil => Eop (Olongconst (Int64.neg n)) Enil
+  | Eop (Oshiftl s a) (t1:::Enil) ?? arith_shift s => Eop (Oneglshift s a) (t1:::Enil)
+  | _ =>  Eop Onegl (e ::: Enil)
+  end.
+
+(** ** Integer and pointer subtraction *)
+
+Nondetfunction subl (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | t1, Eop (Olongconst n2) Enil =>
+      addlimm (Int64.neg n2) t1
+  | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm (Int64.sub n1 n2) (Eop Osubl (t1:::t2:::Enil))
+  | Eop (Oaddlimm n1) (t1:::Enil), t2 =>
+      addlimm n1 (Eop Osubl (t1:::t2:::Enil))
+  | t1, Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm (Int64.neg n2) (Eop Osubl (t1:::t2:::Enil))
+  | t1, Eop (Oshiftl s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Osublshift s a) (t1:::t2::: Enil)
+  | t1, Eop (Oextend x a) (t2:::Enil) =>
+      Eop (Osublext x a) (t1 ::: t2 ::: Enil)
+  | t1, Eop Omull (t2:::t3:::Enil) =>
+      Eop Omullsub (t1:::t2:::t3:::Enil)
+  | _, _ => Eop Osubl (e1:::e2:::Enil)
+  end.
+
+(** ** Immediate shift left *)
+
+Definition shllimm_base (e1: expr) (n: int) :=
+  Eop (Oshiftl Slsl (mk_amount64 n)) (e1 ::: Enil).
+
+Nondetfunction shllimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int64.iwordsize') then
+    Eop Oshll (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
+    | Eop (Olongconst n1) Enil =>
+        Eop (Olongconst (Int64.shl' n1 n)) Enil
+    | Eop (Oshiftl Slsl a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int64.iwordsize'
+        then shllimm_base t1 (Int.add a n)
+        else shllimm_base e1 n
+    | Eop (Ozextl s) (t1:::Enil) =>
+        Eop (Oshllzext s (mk_amount64 n)) (t1:::Enil)
+    | Eop (Osextl s) (t1:::Enil) =>
+        Eop (Oshllsext s (mk_amount64 n)) (t1:::Enil)
+    | Eop (Oshllzext s a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int64.iwordsize'
+        then Eop (Oshllzext s (mk_amount64 (Int.add a n))) (t1:::Enil)
+        else shllimm_base e1 n
+    | Eop (Oshllsext s a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int64.iwordsize'
+        then Eop (Oshllsext s (mk_amount64 (Int.add a n))) (t1:::Enil)
+        else shllimm_base e1 n
+    | Eop (Oextend x a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int64.iwordsize'
+        then Eop (Oextend x (mk_amount64 (Int.add a n))) (t1:::Enil)
+        else shllimm_base e1 n
+    | _ =>
+        shllimm_base e1 n
+  end.
+
+(** ** Immediate shift right (logical) *)
+
+Definition shrluimm_base (e1: expr) (n: int) :=
+  Eop (Oshiftl Slsr (mk_amount64 n)) (e1 ::: Enil).
+
+Nondetfunction shrluimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int64.iwordsize') then
+    Eop Oshrlu (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
+    | Eop (Olongconst n1) Enil =>
+        Eop (Olongconst (Int64.shru' n1 n)) Enil
+    | Eop (Oshiftl Slsl a) (t1:::Enil) =>
+        if Int.ltu n a
+        then Eop (Oshllzext (Int64.zwordsize - Int.unsigned a) (mk_amount64 (Int.sub a n))) (t1:::Enil)
+        else Eop (Ozextshrl (mk_amount64 (Int.sub n a)) (Int64.zwordsize - Int.unsigned n)) (t1:::Enil)
+    | Eop (Oshiftl Slsr a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int64.iwordsize'
+        then shrluimm_base t1 (Int.add a n)
+        else shrluimm_base e1 n
+    | Eop (Ozextl s) (t1:::Enil) =>
+        if zlt (Int.unsigned n) s
+        then Eop (Ozextshrl (mk_amount64 n) (s - Int.unsigned n)) (t1:::Enil)
+        else Eop (Olongconst Int64.zero) Enil
+    | _ =>
+        shrluimm_base e1 n
+    end.
+
+(** ** Immediate shift right (arithmetic) *)
+
+Definition shrlimm_base (e1: expr) (n: int) :=
+  Eop (Oshiftl Sasr (mk_amount64 n)) (e1 ::: Enil).
+
+Nondetfunction shrlimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int64.iwordsize') then
+    Eop Oshrl (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
+    | Eop (Olongconst n1) Enil =>
+        Eop (Olongconst (Int64.shr' n1 n)) Enil
+    | Eop (Oshiftl Slsl a) (t1:::Enil) =>
+        if Int.ltu n a
+        then Eop (Oshllsext (Int64.zwordsize - Int.unsigned a) (mk_amount64 (Int.sub a n))) (t1:::Enil)
+        else Eop (Osextshrl (mk_amount64 (Int.sub n a)) (Int64.zwordsize - Int.unsigned n)) (t1:::Enil)
+    | Eop (Oshiftl Sasr a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int64.iwordsize'
+        then shrlimm_base t1 (Int.add a n)
+        else shrlimm_base e1 n
+    | Eop (Osextl s) (t1:::Enil) =>
+        if zlt (Int.unsigned n) s && zlt s Int64.zwordsize
+        then Eop (Osextshrl (mk_amount64 n) (s - Int.unsigned n)) (t1:::Enil)
+        else shrlimm_base e1 n
+    | _ =>
+        shrlimm_base e1 n
+    end.
+
+(** ** Integer multiply *)
+
+Definition mullimm_base (n1: int64) (e2: expr) :=
+  match Int64.one_bits' n1 with
+  | i :: nil =>
+      shllimm e2 i
+  | i :: j :: nil =>
+      Elet e2 (addl (shllimm (Eletvar 0) i) (shllimm (Eletvar 0) j))
+  | _ =>
+      Eop Omull (Eop (Olongconst n1) Enil ::: e2 ::: Enil)
+  end.
+
+Nondetfunction mullimm (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then Eop (Olongconst Int64.zero) Enil
+  else if Int64.eq n1 Int64.one then e2
+  else match e2 with
+  | Eop (Olongconst n2) Enil => Eop (Olongconst (Int64.mul n1 n2)) Enil
+  | Eop (Oaddlimm n2) (t2:::Enil) => addlimm (Int64.mul n1 n2) (mullimm_base n1 t2)
+  | _ => mullimm_base n1 e2
+  end.
+
+Nondetfunction mull (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => mullimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => mullimm n2 t1
+  | _, _ => Eop Omull (e1:::e2:::Enil)
+  end.
+
+Definition mullhs (e1: expr) (n2: int64) :=
+  Eop Omullhs (e1 ::: longconst n2 ::: Enil).
+
+Definition mullhu (e1: expr) (n2: int64) :=
+  Eop Omullhu (e1 ::: longconst n2 ::: Enil).
+
+(** ** Integer conversions *)
+
+Nondetfunction zero_ext_l (sz: Z) (e: expr) :=
+  match e with
+  | Eop (Olongconst n) Enil => Eop (Olongconst (Int64.zero_ext sz n)) Enil
+  | Eop (Oshiftl Slsr a) (t1:::Enil) => Eop (Ozextshrl a sz) (t1:::Enil)
+  | Eop (Oshiftl Slsl a) (t1:::Enil) =>
+      if zlt (Int.unsigned a) sz
+      then Eop (Oshllzext (sz - Int.unsigned a) a) (t1:::Enil)
+      else Eop (Ozextl sz) (e:::Enil)
+  | _ => Eop (Ozextl sz) (e:::Enil)
+  end.
+
+(** ** Bitwise not *)
+
+Nondetfunction notl (e: expr) :=
+  match e with
+  | Eop (Olongconst n) Enil => Eop (Olongconst (Int64.not n)) Enil
+  | Eop (Oshiftl s a) (t1:::Enil) => Eop (Onotlshift s a) (t1:::Enil)
+  | Eop Onotl (t1:::Enil) => t1
+  | Eop (Onotlshift s a) (t1:::Enil) => Eop (Oshiftl s a) (t1:::Enil)
+  | Eop Obicl (t1:::t2:::Enil) => Eop Oornl (t2:::t1:::Enil)
+  | Eop Oornl (t1:::t2:::Enil) => Eop Obicl (t2:::t1:::Enil)
+  | Eop Oxorl (t1:::t2:::Enil) => Eop Oeqvl (t1:::t2:::Enil)
+  | Eop Oeqvl (t1:::t2:::Enil) => Eop Oxorl (t1:::t2:::Enil)
+  | _ => Eop Onotl (e:::Enil)
+  end.
+
+(** ** Bitwise and *)
+
+Definition andlimm_base (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then Eop (Olongconst Int64.zero) Enil else
+  if Int64.eq n1 Int64.mone then e2 else
+  match Z_is_power2m1 (Int64.unsigned n1) with
+  | Some s => zero_ext_l s e2
+  | None => Eop (Oandlimm n1) (e2 ::: Enil)
+  end.
+
+Nondetfunction andlimm (n1: int64) (e2: expr) :=
+  match e2 with
+  | Eop (Olongconst n2) Enil => Eop (Olongconst (Int64.and n1 n2)) Enil
+  | Eop (Oandlimm n2) (t2:::Enil) => andlimm_base (Int64.and n1 n2) t2
+  | Eop (Ozextl s) (t2:::Enil) =>
+      if zle 0 s
+      then andlimm_base (Int64.and n1 (Int64.repr (two_p s - 1))) t2
+      else andlimm_base n1 e2
+  | _ => andlimm_base n1 e2
+  end.
+
+Nondetfunction andl (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => andlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => andlimm n2 t1
+  | Eop Onotl (t1:::Enil), t2 => Eop Obicl (t2:::t1:::Enil)
+  | t1, Eop Onotl (t2:::Enil) => Eop Obicl (t1:::t2:::Enil)
+  | Eop (Onotlshift s a) (t1:::Enil), t2 => Eop (Obiclshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Onotlshift s a) (t2:::Enil) => Eop (Obiclshift s a) (t1:::t2:::Enil)
+  | Eop (Oshiftl s a) (t1:::Enil), t2 => Eop (Oandlshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Oshiftl s a) (t2:::Enil) => Eop (Oandlshift s a) (t1:::t2:::Enil)
+  | _, _ => Eop Oandl (e1:::e2:::Enil)
+  end.
+
+(** ** Bitwise or *)
+
+Nondetfunction orlimm (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then e2
+  else if Int64.eq n1 Int64.mone then Eop (Olongconst Int64.mone) Enil
+  else match e2 with
+  | Eop (Olongconst n2) Enil => Eop (Olongconst (Int64.or n1 n2)) Enil
+  | Eop (Oorlimm n2) (t2:::Enil) => Eop (Oorlimm (Int64.or n1 n2)) (t2:::Enil)
+  | _ => Eop (Oorlimm n1) (e2:::Enil)
+  end.
+
+Nondetfunction orl (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => orlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => orlimm n2 t1
+  | Eop Onotl (t1:::Enil), t2 => Eop Oornl (t2:::t1:::Enil)
+  | t1, Eop Onotl (t2:::Enil) => Eop Oornl (t1:::t2:::Enil)
+  | Eop (Onotlshift s a) (t1:::Enil), t2 => Eop (Oornlshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Onotlshift s a) (t2:::Enil) => Eop (Oornlshift s a) (t1:::t2:::Enil)
+  | Eop (Oshiftl Slsl a1) (t1:::Enil), Eop (Oshiftl Slsr a2) (t2:::Enil) =>
+      if Int.eq (Int.add a1 a2) Int64.iwordsize' && same_expr_pure t1 t2
+      then Eop (Oshiftl Sror a2) (t2:::Enil)
+      else Eop (Oorlshift Slsr a2) (Eop (Oshiftl Slsl a1) (t1:::Enil):::t2:::Enil)
+  | Eop (Oshiftl Slsr a1) (t1:::Enil), Eop (Oshiftl Slsl a2) (t2:::Enil) =>
+      if Int.eq (Int.add a2 a1) Int64.iwordsize' && same_expr_pure t1 t2
+      then Eop (Oshiftl Sror a1) (t1:::Enil)
+      else Eop (Oorlshift Slsl a2) (Eop (Oshiftl Slsr a1) (t1:::Enil):::t2:::Enil)
+  | Eop (Oshiftl s a) (t1:::Enil), t2 => Eop (Oorlshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Oshiftl s a) (t2:::Enil) => Eop (Oorlshift s a) (t1:::t2:::Enil)
+  | _, _ => Eop Oorl (e1:::e2:::Enil)
+  end.
+
+(** ** Bitwise xor *)
+
+Definition xorlimm_base (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then e2 else
+  if Int64.eq n1 Int64.mone then notl e2 else
+  Eop (Oxorlimm n1) (e2:::Enil).  
+
+Nondetfunction xorlimm (n1: int64) (e2: expr) :=
+  match e2 with
+  | Eop (Olongconst n2) Enil => Eop (Olongconst (Int64.xor n1 n2)) Enil
+  | Eop (Oxorlimm n2) (t2:::Enil) => xorlimm_base (Int64.xor n1 n2) t2
+  | _ => xorlimm_base n1 e2
+  end.
+
+Nondetfunction xorl (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => xorlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => xorlimm n2 t1
+  | Eop Onotl (t1:::Enil), t2 => Eop Oeqvl (t2:::t1:::Enil)
+  | t1, Eop Onotl (t2:::Enil) => Eop Oeqvl (t1:::t2:::Enil)
+  | Eop (Onotlshift s a) (t1:::Enil), t2 => Eop (Oeqvlshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Onotlshift s a) (t2:::Enil) => Eop (Oeqvlshift s a) (t1:::t2:::Enil)
+  | Eop (Oshiftl s a) (t1:::Enil), t2 => Eop (Oxorlshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Oshiftl s a) (t2:::Enil) => Eop (Oxorlshift s a) (t1:::t2:::Enil)
+  | _, _ => Eop Oxorl (e1:::e2:::Enil)
+  end.
+
+(** ** Integer division and modulus *)
+
+Definition modl_aux (divop: operation) (e1 e2: expr) :=
+  Elet e1
+    (Elet (lift e2)
+      (Eop Omullsub (Eletvar 1 :::
+                     Eop divop (Eletvar 1 ::: Eletvar 0 ::: Enil) :::
+                     Eletvar 0 :::
+                     Enil))).
+
+Definition divls_base (e1: expr) (e2: expr) := Eop Odivl (e1:::e2:::Enil).
+Definition modls_base := modl_aux Odivl.
+Definition divlu_base (e1: expr) (e2: expr) := Eop Odivlu (e1:::e2:::Enil).
+Definition modlu_base := modl_aux Odivlu.
+
+Definition shrxlimm (e1: expr) (n2: int) :=
+  if Int.eq n2 Int.zero then e1 else Eop (Oshrlximm n2) (e1:::Enil).
+
+(** ** General shifts *)
+
+Nondetfunction shll (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => shllimm e1 n2
+  | _ => Eop Oshll (e1:::e2:::Enil)
+  end.
+
+Nondetfunction shrl (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => shrlimm e1 n2
+  | _ => Eop Oshrl (e1:::e2:::Enil)
+  end.
+
+Nondetfunction shrlu (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => shrluimm e1 n2
+  | _ => Eop Oshrlu (e1:::e2:::Enil)
+  end.
+
+(** ** Comparisons *)
+
+Nondetfunction complimm (default: comparison -> int64 -> condition)
+                        (sem: comparison -> int64 -> int64 -> bool)
+                        (c: comparison) (e1: expr) (n2: int64) :=
+  match c, e1 with
+  | c, Eop (Olongconst n1) Enil =>
+      Eop (Ointconst (if sem c n1 n2 then Int.one else Int.zero)) Enil
+  | Ceq, Eop (Oandlimm m) (t1:::Enil) =>
+      if Int64.eq n2 Int64.zero
+      then Eop (Ocmp (Cmasklzero m)) (t1:::Enil)
+      else Eop (Ocmp (default c n2)) (e1:::Enil)
+  | Cne, Eop (Oandlimm m) (t1:::Enil) =>
+      if Int64.eq n2 Int64.zero
+      then Eop (Ocmp (Cmasklnotzero m)) (t1:::Enil)
+      else Eop (Ocmp (default c n2)) (e1:::Enil)
+  | _, _ =>
+      Eop (Ocmp (default c n2)) (e1:::Enil)
+  end.
+
+Nondetfunction cmpl (c: comparison) (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 =>
+      complimm Ccomplimm Int64.cmp (swap_comparison c) t2 n1
+  | t1, Eop (Olongconst n2) Enil =>
+      complimm Ccomplimm Int64.cmp c t1 n2
+  | Eop (Oshiftl s a) (t1:::Enil), t2 ?? arith_shift s =>
+      Eop (Ocmp (Ccomplshift (swap_comparison c) s a)) (t2:::t1:::Enil)
+  | t1, Eop (Oshiftl s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Ocmp (Ccomplshift c s a)) (t1:::t2:::Enil)
+  | _, _ =>
+      Eop (Ocmp (Ccompl c)) (e1:::e2:::Enil)
+  end.
+
+Nondetfunction cmplu (c: comparison) (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 =>
+      complimm Ccompluimm Int64.cmpu (swap_comparison c) t2 n1
+  | t1, Eop (Olongconst n2) Enil =>
+      complimm Ccompluimm Int64.cmpu c t1 n2
+  | Eop (Oshiftl s a) (t1:::Enil), t2 ?? arith_shift s =>
+      Eop (Ocmp (Ccomplushift (swap_comparison c) s a)) (t2:::t1:::Enil)
+  | t1, Eop (Oshiftl s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Ocmp (Ccomplushift c s a)) (t1:::t2:::Enil)
+  | _, _ =>
+      Eop (Ocmp (Ccomplu c)) (e1:::e2:::Enil)
+  end.
+
+(** ** Floating-point conversions *)
+
+Definition longoffloat (e: expr) :=
+  Eop Olongoffloat (e:::Enil).
+
+Definition longuoffloat (e: expr) :=
+  Eop Olonguoffloat (e:::Enil).
+
+Definition floatoflong (e: expr) := 
+  Eop Ofloatoflong (e:::Enil).
+
+Definition floatoflongu (e: expr) := 
+  Eop Ofloatoflongu (e:::Enil).
+
+Definition longofsingle (e: expr) := 
+  Eop Olongofsingle (e:::Enil).
+
+Definition longuofsingle (e: expr) := 
+  Eop Olonguofsingle (e:::Enil).
+
+Definition singleoflong (e: expr) := 
+  Eop Osingleoflong (e:::Enil).
+
+Definition singleoflongu (e: expr) := 
+  Eop Osingleoflongu (e:::Enil).
+
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
new file mode 100644
index 00000000..60dc1a12
--- /dev/null
+++ b/aarch64/SelectLongproof.v
@@ -0,0 +1,767 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness of instruction selection for 64-bit integer operators *)
+
+Require Import Coqlib Zbits.
+Require Import AST Integers Floats Values Memory Globalenvs.
+Require Import Cminor Op CminorSel.
+Require Import SelectOp SelectLong SelectOpproof.
+Require Import OpHelpers OpHelpersproof.
+
+Local Open Scope cminorsel_scope.
+Local Transparent Archi.ptr64.
+
+(** * Correctness of the smart constructors *)
+
+Section CMCONSTR.
+Variable prog: program.
+Variable hf: helper_functions.
+Hypothesis HELPERS: helper_functions_declared prog hf.
+Let ge := Genv.globalenv prog.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
+Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop :=
+  forall le a x,
+  eval_expr ge sp e m le a x ->
+  exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v.
+
+Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop :=
+  forall le a x b y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v.
+
+Definition partial_unary_constructor_sound (cstr: expr -> expr) (sem: val -> option val) : Prop :=
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  sem x = Some y ->
+  exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef y v.
+
+Definition partial_binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> option val) : Prop :=
+  forall le a x b y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  sem x y = Some z ->
+  exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef z v.
+
+(** ** Constants *)
+
+Theorem eval_longconst:
+  forall le n, eval_expr ge sp e m le (longconst n) (Vlong n).
+Proof.
+  intros; EvalOp.
+Qed.
+
+(** ** Conversions *)
+
+Theorem eval_intoflong: unary_constructor_sound intoflong Val.loword.
+Proof.
+  unfold intoflong; red; intros until x; destruct (intoflong_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_longofintu: unary_constructor_sound longofintu Val.longofintu.
+Proof.
+  unfold longofintu; red; intros until x; destruct (longofintu_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists. simpl. unfold eval_extend. rewrite mk_amount64_eq by reflexivity.
+  destruct x; simpl; auto. rewrite Int64.shl'_zero. auto.
+Qed.
+
+Theorem eval_longofint: unary_constructor_sound longofint Val.longofint.
+Proof.
+  unfold longofint; red; intros until x; destruct (longofint_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists. simpl. unfold eval_extend. rewrite mk_amount64_eq by reflexivity.
+  destruct x; simpl; auto. rewrite Int64.shl'_zero. auto.
+Qed.
+
+(** ** Addition, opposite, subtraction *)
+
+Theorem eval_addlimm:
+  forall n, unary_constructor_sound (addlimm n) (fun x => Val.addl x (Vlong n)).
+Proof.
+  red; unfold addlimm; intros until x.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+- subst n. intros. exists x; split; auto.
+  destruct x; simpl; auto.
+  rewrite Int64.add_zero; auto.
+  rewrite Ptrofs.add_zero; auto.
+- case (addlimm_match a); intros; InvEval; subst.
++ rewrite Int64.add_commut; TrivialExists.
++ TrivialExists. simpl. rewrite Ptrofs.add_commut, Genv.shift_symbol_address_64; auto.
++ econstructor; split. EvalOp. destruct sp; simpl; auto. 
+  rewrite Ptrofs.add_assoc, (Ptrofs.add_commut m0); auto.
++ rewrite Val.addl_assoc, Int64.add_commut; TrivialExists.  
++ TrivialExists.
+Qed.
+
+Theorem eval_addl: binary_constructor_sound addl Val.addl.
+Proof.
+  red; intros until y.
+  unfold addl; case (addl_match a b); intros; InvEval; subst.
+- rewrite Val.addl_commut. apply eval_addlimm; auto.
+- apply eval_addlimm; auto.
+- replace (Val.addl (Val.addl v1 (Vlong n1)) (Val.addl v0 (Vlong n2)))
+     with (Val.addl (Val.addl v1 v0) (Val.addl (Vlong n1) (Vlong n2))).
+  apply eval_addlimm. EvalOp.
+  repeat rewrite Val.addl_assoc. decEq. apply Val.addl_permut.
+- TrivialExists. simpl. 
+  rewrite Val.addl_commut, Val.addl_assoc. f_equal; f_equal. 
+  destruct sp; simpl; auto. rewrite Ptrofs.add_assoc, (Ptrofs.add_commut n2). auto.
+- TrivialExists. simpl.
+  rewrite <- (Val.addl_commut v1), <- (Val.addl_commut (Val.addl v1 (Vlong n2))).
+  rewrite Val.addl_assoc. f_equal; f_equal.
+  destruct sp; simpl; auto. rewrite Ptrofs.add_assoc. auto.
+- replace (Val.addl (Val.addl v1 (Vlong n1)) y)
+     with (Val.addl (Val.addl v1 y) (Vlong n1)).
+  apply eval_addlimm. EvalOp.
+  repeat rewrite Val.addl_assoc. decEq. apply Val.addl_commut.
+- rewrite <- Val.addl_assoc. apply eval_addlimm. EvalOp.
+- rewrite Val.addl_commut. TrivialExists.
+- TrivialExists.
+- rewrite Val.addl_commut. TrivialExists.
+- TrivialExists.
+- rewrite Val.addl_commut. TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_negl: unary_constructor_sound negl (fun v => Val.subl (Vlong Int64.zero) v).
+Proof.
+  red; intros until x; unfold negl. case (negl_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_subl: binary_constructor_sound subl Val.subl.
+Proof.
+  red; intros until y; unfold subl; case (subl_match a b); intros; InvEval; subst.
+- rewrite Val.subl_addl_opp. apply eval_addlimm; auto.
+- rewrite Val.subl_addl_l. rewrite Val.subl_addl_r.
+  rewrite Val.addl_assoc. simpl. rewrite Int64.add_commut. rewrite <- Int64.sub_add_opp.
+  apply eval_addlimm; EvalOp.
+- rewrite Val.subl_addl_l. apply eval_addlimm; EvalOp.
+- rewrite Val.subl_addl_r. apply eval_addlimm; EvalOp.
+- TrivialExists.
+- TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+(** ** Immediate shifts *)
+
+Remark eval_shllimm_base: forall le a n x,
+  eval_expr ge sp e m le a x ->
+  Int.ltu n Int64.iwordsize' = true ->
+  eval_expr ge sp e m le (shllimm_base a n) (Val.shll x (Vint n)).
+Proof.
+Local Opaque mk_amount64.
+  unfold shlimm_base; intros; EvalOp. simpl. rewrite mk_amount64_eq by auto. auto.
+Qed.
+
+Theorem eval_shllimm:
+  forall n, unary_constructor_sound (fun a => shllimm a n)
+                                    (fun x => Val.shll x (Vint n)).
+Proof.
+  red; intros until x; unfold shllimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; [| destruct (Int.ltu n Int64.iwordsize') eqn:L]; simpl.
+- intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int64.shl'_zero; auto.
+- destruct (shllimm_match a); intros; InvEval; subst.
++ TrivialExists. simpl; rewrite L; auto.
++ destruct (Int.ltu (Int.add a n) Int64.iwordsize') eqn:L2.
+* econstructor; split. eapply eval_shllimm_base; eauto.
+  destruct v1; simpl; auto. rewrite a64_range; simpl. rewrite L, L2. 
+  rewrite Int64.shl'_shl'; auto using a64_range.
+* econstructor; split; [|eauto]. apply eval_shllimm_base; auto. EvalOp.
++ TrivialExists. simpl. rewrite mk_amount64_eq; auto.
++ TrivialExists. simpl. rewrite mk_amount64_eq; auto.
++ destruct (Int.ltu (Int.add a n) Int64.iwordsize') eqn:L2.
+* TrivialExists. simpl. rewrite mk_amount64_eq by auto. 
+  destruct (Val.zero_ext_l s v1); simpl; auto.
+  rewrite a64_range; simpl; rewrite L, L2.
+  rewrite Int64.shl'_shl'; auto using a64_range.
+* econstructor; split. eapply eval_shllimm_base; eauto. EvalOp; simpl; eauto. auto.
++ destruct (Int.ltu (Int.add a n) Int64.iwordsize') eqn:L2.
+* TrivialExists. simpl. rewrite mk_amount64_eq by auto. 
+  destruct (Val.sign_ext_l s v1); simpl; auto.
+  rewrite a64_range; simpl; rewrite L, L2.
+  rewrite Int64.shl'_shl'; auto using a64_range.
+* econstructor; split. eapply eval_shllimm_base; eauto. EvalOp; simpl; eauto. auto.
++ destruct (Int.ltu (Int.add a n) Int64.iwordsize') eqn:L2.
+* TrivialExists. simpl. unfold eval_extend. rewrite mk_amount64_eq by auto. 
+  destruct (match x0 with Xsgn32 => Val.longofint v1 | Xuns32 => Val.longofintu v1 end); simpl; auto.
+  rewrite a64_range; simpl; rewrite L, L2.
+  rewrite Int64.shl'_shl'; auto using a64_range.
+* econstructor; split. eapply eval_shllimm_base; eauto. EvalOp; simpl; eauto. auto.
++ econstructor; eauto using eval_shllimm_base.
+- intros; TrivialExists.
+Qed.
+
+Remark eval_shrluimm_base: forall le a n x,
+  eval_expr ge sp e m le a x ->
+  Int.ltu n Int64.iwordsize' = true ->
+  eval_expr ge sp e m le (shrluimm_base a n) (Val.shrlu x (Vint n)).
+Proof.
+  unfold shrluimm_base; intros; EvalOp. simpl. rewrite mk_amount64_eq by auto. auto.
+Qed.
+
+Remark sub_shift_amount:
+  forall y z,
+  Int.ltu y Int64.iwordsize' = true -> Int.ltu z Int64.iwordsize' = true -> Int.unsigned y <= Int.unsigned z ->
+  Int.ltu (Int.sub z y) Int64.iwordsize' = true.
+Proof.
+  intros. unfold Int.ltu; apply zlt_true.
+  apply Int.ltu_inv in H. apply Int.ltu_inv in H0.
+  change (Int.unsigned Int64.iwordsize') with Int64.zwordsize in *.
+  unfold Int.sub; rewrite Int.unsigned_repr. omega.
+  assert (Int64.zwordsize < Int.max_unsigned) by reflexivity. omega.
+Qed.
+
+Theorem eval_shrluimm:
+  forall n, unary_constructor_sound (fun a => shrluimm a n)
+                                    (fun x => Val.shrlu x (Vint n)).
+Proof.
+Local Opaque Int64.zwordsize.
+  red; intros until x; unfold shrluimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; [| destruct (Int.ltu n Int64.iwordsize') eqn:L]; simpl.
+- intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int64.shru'_zero; auto.
+- destruct (shrluimm_match a); intros; InvEval; subst.
++ TrivialExists. simpl; rewrite L; auto.
++ destruct (Int.ltu n a) eqn:L2.
+* assert (L3: Int.ltu (Int.sub a n) Int64.iwordsize' = true).
+  { apply sub_shift_amount; auto using a64_range.
+    apply Int.ltu_inv in L2. omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
+  simpl. rewrite L. rewrite Int64.shru'_shl', L2 by auto using a64_range. auto.
+* assert (L3: Int.ltu (Int.sub n a) Int64.iwordsize' = true).
+  { apply sub_shift_amount; auto using a64_range.
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
+  simpl. rewrite L. rewrite Int64.shru'_shl', L2 by auto using a64_range. auto.
++ destruct (Int.ltu (Int.add a n) Int64.iwordsize') eqn:L2.
+* econstructor; split. eapply eval_shrluimm_base; eauto.
+  destruct v1; simpl; auto. rewrite a64_range; simpl. rewrite L, L2. 
+  rewrite Int64.shru'_shru'; auto using a64_range.
+* econstructor; split; [|eauto]. apply eval_shrluimm_base; auto. EvalOp.
++ destruct (zlt (Int.unsigned n) s).
+* econstructor; split. EvalOp. rewrite mk_amount64_eq by auto.
+  destruct v1; simpl; auto. rewrite ! L; simpl.
+  set (s' := s - Int.unsigned n).
+  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
+  rewrite Int64.shru'_zero_ext. auto. unfold s'; omega.
+* econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite ! L; simpl.
+  rewrite Int64.shru'_zero_ext_0 by omega. auto. 
++ econstructor; eauto using eval_shrluimm_base.
+- intros; TrivialExists.
+Qed.
+
+Remark eval_shrlimm_base: forall le a n x,
+  eval_expr ge sp e m le a x ->
+  Int.ltu n Int64.iwordsize' = true ->
+  eval_expr ge sp e m le (shrlimm_base a n) (Val.shrl x (Vint n)).
+Proof.
+  unfold shrlimm_base; intros; EvalOp. simpl. rewrite mk_amount64_eq by auto. auto.
+Qed.
+
+Theorem eval_shrlimm:
+  forall n, unary_constructor_sound (fun a => shrlimm a n)
+                                    (fun x => Val.shrl x (Vint n)).
+Proof.
+  red; intros until x; unfold shrlimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; [| destruct (Int.ltu n Int64.iwordsize') eqn:L]; simpl.
+- intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int64.shr'_zero; auto.
+- destruct (shrlimm_match a); intros; InvEval; subst.
++ TrivialExists. simpl; rewrite L; auto.
++ destruct (Int.ltu n a) eqn:L2.
+* assert (L3: Int.ltu (Int.sub a n) Int64.iwordsize' = true).
+  { apply sub_shift_amount; auto using a64_range.
+    apply Int.ltu_inv in L2. omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
+  simpl. rewrite L. rewrite Int64.shr'_shl', L2 by auto using a64_range. auto.
+* assert (L3: Int.ltu (Int.sub n a) Int64.iwordsize' = true).
+  { apply sub_shift_amount; auto using a64_range.
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
+  simpl. rewrite L. rewrite Int64.shr'_shl', L2 by auto using a64_range. auto.
++ destruct (Int.ltu (Int.add a n) Int64.iwordsize') eqn:L2.
+* econstructor; split. eapply eval_shrlimm_base; eauto.
+  destruct v1; simpl; auto. rewrite a64_range; simpl. rewrite L, L2. 
+  rewrite Int64.shr'_shr'; auto using a64_range.
+* econstructor; split; [|eauto]. apply eval_shrlimm_base; auto. EvalOp.
++ destruct (zlt (Int.unsigned n) s && zlt s Int64.zwordsize) eqn:E.
+* InvBooleans. econstructor; split. EvalOp. rewrite mk_amount64_eq by auto.
+  destruct v1; simpl; auto. rewrite ! L; simpl.
+  set (s' := s - Int.unsigned n).
+  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
+  rewrite Int64.shr'_sign_ext. auto. unfold s'; omega. unfold s'; omega.
+* econstructor; split; [|eauto]. apply eval_shrlimm_base; auto. EvalOp.
++ econstructor; eauto using eval_shrlimm_base.
+- intros; TrivialExists.
+Qed.
+
+(** ** Multiplication *)
+
+Lemma eval_mullimm_base:
+  forall n, unary_constructor_sound (mullimm_base n) (fun x => Val.mull x (Vlong n)).
+Proof.
+  intros; red; intros; unfold mullimm_base.
+  assert (DFL: exists v, eval_expr ge sp e m le (Eop Omull (Eop (Olongconst n) Enil ::: a ::: Enil)) v /\ Val.lessdef (Val.mull x (Vlong n)) v).
+  { rewrite Val.mull_commut; TrivialExists. } 
+  generalize (Int64.one_bits'_decomp n); generalize (Int64.one_bits'_range n);
+  destruct (Int64.one_bits' n) as [ | i [ | j []]]; intros P Q.
+- apply DFL.
+- replace (Val.mull x (Vlong n)) with (Val.shll x (Vint i)).
+  apply eval_shllimm; auto.
+  simpl in Q. destruct x; auto; simpl. rewrite P by auto with coqlib.
+  rewrite Q, Int64.add_zero, Int64.shl'_mul. auto.
+- exploit (eval_shllimm i (x :: le) (Eletvar 0) x). constructor; auto. intros [v1 [A1 B1]].
+  exploit (eval_shllimm j (x :: le) (Eletvar 0) x). constructor; auto. intros [v2 [A2 B2]].
+  exploit (eval_addl (x :: le)). eexact A1. eexact A2. intros [v [A B]].
+  exists v; split. econstructor; eauto.
+  simpl in Q. rewrite Q, Int64.add_zero. eapply Val.lessdef_trans; [|eexact B].
+  eapply Val.lessdef_trans; [|eapply Val.addl_lessdef; eauto].
+  destruct x; simpl; auto; rewrite ! P by auto with coqlib.
+  rewrite Int64.mul_add_distr_r, <- ! Int64.shl'_mul. auto.
+- apply DFL.
+Qed.
+
+Theorem eval_mullimm:
+  forall n, unary_constructor_sound (mullimm n) (fun x => Val.mull x (Vlong n)).
+Proof.
+  intros; red; intros until x; unfold mullimm.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  intros. exists (Vlong Int64.zero); split. EvalOp.
+  destruct x; simpl; auto. subst n. rewrite Int64.mul_zero. auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.one.
+  intros. exists x; split; auto.
+  destruct x; simpl; auto. subst n. rewrite Int64.mul_one. auto.
+  case (mullimm_match a); intros; InvEval; subst.
+- TrivialExists. simpl. rewrite Int64.mul_commut; auto.
+- rewrite Val.mull_addl_distr_l.
+  exploit eval_mullimm_base; eauto. instantiate (1 := n). intros [v' [A1 B1]].
+  exploit (eval_addlimm (Int64.mul n n2) le (mullimm_base n t2) v'). auto. intros [v'' [A2 B2]].
+  exists v''; split; auto. eapply Val.lessdef_trans. eapply Val.addl_lessdef; eauto.
+  rewrite Val.mull_commut; auto.
+- apply eval_mullimm_base; auto.
+Qed.
+
+Theorem eval_mull: binary_constructor_sound mull Val.mull.
+Proof.
+  red; intros until y; unfold mull; case (mull_match a b); intros; InvEval; subst.
+- rewrite Val.mull_commut. apply eval_mullimm; auto.
+- apply eval_mullimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_mullhu: 
+  forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)).
+Proof.
+  unfold mullhu; red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mullhs: 
+  forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)).
+Proof.
+  unfold mullhs; red; intros; TrivialExists.
+Qed.
+
+(** Integer conversions *)
+
+Theorem eval_zero_ext_l:
+  forall sz, 0 <= sz -> unary_constructor_sound (zero_ext_l sz) (Val.zero_ext_l sz).
+Proof.
+  intros; red; intros until x; unfold zero_ext_l; case (zero_ext_l_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+- destruct (zlt (Int.unsigned a0) sz).
++ econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a64_range; simpl.
+  apply Val.lessdef_same. f_equal. rewrite Int64.shl'_zero_ext by omega. f_equal. omega.
++ TrivialExists.
+- TrivialExists.
+Qed.
+
+(** Bitwise not, and, or, xor *)
+
+Theorem eval_notl: unary_constructor_sound notl Val.notl.
+Proof.
+  assert (INV: forall v, Val.lessdef (Val.notl (Val.notl v)) v).
+  { destruct v; auto. simpl; rewrite Int64.not_involutive; auto. } 
+  unfold notl; red; intros until x; case (notl_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+- exists v1; auto.
+- exists (eval_shiftl s v1 a0); split; auto. EvalOp.
+- econstructor; split. EvalOp. 
+  destruct v1; simpl; auto; destruct v0; simpl; auto.
+  rewrite Int64.not_and_or_not, Int64.not_involutive, Int64.or_commut. auto.
+- econstructor; split. EvalOp. 
+  destruct v1; simpl; auto; destruct v0; simpl; auto.
+  rewrite Int64.not_or_and_not, Int64.not_involutive, Int64.and_commut. auto.
+- econstructor; split. EvalOp.
+  destruct v1; simpl; auto; destruct v0; simpl; auto.
+  unfold Int64.not; rewrite ! Int64.xor_assoc. auto.
+- econstructor; split. EvalOp.
+  destruct v1; simpl; auto; destruct v0; simpl; auto.
+  unfold Int64.not; rewrite ! Int64.xor_assoc, Int64.xor_idem, Int64.xor_zero. auto.
+- TrivialExists.
+Qed.
+
+Lemma eval_andlimm_base:
+  forall n, unary_constructor_sound (andlimm_base n) (fun x => Val.andl x (Vlong n)).
+Proof.
+  intros; red; intros. unfold andlimm_base.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists (Vlong Int64.zero); split. EvalOp.
+  destruct x; simpl; auto. subst n. rewrite Int64.and_zero. auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  exists x; split; auto.
+  subst. destruct x; simpl; auto. rewrite Int64.and_mone; auto.
+  destruct (Z_is_power2m1 (Int64.unsigned n)) as [s|] eqn:P.
+  assert (0 <= s) by (eapply Z_is_power2m1_nonneg; eauto).
+  rewrite <- (Int64.repr_unsigned n), (Z_is_power2m1_sound _ _ P), <- Val.zero_ext_andl by auto.
+  apply eval_zero_ext_l; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_andlimm:
+  forall n, unary_constructor_sound (andlimm n) (fun x => Val.andl x (Vlong n)).
+Proof.
+  intros; red; intros until x. unfold andlimm.
+  case (andlimm_match a); intros; InvEval; subst.
+- rewrite Int64.and_commut; TrivialExists.
+- rewrite Val.andl_assoc, Int64.and_commut. apply eval_andlimm_base; auto.
+- destruct (zle 0 s).
++ replace (Val.zero_ext_l s v1) with (Val.andl v1 (Vlong (Int64.repr (two_p s - 1)))).
+  rewrite Val.andl_assoc, Int64.and_commut.
+  apply eval_andlimm_base; auto.
+  destruct v1; simpl; auto. rewrite Int64.zero_ext_and by auto. auto. 
++ apply eval_andlimm_base. EvalOp.
+- apply eval_andlimm_base; auto.
+Qed.
+
+Theorem eval_andl: binary_constructor_sound andl Val.andl.
+Proof.
+  red; intros until y; unfold andl; case (andl_match a b); intros; InvEval; subst.
+- rewrite Val.andl_commut; apply eval_andlimm; auto.
+- apply eval_andlimm; auto.
+- rewrite Val.andl_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.andl_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.andl_commut; TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_orlimm:
+  forall n, unary_constructor_sound (orlimm n) (fun x => Val.orl x (Vlong n)).
+Proof.
+  intros; red; intros until x. unfold orlimm.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  intros. subst. exists x; split; auto.
+  destruct x; simpl; auto. rewrite Int64.or_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  intros. exists (Vlong Int64.mone); split. EvalOp.
+  destruct x; simpl; auto. subst n. rewrite Int64.or_mone. auto.
+  destruct (orlimm_match a); intros; InvEval; subst.
+- rewrite Int64.or_commut; TrivialExists.
+- rewrite Val.orl_assoc, Int64.or_commut; TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_orl: binary_constructor_sound orl Val.orl.
+Proof.
+  red; intros until y; unfold orl; case (orl_match a b); intros; InvEval; subst.
+- rewrite Val.orl_commut. apply eval_orlimm; auto.
+- apply eval_orlimm; auto.
+- rewrite Val.orl_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.orl_commut; TrivialExists.
+- TrivialExists.
+- (* shl - shru *)
+  destruct (Int.eq (Int.add a1 a2) Int64.iwordsize' && same_expr_pure t1 t2) eqn:?.
++ InvBooleans. apply Int.same_if_eq in H.
+  exploit eval_same_expr; eauto. intros [EQ1 EQ2]. subst.
+  econstructor; split. EvalOp.
+  destruct v0; simpl; auto. rewrite ! a64_range. simpl. rewrite <- Int64.or_ror'; auto using a64_range.
++ TrivialExists.
+- (* shru - shl *)
+  destruct (Int.eq (Int.add a2 a1) Int64.iwordsize' && same_expr_pure t1 t2) eqn:?.
++ InvBooleans. apply Int.same_if_eq in H.
+  exploit eval_same_expr; eauto. intros [EQ1 EQ2]. subst.
+  econstructor; split. EvalOp.
+  destruct v0; simpl; auto. rewrite ! a64_range. simpl.
+  rewrite Int64.or_commut, <- Int64.or_ror'; auto using a64_range.
++ TrivialExists.
+- rewrite Val.orl_commut; TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Lemma eval_xorlimm_base:
+  forall n, unary_constructor_sound (xorlimm_base n) (fun x => Val.xorl x (Vlong n)).
+Proof.
+  intros; red; intros. unfold xorlimm_base.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  intros. exists x; split. auto.
+  destruct x; simpl; auto. subst n. rewrite Int64.xor_zero. auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  subst n. change (Val.xorl x (Vlong Int64.mone)) with (Val.notl x). apply eval_notl; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_xorlimm:
+  forall n, unary_constructor_sound (xorlimm n) (fun x => Val.xorl x (Vlong n)).
+Proof.
+  intros; red; intros until x. unfold xorlimm.
+  destruct (xorlimm_match a); intros; InvEval; subst.
+- rewrite Int64.xor_commut; TrivialExists.
+- rewrite Val.xorl_assoc; simpl. rewrite (Int64.xor_commut n2). apply eval_xorlimm_base; auto.
+- apply eval_xorlimm_base; auto.
+Qed.
+
+Theorem eval_xorl: binary_constructor_sound xorl Val.xorl.
+Proof.
+  red; intros until y; unfold xorl; case (xorl_match a b); intros; InvEval; subst.
+- rewrite Val.xorl_commut; apply eval_xorlimm; auto.
+- apply eval_xorlimm; auto.
+- rewrite Val.xorl_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.xorl_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.xorl_commut; TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+(** ** Integer division and modulus *)
+
+Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
+Proof.
+  red; intros; unfold divls_base; TrivialExists.
+Qed.
+
+Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
+Proof.
+  red; intros; unfold modls_base, modl_aux.
+  exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
+  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+Qed.
+
+Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
+Proof.
+  red; intros; unfold divlu_base; TrivialExists.
+Qed.
+
+Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
+Proof.
+  red; intros; unfold modlu_base, modl_aux.
+  exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
+  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+Qed.
+
+Theorem eval_shrxlimm:
+  forall le a n x z,
+  eval_expr ge sp e m le a x ->
+  Val.shrxl x (Vint n) = Some z ->
+  exists v, eval_expr ge sp e m le (shrxlimm a n) v /\ Val.lessdef z v.
+Proof.
+  intros; unfold shrxlimm. 
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. exists x; split; auto.
+  destruct x; simpl in H0; try discriminate.
+  change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
+  rewrite Int64.shrx'_zero. auto.
+- TrivialExists.
+Qed.
+
+(** General shifts *)
+
+Theorem eval_shll: binary_constructor_sound shll Val.shll.
+Proof.
+  red; intros until y; unfold shll; case (shll_match b); intros.
+  InvEval. apply eval_shllimm; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_shrl: binary_constructor_sound shrl Val.shrl.
+Proof.
+  red; intros until y; unfold shrl; case (shrl_match b); intros.
+  InvEval. apply eval_shrlimm; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_shrlu: binary_constructor_sound shrlu Val.shrlu.
+Proof.
+  red; intros until y; unfold shrlu; case (shrlu_match b); intros.
+  InvEval. apply eval_shrluimm; auto.
+  TrivialExists.
+Qed.
+
+(** Comparisons *)
+
+Remark option_map_of_bool_inv: forall ov w,
+  option_map Val.of_bool ov = Some w -> Val.of_optbool ov = w.
+Proof.
+  intros. destruct ov; inv H; auto.
+Qed.
+
+Section COMP_IMM.
+
+Variable default: comparison -> int64 -> condition.
+Variable intsem: comparison -> int64 -> int64 -> bool.
+Variable sem: comparison -> val -> val -> option val.
+
+Hypothesis sem_int: forall c x y, 
+  sem c (Vlong x) (Vlong y) = Some (Val.of_bool (intsem c x y)).
+Hypothesis sem_undef: forall c v,
+  sem c Vundef v = None.
+Hypothesis sem_eq: forall x y,
+  sem Ceq (Vlong x) (Vlong y) = Some (Val.of_bool (Int64.eq x y)).
+Hypothesis sem_ne: forall x y,
+  sem Cne (Vlong x) (Vlong y) = Some (Val.of_bool (negb (Int64.eq x y))).
+Hypothesis sem_default: forall c v n,
+  sem c v (Vlong n) = option_map Val.of_bool (eval_condition (default c n) (v :: nil) m).
+
+Lemma eval_complimm_default: forall le a x c n2 v,
+  sem c x (Vlong n2) = Some v ->
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le (Eop (Ocmp (default c n2)) (a:::Enil)) v.
+Proof.
+  intros. EvalOp. simpl. rewrite sem_default in H. apply option_map_of_bool_inv in H.
+  congruence.
+Qed.
+
+Lemma eval_complimm:
+  forall le c a n2 x v,
+  eval_expr ge sp e m le a x ->
+  sem c x (Vlong n2) = Some v ->
+  eval_expr ge sp e m le (complimm default intsem c a n2) v.
+Proof.
+  intros until x; unfold complimm; case (complimm_match c a); intros; InvEval; subst.
+- (* constant *)
+  rewrite sem_int in H0; inv H0. EvalOp. destruct (intsem c0 n1 n2); auto.
+- (* mask zero *)
+  predSpec Int64.eq Int64.eq_spec n2 Int64.zero.
++ subst n2. destruct v1; simpl in H0; rewrite ? sem_undef, ? sem_eq in H0; inv H0.
+  EvalOp.
++ eapply eval_complimm_default; eauto. EvalOp. 
+- (* mask not zero *)
+  predSpec Int64.eq Int64.eq_spec n2 Int64.zero.
++ subst n2. destruct v1; simpl in H0; rewrite ? sem_undef, ? sem_ne in H0; inv H0.
+  EvalOp.
++ eapply eval_complimm_default; eauto. EvalOp.
+- (* default *)
+  eapply eval_complimm_default; eauto.
+Qed.
+
+Hypothesis sem_swap:
+  forall c x y, sem (swap_comparison c) x y = sem c y x.
+
+Lemma eval_complimm_swap:
+  forall le c a n2 x v,
+  eval_expr ge sp e m le a x ->
+  sem c (Vlong n2) x = Some v ->
+  eval_expr ge sp e m le (complimm default intsem (swap_comparison c) a n2) v.
+Proof.
+  intros. eapply eval_complimm; eauto. rewrite sem_swap; auto.
+Qed.
+
+End COMP_IMM.
+
+Theorem eval_cmpl:
+  forall c le a x b y v,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.cmpl c x y = Some v ->
+  eval_expr ge sp e m le (cmpl c a b) v.
+Proof.
+  intros until y; unfold cmpl; case (cmpl_match a b); intros; InvEval; subst.
+- apply eval_complimm_swap with (sem := Val.cmpl) (x := y); auto.
+  intros; unfold Val.cmpl; rewrite Val.swap_cmpl_bool; auto. 
+- apply eval_complimm with (sem := Val.cmpl) (x := x); auto.
+- EvalOp. simpl. rewrite Val.swap_cmpl_bool. apply option_map_of_bool_inv in H1. congruence.
+- EvalOp. simpl. apply option_map_of_bool_inv in H1. congruence.
+- EvalOp. simpl. apply option_map_of_bool_inv in H1. congruence.
+Qed.
+
+Theorem eval_cmplu:
+  forall c le a x b y v,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.cmplu (Mem.valid_pointer m) c x y = Some v ->
+  eval_expr ge sp e m le (cmplu c a b) v.
+Proof.
+  intros until y; unfold cmplu; case (cmplu_match a b); intros; InvEval; subst.
+- apply eval_complimm_swap with (sem := Val.cmplu (Mem.valid_pointer m)) (x := y); auto.
+  intros; unfold Val.cmplu; rewrite Val.swap_cmplu_bool; auto. 
+- apply eval_complimm with (sem := Val.cmplu (Mem.valid_pointer m)) (x := x); auto.
+- EvalOp. simpl. rewrite Val.swap_cmplu_bool. apply option_map_of_bool_inv in H1. congruence.
+- EvalOp. simpl. apply option_map_of_bool_inv in H1. congruence.
+- EvalOp. simpl. apply option_map_of_bool_inv in H1. congruence.
+Qed.
+
+
+(** Floating-point conversions *)
+
+Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+End CMCONSTR.
diff --git a/aarch64/SelectOp.vp b/aarch64/SelectOp.vp
new file mode 100644
index 00000000..f9e5a1c4
--- /dev/null
+++ b/aarch64/SelectOp.vp
@@ -0,0 +1,573 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Instruction selection for operators *)
+
+Require Import Coqlib Zbits.
+Require Import Compopts AST Integers Floats Builtins.
+Require Import Op CminorSel.
+
+Local Open Scope cminorsel_scope.
+
+(** "ror" shifted operands are not supported by arithmetic operations *)
+
+Definition arith_shift (s: shift) :=
+  match s with Sror => false | _ => true end.
+
+(** ** Constants **)
+
+Definition addrsymbol (id: ident) (ofs: ptrofs) :=
+  Eop (Oaddrsymbol id ofs) Enil.
+
+Definition addrstack (ofs: ptrofs) :=
+  Eop (Oaddrstack ofs) Enil.
+
+(** ** Integer addition *)
+
+Nondetfunction addimm (n: int) (e: expr) :=
+  if Int.eq n Int.zero then e else
+  match e with
+  | Eop (Ointconst m) Enil => Eop (Ointconst (Int.add n m)) Enil
+  | Eop (Oaddimm m) (t ::: Enil) => Eop (Oaddimm(Int.add n m)) (t ::: Enil)
+  | _ => Eop (Oaddimm n) (e ::: Enil)
+  end.
+
+Nondetfunction add (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Ointconst n1) Enil, t2 => addimm n1 t2
+  | t1, Eop (Ointconst n2) Enil => addimm n2 t1
+  | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm (Int.add n1 n2) (Eop Oadd (t1:::t2:::Enil))
+  | Eop (Oaddimm n1) (t1:::Enil), t2 =>
+      addimm n1 (Eop Oadd (t1:::t2:::Enil))
+  | t1, Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm n2 (Eop Oadd (t1:::t2:::Enil))
+  | Eop (Oshift s a) (t1:::Enil), t2 ?? arith_shift s =>
+      Eop (Oaddshift s a) (t2 ::: t1 ::: Enil)
+  | t1, Eop (Oshift s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Oaddshift s a) (t1 ::: t2 ::: Enil)
+  | Eop Omul (t1:::t2:::Enil), t3 =>
+      Eop Omuladd (t3:::t1:::t2:::Enil)
+  | t1, Eop Omul (t2:::t3:::Enil) =>
+      Eop Omuladd (t1:::t2:::t3:::Enil)
+  | _, _ => Eop Oadd (e1:::e2:::Enil)
+  end.
+
+(** ** Opposite *)
+
+Nondetfunction negint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.neg n)) Enil
+  | Eop (Oshift s a) (t1:::Enil) ?? arith_shift s => Eop (Onegshift s a) (t1:::Enil)
+  | _ =>  Eop Oneg (e ::: Enil)
+  end.
+
+(** ** Integer and pointer subtraction *)
+
+Nondetfunction sub (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | t1, Eop (Ointconst n2) Enil =>
+      addimm (Int.neg n2) t1
+  | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm (Int.sub n1 n2) (Eop Osub (t1:::t2:::Enil))
+  | Eop (Oaddimm n1) (t1:::Enil), t2 =>
+      addimm n1 (Eop Osub (t1:::t2:::Enil))
+  | t1, Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm (Int.neg n2) (Eop Osub (t1:::t2:::Enil))
+  | t1, Eop (Oshift s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Osubshift s a) (t1:::t2::: Enil)
+  | t1, Eop Omul (t2:::t3:::Enil) =>
+      Eop Omulsub (t1:::t2:::t3:::Enil)
+  | _, _ => Eop Osub (e1:::e2:::Enil)
+  end.
+
+(** ** Immediate shift left *)
+
+Definition shlimm_base (e1: expr) (n: int) :=
+  Eop (Oshift Slsl (mk_amount32 n)) (e1 ::: Enil).
+
+Nondetfunction shlimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int.iwordsize) then
+    Eop Oshl (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
+    | Eop (Ointconst n1) Enil =>
+        Eop (Ointconst (Int.shl n1 n)) Enil
+    | Eop (Oshift Slsl a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int.iwordsize
+        then shlimm_base t1 (Int.add a n)
+        else shlimm_base e1 n
+    | Eop (Ozext s) (t1:::Enil) =>
+        Eop (Oshlzext s (mk_amount32 n)) (t1:::Enil)
+    | Eop (Osext s) (t1:::Enil) =>
+        Eop (Oshlsext s (mk_amount32 n)) (t1:::Enil)
+    | Eop (Oshlzext s a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int.iwordsize
+        then Eop (Oshlzext s (mk_amount32 (Int.add a n))) (t1:::Enil)
+        else shlimm_base e1 n
+    | Eop (Oshlsext s a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int.iwordsize
+        then Eop (Oshlsext s (mk_amount32 (Int.add a n))) (t1:::Enil)
+        else shlimm_base e1 n
+    | _ =>
+        shlimm_base e1 n
+  end.
+
+(** ** Immediate shift right (logical) *)
+
+Definition shruimm_base (e1: expr) (n: int) :=
+  Eop (Oshift Slsr (mk_amount32 n)) (e1 ::: Enil).
+
+Nondetfunction shruimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int.iwordsize) then
+    Eop Oshru (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
+    | Eop (Ointconst n1) Enil =>
+        Eop (Ointconst (Int.shru n1 n)) Enil
+    | Eop (Oshift Slsl a) (t1:::Enil) =>
+        if Int.ltu n a
+        then Eop (Oshlzext (Int.zwordsize - Int.unsigned a) (mk_amount32 (Int.sub a n))) (t1:::Enil)
+        else Eop (Ozextshr (mk_amount32 (Int.sub n a)) (Int.zwordsize - Int.unsigned n)) (t1:::Enil)
+    | Eop (Oshift Slsr a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int.iwordsize
+        then shruimm_base t1 (Int.add a n)
+        else shruimm_base e1 n
+    | Eop (Ozext s) (t1:::Enil) =>
+        if zlt (Int.unsigned n) s
+        then Eop (Ozextshr (mk_amount32 n) (s - Int.unsigned n)) (t1:::Enil)
+        else Eop (Ointconst Int.zero) Enil
+    | _ =>
+        shruimm_base e1 n
+    end.
+
+(** ** Immediate shift right (arithmetic) *)
+
+Definition shrimm_base (e1: expr) (n: int) :=
+  Eop (Oshift Sasr (mk_amount32 n)) (e1 ::: Enil).
+
+Nondetfunction shrimm (e1: expr) (n: int) :=
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int.iwordsize) then
+    Eop Oshr (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
+    | Eop (Ointconst n1) Enil =>
+        Eop (Ointconst (Int.shr n1 n)) Enil
+    | Eop (Oshift Slsl a) (t1:::Enil) =>
+        if Int.ltu n a
+        then Eop (Oshlsext (Int.zwordsize - Int.unsigned a) (mk_amount32 (Int.sub a n))) (t1:::Enil)
+        else Eop (Osextshr (mk_amount32 (Int.sub n a)) (Int.zwordsize - Int.unsigned n)) (t1:::Enil)
+    | Eop (Oshift Sasr a) (t1:::Enil) =>
+        if Int.ltu (Int.add a n) Int.iwordsize
+        then shrimm_base t1 (Int.add a n)
+        else shrimm_base e1 n
+    | Eop (Osext s) (t1:::Enil) =>
+        if zlt (Int.unsigned n) s && zlt s Int.zwordsize
+        then Eop (Osextshr (mk_amount32 n) (s - Int.unsigned n)) (t1:::Enil)
+        else shrimm_base e1 n
+    | _ =>
+        shrimm_base e1 n
+    end.
+
+(** ** Integer multiply *)
+
+Definition mulimm_base (n1: int) (e2: expr) :=
+  match Int.one_bits n1 with
+  | i :: nil =>
+      shlimm e2 i
+  | i :: j :: nil =>
+      Elet e2 (add (shlimm (Eletvar 0) i) (shlimm (Eletvar 0) j))
+  | _ =>
+      Eop Omul (Eop (Ointconst n1) Enil ::: e2 ::: Enil)
+  end.
+
+Nondetfunction mulimm (n1: int) (e2: expr) :=
+  if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil
+  else if Int.eq n1 Int.one then e2
+  else match e2 with
+  | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.mul n1 n2)) Enil
+  | Eop (Oaddimm n2) (t2:::Enil) => addimm (Int.mul n1 n2) (mulimm_base n1 t2)
+  | _ => mulimm_base n1 e2
+  end.
+
+Nondetfunction mul (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Ointconst n1) Enil, t2 => mulimm n1 t2
+  | t1, Eop (Ointconst n2) Enil => mulimm n2 t1
+  | _, _ => Eop Omul (e1:::e2:::Enil)
+  end.
+
+Definition mulhs (e1: expr) (e2: expr) :=
+  Eop Olowlong
+    (Eop (Oshiftl Sasr (mk_amount64 (Int.repr 32)))
+       (Eop Omull (Eop (Oextend Xsgn32 (mk_amount64 Int.zero)) (e1 ::: Enil) :::
+                   Eop (Oextend Xsgn32 (mk_amount64 Int.zero)) (e2 ::: Enil) ::: Enil) ::: Enil)
+        ::: Enil).
+
+Definition mulhu (e1: expr) (e2: expr) :=
+  Eop Olowlong
+    (Eop (Oshiftl Slsr (mk_amount64 (Int.repr 32)))
+       (Eop Omull (Eop (Oextend Xuns32 (mk_amount64 Int.zero)) (e1 ::: Enil) :::
+                   Eop (Oextend Xuns32 (mk_amount64 Int.zero)) (e2 ::: Enil) ::: Enil) ::: Enil)
+        ::: Enil).
+
+(** ** Integer conversions *)
+
+Nondetfunction zero_ext (sz: Z) (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.zero_ext sz n)) Enil
+  | Eop (Oshift Slsr a) (t1:::Enil) => Eop (Ozextshr a sz) (t1:::Enil)
+  | Eop (Oshift Slsl a) (t1:::Enil) =>
+      if zlt (Int.unsigned a) sz
+      then Eop (Oshlzext (sz - Int.unsigned a) a) (t1:::Enil)
+      else Eop (Ozext sz) (e:::Enil)
+  | _ => Eop (Ozext sz) (e:::Enil)
+  end.
+
+Nondetfunction sign_ext (sz: Z) (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.sign_ext sz n)) Enil
+  | Eop (Oshift Sasr a) (t1:::Enil) => Eop (Osextshr a sz) (t1:::Enil)
+  | Eop (Oshift Slsl a) (t1:::Enil) =>
+      if zlt (Int.unsigned a) sz
+      then Eop (Oshlsext (sz - Int.unsigned a) a) (t1:::Enil)
+      else Eop (Osext sz) (e:::Enil)
+  | _ => Eop (Osext sz) (e:::Enil)
+  end.
+
+Definition cast8unsigned (e: expr) := zero_ext 8 e.
+Definition cast8signed (e: expr) := sign_ext 8 e.
+Definition cast16unsigned (e: expr) := zero_ext 16 e.
+Definition cast16signed (e: expr) := sign_ext 16 e.
+
+(** ** Bitwise not *)
+
+Nondetfunction notint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.not n)) Enil
+  | Eop (Oshift s a) (t1:::Enil) => Eop (Onotshift s a) (t1:::Enil)
+  | Eop Onot (t1:::Enil) => t1
+  | Eop (Onotshift s a) (t1:::Enil) => Eop (Oshift s a) (t1:::Enil)
+  | Eop Obic (t1:::t2:::Enil) => Eop Oorn (t2:::t1:::Enil)
+  | Eop Oorn (t1:::t2:::Enil) => Eop Obic (t2:::t1:::Enil)
+  | Eop Oxor (t1:::t2:::Enil) => Eop Oeqv (t1:::t2:::Enil)
+  | Eop Oeqv (t1:::t2:::Enil) => Eop Oxor (t1:::t2:::Enil)
+  | _ => Eop Onot (e:::Enil)
+  end.
+
+(** ** Bitwise and *)
+
+Definition andimm_base (n1: int) (e2: expr) :=
+  if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else
+  if Int.eq n1 Int.mone then e2 else
+  match Z_is_power2m1 (Int.unsigned n1) with
+  | Some s => zero_ext s e2
+  | None => Eop (Oandimm n1) (e2 ::: Enil)
+  end.
+
+Nondetfunction andimm (n1: int) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.and n1 n2)) Enil
+  | Eop (Oandimm n2) (t2:::Enil) => andimm_base (Int.and n1 n2) t2
+  | Eop (Ozext s) (t2:::Enil) =>
+      if zle 0 s
+      then andimm_base (Int.and n1 (Int.repr (two_p s - 1))) t2
+      else andimm_base n1 e2
+  | _ => andimm_base n1 e2
+  end.
+
+Nondetfunction and (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Ointconst n1) Enil, t2 => andimm n1 t2
+  | t1, Eop (Ointconst n2) Enil => andimm n2 t1
+  | Eop Onot (t1:::Enil), t2 => Eop Obic (t2:::t1:::Enil)
+  | t1, Eop Onot (t2:::Enil) => Eop Obic (t1:::t2:::Enil)
+  | Eop (Onotshift s a) (t1:::Enil), t2 => Eop (Obicshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Onotshift s a) (t2:::Enil) => Eop (Obicshift s a) (t1:::t2:::Enil)
+  | Eop (Oshift s a) (t1:::Enil), t2 => Eop (Oandshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Oshift s a) (t2:::Enil) => Eop (Oandshift s a) (t1:::t2:::Enil)
+  | _, _ => Eop Oand (e1:::e2:::Enil)
+  end.
+
+(** ** Bitwise or *)
+
+Nondetfunction orimm (n1: int) (e2: expr) :=
+  if Int.eq n1 Int.zero then e2
+  else if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil
+  else match e2 with
+  | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil
+  | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil)
+  | _ => Eop (Oorimm n1) (e2:::Enil)
+  end.
+
+Definition same_expr_pure (e1 e2: expr) :=
+  match e1, e2 with
+  | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false
+  | _, _ => false
+  end.
+
+Nondetfunction or (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Ointconst n1) Enil, t2 => orimm n1 t2
+  | t1, Eop (Ointconst n2) Enil => orimm n2 t1
+  | Eop Onot (t1:::Enil), t2 => Eop Oorn (t2:::t1:::Enil)
+  | t1, Eop Onot (t2:::Enil) => Eop Oorn (t1:::t2:::Enil)
+  | Eop (Onotshift s a) (t1:::Enil), t2 => Eop (Oornshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Onotshift s a) (t2:::Enil) => Eop (Oornshift s a) (t1:::t2:::Enil)
+  | Eop (Oshift Slsl a1) (t1:::Enil), Eop (Oshift Slsr a2) (t2:::Enil) =>
+      if Int.eq (Int.add a1 a2) Int.iwordsize && same_expr_pure t1 t2
+      then Eop (Oshift Sror a2) (t2:::Enil)
+      else Eop (Oorshift Slsr a2) (Eop (Oshift Slsl a1) (t1:::Enil):::t2:::Enil)
+  | Eop (Oshift Slsr a1) (t1:::Enil), Eop (Oshift Slsl a2) (t2:::Enil) =>
+      if Int.eq (Int.add a2 a1) Int.iwordsize && same_expr_pure t1 t2
+      then Eop (Oshift Sror a1) (t1:::Enil)
+      else Eop (Oorshift Slsl a2) (Eop (Oshift Slsr a1) (t1:::Enil):::t2:::Enil)
+  | Eop (Oshift s a) (t1:::Enil), t2 => Eop (Oorshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Oshift s a) (t2:::Enil) => Eop (Oorshift s a) (t1:::t2:::Enil)
+  | _, _ => Eop Oor (e1:::e2:::Enil)
+  end.
+
+(** ** Bitwise xor *)
+
+Definition xorimm_base (n1: int) (e2: expr) :=
+  if Int.eq n1 Int.zero then e2 else
+  if Int.eq n1 Int.mone then notint e2 else
+  Eop (Oxorimm n1) (e2:::Enil).  
+
+Nondetfunction xorimm (n1: int) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil
+  | Eop (Oxorimm n2) (t2:::Enil) => xorimm_base (Int.xor n1 n2) t2
+  | _ => xorimm_base n1 e2
+  end.
+
+Nondetfunction xor (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Ointconst n1) Enil, t2 => xorimm n1 t2
+  | t1, Eop (Ointconst n2) Enil => xorimm n2 t1
+  | Eop Onot (t1:::Enil), t2 => Eop Oeqv (t2:::t1:::Enil)
+  | t1, Eop Onot (t2:::Enil) => Eop Oeqv (t1:::t2:::Enil)
+  | Eop (Onotshift s a) (t1:::Enil), t2 => Eop (Oeqvshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Onotshift s a) (t2:::Enil) => Eop (Oeqvshift s a) (t1:::t2:::Enil)
+  | Eop (Oshift s a) (t1:::Enil), t2 => Eop (Oxorshift s a) (t2:::t1:::Enil)
+  | t1, Eop (Oshift s a) (t2:::Enil) => Eop (Oxorshift s a) (t1:::t2:::Enil)
+  | _, _ => Eop Oxor (e1:::e2:::Enil)
+  end.
+
+(** ** Integer division and modulus *)
+
+Definition mod_aux (divop: operation) (e1 e2: expr) :=
+  Elet e1
+    (Elet (lift e2)
+      (Eop Omulsub (Eletvar 1 :::
+                    Eop divop (Eletvar 1 ::: Eletvar 0 ::: Enil) :::
+                    Eletvar 0 :::
+                    Enil))).
+
+Definition divs_base (e1: expr) (e2: expr) := Eop Odiv (e1:::e2:::Enil).
+Definition mods_base := mod_aux Odiv.
+Definition divu_base (e1: expr) (e2: expr) := Eop Odivu (e1:::e2:::Enil).
+Definition modu_base := mod_aux Odivu.
+
+Definition shrximm (e1: expr) (n2: int) :=
+  if Int.eq n2 Int.zero then e1 else Eop (Oshrximm n2) (e1:::Enil).
+
+(** ** General shifts *)
+
+Nondetfunction shl (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => shlimm e1 n2
+  | _ => Eop Oshl (e1:::e2:::Enil)
+  end.
+
+Nondetfunction shr (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => shrimm e1 n2
+  | _ => Eop Oshr (e1:::e2:::Enil)
+  end.
+
+Nondetfunction shru (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ointconst n2) Enil => shruimm e1 n2
+  | _ => Eop Oshru (e1:::e2:::Enil)
+  end.
+
+(** ** Floating-point arithmetic *)
+
+Definition negf (e: expr) :=  Eop Onegf (e ::: Enil).
+Definition absf (e: expr) :=  Eop Oabsf (e ::: Enil).
+Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
+Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
+Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
+
+Definition negfs (e: expr) :=  Eop Onegfs (e ::: Enil).
+Definition absfs (e: expr) :=  Eop Oabsfs (e ::: Enil).
+Definition addfs (e1 e2: expr) :=  Eop Oaddfs (e1 ::: e2 ::: Enil).
+Definition subfs (e1 e2: expr) :=  Eop Osubfs (e1 ::: e2 ::: Enil).
+Definition mulfs (e1 e2: expr) :=  Eop Omulfs (e1 ::: e2 ::: Enil).
+
+(** ** Comparisons *)
+
+Nondetfunction compimm (default: comparison -> int -> condition)
+                       (sem: comparison -> int -> int -> bool)
+                       (c: comparison) (e1: expr) (n2: int) :=
+  match c, e1 with
+  | c, Eop (Ointconst n1) Enil =>
+      Eop (Ointconst (if sem c n1 n2 then Int.one else Int.zero)) Enil
+  | Ceq, Eop (Ocmp c) el =>
+      if Int.eq_dec n2 Int.zero then
+        Eop (Ocmp (negate_condition c)) el
+      else if Int.eq_dec n2 Int.one then
+        Eop (Ocmp c) el
+      else
+        Eop (Ointconst Int.zero) Enil
+  | Cne, Eop (Ocmp c) el =>
+      if Int.eq_dec n2 Int.zero then
+        Eop (Ocmp c) el
+      else if Int.eq_dec n2 Int.one then
+        Eop (Ocmp (negate_condition c)) el
+      else
+        Eop (Ointconst Int.one) Enil
+  | Ceq, Eop (Oandimm m) (t1:::Enil) =>
+      if Int.eq n2 Int.zero
+      then Eop (Ocmp (Cmaskzero m)) (t1:::Enil)
+      else Eop (Ocmp (default c n2)) (e1:::Enil)
+  | Cne, Eop (Oandimm m) (t1:::Enil) =>
+      if Int.eq n2 Int.zero
+      then Eop (Ocmp (Cmasknotzero m)) (t1:::Enil)
+      else Eop (Ocmp (default c n2)) (e1:::Enil)
+  | _, _ =>
+      Eop (Ocmp (default c n2)) (e1:::Enil)
+  end.
+
+Nondetfunction comp (c: comparison) (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Ointconst n1) Enil, t2 =>
+      compimm Ccompimm Int.cmp (swap_comparison c) t2 n1
+  | t1, Eop (Ointconst n2) Enil =>
+      compimm Ccompimm Int.cmp c t1 n2
+  | Eop (Oshift s a) (t1:::Enil), t2 ?? arith_shift s =>
+      Eop (Ocmp (Ccompshift (swap_comparison c) s a)) (t2:::t1:::Enil)
+  | t1, Eop (Oshift s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Ocmp (Ccompshift c s a)) (t1:::t2:::Enil)
+  | _, _ =>
+      Eop (Ocmp (Ccomp c)) (e1:::e2:::Enil)
+  end.
+
+Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) :=
+  match e1, e2 with
+  | Eop (Ointconst n1) Enil, t2 =>
+      compimm Ccompuimm Int.cmpu (swap_comparison c) t2 n1
+  | t1, Eop (Ointconst n2) Enil =>
+      compimm Ccompuimm Int.cmpu c t1 n2
+  | Eop (Oshift s a) (t1:::Enil), t2 ?? arith_shift s =>
+      Eop (Ocmp (Ccompushift (swap_comparison c) s a)) (t2:::t1:::Enil)
+  | t1, Eop (Oshift s a) (t2:::Enil) ?? arith_shift s =>
+      Eop (Ocmp (Ccompushift c s a)) (t1:::t2:::Enil)
+  | _, _ =>
+      Eop (Ocmp (Ccompu c)) (e1:::e2:::Enil)
+  end.
+
+Definition compf (c: comparison) (e1: expr) (e2: expr) :=
+  Eop (Ocmp (Ccompf c)) (e1:::e2:::Enil).
+
+Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
+  Eop (Ocmp (Ccompfs c)) (e1:::e2:::Enil).
+
+(** ** Floating-point conversions *)
+
+Definition intoffloat  (e: expr) := Eop Ointoffloat (e ::: Enil).
+Definition intuoffloat (e: expr) := Eop Ointuoffloat (e ::: Enil).
+
+Nondetfunction floatofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
+  | _ => Eop Ofloatofintu (e ::: Enil)
+  end.
+
+Nondetfunction floatofint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_int n)) Enil
+  | _ => Eop Ofloatofint (e ::: Enil)
+  end.
+
+Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
+Definition intuofsingle (e: expr) := Eop Ointuofsingle (e ::: Enil).
+
+Nondetfunction singleofint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_int n)) Enil
+  | _ => Eop Osingleofint (e ::: Enil)
+  end.
+
+Nondetfunction singleofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_intu n)) Enil
+  | _ => Eop Osingleofintu (e ::: Enil)
+  end.
+
+Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+
+(** ** Selection *)
+
+Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr) :=
+  if match ty with
+     | Tint => true
+     | Tlong => true
+     | Tfloat => true
+     | Tsingle => true
+     | _ => false
+     end
+  then Some (Eop (Osel cond ty) (e1 ::: e2 ::: args))
+  else None.
+
+(** ** Recognition of addressing modes for load and store operations *)
+
+Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
+  match e with
+  | Eop (Oaddrstack n) Enil => (Ainstack n, Enil)
+  | Eop (Oaddrsymbol id ofs) Enil => (Aglobal id ofs, Enil)
+  | Eop (Oaddlimm n) (e1:::Enil) => (Aindexed n, e1:::Enil)
+  | Eop (Oaddlshift Slsl a) (e1:::e2:::Enil) => (Aindexed2shift a, e1:::e2:::Enil)
+  | Eop (Oaddlext x a) (e1:::e2:::Enil) => (Aindexed2ext x a, e1:::e2:::Enil)
+  | Eop Oaddl (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil)
+  | _ => (Aindexed Int64.zero, e:::Enil)
+  end.
+
+(* floats *)
+Definition divf_base (e1: expr) (e2: expr) :=
+  Eop Odivf (e1 ::: e2 ::: Enil).
+
+Definition divfs_base (e1: expr) (e2: expr) :=
+  Eop Odivfs (e1 ::: e2 ::: Enil).
+
+(** ** Arguments of builtins *)
+
+Nondetfunction builtin_arg (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => BA_int n
+  | Eop (Olongconst n) Enil => BA_long n
+  | Eop (Oaddrsymbol id ofs) Enil => BA_addrglobal id ofs
+  | Eop (Oaddrstack ofs) Enil => BA_addrstack ofs
+  | Eload chunk (Ainstack ofs) Enil => BA_loadstack chunk ofs
+  | Eop (Oaddlimm n) (e1:::Enil) => BA_addptr (BA e1) (BA_long n)
+  | _ => BA e
+  end.
+
+(** Platform-specific known builtins *)
+
+Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
+  None.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
new file mode 100644
index 00000000..54c6a9fd
--- /dev/null
+++ b/aarch64/SelectOpproof.v
@@ -0,0 +1,1093 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness of instruction selection for operators *)
+
+Require Import Coqlib Zbits.
+Require Import AST Integers Floats Values Memory Builtins Globalenvs.
+Require Import Cminor Op CminorSel.
+Require Import SelectOp.
+Require Import OpHelpers OpHelpersproof.
+
+Local Open Scope cminorsel_scope.
+Local Transparent Archi.ptr64.
+
+(** * Useful lemmas and tactics *)
+
+(** The following are trivial lemmas and custom tactics that help
+  perform backward (inversion) and forward reasoning over the evaluation
+  of operator applications. *)
+
+Ltac EvalOp := 
+  eauto with evalexpr;
+  match goal with
+  | [ |- eval_expr _ _ _ _ _ _ _ ] => eapply eval_Eop; [EvalOp|try reflexivity; auto]
+  | [ |- eval_exprlist _ _ _ _ _ _ _ ] => econstructor; EvalOp
+  | _ => idtac
+  end.
+
+Ltac InvEval1 :=
+  match goal with
+  | [ H: (eval_expr _ _ _ _ _ (Eop _ Enil) _) |- _ ] =>
+      inv H; InvEval1
+  | [ H: (eval_expr _ _ _ _ _ (Eop _ (_ ::: Enil)) _) |- _ ] =>
+      inv H; InvEval1
+  | [ H: (eval_expr _ _ _ _ _ (Eop _ (_ ::: _ ::: Enil)) _) |- _ ] =>
+      inv H; InvEval1
+  | [ H: (eval_exprlist _ _ _ _ _ Enil _) |- _ ] =>
+      inv H; InvEval1
+  | [ H: (eval_exprlist _ _ _ _ _ (_ ::: _) _) |- _ ] =>
+      inv H; InvEval1
+  | _ =>
+      idtac
+  end.
+
+Ltac InvEval2 :=
+  match goal with
+  | [ H: (eval_operation _ _ _ nil _ = Some _) |- _ ] =>
+      simpl in H; inv H
+  | [ H: (eval_operation _ _ _ (_ :: nil) _ = Some _) |- _ ] =>
+      simpl in H; FuncInv
+  | [ H: (eval_operation _ _ _ (_ :: _ :: nil) _ = Some _) |- _ ] =>
+      simpl in H; FuncInv
+  | [ H: (eval_operation _ _ _ (_ :: _ :: _ :: nil) _ = Some _) |- _ ] =>
+      simpl in H; FuncInv
+  | _ =>
+      idtac
+  end.
+
+Ltac InvEval := InvEval1; InvEval2; InvEval2.
+
+Ltac TrivialExists :=
+  match goal with
+  | [ |- exists v, _ /\ Val.lessdef ?a v ] => exists a; split; [EvalOp | auto]
+  end.
+
+(** * Correctness of the smart constructors *)
+
+Section CMCONSTR.
+Variable prog: program.
+Variable hf: helper_functions.
+Hypothesis HELPERS: helper_functions_declared prog hf.
+Let ge := Genv.globalenv prog.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
+(** We now show that the code generated by "smart constructor" functions
+  such as [Selection.notint] behaves as expected.  Continuing the
+  [notint] example, we show that if the expression [e]
+  evaluates to some integer value [Vint n], then [Selection.notint e]
+  evaluates to a value [Vint (Int.not n)] which is indeed the integer
+  negation of the value of [e].
+
+  All proofs follow a common pattern:
+- Reasoning by case over the result of the classification functions
+  (such as [add_match] for integer addition), gathering additional
+  information on the shape of the argument expressions in the non-default
+  cases.
+- Inversion of the evaluations of the arguments, exploiting the additional
+  information thus gathered.
+- Equational reasoning over the arithmetic operations performed,
+  using the lemmas from the [Int] and [Float] modules.
+- Construction of an evaluation derivation for the expression returned
+  by the smart constructor.
+*)
+
+Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop :=
+  forall le a x,
+  eval_expr ge sp e m le a x ->
+  exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v.
+
+Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop :=
+  forall le a x b y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v.
+
+(** ** Constants *)
+
+Theorem eval_addrsymbol:
+  forall le id ofs,
+  exists v, eval_expr ge sp e m le (addrsymbol id ofs) v /\ Val.lessdef (Genv.symbol_address ge id ofs) v.
+Proof.
+  intros. unfold addrsymbol. TrivialExists.
+Qed.
+
+Theorem eval_addrstack:
+  forall le ofs,
+  exists v, eval_expr ge sp e m le (addrstack ofs) v /\ Val.lessdef (Val.offset_ptr sp ofs) v.
+Proof.
+  intros. unfold addrstack. TrivialExists.
+Qed.
+
+(** ** Addition, opposite, subtraction *)
+
+Theorem eval_addimm:
+  forall n, unary_constructor_sound (addimm n) (fun x => Val.add x (Vint n)).
+Proof.
+  red; unfold addimm; intros until x.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. intros. exists x; split; auto.
+  destruct x; simpl; auto. rewrite Int.add_zero; auto.
+- case (addimm_match a); intros; InvEval; simpl; TrivialExists; simpl.
++ rewrite Int.add_commut. auto.
++ subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
+Qed.
+
+Theorem eval_add: binary_constructor_sound add Val.add.
+Proof.
+  red; intros until y.
+  unfold add; case (add_match a b); intros; InvEval; subst.
+- rewrite Val.add_commut. apply eval_addimm; auto.
+- apply eval_addimm; auto.
+- replace (Val.add (Val.add v1 (Vint n1)) (Val.add v0 (Vint n2)))
+     with (Val.add (Val.add v1 v0) (Val.add (Vint n1) (Vint n2))).
+  apply eval_addimm. EvalOp.
+  repeat rewrite Val.add_assoc. decEq. apply Val.add_permut.
+- replace (Val.add (Val.add v1 (Vint n1)) y)
+     with (Val.add (Val.add v1 y) (Vint n1)).
+  apply eval_addimm. EvalOp.
+  repeat rewrite Val.add_assoc. decEq. apply Val.add_commut.
+- rewrite <- Val.add_assoc. apply eval_addimm. EvalOp.
+- rewrite Val.add_commut. TrivialExists.
+- TrivialExists.
+- rewrite Val.add_commut. TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_negint: unary_constructor_sound negint (fun v => Val.sub Vzero v).
+Proof.
+  red; intros until x; unfold negint. case (negint_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_sub: binary_constructor_sound sub Val.sub.
+Proof.
+  red; intros until y; unfold sub; case (sub_match a b); intros; InvEval; subst.
+- rewrite Val.sub_add_opp. apply eval_addimm; auto.
+- rewrite Val.sub_add_l. rewrite Val.sub_add_r.
+  rewrite Val.add_assoc. simpl. rewrite Int.add_commut. rewrite <- Int.sub_add_opp.
+  apply eval_addimm; EvalOp.
+- rewrite Val.sub_add_l. apply eval_addimm; EvalOp.
+- rewrite Val.sub_add_r. apply eval_addimm; EvalOp.
+- TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+(** ** Immediate shifts *)
+
+Remark eval_shlimm_base: forall le a n x,
+  eval_expr ge sp e m le a x ->
+  Int.ltu n Int.iwordsize = true ->
+  eval_expr ge sp e m le (shlimm_base a n) (Val.shl x (Vint n)).
+Proof.
+Local Opaque mk_amount32.
+  unfold shlimm_base; intros; EvalOp. simpl. rewrite mk_amount32_eq by auto. auto.
+Qed.
+
+Theorem eval_shlimm:
+  forall n, unary_constructor_sound (fun a => shlimm a n)
+                                    (fun x => Val.shl x (Vint n)).
+Proof.
+  red; intros until x; unfold shlimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; [| destruct (Int.ltu n Int.iwordsize) eqn:L]; simpl.
+- intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shl_zero; auto.
+- destruct (shlimm_match a); intros; InvEval; subst.
++ TrivialExists. simpl; rewrite L; auto.
++ destruct (Int.ltu (Int.add a n) Int.iwordsize) eqn:L2.
+* econstructor; split. eapply eval_shlimm_base; eauto.
+  destruct v1; simpl; auto. rewrite a32_range; simpl. rewrite L, L2. 
+  rewrite Int.shl_shl; auto using a32_range.
+* econstructor; split; [|eauto]. apply eval_shlimm_base; auto. EvalOp.
++ TrivialExists. simpl. rewrite mk_amount32_eq; auto.
++ TrivialExists. simpl. rewrite mk_amount32_eq; auto.
++ destruct (Int.ltu (Int.add a n) Int.iwordsize) eqn:L2.
+* TrivialExists. simpl. rewrite mk_amount32_eq by auto. 
+  destruct (Val.zero_ext s v1); simpl; auto.
+  rewrite a32_range; simpl; rewrite L, L2.
+  rewrite Int.shl_shl; auto using a32_range.
+* econstructor; split. eapply eval_shlimm_base; eauto. EvalOp; simpl; eauto. auto.
++ destruct (Int.ltu (Int.add a n) Int.iwordsize) eqn:L2.
+* TrivialExists. simpl. rewrite mk_amount32_eq by auto. 
+  destruct (Val.sign_ext s v1); simpl; auto.
+  rewrite a32_range; simpl; rewrite L, L2.
+  rewrite Int.shl_shl; auto using a32_range.
+* econstructor; split. eapply eval_shlimm_base; eauto. EvalOp; simpl; eauto. auto.
++ econstructor; eauto using eval_shlimm_base.
+- intros; TrivialExists.
+Qed.
+
+Remark eval_shruimm_base: forall le a n x,
+  eval_expr ge sp e m le a x ->
+  Int.ltu n Int.iwordsize = true ->
+  eval_expr ge sp e m le (shruimm_base a n) (Val.shru x (Vint n)).
+Proof.
+  unfold shruimm_base; intros; EvalOp. simpl. rewrite mk_amount32_eq by auto. auto.
+Qed.
+
+Remark sub_shift_amount:
+  forall y z,
+  Int.ltu y Int.iwordsize = true -> Int.ltu z Int.iwordsize = true -> Int.unsigned y <= Int.unsigned z ->
+  Int.ltu (Int.sub z y) Int.iwordsize = true.
+Proof.
+  intros. unfold Int.ltu; apply zlt_true. rewrite Int.unsigned_repr_wordsize.
+  apply Int.ltu_iwordsize_inv in H. apply Int.ltu_iwordsize_inv in H0.
+  unfold Int.sub; rewrite Int.unsigned_repr. omega.
+  generalize Int.wordsize_max_unsigned; omega.
+Qed.
+
+Theorem eval_shruimm:
+  forall n, unary_constructor_sound (fun a => shruimm a n)
+                                    (fun x => Val.shru x (Vint n)).
+Proof.
+Local Opaque Int.zwordsize.
+  red; intros until x; unfold shruimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; [| destruct (Int.ltu n Int.iwordsize) eqn:L]; simpl.
+- intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shru_zero; auto.
+- destruct (shruimm_match a); intros; InvEval; subst.
++ TrivialExists. simpl; rewrite L; auto.
++ destruct (Int.ltu n a) eqn:L2.
+* assert (L3: Int.ltu (Int.sub a n) Int.iwordsize = true).
+  { apply sub_shift_amount; auto using a32_range.
+    apply Int.ltu_inv in L2. omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
+  simpl. rewrite L. rewrite Int.shru_shl, L2 by auto using a32_range. auto.
+* assert (L3: Int.ltu (Int.sub n a) Int.iwordsize = true).
+  { apply sub_shift_amount; auto using a32_range.
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
+  simpl. rewrite L. rewrite Int.shru_shl, L2 by auto using a32_range. auto.
++ destruct (Int.ltu (Int.add a n) Int.iwordsize) eqn:L2.
+* econstructor; split. eapply eval_shruimm_base; eauto.
+  destruct v1; simpl; auto. rewrite a32_range; simpl. rewrite L, L2. 
+  rewrite Int.shru_shru; auto using a32_range.
+* econstructor; split; [|eauto]. apply eval_shruimm_base; auto. EvalOp.
++ destruct (zlt (Int.unsigned n) s).
+* econstructor; split. EvalOp. rewrite mk_amount32_eq by auto.
+  destruct v1; simpl; auto. rewrite ! L; simpl.
+  set (s' := s - Int.unsigned n).
+  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
+  rewrite Int.shru_zero_ext. auto. unfold s'; omega.
+* econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite ! L; simpl.
+  rewrite Int.shru_zero_ext_0 by omega. auto. 
++ econstructor; eauto using eval_shruimm_base.
+- intros; TrivialExists.
+Qed.
+
+Remark eval_shrimm_base: forall le a n x,
+  eval_expr ge sp e m le a x ->
+  Int.ltu n Int.iwordsize = true ->
+  eval_expr ge sp e m le (shrimm_base a n) (Val.shr x (Vint n)).
+Proof.
+  unfold shrimm_base; intros; EvalOp. simpl. rewrite mk_amount32_eq by auto. auto.
+Qed.
+
+Theorem eval_shrimm:
+  forall n, unary_constructor_sound (fun a => shrimm a n)
+                                    (fun x => Val.shr x (Vint n)).
+Proof.
+  red; intros until x; unfold shrimm.
+  predSpec Int.eq Int.eq_spec n Int.zero; [| destruct (Int.ltu n Int.iwordsize) eqn:L]; simpl.
+- intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shr_zero; auto.
+- destruct (shrimm_match a); intros; InvEval; subst.
++ TrivialExists. simpl; rewrite L; auto.
++ destruct (Int.ltu n a) eqn:L2.
+* assert (L3: Int.ltu (Int.sub a n) Int.iwordsize = true).
+  { apply sub_shift_amount; auto using a32_range.
+    apply Int.ltu_inv in L2. omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
+  simpl. rewrite L. rewrite Int.shr_shl, L2 by auto using a32_range. auto.
+* assert (L3: Int.ltu (Int.sub n a) Int.iwordsize = true).
+  { apply sub_shift_amount; auto using a32_range.
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+  econstructor; split. EvalOp.
+  destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
+  simpl. rewrite L. rewrite Int.shr_shl, L2 by auto using a32_range. auto.
++ destruct (Int.ltu (Int.add a n) Int.iwordsize) eqn:L2.
+* econstructor; split. eapply eval_shrimm_base; eauto.
+  destruct v1; simpl; auto. rewrite a32_range; simpl. rewrite L, L2. 
+  rewrite Int.shr_shr; auto using a32_range.
+* econstructor; split; [|eauto]. apply eval_shrimm_base; auto. EvalOp.
++ destruct (zlt (Int.unsigned n) s && zlt s Int.zwordsize) eqn:E.
+* InvBooleans. econstructor; split. EvalOp. rewrite mk_amount32_eq by auto.
+  destruct v1; simpl; auto. rewrite ! L; simpl.
+  set (s' := s - Int.unsigned n).
+  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
+  rewrite Int.shr_sign_ext. auto. unfold s'; omega. unfold s'; omega.
+* econstructor; split; [|eauto]. apply eval_shrimm_base; auto. EvalOp.
++ econstructor; eauto using eval_shrimm_base. 
+- intros; TrivialExists.
+Qed.
+
+(** ** Multiplication *)
+
+Lemma eval_mulimm_base:
+  forall n, unary_constructor_sound (mulimm_base n) (fun x => Val.mul x (Vint n)).
+Proof.
+  intros; red; intros; unfold mulimm_base.
+  assert (DFL: exists v, eval_expr ge sp e m le (Eop Omul (Eop (Ointconst n) Enil ::: a ::: Enil)) v /\ Val.lessdef (Val.mul x (Vint n)) v).
+  { rewrite Val.mul_commut; TrivialExists. } 
+  generalize (Int.one_bits_decomp n); generalize (Int.one_bits_range n);
+  destruct (Int.one_bits n) as [ | i [ | j []]]; intros P Q.
+- apply DFL.
+- replace (Val.mul x (Vint n)) with (Val.shl x (Vint i)).
+  apply eval_shlimm; auto.
+  simpl in Q. rewrite <- Val.shl_mul, Q, Int.add_zero. simpl. rewrite P by auto with coqlib. auto.
+- exploit (eval_shlimm i (x :: le) (Eletvar 0) x). constructor; auto. intros [v1 [A1 B1]].
+  exploit (eval_shlimm j (x :: le) (Eletvar 0) x). constructor; auto. intros [v2 [A2 B2]].
+  exploit (eval_add (x :: le)). eexact A1. eexact A2. intros [v [A B]].
+  exists v; split. econstructor; eauto.
+  simpl in Q. rewrite Q, Int.add_zero.
+  replace (Vint (Int.add (Int.shl Int.one i) (Int.shl Int.one j)))
+     with (Val.add (Val.shl Vone (Vint i)) (Val.shl Vone (Vint j))).
+  rewrite Val.mul_add_distr_r.
+  repeat rewrite Val.shl_mul. eapply Val.lessdef_trans; [|eauto]. apply Val.add_lessdef; auto.
+  simpl. repeat rewrite P by auto with coqlib. auto.
+- apply DFL.
+Qed.
+
+Theorem eval_mulimm:
+  forall n, unary_constructor_sound (mulimm n) (fun x => Val.mul x (Vint n)).
+Proof.
+  intros; red; intros until x; unfold mulimm.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  intros. exists (Vint Int.zero); split. EvalOp.
+  destruct x; simpl; auto. subst n. rewrite Int.mul_zero. auto.
+  predSpec Int.eq Int.eq_spec n Int.one.
+  intros. exists x; split; auto.
+  destruct x; simpl; auto. subst n. rewrite Int.mul_one. auto.
+  case (mulimm_match a); intros; InvEval; subst.
+- TrivialExists. simpl. rewrite Int.mul_commut; auto.
+- rewrite Val.mul_add_distr_l.
+  exploit eval_mulimm_base; eauto. instantiate (1 := n). intros [v' [A1 B1]].
+  exploit (eval_addimm (Int.mul n n2) le (mulimm_base n t2) v'). auto. intros [v'' [A2 B2]].
+  exists v''; split; auto. eapply Val.lessdef_trans. eapply Val.add_lessdef; eauto.
+  rewrite Val.mul_commut; auto.
+- apply eval_mulimm_base; auto.
+Qed.
+
+Theorem eval_mul: binary_constructor_sound mul Val.mul.
+Proof.
+  red; intros until y; unfold mul; case (mul_match a b); intros; InvEval; subst.
+- rewrite Val.mul_commut. apply eval_mulimm; auto.
+- apply eval_mulimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_mulhs: binary_constructor_sound mulhs Val.mulhs.
+Proof.
+  unfold mulhs; red; intros. econstructor; split. EvalOp.
+  unfold eval_shiftl, eval_extend. rewrite ! mk_amount64_eq by auto.
+  destruct x; simpl; auto. destruct y; simpl; auto.
+  change (Int.ltu Int.zero Int64.iwordsize') with true; simpl.
+  rewrite ! Int64.shl'_zero.  
+  change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
+  apply Val.lessdef_same. f_equal. 
+  transitivity (Int.repr (Z.shiftr (Int.signed i * Int.signed i0) 32)).
+  unfold Int.mulhs; f_equal. rewrite Zshiftr_div_two_p by omega. reflexivity.
+  apply Int.same_bits_eq; intros n N.
+  change Int.zwordsize with 32 in *.
+  assert (N1: 0 <= n < 64) by omega.
+  rewrite Int64.bits_loword by auto.
+  rewrite Int64.bits_shr' by auto.
+  change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
+  rewrite zlt_true by omega.
+  rewrite Int.testbit_repr by auto. 
+  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; omega).
+  transitivity (Z.testbit (Int.signed i * Int.signed i0) (n + 32)).
+  rewrite Z.shiftr_spec by omega. auto.
+  apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. 
+  change Int64.zwordsize with 64; omega.
+Qed.
+
+Theorem eval_mulhu: binary_constructor_sound mulhu Val.mulhu.
+Proof.
+  unfold mulhu; red; intros. econstructor; split. EvalOp.
+  unfold eval_shiftl, eval_extend. rewrite ! mk_amount64_eq by auto.
+  destruct x; simpl; auto. destruct y; simpl; auto.
+  change (Int.ltu Int.zero Int64.iwordsize') with true; simpl.
+  rewrite ! Int64.shl'_zero.
+  change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
+  apply Val.lessdef_same. f_equal. 
+  transitivity (Int.repr (Z.shiftr (Int.unsigned i * Int.unsigned i0) 32)).
+  unfold Int.mulhu; f_equal. rewrite Zshiftr_div_two_p by omega. reflexivity.
+  apply Int.same_bits_eq; intros n N.
+  change Int.zwordsize with 32 in *.
+  assert (N1: 0 <= n < 64) by omega.
+  rewrite Int64.bits_loword by auto.
+  rewrite Int64.bits_shru' by auto.
+  change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
+  rewrite zlt_true by omega.
+  rewrite Int.testbit_repr by auto. 
+  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; omega).
+  transitivity (Z.testbit (Int.unsigned i * Int.unsigned i0) (n + 32)).
+  rewrite Z.shiftr_spec by omega. auto.
+  apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. 
+  change Int64.zwordsize with 64; omega.
+Qed.
+
+(** Integer conversions *)
+
+Theorem eval_zero_ext:
+  forall sz, 0 <= sz -> unary_constructor_sound (zero_ext sz) (Val.zero_ext sz).
+Proof.
+  intros; red; intros until x; unfold zero_ext; case (zero_ext_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+- destruct (zlt (Int.unsigned a0) sz).
++ econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a32_range; simpl.
+  apply Val.lessdef_same. f_equal. rewrite Int.shl_zero_ext by omega. f_equal. omega.
++ TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_sign_ext:
+  forall sz, 0 < sz -> unary_constructor_sound (sign_ext sz) (Val.sign_ext sz).
+Proof.
+  intros; red; intros until x; unfold sign_ext; case (sign_ext_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+- destruct (zlt (Int.unsigned a0) sz).
++ econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a32_range; simpl.
+  apply Val.lessdef_same. f_equal. rewrite Int.shl_sign_ext by omega. f_equal. omega.
++ TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
+Proof.
+  apply eval_sign_ext; omega. 
+Qed.
+
+Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
+Proof.
+  apply eval_zero_ext; omega.
+Qed.
+
+Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
+Proof.
+  apply eval_sign_ext; omega. 
+Qed.
+
+Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
+Proof.
+  apply eval_zero_ext; omega.
+Qed.
+
+(** Bitwise not, and, or, xor *)
+
+Theorem eval_notint: unary_constructor_sound notint Val.notint.
+Proof.
+  assert (INV: forall v, Val.lessdef (Val.notint (Val.notint v)) v).
+  { destruct v; auto. simpl; rewrite Int.not_involutive; auto. } 
+  unfold notint; red; intros until x; case (notint_match a); intros; InvEval; subst.
+- TrivialExists.
+- TrivialExists.
+- exists v1; auto.
+- exists (eval_shift s v1 a0); split; auto. EvalOp.
+- econstructor; split. EvalOp. 
+  destruct v1; simpl; auto; destruct v0; simpl; auto.
+  rewrite Int.not_and_or_not, Int.not_involutive, Int.or_commut. auto.
+- econstructor; split. EvalOp. 
+  destruct v1; simpl; auto; destruct v0; simpl; auto.
+  rewrite Int.not_or_and_not, Int.not_involutive, Int.and_commut. auto.
+- econstructor; split. EvalOp. 
+  rewrite ! Val.not_xor, Val.xor_assoc; auto.
+- econstructor; split. EvalOp.
+  destruct v1; simpl; auto; destruct v0; simpl; auto.
+  unfold Int.not; rewrite ! Int.xor_assoc, Int.xor_idem, Int.xor_zero. auto.
+- TrivialExists.
+Qed.
+
+Lemma eval_andimm_base:
+  forall n, unary_constructor_sound (andimm_base n) (fun x => Val.and x (Vint n)).
+Proof.
+  intros; red; intros. unfold andimm_base.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  exists (Vint Int.zero); split. EvalOp.
+  destruct x; simpl; auto. subst n. rewrite Int.and_zero. auto.
+  predSpec Int.eq Int.eq_spec n Int.mone.
+  exists x; split; auto.
+  subst. destruct x; simpl; auto. rewrite Int.and_mone; auto.
+  destruct (Z_is_power2m1 (Int.unsigned n)) as [s|] eqn:P.
+  assert (0 <= s) by (eapply Z_is_power2m1_nonneg; eauto).
+  rewrite <- (Int.repr_unsigned n), (Z_is_power2m1_sound _ _ P), <- Val.zero_ext_and by auto.
+  apply eval_zero_ext; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_andimm:
+  forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)).
+Proof.
+  intros; red; intros until x. unfold andimm.
+  case (andimm_match a); intros; InvEval; subst.
+- rewrite Int.and_commut; TrivialExists.
+- rewrite Val.and_assoc, Int.and_commut. apply eval_andimm_base; auto.
+- destruct (zle 0 s).
++ rewrite Val.zero_ext_and, Val.and_assoc, Int.and_commut by auto. 
+  apply eval_andimm_base; auto.
++ apply eval_andimm_base. EvalOp.
+- apply eval_andimm_base; auto.
+Qed.
+
+Theorem eval_and: binary_constructor_sound and Val.and.
+Proof.
+  red; intros until y; unfold and; case (and_match a b); intros; InvEval; subst.
+- rewrite Val.and_commut; apply eval_andimm; auto.
+- apply eval_andimm; auto.
+- rewrite Val.and_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.and_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.and_commut; TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_orimm:
+  forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)).
+Proof.
+  intros; red; intros until x. unfold orimm.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  intros. subst. exists x; split; auto.
+  destruct x; simpl; auto. rewrite Int.or_zero; auto.
+  predSpec Int.eq Int.eq_spec n Int.mone.
+  intros. exists (Vint Int.mone); split. EvalOp.
+  destruct x; simpl; auto. subst n. rewrite Int.or_mone. auto.
+  destruct (orimm_match a); intros; InvEval; subst.
+- rewrite Int.or_commut; TrivialExists.
+- rewrite Val.or_assoc, Int.or_commut; TrivialExists.
+- TrivialExists.
+Qed.
+
+Remark eval_same_expr:
+  forall a1 a2 le v1 v2,
+  same_expr_pure a1 a2 = true ->
+  eval_expr ge sp e m le a1 v1 ->
+  eval_expr ge sp e m le a2 v2 ->
+  a1 = a2 /\ v1 = v2.
+Proof.
+  intros. destruct a1; try discriminate. destruct a2; try discriminate.
+  simpl in H; destruct (ident_eq i i0); inv H.
+  split. auto. inv H0; inv H1; congruence.
+Qed.
+
+Theorem eval_or: binary_constructor_sound or Val.or.
+Proof.
+  red; intros until y; unfold or; case (or_match a b); intros; InvEval; subst.
+- rewrite Val.or_commut. apply eval_orimm; auto.
+- apply eval_orimm; auto.
+- rewrite Val.or_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.or_commut; TrivialExists.
+- TrivialExists.
+- (* shl - shru *)
+  destruct (Int.eq (Int.add a1 a2) Int.iwordsize && same_expr_pure t1 t2) eqn:?.
++ InvBooleans. apply Int.same_if_eq in H.
+  exploit eval_same_expr; eauto. intros [EQ1 EQ2]. subst.
+  econstructor; split. EvalOp.
+  destruct v0; simpl; auto. rewrite ! a32_range. simpl. rewrite <- Int.or_ror; auto using a32_range.
++ TrivialExists.
+- (* shru - shl *)
+  destruct (Int.eq (Int.add a2 a1) Int.iwordsize && same_expr_pure t1 t2) eqn:?.
++ InvBooleans. apply Int.same_if_eq in H.
+  exploit eval_same_expr; eauto. intros [EQ1 EQ2]. subst.
+  econstructor; split. EvalOp.
+  destruct v0; simpl; auto. rewrite ! a32_range. simpl.
+  rewrite Int.or_commut, <- Int.or_ror; auto using a32_range.
++ TrivialExists.
+- rewrite Val.or_commut; TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Lemma eval_xorimm_base:
+  forall n, unary_constructor_sound (xorimm_base n) (fun x => Val.xor x (Vint n)).
+Proof.
+  intros; red; intros. unfold xorimm_base.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+  intros. exists x; split. auto.
+  destruct x; simpl; auto. subst n. rewrite Int.xor_zero. auto.
+  predSpec Int.eq Int.eq_spec n Int.mone.
+  subst n. rewrite <- Val.not_xor. apply eval_notint; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_xorimm:
+  forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)).
+Proof.
+  intros; red; intros until x. unfold xorimm.
+  destruct (xorimm_match a); intros; InvEval; subst.
+- rewrite Int.xor_commut; TrivialExists.
+- rewrite Val.xor_assoc; simpl. rewrite (Int.xor_commut n2). apply eval_xorimm_base; auto.
+- apply eval_xorimm_base; auto.
+Qed.
+
+Theorem eval_xor: binary_constructor_sound xor Val.xor.
+Proof.
+  red; intros until y; unfold xor; case (xor_match a b); intros; InvEval; subst.
+- rewrite Val.xor_commut; apply eval_xorimm; auto.
+- apply eval_xorimm; auto.
+- rewrite Val.xor_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.xor_commut; TrivialExists.
+- TrivialExists.
+- rewrite Val.xor_commut; TrivialExists.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+(** ** Integer division and modulus *)
+
+Theorem eval_divs_base:
+  forall le a b x y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.divs x y = Some z ->
+  exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
+Proof.
+  intros; unfold divs_base; TrivialExists.
+Qed.
+
+Theorem eval_mods_base:
+  forall le a b x y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.mods x y = Some z ->
+  exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v.
+Proof.
+  intros; unfold mods_base, mod_aux.
+  exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
+  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+Qed.
+
+Theorem eval_divu_base:
+  forall le a x b y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.divu x y = Some z ->
+  exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
+Proof.
+  intros; unfold divu_base; TrivialExists.
+Qed.
+
+Theorem eval_modu_base:
+  forall le a x b y z,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  Val.modu x y = Some z ->
+  exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
+Proof.
+  intros; unfold modu_base, mod_aux.
+  exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
+  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+Qed.
+
+Theorem eval_shrximm:
+  forall le a n x z,
+  eval_expr ge sp e m le a x ->
+  Val.shrx x (Vint n) = Some z ->
+  exists v, eval_expr ge sp e m le (shrximm a n) v /\ Val.lessdef z v.
+Proof.
+  intros; unfold shrximm. 
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. exists x; split; auto.
+  destruct x; simpl in H0; try discriminate.
+  change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
+  rewrite Int.shrx_zero by (compute; auto). auto.
+- TrivialExists.
+Qed.
+
+(** General shifts *)
+
+Theorem eval_shl: binary_constructor_sound shl Val.shl.
+Proof.
+  red; intros until y; unfold shl; case (shl_match b); intros.
+  InvEval. apply eval_shlimm; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_shr: binary_constructor_sound shr Val.shr.
+Proof.
+  red; intros until y; unfold shr; case (shr_match b); intros.
+  InvEval. apply eval_shrimm; auto.
+  TrivialExists.
+Qed.
+
+Theorem eval_shru: binary_constructor_sound shru Val.shru.
+Proof.
+  red; intros until y; unfold shru; case (shru_match b); intros.
+  InvEval. apply eval_shruimm; auto.
+  TrivialExists.
+Qed.
+
+(** Floating-point operations *)
+
+Theorem eval_negf: unary_constructor_sound negf Val.negf.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_absf: unary_constructor_sound absf Val.absf.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_addf: binary_constructor_sound addf Val.addf.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_subf: binary_constructor_sound subf Val.subf.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulf: binary_constructor_sound mulf Val.mulf.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_negfs: unary_constructor_sound negfs Val.negfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_absfs: unary_constructor_sound absfs Val.absfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_addfs: binary_constructor_sound addfs Val.addfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_subfs: binary_constructor_sound subfs Val.subfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Section COMP_IMM.
+
+Variable default: comparison -> int -> condition.
+Variable intsem: comparison -> int -> int -> bool.
+Variable sem: comparison -> val -> val -> val.
+
+Hypothesis sem_int: forall c x y, sem c (Vint x) (Vint y) = Val.of_bool (intsem c x y).
+Hypothesis sem_undef: forall c v, sem c Vundef v = Vundef.
+Hypothesis sem_eq: forall x y, sem Ceq (Vint x) (Vint y) = Val.of_bool (Int.eq x y).
+Hypothesis sem_ne: forall x y, sem Cne (Vint x) (Vint y) = Val.of_bool (negb (Int.eq x y)).
+Hypothesis sem_default: forall c v n, sem c v (Vint n) = Val.of_optbool (eval_condition (default c n) (v :: nil) m).
+
+Lemma eval_compimm:
+  forall le c a n2 x,
+  eval_expr ge sp e m le a x ->
+  exists v, eval_expr ge sp e m le (compimm default intsem c a n2) v
+         /\ Val.lessdef (sem c x (Vint n2)) v.
+Proof.
+  intros until x.
+  unfold compimm; case (compimm_match c a); intros; InvEval; subst.
+- (* constant *)
+  rewrite sem_int. TrivialExists. simpl. destruct (intsem c0 n1 n2); auto.
+- (* eq cmp *)
+  inv H. simpl in H5. inv H5.
+  destruct (Int.eq_dec n2 Int.zero). subst n2. TrivialExists.
+  simpl. rewrite eval_negate_condition.
+  destruct (eval_condition c0 vl m); simpl.
+  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
+  rewrite sem_undef; auto.
+  destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
+  simpl. destruct (eval_condition c0 vl m); simpl.
+  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
+  rewrite sem_undef; auto.
+  exists (Vint Int.zero); split. EvalOp.
+  destruct (eval_condition c0 vl m); simpl.
+  unfold Vtrue, Vfalse. destruct b; rewrite sem_eq; rewrite Int.eq_false; auto.
+  rewrite sem_undef; auto.
+- (* ne cmp *)
+  inv H. simpl in H5. inv H5.
+  destruct (Int.eq_dec n2 Int.zero). subst n2. TrivialExists.
+  simpl. destruct (eval_condition c0 vl m); simpl.
+  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
+  rewrite sem_undef; auto.
+  destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
+  simpl. rewrite eval_negate_condition. destruct (eval_condition c0 vl m); simpl.
+  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
+  rewrite sem_undef; auto.
+  exists (Vint Int.one); split. EvalOp.
+  destruct (eval_condition c0 vl m); simpl.
+  unfold Vtrue, Vfalse. destruct b; rewrite sem_ne; rewrite Int.eq_false; auto.
+  rewrite sem_undef; auto.
+- (* mask zero *)
+  predSpec Int.eq Int.eq_spec n2 Int.zero.
++ subst n2. econstructor; split. EvalOp. simpl. 
+  destruct v1; simpl; try (rewrite sem_undef; auto).
+  rewrite sem_eq. destruct (Int.eq (Int.and i m0) Int.zero); auto.
++ TrivialExists. simpl. rewrite sem_default. auto.
+- (* mask not zero *)
+  predSpec Int.eq Int.eq_spec n2 Int.zero.
++ subst n2. econstructor; split. EvalOp. simpl. 
+  destruct v1; simpl; try (rewrite sem_undef; auto).
+  rewrite sem_ne. destruct (Int.eq (Int.and i m0) Int.zero); auto.
++ TrivialExists. simpl. rewrite sem_default. auto.
+- (* default *)
+  TrivialExists. simpl. rewrite sem_default. auto.
+Qed.
+
+Hypothesis sem_swap:
+  forall c x y, sem (swap_comparison c) x y = sem c y x.
+
+Lemma eval_compimm_swap:
+  forall le c a n2 x,
+  eval_expr ge sp e m le a x ->
+  exists v, eval_expr ge sp e m le (compimm default intsem (swap_comparison c) a n2) v
+         /\ Val.lessdef (sem c (Vint n2) x) v.
+Proof.
+  intros. rewrite <- sem_swap. eapply eval_compimm; eauto.
+Qed.
+
+End COMP_IMM.
+
+Theorem eval_comp:
+  forall c, binary_constructor_sound (comp c) (Val.cmp c).
+Proof.
+  intros; red; intros until y. unfold comp; case (comp_match a b); intros; InvEval; subst.
+- eapply eval_compimm_swap; eauto.
+  intros. unfold Val.cmp. rewrite Val.swap_cmp_bool; auto.
+- eapply eval_compimm; eauto.
+- TrivialExists. simpl. rewrite Val.swap_cmp_bool. auto.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_compu:
+  forall c, binary_constructor_sound (compu c) (Val.cmpu (Mem.valid_pointer m) c).
+Proof.
+  intros; red; intros until y. unfold compu; case (compu_match a b); intros; InvEval; subst.
+- eapply eval_compimm_swap; eauto.
+  intros. unfold Val.cmpu. rewrite Val.swap_cmpu_bool; auto.
+- eapply eval_compimm; eauto.
+- TrivialExists. simpl. rewrite Val.swap_cmpu_bool. auto.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_compf:
+  forall c, binary_constructor_sound (compf c) (Val.cmpf c).
+Proof.
+  intros; red; intros. unfold compf. TrivialExists.
+Qed.
+
+Theorem eval_compfs:
+  forall c, binary_constructor_sound (compfs c) (Val.cmpfs c).
+Proof.
+  intros; red; intros. unfold compfs. TrivialExists.
+Qed.
+
+(** Floating-point conversions *)
+
+Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_intoffloat:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intoffloat x = Some y ->
+  exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
+Proof.
+  intros; TrivialExists.
+Qed.
+
+Theorem eval_floatofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.floatofint x = Some y ->
+  exists v, eval_expr ge sp e m le (floatofint a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_intuoffloat:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intuoffloat x = Some y ->
+  exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
+Proof.
+  intros; TrivialExists.
+Qed.
+
+Theorem eval_floatofintu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.floatofintu x = Some y ->
+  exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_intofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; TrivialExists.
+Qed.
+
+Theorem eval_singleofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofint x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_intuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; TrivialExists.
+Qed.
+
+Theorem eval_singleofintu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofintu x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+(** Selection *)
+
+Theorem eval_select:
+  forall le ty cond al vl a1 v1 a2 v2 a b,
+  select ty cond al a1 a2 = Some a ->
+  eval_exprlist ge sp e m le al vl ->
+  eval_expr ge sp e m le a1 v1 ->
+  eval_expr ge sp e m le a2 v2 ->
+  eval_condition cond vl m = Some b ->
+  exists v, 
+     eval_expr ge sp e m le a v
+  /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v.
+Proof.
+  unfold select; intros.
+  destruct (match ty with Tint | Tlong | Tfloat | Tsingle => true | _ => false end); inv H.
+  rewrite <- H3; TrivialExists. 
+Qed.
+
+(** Addressing modes *)
+
+Theorem eval_addressing:
+  forall le chunk a v b ofs,
+  eval_expr ge sp e m le a v ->
+  v = Vptr b ofs ->
+  match addressing chunk a with (mode, args) =>
+    exists vl,
+    eval_exprlist ge sp e m le args vl /\
+    eval_addressing ge sp mode vl = Some v
+  end.
+Proof.
+  intros until v. unfold addressing; case (addressing_match a); intros; InvEval.
+- econstructor; split. EvalOp. simpl; auto.
+- econstructor; split. EvalOp. simpl; auto.
+- econstructor; split. EvalOp. simpl. 
+  destruct v1; try discriminate. rewrite <- H; auto.
+- econstructor; split. EvalOp. simpl. congruence.
+- econstructor; split. EvalOp. simpl. congruence.
+- econstructor; split. EvalOp. simpl. congruence.
+- econstructor; split. EvalOp. simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero; auto.
+Qed.
+
+(** Builtins *)
+
+Theorem eval_builtin_arg:
+  forall a v,
+  eval_expr ge sp e m nil a v ->
+  CminorSel.eval_builtin_arg ge sp e m (builtin_arg a) v.
+Proof.
+  intros until v. unfold builtin_arg; case (builtin_arg_match a); intros; InvEval.
+- constructor.
+- constructor.
+- constructor.
+- constructor.
+- inv H. InvEval. simpl in H6. inv H6. constructor; auto.
+- subst v. repeat constructor; auto.
+- constructor; auto. 
+Qed.
+
+Theorem eval_divf_base:
+  forall le a b x y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (divf_base a b) v /\ Val.lessdef (Val.divf x y) v.
+Proof.
+  intros; unfold divf_base.
+  TrivialExists.
+Qed.
+
+Theorem eval_divfs_base:
+  forall le a b x y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (divfs_base a b) v /\ Val.lessdef (Val.divfs x y) v.
+Proof.
+  intros; unfold divfs_base.
+  TrivialExists.
+Qed.
+
+(** Platform-specific known builtins *)
+
+Theorem eval_platform_builtin:
+  forall bf al a vl v le,
+  platform_builtin bf al = Some a ->
+  eval_exprlist ge sp e m le al vl ->
+  platform_builtin_sem bf vl = Some v ->
+  exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+  intros. discriminate.
+Qed.
+
+End CMCONSTR.
diff --git a/aarch64/Stacklayout.v b/aarch64/Stacklayout.v
new file mode 100644
index 00000000..86ba9f45
--- /dev/null
+++ b/aarch64/Stacklayout.v
@@ -0,0 +1,140 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Machine- and ABI-dependent layout information for activation records. *)
+
+Require Import Coqlib.
+Require Import AST Memory Separation.
+Require Import Bounds.
+
+Local Open Scope sep_scope.
+
+(** The general shape of activation records is as follows,
+  from bottom (lowest offsets) to top:
+- Space for outgoing arguments to function calls.
+- Back link to parent frame
+- Return address
+- Saved values of callee-save registers used by the function.
+- Local stack slots.
+- Space for the stack-allocated data declared in Cminor.
+
+The stack pointer is kept 16-aligned.
+*)
+
+Definition fe_ofs_arg := 0.
+
+Definition make_env (b: bounds) : frame_env :=
+  let olink := align (4 * b.(bound_outgoing)) 8 in  (* back link *)
+  let oretaddr := olink + 8 in                      (* return address *)
+  let ocs := oretaddr + 8 in                        (* callee-saves *)
+  let ol :=  align (size_callee_save_area b ocs) 8 in (* locals *)
+  let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *)
+  let sz := align (ostkdata + b.(bound_stack_data)) 16 in
+  {| fe_size := sz;
+     fe_ofs_link := olink;
+     fe_ofs_retaddr := oretaddr;
+     fe_ofs_local := ol;
+     fe_ofs_callee_save := ocs;
+     fe_stack_data := ostkdata;
+     fe_used_callee_save := b.(used_callee_save) |}.
+
+Lemma frame_env_separated:
+  forall b sp m P,
+  let fe := make_env b in
+  m |= range sp 0 (fe_stack_data fe) ** range sp (fe_stack_data fe + bound_stack_data b) (fe_size fe) ** P ->
+  m |= range sp (fe_ofs_local fe) (fe_ofs_local fe + 4 * bound_local b)
+       ** range sp fe_ofs_arg (fe_ofs_arg + 4 * bound_outgoing b)
+       ** range sp (fe_ofs_link fe) (fe_ofs_link fe + size_chunk Mptr)
+       ** range sp (fe_ofs_retaddr fe) (fe_ofs_retaddr fe + size_chunk Mptr)
+       ** range sp (fe_ofs_callee_save fe) (size_callee_save_area b (fe_ofs_callee_save fe))
+       ** P.
+Proof.
+Local Opaque Z.add Z.mul sepconj range.
+  intros; simpl.
+  set (olink := align (4 * b.(bound_outgoing)) 8).
+  set (oretaddr := olink + 8).
+  set (ocs := oretaddr + 8).
+  set (ol :=  align (size_callee_save_area b ocs) 8).
+  set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
+  change (size_chunk Mptr) with 8.
+  generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+  assert (0 <= 4 * b.(bound_outgoing)) by omega.
+  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
+  assert (olink + 8 <= oretaddr) by (unfold oretaddr; omega).
+  assert (oretaddr + 8 <= ocs) by (unfold ocs; omega).
+  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
+  assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
+  assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega).
+(* Reorder as:
+     outgoing
+     back link
+     retaddr
+     callee-save
+     local *)
+  rewrite sep_swap12.
+  rewrite sep_swap23.
+  rewrite sep_swap34.
+  rewrite sep_swap45.
+(* Apply range_split and range_split2 repeatedly *)
+  unfold fe_ofs_arg.
+  apply range_split_2. fold olink; omega. omega.
+  apply range_split. omega.
+  apply range_split. omega.
+  apply range_split_2. fold ol. omega. omega.
+  apply range_drop_right with ostkdata. omega.
+  eapply sep_drop2. eexact H.
+Qed.
+
+Lemma frame_env_range:
+  forall b,
+  let fe := make_env b in
+  0 <= fe_stack_data fe /\ fe_stack_data fe + bound_stack_data b <= fe_size fe.
+Proof.
+  intros; simpl.
+  set (olink := align (4 * b.(bound_outgoing)) 8).
+  set (oretaddr := olink + 8).
+  set (ocs := oretaddr + 8).
+  set (ol :=  align (size_callee_save_area b ocs) 8).
+  set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
+  generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+  assert (0 <= 4 * b.(bound_outgoing)) by omega.
+  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
+  assert (olink + 8 <= oretaddr) by (unfold oretaddr; omega).
+  assert (oretaddr + 8 <= ocs) by (unfold ocs; omega).
+  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
+  assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
+  assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega).
+  split. omega. apply align_le. omega. 
+Qed.
+
+Lemma frame_env_aligned:
+  forall b,
+  let fe := make_env b in
+     (8 | fe_ofs_arg)
+  /\ (8 | fe_ofs_local fe)
+  /\ (8 | fe_stack_data fe)
+  /\ (align_chunk Mptr | fe_ofs_link fe)
+  /\ (align_chunk Mptr | fe_ofs_retaddr fe).
+Proof.
+  intros; simpl.
+  set (olink := align (4 * b.(bound_outgoing)) 8).
+  set (oretaddr := olink + 8).
+  set (ocs := oretaddr + 8).
+  set (ol :=  align (size_callee_save_area b ocs) 8).
+  set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
+  change (align_chunk Mptr) with 8.
+  split. apply Z.divide_0_r.
+  split. apply align_divides; omega.
+  split. apply align_divides; omega.
+  split. apply align_divides; omega.
+  apply Z.divide_add_r. apply align_divides; omega. apply Z.divide_refl.
+Qed.
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
new file mode 100644
index 00000000..e54673dd
--- /dev/null
+++ b/aarch64/TargetPrinter.ml
@@ -0,0 +1,592 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Printing AArch64 assembly code in asm syntax *)
+
+open Printf
+open Camlcoq
+open Sections
+open AST
+open Asm
+open AisAnnot
+open PrintAsmaux
+open Fileinfo
+
+(* Recognition of FP numbers that are supported by the fmov #imm instructions:
+   "a normalized binary floating point encoding with 1 sign bit,
+    4 bits of fraction and a 3-bit exponent"
+*)
+
+let is_immediate_float64 bits =
+  let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
+  let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
+  exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
+
+let is_immediate_float32 bits =
+  let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in
+  let mant = Int32.logand bits 0x7F_FFFFl in
+  exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant
+
+(* Module containing the printing functions *)
+
+module Target : TARGET =
+  struct
+
+(* Basic printing functions *)
+
+    let comment = "//"
+
+    let symbol        = elf_symbol
+    let symbol_offset = elf_symbol_offset
+    let label         = elf_label
+
+    let print_label oc lbl = label oc (transl_label lbl)
+
+    let intsz oc (sz, n) =
+      match sz with X -> coqint64 oc n | W -> coqint oc n
+
+    let xreg_name = function
+    | X0 -> "x0"   | X1 -> "x1"   | X2 -> "x2"   | X3 -> "x3"
+    | X4 -> "x4"   | X5 -> "x5"   | X6 -> "x6"   | X7 -> "x7"
+    | X8 -> "x8"   | X9 -> "x9"   | X10 -> "x10" | X11 -> "x11"
+    | X12 -> "x12" | X13 -> "x13" | X14 -> "x14" | X15 -> "x15"
+    | X16 -> "x16" | X17 -> "x17" | X18 -> "x18" | X19 -> "x19"
+    | X20 -> "x20" | X21 -> "x21" | X22 -> "x22" | X23 -> "x23"
+    | X24 -> "x24" | X25 -> "x25" | X26 -> "x26" | X27 -> "x27"
+    | X28 -> "x28" | X29 -> "x29" | X30 -> "x30"
+
+    let wreg_name = function
+    | X0 -> "w0"   | X1 -> "w1"   | X2 -> "w2"   | X3 -> "w3"
+    | X4 -> "w4"   | X5 -> "w5"   | X6 -> "w6"   | X7 -> "w7"
+    | X8 -> "w8"   | X9 -> "w9"   | X10 -> "w10" | X11 -> "w11"
+    | X12 -> "w12" | X13 -> "w13" | X14 -> "w14" | X15 -> "w15"
+    | X16 -> "w16" | X17 -> "w17" | X18 -> "w18" | X19 -> "w19"
+    | X20 -> "w20" | X21 -> "w21" | X22 -> "w22" | X23 -> "w23"
+    | X24 -> "w24" | X25 -> "w25" | X26 -> "w26" | X27 -> "w27"
+    | X28 -> "w28" | X29 -> "w29" | X30 -> "w30"
+
+    let xreg0_name = function RR0 r -> xreg_name r | XZR -> "xzr"
+    let wreg0_name = function RR0 r -> wreg_name r | XZR -> "wzr"
+
+    let xregsp_name = function RR1 r -> xreg_name r | XSP -> "sp"
+    let wregsp_name = function RR1 r -> wreg_name r | XSP -> "wsp"
+
+    let dreg_name = function
+    | D0 -> "d0"   | D1 -> "d1"   | D2 -> "d2"   | D3 -> "d3"
+    | D4 -> "d4"   | D5 -> "d5"   | D6 -> "d6"   | D7 -> "d7"
+    | D8 -> "d8"   | D9 -> "d9"   | D10 -> "d10" | D11 -> "d11"
+    | D12 -> "d12" | D13 -> "d13" | D14 -> "d14" | D15 -> "d15"
+    | D16 -> "d16" | D17 -> "d17" | D18 -> "d18" | D19 -> "d19"
+    | D20 -> "d20" | D21 -> "d21" | D22 -> "d22" | D23 -> "d23"
+    | D24 -> "d24" | D25 -> "d25" | D26 -> "d26" | D27 -> "d27"
+    | D28 -> "d28" | D29 -> "d29" | D30 -> "d30" | D31 -> "d31"
+
+    let sreg_name = function
+    | D0 -> "s0"   | D1 -> "s1"   | D2 -> "s2"   | D3 -> "s3"
+    | D4 -> "s4"   | D5 -> "s5"   | D6 -> "s6"   | D7 -> "s7"
+    | D8 -> "s8"   | D9 -> "s9"   | D10 -> "s10" | D11 -> "s11"
+    | D12 -> "s12" | D13 -> "s13" | D14 -> "s14" | D15 -> "s15"
+    | D16 -> "s16" | D17 -> "s17" | D18 -> "s18" | D19 -> "s19"
+    | D20 -> "s20" | D21 -> "s21" | D22 -> "s22" | D23 -> "s23"
+    | D24 -> "s24" | D25 -> "s25" | D26 -> "s26" | D27 -> "s27"
+    | D28 -> "s28" | D29 -> "s29" | D30 -> "s30" | D31 -> "s31"
+
+    let xreg oc r = output_string oc (xreg_name r)
+    let wreg oc r = output_string oc (wreg_name r)
+    let ireg oc (sz, r) =
+      output_string oc (match sz with X -> xreg_name r | W -> wreg_name r)
+
+    let xreg0 oc r = output_string oc (xreg0_name r)
+    let wreg0 oc r = output_string oc (wreg0_name r)
+    let ireg0 oc (sz, r) =
+      output_string oc (match sz with X -> xreg0_name r | W -> wreg0_name r)
+
+    let xregsp oc r = output_string oc (xregsp_name r)
+    let iregsp oc (sz, r) =
+      output_string oc (match sz with X -> xregsp_name r | W -> wregsp_name r)
+
+    let dreg oc r = output_string oc (dreg_name r)
+    let sreg oc r = output_string oc (sreg_name r)
+    let freg oc (sz, r) =
+      output_string oc (match sz with D -> dreg_name r | S -> sreg_name r)
+
+    let preg_asm oc ty = function
+      | IR r -> if ty = Tint then wreg oc r else xreg oc r
+      | FR r -> if ty = Tsingle then sreg oc r else dreg oc r
+      | _    -> assert false
+
+    let preg_annot = function
+      | IR r -> xreg_name r
+      | FR r -> dreg_name r
+      | _ -> assert false
+
+(* Names of sections *)
+
+    let name_of_section = function
+      | Section_text         -> ".text"
+      | Section_data i | Section_small_data i ->
+          if i then ".data" else common_section ()
+      | Section_const i | Section_small_const i ->
+          if i || (not !Clflags.option_fcommon) then ".section	.rodata" else "COMM"
+      | Section_string       -> ".section	.rodata"
+      | Section_literal      -> ".section	.rodata"
+      | Section_jumptable    -> ".section	.rodata"
+      | Section_debug_info _ -> ".section	.debug_info,\"\",%progbits"
+      | Section_debug_loc    -> ".section	.debug_loc,\"\",%progbits"
+      | Section_debug_abbrev -> ".section	.debug_abbrev,\"\",%progbits"
+      | Section_debug_line _ -> ".section	.debug_line,\"\",%progbits"
+      | Section_debug_ranges -> ".section	.debug_ranges,\"\",%progbits"
+      | Section_debug_str    -> ".section	.debug_str,\"MS\",%progbits,1"
+      | Section_user(s, wr, ex) ->
+          sprintf ".section	\"%s\",\"a%s%s\",%%progbits"
+            s (if wr then "w" else "") (if ex then "x" else "")
+      | Section_ais_annotation -> sprintf ".section	\"__compcert_ais_annotations\",\"\",@note"
+
+    let section oc sec =
+      fprintf oc "	%s\n" (name_of_section sec)
+
+(* Associate labels to floating-point constants and to symbols. *)
+
+    let emit_constants oc lit =
+      if exists_constants () then begin
+         section oc lit;
+         if Hashtbl.length literal64_labels > 0 then
+           begin
+             fprintf oc "	.balign 8\n";
+             Hashtbl.iter
+               (fun bf lbl -> fprintf oc "%a:	.quad	0x%Lx\n" label lbl bf)
+               literal64_labels
+           end;
+         if Hashtbl.length literal32_labels > 0 then
+           begin
+             fprintf oc "	.balign	4\n";
+             Hashtbl.iter
+               (fun bf lbl ->
+                  fprintf oc "%a:	.long	0x%lx\n" label lbl bf)
+               literal32_labels
+           end;
+         reset_literals ()
+      end
+
+(* Emit .file / .loc debugging directives *)
+
+    let print_file_line oc file line =
+      print_file_line oc comment file line
+
+(* Name of testable condition *)
+
+    let condition_name = function
+      | TCeq -> "eq"
+      | TCne -> "ne"
+      | TChs -> "hs"
+      | TClo -> "lo"
+      | TCmi -> "mi"
+      | TCpl -> "pl"
+      | TChi -> "hi"
+      | TCls -> "ls"
+      | TCge -> "ge"
+      | TClt -> "lt"
+      | TCgt -> "gt"
+      | TCle -> "le"
+
+(* Print an addressing mode *)
+
+    let addressing oc = function
+    | ADimm(base, n) -> fprintf oc "[%a, #%a]" xregsp base coqint64 n
+    | ADreg(base, r) -> fprintf oc "[%a, %a]" xregsp base xreg r
+    | ADlsl(base, r, n) -> fprintf oc "[%a, %a, lsl #%a]" xregsp base xreg r coqint n
+    | ADsxt(base, r, n) -> fprintf oc "[%a, %a, sxtw #%a]" xregsp base wreg r coqint n
+    | ADuxt(base, r, n) -> fprintf oc "[%a, %a, uxtw #%a]" xregsp base wreg r coqint n
+    | ADadr(base, id, ofs) -> fprintf oc "[%a, #:lo12:%a]" xregsp base symbol_offset (id, ofs)
+    | ADpostincr(base, n) -> fprintf oc "[%a], #%a" xregsp base coqint64 n
+
+(* Print a shifted operand *)
+    let shiftop oc = function
+    | SOnone -> ()
+    | SOlsl n -> fprintf oc ", lsl #%a" coqint n
+    | SOlsr n -> fprintf oc ", lsr #%a" coqint n
+    | SOasr n -> fprintf oc ", asr #%a" coqint n
+    | SOror n -> fprintf oc ", ror #%a" coqint n
+
+(* Print a sign- or zero-extended operand *)
+    let extendop oc = function
+    | EOsxtb n -> fprintf oc ", sxtb #%a" coqint n
+    | EOsxth n -> fprintf oc ", sxth #%a" coqint n
+    | EOsxtw n -> fprintf oc ", sxtw #%a" coqint n
+    | EOuxtb n -> fprintf oc ", uxtb #%a" coqint n
+    | EOuxth n -> fprintf oc ", uxth #%a" coqint n
+    | EOuxtw n -> fprintf oc ", uxtw #%a" coqint n
+    | EOuxtx n -> fprintf oc ", uxtx #%a" coqint n
+
+(* Printing of instructions *)
+    let print_instruction oc = function
+    (* Branches *)
+    | Pb lbl ->
+        fprintf oc "	b	%a\n" print_label lbl
+    | Pbc(c, lbl) ->
+        fprintf oc "	b.%s	%a\n" (condition_name c) print_label lbl
+    | Pbl(id, sg) ->
+        fprintf oc "	bl	%a\n" symbol id
+    | Pbs(id, sg) ->
+        fprintf oc "	b	%a\n" symbol id
+    | Pblr(r, sg) ->
+        fprintf oc "	blr	%a\n" xreg r
+    | Pbr(r, sg) ->
+        fprintf oc "	br	%a\n" xreg r
+    | Pret r ->
+        fprintf oc "	ret	%a\n" xreg r
+    | Pcbnz(sz, r, lbl) ->
+        fprintf oc "	cbnz	%a, %a\n" ireg (sz, r) print_label lbl
+    | Pcbz(sz, r, lbl) ->
+        fprintf oc "	cbz	%a, %a\n" ireg (sz, r) print_label lbl
+    | Ptbnz(sz, r, n, lbl) ->
+        fprintf oc "	tbnz	%a, #%a, %a\n" ireg (sz, r) coqint n print_label lbl
+    | Ptbz(sz, r, n, lbl) ->
+        fprintf oc "	tbz	%a, #%a, %a\n" ireg (sz, r) coqint n print_label lbl
+    (* Memory loads and stores *)
+    | Pldrw(rd, a) | Pldrw_a(rd, a) ->
+        fprintf oc "	ldr	%a, %a\n" wreg rd addressing a
+    | Pldrx(rd, a) | Pldrx_a(rd, a) ->
+        fprintf oc "	ldr	%a, %a\n" xreg rd addressing a
+    | Pldrb(sz, rd, a) ->
+        fprintf oc "	ldrb	%a, %a\n" wreg rd addressing a
+    | Pldrsb(sz, rd, a) ->
+        fprintf oc "	ldrsb	%a, %a\n" ireg (sz, rd) addressing a
+    | Pldrh(sz, rd, a) ->
+        fprintf oc "	ldrh	%a, %a\n" wreg rd addressing a
+    | Pldrsh(sz, rd, a) ->
+        fprintf oc "	ldrsh	%a, %a\n" ireg (sz, rd) addressing a
+    | Pldrzw(rd, a) ->
+        fprintf oc "	ldr	%a, %a\n" wreg rd addressing a
+        (* the upper 32 bits of Xrd are set to 0, performing zero-extension *)
+    | Pldrsw(rd, a) ->
+        fprintf oc "	ldrsw	%a, %a\n" xreg rd addressing a
+    | Pldp(rd1, rd2, a) ->
+        fprintf oc "	ldp	%a, %a, %a\n" xreg rd1 xreg rd2 addressing a
+    | Pstrw(rs, a) | Pstrw_a(rs, a) ->
+        fprintf oc "	str	%a, %a\n" wreg rs addressing a
+    | Pstrx(rs, a) | Pstrx_a(rs, a) ->
+        fprintf oc "	str	%a, %a\n" xreg rs addressing a
+    | Pstrb(rs, a) ->
+        fprintf oc "	strb	%a, %a\n" wreg rs addressing a
+    | Pstrh(rs, a) ->
+        fprintf oc "	strh	%a, %a\n" wreg rs addressing a
+    | Pstp(rs1, rs2, a) ->
+        fprintf oc "	stp	%a, %a, %a\n" xreg rs1 xreg rs2 addressing a
+    (* Integer arithmetic, immediate *)
+    | Paddimm(sz, rd, r1, n) ->
+        fprintf oc "	add	%a, %a, #%a\n" iregsp (sz, rd) iregsp (sz, r1) intsz (sz, n)
+    | Psubimm(sz, rd, r1, n) ->
+        fprintf oc "	sub	%a, %a, #%a\n" iregsp (sz, rd) iregsp (sz, r1) intsz (sz, n)
+    | Pcmpimm(sz, r1, n) ->
+        fprintf oc "	cmp	%a, #%a\n" ireg (sz, r1) intsz (sz, n)
+    | Pcmnimm(sz, r1, n) ->
+        fprintf oc "	cmn	%a, #%a\n" ireg (sz, r1) intsz (sz, n)
+    (* Move integer register *)
+    | Pmov(rd, r1) ->
+        fprintf oc "	mov	%a, %a\n" xregsp rd xregsp r1
+    (* Logical, immediate *)
+    | Pandimm(sz, rd, r1, n) ->
+        fprintf oc "	and	%a, %a, #%a\n" ireg (sz, rd) ireg0 (sz, r1) intsz (sz, n)
+    | Peorimm(sz, rd, r1, n) ->
+        fprintf oc "	eor	%a, %a, #%a\n" ireg (sz, rd) ireg0 (sz, r1) intsz (sz, n)
+    | Porrimm(sz, rd, r1, n) ->
+        fprintf oc "	orr	%a, %a, #%a\n" ireg (sz, rd) ireg0 (sz, r1) intsz (sz, n)
+    | Ptstimm(sz, r1, n) ->
+        fprintf oc "	tst	%a, #%a\n" ireg (sz, r1) intsz (sz, n)
+    (* Move wide immediate *)
+    | Pmovz(sz, rd, n, pos) ->
+        fprintf oc "	movz	%a, #%d, lsl #%d\n" ireg (sz, rd) (Z.to_int n) (Z.to_int pos)
+    | Pmovn(sz, rd, n, pos) ->
+        fprintf oc "	movn	%a, #%d, lsl #%d\n" ireg (sz, rd) (Z.to_int n) (Z.to_int pos)
+    | Pmovk(sz, rd, n, pos) ->
+        fprintf oc "	movk	%a, #%d, lsl #%d\n" ireg (sz, rd) (Z.to_int n) (Z.to_int pos)
+    (* PC-relative addressing *)
+    | Padrp(rd, id, ofs) ->
+        fprintf oc "	adrp	%a, %a\n" xreg rd symbol_offset (id, ofs)
+    | Paddadr(rd, r1, id, ofs) ->
+        fprintf oc "	add	%a, %a, #:lo12:%a\n" xreg rd xreg r1 symbol_offset (id, ofs)
+    (* Bit-field operations *)
+    | Psbfiz(sz, rd, r1, r, s) ->
+        fprintf oc "	sbfiz	%a, %a, %a, %d\n" ireg (sz, rd) ireg (sz, r1) coqint r (Z.to_int s)
+    | Psbfx(sz, rd, r1, r, s) ->
+        fprintf oc "	sbfx	%a, %a, %a, %d\n" ireg (sz, rd) ireg (sz, r1) coqint r (Z.to_int s)
+    | Pubfiz(sz, rd, r1, r, s) ->
+        fprintf oc "	ubfiz	%a, %a, %a, %d\n" ireg (sz, rd) ireg (sz, r1) coqint r (Z.to_int s)
+    | Pubfx(sz, rd, r1, r, s) ->
+        fprintf oc "	ubfx	%a, %a, %a, %d\n" ireg (sz, rd) ireg (sz, r1) coqint r (Z.to_int s)
+    (* Integer arithmetic, shifted register *)
+    | Padd(sz, rd, r1, r2, s) ->
+        fprintf oc "	add	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Psub(sz, rd, r1, r2, s) ->
+        fprintf oc "	sub	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Pcmp(sz, r1, r2, s) ->
+        fprintf oc "	cmp	%a, %a%a\n" ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Pcmn(sz, r1, r2, s) ->
+        fprintf oc "	cmn	%a, %a%a\n" ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    (* Integer arithmetic, extending register *)
+    | Paddext(rd, r1, r2, x) ->
+        fprintf oc "	add	%a, %a, %a%a\n" xregsp rd xregsp r1 wreg r2 extendop x
+    | Psubext(rd, r1, r2, x) ->
+        fprintf oc "	sub	%a, %a, %a%a\n" xregsp rd xregsp r1 wreg r2 extendop x
+    | Pcmpext(r1, r2, x) ->
+        fprintf oc "	cmp	%a, %a%a\n" xreg r1 wreg r2 extendop x
+    | Pcmnext(r1, r2, x) ->
+        fprintf oc "	cmn	%a, %a%a\n" xreg r1 wreg r2 extendop x
+    (* Logical, shifted register *)
+    | Pand(sz, rd, r1, r2, s) ->
+        fprintf oc "	and	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Pbic(sz, rd, r1, r2, s) ->
+        fprintf oc "	bic	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Peon(sz, rd, r1, r2, s) ->
+        fprintf oc "	eon	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Peor(sz, rd, r1, r2, s) ->
+        fprintf oc "	eor	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Porr(sz, rd, r1, r2, s) ->
+        fprintf oc "	orr	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Porn(sz, rd, r1, r2, s) ->
+        fprintf oc "	orn	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    | Ptst(sz, r1, r2, s) ->
+        fprintf oc "	tst	%a, %a%a\n" ireg0 (sz, r1) ireg (sz, r2) shiftop s
+    (* Variable shifts *)
+    | Pasrv(sz, rd, r1, r2) ->
+        fprintf oc "	asr	%a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2)
+    | Plslv(sz, rd, r1, r2) ->
+        fprintf oc "	lsl	%a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2)
+    | Plsrv(sz, rd, r1, r2) ->
+        fprintf oc "	lsr	%a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2)
+    | Prorv(sz, rd, r1, r2) ->
+        fprintf oc "	ror	%a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2)
+    (* Bit operations *)
+    | Pcls(sz, rd, r1) ->
+        fprintf oc "	cls	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
+    | Pclz(sz, rd, r1) ->
+        fprintf oc "	clz	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
+    | Prev(sz, rd, r1) ->
+        fprintf oc "	rev	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
+    | Prev16(sz, rd, r1) ->
+        fprintf oc "	rev16	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
+    (* Conditional data processing *)
+    | Pcsel(rd, r1, r2, c) ->
+        fprintf oc "	csel	%a, %a, %a, %s\n" xreg rd xreg r1 xreg r2 (condition_name c)
+    | Pcset(rd, c) ->
+        fprintf oc "	cset	%a, %s\n" xreg rd (condition_name c)
+    (* Integer multiply/divide *)
+    | Pmadd(sz, rd, r1, r2, r3) ->
+        fprintf oc "	madd	%a, %a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2) ireg0 (sz, r3)
+    | Pmsub(sz, rd, r1, r2, r3) ->
+        fprintf oc "	msub	%a, %a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2) ireg0 (sz, r3)
+    | Psmulh(rd, r1, r2) ->
+        fprintf oc "	smulh	%a, %a, %a\n" xreg rd xreg r1 xreg r2
+    | Pumulh(rd, r1, r2) ->
+        fprintf oc "	umulh	%a, %a, %a\n" xreg rd xreg r1 xreg r2
+    | Psdiv(sz, rd, r1, r2) ->
+        fprintf oc "	sdiv	%a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2)
+    | Pudiv(sz, rd, r1, r2) ->
+        fprintf oc "	udiv	%a, %a, %a\n" ireg (sz, rd) ireg (sz, r1) ireg (sz, r2)
+    (* Floating-point loads and stores *)
+    | Pldrs(rd, a) ->
+        fprintf oc "	ldr	%a, %a\n" sreg rd addressing a
+    | Pldrd(rd, a) | Pldrd_a(rd, a) ->
+        fprintf oc "	ldr	%a, %a\n" dreg rd addressing a
+    | Pstrs(rd, a) ->
+        fprintf oc "	str	%a, %a\n" sreg rd addressing a
+    | Pstrd(rd, a) | Pstrd_a(rd, a) ->
+        fprintf oc "	str	%a, %a\n" dreg rd addressing a
+    (* Floating-point move *)
+    | Pfmov(rd, r1) ->
+        fprintf oc "	fmov	%a, %a\n" dreg rd dreg r1
+    | Pfmovimmd(rd, f) ->
+        let d = camlint64_of_coqint (Floats.Float.to_bits f) in
+        if is_immediate_float64 d then
+          fprintf oc "	fmov	%a, #%.7f\n" dreg rd (Int64.float_of_bits d)
+        else begin
+          let lbl = label_literal64 d in
+          fprintf oc "	adrp	x16, %a\n" label lbl;
+          fprintf oc "	ldr	%a, [x16, #:lo12:%a] %s %.18g\n" dreg rd label lbl comment (Int64.float_of_bits d)
+        end
+    | Pfmovimms(rd, f) ->
+        let d = camlint_of_coqint (Floats.Float32.to_bits f) in
+        if is_immediate_float32 d then
+          fprintf oc "	fmov	%a, #%.7f\n" sreg rd (Int32.float_of_bits d)
+        else begin
+          let lbl = label_literal32 d in
+          fprintf oc "	adrp	x16, %a\n" label lbl;
+          fprintf oc "	ldr	%a, [x16, #:lo12:%a] %s %.18g\n" sreg rd label lbl comment (Int32.float_of_bits d)
+        end
+    | Pfmovi(D, rd, r1) ->
+        fprintf oc "	fmov	%a, %a\n" dreg rd xreg0 r1
+    | Pfmovi(S, rd, r1) ->
+        fprintf oc "	fmov	%a, %a\n" sreg rd wreg0 r1
+    (* Floating-point conversions *)
+    | Pfcvtds(rd, r1) ->
+        fprintf oc "	fcvt	%a, %a\n" dreg rd sreg r1 
+    | Pfcvtsd(rd, r1) ->
+        fprintf oc "	fcvt	%a, %a\n" sreg rd dreg r1 
+    | Pfcvtzs(isz, fsz, rd, r1) ->
+        fprintf oc "	fcvtzs  %a, %a\n" ireg (isz, rd) freg (fsz, r1)
+    | Pfcvtzu(isz, fsz, rd, r1) ->
+        fprintf oc "	fcvtzu  %a, %a\n" ireg (isz, rd) freg (fsz, r1)
+    | Pscvtf(fsz, isz, rd, r1) ->
+        fprintf oc "	scvtf	%a, %a\n" freg (fsz, rd) ireg (isz, r1)
+    | Pucvtf(fsz, isz, rd, r1) ->
+        fprintf oc "	ucvtf	%a, %a\n" freg (fsz, rd) ireg (isz, r1)
+    (* Floating-point arithmetic *)
+    | Pfabs(sz, rd, r1) ->
+        fprintf oc "	fabs	%a, %a\n" freg (sz, rd) freg (sz, r1)
+    | Pfneg(sz, rd, r1) ->
+        fprintf oc "	fneg	%a, %a\n" freg (sz, rd) freg (sz, r1)
+    | Pfsqrt(sz, rd, r1) ->
+        fprintf oc "	fsqrt	%a, %a\n" freg (sz, rd) freg (sz, r1)
+    | Pfadd(sz, rd, r1, r2) ->
+        fprintf oc "	fadd	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+    | Pfdiv(sz, rd, r1, r2) ->
+        fprintf oc "	fdiv	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+    | Pfmul(sz, rd, r1, r2) ->
+        fprintf oc "	fmul	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+    | Pfnmul(sz, rd, r1, r2) ->
+        fprintf oc "	fnmul	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+    | Pfsub(sz, rd, r1, r2) ->
+        fprintf oc "	fsub	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+    | Pfmadd(sz, rd, r1, r2, r3) ->
+        fprintf oc "	fmadd	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
+    | Pfmsub(sz, rd, r1, r2, r3) ->
+        fprintf oc "	fmsub	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
+    | Pfnmadd(sz, rd, r1, r2, r3) ->
+        fprintf oc "	fnmadd	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
+    | Pfnmsub(sz, rd, r1, r2, r3) ->
+        fprintf oc "	fnmsub	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
+    (* Floating-point comparison *)
+    | Pfcmp(sz, r1, r2) ->
+        fprintf oc "	fcmp	%a, %a\n" freg (sz, r1) freg (sz, r2)
+    | Pfcmp0(sz, r1) ->
+        fprintf oc "	fcmp	%a, #0.0\n" freg (sz, r1)
+    (* Floating-point conditional select *)
+    | Pfsel(rd, r1, r2, c) ->
+        fprintf oc "	fcsel	%a, %a, %a, %s\n" dreg rd dreg r1 dreg r2 (condition_name c)
+    (* No-op *)
+    | Pnop ->
+        fprintf oc "	nop\n"
+    (* Pseudo-instructions expanded in Asmexpand *)
+    | Pallocframe(sz, linkofs) -> assert false
+    | Pfreeframe(sz, linkofs) -> assert false
+    | Pcvtx2w rd -> assert false
+    (* Pseudo-instructions not yet expanded *)
+    | Plabel lbl ->
+        fprintf oc "%a:\n" print_label lbl
+    | Ploadsymbol(rd, id) ->
+        fprintf oc "	adrp	%a, :got:%a\n" xreg rd symbol id;
+        fprintf oc "	ldr	%a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id
+    | Pcvtsw2x(rd, r1) ->
+        fprintf oc "	sxtw	%a, %a\n" xreg rd wreg r1
+    | Pcvtuw2x(rd, r1) ->
+        fprintf oc "	uxtw	%a, %a\n" xreg rd wreg r1
+    | Pbtbl(r1, tbl) ->
+        let lbl = new_label() in
+        fprintf oc "	adr	x16, %a\n" label lbl;
+        fprintf oc "	add	x16, x16, %a, uxtw #2\n" wreg r1;
+        fprintf oc "	br	x16\n";
+        fprintf oc "%a:" label lbl;
+        List.iter (fun l -> fprintf oc "	b	%a\n" print_label l) tbl
+    | Pcfi_adjust sz ->
+        cfi_adjust oc (camlint_of_coqint sz)
+    | Pcfi_rel_offset ofs ->
+        cfi_rel_offset oc "lr" (camlint_of_coqint ofs)
+    | Pbuiltin(ef, args, res) ->
+        begin match ef with
+          | EF_annot(kind,txt, targs) ->
+            begin match (P.to_int kind) with
+              | 1 -> let annot = annot_text preg_annot "sp" (camlstring_of_coqstring txt) args  in
+                fprintf oc "%s annotation: %S\n" comment annot
+              | 2 -> let lbl = new_label () in
+                fprintf oc "%a:\n" label lbl;
+                add_ais_annot lbl preg_annot "sp" (camlstring_of_coqstring txt) args
+              | _ -> assert false
+            end
+         | EF_debug(kind, txt, targs) ->
+             print_debug_info comment print_file_line preg_annot "sp" oc
+                              (P.to_int kind) (extern_atom txt) args
+         | EF_inline_asm(txt, sg, clob) ->
+             fprintf oc "%s begin inline assembly\n\t" comment;
+             print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
+             fprintf oc "%s end inline assembly\n" comment
+         | _ ->
+             assert false
+        end
+
+    let get_section_names name =
+      let (text, lit) =
+        match C2C.atom_sections name with
+        | t :: l :: _ -> (t, l)
+        | _    -> (Section_text, Section_literal) in
+      text,lit,Section_jumptable
+
+    let print_align oc alignment =
+      fprintf oc "	.balign %d\n" alignment
+
+    let print_jumptable oc jmptbl =
+      let print_tbl oc (lbl, tbl) =
+        fprintf oc "%a:\n" label lbl;
+        List.iter
+          (fun l -> fprintf oc "	.long	%a - %a\n"
+                               print_label l label lbl)
+          tbl in
+      if !jumptables <> [] then
+        begin
+          section oc jmptbl;
+          fprintf oc "	.balign 4\n";
+          List.iter (print_tbl oc) !jumptables;
+          jumptables := []
+        end
+
+    let print_fun_info = elf_print_fun_info
+
+    let print_optional_fun_info _ = ()
+
+    let print_var_info = elf_print_var_info
+
+    let print_comm_symb oc sz name align =
+      if C2C.atom_is_static name then
+        fprintf oc "	.local	%a\n" symbol name;
+        fprintf oc "	.comm	%a, %s, %d\n"
+        symbol name
+        (Z.to_string sz)
+        align
+
+    let print_instructions oc fn =
+      current_function_sig := fn.fn_sig;
+      List.iter (print_instruction oc) fn.fn_code
+
+(* Data *)
+
+    let address = ".quad"
+
+    let print_prologue oc =
+      if !Clflags.option_g then begin
+        section oc Section_text;
+      end
+
+    let print_epilogue oc =
+      if !Clflags.option_g then begin
+        Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
+        section oc Section_text;
+      end
+
+    let default_falignment = 2
+
+    let cfi_startproc oc = ()
+    let cfi_endproc oc = ()
+
+  end
+
+let sel_target () =
+  (module Target:TARGET)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
new file mode 100644
index 00000000..e0d98c85
--- /dev/null
+++ b/aarch64/ValueAOp.v
@@ -0,0 +1,319 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+Require Import Coqlib Compopts.
+Require Import AST Integers Floats Values Memory Globalenvs.
+Require Import Op RTL ValueDomain.
+
+(** Value analysis for AArch64 operators *)
+
+Definition eval_static_shift (s: shift) (v: aval) (n: amount32) : aval :=
+  match s with
+  | Slsl => shl v (I n)
+  | Slsr => shru v (I n)
+  | Sasr => shr v (I n)
+  | Sror => ror v (I n)
+  end.
+
+Definition eval_static_shiftl (s: shift) (v: aval) (n: amount64) : aval :=
+  match s with
+  | Slsl => shll v (I n)
+  | Slsr => shrlu v (I n)
+  | Sasr => shrl v (I n)
+  | Sror => rorl v (I n)
+  end.
+
+Definition eval_static_extend (x: extension) (v: aval) (n: amount64) : aval :=
+  shll (match x with Xsgn32 => longofint v | Xuns32 => longofintu v end) (I n).
+
+Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
+  match cond, vl with
+  | Ccomp c, v1 :: v2 :: nil => cmp_bool c v1 v2
+  | Ccompu c, v1 :: v2 :: nil => cmpu_bool c v1 v2
+  | Ccompimm c n, v1 :: nil => cmp_bool c v1 (I n)
+  | Ccompuimm c n, v1 :: nil => cmpu_bool c v1 (I n)
+  | Ccompshift c s a, v1 :: v2 :: nil => cmp_bool c v1 (eval_static_shift s v2 a)
+  | Ccompushift c s a, v1 :: v2 :: nil => cmpu_bool c v1 (eval_static_shift s v2 a)
+  | Cmaskzero m, v1 :: nil => maskzero v1 m
+  | Cmasknotzero m, v1 :: nil => cnot (maskzero v1 m)
+  | Ccompl c, v1 :: v2 :: nil => cmpl_bool c v1 v2
+  | Ccomplu c, v1 :: v2 :: nil => cmplu_bool c v1 v2
+  | Ccomplimm c n, v1 :: nil => cmpl_bool c v1 (L n)
+  | Ccompluimm c n, v1 :: nil => cmplu_bool c v1 (L n)
+  | Ccomplshift c s a, v1 :: v2 :: nil => cmpl_bool c v1 (eval_static_shiftl s v2 a)
+  | Ccomplushift c s a, v1 :: v2 :: nil => cmplu_bool c v1 (eval_static_shiftl s v2 a)
+  | Cmasklzero m, v1 :: nil => cmpl_bool Ceq (andl v1 (L m)) (L Int64.zero)
+  | Cmasklnotzero m, v1 :: nil => cmpl_bool Cne (andl v1 (L m)) (L Int64.zero)
+  | Ccompf c, v1 :: v2 :: nil => cmpf_bool c v1 v2
+  | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
+  | Ccompfzero c, v1 :: nil => cmpf_bool c v1 (F Float.zero)
+  | Cnotcompfzero c, v1 :: nil => cnot (cmpf_bool c v1 (F Float.zero))
+  | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
+  | Ccompfszero c, v1 :: nil => cmpfs_bool c v1 (FS Float32.zero)
+  | Cnotcompfszero c, v1 :: nil => cnot (cmpfs_bool c v1 (FS Float32.zero))
+  | _, _ => Bnone
+  end.
+
+Definition eval_static_addressing (addr: addressing) (vl: list aval): aval :=
+  match addr, vl with
+  | Aindexed n, v1 :: nil => addl v1 (L n)
+  | Aindexed2, v1 :: v2 :: nil => addl v1 v2
+  | Aindexed2shift a, v1 :: v2 :: nil => addl v1 (shll v2 (I a))
+  | Aindexed2ext x a, v1 :: v2 :: nil => addl v1 (eval_static_extend x v2 a)
+  | Aglobal s ofs, nil => Ptr (Gl s ofs)
+  | Ainstack ofs, nil => Ptr (Stk ofs)
+  | _, _ => Vbot
+  end.
+
+Definition eval_static_operation (op: operation) (vl: list aval): aval :=
+  match op, vl with
+  | Omove, v1::nil => v1
+  | Ointconst n, nil => I n
+  | Olongconst n, nil => L n
+  | Ofloatconst n, nil => if propagate_float_constants tt then F n else ntop
+  | Osingleconst n, nil => if propagate_float_constants tt then FS n else ntop
+  | Oaddrsymbol id ofs, nil => Ptr (Gl id ofs)
+  | Oaddrstack ofs, nil => Ptr (Stk ofs)
+
+  | Oshift s a, v1::nil => eval_static_shift s v1 a  
+  | Oadd, v1::v2::nil => add v1 v2
+  | Oaddshift s a, v1::v2::nil => add v1 (eval_static_shift s v2 a)
+  | Oaddimm n, v1::nil => add v1 (I n)
+  | Oneg, v1::nil => neg v1
+  | Onegshift s a, v1::nil => neg (eval_static_shift s v1 a)
+  | Osub, v1::v2::nil => sub v1 v2
+  | Osubshift s a, v1::v2::nil => sub v1 (eval_static_shift s v2 a)
+  | Omul, v1::v2::nil => mul v1 v2
+  | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
+  | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
+  | Odiv, v1::v2::nil => divs v1 v2
+  | Odivu, v1::v2::nil => divu v1 v2
+  | Oand, v1::v2::nil => and v1 v2
+  | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
+  | Oandimm n, v1::nil => and v1 (I n)
+  | Oor, v1::v2::nil => or v1 v2
+  | Oorshift s a, v1::v2::nil => or v1 (eval_static_shift s v2 a)
+  | Oorimm n, v1::nil => or v1 (I n)
+  | Oxor, v1::v2::nil => xor v1 v2
+  | Oxorshift s a, v1::v2::nil => xor v1 (eval_static_shift s v2 a)
+  | Oxorimm n, v1::nil => xor v1 (I n)
+  | Onot, v1::nil => notint v1
+  | Onotshift s a, v1::nil => notint (eval_static_shift s v1 a)
+  | Obic, v1::v2::nil => and v1 (notint v2)
+  | Obicshift s a, v1::v2::nil => and v1 (notint (eval_static_shift s v2 a))
+  | Oorn, v1::v2::nil => or v1 (notint v2)
+  | Oornshift s a, v1::v2::nil => or v1 (notint (eval_static_shift s v2 a))
+  | Oeqv, v1::v2::nil => xor v1 (notint v2)
+  | Oeqvshift s a, v1::v2::nil => xor v1 (notint (eval_static_shift s v2 a))
+  | Oshl, v1::v2::nil => shl v1 v2
+  | Oshr, v1::v2::nil => shr v1 v2
+  | Oshru, v1::v2::nil => shru v1 v2
+  | Oshrximm n, v1::nil => shrx v1 (I n)
+  | Ozext s, v1::nil => zero_ext s v1
+  | Osext s, v1::nil => sign_ext s v1
+  | Oshlzext s a, v1::nil => shl (zero_ext s v1) (I a)
+  | Oshlsext s a, v1::nil => shl (sign_ext s v1) (I a)
+  | Ozextshr a s, v1::nil => zero_ext s (shru v1 (I a))
+  | Osextshr a s, v1::nil => sign_ext s (shr v1 (I a))
+
+  | Oshiftl s a, v1::nil => eval_static_shiftl s v1 a  
+  | Oextend x a, v1::nil => eval_static_extend x v1 a
+  | Omakelong, v1::v2::nil => longofwords v1 v2
+  | Olowlong, v1::nil => loword v1
+  | Ohighlong, v1::nil => hiword v1
+  | Oaddl, v1::v2::nil => addl v1 v2
+  | Oaddlshift s a, v1::v2::nil => addl v1 (eval_static_shiftl s v2 a)
+  | Oaddlext x a, v1::v2::nil => addl v1 (eval_static_extend x v2 a)
+  | Oaddlimm n, v1::nil => addl v1 (L n)
+  | Onegl, v1::nil => negl v1
+  | Oneglshift s a, v1::nil => negl (eval_static_shiftl s v1 a)
+  | Osubl, v1::v2::nil => subl v1 v2
+  | Osublshift s a, v1::v2::nil => subl v1 (eval_static_shiftl s v2 a)
+  | Osublext x a, v1::v2::nil => subl v1 (eval_static_extend x v2 a)
+  | Omull, v1::v2::nil => mull v1 v2
+  | Omulladd, v1::v2::v3::nil => addl v1 (mull v2 v3)
+  | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
+  | Omullhs, v1::v2::nil => mullhs v1 v2
+  | Omullhu, v1::v2::nil => mullhu v1 v2
+  | Odivl, v1::v2::nil => divls v1 v2
+  | Odivlu, v1::v2::nil => divlu v1 v2
+  | Oandl, v1::v2::nil => andl v1 v2
+  | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
+  | Oandlimm n, v1::nil => andl v1 (L n)
+  | Oorl, v1::v2::nil => orl v1 v2
+  | Oorlshift s a, v1::v2::nil => orl v1 (eval_static_shiftl s v2 a)
+  | Oorlimm n, v1::nil => orl v1 (L n)
+  | Oxorl, v1::v2::nil => xorl v1 v2
+  | Oxorlshift s a, v1::v2::nil => xorl v1 (eval_static_shiftl s v2 a)
+  | Oxorlimm n, v1::nil => xorl v1 (L n)
+  | Onotl, v1::nil => notl v1
+  | Onotlshift s a, v1::nil => notl (eval_static_shiftl s v1 a)
+  | Obicl, v1::v2::nil => andl v1 (notl v2)
+  | Obiclshift s a, v1::v2::nil => andl v1 (notl (eval_static_shiftl s v2 a))
+  | Oornl, v1::v2::nil => orl v1 (notl v2)
+  | Oornlshift s a, v1::v2::nil => orl v1 (notl (eval_static_shiftl s v2 a))
+  | Oeqvl, v1::v2::nil => xorl v1 (notl v2)
+  | Oeqvlshift s a, v1::v2::nil => xorl v1 (notl (eval_static_shiftl s v2 a))
+  | Oshll, v1::v2::nil => shll v1 v2
+  | Oshrl, v1::v2::nil => shrl v1 v2
+  | Oshrlu, v1::v2::nil => shrlu v1 v2
+  | Oshrlximm n, v1::nil => shrxl v1 (I n)
+  | Ozextl s, v1::nil => zero_ext_l s v1
+  | Osextl s, v1::nil => sign_ext_l s v1
+  | Oshllzext s a, v1::nil => shll (zero_ext_l s v1) (I a)
+  | Oshllsext s a, v1::nil => shll (sign_ext_l s v1) (I a)
+  | Ozextshrl a s, v1::nil => zero_ext_l s (shrlu v1 (I a))
+  | Osextshrl a s, v1::nil => sign_ext_l s (shrl v1 (I a))
+
+  | Onegf, v1::nil => negf v1
+  | Oabsf, v1::nil => absf v1
+  | Oaddf, v1::v2::nil => addf v1 v2
+  | Osubf, v1::v2::nil => subf v1 v2
+  | Omulf, v1::v2::nil => mulf v1 v2
+  | Odivf, v1::v2::nil => divf v1 v2
+
+  | Onegfs, v1::nil => negfs v1
+  | Oabsfs, v1::nil => absfs v1
+  | Oaddfs, v1::v2::nil => addfs v1 v2
+  | Osubfs, v1::v2::nil => subfs v1 v2
+  | Omulfs, v1::v2::nil => mulfs v1 v2
+  | Odivfs, v1::v2::nil => divfs v1 v2
+
+  | Osingleoffloat, v1::nil => singleoffloat v1
+  | Ofloatofsingle, v1::nil => floatofsingle v1
+  | Ointoffloat, v1::nil => intoffloat v1
+  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ofloatofint, v1::nil => floatofint v1
+  | Ofloatofintu, v1::nil => floatofintu v1
+  | Ointofsingle, v1::nil => intofsingle v1
+  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Osingleofint, v1::nil => singleofint v1
+  | Osingleofintu, v1::nil => singleofintu v1
+  | Olongoffloat, v1::nil => longoffloat v1
+  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Ofloatoflong, v1::nil => floatoflong v1
+  | Ofloatoflongu, v1::nil => floatoflongu v1
+  | Olongofsingle, v1::nil => longofsingle v1
+  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Osingleoflong, v1::nil => singleoflong v1
+  | Osingleoflongu, v1::nil => singleoflongu v1
+
+  | Ocmp c, _ => of_optbool (eval_static_condition c vl)
+  | Osel c ty, v1::v2::vl => select (eval_static_condition c vl) v1 v2
+
+  | _, _ => Vbot
+  end.
+
+Section SOUNDNESS.
+
+Variable bc: block_classification.
+Variable ge: genv.
+Hypothesis GENV: genv_match bc ge.
+Variable sp: block.
+Hypothesis STACK: bc sp = BCstack.
+
+Ltac InvHyps :=
+  match goal with
+  | [H: None = Some _ |- _ ] => discriminate
+  | [H: Some _ = Some _ |- _] => inv H
+  | [H1: match ?vl with nil => _ | _ :: _ => _ end = Some _ ,
+     H2: list_forall2 _ ?vl _ |- _ ] => inv H2; InvHyps
+  | _ => idtac
+  end.
+
+Lemma eval_static_shift_sound: forall v av s n,
+  vmatch bc v av -> vmatch bc (eval_shift s v n) (eval_static_shift s av n).
+Proof.
+  intros. unfold eval_shift, eval_static_shift; destruct s; auto with va.
+Qed.
+
+Lemma eval_static_shiftl_sound: forall v av s n,
+  vmatch bc v av -> vmatch bc (eval_shiftl s v n) (eval_static_shiftl s av n).
+Proof.
+  intros. unfold eval_shiftl, eval_static_shiftl; destruct s; auto with va.
+Qed.
+
+Lemma eval_static_extend_sound: forall v av x n,
+  vmatch bc v av -> vmatch bc (eval_extend x v n) (eval_static_extend x av n).
+Proof.
+  intros. unfold eval_extend, eval_static_extend; destruct x; auto with va.
+Qed.
+
+Hint Resolve eval_static_shift_sound eval_static_shiftl_sound eval_static_extend_sound: va.
+
+Theorem eval_static_condition_sound:
+  forall cond vargs m aargs,
+  list_forall2 (vmatch bc) vargs aargs ->
+  cmatch (eval_condition cond vargs m) (eval_static_condition cond aargs).
+Proof.
+  intros until aargs; intros VM. inv VM.
+  destruct cond; auto with va.
+  inv H0.
+  destruct cond; simpl; eauto with va.
+  replace (Val.cmp_bool Ceq (Val.and a1 (Vint n)) (Vint Int.zero))
+     with (Val.maskzero_bool a1 n) by (destruct a1; auto).
+  eauto with va.
+  replace (Val.cmp_bool Cne (Val.and a1 (Vint n)) (Vint Int.zero))
+     with (option_map negb (Val.maskzero_bool a1 n)) by (destruct a1; auto).
+  eauto with va.
+  inv H2.
+  destruct cond; simpl; eauto with va.
+  destruct cond; auto with va.
+Qed.
+
+Lemma symbol_address_sound:
+  forall id ofs,
+  vmatch bc (Genv.symbol_address ge id ofs) (Ptr (Gl id ofs)).
+Proof.
+  intros; apply symbol_address_sound; apply GENV.
+Qed.
+
+Lemma symbol_address_sound_2:
+  forall id ofs,
+  vmatch bc (Genv.symbol_address ge id ofs) (Ifptr (Gl id ofs)).
+Proof.
+  intros. unfold Genv.symbol_address. destruct (Genv.find_symbol ge id) as [b|] eqn:F.
+  constructor. constructor. apply GENV; auto.
+  constructor.
+Qed.
+
+Hint Resolve symbol_address_sound symbol_address_sound_2: va.
+
+Theorem eval_static_addressing_sound:
+  forall addr vargs vres aargs,
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = Some vres ->
+  list_forall2 (vmatch bc) vargs aargs ->
+  vmatch bc vres (eval_static_addressing addr aargs).
+Proof.
+  unfold eval_addressing, eval_static_addressing; intros;
+  destruct addr; InvHyps; eauto with va.
+  rewrite Ptrofs.add_zero_l; eauto with va.
+Qed.
+
+Theorem eval_static_operation_sound:
+  forall op vargs m vres aargs,
+  eval_operation ge (Vptr sp Ptrofs.zero) op vargs m = Some vres ->
+  list_forall2 (vmatch bc) vargs aargs ->
+  vmatch bc vres (eval_static_operation op aargs).
+Proof.
+  unfold eval_operation, eval_static_operation; intros;
+  destruct op; InvHyps; eauto with va.
+  destruct (propagate_float_constants tt); constructor.
+  destruct (propagate_float_constants tt); constructor.
+  rewrite Ptrofs.add_zero_l; eauto with va.
+  apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
+  apply select_sound; eauto using eval_static_condition_sound.
+Qed.
+
+End SOUNDNESS.
+
diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v
new file mode 100644
index 00000000..e82056e2
--- /dev/null
+++ b/aarch64/extractionMachdep.v
@@ -0,0 +1,24 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Additional extraction directives specific to the AArch64 port *)
+
+Require Archi Asm.
+
+(* Archi *)
+
+Extract Constant Archi.pic_code => "fun () -> false".  (* for the time being *)
+
+(* Asm *)
+Extract Constant Asm.symbol_low => "fun _ _ _ -> assert false".
+Extract Constant Asm.symbol_high => "fun _ _ _ -> assert false".
+Extract Constant Asmgen.symbol_is_aligned => "C2C.atom_is_aligned".
diff --git a/arm/AsmToJSON.ml b/arm/AsmToJSON.ml
index 6ba3f1bc..e850fed6 100644
--- a/arm/AsmToJSON.ml
+++ b/arm/AsmToJSON.ml
@@ -19,21 +19,25 @@ open BinNums
 open Camlcoq
 open Json
 
-let mnemonic_names = [ "Padc"; "Padd"; "Padds"; "Pand";"Pannot"; "Pasr"; "Pb"; "Pbc"; "Pbic"; "Pblreg";
-                       "Pblsymb"; "Pbne"; "Pbreg"; "Pbsymb"; "Pbtbl"; "Pclz"; "Pcmp"; "Pcmn"; "Pconstants"; "Pfcpy_iif";
-                       "Pfcpy_fii"; "Pfcpy_fi"; "Pfcpy_sf"; "Pflid_lbl"; "Pflis_lbl"; "Pdmb"; "Pdsb"; "Peor"; "Pfabsd";
-                       "Pfabss"; "Pfaddd"; "Pfadds"; "Pfcmpd"; "Pfcmps"; "Pfcmpzd"; "Pfcmpzs";
-                       "Pfcpyd"; "Pfcpy_fs"; "Pfcpy_if";"Pfcvtds"; "Pfcvtsd"; "Pfdivd"; "Pfdivs"; "Pfldd";
-                       "Pflid"; "Pflds"; "Pflid_imm"; "Pflis_imm"; "Pfmuld"; "Pfmuls"; "Pfnegd";
-                       "Pfnegs"; "Pfsitod"; "Pfsitos"; "Pfsqrt"; "Pfstd";
-                       "Pfsts"; "Pfsubd"; "Pfsubs"; "Pftosizd"; "Pftosizs"; "Pftouizd";
-                       "Pftouizs"; "Pfuitod"; "Pfuitos"; "Pinlineasm"; "Pisb"; "Plabel"; "Pldr";
-                       "Ploadsymbol_lbl"; "Pldr_p"; "Pldrb"; "Pldrb_p"; "Pldrh"; "Pldrh_p"; "Pldrsb";
-                       "Pldrsh"; "Plsl"; "Plsr"; "Pmla"; "Pmov"; "Pmovite"; "Pfmovite";
-                       "Pmovt"; "Pmovw"; "Pmul"; "Pmvn";  "Ploadsymbol_imm"; "Pnop"; "Porr";
-                       "Ppush"; "Prev"; "Prev16"; "Prsb"; "Prsbs"; "Prsc"; "Psbc"; "Psbfx"; "Psdiv"; "Psmull";
-                       "Pstr"; "Pstr_p"; "Pstrb"; "Pstrb_p"; "Pstrh"; "Pstrh_p"; "Psub"; "Psubs"; "Pudiv";
-                       "Pumull" ]
+module StringSet = Set.Make(String)
+
+let mnemonic_names = StringSet.of_list
+    [ "Padc"; "Padd"; "Padds"; "Pand";"Pannot"; "Pasr"; "Pb"; "Pbc"; "Pbic";
+      "Pblreg"; "Pblsymb"; "Pbne"; "Pbreg"; "Pbsymb"; "Pbtbl"; "Pclz"; "Pcmp";
+      "Pcmn"; "Pconstants"; "Pfcpy_iif"; "Pfcpy_fii"; "Pfcpy_fi"; "Pfcpy_sf";
+      "Pflid_lbl"; "Pflis_lbl"; "Pdmb"; "Pdsb"; "Peor"; "Pfabsd"; "Pfabss";
+      "Pfaddd"; "Pfadds"; "Pfcmpd"; "Pfcmps"; "Pfcmpzd"; "Pfcmpzs"; "Pfcpyd";
+      "Pfcpy_fs"; "Pfcpy_if";"Pfcvtds"; "Pfcvtsd"; "Pfdivd"; "Pfdivs"; "Pfldd";
+      "Pflid"; "Pflds"; "Pflid_imm"; "Pflis_imm"; "Pfmuld"; "Pfmuls"; "Pfnegd";
+      "Pfnegs"; "Pfsitod"; "Pfsitos"; "Pfsqrt"; "Pfstd"; "Pfsts"; "Pfsubd";
+      "Pfsubs"; "Pftosizd"; "Pftosizs"; "Pftouizd"; "Pftouizs"; "Pfuitod";
+      "Pfuitos"; "Pinlineasm"; "Pisb"; "Plabel"; "Pldr"; "Ploadsymbol_lbl";
+      "Pldr_p"; "Pldrb"; "Pldrb_p"; "Pldrh"; "Pldrh_p"; "Pldrsb"; "Pldrsh";
+      "Plsl"; "Plsr"; "Pmla"; "Pmov"; "Pmovite"; "Pfmovite"; "Pmovt"; "Pmovw";
+      "Pmul"; "Pmvn";  "Ploadsymbol_imm"; "Pnop"; "Porr"; "Ppush"; "Prev";
+      "Prev16"; "Prsb"; "Prsbs"; "Prsc"; "Psbc"; "Psbfx"; "Psdiv"; "Psmull";
+      "Pstr"; "Pstr_p"; "Pstrb"; "Pstrb_p"; "Pstrh"; "Pstrh_p"; "Psub"; "Psubs";
+      "Pudiv";"Pumull" ]
 
 type instruction_arg =
   | ALabel of positive
@@ -143,7 +147,7 @@ let pp_instructions pp ic =
       | _ -> true) ic in
 
   let instruction pp n args =
-    assert (List.mem n mnemonic_names);
+    assert (StringSet.mem n mnemonic_names);
     pp_jobject_start pp;
     pp_jmember ~first:true pp "Instruction Name" pp_jstring n;
     pp_jmember pp "Args" (pp_jarray pp_arg) args;
@@ -313,8 +317,8 @@ let print_if prog sourcename =
   | Some f ->
     let f = Filename.concat !sdump_folder f in
     let oc = open_out_bin f in
-    JsonAST.pp_ast (Format.formatter_of_out_channel oc) pp_instructions prog sourcename;
+    JsonAST.pp_ast oc pp_instructions prog sourcename;
     close_out oc
 
 let pp_mnemonics pp =
-  JsonAST.pp_mnemonics pp mnemonic_names
+  JsonAST.pp_mnemonics pp (StringSet.elements mnemonic_names)
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 1a1e7f2f..f428feea 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -481,6 +481,9 @@ Definition transl_op
       do r <- ireg_of res; do r1 <- ireg_of a1;
       if Int.eq n Int.zero then
         OK (Pmov r (SOreg r1) :: k)
+      else if Int.eq n Int.one then
+        OK (Padd IR14 r1 (SOlsr r1 (Int.repr 31)) ::
+            Pmov r (SOasr IR14 n) :: k)
       else
         OK (Pmov IR14 (SOasr r1 (Int.repr 31)) ::
             Padd IR14 r1 (SOlsr IR14 (Int.sub Int.iwordsize n)) ::
@@ -689,8 +692,12 @@ Definition transl_memory_access_float
     None
     mk_immed addr args k.
 
-Definition transl_load (chunk: memory_chunk) (addr: addressing)
-                       (args: list mreg) (dst: mreg) (k: code) :=
+Definition transl_load (trap : trapping_mode)
+           (chunk: memory_chunk) (addr: addressing)
+           (args: list mreg) (dst: mreg) (k: code) :=
+  match trap with
+  | NOTRAP => Error (msg "Asmgen.transl_load non-trapping loads unsupported on Arm")
+  | TRAP =>
   match chunk with
   | Mint8signed =>
       transl_memory_access_int Pldrsb mk_immed_mem_small dst addr args k
@@ -708,6 +715,7 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
       transl_memory_access_float Pfldd mk_immed_mem_float dst addr args k
   | _ =>
       Error (msg "Asmgen.transl_load")
+  end
   end.
 
 Definition transl_store (chunk: memory_chunk) (addr: addressing)
@@ -747,8 +755,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
           else loadind_int IR13 f.(fn_link_ofs) IR12 c)
   | Mop op args res =>
       transl_op op args res k
-  | Mload chunk addr args dst =>
-      transl_load chunk addr args dst k
+  | Mload trap chunk addr args dst =>
+      transl_load trap chunk addr args dst k
   | Mstore chunk addr args src =>
       transl_store chunk addr args src k
   | Mcall sig (inl arg) =>
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index 25f91d23..92ae524f 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -303,6 +303,7 @@ Proof.
     eapply tail_nolabel_trans. 2: eapply loadind_label; eauto. unfold loadind_int; TailNoLabel.
   eapply transl_op_label; eauto.
   unfold transl_load, transl_memory_access_int, transl_memory_access_float in H.
+  destruct t; try discriminate.
   destruct m; monadInv H; eapply transl_memory_access_label; eauto; simpl; auto.
   unfold transl_store, transl_memory_access_int, transl_memory_access_float in H.
   destruct m; monadInv H; eapply transl_memory_access_label; eauto; simpl; auto.
@@ -618,6 +619,12 @@ Opaque loadind.
   split. eapply agree_set_undef_mreg; eauto. congruence.
   simpl; congruence.
 
+- (* Mload notrap1 *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
+- (* Mload notrap *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
 - (* Mstore *)
   assert (eval_addressing tge sp addr rs##args = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index 807e069d..cdac697e 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -1264,15 +1264,32 @@ Local Transparent destroyed_by_op.
   destruct (rs x0) eqn: X0; simpl in H0; try discriminate.
   destruct (Int.ltu i (Int.repr 31)) eqn: LTU; inv H0.
   revert EQ2. predSpec Int.eq Int.eq_spec i Int.zero; intros EQ2.
+  {
   (* i = 0 *)
   inv EQ2. econstructor.
   split. apply exec_straight_one. simpl. reflexivity. auto.
   split. Simpl. unfold Int.shrx. rewrite Int.shl_zero. unfold Int.divs.
   change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed. auto.
   intros. Simpl.
-  (* i <> 0 *)
-  inv EQ2.
-  assert (LTU': Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize = true).
+  }
+  { (* i <> 0 *)
+    revert EQ2. predSpec Int.eq Int.eq_spec i Int.one; intros EQ2.
+    {
+      inv EQ2.
+      econstructor; split.
+      eapply exec_straight_two; simpl; reflexivity.
+      split.
+      { rewrite X0.
+        rewrite Int.shrx1_shr by reflexivity.
+        Simpl.
+      }
+      { intros.
+        Simpl.
+      }
+    }
+    clear H0.
+    inv EQ2.
+    assert (LTU': Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize = true).
   {
     generalize (Int.ltu_inv _ _ LTU). intros.
     unfold Int.sub, Int.ltu. rewrite Int.unsigned_repr_wordsize.
@@ -1306,6 +1323,7 @@ Local Transparent destroyed_by_op.
   rewrite LTU'; simpl. rewrite LTU''; simpl.
   f_equal. symmetry. apply Int.shrx_shr_2. assumption.
   intros. unfold rs3; Simpl. unfold rs2; Simpl. unfold rs1; Simpl.
+  }
   (* intoffloat *)
   econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
 Transparent destroyed_by_op.
@@ -1540,8 +1558,8 @@ Proof.
 Qed.
 
 Lemma transl_load_correct:
-  forall chunk addr args dst k c (rs: regset) a m v,
-  transl_load chunk addr args dst k = OK c ->
+  forall trap chunk addr args dst k c (rs: regset) a m v,
+  transl_load trap chunk addr args dst k = OK c ->
   eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists rs',
@@ -1549,7 +1567,9 @@ Lemma transl_load_correct:
    /\ rs'#(preg_of dst) = v
    /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
-  intros. destruct chunk; simpl in H.
+  intros.
+  destruct trap; try (simpl in *; discriminate).
+  destruct chunk; simpl in H.
   eapply transl_load_int_correct; eauto.
   eapply transl_load_int_correct; eauto.
   eapply transl_load_int_correct; eauto.
diff --git a/arm/Builtins1.v b/arm/Builtins1.v
index f6e643d2..53c83d7e 100644
--- a/arm/Builtins1.v
+++ b/arm/Builtins1.v
@@ -29,5 +29,5 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
   match b with end.
 
-Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
   match b with end.
diff --git a/arm/CSE2deps.v b/arm/CSE2deps.v
new file mode 100644
index 00000000..9db51bbb
--- /dev/null
+++ b/arm/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+     (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+  && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+  && ((ofsw + size_chunk chunkw <=? ofsr) ||
+      (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+  match addr, addr', args, args' with
+  | (Aindexed ofs), (Aindexed ofs'),
+    (base :: nil), (base' :: nil) =>
+    if peq base base'
+    then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
+    else true  | _, _, _, _ => true
+  end.
diff --git a/arm/CSE2depsproof.v b/arm/CSE2depsproof.v
new file mode 100644
index 00000000..61fe5980
--- /dev/null
+++ b/arm/CSE2depsproof.v
@@ -0,0 +1,129 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+  Ptrofs.wordsize = 32%nat.
+Proof.
+  unfold Ptrofs.wordsize.
+  unfold Wordsize_Ptrofs.wordsize.
+  trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+  Ptrofs.modulus = 4294967296.
+Proof.
+  unfold Ptrofs.modulus.
+  rewrite ptrofs_size.
+  destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section MEMORY_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  Variable base : val.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : int.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Aindexed ofsw) (base :: nil) = Some addrw. 
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Aindexed ofsr) (base :: nil) = Some addrr.
+
+  Lemma load_store_away1 :
+    forall RANGEW : 0 <= Int.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Int.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Int.unsigned ofsw + size_chunk chunkw <= Int.unsigned ofsr
+                       \/ Int.unsigned ofsr + size_chunk chunkr <= Int.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+    destruct base; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsr)) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsw)) as [OFSW | OFSW];
+              rewrite OFSW).
+    
+    all: try rewrite ptrofs_modulus in *.
+
+    all: unfold Ptrofs.of_int.
+   
+    all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 4294967295; lia).
+    all: intuition lia.
+  Qed.
+  
+  Theorem load_store_away :
+    can_swap_accesses_ofs (Int.unsigned ofsr) chunkr (Int.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr =  Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Lemma may_overlap_sound:
+  forall m m' : mem,
+  forall chunk addr args chunk' addr' args' v a a' rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+    (may_overlap chunk addr args chunk' addr' args') = false ->
+    (Mem.storev chunk m a v) = Some m' ->
+    (Mem.loadv chunk' m'  a') = (Mem.loadv chunk' m a').
+Proof.
+  intros until rs.
+  intros ADDR ADDR' OVERLAP STORE.
+  destruct addr; destruct addr'; try discriminate.
+  { (* Aindexed / Aindexed *)
+  destruct args as [ | base [ | ]]. 1,3: discriminate.
+  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+  simpl in OVERLAP.
+  destruct (peq base base'). 2: discriminate.
+  subst base'.
+  destruct (can_swap_accesses_ofs (Int.unsigned i0) chunk' (Int.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  simpl in *.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  }
+Qed.
+
+End SOUNDNESS.
diff --git a/arm/Conventions1.v b/arm/Conventions1.v
index c5277e8d..fe49a781 100644
--- a/arm/Conventions1.v
+++ b/arm/Conventions1.v
@@ -104,13 +104,12 @@ Definition is_float_reg (r: mreg): bool :=
   representation with a single LDM instruction. *)
 
 Definition loc_result (s: signature) : rpair mreg :=
-  match s.(sig_res) with
-  | None => One R0
-  | Some (Tint | Tany32) => One R0
-  | Some (Tfloat | Tsingle | Tany64) => One F0
-  | Some Tlong => if Archi.big_endian
-                  then Twolong R0 R1
-                  else Twolong R1 R0
+  match proj_sig_res s with
+  | Tint | Tany32 => One R0
+  | Tfloat | Tsingle | Tany64 => One F0
+  | Tlong => if Archi.big_endian
+             then Twolong R0 R1
+             else Twolong R1 R0
   end.
 
 (** The result registers have types compatible with that given in the signature. *)
@@ -119,7 +118,7 @@ Lemma loc_result_type:
   forall sig,
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
-  intros. unfold proj_sig_res, loc_result. destruct (sig_res sig) as [[]|]; destruct Archi.big_endian; auto.
+  intros. unfold loc_result. destruct (proj_sig_res sig); destruct Archi.big_endian; auto.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -129,7 +128,7 @@ Lemma loc_result_caller_save:
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
   intros.
-  unfold loc_result. destruct (sig_res s) as [[]|]; destruct Archi.big_endian; simpl; auto.
+  unfold loc_result. destruct (proj_sig_res s); destruct Archi.big_endian; simpl; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -139,14 +138,13 @@ Lemma loc_result_pair:
   match loc_result sg with
   | One _ => True
   | Twolong r1 r2 =>
-        r1 <> r2 /\ sg.(sig_res) = Some Tlong
+        r1 <> r2 /\ proj_sig_res sg = Tlong
      /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
      /\ Archi.ptr64 = false
   end.
 Proof.
-  intros; unfold loc_result; destruct (sig_res sg) as [[]|]; destruct Archi.big_endian; auto.
-  intuition congruence.
-  intuition congruence.
+  intros; unfold loc_result; destruct (proj_sig_res sg); auto.
+  destruct Archi.big_endian; intuition congruence.
 Qed.
 
 (** The location of the result depends only on the result part of the signature *)
@@ -154,7 +152,7 @@ Qed.
 Lemma loc_result_exten:
   forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
 Proof.
-  intros. unfold loc_result. rewrite H; auto.
+  intros. unfold loc_result, proj_sig_res. rewrite H; auto.
 Qed.
 
 (** ** Location of function arguments *)
@@ -271,48 +269,6 @@ Definition loc_arguments (s: signature) : list (rpair loc) :=
       else loc_arguments_hf s.(sig_args) 0 0 0
   end.
 
-(** [size_arguments s] returns the number of [Outgoing] slots used
-  to call a function with signature [s]. *)
-
-Fixpoint size_arguments_hf (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
-  match tyl with
-  | nil => ofs
-  | (Tint|Tany32) :: tys =>
-      if zlt ir 4
-      then size_arguments_hf tys (ir + 1) fr ofs
-      else size_arguments_hf tys ir fr (ofs + 1)
-  | (Tfloat|Tany64) :: tys =>
-      if zlt fr 8
-      then size_arguments_hf tys ir (fr + 1) ofs
-      else size_arguments_hf tys ir fr (align ofs 2 + 2)
-  | Tsingle :: tys =>
-      if zlt fr 8
-      then size_arguments_hf tys ir (fr + 1) ofs
-      else size_arguments_hf tys ir fr (ofs + 1)
-  | Tlong :: tys =>
-      let ir := align ir 2 in
-      if zlt ir 4
-      then size_arguments_hf tys (ir + 2) fr ofs
-      else size_arguments_hf tys ir fr (align ofs 2 + 2)
-  end.
-
-Fixpoint size_arguments_sf (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
-  match tyl with
-  | nil => Z.max 0 ofs
-  | (Tint | Tsingle | Tany32) :: tys => size_arguments_sf tys (ofs + 1)
-  | (Tfloat | Tlong | Tany64) :: tys => size_arguments_sf tys (align ofs 2 + 2)
-  end.
-
-Definition size_arguments (s: signature) : Z :=
-  match Archi.abi with
-  | Archi.Softfloat =>
-      size_arguments_sf s.(sig_args) (-4)
-  | Archi.Hardfloat =>
-      if s.(sig_cc).(cc_vararg)
-      then size_arguments_sf s.(sig_args) (-4)
-      else size_arguments_hf s.(sig_args) 0 0 0
-  end.
-
 (** Argument locations are either non-temporary registers or [Outgoing]
   stack slots at nonnegative offsets. *)
 
@@ -473,173 +429,15 @@ Qed.
 
 Hint Resolve loc_arguments_acceptable: locs.
 
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark size_arguments_hf_above:
-  forall tyl ir fr ofs0,
-  ofs0 <= size_arguments_hf tyl ir fr ofs0.
-Proof.
-  induction tyl; simpl; intros.
-  omega.
-  destruct a.
-  destruct (zlt ir 4); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
-  destruct (zlt fr 8); eauto.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-  set (ir' := align ir 2).
-  destruct (zlt ir' 4); eauto.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-  destruct (zlt fr 8); eauto.
-  apply Z.le_trans with (ofs0 + 1); eauto. omega.
-  destruct (zlt ir 4); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
-  destruct (zlt fr 8); eauto.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-Qed.
-
-Remark size_arguments_sf_above:
-  forall tyl ofs0,
-  Z.max 0 ofs0 <= size_arguments_sf tyl ofs0.
-Proof.
-  induction tyl; simpl; intros.
-  omega.
-  destruct a; (eapply Z.le_trans; [idtac|eauto]).
-  xomega.
-  assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
-  assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
-  xomega.
-  xomega.
-  assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
-Qed.
-
-Lemma size_arguments_above:
-  forall s, size_arguments s >= 0.
-Proof.
-  intros; unfold size_arguments. apply Z.le_ge.
-  assert (0 <= size_arguments_sf (sig_args s) (-4)).
-  { change 0 with (Z.max 0 (-4)). apply size_arguments_sf_above. }
-  assert (0 <= size_arguments_hf (sig_args s) 0 0 0).
-  { apply size_arguments_hf_above. }
-  destruct Archi.abi; [ | destruct (cc_vararg (sig_cc s)) ]; auto.
-Qed.
-
-Lemma loc_arguments_hf_bounded:
-  forall ofs ty tyl ir fr ofs0,
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_hf tyl ir fr ofs0)) ->
-  ofs + typesize ty <= size_arguments_hf tyl ir fr ofs0.
-Proof.
-  induction tyl; simpl; intros.
-  elim H.
-  destruct a.
-- (* int *)
-  destruct (zlt ir 4); destruct H.
-  discriminate.
-  eauto.
-  inv H. apply size_arguments_hf_above.
-  eauto.
-- (* float *)
-  destruct (zlt fr 8); destruct H.
-  discriminate.
-  eauto.
-  inv H. apply size_arguments_hf_above.
-  eauto.
-- (* long *)
-  destruct (zlt (align ir 2) 4).
-  destruct H. discriminate. destruct H. discriminate. eauto.
-  destruct Archi.big_endian.
-  destruct H. inv H.
-  eapply Z.le_trans. 2: apply size_arguments_hf_above. simpl; omega.
-  destruct H. inv H.
-  rewrite <- Z.add_assoc. simpl. apply size_arguments_hf_above.
-  eauto.
-  destruct H. inv H.
-  rewrite <- Z.add_assoc. simpl. apply size_arguments_hf_above.
-  destruct H. inv H.
-  eapply Z.le_trans. 2: apply size_arguments_hf_above. simpl; omega.
-  eauto.
-- (* float *)
-  destruct (zlt fr 8); destruct H.
-  discriminate.
-  eauto.
-  inv H. apply size_arguments_hf_above.
-  eauto.
-- (* any32 *)
-  destruct (zlt ir 4); destruct H.
-  discriminate.
-  eauto.
-  inv H. apply size_arguments_hf_above.
-  eauto.
-- (* any64 *)
-  destruct (zlt fr 8); destruct H.
-  discriminate.
-  eauto.
-  inv H. apply size_arguments_hf_above.
-  eauto.
-Qed.
-
-Lemma loc_arguments_sf_bounded:
-  forall ofs ty tyl ofs0,
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_sf tyl ofs0)) ->
-  Z.max 0 (ofs + typesize ty) <= size_arguments_sf tyl ofs0.
-Proof.
-  induction tyl; simpl; intros.
-  elim H.
-  destruct a.
-- (* int *)
-  destruct H.
-  destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above.
-  eauto.
-- (* float *)
-  destruct H.
-  destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_sf_above.
-  eauto.
-- (* long *)
-  destruct H.
-  destruct Archi.big_endian.
-  destruct (zlt (align ofs0 2) 0); inv H.
-  eapply Z.le_trans. 2: apply size_arguments_sf_above. simpl; xomega.
-  destruct (zlt (align ofs0 2) 0); inv H.
-  rewrite <- Z.add_assoc. simpl. apply size_arguments_sf_above.
-  destruct H.
-  destruct Archi.big_endian.
-  destruct (zlt (align ofs0 2) 0); inv H.
-  rewrite <- Z.add_assoc. simpl. apply size_arguments_sf_above.
-  destruct (zlt (align ofs0 2) 0); inv H.
-  eapply Z.le_trans. 2: apply size_arguments_sf_above. simpl; xomega.
-  eauto.
-- (* float *)
-  destruct H.
-  destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above.
-  eauto.
-- (* any32 *)
-  destruct H.
-  destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above.
-  eauto.
-- (* any64 *)
-  destruct H.
-  destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_sf_above.
-  eauto.
-Qed.
-
-Lemma loc_arguments_bounded:
-  forall (s: signature) (ofs: Z) (ty: typ),
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
-  ofs + typesize ty <= size_arguments s.
-Proof.
-  unfold loc_arguments, size_arguments; intros.
-  assert (In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_sf (sig_args s) (-4))) ->
-          ofs + typesize ty <= size_arguments_sf (sig_args s) (-4)).
-  { intros. eapply Z.le_trans. 2: eapply loc_arguments_sf_bounded; eauto. xomega. }
-  assert (In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_hf (sig_args s) 0 0 0)) ->
-          ofs + typesize ty <= size_arguments_hf (sig_args s) 0 0 0).
-  { intros. eapply loc_arguments_hf_bounded; eauto. }
-  destruct Archi.abi; [ | destruct (cc_vararg (sig_cc s)) ]; eauto.
-Qed.
-
 Lemma loc_arguments_main:
   loc_arguments signature_main = nil.
 Proof.
   unfold loc_arguments.
   destruct Archi.abi; reflexivity.
 Qed.
+
+(** ** Normalization of function results *)
+
+(** No normalization needed. *)
+
+Definition return_value_needs_normalization (t: rettype) := false.
diff --git a/arm/DuplicateOpcodeHeuristic.ml b/arm/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..9b6a6409
--- /dev/null
+++ b/arm/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,22 @@
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+  match cond with
+  | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccompf c | Ccompfs c -> (match c with
+      | Ceq -> Some false
+      | Cne -> Some true
+      | _ -> None
+      )
+  | Cnotcompf c | Cnotcompfs c -> (match c with
+      | Ceq -> Some true
+      | Cne -> Some false
+      | _ -> None
+      )
+  | _ -> None
+
diff --git a/arm/Op.v b/arm/Op.v
index cc90e043..671bdbe4 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -518,6 +518,32 @@ Proof with (try exact I; try reflexivity).
   unfold Val.select. destruct (eval_condition c vl m). apply Val.normalize_type. exact I.
 Qed.
 
+
+Definition is_trapping_op (op : operation) :=
+  match op with
+  | Odiv | Odivu
+  | Oshrximm _
+  | Ointoffloat | Ointuoffloat
+  | Ointofsingle | Ointuofsingle
+  | Ofloatofint | Ofloatofintu
+  | Osingleofint | Osingleofintu => true
+  | _ => false
+  end.
+                
+
+Lemma is_trapping_op_sound:
+  forall op vl sp m,
+    op <> Omove ->
+    is_trapping_op op = false ->
+    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    eval_operation genv sp op vl m <> None.
+Proof.
+  destruct op; intros; simpl in *; try congruence.
+  all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+  all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+  all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+  all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
@@ -975,6 +1001,20 @@ Proof.
   apply Val.offset_ptr_inject; auto.
 Qed.
 
+Lemma eval_addressing_inj_none:
+  forall addr sp1 vl1 sp2 vl2,
+  (forall id ofs,
+      In id (globals_addressing addr) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing ge1 sp1 addr vl1 = None ->
+  eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+  intros until vl2. intros Hglobal Hinjsp Hinjvl.
+  destruct addr; simpl in *;
+  inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate.
+Qed.
 End EVAL_COMPAT.
 
 (** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
@@ -1080,6 +1120,19 @@ Proof.
   destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
 Qed.
 
+Lemma eval_addressing_lessdef_none:
+  forall sp addr vl1 vl2,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = None ->
+  eval_addressing genv sp addr vl2 = None.
+Proof.
+  intros. rewrite val_inject_list_lessdef in H.
+  eapply eval_addressing_inj_none with (sp1 := sp).
+  intros. rewrite <- val_inject_lessdef; auto.
+  rewrite <- val_inject_lessdef; auto.
+  eauto. auto.
+Qed.
+
 End EVAL_LESSDEF.
 
 (** Compatibility of the evaluation functions with memory injections. *)
@@ -1132,6 +1185,19 @@ Proof.
   econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
+Lemma eval_addressing_inject_none:
+  forall addr vl1 vl2,
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+  eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+  intros.
+  rewrite eval_shift_stack_addressing.
+  eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+
 Lemma eval_operation_inject:
   forall op vl1 vl2 v1 m1 m2,
   Val.inject_list f vl1 vl2 ->
diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v
index 75d32ea3..56534c04 100644
--- a/arm/SelectOpproof.v
+++ b/arm/SelectOpproof.v
@@ -757,7 +757,7 @@ Qed.
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
   red; intros until x. unfold cast8unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto.
+  rewrite Val.zero_ext_and. apply eval_andimm. omega.
 Qed.
 
 Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
@@ -770,7 +770,7 @@ Qed.
 Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
 Proof.
   red; intros until x. unfold cast8unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto.
+  rewrite Val.zero_ext_and. apply eval_andimm. omega.
 Qed.
 
 Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
diff --git a/backend/Allnontrap.v b/backend/Allnontrap.v
new file mode 100644
index 00000000..acf03eca
--- /dev/null
+++ b/backend/Allnontrap.v
@@ -0,0 +1,26 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+
+
+Definition transf_ros (ros: reg + ident) : reg + ident := ros.
+
+Definition transf_instr (pc: node) (instr: instruction) :=
+  match instr with
+  | Iload trap chunk addr args dst s => Iload NOTRAP chunk addr args dst s
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map transf_instr f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
+
diff --git a/backend/Allnontrapproof.v b/backend/Allnontrapproof.v
new file mode 100644
index 00000000..92e5a88c
--- /dev/null
+++ b/backend/Allnontrapproof.v
@@ -0,0 +1,215 @@
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import Allnontrap.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some(transf_instr pc i).
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite Hcode.
+  reflexivity.
+Qed.
+
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+  | match_frames_intro: forall res f sp pc rs,
+      match_frames (Stackframe res f sp pc rs)
+                   (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. 
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  constructor; auto. 
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. constructor; auto. constructor.
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  constructor. auto.
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  constructor; auto.
+(* return *)
+- econstructor; split.
+  eapply exec_Ireturn; eauto.
+  constructor; auto.
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/Allocation.v b/backend/Allocation.v
index 13e14530..2323c050 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -58,7 +58,7 @@ Inductive block_shape: Type :=
          (mv2: moves) (s: node)
   | BSopdead (op: operation) (args: list reg) (res: reg)
          (mv: moves) (s: node)
-  | BSload (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
+  | BSload (trap : trapping_mode) (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
          (mv1: moves) (args': list mreg) (dst': mreg)
          (mv2: moves) (s: node)
   | BSloaddead (chunk: memory_chunk) (addr: addressing) (args: list reg) (dst: reg)
@@ -226,15 +226,19 @@ Definition pair_instr_block
       | operation_other _ _ =>
           pair_Iop_block op args res s b
       end
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       let (mv1, b1) := extract_moves nil b in
       match b1 with
-      | Lload chunk' addr' args' dst' :: b2 =>
+      | Lload trap' chunk' addr' args' dst' :: b2 =>
+        assertion (trapping_mode_eq trap' trap);
           if chunk_eq chunk Mint64 && Archi.splitlong then
+            (* TODO: do not support non trapping split loads *)
+            assertion (trapping_mode_eq trap TRAP);
             assertion (chunk_eq chunk' Mint32);
             let (mv2, b3) := extract_moves nil b2 in
             match b3 with
-            | Lload chunk'' addr'' args'' dst'' :: b4 =>
+            | Lload trap'' chunk'' addr'' args'' dst'' :: b4 =>
+              assertion (trapping_mode_eq trap'' TRAP);
                 let (mv3, b5) := extract_moves nil b4 in
                 assertion (chunk_eq chunk'' Mint32);
                 assertion (eq_addressing addr addr');
@@ -254,7 +258,7 @@ Definition pair_instr_block
             assertion (chunk_eq chunk chunk');
             assertion (eq_addressing addr addr');
             assertion (check_succ s b3);
-            Some(BSload chunk addr args dst mv1 args' dst' mv2 s))
+            Some(BSload trap chunk addr args dst mv1 args' dst' mv2 s))
       | _ =>
           assertion (check_succ s b1);
           Some(BSloaddead chunk addr args dst mv1 s)
@@ -310,10 +314,10 @@ Definition pair_instr_block
           Some(BSbuiltin ef args res mv1 args' res' mv2 s)
       | _ => None
       end
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 i =>
       let (mv1, b1) := extract_moves nil b in
       match b1 with
-      | Lcond cond' args' s1' s2' :: b2 =>
+      | Lcond cond' args' s1' s2' i' :: b2 =>
           assertion (eq_condition cond cond');
           assertion (peq s1 s1');
           assertion (peq s2 s2');
@@ -734,11 +738,11 @@ Function add_equations_args (rl: list reg) (tyl: list typ) (ll: list (rpair loc)
 (** [add_equations_res] is similar but is specialized to the case where
   there is only one pseudo-register. *)
 
-Function add_equations_res (r: reg) (oty: option typ) (p: rpair mreg) (e: eqs) : option eqs :=
-  match p, oty with
+Function add_equations_res (r: reg) (ty: typ) (p: rpair mreg) (e: eqs) : option eqs :=
+  match p, ty with
   | One mr, _ =>
       Some (add_equation (Eq Full r (R mr)) e)
-  | Twolong mr1 mr2, Some Tlong =>
+  | Twolong mr1 mr2, Tlong =>
       if Archi.ptr64 then None else
       Some (add_equation (Eq Low r (R mr2)) (add_equation (Eq High r (R mr1)) e))
   | _, _ =>
@@ -1023,7 +1027,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv)
   | BSopdead op args res mv s =>
       assertion (reg_unconstrained res e);
       track_moves env mv e
-  | BSload chunk addr args dst mv1 args' dst' mv2 s =>
+  | BSload trap chunk addr args dst mv1 args' dst' mv2 s =>
       do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
       track_moves env mv1 e2
@@ -1084,7 +1088,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv)
   | BStailcall sg ros args mv1 ros' =>
       let args' := loc_arguments sg in
       assertion (tailcall_is_possible sg);
-      assertion (opt_typ_eq sg.(sig_res) f.(RTL.fn_sig).(sig_res));
+      assertion (rettype_eq sg.(sig_res) f.(RTL.fn_sig).(sig_res));
       assertion (ros_compatible_tailcall ros');
       do e1 <- add_equation_ros ros ros' empty_eqs;
       do e2 <- add_equations_args args (sig_args sg) args' e1;
@@ -1114,7 +1118,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv)
       track_moves env mv empty_eqs
   | BSreturn (Some arg) mv =>
       let arg' := loc_result (RTL.fn_sig f) in
-      do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs;
+      do e1 <- add_equations_res arg (proj_sig_res (RTL.fn_sig f)) arg' empty_eqs;
       track_moves env mv e1
   end.
 
@@ -1263,7 +1267,7 @@ Definition successors_block_shape (bsh: block_shape) : list node :=
   | BShighlong src dst mv s => s :: nil
   | BSop op args res mv1 args' res' mv2 s => s :: nil
   | BSopdead op args res mv s => s :: nil
-  | BSload chunk addr args dst mv1 args' dst' mv2 s => s :: nil
+  | BSload trap chunk addr args dst mv1 args' dst' mv2 s => s :: nil
   | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s => s :: nil
   | BSload2_1 addr args dst mv1 args' dst' mv2 s => s :: nil
   | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s => s :: nil
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index 1804f46b..3c7df58a 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -96,44 +96,44 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
       expand_block_shape (BSopdead op args res mv s)
                          (Iop op args res s)
                          (expand_moves mv (Lbranch s :: k))
-  | ebs_load: forall chunk addr args dst mv1 args' dst' mv2 s k,
+  | ebs_load: forall trap chunk addr args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
-      expand_block_shape (BSload chunk addr args dst mv1 args' dst' mv2 s)
-                         (Iload chunk addr args dst s)
+      expand_block_shape (BSload trap chunk addr args dst mv1 args' dst' mv2 s)
+                         (Iload trap chunk addr args dst s)
                          (expand_moves mv1
-                           (Lload chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
+                           (Lload trap chunk addr args' dst' :: expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2: forall addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s k,
       wf_moves mv1 -> wf_moves mv2 -> wf_moves mv3 ->
       Archi.splitlong = true ->
       offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2 addr addr2 args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr args1' dst1' ::
+                           (Lload TRAP Mint32 addr args1' dst1' ::
                            expand_moves mv2
-                             (Lload Mint32 addr2 args2' dst2' ::
+                             (Lload TRAP Mint32 addr2 args2' dst2' ::
                               expand_moves mv3 (Lbranch s :: k))))
   | ebs_load2_1: forall addr args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
       Archi.splitlong = true ->
       expand_block_shape (BSload2_1 addr args dst mv1 args' dst' mv2 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr args' dst' ::
+                           (Lload TRAP Mint32 addr args' dst' ::
                             expand_moves mv2 (Lbranch s :: k)))
   | ebs_load2_2: forall addr addr2 args dst mv1 args' dst' mv2 s k,
       wf_moves mv1 -> wf_moves mv2 ->
       Archi.splitlong = true ->
       offset_addressing addr 4 = Some addr2 ->
       expand_block_shape (BSload2_2 addr addr2 args dst mv1 args' dst' mv2 s)
-                         (Iload Mint64 addr args dst s)
+                         (Iload TRAP Mint64 addr args dst s)
                          (expand_moves mv1
-                           (Lload Mint32 addr2 args' dst' ::
+                           (Lload TRAP Mint32 addr2 args' dst' ::
                             expand_moves mv2 (Lbranch s :: k)))
-  | ebs_load_dead: forall chunk addr args dst mv s k,
+  | ebs_load_dead: forall trap chunk addr args dst mv s k,
       wf_moves mv ->
       expand_block_shape (BSloaddead chunk addr args dst mv s)
-                         (Iload chunk addr args dst s)
+                         (Iload trap chunk addr args dst s)
                          (expand_moves mv (Lbranch s :: k))
   | ebs_store: forall chunk addr args src mv1 args' src' s k,
       wf_moves mv1 ->
@@ -169,11 +169,11 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
                          (Ibuiltin ef args res s)
                          (expand_moves mv1
                            (Lbuiltin ef args' res' :: expand_moves mv2 (Lbranch s :: k)))
-  | ebs_cond: forall cond args mv args' s1 s2 k,
+  | ebs_cond: forall cond args mv args' s1 s2 k i i',
       wf_moves mv ->
       expand_block_shape (BScond cond args mv args' s1 s2)
-                         (Icond cond args s1 s2)
-                         (expand_moves mv (Lcond cond args' s1 s2 :: k))
+                         (Icond cond args s1 s2 i)
+                         (expand_moves mv (Lcond cond args' s1 s2 i' :: k))
   | ebs_jumptable: forall arg mv arg' tbl k,
       wf_moves mv ->
       expand_block_shape (BSjumptable arg mv arg' tbl)
@@ -1301,10 +1301,10 @@ Proof.
 Qed.
 
 Lemma add_equations_res_lessdef:
-  forall r oty l e e' rs ls,
-  add_equations_res r oty l e = Some e' ->
+  forall r ty l e e' rs ls,
+  add_equations_res r ty l e = Some e' ->
   satisf rs ls e' ->
-  Val.has_type rs#r (match oty with Some ty => ty | None => Tint end) ->
+  Val.has_type rs#r ty ->
   Val.lessdef rs#r (Locmap.getpair (map_rpair R l) ls).
 Proof.
   intros. functional inversion H; simpl.
@@ -1892,7 +1892,7 @@ Qed.
 
 Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signature -> Prop :=
   | match_stackframes_nil: forall sg,
-      sg.(sig_res) = Some Tint ->
+      sg.(sig_res) = Tint ->
       match_stackframes nil nil sg
   | match_stackframes_cons:
       forall res f sp pc rs s tf bb ls ts sg an e env
@@ -1970,8 +1970,8 @@ Ltac UseShape :=
   end.
 
 Remark addressing_not_long:
-  forall env f addr args dst s r,
-  wt_instr f env (Iload Mint64 addr args dst s) -> Archi.splitlong = true ->
+  forall trap env f addr args dst s r,
+  wt_instr f env (Iload trap Mint64 addr args dst s) -> Archi.splitlong = true ->
   In r args -> r <> dst.
 Proof.
   intros. inv H.
@@ -1981,7 +1981,7 @@ Proof.
   { rewrite <- H5. apply in_map; auto. }
   assert (C: env r = Tint).
   { apply A in B. rewrite B. unfold Tptr. rewrite Archi.splitlong_ptr32 by auto. auto. }
-  red; intros; subst r. rewrite C in H8; discriminate.
+  red; intros; subst r. rewrite C in H9; discriminate.
 Qed.
 
 (** The proof of semantic preservation is a simulation argument of the
@@ -2082,8 +2082,8 @@ Proof.
   econstructor; eauto.
   eapply wt_exec_Iop; eauto.
 
-(* load regular *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+(* load regular TRAP *)
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
@@ -2100,7 +2100,7 @@ Proof.
   econstructor; eauto.
 
 (* load pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2155,7 +2155,7 @@ Proof.
   econstructor; eauto.
 
 (* load first word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2185,7 +2185,7 @@ Proof.
   econstructor; eauto.
 
 (* load second word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
   exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
@@ -2229,6 +2229,79 @@ Proof.
   econstructor; eauto.
   eapply wt_exec_Iload; eauto.
 
+- (* load notrap1 *)
+  generalize (wt_exec_Iload_notrap _ _ _ _ _ _ _ _ WTI WTRS).
+  intro WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_addressing_lessdef_none; eauto. intro Haddr.
+  exploit (exec_moves mv2); eauto.  intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. eapply exec_Lload_notrap1. rewrite <- Haddr.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+
+(* load notrap1 dead *)
+- exploit exec_moves; eauto. intros [ls1 [X Y]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_right. eexact X. econstructor; eauto.
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto.
+  eapply reg_unconstrained_satisf; eauto.
+  intros [enext [U V]].
+  econstructor; eauto.
+  eapply wt_exec_Iload_notrap; eauto.
+
+(* load regular notrap2 *)
+- generalize (wt_exec_Iload_notrap _ _ _ _ _ _ _ _ WTI WTRS).
+  intro WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]].
+  exploit transfer_use_def_satisf; eauto. intros [X Y].
+  exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
+  destruct (Mem.loadv chunk m' a') as [v' |] eqn:Hload.
+  { exploit (exec_moves mv2 env (rs # dst <- Vundef)); eauto.  intros [ls2 [A2 B2]].
+      econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. econstructor. instantiate (1 := a'). rewrite <- F.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto. eauto.
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+  }
+  { exploit (exec_moves mv2 env (rs # dst <- Vundef)); eauto.  intros [ls2 [A2 B2]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_trans. eexact A1.
+  eapply star_left. eapply exec_Lload_notrap2. rewrite <- F.
+  apply eval_addressing_preserved. exact symbols_preserved. assumption.
+  eauto.
+  eapply star_right. eexact A2. constructor.
+  eauto. eauto. eauto. traceEq.
+  exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]].
+  econstructor; eauto.
+  }
+  
+- (* load notrap2 dead *)
+  exploit exec_moves; eauto. intros [ls1 [X Y]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto.
+  eapply star_right. eexact X. econstructor; eauto.
+  eauto. traceEq.
+  exploit satisf_successors. eauto. eauto. simpl; eauto. eauto.
+  eapply reg_unconstrained_satisf; eauto.
+  intros [enext [U V]].
+  econstructor; eauto.
+  eapply wt_exec_Iload_notrap; eauto.
+  
 (* store *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]].
   exploit add_equations_lessdef; eauto. intros LD. simpl in LD. inv LD.
@@ -2425,13 +2498,13 @@ Proof.
                           (return_regs (parent_locset ts) ls1))
   with (Locmap.getpair (map_rpair R (loc_result (RTL.fn_sig f))) ls1).
   eapply add_equations_res_lessdef; eauto.
-  rewrite H13. apply WTRS.
+  rewrite <- H14. apply WTRS.
   generalize (loc_result_caller_save (RTL.fn_sig f)).
   destruct (loc_result (RTL.fn_sig f)); simpl.
   intros A; rewrite A; auto.
   intros [A B]; rewrite A, B; auto.
   apply return_regs_agree_callee_save.
-  unfold proj_sig_res. rewrite <- H11; rewrite H13. apply WTRS.
+  rewrite <- H11, <- H14. apply WTRS.
 
 (* internal function *)
 - monadInv FUN. simpl in *.
@@ -2463,7 +2536,8 @@ Proof.
   simpl. destruct (loc_result (ef_sig ef)) eqn:RES; simpl.
   rewrite Locmap.gss; auto.
   generalize (loc_result_pair (ef_sig ef)); rewrite RES; intros (A & B & C & D & E).
-  exploit external_call_well_typed; eauto. unfold proj_sig_res; rewrite B. intros WTRES'.
+  assert (WTRES': Val.has_type v' Tlong).
+  { rewrite <- B. eapply external_call_well_typed; eauto. }
   rewrite Locmap.gss. rewrite Locmap.gso by (red; auto). rewrite Locmap.gss.
   rewrite val_longofwords_eq_1 by auto. auto.
   red; intros. rewrite (AG l H0).
diff --git a/backend/Asmexpandaux.ml b/backend/Asmexpandaux.ml
index b1d822db..cc171cae 100644
--- a/backend/Asmexpandaux.ml
+++ b/backend/Asmexpandaux.ml
@@ -100,7 +100,7 @@ let translate_annot sp preg_to_dwarf annot =
   | a::_ -> aux a)
 
 let builtin_nop =
-  let signature ={sig_args = []; sig_res = None; sig_cc = cc_default} in
+  let signature ={sig_args = []; sig_res = Tvoid; sig_cc = cc_default} in
   let name = coqstring_of_camlstring "__builtin_nop" in
   Pbuiltin(EF_builtin(name,signature),[],BR_none)
 
diff --git a/backend/Asmgenproof0.v b/backend/Asmgenproof0.v
index 70c4323c..3638c465 100644
--- a/backend/Asmgenproof0.v
+++ b/backend/Asmgenproof0.v
@@ -897,6 +897,55 @@ Proof.
   apply code_tail_next_int with i; auto.
 Qed.
 
+(** A variant that supports zero steps of execution *)
+
+Inductive exec_straight_opt: code -> regset -> mem -> code -> regset -> mem -> Prop :=
+  | exec_straight_opt_refl: forall c rs m,
+      exec_straight_opt c rs m c rs m
+  | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2,
+      exec_straight c1 rs1 m1 c2 rs2 m2 ->
+      exec_straight_opt c1 rs1 m1 c2 rs2 m2.
+
+Lemma exec_straight_opt_left:
+  forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
+  exec_straight c1 rs1 m1 c2 rs2 m2 ->
+  exec_straight_opt c2 rs2 m2 c3 rs3 m3 ->
+  exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+  destruct 2; intros. auto. eapply exec_straight_trans; eauto. 
+Qed.
+
+Lemma exec_straight_opt_right:
+  forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
+  exec_straight_opt c1 rs1 m1 c2 rs2 m2 ->
+  exec_straight c2 rs2 m2 c3 rs3 m3 ->
+  exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+  destruct 1; intros. auto. eapply exec_straight_trans; eauto. 
+Qed.
+
+Lemma exec_straight_opt_step:
+  forall i c rs1 m1 rs2 m2 c' rs3 m3,
+  exec_instr ge fn i rs1 m1 = Next rs2 m2 ->
+  rs2#PC = Val.offset_ptr rs1#PC Ptrofs.one ->
+  exec_straight_opt c rs2 m2 c' rs3 m3 ->
+  exec_straight (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+  intros. inv H1. 
+- apply exec_straight_one; auto.
+- eapply exec_straight_step; eauto.
+Qed.
+
+Lemma exec_straight_opt_step_opt:
+  forall i c rs1 m1 rs2 m2 c' rs3 m3,
+  exec_instr ge fn i rs1 m1 = Next rs2 m2 ->
+  rs2#PC = Val.offset_ptr rs1#PC Ptrofs.one ->
+  exec_straight_opt c rs2 m2 c' rs3 m3 ->
+  exec_straight_opt (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+  intros. apply exec_straight_opt_intro. eapply exec_straight_opt_step; eauto.
+Qed.
+
 End STRAIGHTLINE.
 
 (** * Properties of the Mach call stack *)
diff --git a/backend/Bounds.v b/backend/Bounds.v
index fa695234..b8c12166 100644
--- a/backend/Bounds.v
+++ b/backend/Bounds.v
@@ -67,7 +67,7 @@ Definition instr_within_bounds (i: instruction) :=
   | Lgetstack sl ofs ty r => slot_within_bounds sl ofs ty /\ mreg_within_bounds r
   | Lsetstack r sl ofs ty => slot_within_bounds sl ofs ty
   | Lop op args res => mreg_within_bounds res
-  | Lload chunk addr args dst => mreg_within_bounds dst
+  | Lload trap chunk addr args dst => mreg_within_bounds dst
   | Lcall sig ros => size_arguments sig <= bound_outgoing b
   | Lbuiltin ef args res =>
        (forall r, In r (params_of_builtin_res res) \/ In r (destroyed_by_builtin ef) -> mreg_within_bounds r)
@@ -104,7 +104,7 @@ Definition record_regs_of_instr (u: RegSet.t) (i: instruction) : RegSet.t :=
   | Lgetstack sl ofs ty r => record_reg u r
   | Lsetstack r sl ofs ty => record_reg u r
   | Lop op args res => record_reg u res
-  | Lload chunk addr args dst => record_reg u dst
+  | Lload trap chunk addr args dst => record_reg u dst
   | Lstore chunk addr args src => u
   | Lcall sig ros => u
   | Ltailcall sig ros => u
@@ -280,7 +280,7 @@ Definition defined_by_instr (r': mreg) (i: instruction) :=
   match i with
   | Lgetstack sl ofs ty r => r' = r
   | Lop op args res => r' = res
-  | Lload chunk addr args dst => r' = dst
+  | Lload trap chunk addr args dst => r' = dst
   | Lbuiltin ef args res => In r' (params_of_builtin_res res) \/ In r' (destroyed_by_builtin ef)
   | _ => False
   end.
diff --git a/backend/CSE.v b/backend/CSE.v
index ecfa1f9e..1936d4e4 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -459,8 +459,10 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
           before
       | Iop op args res s =>
           add_op before res op args
-      | Iload chunk addr args dst s =>
-          add_load before dst chunk addr args
+      | Iload TRAP chunk addr args dst s =>
+        add_load before dst chunk addr args
+      | Iload NOTRAP _ _ _ dst _ =>
+        set_unknown before dst
       | Istore chunk addr args src s =>
           let app := approx!!pc in
           let n := kill_loads_after_store app before chunk addr args in
@@ -494,7 +496,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
           | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ =>
               set_res_unknown before res
           end
-      | Icond cond args ifso ifnot =>
+      | Icond cond args ifso ifnot _ =>
           before
       | Ijumptable arg tbl =>
           before
@@ -534,23 +536,23 @@ Definition transf_instr (n: numbering) (instr: instruction) :=
             let (op', args') := reduce _ combine_op n1 op args vl in
             Iop op' args' res s
         end
-  | Iload chunk addr args dst s =>
+  | Iload TRAP chunk addr args dst s =>
       let (n1, vl) := valnum_regs n args in
       match find_rhs n1 (Load chunk addr vl) with
       | Some r =>
           Iop Omove (r :: nil) dst s
       | None =>
           let (addr', args') := reduce _ combine_addr n1 addr args vl in
-          Iload chunk addr' args' dst s
+          Iload TRAP chunk addr' args' dst s
       end
   | Istore chunk addr args src s =>
       let (n1, vl) := valnum_regs n args in
       let (addr', args') := reduce _ combine_addr n1 addr args vl in
       Istore chunk addr' args' src s
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 i =>
       let (n1, vl) := valnum_regs n args in
       let (cond', args') := reduce _ combine_cond n1 cond args vl in
-      Icond cond' args' s1 s2
+      Icond cond' args' s1 s2 i
   | _ =>
       instr
   end.
diff --git a/backend/CSE2.v b/backend/CSE2.v
new file mode 100644
index 00000000..900a7517
--- /dev/null
+++ b/backend/CSE2.v
@@ -0,0 +1,518 @@
+(*
+Replace available expressions by the register containing their value.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps CSE2deps.
+
+(* Static analysis *)
+
+Inductive sym_val : Type :=
+| SMove (src : reg)
+| SOp (op : operation) (args : list reg)
+| SLoad (chunk : memory_chunk) (addr : addressing) (args : list reg).
+                                                   
+Definition eq_args (x y : list reg) : { x = y } + { x <> y } :=
+  list_eq_dec peq x y.
+
+Definition eq_sym_val : forall x y : sym_val,
+    {x = y} + { x <> y }.
+Proof.
+  generalize eq_operation.
+  generalize eq_args.
+  generalize peq.
+  generalize eq_addressing.
+  generalize chunk_eq.
+  decide equality.
+Defined.
+
+Module RELATION.
+  
+Definition t := (PTree.t sym_val).
+Definition eq (r1 r2 : t) :=
+  forall x, (PTree.get x r1) = (PTree.get x r2).
+
+Definition top : t := PTree.empty sym_val.
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  unfold eq.
+  intros; reflexivity.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  unfold eq.
+  intros; eauto.
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  unfold eq.
+  intros; congruence.
+Qed.
+
+Definition sym_val_beq (x y : sym_val) :=
+  if eq_sym_val x y then true else false.
+
+Definition beq (r1 r2 : t) := PTree.beq sym_val_beq r1 r2.
+
+Lemma beq_correct: forall r1 r2, beq r1 r2 = true -> eq r1 r2.
+Proof.
+  unfold beq, eq. intros r1 r2 EQ x.
+  pose proof (PTree.beq_correct sym_val_beq r1 r2) as CORRECT.
+  destruct CORRECT as [CORRECTF CORRECTB].
+  pose proof (CORRECTF EQ x) as EQx.
+  clear CORRECTF CORRECTB EQ.
+  unfold sym_val_beq in *.
+  destruct (r1 ! x) as [R1x | ] in *;
+    destruct (r2 ! x) as [R2x | ] in *;
+    trivial; try contradiction.
+  destruct (eq_sym_val R1x R2x) in *; congruence.
+Qed.
+
+Definition ge (r1 r2 : t) :=
+  forall x,
+    match PTree.get x r1 with
+    | None => True
+    | Some v => (PTree.get x r2) = Some v
+    end.
+
+Lemma ge_refl: forall r1 r2, eq r1 r2 -> ge r1 r2.
+Proof.
+  unfold eq, ge.
+  intros r1 r2 EQ x.
+  pose proof (EQ x) as EQx.
+  clear EQ.
+  destruct (r1 ! x).
+  - congruence.
+  - trivial.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+Proof.
+  unfold ge.
+  intros r1 r2 r3 GE12 GE23 x.
+  pose proof (GE12 x) as GE12x; clear GE12.
+  pose proof (GE23 x) as GE23x; clear GE23.
+  destruct (r1 ! x); trivial.
+  destruct (r2 ! x); congruence.
+Qed.
+
+Definition lub (r1 r2 : t) :=
+  PTree.combine
+    (fun ov1 ov2 =>
+       match ov1, ov2 with
+       | (Some v1), (Some v2) =>
+         if eq_sym_val v1 v2
+         then ov1
+         else None
+       | None, _
+       | _, None => None
+       end)
+    r1 r2.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (eq_sym_val _ _); trivial.
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (eq_sym_val _ _); trivial.
+  congruence.
+Qed.
+
+End RELATION.
+
+Module Type SEMILATTICE_WITHOUT_BOTTOM.
+
+  Parameter t: Type.
+  Parameter eq: t -> t -> Prop.
+  Axiom eq_refl: forall x, eq x x.
+  Axiom eq_sym: forall x y, eq x y -> eq y x.
+  Axiom eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Parameter beq: t -> t -> bool.
+  Axiom beq_correct: forall x y, beq x y = true -> eq x y.
+  Parameter ge: t -> t -> Prop.
+  Axiom ge_refl: forall x y, eq x y -> ge x y.
+  Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Parameter lub: t -> t -> t.
+  Axiom ge_lub_left: forall x y, ge (lub x y) x.
+  Axiom ge_lub_right: forall x y, ge (lub x y) y.
+
+End SEMILATTICE_WITHOUT_BOTTOM.
+
+Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM).
+  Definition t := option L.t.
+  Definition eq (a b : t) :=
+    match a, b with
+    | None, None => True
+    | Some x, Some y => L.eq x y
+    | Some _, None | None, Some _ => False
+    end.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq; destruct x; trivial.
+    apply L.eq_refl.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; destruct x; destruct y; trivial.
+    apply L.eq_sym.
+  Qed.
+  
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; destruct x; destruct y; destruct z; trivial.
+    - apply L.eq_trans.
+    - contradiction.
+  Qed.
+  
+  Definition beq (x y : t) :=
+    match x, y with
+    | None, None => true
+    | Some x, Some y => L.beq x y
+    | Some _, None | None, Some _ => false
+    end.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq.
+    destruct x; destruct y; trivial; try congruence.
+    apply L.beq_correct.
+  Qed.
+  
+  Definition ge (x y : t) :=
+    match x, y with
+    | None, Some _ => False
+    | _, None => True
+    | Some a, Some b => L.ge a b
+    end.
+  
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    destruct x; destruct y; trivial.
+    apply L.ge_refl.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    destruct x; destruct y; destruct z; trivial; try contradiction.
+    apply L.ge_trans.
+  Qed.
+  
+  Definition bot: t := None.
+  Lemma ge_bot: forall x, ge x bot.
+  Proof.
+    unfold ge, bot.
+    destruct x; trivial.
+  Qed.
+  
+  Definition lub (a b : t) :=
+    match a, b with
+    | None, _ => b
+    | _, None => a
+    | (Some x), (Some y) => Some (L.lub x y)
+    end.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_left.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_right.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+End ADD_BOTTOM.
+
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+Definition kill_sym_val (dst : reg) (sv : sym_val) :=
+  match sv with
+  | SMove src => if peq dst src then true else false
+  | SOp op args => List.existsb (peq dst) args
+  | SLoad chunk addr args => List.existsb (peq dst) args
+  end.
+                                                 
+Definition kill_reg (dst : reg) (rel : RELATION.t) :=
+  PTree.filter1 (fun x => negb (kill_sym_val dst x))
+                (PTree.remove dst rel).
+  
+Definition kill_sym_val_mem (sv: sym_val) :=
+  match sv with
+  | SMove _ => false
+  | SOp op _ => op_depends_on_memory op
+  | SLoad _ _ _ => true
+  end.
+
+Definition kill_sym_val_store chunk addr args (sv: sym_val) :=
+  match sv with
+  | SMove _ => false
+  | SOp op _ => op_depends_on_memory op
+  | SLoad chunk' addr' args' => may_overlap chunk addr args chunk' addr' args'
+  end.
+
+Definition kill_mem (rel : RELATION.t) :=
+  PTree.filter1 (fun x => negb (kill_sym_val_mem x)) rel.
+
+Definition forward_move (rel : RELATION.t) (x : reg) : reg :=
+  match rel ! x with
+  | Some (SMove org) => org
+  | _ => x
+  end.
+
+Definition kill_store1 chunk addr args rel :=
+  PTree.filter1 (fun x => negb (kill_sym_val_store chunk addr args x)) rel.
+
+Definition kill_store chunk addr args rel :=
+  kill_store1 chunk addr (List.map (forward_move rel) args) rel.
+
+Definition move (src dst : reg) (rel : RELATION.t) :=
+  PTree.set dst (SMove (forward_move rel src)) (kill_reg dst rel).
+
+Definition find_op_fold op args (already : option reg) x sv :=
+                match already with
+                | Some found => already
+                | None =>
+                  match sv with
+                  | (SOp op' args') =>
+                    if (eq_operation op op') && (eq_args args args')
+                    then Some x
+                    else None
+                  | _ => None
+                  end
+                end.
+
+Definition find_op (rel : RELATION.t) (op : operation) (args : list reg) :=
+  PTree.fold (find_op_fold op args) rel None.
+
+Definition find_load_fold chunk addr args (already : option reg) x sv :=
+                match already with
+                | Some found => already
+                | None =>
+                  match sv with
+                  | (SLoad chunk' addr' args') =>
+                    if (chunk_eq chunk chunk') &&
+                       (eq_addressing addr addr') &&
+                       (eq_args args args')
+                    then Some x
+                    else None
+                  | _ => None
+                  end
+                end.
+
+Definition find_load (rel : RELATION.t) (chunk : memory_chunk) (addr : addressing) (args : list reg) :=
+  PTree.fold (find_load_fold chunk addr args) rel None.
+
+Definition oper2 (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  let rel' := kill_reg dst rel in
+  PTree.set dst (SOp op (List.map (forward_move rel') args)) rel'.
+
+Definition oper1 (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  if List.in_dec peq dst args
+  then kill_reg dst rel
+  else oper2 op dst args rel.
+
+Definition oper (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  match find_op rel op (List.map (forward_move rel) args) with
+  | Some r => move r dst rel
+  | None => oper1 op dst args rel
+  end.
+
+Definition gen_oper (op: operation) (dst : reg) (args : list reg)
+           (rel : RELATION.t) :=
+  match op, args with
+  | Omove, src::nil => move src dst rel
+  | _, _ => oper op dst args rel
+  end.
+
+Definition load2 (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  let rel' := kill_reg dst rel in
+  PTree.set dst (SLoad chunk addr (List.map (forward_move rel') args)) rel'.
+
+Definition load1 (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  if List.in_dec peq dst args
+  then kill_reg dst rel
+  else load2 chunk addr dst args rel.
+
+Definition load (chunk: memory_chunk) (addr : addressing)
+           (dst : reg) (args : list reg) (rel : RELATION.t) :=
+  match find_load rel chunk addr (List.map (forward_move rel) args) with
+  | Some r => move r dst rel
+  | None => load1 chunk addr dst args rel
+  end.
+
+(* NO LONGER NEEDED
+Fixpoint list_represents { X : Type } (l : list (positive*X)) (tr : PTree.t X) : Prop :=
+  match l with
+  | nil => True
+  | (r,sv)::tail => (tr ! r) = Some sv /\ list_represents tail tr
+  end.
+
+Lemma elements_represent :
+  forall { X : Type },
+  forall tr : (PTree.t X),
+    (list_represents (PTree.elements tr) tr).
+Proof.
+  intros.
+  generalize (PTree.elements_complete tr).
+  generalize (PTree.elements tr).
+  induction l; simpl; trivial.
+  intro COMPLETE.
+  destruct a as [ r sv ].
+  split.
+  {
+    apply COMPLETE.
+    left; reflexivity.
+  }
+  apply IHl; auto.
+Qed.
+*)
+    
+Definition apply_instr instr (rel : RELATION.t) : RB.t :=
+  match instr with
+  | Inop _
+  | Icond _ _ _ _ _
+  | Ijumptable _ _ => Some rel
+  | Istore chunk addr args _ _ => Some (kill_store chunk addr args rel)
+  | Iop op args dst _ => Some (gen_oper op dst args rel)
+  | Iload trap chunk addr args dst _ => Some (load chunk addr dst args rel)
+  | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
+  | Ibuiltin _ _ res _ => Some (RELATION.top) (* TODO (kill_builtin_res res x) *)
+  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  end.
+
+Definition apply_instr' code (pc : node) (ro : RB.t) : RB.t :=
+  match ro with
+  | None => None
+  | Some x =>
+    match code ! pc with
+    | None => RB.bot
+    | Some instr => apply_instr instr x
+    end
+  end.
+
+Definition forward_map (f : RTL.function) := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+
+Definition forward_move_b (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => forward_move rel x
+  end.
+
+Definition subst_arg (fmap : option (PMap.t RB.t)) (pc : node) (x : reg) : reg :=
+  match fmap with
+  | None => x
+  | Some inv => forward_move_b (PMap.get pc inv) x
+  end.
+
+Definition subst_args fmap pc := List.map (subst_arg fmap pc).
+
+(* Transform *)
+Definition find_op_in_fmap fmap pc op args :=
+  match fmap with
+  | None => None
+  | Some map =>
+    match PMap.get pc map with
+    | Some rel => find_op rel op args
+    | None => None
+    end
+  end.
+
+Definition find_load_in_fmap fmap pc chunk addr args :=
+  match fmap with
+  | None => None
+  | Some map =>
+    match PMap.get pc map with
+    | Some rel => find_load rel chunk addr args
+    | None => None
+    end
+  end.
+
+Definition transf_instr (fmap : option (PMap.t RB.t))
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    let args' := subst_args fmap pc args in
+    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
+    | None => Iop op args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Iload trap chunk addr args dst s =>
+    let args' := subst_args fmap pc args in
+    match find_load_in_fmap fmap pc chunk addr args' with
+    | None => Iload trap chunk addr args' dst s
+    | Some src => Iop Omove (src::nil) dst s
+    end
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) src s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 i =>
+    Icond cond (subst_args fmap pc args) s1 s2 i
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v
new file mode 100644
index 00000000..309ccce1
--- /dev/null
+++ b/backend/CSE2proof.v
@@ -0,0 +1,1715 @@
+(*
+Replace available expressions by the register containing their value.
+
+Proofs.
+
+David Monniaux, CNRS, VERIMAG
+ *)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps CSE2depsproof.
+Require Import Lia.
+
+Lemma args_unaffected:
+  forall rs : regset,
+  forall dst : reg,
+  forall v,
+  forall args : list reg,
+    existsb (fun y : reg => peq dst y) args = false ->
+    (rs # dst <- v ## args) = (rs ## args).
+Proof.
+  induction args; simpl; trivial.
+  destruct (peq dst a) as [EQ | NEQ]; simpl.
+  { discriminate.
+  }
+  intro EXIST.
+  f_equal.
+  {
+    apply Regmap.gso.
+    congruence.
+  }
+  apply IHargs.
+  assumption.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section SAME_MEMORY.
+  Variable m : mem.
+
+Definition sem_sym_val sym rs (v : option val) : Prop :=
+  match sym with
+  | SMove src => v = Some (rs # src)
+  | SOp op args =>
+    v = (eval_operation genv sp op (rs ## args) m)
+  | SLoad chunk addr args =>
+    match eval_addressing genv sp addr rs##args with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => v = Some dat
+                | None => v = None \/ v = Some Vundef
+                end
+    | None   => v = None \/ v = Some Vundef
+    end
+  end.
+    
+Definition sem_reg (rel : RELATION.t) (x : reg) (rs : regset) (v : val) : Prop :=
+  match rel ! x with
+  | None => True
+  | Some sym => sem_sym_val sym rs (Some (rs # x)) 
+  end.
+
+Definition sem_rel (rel : RELATION.t) (rs : regset) :=
+  forall x : reg, (sem_reg rel x rs (rs # x)).
+
+Definition sem_rel_b (relb : RB.t) (rs : regset) :=
+  match relb with
+  | Some rel => sem_rel rel rs
+  | None => False
+  end.
+
+Definition fmap_sem (fmap : option (PMap.t RB.t))
+  (pc : node) (rs : regset) :=
+  match fmap with
+  | None => True
+  | Some m => sem_rel_b (PMap.get pc m) rs
+  end.
+
+Lemma subst_arg_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  forall arg,
+    fmap_sem (forward_map f) pc rs ->
+    rs # (subst_arg (forward_map f) pc arg) = rs # arg.
+Proof.
+  intros until arg.
+  intro SEM.
+  unfold fmap_sem in SEM.
+  destruct (forward_map f) as [map |]in *; trivial.
+  simpl.
+  unfold sem_rel_b, sem_rel, sem_reg in *.
+  destruct (map # pc).
+  2: contradiction.
+  pose proof (SEM arg) as SEMarg.
+  simpl. unfold forward_move.
+  unfold sem_sym_val in *.
+  destruct (t ! arg); trivial.
+  destruct s; congruence.
+Qed.
+
+Lemma subst_args_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  fmap_sem (forward_map f) pc rs ->
+  forall args,
+    rs ## (subst_args (forward_map f) pc args) = rs ## args.
+Proof.
+  induction args; trivial.
+  simpl.
+  f_equal.
+  apply subst_arg_ok; assumption.
+  assumption.
+Qed.
+
+Lemma kill_reg_sound :
+  forall rel : RELATION.t,
+  forall dst : reg,
+  forall rs,
+  forall v,
+    sem_rel rel rs ->
+    sem_rel (kill_reg dst rel) (rs # dst <- v).
+Proof.
+  unfold sem_rel, kill_reg, sem_reg, sem_sym_val.
+  intros until v.
+  intros REL x.
+  rewrite PTree.gfilter1.
+  destruct (Pos.eq_dec dst x).
+  {
+    subst x.
+    rewrite PTree.grs.
+    trivial.
+  }
+  rewrite PTree.gro by congruence.
+  rewrite Regmap.gso by congruence.
+  destruct (rel ! x) as [relx | ] eqn:RELx; trivial.
+  unfold kill_sym_val.
+  pose proof (REL x) as RELinstx.
+  rewrite RELx in RELinstx.
+  destruct relx eqn:SYMVAL.
+  {
+    destruct (peq dst src); simpl.
+    { reflexivity. }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  { destruct existsb eqn:EXISTS; simpl.
+    { reflexivity. }
+    rewrite args_unaffected by exact EXISTS.
+    assumption.
+  }
+  { destruct existsb eqn:EXISTS; simpl.
+    { reflexivity. }
+    rewrite args_unaffected by exact EXISTS.
+    assumption.
+  }
+Qed.
+
+Lemma write_same:
+  forall rs : regset,
+  forall src dst : reg,
+    (rs # dst <- (rs # src)) # src = rs # src.
+Proof.
+  intros.
+  destruct (peq src dst).
+  {
+    subst dst.
+    apply Regmap.gss.
+  }
+  rewrite Regmap.gso by congruence.
+  reflexivity.
+Qed.
+
+Lemma move_sound :
+  forall rel : RELATION.t,
+  forall src dst : reg,
+  forall rs,
+    sem_rel rel rs ->
+    sem_rel (move src dst rel) (rs # dst <- (rs # src)).
+Proof.
+  intros until rs. intros REL x.
+  pose proof (kill_reg_sound rel dst rs (rs # src) REL x) as KILL.
+  pose proof (REL src) as RELsrc.
+  unfold move.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    unfold sem_reg in *.
+    simpl.
+    unfold forward_move.
+    destruct (rel ! src) as [ sv |]; simpl.
+    destruct sv eqn:SV; simpl in *.
+    {
+      destruct (peq dst src0).
+      {
+        subst src0.
+        rewrite Regmap.gss.
+        reflexivity.
+      }
+      rewrite Regmap.gso by congruence.
+      assumption.
+    }
+    all: f_equal; symmetry; apply write_same.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma move_cases_neq:
+  forall dst rel a,
+    a <> dst ->
+    (forward_move (kill_reg dst rel) a) <> dst.
+Proof.
+  intros until a. intro NEQ.
+  unfold kill_reg, forward_move.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  destruct (rel ! a); simpl.
+  2: congruence.
+  destruct s.
+  {
+    unfold kill_sym_val.
+    destruct peq; simpl; congruence.
+  }
+  all: simpl;
+    destruct negb; simpl; congruence.
+Qed.
+
+Lemma args_replace_dst :
+  forall rel,
+  forall args : list reg,
+  forall dst : reg,
+  forall rs : regset,
+  forall v,
+    (sem_rel rel rs) ->
+    not (In dst args) ->
+    (rs # dst <- v)
+    ## (map
+          (forward_move (kill_reg dst rel)) args) = rs ## args.
+Proof.
+  induction args; simpl.
+  1: reflexivity.
+  intros until v.
+  intros REL NOT_IN.
+  rewrite IHargs by auto.
+  f_equal.
+  pose proof (REL a) as RELa.
+  rewrite Regmap.gso by (apply move_cases_neq; auto).
+  unfold kill_reg.
+  unfold sem_reg in RELa.
+  unfold forward_move.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by auto.
+  destruct (rel ! a); simpl; trivial.
+  destruct s; simpl in *; destruct negb; simpl; congruence.
+Qed.
+
+Lemma oper2_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper2 op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL NOT_IN EVAL x.
+  pose proof (kill_reg_sound rel dst rs v REL x) as KILL.
+  unfold oper2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    symmetry.
+    assumption.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma oper1_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper1 op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold oper1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply oper2_sound; auto.
+Qed.
+
+Lemma find_op_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_op rel op args = Some src ->
+    (eval_operation genv sp op (rs ## args) m) = Some (rs # src).
+Proof.
+  intros until rs.
+  unfold find_op.
+  rewrite PTree.fold_spec.
+  intro REL.
+  assert (
+     forall start,
+             match start with
+             | None => True
+             | Some src => eval_operation genv sp op rs ## args m = Some rs # src
+             end -> fold_left
+    (fun (a : option reg) (p : positive * sym_val) =>
+     find_op_fold op args a (fst p) (snd p)) (PTree.elements rel) start =
+                    Some src ->
+             eval_operation genv sp op rs ## args m = Some rs # src) as REC.
+  {
+    unfold sem_rel, sem_reg in REL.
+    generalize (PTree.elements_complete rel).
+    generalize (PTree.elements rel).
+    induction l; simpl.
+    {
+      intros.
+      subst start.
+      assumption.
+    }
+    destruct a as [r sv]; simpl.
+    intros COMPLETE start GEN.
+    apply IHl.
+    {
+      intros.
+      apply COMPLETE.
+      right.
+      assumption.
+    }
+    unfold find_op_fold.
+    destruct start.
+    assumption.
+    destruct sv; trivial.
+    destruct eq_operation; trivial.
+    subst op0.
+    destruct eq_args; trivial.
+    subst args0.
+    simpl.
+    assert ((rel ! r) = Some (SOp op args)) as RELatr.
+    {
+      apply COMPLETE.
+      left.
+      reflexivity.
+    }
+    pose proof (REL r) as RELr.
+    rewrite RELatr in RELr.
+    simpl in RELr.
+    symmetry.
+    assumption.
+  }
+  apply REC; auto.
+Qed.
+
+
+Lemma find_load_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    match eval_addressing genv sp addr rs##args with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end.
+Proof.
+  intros until rs.
+  unfold find_load.
+  rewrite PTree.fold_spec.
+  intro REL.
+  assert (
+     forall start,
+             match start with
+             | None => True
+             | Some src =>
+               match eval_addressing genv sp addr rs##args with
+               | Some a => match Mem.loadv chunk m a with
+                           | Some dat => rs#src = dat
+                           | None => rs#src = Vundef
+                           end
+               | None   => rs#src = Vundef
+               end
+             end ->
+    fold_left
+    (fun (a : option reg) (p : positive * sym_val) =>
+     find_load_fold chunk addr args a (fst p) (snd p)) (PTree.elements rel) start =
+    Some src ->
+    match eval_addressing genv sp addr rs##args with
+               | Some a => match Mem.loadv chunk m a with
+                           | Some dat => rs#src = dat
+                           | None => rs#src = Vundef
+                           end
+               | None   => rs#src = Vundef
+               end) as REC.
+  
+  {
+    unfold sem_rel, sem_reg in REL.
+    generalize (PTree.elements_complete rel).
+    generalize (PTree.elements rel).
+    induction l; simpl.
+    {
+      intros.
+      subst start.
+      assumption.
+    }
+    destruct a as [r sv]; simpl.
+    intros COMPLETE start GEN.
+    apply IHl.
+    {
+      intros.
+      apply COMPLETE.
+      right.
+      assumption.
+    }
+    unfold find_load_fold.
+    destruct start.
+    assumption.
+    destruct sv; trivial.
+    destruct chunk_eq; trivial.
+    subst chunk0.
+    destruct eq_addressing; trivial.
+    subst addr0.
+    destruct eq_args; trivial.
+    subst args0.
+    simpl.
+    assert ((rel ! r) = Some (SLoad chunk addr args)) as RELatr.
+    {
+      apply COMPLETE.
+      left.
+      reflexivity.
+    }
+    pose proof (REL r) as RELr.
+    rewrite RELatr in RELr.
+    simpl in RELr.
+    destruct eval_addressing.
+    { destruct Mem.loadv.
+      congruence.
+      destruct RELr; congruence.
+    }
+    destruct RELr; congruence.
+  }
+  apply REC; auto.
+Qed.
+
+
+Lemma find_load_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    v = rs # src.
+Proof.
+  intros until v. intros REL FINDLOAD ADDR LOAD.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  destruct eval_addressing in *.
+  {
+    replace a with v0 in * by congruence.
+    destruct Mem.loadv in * ; congruence.
+  }
+  discriminate.
+Qed.
+
+Lemma find_load_notrap1_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = None ->
+    rs # src = Vundef.
+Proof.
+  intros until rs. intros REL FINDLOAD ADDR.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  rewrite ADDR in Z.
+  assumption.
+Qed.
+
+Lemma find_load_notrap2_sound' :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall src : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    find_load rel chunk addr args = Some src ->
+    eval_addressing genv sp addr rs##args = Some a ->
+    Mem.loadv chunk m a = None ->
+    rs # src = Vundef.
+Proof.
+  intros until a. intros REL FINDLOAD ADDR LOAD.
+  pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
+  rewrite ADDR in Z.
+  destruct Mem.loadv.
+  discriminate.
+  assumption.
+Qed.
+
+Lemma forward_move_map:
+  forall rel args rs,
+    sem_rel rel rs ->
+    rs ## (map (forward_move rel) args) = rs ## args.
+Proof.
+  induction args; simpl; trivial.
+  intros rs REL.
+  f_equal.
+  2: (apply IHargs; assumption).
+  unfold forward_move, sem_rel, sem_reg, sem_sym_val in *.
+  pose proof (REL a) as RELa.
+  destruct (rel ! a); trivial.
+  destruct s; congruence.
+Qed.
+
+
+Lemma forward_move_rs:
+  forall rel arg rs,
+    sem_rel rel rs ->
+    rs # (forward_move rel arg) = rs # arg.
+Proof.
+  unfold forward_move, sem_rel, sem_reg, sem_sym_val in *.
+  intros until rs.
+  intro REL.
+  pose proof (REL arg) as RELarg.
+  destruct (rel ! arg); trivial.
+  destruct s; congruence.
+Qed.
+
+Lemma oper_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (oper op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold oper.
+  destruct find_op eqn:FIND.
+  {
+    assert (eval_operation genv sp op rs ## (map (forward_move rel) args) m = Some rs # r) as FIND_OP.
+    {
+      apply (find_op_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_OP by assumption.
+    replace v with (rs # r) by congruence.
+    apply move_sound; auto.
+  }
+  apply oper1_sound; trivial.
+Qed.
+
+Lemma gen_oper_sound :
+  forall rel : RELATION.t,
+  forall op : operation,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall v,
+    sem_rel rel rs ->
+    eval_operation genv sp op (rs ## args) m = Some v ->
+    sem_rel (gen_oper op dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL EVAL.
+  unfold gen_oper.
+  destruct op.
+  { destruct args as [ | h0 t0].
+    apply oper_sound; auto.
+    destruct t0.
+    {
+      simpl in *.
+      replace v with (rs # h0) by congruence.
+      apply move_sound; auto.
+    }
+    apply oper_sound; auto.
+  }
+  all: apply oper_sound; auto.
+Qed.
+
+
+Lemma load2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL NOT_IN ADDR LOAD x.
+  pose proof (kill_reg_sound rel dst rs v REL x) as KILL.
+  unfold load2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    destruct eval_addressing.
+    {
+      replace a with v0 in * by congruence.
+      destruct Mem.loadv; congruence.
+    }
+    discriminate.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma load2_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL NOT_IN ADDR x.
+  pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL.
+  unfold load2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    rewrite ADDR.
+    right.
+    trivial.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma load2_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    not (In dst args) ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL NOT_IN ADDR LOAD x.
+  pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL.
+  unfold load2.
+  destruct (peq x dst).
+  {
+    subst x.
+    unfold sem_reg.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    simpl.
+    rewrite args_replace_dst by auto.
+    rewrite ADDR.
+    rewrite LOAD.
+    right; trivial.
+  }
+  rewrite Regmap.gso by congruence.
+  unfold sem_reg.
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso in KILL by congruence.
+  exact KILL.
+Qed.
+
+Lemma load1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_sound with (a := a); auto.
+Qed.
+
+Lemma load1_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_notrap1_sound; auto.
+Qed.
+
+Lemma load1_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL ADDR LOAD.
+  unfold load1.
+  destruct in_dec.
+  {
+    apply kill_reg_sound; auto. 
+  }
+  apply load2_notrap2_sound with (a := a); auto.
+Qed.
+
+Lemma load_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+  forall v,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = Some v ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- v).
+Proof.
+  intros until v.
+  intros REL ADDR LOAD.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND.
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    destruct eval_addressing in *.
+    2: discriminate.
+    replace v0 with a in * by congruence.
+    destruct Mem.loadv in *.
+    2: discriminate.
+    replace v with (rs # src) by congruence.
+    apply move_sound; auto.
+  }
+  apply load1_sound with (a := a); trivial.
+Qed.
+
+Lemma load_notrap1_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = None ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until rs.
+  intros REL ADDR.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND.
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    destruct eval_addressing in *.
+    discriminate.
+    rewrite <- FIND_LOAD.
+    apply move_sound; auto.
+  }
+  apply load1_notrap1_sound; trivial.
+Qed.
+
+Lemma load_notrap2_sound :
+  forall rel : RELATION.t,
+  forall chunk : memory_chunk,
+  forall addr : addressing,
+  forall dst : reg,
+  forall args: list reg,
+  forall rs : regset,
+  forall a,
+    sem_rel rel rs ->
+    eval_addressing genv sp addr (rs ## args) = Some a ->
+    Mem.loadv chunk m a = None ->
+    sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
+Proof.
+  intros until a.
+  intros REL ADDR.
+  unfold load.
+  destruct find_load as [src | ] eqn:FIND.
+  {
+    assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
+    | Some a => match Mem.loadv chunk m a with
+                | Some dat => rs#src = dat
+                | None => rs#src = Vundef
+                end
+    | None   => rs#src = Vundef
+    end) as FIND_LOAD.
+    {
+      apply (find_load_sound rel); trivial.
+    }
+    rewrite forward_move_map in FIND_LOAD by assumption.
+    rewrite ADDR in FIND_LOAD.
+    destruct Mem.loadv; intro.
+    discriminate.
+    rewrite <- FIND_LOAD.
+    apply move_sound; auto.
+  }
+  apply load1_notrap2_sound; trivial.
+Qed.
+
+Lemma kill_reg_weaken:
+  forall res mpc rs,
+    sem_rel mpc rs ->
+    sem_rel (kill_reg res mpc) rs.
+Proof.
+  intros until rs.
+  intros REL x.
+  pose proof (REL x) as RELx.
+  unfold kill_reg, sem_reg in *.
+  rewrite PTree.gfilter1.
+  destruct (peq res x).
+  { subst x.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  rewrite PTree.gro by congruence.
+  destruct (mpc ! x) as [sv | ]; trivial.
+  destruct negb; trivial.
+Qed.
+
+Lemma top_ok:
+  forall rs, sem_rel RELATION.top rs.
+Proof.
+  unfold sem_rel, sem_reg, RELATION.top.
+  intros.
+  rewrite PTree.gempty.
+  reflexivity.
+Qed.
+
+Lemma sem_rel_ge:
+  forall r1 r2 : RELATION.t,
+    (RELATION.ge r1 r2) ->
+    forall rs : regset,
+      (sem_rel r2 rs) -> (sem_rel r1 rs).
+Proof.
+  intros r1 r2 GE rs RE x.
+  pose proof (RE x) as REx.
+  pose proof (GE x) as GEx.
+  unfold sem_reg in *.
+  destruct (r1 ! x) as [r1x | ] in *;
+    destruct (r2 ! x) as [r2x | ] in *;
+    congruence.
+Qed.
+End SAME_MEMORY.
+
+Lemma kill_mem_sound :
+  forall m m' : mem,
+  forall rel : RELATION.t,
+  forall rs,
+    sem_rel m rel rs -> sem_rel m' (kill_mem rel) rs.
+Proof.
+  unfold sem_rel, sem_reg.
+  intros until rs.
+  intros SEM x.
+  pose proof (SEM x) as SEMx.
+  unfold kill_mem.
+  rewrite PTree.gfilter1.
+  unfold kill_sym_val_mem.
+  destruct (rel ! x) as [ sv | ].
+  2: reflexivity.
+  destruct sv; simpl in *; trivial.
+  {
+    destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial.
+    rewrite SEMx.
+    apply op_depends_on_memory_correct; auto.
+  }
+Qed.
+  
+Lemma kill_store_sound :
+  forall m m' : mem,
+  forall rel : RELATION.t,
+  forall chunk addr args a v rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (Mem.storev chunk m a v) = Some m' ->
+    sem_rel m rel rs -> sem_rel m' (kill_store chunk addr args rel) rs.
+Proof.
+  unfold sem_rel, sem_reg.
+  intros until rs.
+  intros ADDR STORE SEM x.
+  pose proof (SEM x) as SEMx.
+  unfold kill_store, kill_store1.
+  rewrite PTree.gfilter1.
+  destruct (rel ! x) as [ sv | ].
+  2: reflexivity.
+  destruct sv; simpl in *; trivial.
+  {
+    destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial.
+    rewrite SEMx.
+    apply op_depends_on_memory_correct; auto.
+  }
+  destruct may_overlap eqn:OVERLAP; simpl; trivial.
+  destruct (eval_addressing genv sp addr0 rs ## args0) eqn:ADDR0.
+  {
+    erewrite may_overlap_sound with (args := (map (forward_move rel) args)).
+    all: try eassumption.
+    
+    erewrite forward_move_map by eassumption.
+    assumption.
+  }
+  intuition congruence.
+Qed. 
+End SOUNDNESS.
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun cu f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. apply match_transform_program; auto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; trivial.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall (f : function) (pc : node) (i : instruction),
+  (fn_code f)!pc = Some i ->
+  (fn_code (transf_function f))!pc =
+    Some(transf_instr (forward_map f) pc i).
+Proof.
+  intros until i. intro CODE.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite CODE.
+  reflexivity.
+Qed.
+
+Definition is_killed_in_map (map : PMap.t RB.t) pc res :=
+  match PMap.get pc map with
+  | None => True
+  | Some rel => exists rel', RELATION.ge rel (kill_reg res rel')
+  end.
+
+Definition is_killed_in_fmap fmap pc res :=
+  match fmap with
+  | None => True
+  | Some map => is_killed_in_map map pc res
+  end.
+
+Definition sem_rel_b' := sem_rel_b fundef unit ge.
+Definition fmap_sem' := fmap_sem fundef unit ge.
+Definition subst_arg_ok' := subst_arg_ok fundef unit ge.
+Definition subst_args_ok' := subst_args_ok fundef unit ge.
+Definition kill_mem_sound' := kill_mem_sound fundef unit ge.
+Definition kill_store_sound' := kill_store_sound fundef unit ge.
+
+Lemma sem_rel_b_ge:
+  forall rb1 rb2 : RB.t,
+    (RB.ge rb1 rb2) ->
+    forall sp m,
+    forall rs : regset,
+      (sem_rel_b' sp m rb2 rs) -> (sem_rel_b' sp m rb1 rs).
+Proof.
+  unfold sem_rel_b', sem_rel_b.
+  destruct rb1 as [r1 | ];
+    destruct rb2 as [r2 | ]; simpl;
+      intros GE sp m rs RE; try contradiction.
+  apply sem_rel_ge with (r2 := r2); assumption.
+Qed.
+
+Lemma apply_instr'_bot :
+  forall code,
+  forall pc,
+    RB.eq (apply_instr' code pc RB.bot) RB.bot.
+Proof.
+  reflexivity.
+Qed.
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+    (forall m : mem,
+     forall vres, (fmap_sem' sp m (forward_map f) pc rs # res <- vres)) ->
+    match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+                                 (STACKS: list_forall2 match_frames stk stk'),
+      (fmap_sem' sp m (forward_map f) pc rs) ->
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+  
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+  
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+- (* op *)
+  unfold transf_instr in *.
+  destruct (if is_trivial_op op then None else find_op_in_fmap (forward_map f) pc op
+               (subst_args (forward_map f) pc args)) eqn:FIND_OP.
+  {
+    destruct (is_trivial_op op).
+    discriminate.
+    unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        rewrite MAP in H0.
+        rewrite find_op_sound with (rel := mpc) (src := r) in H0 by assumption.
+        assumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (gen_oper op res args mpc)).
+    {
+      replace (Some (gen_oper op res args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply gen_oper_sound; auto.
+  }
+  {
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      rewrite (subst_args_ok' sp m) by assumption.
+      rewrite <- H0.
+      apply eval_operation_preserved. exact symbols_preserved.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: constructor.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: contradiction.
+
+    apply sem_rel_b_ge with (rb2 := Some (gen_oper op res args mpc)).
+    {
+      replace (Some (gen_oper op res args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply gen_oper_sound; auto.
+  }
+    
+(* load *)
+- unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  {
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := v); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        symmetry.
+        rewrite MAP in H0.
+        eapply find_load_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_sound with (a := a); auto.
+  }
+  {  
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_sound with (a := a); assumption.
+  }
+  
+- (* load notrap1 *)
+  unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  {
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        rewrite MAP in H0.
+        eapply find_load_notrap1_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_notrap1_sound; auto.
+  }
+  {  
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_notrap1_sound; assumption.
+  }
+  
+(* load notrap2 *)
+- unfold transf_instr in *.
+  destruct find_load_in_fmap eqn:FIND_LOAD.
+  {
+    unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
+    destruct (forward_map f) as [map |] eqn:MAP.
+    2: discriminate.
+    change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. 
+    destruct (map # pc) as [mpc | ] eqn:MPC.
+    2: discriminate.
+    econstructor; split.
+    {
+      eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto.
+      simpl.
+      rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
+      {
+        f_equal.
+        rewrite MAP in H0.
+        eapply find_load_notrap2_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+        all: try eassumption.
+      }
+      unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
+    }
+    constructor; eauto.
+    unfold fmap_sem', fmap_sem in *.
+    rewrite MAP.
+    apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+    {
+      replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      simpl.
+      reflexivity.
+    }
+    unfold sem_rel_b', sem_rel_b.
+    apply load_notrap2_sound with (a := a); auto.
+  }
+  {  
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  rewrite (subst_args_ok' sp m); assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+  {
+    replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    simpl.
+    reflexivity.
+  }
+  apply load_notrap2_sound with (a := a); assumption.
+  }
+  
+- (* store *)
+  econstructor. split.
+  {
+    assert (eval_addressing tge sp addr rs ## args = Some a).
+    rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+    eapply exec_Istore; eauto.
+    rewrite (subst_args_ok' sp m); assumption.
+  }
+  
+  constructor; auto.
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply sem_rel_b_ge with (rb2 := Some (kill_store chunk addr args mpc)); trivial.
+  {
+  replace (Some (kill_store chunk addr args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  rewrite MPC.
+  rewrite H.
+  reflexivity.
+  }
+  eapply (kill_store_sound' sp m); eassumption.
+  
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite (subst_args_ok' sp m) by assumption.
+  constructor. constructor; auto.
+
+  constructor.
+  {
+    intros m' vres.
+    unfold fmap_sem', fmap_sem in *.
+    destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+    destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+    apply sem_rel_b_ge with (rb2 := Some (kill_reg res (kill_mem mpc))).
+    {
+      replace (Some (kill_reg res (kill_mem mpc))) with (apply_instr' (fn_code f) pc (map # pc)).
+      {
+        eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+        2: apply apply_instr'_bot.
+        simpl. tauto.
+      }
+      unfold apply_instr'.
+      rewrite H.
+      rewrite MPC.
+      reflexivity.
+    }
+    apply kill_reg_sound.
+    apply (kill_mem_sound' sp m).
+    assumption.
+  }
+  
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite (subst_args_ok' (Vptr stk Ptrofs.zero) m) by assumption.
+  constructor. auto.
+
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  
+  apply sem_rel_b_ge with (rb2 := Some RELATION.top).
+  {
+    replace (Some RELATION.top) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply top_ok.
+  
+
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite (subst_args_ok' sp m); eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    destruct b; tauto.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite (subst_arg_ok' sp m); eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    apply list_nth_z_in with (n := Int.unsigned n).
+    assumption.
+  }
+  unfold apply_instr'.
+  unfold sem_rel_b in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* return *)
+- destruct or as [arg | ].
+  {
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    unfold regmap_optget.
+    rewrite (subst_arg_ok' (Vptr stk Ptrofs.zero) m) by eassumption.
+    constructor; auto.
+  }
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    constructor; auto.
+  
+  
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem', fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply sem_rel_b_ge with (rb2 := Some RELATION.top).
+  {
+    eapply DS.fixpoint_entry with (code := fn_code f) (successors := successors_instr); try eassumption.
+  }
+  apply top_ok.
+  
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    constructor; auto.
+
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v
index 9b1243c8..34ec0118 100644
--- a/backend/CSEdomain.v
+++ b/backend/CSEdomain.v
@@ -43,7 +43,7 @@ Definition eq_list_valnum: forall (x y: list valnum), {x=y}+{x<>y} := list_eq_de
 
 Definition eq_rhs (x y: rhs) : {x=y}+{x<>y}.
 Proof.
-  generalize chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum.
+  generalize trapping_mode_eq chunk_eq eq_operation eq_addressing eq_valnum eq_list_valnum.
   decide equality.
 Defined.
 
@@ -109,7 +109,16 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem):
   | load_eval_to: forall chunk addr vl a v,
       eval_addressing ge sp addr (map valu vl) = Some a ->
       Mem.loadv chunk m a = Some v ->
-      rhs_eval_to valu ge sp m (Load chunk addr vl) v.
+      rhs_eval_to valu ge sp m (Load chunk addr vl) v
+(*  | load_notrap1_eval_to: forall chunk addr vl,
+      eval_addressing ge sp addr (map valu vl) = None ->
+      rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
+                  (default_notrap_load_value chunk)
+  | load_notrap2_eval_to: forall chunk addr vl a,
+      eval_addressing ge sp addr (map valu vl) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
+                  (default_notrap_load_value chunk) *).
 
 Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem):
                                                       equation -> Prop :=
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 03c7ecfc..5bbb7508 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -71,7 +71,11 @@ Lemma rhs_eval_to_exten:
 Proof.
   intros. inv H; simpl in *.
 - constructor. rewrite valnums_val_exten by assumption. auto.
-- econstructor; eauto. rewrite valnums_val_exten by assumption. auto.
+- eapply load_eval_to; eauto. rewrite valnums_val_exten by assumption. auto.
+(*
+- apply load_notrap1_eval_to; auto. rewrite valnums_val_exten by assumption. assumption.
+- eapply load_notrap2_eval_to; eauto. rewrite valnums_val_exten by assumption. assumption.
+*)
 Qed.
 
 Lemma equation_holds_exten:
@@ -393,6 +397,39 @@ Proof.
 + intros. apply Regmap.gso; auto.
 Qed.
 
+(*
+Lemma add_load_holds_none1:
+  forall valu1 ge sp rs m n addr (args: list reg) chunk dst,
+  numbering_holds valu1 ge sp rs m n ->
+  eval_addressing ge sp addr rs##args = None ->
+  exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst chunk addr args).
+Proof.
+  unfold add_load; intros.
+  destruct (valnum_regs n args) as [n1 vl] eqn:VN.
+  exploit valnum_regs_holds; eauto.
+  intros (valu2 & A & B & C & D & E).
+  eapply add_rhs_holds; eauto.
++ rewrite Regmap.gss; auto. eapply load_notrap1_eval_to. rewrite <- B; eauto.
++ intros. apply Regmap.gso; auto.
+Qed.
+
+Lemma add_load_holds_none2:
+  forall valu1 ge sp rs m n addr (args: list reg) a chunk dst,
+  numbering_holds valu1 ge sp rs m n ->
+  eval_addressing ge sp addr rs##args = Some a ->
+  Mem.loadv chunk m a = None ->
+  exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst NOTRAP chunk addr args).
+Proof.
+  unfold add_load; intros.
+  destruct (valnum_regs n args) as [n1 vl] eqn:VN.
+  exploit valnum_regs_holds; eauto.
+  intros (valu2 & A & B & C & D & E).
+  eapply add_rhs_holds; eauto.
++ rewrite Regmap.gss; auto. eapply load_notrap2_eval_to. rewrite <- B; eauto. assumption. 
++ intros. apply Regmap.gso; auto.
+Qed.
+ *)
+
 Lemma set_unknown_holds:
   forall valu ge sp rs m n r v,
   numbering_holds valu ge sp rs m n ->
@@ -456,8 +493,8 @@ Lemma kill_all_loads_hold:
 Proof.
   intros. eapply kill_equations_hold; eauto.
   unfold filter_loads; intros. inv H1.
-  constructor. rewrite <- H2. apply op_depends_on_memory_correct; auto.
-  discriminate.
+  1: constructor; rewrite <- H2; apply op_depends_on_memory_correct; auto.
+  all: discriminate.
 Qed.
 
 Lemma kill_loads_after_store_holds:
@@ -486,6 +523,20 @@ Proof.
   apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
   erewrite <- regs_valnums_sound by eauto. eauto with va.
   apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
+(*
+- eapply load_notrap1_eval_to; assumption.
+- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
+  eapply load_notrap2_eval_to; eauto.
+  rewrite <- H9.
+  destruct a; simpl in H1; try discriminate.
+  destruct a0; simpl in H9; try discriminate; simpl; trivial.
+  rewrite negb_false_iff in H6. unfold aaddressing in H6.
+  eapply Mem.load_store_other. eauto.
+  eapply pdisjoint_sound; eauto.
+  apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
+  erewrite <- regs_valnums_sound by eauto. eauto with va.
+  apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto with va.
+*)
 Qed.
 
 Lemma store_normalized_range_sound:
@@ -562,6 +613,19 @@ Proof.
   unfold aaddressing. apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
   erewrite <- regs_valnums_sound by eauto. eauto with va.
   auto.
+(*
+- eapply load_notrap1_eval_to; assumption.
+- destruct (regs_valnums n vl) as [rl|] eqn:RV; try discriminate.
+  eapply load_notrap2_eval_to; eauto. rewrite <- H11.
+  destruct a; simpl in H10; try discriminate; simpl; trivial.
+  rewrite negb_false_iff in H8.
+  eapply Mem.load_storebytes_other. eauto.
+  rewrite H6. rewrite Z2Nat.id by omega.
+  eapply pdisjoint_sound. eauto.
+  unfold aaddressing. apply match_aptr_of_aval. eapply eval_static_addressing_sound; eauto.
+  erewrite <- regs_valnums_sound by eauto. eauto with va.
+  auto.
+*)
 Qed.
 
 Lemma load_memcpy:
@@ -1034,6 +1098,10 @@ Proof.
   destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
   destruct SAT as [valu1 NH1].
   exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
+  destruct trap.
+
+  (* TRAP *)
+  {
   destruct (find_rhs n1 (Load chunk addr vl)) as [r|] eqn:?.
 + (* replaced by move *)
   exploit find_rhs_sound; eauto. intros (v' & EV & LD).
@@ -1063,7 +1131,103 @@ Proof.
   unfold transfer; rewrite H.
   eapply add_load_holds; eauto.
   apply set_reg_lessdef; auto.
+  }
+
+  (* NOTRAP *)
+  {
+    assert (exists a' : val,
+       eval_addressing ge sp addr rs' ## args = Some a' /\ Val.lessdef a a')
+       as Haa'.    
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Haa' as [a' [Ha'1 Ha'2]].
+
+    assert (
+    exists v' : val,
+        Mem.loadv chunk m' a' = Some v' /\ Val.lessdef v v') as Hload' by
+    (apply Mem.loadv_extends with (m1 := m) (addr1 := a); assumption).
+    destruct Hload' as [v' [Hv'1 Hv'2]].
+
+    econstructor. split.
+    eapply exec_Iload; eauto.
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+
+    econstructor; eauto.
+    eapply analysis_correct_1; eauto. simpl; eauto.
+    unfold transfer. rewrite H.
+    exists valu1.
+    apply set_unknown_holds.
+    assumption.
+    apply set_reg_lessdef; assumption.
+  }
+
+- (* Iload notrap 1*)
+  destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
+  destruct SAT as [valu1 NH1].
+  exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
+
+    econstructor. split.
+    eapply exec_Iload_notrap1; eauto.
+    rewrite eval_addressing_preserved with (ge1 := ge).
+    apply eval_addressing_lessdef_none with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    exact symbols_preserved.
+
+    econstructor; eauto.
+    eapply analysis_correct_1; eauto. simpl; eauto.
+    unfold transfer. rewrite H.
+    exists valu1.
+    apply set_unknown_holds.
+    assumption.
+    apply set_reg_lessdef.
+    constructor. assumption.
+  
+- (* Iload notrap 2*)
+  destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
+  destruct SAT as [valu1 NH1].
+  exploit valnum_regs_holds; eauto. intros (valu2 & NH2 & EQ & AG & P & Q).
 
+    assert (exists a' : val,
+       eval_addressing ge sp addr rs' ## args = Some a' /\ Val.lessdef a a')
+       as Haa'.    
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Haa' as [a' [Ha'1 Ha'2]].
+
+    destruct (Mem.loadv chunk m' a') eqn:Hload'.
+
+    {
+      econstructor. split.
+      eapply exec_Iload; eauto.
+      try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+      
+      econstructor; eauto.
+      eapply analysis_correct_1; eauto. simpl; eauto.
+      unfold transfer. rewrite H.
+      exists valu1.
+      apply set_unknown_holds.
+      assumption.
+      unfold default_notrap_load_value.
+      apply set_reg_lessdef; eauto.
+    }
+    {
+      econstructor. split.
+      eapply exec_Iload_notrap2; eauto.
+      try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+      
+      econstructor; eauto.
+      eapply analysis_correct_1; eauto. simpl; eauto.
+      unfold transfer. rewrite H.
+      exists valu1.
+      apply set_unknown_holds.
+      assumption.
+      apply set_reg_lessdef.
+      constructor. assumption.
+    }
+    
 - (* Istore *)
   destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.
   destruct SAT as [valu1 NH1].
diff --git a/backend/CleanupLabelsproof.v b/backend/CleanupLabelsproof.v
index e92be2b4..84ca403e 100644
--- a/backend/CleanupLabelsproof.v
+++ b/backend/CleanupLabelsproof.v
@@ -255,6 +255,18 @@ Proof.
   left; econstructor; split.
   econstructor; eauto.
   econstructor; eauto with coqlib.
+(* Lload notrap1 *)
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = None).
+    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  left; econstructor; split.
+  eapply exec_Lload_notrap1; eauto.
+  econstructor; eauto with coqlib.
+(* Lload notrap2 *)
+  assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
+    rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+  left; econstructor; split.
+  eapply exec_Lload_notrap2; eauto.
+  econstructor; eauto with coqlib.
 (* Lstore *)
   assert (eval_addressing tge sp addr (LTL.reglist rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
diff --git a/backend/Cminor.v b/backend/Cminor.v
index ca01ad50..91a4c104 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -676,12 +676,24 @@ Definition outcome_block (out: outcome) : outcome :=
   | out => out
   end.
 
+(*
 Definition outcome_result_value
-    (out: outcome) (retsig: option typ) (vres: val) : Prop :=
+    (out: outcome) (retsig: rettype) (vres: val) : Prop :=
   match out with
   | Out_normal => vres = Vundef
   | Out_return None => vres = Vundef
-  | Out_return (Some v) => retsig <> None /\ vres = v
+  | Out_return (Some v) => retsig <> Tvoid /\ vres = v
+  | Out_tailcall_return v => vres = v
+  | _ => False
+  end.
+*)
+
+Definition outcome_result_value
+    (out: outcome) (vres: val) : Prop :=
+  match out with
+  | Out_normal => vres = Vundef
+  | Out_return None => vres = Vundef
+  | Out_return (Some v) => vres = v
   | Out_tailcall_return v => vres = v
   | _ => False
   end.
@@ -711,7 +723,7 @@ Inductive eval_funcall:
       Mem.alloc m 0 f.(fn_stackspace) = (m1, sp) ->
       set_locals f.(fn_vars) (set_params vargs f.(fn_params)) = e ->
       exec_stmt f (Vptr sp Ptrofs.zero) e m1 f.(fn_body) t e2 m2 out ->
-      outcome_result_value out f.(fn_sig).(sig_res) vres ->
+      outcome_result_value out vres ->
       outcome_free_mem out m2 sp f.(fn_stackspace) m3 ->
       eval_funcall m (Internal f) vargs t m3 vres
   | eval_funcall_external:
@@ -995,7 +1007,7 @@ Proof.
   subst vres. replace k with (call_cont k') by congruence.
   apply star_one. apply step_return_0; auto.
   (* Out_return Some *)
-  destruct H3. subst vres.
+  subst vres.
   replace k with (call_cont k') by congruence.
   apply star_one. eapply step_return_1; eauto.
   (* Out_tailcall_return *)
diff --git a/backend/Cminortyping.v b/backend/Cminortyping.v
index fccbda27..92ec45f2 100644
--- a/backend/Cminortyping.v
+++ b/backend/Cminortyping.v
@@ -130,7 +130,7 @@ Definition opt_set (e: S.typenv) (optid: option ident) (ty: typ) : res S.typenv
   | Some id => S.set e id ty
   end.
 
-Fixpoint type_stmt (tret: option typ) (e: S.typenv) (s: stmt) : res S.typenv :=
+Fixpoint type_stmt (tret: rettype) (e: S.typenv) (s: stmt) : res S.typenv :=
   match s with
   | Sskip => OK e
   | Sassign id a => type_assign e id a
@@ -141,7 +141,7 @@ Fixpoint type_stmt (tret: option typ) (e: S.typenv) (s: stmt) : res S.typenv :=
       do e2 <- type_exprlist e1 args sg.(sig_args);
       opt_set e2 optid (proj_sig_res sg)
   | Stailcall sg fn args =>
-      assertion (opt_typ_eq sg.(sig_res) tret);
+      assertion (rettype_eq sg.(sig_res) tret);
       do e1 <- type_expr e fn Tptr;
       type_exprlist e1 args sg.(sig_args)
   | Sbuiltin optid ef args =>
@@ -163,10 +163,14 @@ Fixpoint type_stmt (tret: option typ) (e: S.typenv) (s: stmt) : res S.typenv :=
   | Sswitch sz a tbl dfl =>
       type_expr e a (if sz then Tlong else Tint)
   | Sreturn opta =>
-      match opta, tret with
-      | None, _ => OK e
-      | Some a, Some t => type_expr e a t
-      | _, _ => Error (msg "inconsistent return")
+      match opta with
+      | None => OK e
+      | Some a => type_expr e a (proj_rettype tret)
+(*
+          if rettype_eq tret Tvoid
+          then Error (msg "inconsistent return")
+          else type_expr e a (proj_rettype tret)
+*)
       end
   | Slabel lbl s1 =>
       type_stmt tret e s1
@@ -186,7 +190,7 @@ Definition type_function (f: function) : res typenv :=
 Section SPEC.
 
 Variable env: ident -> typ.
-Variable tret: option typ.
+Variable tret: rettype.
 
 Inductive wt_expr: expr -> typ -> Prop :=
   | wt_Evar: forall id,
@@ -205,9 +209,9 @@ Inductive wt_expr: expr -> typ -> Prop :=
       wt_expr a1 Tptr ->
       wt_expr (Eload chunk a1) (type_of_chunk chunk).
 
-Definition wt_opt_assign (optid: option ident) (optty: option typ) : Prop :=
+Definition wt_opt_assign (optid: option ident) (ty: rettype) : Prop :=
   match optid with
-  | Some id => match optty with Some ty => ty | None => Tint end = env id
+  | Some id => proj_rettype ty = env id
   | _ => True
   end.
 
@@ -251,8 +255,8 @@ Inductive wt_stmt: stmt -> Prop :=
       wt_stmt (Sswitch sz a tbl dfl)
   | wt_Sreturn_none:
       wt_stmt (Sreturn None)
-  | wt_Sreturn_some: forall a t,
-      tret = Some t -> wt_expr a t ->
+  | wt_Sreturn_some: forall a,
+      wt_expr a (proj_rettype tret) ->
       wt_stmt (Sreturn (Some a))
   | wt_Slabel: forall lbl s1,
       wt_stmt s1 ->
@@ -393,7 +397,7 @@ Proof.
 - constructor; eauto.
 - constructor.
 - constructor; eauto using type_expr_sound with ty.
-- destruct tret, o; try (monadInv T); econstructor; eauto using type_expr_sound with ty.
+- destruct o; try (monadInv T); econstructor; eauto using type_expr_sound with ty.
 - constructor; eauto.
 - constructor.
 Qed.
@@ -414,9 +418,9 @@ Definition wt_env (env: typenv) (e: Cminor.env) : Prop :=
 Definition def_env (f: function) (e: Cminor.env) : Prop :=
   forall id, In id f.(fn_params) \/ In id f.(fn_vars) -> exists v, e!id = Some v.
 
-Inductive wt_cont_call: cont -> option typ -> Prop :=
+Inductive wt_cont_call: cont -> rettype -> Prop :=
   | wt_cont_Kstop:
-      wt_cont_call Kstop (Some Tint)
+      wt_cont_call Kstop Tint
   | wt_cont_Kcall: forall optid f sp e k tret env
         (WT_FN: wt_function env f)
         (WT_CONT: wt_cont env f.(fn_sig).(sig_res) k)
@@ -425,7 +429,7 @@ Inductive wt_cont_call: cont -> option typ -> Prop :=
         (WT_DEST: wt_opt_assign env optid tret),
       wt_cont_call (Kcall optid f sp e k) tret
 
-with wt_cont: typenv -> option typ -> cont -> Prop :=
+with wt_cont: typenv -> rettype -> cont -> Prop :=
   | wt_cont_Kseq: forall env tret s k,
       wt_stmt env tret s ->
       wt_cont env tret k ->
@@ -451,7 +455,7 @@ Inductive wt_state: state -> Prop :=
         (WT_CONT: wt_cont_call k (funsig f).(sig_res)),
       wt_state (Callstate f args k m)
   | wt_return_state: forall v k m tret
-        (WT_RES: Val.has_type v (match tret with None => Tint | Some t => t end))
+        (WT_RES: Val.has_type v (proj_rettype tret))
         (WT_CONT: wt_cont_call k tret),
       wt_state (Returnstate v k m).
 
@@ -651,9 +655,8 @@ Proof.
   rewrite H8; eapply call_cont_wt; eauto.
 - inv WT_STMT. exploit external_call_well_typed; eauto. intros TRES.
   econstructor; eauto using wt_Sskip.
-  unfold proj_sig_res in TRES; red in H5.
-  destruct optid. rewrite H5 in TRES. apply wt_env_assign; auto. assumption.
-  destruct optid. apply def_env_assign; auto. assumption.
+  destruct optid; auto. apply wt_env_assign; auto. rewrite <- H5; auto.
+  destruct optid; auto. apply def_env_assign; auto.
 - inv WT_STMT. econstructor; eauto. econstructor; eauto.
 - inv WT_STMT. destruct b; econstructor; eauto.
 - inv WT_STMT. econstructor; eauto. econstructor; eauto. constructor; auto.
@@ -664,7 +667,7 @@ Proof.
 - econstructor; eauto using wt_Sexit.
 - inv WT_STMT. econstructor; eauto using call_cont_wt. exact I.
 - inv WT_STMT. econstructor; eauto using call_cont_wt.
-  rewrite H2. eapply wt_eval_expr; eauto.
+  eapply wt_eval_expr; eauto.
 - inv WT_STMT. econstructor; eauto.
 - inversion WT_FN; subst.
   assert (WT_CK: wt_cont env (sig_res (fn_sig f)) (call_cont k)).
@@ -675,7 +678,7 @@ Proof.
   constructor; auto.
   apply wt_env_set_locals. apply wt_env_set_params. rewrite H2; auto.
   red; intros. apply def_set_locals. destruct H4; auto. left; apply def_set_params; auto.
-- exploit external_call_well_typed; eauto. unfold proj_sig_res. simpl in *. intros.
+- exploit external_call_well_typed; eauto. intros.
   econstructor; eauto.
 - inv WT_CONT. econstructor; eauto using wt_Sskip.
   red in WT_DEST.
diff --git a/backend/Constprop.v b/backend/Constprop.v
index 4aab7677..0be9438c 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -69,7 +69,7 @@ Fixpoint successor_rec (n: nat) (f: function) (ae: AE.t) (pc: node) : node :=
       match f.(fn_code)!pc with
       | Some (Inop s) =>
           successor_rec n' f ae s
-      | Some (Icond cond args s1 s2) =>
+      | Some (Icond cond args s1 s2 _) =>
           match resolve_branch (eval_static_condition cond (aregs ae args)) with
           | Some b => successor_rec n' f ae (if b then s1 else s2)
           | None => pc
@@ -181,7 +181,7 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
               let (op', args') := op_strength_reduction op args aargs in
               Iop op' args' res s'
           end
-      | Iload chunk addr args dst s =>
+      | Iload TRAP chunk addr args dst s =>
           let aargs := aregs ae args in
           let a := ValueDomain.loadv chunk rm am (eval_static_addressing addr aargs) in
           match const_for_result a with
@@ -189,7 +189,7 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
               Iop cop nil dst s
           | None =>
               let (addr', args') := addr_strength_reduction addr args aargs in
-              Iload chunk addr' args' dst s
+              Iload TRAP chunk addr' args' dst s
           end
       | Istore chunk addr args src s =>
           let aargs := aregs ae args in
@@ -217,14 +217,14 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
              end
           | _, _ => dfl
           end
-      | Icond cond args s1 s2 =>
+      | Icond cond args s1 s2 i =>
           let aargs := aregs ae args in
           match resolve_branch (eval_static_condition cond aargs) with
           | Some b =>
               if b then Inop s1 else Inop s2
           | None =>
               let (cond', args') := cond_strength_reduction cond args aargs in
-              Icond cond' args' s1 s2
+              Icond cond' args' s1 s2 i
           end
       | Ijumptable arg tbl =>
           match areg ae arg with
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index a5d08a0f..60663503 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -142,8 +142,8 @@ Inductive match_pc (f: function) (rs: regset) (m: mem): nat -> node -> node -> P
       f.(fn_code)!pc = Some (Inop s) ->
       match_pc f rs m n s pcx ->
       match_pc f rs m (S n) pc pcx
-  | match_pc_cond: forall n pc cond args s1 s2 pcx,
-      f.(fn_code)!pc = Some (Icond cond args s1 s2) ->
+  | match_pc_cond: forall n pc cond args s1 s2 pcx i,
+      f.(fn_code)!pc = Some (Icond cond args s1 s2 i) ->
       (forall b,
         eval_condition cond rs##args m = Some b ->
         match_pc f rs m n (if b then s1 else s2) pcx) ->
@@ -406,6 +406,8 @@ Proof.
   assert (VM1: vmatch bc a aa) by (eapply eval_static_addressing_sound; eauto with va).
   set (av := loadv chunk (romem_for cu) am aa).
   assert (VM2: vmatch bc v av) by (eapply loadv_sound; eauto).
+  destruct trap.
+  {
   destruct (const_for_result av) as [cop|] eqn:?; intros.
 + (* constant-propagated *)
   exploit const_for_result_correct; eauto. intros (v' & A & B).
@@ -431,6 +433,59 @@ Proof.
   left; econstructor; econstructor; split.
   eapply exec_Iload; eauto.
   eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+  }
+  {
+    assert (exists v2 : val,
+         eval_addressing ge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) = Some v2 /\ Val.lessdef a v2) as Hexist2.
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Hexist2 as [v2 [Heval2 Hlessdef2]].
+    destruct (Mem.loadv_extends chunk m m' a v2 v MEM H1 Hlessdef2) as [vX [Hvx1 Hvx2]].
+    left; econstructor; econstructor; split.
+    eapply exec_Iload with (a := v2); eauto.
+    try (erewrite eval_addressing_preserved with (ge1:=ge); auto;
+    exact symbols_preserved).
+  eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    
+  }
+
+- (* Iload notrap1 *)
+  rename pc'0 into pc. TransfInstr.
+  assert (eval_addressing tge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) =  None).
+  rewrite eval_addressing_preserved with (ge1 := ge); eauto.
+  apply eval_addressing_lessdef_none with (vl1 := rs ## args).
+  apply regs_lessdef_regs; assumption.
+  assumption.
+  exact symbols_preserved.
+  
+  left; econstructor; econstructor; split.
+  eapply exec_Iload_notrap1; eauto.
+  eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+
+- (* Iload notrap2 *)
+  rename pc'0 into pc. TransfInstr.
+    assert (exists v2 : val,
+         eval_addressing ge (Vptr sp0 Ptrofs.zero) addr (rs' ## args) = Some v2 /\ Val.lessdef a v2) as Hexist2.
+    apply eval_addressing_lessdef with (vl1 := rs ## args).
+    apply regs_lessdef_regs; assumption.
+    assumption.
+    destruct Hexist2 as [a' [Heval' Hlessdef']].
+    destruct (Mem.loadv chunk m' a') eqn:Hload'.
+    {
+    left; econstructor; econstructor; split.
+    eapply exec_Iload; eauto.
+   
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+    eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    }
+    {
+      left; econstructor; econstructor; split.
+      eapply exec_Iload_notrap2; eauto.
+   
+    try (rewrite eval_addressing_preserved with (ge1 := ge); auto; exact symbols_preserved).
+    eapply match_states_succ; eauto. apply set_reg_lessdef; auto.
+    }
 
 - (* Istore *)
   rename pc'0 into pc. TransfInstr.
diff --git a/backend/Conventions.v b/backend/Conventions.v
index 6025c6b4..14ffb587 100644
--- a/backend/Conventions.v
+++ b/backend/Conventions.v
@@ -34,6 +34,73 @@ Proof.
   apply IHpl; auto.
 Qed.
 
+(** ** Stack size of function arguments *)
+
+(** [size_arguments s] returns the number of [Outgoing] slots used
+  to call a function with signature [s]. *)
+
+Definition max_outgoing_1 (accu: Z) (l: loc) : Z :=
+  match l with
+  | S Outgoing ofs ty => Z.max accu (ofs + typesize ty)
+  | _ => accu
+  end.
+
+Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z :=
+  match rl with
+  | One l => max_outgoing_1 accu l
+  | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2
+  end.
+
+Definition size_arguments (s: signature) : Z :=
+  List.fold_left max_outgoing_2 (loc_arguments s) 0.
+
+(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
+
+Remark fold_max_outgoing_above:
+  forall l n, fold_left max_outgoing_2 l n >= n.
+Proof.
+  assert (A: forall n l, max_outgoing_1 n l >= n).
+  { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
+  induction l; simpl; intros. 
+  - omega.
+  - eapply Zge_trans. eauto.
+    destruct a; simpl. apply A. eapply Zge_trans; eauto.
+Qed.
+
+Lemma size_arguments_above:
+  forall s, size_arguments s >= 0.
+Proof.
+  intros. apply fold_max_outgoing_above.
+Qed.
+
+Lemma loc_arguments_bounded:
+  forall (s: signature) (ofs: Z) (ty: typ),
+  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
+  ofs + typesize ty <= size_arguments s.
+Proof.
+  intros until ty.
+  assert (A: forall n l, n <= max_outgoing_1 n l).
+  { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
+  assert (B: forall p n,
+             In (S Outgoing ofs ty) (regs_of_rpair p) ->
+             ofs + typesize ty <= max_outgoing_2 n p).
+  { intros. destruct p; simpl in H; intuition; subst; simpl.
+  - xomega.
+  - eapply Z.le_trans. 2: apply A. xomega.
+  - xomega. }
+  assert (C: forall l n,
+             In (S Outgoing ofs ty) (regs_of_rpairs l) ->
+             ofs + typesize ty <= fold_left max_outgoing_2 l n).
+  { induction l; simpl; intros.
+  - contradiction.
+  - rewrite in_app_iff in H. destruct H.
+  + eapply Z.le_trans. eapply B; eauto.
+    apply Z.ge_le. apply fold_max_outgoing_above.
+  + apply IHl; auto.
+  }
+  apply C. 
+Qed.
+
 (** ** Location of function parameters *)
 
 (** A function finds the values of its parameter in the same locations
diff --git a/backend/Deadcode.v b/backend/Deadcode.v
index 2286876e..3412a6fa 100644
--- a/backend/Deadcode.v
+++ b/backend/Deadcode.v
@@ -123,7 +123,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t)
       if is_dead nres then after
       else if is_int_zero nres then (kill res ne, nm)
       else (add_needs args (needs_of_operation op nres) (kill res ne), nm)
-  | Some (Iload chunk addr args dst s) =>
+  | Some (Iload trap chunk addr args dst s) =>
       let ndst := nreg ne dst in
       if is_dead ndst then after
       else if is_int_zero ndst then (kill dst ne, nm)
@@ -142,7 +142,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t)
        nmem_dead_stack f.(fn_stacksize))
   | Some(Ibuiltin ef args res s) =>
       transfer_builtin approx!!pc ef args res ne nm
-  | Some(Icond cond args s1 s2) =>
+  | Some(Icond cond args s1 s2 _) =>
       if peq s1 s2 then after else 
         (add_needs args (needs_of_condition cond) ne, nm)
   | Some(Ijumptable arg tbl) =>
@@ -175,7 +175,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t)
         end
       else
         instr
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       let ndst := nreg (fst an!!pc) dst in
       if is_dead ndst then
         Inop s
@@ -192,7 +192,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t)
       if nmem_contains (snd an!!pc) (aaddr_arg approx!!pc dst) sz
       then instr
       else Inop s
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 _ =>
       if peq s1 s2 then Inop s1 else instr
   | _ =>
       instr
diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v
index 2edc0395..6919fe78 100644
--- a/backend/Deadcodeproof.v
+++ b/backend/Deadcodeproof.v
@@ -829,6 +829,83 @@ Ltac UseTransfer :=
   apply eagree_update; eauto 2 with na.
   eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
 
+- (* load notrap1 *)
+  TransfInstr; UseTransfer.
+  destruct (is_dead (nreg ne dst)) eqn:DEAD;
+  [idtac|destruct (is_int_zero (nreg ne dst)) eqn:INTZERO];
+  simpl in *.
++ (* dead instruction, turned into a nop *)
+  econstructor; split.
+  eapply exec_Inop; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update_dead; auto with na.
++ (* instruction with needs = [I Int.zero], turned into a load immediate of zero. *)
+  econstructor; split.
+  eapply exec_Iop with (v := Vint Int.zero); eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; auto.
+  rewrite is_int_zero_sound by auto.
+  unfold default_notrap_load_value.
+  constructor.
++ (* preserved *)
+  exploit eval_addressing_lessdef_none. eapply add_needs_all_lessdef; eauto. eassumption.
+  intro Hnone'.
+  assert (eval_addressing tge (Vptr sp0 Ptrofs.zero) addr te ## args = None) as Hnone2'.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  assumption.
+  exact symbols_preserved.
+  
+  econstructor; split.
+  eapply exec_Iload_notrap1; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+
+- (* load notrap2 *)
+  TransfInstr; UseTransfer.
+  
+  destruct (is_dead (nreg ne dst)) eqn:DEAD;
+  [idtac|destruct (is_int_zero (nreg ne dst)) eqn:INTZERO];
+  simpl in *.
++ (* dead instruction, turned into a nop *)
+  econstructor; split.
+  eapply exec_Inop; eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update_dead; auto with na.
++ (* instruction with needs = [I Int.zero], turned into a load immediate of zero. *)
+  econstructor; split.
+  eapply exec_Iop with (v := Vint Int.zero); eauto.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; auto.
+  rewrite is_int_zero_sound by auto.
+  unfold default_notrap_load_value.
+  constructor.
++ (* preserved *)
+  exploit eval_addressing_lessdef. eapply add_needs_all_lessdef; eauto. eauto.
+  intros (ta & U & V).
+  destruct (Mem.loadv chunk tm ta) eqn:Hchunk2.
+  {
+  econstructor; split.
+  eapply exec_Iload. eauto.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  eassumption.
+  exact symbols_preserved.
+  eassumption.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+  }
+  {
+  econstructor; split.
+  eapply exec_Iload_notrap2. eauto.
+  erewrite eval_addressing_preserved with (ge1 := ge).
+  eassumption.
+  exact symbols_preserved.
+  eassumption.
+  eapply match_succ_states; eauto. simpl; auto.
+  apply eagree_update; eauto 2 with na.
+  eapply magree_monotone; eauto. intros. apply incl_nmem_add; auto.
+  }
 - (* store *)
   TransfInstr; UseTransfer.
   destruct (nmem_contains nm (aaddressing (vanalyze cu f) # pc addr args)
diff --git a/backend/Debugvar.v b/backend/Debugvar.v
index 1f361030..56908855 100644
--- a/backend/Debugvar.v
+++ b/backend/Debugvar.v
@@ -233,7 +233,7 @@ Definition transfer (lm: labelmap) (before: option avail) (i: instruction):
           (lm, Some (kill (S sl ofs ty) s))
       | Lop op args dst =>
           (lm, Some (kill (R dst) s))
-      | Lload chunk addr args dst =>
+      | Lload trap chunk addr args dst =>
           (lm, Some (kill (R dst) s))
       | Lstore chunk addr args src =>
           (lm, before)
diff --git a/backend/Debugvarproof.v b/backend/Debugvarproof.v
index d31c63ec..95020637 100644
--- a/backend/Debugvarproof.v
+++ b/backend/Debugvarproof.v
@@ -449,6 +449,22 @@ Proof.
   eauto. eauto.
   apply eval_add_delta_ranges. traceEq.
   constructor; auto.
+- (* load notrap1 *)
+  econstructor; split.
+  eapply plus_left.
+  eapply exec_Lload_notrap1.
+  rewrite <- H; apply eval_addressing_preserved; exact symbols_preserved.
+  eauto. eauto.
+  apply eval_add_delta_ranges. traceEq.
+  constructor; auto.
+- (* load notrap2 *)
+  econstructor; split.
+  eapply plus_left.
+  eapply exec_Lload_notrap2.
+  rewrite <- H; apply eval_addressing_preserved; exact symbols_preserved.
+  eauto. eauto.
+  apply eval_add_delta_ranges. traceEq.
+  constructor; auto.
 - (* store *)
   econstructor; split.
   eapply plus_left.
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
new file mode 100644
index 00000000..af85efe4
--- /dev/null
+++ b/backend/Duplicate.v
@@ -0,0 +1,203 @@
+(** RTL node duplication using external oracle. Used to form superblock
+  structures *)
+
+Require Import AST RTL Maps Globalenvs.
+Require Import Coqlib Errors Op.
+
+Local Open Scope error_monad_scope.
+Local Open Scope positive_scope.
+
+(** External oracle returning the new RTL code (entry point unchanged),
+    along with the new entrypoint, and a mapping of new nodes to old nodes *)
+Axiom duplicate_aux: function -> code * node * (PTree.t node).
+
+Extract Constant duplicate_aux => "Duplicateaux.duplicate_aux".
+
+(** * Verification of node duplications *)
+
+Definition verify_is_copy dupmap n n' :=
+  match dupmap!n' with
+  | None => Error(msg "verify_is_copy None")
+  | Some revn => match (Pos.compare n revn) with Eq => OK tt | _ => Error(msg "verify_is_copy invalid map") end
+  end.
+
+Fixpoint verify_is_copy_list dupmap ln ln' :=
+  match ln with
+  | n::ln => match ln' with
+             | n'::ln' => do u <- verify_is_copy dupmap n n';
+                          verify_is_copy_list dupmap ln ln'
+             | nil => Error (msg "verify_is_copy_list: ln' bigger than ln") end
+  | nil => match ln' with
+          | n :: ln' => Error (msg "verify_is_copy_list: ln bigger than ln'")
+          | nil => OK tt end
+  end.
+
+Definition verify_mapping_entrypoint dupmap (f f': function): res unit :=
+  verify_is_copy dupmap (fn_entrypoint f) (fn_entrypoint f').
+
+Lemma product_eq {A B: Type} :
+  (forall (a b: A), {a=b} + {a<>b}) ->
+  (forall (c d: B), {c=d} + {c<>d}) ->
+  forall (x y: A+B), {x=y} + {x<>y}.
+Proof.
+  intros H H'. intros. decide equality.
+Qed.
+
+(** FIXME Ideally i would like to put this in AST.v but i get an "illegal application"
+ * error when doing so *)
+Remark builtin_arg_eq_pos: forall (a b: builtin_arg positive), {a=b} + {a<>b}.
+Proof.
+  intros.
+  apply (builtin_arg_eq Pos.eq_dec).
+Defined.
+Global Opaque builtin_arg_eq_pos.
+
+Remark builtin_res_eq_pos: forall (a b: builtin_res positive), {a=b} + {a<>b}.
+Proof. intros. apply (builtin_res_eq Pos.eq_dec). Qed.
+Global Opaque builtin_res_eq_pos.
+
+Definition verify_match_inst dupmap inst tinst :=
+  match inst with
+  | Inop n => match tinst with Inop n' => do u <- verify_is_copy dupmap n n'; OK tt | _ => Error(msg "verify_match_inst Inop") end
+
+  | Iop op lr r n => match tinst with
+      Iop op' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (eq_operation op op') then
+            if (list_eq_dec Pos.eq_dec lr lr') then
+              if (Pos.eq_dec r r') then
+                OK tt
+              else Error (msg "Different r in Iop")
+            else Error (msg "Different lr in Iop")
+          else Error(msg "Different operations in Iop")
+      | _ => Error(msg "verify_match_inst Inop") end
+
+  | Iload tm m a lr r n => match tinst with
+      | Iload tm' m' a' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (trapping_mode_eq tm tm') then
+            if (chunk_eq m m') then
+              if (eq_addressing a a') then
+                if (list_eq_dec Pos.eq_dec lr lr') then
+                  if (Pos.eq_dec r r') then OK tt
+                  else Error (msg "Different r in Iload")
+                else Error (msg "Different lr in Iload")
+              else Error (msg "Different addressing in Iload")
+            else Error (msg "Different mchunk in Iload")
+          else Error (msg "Different trapping_mode in Iload")
+      | _ => Error (msg "verify_match_inst Iload") end
+
+  | Istore m a lr r n => match tinst with
+      | Istore m' a' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (chunk_eq m m') then
+            if (eq_addressing a a') then
+              if (list_eq_dec Pos.eq_dec lr lr') then
+                if (Pos.eq_dec r r') then OK tt
+                else Error (msg "Different r in Istore")
+              else Error (msg "Different lr in Istore")
+            else Error (msg "Different addressing in Istore")
+          else Error (msg "Different mchunk in Istore")
+      | _ => Error (msg "verify_match_inst Istore") end
+
+  | Icall s ri lr r n => match tinst with
+      | Icall s' ri' lr' r' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (signature_eq s s') then
+            if (product_eq Pos.eq_dec ident_eq ri ri') then
+              if (list_eq_dec Pos.eq_dec lr lr') then
+                if (Pos.eq_dec r r') then OK tt
+                else Error (msg "Different r r' in Icall")
+              else Error (msg "Different lr in Icall")
+            else Error (msg "Different ri in Icall")
+          else Error (msg "Different signatures in Icall")
+      | _ => Error (msg "verify_match_inst Icall") end
+
+  | Itailcall s ri lr => match tinst with
+      | Itailcall s' ri' lr' =>
+          if (signature_eq s s') then
+            if (product_eq Pos.eq_dec ident_eq ri ri') then
+              if (list_eq_dec Pos.eq_dec lr lr') then OK tt
+              else Error (msg "Different lr in Itailcall")
+            else Error (msg "Different ri in Itailcall")
+          else Error (msg "Different signatures in Itailcall")
+      | _ => Error (msg "verify_match_inst Itailcall") end
+
+  | Ibuiltin ef lbar brr n => match tinst with
+      | Ibuiltin ef' lbar' brr' n' =>
+          do u <- verify_is_copy dupmap n n';
+          if (external_function_eq ef ef') then
+            if (list_eq_dec builtin_arg_eq_pos lbar lbar') then
+              if (builtin_res_eq_pos brr brr') then OK tt
+              else Error (msg "Different brr in Ibuiltin")
+            else Error (msg "Different lbar in Ibuiltin")
+          else Error (msg "Different ef in Ibuiltin")
+      | _ => Error (msg "verify_match_inst Ibuiltin") end
+
+  | Icond cond lr n1 n2 i => match tinst with
+      | Icond cond' lr' n1' n2' i' =>
+          if (list_eq_dec Pos.eq_dec lr lr') then
+            if (eq_condition cond cond') then
+              do u1 <- verify_is_copy dupmap n1 n1';
+              do u2 <- verify_is_copy dupmap n2 n2'; OK tt
+            else if (eq_condition (negate_condition cond) cond') then
+              do u1 <- verify_is_copy dupmap n1 n2';
+              do u2 <- verify_is_copy dupmap n2 n1'; OK tt
+            else Error (msg "Incompatible conditions in Icond")
+          else Error (msg "Different lr in Icond")
+      | _ => Error (msg "verify_match_inst Icond") end
+
+  | Ijumptable r ln => match tinst with
+      | Ijumptable r' ln' =>
+          do u <- verify_is_copy_list dupmap ln ln';
+          if (Pos.eq_dec r r') then OK tt
+          else Error (msg "Different r in Ijumptable")
+      | _ => Error (msg "verify_match_inst Ijumptable") end
+
+  | Ireturn or => match tinst with
+      | Ireturn or' =>
+          if (option_eq Pos.eq_dec or or') then OK tt
+          else Error (msg "Different or in Ireturn")
+      | _ => Error (msg "verify_match_inst Ireturn") end
+  end.
+
+Definition verify_mapping_mn dupmap f f' (m: positive*positive) :=
+  let (tn, n) := m in
+  match (fn_code f)!n with
+  | None => Error (msg "verify_mapping_mn: Could not get an instruction at (fn_code f)!n")
+  | Some inst => match (fn_code f')!tn with
+                 | None => Error (msg "verify_mapping_mn: Could not get an instruction at (fn_code xf)!tn")
+                 | Some tinst => verify_match_inst dupmap inst tinst
+                 end
+  end.
+
+Fixpoint verify_mapping_mn_rec dupmap f f' lm :=
+  match lm with
+  | nil => OK tt
+  | m :: lm => do u <- verify_mapping_mn dupmap f f' m;
+               do u2 <- verify_mapping_mn_rec dupmap f f' lm;
+               OK tt
+  end.
+
+Definition verify_mapping_match_nodes dupmap (f f': function): res unit :=
+  verify_mapping_mn_rec dupmap f f' (PTree.elements dupmap).
+
+(** Verifies that the [dupmap] of the translated function [f'] is giving correct information in regards to [f] *)
+Definition verify_mapping dupmap (f f': function) : res unit :=
+  do u <- verify_mapping_entrypoint dupmap f f';
+  do v <- verify_mapping_match_nodes dupmap f f'; OK tt.
+
+(** * Entry points *)
+
+Definition transf_function (f: function) : res function :=
+  let (tcte, dupmap) := duplicate_aux f in
+  let (tc, te) := tcte in
+  let f' := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
+  do u <- verify_mapping dupmap f f';
+  OK f'.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef transf_function f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
new file mode 100644
index 00000000..98e2f325
--- /dev/null
+++ b/backend/Duplicateaux.ml
@@ -0,0 +1,662 @@
+open RTL
+open Maps
+open Camlcoq
+
+let get_some = function
+| None -> failwith "Did not get some"
+| Some thing -> thing
+
+let bfs code entrypoint = begin
+  Printf.printf "bfs\n"; flush stdout;
+  let visited = ref (PTree.map (fun n i -> false) code)
+  and bfs_list = ref []
+  and to_visit = Queue.create ()
+  and node = ref entrypoint
+  in begin
+    Queue.add entrypoint to_visit;
+    while not (Queue.is_empty to_visit) do
+      node := Queue.pop to_visit;
+      if not (get_some @@ PTree.get !node !visited) then begin
+        visited := PTree.set !node true !visited;
+        match PTree.get !node code with
+        | None -> failwith "No such node"
+        | Some i ->
+            bfs_list := !node :: !bfs_list;
+            match i with
+            | Icall(_, _, _, _, n) -> Queue.add n to_visit
+            | Ibuiltin(_, _, _, n) -> Queue.add n to_visit
+            | Ijumptable(_, ln) -> List.iter (fun n -> Queue.add n to_visit) ln
+            | Itailcall _ | Ireturn _ -> ()
+            | Icond (_, _, n1, n2, _) -> Queue.add n1 to_visit; Queue.add n2 to_visit
+            | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> Queue.add n to_visit
+      end
+    done;
+    List.rev !bfs_list
+  end
+end
+
+let optbool o = match o with Some _ -> true | None -> false
+
+let ptree_get_some n ptree = get_some @@ PTree.get n ptree
+
+let get_predecessors_rtl code = begin
+  Printf.printf "get_predecessors_rtl\n"; flush stdout;
+  let preds = ref (PTree.map (fun n i -> []) code) in
+  let process_inst (node, i) =
+    let succ = match i with
+      | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
+      | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
+      | Icond (_,_,n1,n2,_) -> [n1;n2]
+      | Ijumptable (_,ln) -> ln
+      | Itailcall _ | Ireturn _ -> []
+    in List.iter (fun s ->
+      let previous_preds = ptree_get_some s !preds in
+      if optbool @@ List.find_opt (fun e -> e == node) previous_preds then ()
+      else preds := PTree.set s (node::previous_preds) !preds) succ
+  in begin
+    List.iter process_inst (PTree.elements code);
+    !preds
+  end
+end
+
+module PInt = struct
+  type t = P.t
+  let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+let print_intlist l =
+  let rec f = function
+  | [] -> ()
+  | n::ln -> (Printf.printf "%d " (P.to_int n); f ln)
+  in begin
+    Printf.printf "[";
+    f l;
+    Printf.printf "]"
+  end
+
+let print_intset s =
+  let seq = PSet.to_seq s
+  in begin
+    Printf.printf "{";
+    Seq.iter (fun n ->
+      Printf.printf "%d " (P.to_int n)
+    ) seq;
+    Printf.printf "}"
+  end
+
+type vstate = Unvisited | Processed | Visited
+
+(** Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come accross an edge to a Processed node, it's a loop!
+  *)
+let get_loop_headers code entrypoint = begin
+  Printf.printf "get_loop_headers\n"; flush stdout;
+  let visited = ref (PTree.map (fun n i -> Unvisited) code)
+  and is_loop_header = ref (PTree.map (fun n i -> false) code)
+  in let rec dfs_visit code = function
+  | [] -> ()
+  | node :: ln ->
+      match (get_some @@ PTree.get node !visited) with
+      | Visited -> ()
+      | Processed -> begin
+          Printf.printf "Node %d is a loop header\n" (P.to_int node);
+          is_loop_header := PTree.set node true !is_loop_header;
+          visited := PTree.set node Visited !visited
+        end
+      | Unvisited -> begin
+          visited := PTree.set node Processed !visited;
+          match PTree.get node code with
+          | None -> failwith "No such node"
+          | Some i -> let next_visits = (match i with
+            | Icall (_, _, _, _, n) | Ibuiltin (_, _, _, n) | Inop n | Iop (_, _, _, n)
+            | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> [n]
+            | Icond (_, _, n1, n2, _) -> [n1; n2]
+            | Itailcall _ | Ireturn _ -> []
+            | Ijumptable (_, ln) -> ln
+            ) in dfs_visit code next_visits;
+          visited := PTree.set node Visited !visited;
+          dfs_visit code ln
+        end
+  in begin
+    dfs_visit code [entrypoint];
+    !is_loop_header
+  end
+end
+
+let ptree_printbool pt =
+  let elements = PTree.elements pt
+  in begin
+    Printf.printf "[";
+    List.iter (fun (n, b) ->
+      if b then Printf.printf "%d, " (P.to_int n) else ()
+    ) elements;
+    Printf.printf "]"
+  end
+
+(* Looks ahead (until a branch) to see if a node further down verifies
+ * the given predicate *)
+let rec look_ahead code node is_loop_header predicate =
+  if (predicate node) then true
+  else match (get_some @@ PTree.get node code) with
+    | Ireturn _ | Itailcall _ | Icond _ | Ijumptable _ -> false
+    | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n)
+    | Istore (_, _, _, _, n) | Icall (_, _, _, _, n)
+    | Ibuiltin (_, _, _, n) ->
+      if (predicate n) then true
+      else (
+        if (get_some @@ PTree.get n is_loop_header) then false
+        else look_ahead code n is_loop_header predicate
+      )
+
+let do_call_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tCall heuristic..\n";
+    let predicate n = (function
+    | Icall _ -> true
+    | _ -> false) @@ get_some @@ PTree.get n code
+    in let ifso_call = look_ahead code ifso is_loop_header predicate
+    in let ifnot_call = look_ahead code ifnot is_loop_header predicate
+    in if ifso_call && ifnot_call then None
+    else if ifso_call then Some false
+    else if ifnot_call then Some true
+    else None
+  end
+
+let do_opcode_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tOpcode heuristic..\n";
+    DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header
+  end
+
+let do_return_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tReturn heuristic..\n";
+    let predicate n = (function
+    | Ireturn _ -> true
+    | _ -> false) @@ get_some @@ PTree.get n code
+    in let ifso_return = look_ahead code ifso is_loop_header predicate
+    in let ifnot_return = look_ahead code ifnot is_loop_header predicate
+    in if ifso_return && ifnot_return then None
+    else if ifso_return then Some false
+    else if ifnot_return then Some true
+    else None
+  end
+
+let do_store_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tStore heuristic..\n";
+    let predicate n = (function
+    | Istore _ -> true
+    | _ -> false) @@ get_some @@ PTree.get n code
+    in let ifso_store = look_ahead code ifso is_loop_header predicate
+    in let ifnot_store = look_ahead code ifnot is_loop_header predicate
+    in if ifso_store && ifnot_store then None
+    else if ifso_store then Some false
+    else if ifnot_store then Some true
+    else None
+  end
+
+let do_loop_heuristic code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tLoop heuristic..\n";
+    let predicate n = get_some @@ PTree.get n is_loop_header in
+    let ifso_loop = look_ahead code ifso is_loop_header predicate in
+    let ifnot_loop = look_ahead code ifnot is_loop_header predicate in
+    if ifso_loop && ifnot_loop then None (* TODO - take the innermost loop ? *)
+    else if ifso_loop then Some true
+    else if ifnot_loop then Some false
+    else None
+  end
+
+let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
+  begin
+    Printf.printf "\tLoop2 heuristic..\n";
+    match get_some @@ PTree.get n loop_info with
+    | None -> None
+    | Some b -> Some b
+  end
+
+(* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *)
+(* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *)
+let get_loop_info is_loop_header bfs_order code =
+  let loop_info = ref (PTree.map (fun n i -> None) code) in
+  let mark_path s n =
+    let visited = ref (PTree.map (fun n i -> false) code) in
+    let rec explore src dest =
+      if (get_some @@ PTree.get src !visited) then false
+      else if src == dest then true
+      else begin
+        visited := PTree.set src true !visited;
+        match get_some @@ PTree.get src code with
+        | Inop s | Iop (_, _, _, s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+        | Ibuiltin (_,_,_,s) -> explore s dest
+        | Icond (_,_,s1,s2,_) -> (explore s1 dest) || (explore s2 dest)
+        | Ijumptable _ | Itailcall _ | Ireturn _ -> false
+      end
+    in let rec advance_to_cb src =
+      if (get_some @@ PTree.get src !visited) then None
+      else begin
+        visited := PTree.set src true !visited;
+        match get_some @@ PTree.get src code with
+        | Inop s | Iop (_, _, _, s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+        | Ibuiltin (_,_,_,s) -> advance_to_cb s
+        | Icond _ -> Some src
+        | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+      end
+    in begin
+      Printf.printf "Marking path from %d to %d\n" (P.to_int n) (P.to_int s);
+      match advance_to_cb s with
+      | None -> (Printf.printf "Nothing found\n")
+      | Some s -> ( Printf.printf "Advancing to %d\n" (P.to_int s);
+          match get_some @@ PTree.get s !loop_info with
+          | None | Some _ -> begin
+              match get_some @@ PTree.get s code with
+              | Icond (_, _, n1, n2, _) ->
+                  let b1 = explore n1 n in
+                  let b2 = explore n2 n in
+                  if (b1 && b2) then (Printf.printf "both true\n")
+                  else if b1 then (Printf.printf "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info)
+                  else if b2 then (Printf.printf "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info)
+                  else (Printf.printf "none true\n")
+              | _ -> ( Printf.printf "not an icond\n" )
+            end
+          (* | Some _ -> ( Printf.printf "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *)
+        )
+    end
+  in begin
+    List.iter (fun n ->
+      match get_some @@ PTree.get n code with
+      | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+      | Ibuiltin (_, _, _, s) ->
+          if get_some @@ PTree.get s is_loop_header then mark_path s n
+      | Icond _ -> () (* loop backedges are never Icond in CompCert *)
+      | Ijumptable _ -> ()
+      | Itailcall _ | Ireturn _ -> ()
+    ) bfs_order;
+    !loop_info
+  end
+
+  (* Remark - compared to the original paper, we don't use the store heuristic *)
+let get_directions code entrypoint = begin
+  Printf.printf "get_directions\n"; flush stdout;
+  let bfs_order = bfs code entrypoint in
+  let is_loop_header = get_loop_headers code entrypoint in
+  let loop_info = get_loop_info is_loop_header bfs_order code in
+  let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
+  begin
+    (* ptree_printbool is_loop_header; *)
+    (* Printf.printf "\n"; *)
+    List.iter (fun n ->
+      match (get_some @@ PTree.get n code) with
+      | Icond (cond, lr, ifso, ifnot, _) ->
+          (* Printf.printf "Analyzing %d.." (P.to_int n); *)
+          let heuristics = [ do_opcode_heuristic;
+            do_return_heuristic; do_loop2_heuristic loop_info n; do_loop_heuristic; do_call_heuristic;
+             (* do_store_heuristic *) ] in
+          let preferred = ref None in
+          begin
+            Printf.printf "Deciding condition for RTL node %d\n" (P.to_int n);
+            List.iter (fun do_heur ->
+              match !preferred with
+              | None -> preferred := do_heur code cond ifso ifnot is_loop_header
+              | Some _ -> ()
+            ) heuristics;
+            directions := PTree.set n !preferred !directions;
+            (match !preferred with | Some false -> Printf.printf "\tFALLTHROUGH\n"
+                                   | Some true -> Printf.printf "\tBRANCH\n"
+                                   | None -> Printf.printf "\tUNSURE\n");
+            Printf.printf "---------------------------------------\n"
+          end
+      | _ -> ()
+    ) bfs_order;
+    !directions
+  end
+end
+
+let update_direction direction = function
+| Icond (cond, lr, n, n', _) -> Icond (cond, lr, n, n', direction)
+| i -> i
+
+let rec update_direction_rec directions = function
+| [] -> PTree.empty
+| m::lm -> let (n, i) = m
+    in let direction = get_some @@ PTree.get n directions
+    in PTree.set n (update_direction direction i) (update_direction_rec directions lm)
+
+(* Uses branch prediction to write prediction annotations in Icond *)
+let update_directions code entrypoint = begin
+  Printf.printf "Update_directions\n"; flush stdout;
+  let directions = get_directions code entrypoint
+  in begin
+    (* Printf.printf "Ifso directions: ";
+    ptree_printbool directions;
+    Printf.printf "\n"; *)
+    update_direction_rec directions (PTree.elements code)
+  end
+end
+
+(** Trace selection *)
+
+let rec exists_false_rec = function
+  | [] -> false
+  | m::lm -> let (_, b) = m in if b then exists_false_rec lm else true
+
+let exists_false boolmap = exists_false_rec (PTree.elements boolmap)
+
+(* DFS using prediction info to guide the exploration *)
+let dfs code entrypoint = begin
+  Printf.printf "dfs\n"; flush stdout;
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let rec dfs_list code = function
+  | [] -> []
+  | node :: ln ->
+      if get_some @@ PTree.get node !visited then dfs_list code ln
+      else begin
+        visited := PTree.set node true !visited;
+        let next_nodes = (match get_some @@ PTree.get node code with
+        | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+        | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n]
+        | Ijumptable (_, ln) -> ln
+        | Itailcall _ | Ireturn _ -> []
+        | Icond (_, _, n1, n2, info) -> (match info with
+          | Some false -> [n2; n1]
+          | _ -> [n1; n2]
+          )
+        ) in node :: dfs_list code (next_nodes @ ln)
+      end
+  in dfs_list code [entrypoint]
+end
+
+(*
+let get_predecessors_ttl code =
+  let preds = ref (PTree.map (fun n i -> []) code) in
+  let process_inst (node, ti) = match ti with
+  | Tleaf _ -> ()
+  | Tnext (_, i) -> let succ = match i with
+      | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
+      | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
+      | Icond (_,_,n1,n2,_) -> [n1;n2]
+      | Ijumptable (_,ln) -> ln
+      | _ -> []
+      in List.iter (fun s -> preds := PTree.set s (node::(get_some @@ PTree.get s !preds)) !preds) succ
+  in begin
+    List.iter process_inst (PTree.elements code);
+    !preds
+  end
+*)
+
+let rec select_unvisited_node is_visited = function
+| [] -> failwith "Empty list"
+| n :: ln -> if not (ptree_get_some n is_visited) then n else select_unvisited_node is_visited ln
+
+let best_successor_of node code is_visited =
+  match (PTree.get node code) with
+  | None -> failwith "No such node in the code"
+  | Some i ->
+      let next_node = match i with
+      | Inop n -> Some n
+      | Iop (_, _, _, n) -> Some n
+      | Iload (_, _, _, _, _, n) -> Some n
+      | Istore (_, _, _, _, n) -> Some n
+      | Icall (_, _, _, _, n) -> Some n
+      | Ibuiltin (_, _, _, n) -> Some n
+      | Icond (_, _, n1, n2, ob) -> (match ob with None -> None | Some false -> Some n2 | Some true -> Some n1)
+      | _ -> None
+      in match next_node with
+      | None -> None
+      | Some n -> if not (ptree_get_some n is_visited) then Some n else None
+
+(* FIXME - could be improved by selecting in priority the predicted paths *)
+let best_predecessor_of node predecessors code order is_visited =
+  match (PTree.get node predecessors) with
+  | None -> failwith "No predecessor list found"
+  | Some lp ->
+      try Some (List.find (fun n ->
+          if (List.mem n lp) && (not (ptree_get_some n is_visited)) then
+            match ptree_get_some n code with
+            | Icond (_, _, n1, n2, ob) -> (match ob with
+              | None -> false
+              | Some false -> n == n2
+              | Some true -> n == n1
+              )
+            | _ -> true
+          else false
+        ) order)
+      with Not_found -> None
+
+let print_trace t = print_intlist t
+
+let print_traces traces =
+  let rec f = function
+  | [] -> ()
+  | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt
+  in begin
+    Printf.printf "Traces: {";
+    f traces;
+    Printf.printf "}\n";
+  end
+
+(* Dumb (but linear) trace selection *)
+let select_traces_linear code entrypoint =
+  let is_visited = ref (PTree.map (fun n i -> false) code) in
+  let bfs_order = bfs code entrypoint in
+  let rec go_through node = begin
+    is_visited := PTree.set node true !is_visited;
+    let next_node = match (get_some @@ PTree.get node code) with
+      | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+      | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n
+      | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+      | Icond (_, _, n1, n2, info) -> (match info with
+        | Some false -> Some n2
+        | Some true -> Some n1
+        | None -> None
+        )
+    in match next_node with
+    | None -> [node]
+    | Some n ->
+        if not (get_some @@ PTree.get n !is_visited) then node :: go_through n
+        else [node]
+    end
+  in let traces = ref [] in begin
+    List.iter (fun n ->
+      if not (get_some @@ PTree.get n !is_visited) then
+        traces := (go_through n) :: !traces
+    ) bfs_order;
+    !traces
+  end
+
+
+(* Algorithm mostly inspired from Chang and Hwu 1988
+ * "Trace Selection for Compiling Large C Application Programs to Microcode" *)
+let select_traces_chang code entrypoint = begin
+  Printf.printf "select_traces\n"; flush stdout;
+  let order = dfs code entrypoint in
+  let predecessors = get_predecessors_rtl code in
+  let traces = ref [] in
+  let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *)
+    Printf.printf "Length: %d\n" (List.length order); flush stdout;
+    while exists_false !is_visited do (* while (there are unvisited nodes) *)
+      let seed = select_unvisited_node !is_visited order in
+      let trace = ref [seed] in
+      let current = ref seed in begin
+        is_visited := PTree.set seed true !is_visited; (* mark seed visited *)
+        let quit_loop = ref false in begin
+          while not !quit_loop do
+            let s = best_successor_of !current code !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some succ -> begin
+                trace := !trace @ [succ];
+                is_visited := PTree.set succ true !is_visited; (* mark s visited *)
+                current := succ
+                end
+          done;
+          current := seed;
+          quit_loop := false;
+          while not !quit_loop do
+            let s = best_predecessor_of !current predecessors code order !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some pred -> begin
+                trace := pred :: !trace;
+                is_visited := PTree.set pred true !is_visited; (* mark s visited *)
+                current := pred
+                end
+          done;
+          traces := !trace :: !traces;
+        end
+      end
+    done;
+    (* Printf.printf "DFS: \t"; print_intlist order; Printf.printf "\n"; *)
+    Printf.printf "Traces: "; print_traces !traces;
+    !traces
+  end
+end
+
+let select_traces code entrypoint =
+  let length = List.length @@ PTree.elements code in
+  if (length < 5000) then select_traces_chang code entrypoint
+  else select_traces_linear code entrypoint
+
+let rec make_identity_ptree_rec = function
+| [] -> PTree.empty
+| m::lm -> let (n, _) = m in PTree.set n n (make_identity_ptree_rec lm)
+
+let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code)
+
+(* Change the pointers of preds nodes to point to n' instead of n *)
+let rec change_pointers code n n' = function
+  | [] -> code
+  | pred :: preds ->
+      let new_pred_inst = match ptree_get_some pred code with
+        | Icall(a, b, c, d, n0) -> assert (n0 == n); Icall(a, b, c, d, n')
+        | Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n')
+        | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln);
+                               Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln)
+        | Icond(a, b, n1, n2, i) -> assert (n1 == n || n2 == n);
+                                 let n1' = if (n1 == n) then n' else n1
+                                 in let n2' = if (n2 == n) then n' else n2
+                                 in Icond(a, b, n1', n2', i)
+        | Inop n0 -> assert (n0 == n); Inop n'
+        | Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n')
+        | Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n')
+        | Istore (a, b, c, d, n0) -> assert (n0 == n); Istore (a, b, c, d, n')
+        | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
+      in let new_code = PTree.set pred new_pred_inst code
+      in change_pointers new_code n n' preds
+
+(* parent: parent of n to keep as parent
+ * preds: all the other parents of n
+ * n': the integer which should contain the duplicate of n
+ * returns: new code, new ptree *)
+let duplicate code ptree parent n preds n' =
+  Printf.printf "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
+  match PTree.get n' code with
+  | Some _ -> failwith "The PTree already has a node n'"
+  | None ->
+      let c' = change_pointers code n n' preds
+      in let new_code = PTree.set n' (ptree_get_some n code) c'
+      and new_ptree = PTree.set n' n ptree
+      in (new_code, new_ptree)
+
+let rec maxint = function
+  | [] -> 0
+  | i :: l -> assert (i >= 0); let m = maxint l in if i > m then i else m
+
+let is_empty = function
+  | [] -> true
+  | _ -> false
+
+(* code: RTL code
+ * preds: mapping node -> predecessors
+ * ptree: the revmap
+ * trace: the trace to follow tail duplication on *)
+let tail_duplicate code preds ptree trace =
+  (* next_int: unused integer that can be used for the next duplication *)
+  let next_int = ref (maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1)
+  (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
+  in let last_node = ref None
+  in let last_duplicate = ref None
+  in let nb_duplicated = ref 0
+  (* recursive function on a trace *)
+  in let rec f code ptree is_first = function
+    | [] -> (code, ptree)
+    | n :: t ->
+        let (new_code, new_ptree) =
+          if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
+          else
+            let node_preds = ptree_get_some n preds
+            in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds
+            in let final_node_preds = match !last_duplicate with
+              | None -> node_preds_nolast
+              | Some n' -> n' :: node_preds_nolast
+            in if not (is_empty final_node_preds) then
+              let n' = !next_int
+              in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n')
+              in begin
+                next_int := !next_int + 1;
+                nb_duplicated := !nb_duplicated + 1;
+                last_duplicate := Some (P.of_int n');
+                (newc, newp)
+              end
+            else (code, ptree)
+        in begin
+          last_node := Some n;
+          f new_code new_ptree false t
+        end
+  in let new_code, new_ptree = f code ptree true trace
+  in (new_code, new_ptree, !nb_duplicated)
+
+let superblockify_traces code preds traces =
+  let max_nb_duplicated = !Clflags.option_fduplicate (* FIXME - should be architecture dependent *)
+  in let ptree = make_identity_ptree code
+  in let rec f code ptree = function
+    | [] -> (code, ptree, 0)
+    | trace :: traces ->
+        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+        in if (nb_duplicated < max_nb_duplicated)
+          then (Printf.printf "End duplication\n"; f new_code new_ptree traces)
+          else (Printf.printf "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
+  in let new_code, new_ptree, _ = f code ptree traces
+  in (new_code, new_ptree)
+
+let rec invert_iconds_trace code = function
+  | [] -> code
+  | n :: ln ->
+      let code' = match ptree_get_some n code with
+        | Icond (c, lr, ifso, ifnot, info) -> (match info with
+            | Some true -> begin
+                (* Printf.printf "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
+                PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)) code
+              end
+            | _ -> code)
+        | _ -> code
+      in invert_iconds_trace code' ln
+
+let rec invert_iconds code = function
+  | [] -> code
+  | t :: ts ->
+      let code' = if !Clflags.option_finvertcond then invert_iconds_trace code t
+                  else code
+      in invert_iconds code' ts
+
+let duplicate_aux f =
+  let entrypoint = f.fn_entrypoint in
+  if !Clflags.option_fduplicate < 0 then
+    ((f.fn_code, entrypoint), make_identity_ptree f.fn_code)
+  else
+    let code = update_directions (f.fn_code) entrypoint in
+    let traces = select_traces code entrypoint in
+    let icond_code = invert_iconds code traces in
+    let preds = get_predecessors_rtl icond_code in
+    if !Clflags.option_fduplicate >= 1 then
+      let (new_code, pTreeId) = ((* print_traces traces; *) superblockify_traces icond_code preds traces) in
+      ((new_code, f.fn_entrypoint), pTreeId)
+    else
+      ((icond_code, entrypoint), make_identity_ptree code)
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
new file mode 100644
index 00000000..6b598dc7
--- /dev/null
+++ b/backend/Duplicateproof.v
@@ -0,0 +1,523 @@
+(** Correctness proof for code duplication *)
+Require Import AST Linking Errors Globalenvs Smallstep.
+Require Import Coqlib Maps Events Values.
+Require Import Op RTL Duplicate.
+
+Local Open Scope positive_scope.
+
+(** * Definition of [match_states] (independently of the translation) *)
+
+(* est-ce plus simple de prendre dupmap: node -> node, avec un noeud hors CFG à la place de None ? *)
+Inductive match_inst (dupmap: PTree.t node): instruction -> instruction -> Prop :=
+  | match_inst_nop: forall n n',
+      dupmap!n' = (Some n) -> match_inst dupmap (Inop n) (Inop n')
+  | match_inst_op: forall n n' op lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Iop op lr r n) (Iop op lr r n')
+  | match_inst_load: forall n n' tm m a lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Iload tm m a lr r n) (Iload tm m a lr r n')
+  | match_inst_store: forall n n' m a lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Istore m a lr r n) (Istore m a lr r n')
+  | match_inst_call: forall n n' s ri lr r,
+      dupmap!n' = (Some n) -> match_inst dupmap (Icall s ri lr r n) (Icall s ri lr r n')
+  | match_inst_tailcall: forall s ri lr,
+      match_inst dupmap (Itailcall s ri lr) (Itailcall s ri lr)
+  | match_inst_builtin: forall n n' ef la br,
+      dupmap!n' = (Some n) -> match_inst dupmap (Ibuiltin ef la br n) (Ibuiltin ef la br n')
+  | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr i i',
+      dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
+      match_inst dupmap (Icond c lr ifso ifnot i) (Icond c lr ifso' ifnot' i')
+  | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr i i',
+      dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
+      match_inst dupmap (Icond c lr ifso ifnot i) (Icond (negate_condition c) lr ifnot' ifso' i')
+  | match_inst_jumptable: forall ln ln' r,
+      list_forall2 (fun n n' => (dupmap!n' = (Some n))) ln ln' ->
+      match_inst dupmap (Ijumptable r ln) (Ijumptable r ln')
+  | match_inst_return: forall or, match_inst dupmap (Ireturn or) (Ireturn or).
+
+Record match_function dupmap f f': Prop := {
+  dupmap_correct: forall n n', dupmap!n' = Some n ->
+    (forall i, (fn_code f)!n = Some i -> exists i', (fn_code f')!n' = Some i' /\ match_inst dupmap i i');
+  dupmap_entrypoint: dupmap!(fn_entrypoint f') = Some (fn_entrypoint f);
+  preserv_fnsig: fn_sig f = fn_sig f';
+  preserv_fnparams: fn_params f = fn_params f';
+  preserv_fnstacksize: fn_stacksize f = fn_stacksize f'
+}.
+
+Inductive match_fundef: RTL.fundef -> RTL.fundef -> Prop :=
+  | match_Internal dupmap f f': match_function dupmap f f' -> match_fundef (Internal f) (Internal f')
+  | match_External ef: match_fundef (External ef) (External ef).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframe_intro 
+      dupmap res f sp pc rs f' pc'
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc):
+      match_stackframes (Stackframe res f sp pc rs) (Stackframe res f' sp pc' rs).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro 
+      dupmap st f sp pc rs m st' f' pc'
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc):
+      match_states (State st f sp pc rs m) (State st' f' sp pc' rs m)
+  | match_states_call:
+    forall st st' f f' args m
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_fundef f f'),
+      match_states (Callstate st f args m) (Callstate st' f' args m)
+  | match_states_return:
+    forall st st' v m
+      (STACKS: list_forall2 match_stackframes st st'),
+      match_states (Returnstate st v m) (Returnstate st' v m).
+
+(** * Auxiliary properties *)
+
+
+Theorem transf_function_preserves:
+  forall f f',
+  transf_function f = OK f' ->
+     fn_sig f = fn_sig f' /\ fn_params f = fn_params f' /\ fn_stacksize f = fn_stacksize f'.
+Proof.
+  intros. unfold transf_function in H. destruct (duplicate_aux _) as (tcte & mp). destruct tcte as (tc & te). monadInv H.
+  repeat (split; try reflexivity).
+Qed.
+
+
+Lemma verify_mapping_mn_rec_step:
+  forall dupmap lb b f f',
+  In b lb ->
+  verify_mapping_mn_rec dupmap f f' lb = OK tt ->
+  verify_mapping_mn dupmap f f' b = OK tt.
+Proof.
+  induction lb; intros.
+  - monadInv H0. inversion H.
+  - inversion H.
+    + subst. monadInv H0. destruct x. assumption.
+    + monadInv H0. destruct x0. eapply IHlb; assumption.
+Qed.
+
+Lemma verify_is_copy_correct:
+  forall dupmap n n',
+  verify_is_copy dupmap n n' = OK tt ->
+  dupmap ! n' = Some n.
+Proof.
+  intros. unfold verify_is_copy in H. destruct (_ ! n') eqn:REVM; [|inversion H].
+  destruct (n ?= p) eqn:NP; try (inversion H; fail).
+  eapply Pos.compare_eq in NP. subst.
+  reflexivity.
+Qed.
+
+Lemma verify_is_copy_list_correct:
+  forall dupmap ln ln',
+  verify_is_copy_list dupmap ln ln' = OK tt ->
+  list_forall2 (fun n n' => dupmap ! n' = Some n) ln ln'.
+Proof.
+  induction ln.
+  - intros. destruct ln'; monadInv H. constructor.
+  - intros. destruct ln'; monadInv H. destruct x. apply verify_is_copy_correct in EQ.
+    eapply IHln in EQ0. constructor; assumption.
+Qed.
+
+Lemma verify_match_inst_correct:
+  forall dupmap i i',
+  verify_match_inst dupmap i i' = OK tt ->
+  match_inst dupmap i i'.
+Proof.
+  intros. unfold verify_match_inst in H.
+  destruct i; try (inversion H; fail).
+(* Inop *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    constructor; eauto.
+(* Iop *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (eq_operation _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Iload *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (trapping_mode_eq _ _); try discriminate.
+    destruct (chunk_eq _ _); try discriminate.
+    destruct (eq_addressing _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Istore *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (chunk_eq _ _); try discriminate.
+    destruct (eq_addressing _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. clear EQ0. subst.
+    constructor. assumption.
+(* Icall *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (signature_eq _ _); try discriminate.
+    destruct (product_eq _ _ _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (Pos.eq_dec _ _); try discriminate. subst.
+    constructor. assumption.
+(* Itailcall *)
+  - destruct i'; try (inversion H; fail).
+    destruct (signature_eq _ _); try discriminate.
+    destruct (product_eq _ _ _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate. subst. clear H.
+    constructor.
+(* Ibuiltin *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_correct in EQ.
+    destruct (external_function_eq _ _); try discriminate.
+    destruct (list_eq_dec _ _ _); try discriminate.
+    destruct (builtin_res_eq_pos _ _); try discriminate. subst.
+    constructor. assumption.
+(* Icond *)
+  - destruct i'; try (inversion H; fail).
+    destruct (list_eq_dec _ _ _); try discriminate. subst.
+    destruct (eq_condition _ _); try discriminate.
+    + monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      destruct x0. eapply verify_is_copy_correct in EQ1.
+      constructor; assumption.
+    + destruct (eq_condition _ _); try discriminate.
+      monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      destruct x0. eapply verify_is_copy_correct in EQ1.
+      constructor; assumption.
+(* Ijumptable *)
+  - destruct i'; try (inversion H; fail). monadInv H.
+    destruct x. eapply verify_is_copy_list_correct in EQ.
+    destruct (Pos.eq_dec _ _); try discriminate. subst.
+    constructor. assumption.
+(* Ireturn *)
+  - destruct i'; try (inversion H; fail).
+    destruct (option_eq _ _ _); try discriminate. subst. clear H.
+    constructor.
+Qed.
+
+
+Lemma verify_mapping_mn_correct mp n n' i f f' tc:
+  mp ! n' = Some n ->
+  (fn_code f) ! n = Some i ->
+  (fn_code f') = tc ->
+  verify_mapping_mn mp f f' (n', n) = OK tt ->
+  exists i',
+     tc ! n' = Some i'
+  /\ match_inst mp i i'.
+Proof.
+  unfold verify_mapping_mn; intros H H0 H1 H2. rewrite H0 in H2. clear H0. rewrite H1 in H2. clear H1.
+  destruct (tc ! n') eqn:TCN; [| inversion H2].
+  exists i0. split; auto.
+  eapply verify_match_inst_correct. assumption.
+Qed.
+
+
+Lemma verify_mapping_mn_rec_correct:
+  forall mp n n' i f f' tc,
+  mp ! n' = Some n ->
+  (fn_code f) ! n = Some i ->
+  (fn_code f') = tc ->
+  verify_mapping_mn_rec mp f f' (PTree.elements mp) = OK tt ->
+  exists i',
+     tc ! n' = Some i'
+  /\ match_inst mp i i'.
+Proof.
+  intros. exploit PTree.elements_correct. eapply H. intros IN.
+  eapply verify_mapping_mn_rec_step in H2; eauto.
+  eapply verify_mapping_mn_correct; eauto.
+Qed.
+
+Theorem transf_function_correct f f':
+    transf_function f = OK f' -> exists dupmap, match_function dupmap f f'.
+Proof.
+  unfold transf_function.
+  intros TRANSF.
+  destruct (duplicate_aux _) as (tcte & mp). destruct tcte as (tc & te).
+  monadInv TRANSF.
+  unfold verify_mapping in EQ. monadInv EQ.
+  exists mp; constructor 1; simpl; auto.
+  + (* correct *) 
+  intros until n'. intros REVM i FNC.
+  unfold verify_mapping_match_nodes in EQ. simpl in EQ. destruct x1.
+  eapply verify_mapping_mn_rec_correct; eauto.
+  simpl; eauto.
+  + (* entrypoint *)
+  intros. unfold verify_mapping_entrypoint in EQ0. simpl in EQ0.
+  eapply verify_is_copy_correct; eauto.
+  destruct x0; auto.
+Qed.
+
+Lemma transf_fundef_correct f f':
+  transf_fundef f = OK f' -> match_fundef f f'.
+Proof.
+  intros TRANSF; destruct f; simpl; monadInv TRANSF.
+  + exploit transf_function_correct; eauto.
+    intros (dupmap & MATCH_F).
+    eapply match_Internal; eauto.
+  + eapply match_External.
+Qed.
+
+(** * Preservation proof *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: program.
+Variable tprog: program.
+
+Hypothesis TRANSL: match_prog prog tprog.
+
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+(* UNUSED LEMMA ?
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+*)
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf cunit, transf_fundef f = OK tf /\ Genv.find_funct tge v = Some tf /\ linkorder cunit prog.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+  + unfold incl; auto.
+  + eapply linkorder_refl.
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma function_sig_translated:
+  forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. simpl. symmetry. apply transf_function_preserves. assumption.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Lemma list_nth_z_dupmap:
+  forall dupmap ln ln' (pc pc': node) val,
+  list_nth_z ln val = Some pc ->
+  list_forall2 (fun n n' => dupmap!n' = Some n) ln ln' ->
+  exists pc',
+     list_nth_z ln' val = Some pc'
+  /\ dupmap!pc' = Some pc.
+Proof.
+  induction ln; intros until val; intros LNZ LFA.
+  - inv LNZ.
+  - inv LNZ. destruct (zeq val 0) eqn:ZEQ.
+    + inv H0. destruct ln'; inv LFA.
+      simpl. exists p. split; auto.
+    + inv LFA. simpl. rewrite ZEQ. exploit IHln. 2: eapply H0. all: eauto.
+      intros (pc'1 & LNZ & REV). exists pc'1. split; auto. congruence.
+Qed.
+
+Theorem transf_initial_states:
+  forall s1, initial_state prog s1 ->
+  exists s2, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inv H.
+  exploit function_ptr_translated; eauto. intros (tf & FIND & TRANSF).
+  eexists. split.
+  - econstructor; eauto.
+    + eapply (Genv.init_mem_transf_partial TRANSL); eauto.
+    + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto.
+      symmetry. eapply match_program_main. eauto.
+    + destruct f.
+      * monadInv TRANSF. rewrite <- H3. symmetry; eapply transf_function_preserves. assumption.
+      * monadInv TRANSF. assumption.
+  - constructor; eauto.
+    + constructor. 
+    + apply transf_fundef_correct; auto.
+Qed.
+
+Theorem transf_final_states:
+  forall s1 s2 r,
+  match_states s1 s2 -> final_state s1 r -> final_state s2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem step_simulation:
+  forall s1 t s1', step ge s1 t s1' ->
+  forall s2 (MS: match_states s1 s2),
+  exists s2',
+     step tge s2 t s2'
+  /\ match_states s1' s2'.
+Proof.
+  Local Hint Resolve transf_fundef_correct: core.
+  induction 1; intros; inv MS.
+(* Inop *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3).
+    inv H3.
+    eexists. split.
+    + eapply exec_Inop; eauto.
+    + econstructor; eauto.
+(* Iop *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iop; eauto. erewrite eval_operation_preserved; eauto.
+    + econstructor; eauto.
+(* Iload *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload; eauto; (* is the follow still needed?*) erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+(* Iload notrap1 *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload_notrap1; eauto; erewrite eval_addressing_preserved; eauto. 
+    + econstructor; eauto.
+(* Iload notrap2 *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Iload_notrap2; eauto; erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+      
+(* Istore *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Istore; eauto; erewrite eval_addressing_preserved; eauto.
+    + econstructor; eauto.
+(* Icall *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    destruct ros.
+    * simpl in H0. apply functions_translated in H0.
+      destruct H0 as (tf & cunit & TFUN & GFIND & LO).
+      eexists. split.
+      + eapply exec_Icall. eassumption. simpl. eassumption.
+        apply function_sig_translated. assumption.
+      + repeat (econstructor; eauto).
+    * simpl in H0. destruct (Genv.find_symbol _ _) eqn:GFS; try discriminate.
+      apply function_ptr_translated in H0. destruct H0 as (tf & GFF & TF).
+      eexists. split.
+      + eapply exec_Icall. eassumption. simpl. rewrite symbols_preserved. rewrite GFS.
+        eassumption. apply function_sig_translated. assumption.
+      + repeat (econstructor; eauto).
+(* Itailcall *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H10 & H11). inv H11.
+    pose symbols_preserved as SYMPRES.
+    destruct ros.
+    * simpl in H0. apply functions_translated in H0.
+      destruct H0 as (tf & cunit & TFUN & GFIND & LO).
+      eexists. split.
+      + eapply exec_Itailcall. eassumption. simpl. eassumption.
+        apply function_sig_translated. assumption.
+        erewrite <- preserv_fnstacksize; eauto.
+      + repeat (constructor; auto).
+    * simpl in H0. destruct (Genv.find_symbol _ _) eqn:GFS; try discriminate.
+      apply function_ptr_translated in H0. destruct H0 as (tf & GFF & TF).
+      eexists. split.
+      + eapply exec_Itailcall. eassumption. simpl. rewrite symbols_preserved. rewrite GFS.
+        eassumption. apply function_sig_translated. assumption.
+        erewrite <- preserv_fnstacksize; eauto.
+      + repeat (constructor; auto).
+(* Ibuiltin *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Ibuiltin; eauto. eapply eval_builtin_args_preserved; eauto.
+      eapply external_call_symbols_preserved; eauto. eapply senv_preserved.
+    + econstructor; eauto.
+(* Icond *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    * (* match_inst_cond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto.
+      + econstructor; eauto. destruct b; auto.
+    * (* match_inst_revcond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto. rewrite eval_negate_condition. rewrite H0. simpl. eauto.
+      + econstructor; eauto. destruct b; auto.
+(* Ijumptable *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    exploit list_nth_z_dupmap; eauto. intros (pc'1 & LNZ & REVM).
+    eexists. split.
+    + eapply exec_Ijumptable; eauto.
+    + econstructor; eauto.
+(* Ireturn *)
+  - eapply dupmap_correct in DUPLIC; eauto.
+    destruct DUPLIC as (i' & H2 & H3). inv H3.
+    pose symbols_preserved as SYMPRES.
+    eexists. split.
+    + eapply exec_Ireturn; eauto. erewrite <- preserv_fnstacksize; eauto.
+    + econstructor; eauto.
+(* exec_function_internal *)
+  - inversion TRANSF as [dupmap f0 f0' MATCHF|]; subst. eexists. split.
+    + eapply exec_function_internal. erewrite <- preserv_fnstacksize; eauto.
+    + erewrite preserv_fnparams; eauto.
+      econstructor; eauto. apply dupmap_entrypoint. assumption.
+(* exec_function_external *)
+  - inversion TRANSF as [|]; subst. eexists. split.
+    + econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    + constructor. assumption.
+(* exec_return *)
+  - inv STACKS. destruct b1 as [res' f' sp' pc' rs']. eexists. split.
+    + constructor.
+    + inv H1. econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (semantics prog) (semantics tprog).
+Proof.
+  eapply forward_simulation_step with match_states.
+  - eapply senv_preserved.
+  - eapply transf_initial_states.
+  - eapply transf_final_states.
+  - eapply step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/ForwardMoves.v b/backend/ForwardMoves.v
new file mode 100644
index 00000000..7cfd411f
--- /dev/null
+++ b/backend/ForwardMoves.v
@@ -0,0 +1,333 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+(* Static analysis *)
+
+Module RELATION.
+  
+Definition t := (PTree.t reg).
+Definition eq (r1 r2 : t) :=
+  forall x, (PTree.get x r1) = (PTree.get x r2).
+
+Definition top : t := PTree.empty reg.
+
+Lemma eq_refl: forall x, eq x x.
+Proof.
+  unfold eq.
+  intros; reflexivity.
+Qed.
+
+Lemma eq_sym: forall x y, eq x y -> eq y x.
+Proof.
+  unfold eq.
+  intros; eauto.
+Qed.
+
+Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+Proof.
+  unfold eq.
+  intros; congruence.
+Qed.
+
+Definition reg_beq (x y : reg) :=
+  if Pos.eq_dec x y then true else false.
+
+Definition beq (r1 r2 : t) := PTree.beq reg_beq r1 r2.
+
+Lemma beq_correct: forall r1 r2, beq r1 r2 = true -> eq r1 r2.
+Proof.
+  unfold beq, eq. intros r1 r2 EQ x.
+  pose proof (PTree.beq_correct reg_beq r1 r2) as CORRECT.
+  destruct CORRECT as [CORRECTF CORRECTB].
+  pose proof (CORRECTF EQ x) as EQx.
+  clear CORRECTF CORRECTB EQ.
+  unfold reg_beq in *.
+  destruct (r1 ! x) as [R1x | ] in *;
+    destruct (r2 ! x) as [R2x | ] in *;
+    trivial; try contradiction.
+  destruct (Pos.eq_dec R1x R2x) in *; congruence.
+Qed.
+
+Definition ge (r1 r2 : t) :=
+  forall x,
+    match PTree.get x r1 with
+    | None => True
+    | Some v => (PTree.get x r2) = Some v
+    end.
+
+Lemma ge_refl: forall r1 r2, eq r1 r2 -> ge r1 r2.
+Proof.
+  unfold eq, ge.
+  intros r1 r2 EQ x.
+  pose proof (EQ x) as EQx.
+  clear EQ.
+  destruct (r1 ! x).
+  - congruence.
+  - trivial.
+Qed.
+
+Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+Proof.
+  unfold ge.
+  intros r1 r2 r3 GE12 GE23 x.
+  pose proof (GE12 x) as GE12x; clear GE12.
+  pose proof (GE23 x) as GE23x; clear GE23.
+  destruct (r1 ! x); trivial.
+  destruct (r2 ! x); congruence.
+Qed.
+
+Definition lub (r1 r2 : t) :=
+  PTree.combine
+    (fun ov1 ov2 =>
+       match ov1, ov2 with
+       | (Some v1), (Some v2) =>
+         if Pos.eq_dec v1 v2
+         then ov1
+         else None
+       | None, _
+       | _, None => None
+       end)
+    r1 r2.
+
+Lemma ge_lub_left: forall x y, ge (lub x y) x.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial.
+Qed.
+
+Lemma ge_lub_right: forall x y, ge (lub x y) y.
+Proof.
+  unfold ge, lub.
+  intros r1 r2 x.
+  rewrite PTree.gcombine by reflexivity.
+  destruct (_ ! _); trivial.
+  destruct (_ ! _); trivial.
+  destruct (Pos.eq_dec _ _); trivial.
+  congruence.
+Qed.
+
+End RELATION.
+
+Module Type SEMILATTICE_WITHOUT_BOTTOM.
+
+  Parameter t: Type.
+  Parameter eq: t -> t -> Prop.
+  Axiom eq_refl: forall x, eq x x.
+  Axiom eq_sym: forall x y, eq x y -> eq y x.
+  Axiom eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Parameter beq: t -> t -> bool.
+  Axiom beq_correct: forall x y, beq x y = true -> eq x y.
+  Parameter ge: t -> t -> Prop.
+  Axiom ge_refl: forall x y, eq x y -> ge x y.
+  Axiom ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Parameter lub: t -> t -> t.
+  Axiom ge_lub_left: forall x y, ge (lub x y) x.
+  Axiom ge_lub_right: forall x y, ge (lub x y) y.
+
+End SEMILATTICE_WITHOUT_BOTTOM.
+
+Module ADD_BOTTOM(L : SEMILATTICE_WITHOUT_BOTTOM).
+  Definition t := option L.t.
+  Definition eq (a b : t) :=
+    match a, b with
+    | None, None => True
+    | Some x, Some y => L.eq x y
+    | Some _, None | None, Some _ => False
+    end.
+  
+  Lemma eq_refl: forall x, eq x x.
+  Proof.
+    unfold eq; destruct x; trivial.
+    apply L.eq_refl.
+  Qed.
+
+  Lemma eq_sym: forall x y, eq x y -> eq y x.
+  Proof.
+    unfold eq; destruct x; destruct y; trivial.
+    apply L.eq_sym.
+  Qed.
+  
+  Lemma eq_trans: forall x y z, eq x y -> eq y z -> eq x z.
+  Proof.
+    unfold eq; destruct x; destruct y; destruct z; trivial.
+    - apply L.eq_trans.
+    - contradiction.
+  Qed.
+  
+  Definition beq (x y : t) :=
+    match x, y with
+    | None, None => true
+    | Some x, Some y => L.beq x y
+    | Some _, None | None, Some _ => false
+    end.
+  
+  Lemma beq_correct: forall x y, beq x y = true -> eq x y.
+  Proof.
+    unfold beq, eq.
+    destruct x; destruct y; trivial; try congruence.
+    apply L.beq_correct.
+  Qed.
+  
+  Definition ge (x y : t) :=
+    match x, y with
+    | None, Some _ => False
+    | _, None => True
+    | Some a, Some b => L.ge a b
+    end.
+  
+  Lemma ge_refl: forall x y, eq x y -> ge x y.
+  Proof.
+    unfold eq, ge.
+    destruct x; destruct y; trivial.
+    apply L.ge_refl.
+  Qed.
+  
+  Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z.
+  Proof.
+    unfold ge.
+    destruct x; destruct y; destruct z; trivial; try contradiction.
+    apply L.ge_trans.
+  Qed.
+  
+  Definition bot: t := None.
+  Lemma ge_bot: forall x, ge x bot.
+  Proof.
+    unfold ge, bot.
+    destruct x; trivial.
+  Qed.
+  
+  Definition lub (a b : t) :=
+    match a, b with
+    | None, _ => b
+    | _, None => a
+    | (Some x), (Some y) => Some (L.lub x y)
+    end.
+
+  Lemma ge_lub_left: forall x y, ge (lub x y) x.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_left.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+  
+  Lemma ge_lub_right: forall x y, ge (lub x y) y.
+  Proof.
+    unfold ge, lub.
+    destruct x; destruct y; trivial.
+    - apply L.ge_lub_right.
+    - apply L.ge_refl.
+      apply L.eq_refl.
+  Qed.
+End ADD_BOTTOM.
+
+Module RB := ADD_BOTTOM(RELATION).
+Module DS := Dataflow_Solver(RB)(NodeSetForward).
+
+Definition kill (dst : reg) (rel : RELATION.t) :=
+  PTree.filter1 (fun x => if Pos.eq_dec dst x then false else true)
+                (PTree.remove dst rel).
+
+Definition move (src dst : reg) (rel : RELATION.t) :=
+  PTree.set dst (match PTree.get src rel with
+                 | Some src' => src'
+                 | None => src
+                 end) (kill dst rel).
+
+Fixpoint kill_builtin_res (res : builtin_res reg) (rel : RELATION.t) :=
+  match res with
+  | BR z => kill z rel
+  | BR_none => rel
+  | BR_splitlong hi lo => kill_builtin_res hi (kill_builtin_res lo rel)
+  end.
+
+Definition apply_instr instr x :=
+  match instr with
+  | Inop _
+  | Icond _ _ _ _ _
+  | Ijumptable _ _
+  | Istore _ _ _ _ _ => Some x
+  | Iop Omove (src :: nil) dst _ => Some (move src dst x)
+  | Iop _ _ dst _
+  | Iload _ _ _ _ dst _
+  | Icall _ _ _ dst _ => Some (kill dst x)
+  | Ibuiltin _ _ res _ => Some (RELATION.top) (* TODO (kill_builtin_res res x) *)
+  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  end.
+
+Definition apply_instr' code (pc : node) (ro : RB.t) : RB.t :=
+  match ro with
+  | None => None
+  | Some x =>
+    match code ! pc with
+    | None => RB.bot
+    | Some instr => apply_instr instr x
+    end
+  end.
+
+Definition forward_map (f : RTL.function) := DS.fixpoint
+  (RTL.fn_code f) RTL.successors_instr
+  (apply_instr' (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
+
+Definition get_r (rel : RELATION.t) (x : reg) :=
+  match PTree.get x rel with
+  | None => x
+  | Some src => src
+  end.
+
+Definition get_rb (rb : RB.t) (x : reg) :=
+  match rb with
+  | None => x
+  | Some rel => get_r rel x
+  end.
+
+Definition subst_arg (fmap : option (PMap.t RB.t)) (pc : node) (x : reg) : reg :=
+  match fmap with
+  | None => x
+  | Some inv => get_rb (PMap.get pc inv) x
+  end.
+
+Definition subst_args fmap pc := List.map (subst_arg fmap pc).
+
+(* Transform *)
+Definition transf_instr (fmap : option (PMap.t RB.t))
+           (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args dst s =>
+    Iop op (subst_args fmap pc args) dst s
+  | Iload trap chunk addr args dst s =>
+    Iload trap chunk addr (subst_args fmap pc args) dst s
+  | Istore chunk addr args src s =>
+    Istore chunk addr (subst_args fmap pc args) src s
+  | Icall sig ros args dst s =>
+    Icall sig ros (subst_args fmap pc args) dst s
+  | Itailcall sig ros args =>
+    Itailcall sig ros (subst_args fmap pc args)
+  | Icond cond args s1 s2 i =>
+    Icond cond (subst_args fmap pc args) s1 s2 i
+  | Ijumptable arg tbl =>
+    Ijumptable (subst_arg fmap pc arg) tbl
+  | Ireturn (Some arg) =>
+    Ireturn (Some (subst_arg fmap pc arg))
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/ForwardMovesproof.v b/backend/ForwardMovesproof.v
new file mode 100644
index 00000000..826d4250
--- /dev/null
+++ b/backend/ForwardMovesproof.v
@@ -0,0 +1,801 @@
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import ForwardMoves.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; trivial.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc =
+    Some(transf_instr (forward_map f) pc i).
+Proof.
+  intros until i. intro CODE.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite CODE.
+  reflexivity.
+Qed.
+
+(*
+Definition fmap_sem (fmap : option (PMap.t RB.t)) (pc : node) (rs : regset) :=
+  forall x : reg,
+    (rs # (subst_arg fmap pc x)) = (rs # x).
+ *)
+
+Lemma apply_instr'_bot :
+  forall code,
+  forall pc,
+    RB.eq (apply_instr' code pc RB.bot) RB.bot.
+Proof.
+  reflexivity.
+Qed.
+
+Definition get_rb_sem (rb : RB.t) (rs : regset) :=
+  match rb with
+  | None => False
+  | Some rel =>
+    forall x : reg,
+      (rs # (get_r rel x)) = (rs # x)
+  end.
+
+Lemma get_rb_sem_ge:
+  forall rb1 rb2 : RB.t,
+    (RB.ge rb1 rb2) ->
+    forall rs : regset,
+      (get_rb_sem rb2 rs) -> (get_rb_sem rb1 rs).
+Proof.
+  destruct rb1 as [r1 | ];
+    destruct rb2 as [r2 | ];
+    unfold get_rb_sem;
+    simpl;
+    intros GE rs RB2RS;
+    try contradiction.
+  unfold RELATION.ge in GE.
+  unfold get_r in *.
+  intro x.
+  pose proof (GE x) as GEx.
+  pose proof (RB2RS x) as RB2RSx.
+  destruct (r1 ! x) as [r1x | ] in *;
+    destruct (r2 ! x) as [r2x | ] in *;
+    congruence.
+Qed.
+
+Definition fmap_sem (fmap : option (PMap.t RB.t))
+  (pc : node) (rs : regset) :=
+  match fmap with
+  | None => True
+  | Some m => get_rb_sem (PMap.get pc m) rs
+  end.
+
+Lemma subst_arg_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  forall arg,
+    fmap_sem (forward_map f) pc rs ->
+    rs # (subst_arg (forward_map f) pc arg) = rs # arg.
+Proof.
+  intros until arg.
+  intro SEM.
+  unfold fmap_sem in SEM.
+  destruct (forward_map f) as [map |]in *; trivial.
+  simpl.
+  unfold get_rb_sem in *.
+  destruct (map # pc).
+  2: contradiction.
+  apply SEM.
+Qed.
+
+Lemma subst_args_ok:
+  forall f,
+  forall pc,
+  forall rs,
+  fmap_sem (forward_map f) pc rs ->
+  forall args,
+    rs ## (subst_args (forward_map f) pc args) = rs ## args.
+Proof.
+  induction args; trivial.
+  simpl.
+  f_equal.
+  apply subst_arg_ok; assumption.
+  assumption.
+Qed.
+
+Lemma kill_ok:
+  forall dst,
+  forall mpc,
+  forall rs,
+  forall v,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs # dst <- v.
+Proof.
+  unfold get_rb_sem.
+  intros until v.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  {
+    subst dst.
+    rewrite Regmap.gss.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    apply Regmap.gss.
+  }
+  rewrite (Regmap.gso v rs NEQ).
+  unfold kill, get_r in *.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    {
+      subst dst.
+      rewrite Regmap.gso by assumption.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  rewrite Regmap.gso by assumption.
+  reflexivity.
+Qed.
+
+Lemma kill_weaken:
+  forall dst,
+  forall mpc,
+  forall rs,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (kill dst mpc)) rs.
+Proof.
+  unfold get_rb_sem.
+  intros until rs.
+  intros SEM x.
+  destruct (Pos.eq_dec x dst) as [EQ | NEQ].
+  {
+    subst dst.
+    unfold kill, get_r.
+    rewrite PTree.gfilter1.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  unfold kill, get_r in *.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by assumption.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x).
+  {
+    destruct (Pos.eq_dec dst r).
+    {
+      reflexivity.
+    }
+    assumption.
+  }
+  reflexivity.
+Qed.
+
+Lemma top_ok :
+  forall rs, get_rb_sem (Some RELATION.top) rs.
+Proof.
+  unfold get_rb_sem, RELATION.top.
+  intros.
+  unfold get_r.
+  rewrite PTree.gempty.
+  reflexivity.
+Qed.
+
+Lemma move_ok:
+  forall mpc : RELATION.t,
+  forall src res : reg,
+  forall rs : regset,
+    get_rb_sem (Some mpc) rs ->
+    get_rb_sem (Some (move src res mpc)) (rs # res <- (rs # src)).
+Proof.
+  unfold get_rb_sem, move.
+  intros until rs.
+  intros SEM x.
+  unfold get_r in *.
+  destruct (Pos.eq_dec res x).
+  {
+    subst res.
+    rewrite PTree.gss.
+    rewrite Regmap.gss.
+    pose proof (SEM src) as SEMsrc.
+    destruct (mpc ! src) as [mpcsrc | ] in *.
+    {
+      destruct (Pos.eq_dec x mpcsrc).
+      {
+        subst mpcsrc.
+        rewrite Regmap.gss.
+        reflexivity.
+      }
+      rewrite Regmap.gso by congruence.
+      assumption.
+    }
+    destruct (Pos.eq_dec x src).
+    {
+      subst src.
+      rewrite Regmap.gss.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    reflexivity.
+  }
+  rewrite PTree.gso by congruence.
+  rewrite Regmap.gso with (i := x) by congruence.
+  unfold kill.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  pose proof (SEM x) as SEMx.
+  destruct (mpc ! x) as [ r |].
+  {
+    destruct (Pos.eq_dec res r).
+    {
+      subst r.
+      rewrite Regmap.gso by congruence.
+      trivial.
+    }
+    rewrite Regmap.gso by congruence.
+    assumption.
+  }
+  rewrite Regmap.gso by congruence.
+  reflexivity.
+Qed.
+  
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Definition is_killed_in_map (map : PMap.t RB.t) pc res :=
+  match PMap.get pc map with
+  | None => True
+  | Some rel => exists rel', RELATION.ge rel (kill res rel')
+  end.
+
+Definition is_killed_in_fmap fmap pc res :=
+  match fmap with
+  | None => True
+  | Some map => is_killed_in_map map pc res
+  end.
+
+Definition killed_twice:
+  forall rel : RELATION.t,
+  forall res,
+    RELATION.eq (kill res rel) (kill res (kill res rel)).
+Proof.
+  unfold kill, RELATION.eq.
+  intros.
+  rewrite PTree.gfilter1.
+  rewrite PTree.gfilter1.
+  destruct (Pos.eq_dec res x).
+  {
+    subst res.
+    rewrite PTree.grs.
+    rewrite PTree.grs.
+    reflexivity.
+  }
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gro by congruence. 
+  rewrite PTree.gfilter1.
+  rewrite PTree.gro by congruence.
+  destruct (rel ! x) as [relx | ]; trivial.
+  destruct (Pos.eq_dec res relx); trivial.
+  destruct (Pos.eq_dec res relx); congruence.
+Qed.
+
+Lemma get_rb_killed:
+  forall mpc,
+  forall rs,
+  forall rel,
+  forall res,
+  forall vres,
+    (get_rb_sem (Some mpc) rs) ->
+    (RELATION.ge mpc (kill res rel)) ->
+    (get_rb_sem (Some mpc) rs # res <- vres).
+Proof.
+  simpl.
+  intros until vres.
+  intros SEM GE x.
+  pose proof (GE x) as GEx.
+  pose proof (SEM x) as SEMx.
+  unfold get_r in *.
+  destruct (mpc ! x) as [mpcx | ] in *; trivial.
+  unfold kill in GEx.
+  rewrite PTree.gfilter1 in GEx.
+  destruct (Pos.eq_dec res x) as [ | res_NE_x].
+  {
+    subst res.
+    rewrite PTree.grs in GEx.
+    discriminate.
+  }
+  rewrite PTree.gro in GEx by congruence.
+  rewrite Regmap.gso with (i := x) by congruence.
+  destruct (rel ! x) as [relx | ]; try discriminate.
+  destruct (Pos.eq_dec res relx) as [ res_EQ_relx | res_NE_relx] in *; try discriminate.
+  rewrite Regmap.gso by congruence.
+  congruence.
+Qed.
+  
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs,
+    (fmap_sem (forward_map f) pc rs) ->
+    (is_killed_in_fmap (forward_map f) pc res) ->
+      match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs).
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs m stk'
+                                 (STACKS: list_forall2 match_frames stk stk'),
+      (fmap_sem (forward_map f) pc rs) ->
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma op_cases:
+  forall op,
+  forall args,
+  forall dst,
+  forall s,
+  forall x,
+    (exists src, op=Omove /\ args = src :: nil /\
+                 (apply_instr (Iop op args dst s) x) = Some (move src dst x))
+    \/
+    (apply_instr (Iop op args dst s) x) = Some (kill dst x).
+Proof.
+  destruct op; try (right; simpl; reflexivity).
+  destruct args as [| arg0 args0t]; try (right; simpl; reflexivity).
+  destruct args0t as [| arg1 args1t]; try (right; simpl; reflexivity).
+  left.
+  eauto.
+Qed.
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+              exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+  
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+- (* op *)
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0.
+  rewrite subst_args_ok by assumption.
+  apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto.
+
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  rewrite MPC in GE.
+  rewrite H in GE.
+  
+  destruct (op_cases op args res pc' mpc) as [[src [OP [ARGS MOVE]]] | KILL].
+  {
+    subst op.
+    subst args.
+    rewrite MOVE in GE.
+    simpl in H0.
+    simpl in GE.
+    apply get_rb_sem_ge with (rb2 := Some (move src res mpc)).
+    assumption.
+    replace v with (rs # src) by congruence.
+    apply move_ok.
+    assumption.
+  }
+  rewrite KILL in GE.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  assumption.
+  apply kill_ok.
+  assumption.
+  
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill dst mpc)).
+  {
+    replace (Some (kill dst mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_ok.
+  assumption.
+  
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Istore; eauto.
+  rewrite subst_args_ok; assumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl. tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. constructor; auto. constructor.
+
+  {
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  apply get_rb_sem_ge with (rb2 := Some (kill res mpc)).
+  {
+    replace (Some (kill res mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply kill_weaken.
+  assumption.
+  }
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  assert (RB.ge (map # pc') (apply_instr' (fn_code f) pc (map # pc))) as GE.
+  {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+  }
+  unfold apply_instr' in GE.
+  unfold fmap_sem in *.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  rewrite H in GE.
+  simpl in GE.
+  unfold is_killed_in_fmap, is_killed_in_map.
+  unfold RB.ge in GE.
+  destruct (map # pc') as [mpc'|] eqn:MPC' in *; trivial.
+  eauto.
+  
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+    apply sig_preserved.
+  rewrite subst_args_ok by assumption.
+  constructor. auto.
+  
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+  
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    replace (Some RELATION.top) with (apply_instr' (fn_code f) pc (map # pc)).
+    {
+      eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+      2: apply apply_instr'_bot.
+      simpl. tauto.
+    }
+    unfold apply_instr'.
+    rewrite H.
+    rewrite MPC.
+    reflexivity.
+  }
+  apply top_ok.
+  
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite subst_args_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    destruct b; tauto.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite subst_arg_ok; eassumption.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := map # pc); trivial.
+  replace (map # pc) with (apply_instr' (fn_code f) pc (map # pc)).
+  {
+    eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+    2: apply apply_instr'_bot.
+    simpl.
+    apply list_nth_z_in with (n := Int.unsigned n).
+    assumption.
+  }
+  unfold apply_instr'.
+  unfold get_rb_sem in *.
+  destruct (map # pc) in *; try contradiction.
+  rewrite H.
+  reflexivity.
+  
+(* return *)
+- destruct or as [arg | ].
+  {
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    unfold regmap_optget.
+    rewrite subst_arg_ok by eassumption.
+    constructor; auto.
+  }
+    econstructor; split.
+    eapply exec_Ireturn; eauto.
+    constructor; auto.
+  
+  
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  apply get_rb_sem_ge with (rb2 := Some RELATION.top).
+  {
+    eapply DS.fixpoint_entry with (code := fn_code f) (successors := successors_instr); try eassumption.
+  }
+  apply top_ok.
+  
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    constructor; auto.
+
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+
+  simpl in *.
+  unfold fmap_sem in *.
+  destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+  unfold is_killed_in_fmap in H8.
+  unfold is_killed_in_map in H8.
+  destruct (map # pc) as [mpc |] in *; try contradiction.
+  destruct H8 as [rel' RGE].
+  eapply get_rb_killed; eauto.
+Qed.
+
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 67da47da..785b0a2d 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -238,7 +238,6 @@ type graph = {
    according to their types.  A variable can be forced into class 2
    by giving it a negative spill cost. *)
 
-
 let class_of_reg r =
   if Conventions1.is_float_reg r then 1 else 0
 
diff --git a/backend/Inlining.v b/backend/Inlining.v
index f7ee4166..8c7e1898 100644
--- a/backend/Inlining.v
+++ b/backend/Inlining.v
@@ -364,9 +364,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
   | Iop op args res s =>
       set_instr (spc ctx pc)
                 (Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s))
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       set_instr (spc ctx pc)
-                (Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s))
+                (Iload trap chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s))
   | Istore chunk addr args src s =>
       set_instr (spc ctx pc)
                 (Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s))
@@ -397,9 +397,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
   | Ibuiltin ef args res s =>
       set_instr (spc ctx pc)
                 (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s))
-  | Icond cond args s1 s2 =>
+  | Icond cond args s1 s2 info =>
       set_instr (spc ctx pc)
-                (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2))
+                (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) info)
   | Ijumptable r tbl =>
       set_instr (spc ctx pc)
                 (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl))
diff --git a/backend/Inliningaux.ml b/backend/Inliningaux.ml
index 842e0c93..cf308962 100644
--- a/backend/Inliningaux.ml
+++ b/backend/Inliningaux.ml
@@ -16,8 +16,9 @@ open FSetAVL
 open Maps
 open Op
 open Ordered
-open !RTL
-
+open! RTL
+open Camlcoq
+   
 module PSet = Make(OrderedPositive)
 
 type inlining_info = {
@@ -57,7 +58,7 @@ let used_in_globvar io gv =
 let fun_inline_analysis id io fn =
   let inst io nid = function
     | Iop (op, args, dest, succ) -> used_id io (globals_operation op)
-    | Iload (chunk, addr, args, dest, succ)
+    | Iload (_, chunk, addr, args, dest, succ)
     | Istore (chunk, addr, args, dest, succ) -> used_id io (globals_addressing addr)
     | Ibuiltin (ef, args, dest, succ) -> used_id io (globals_of_builtin_args args)
     | Icall (_, Coq_inr cid, _, _, _)
@@ -83,13 +84,15 @@ let static_called_once id io =
   else
     false
 
-(* To be considered: heuristics based on size of function? *)
+(* D. Monniaux: attempt at heuristic based on size *)
+let small_enough (f : coq_function) =
+  P.to_int (RTL.max_pc_function f) <= !Clflags.option_inline_auto_threshold
 
 let should_inline (io: inlining_info) (id: ident) (f: coq_function) =
   if !Clflags.option_finline then begin
     match C2C.atom_inline id with
     | C2C.Inline -> true
     | C2C.Noinline -> false
-    | C2C.No_specifier -> static_called_once id io
+    | C2C.No_specifier -> static_called_once id io || small_enough f
   end else
     false
diff --git a/backend/Inliningproof.v b/backend/Inliningproof.v
index 181f40bf..c4efaf18 100644
--- a/backend/Inliningproof.v
+++ b/backend/Inliningproof.v
@@ -744,7 +744,7 @@ Lemma match_stacks_free_right:
   match_stacks F m m1' stk stk' sp.
 Proof.
   intros. eapply match_stacks_invariant; eauto.
-  intros. eapply Mem.perm_free_1; eauto.
+  intros. eapply Mem.perm_free_1; eauto with ordered_type.
   intros. eapply Mem.perm_free_3; eauto.
 Qed.
 
@@ -929,6 +929,15 @@ Proof.
   intros. inv H. eauto.
 Qed.
 
+Lemma eval_addressing_none:
+  forall sp' ctx addr rs,
+  eval_addressing ge (Vptr sp' (Ptrofs.repr (dstk ctx))) addr rs = None ->
+  eval_addressing ge (Vptr sp' Ptrofs.zero) (saddr ctx addr) rs = None.
+Proof.
+  intros until rs; intro Heval.
+  destruct addr; destruct rs as [| r0 rs1]; simpl in *; trivial; discriminate.
+Qed.
+
 Theorem step_simulation:
   forall S1 t S2,
   step ge S1 t S2 ->
@@ -976,6 +985,51 @@ Proof.
   apply match_stacks_inside_set_reg; auto.
   apply agree_set_reg; auto.
 
+- (* load notrap1 *)
+  exploit tr_funbody_inv; eauto. intros TR; inv TR.
+  left; econstructor; split.
+  eapply plus_one. eapply exec_Iload_notrap1. eassumption.
+  rewrite eval_addressing_preserved with (ge1:=ge) (ge2:=tge).
+  exploit eval_addressing_inj_none.
+  4: eassumption.
+  intros. eapply symbol_address_inject.
+  eapply match_stacks_inside_globals; eauto.
+  eauto.
+  instantiate (1 := rs'##(sregs ctx args)). eapply agree_val_regs; eauto.
+  rewrite Ptrofs.add_zero_l.
+  apply eval_addressing_none.
+  exact symbols_preserved.
+  econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+
+- (* load notrap2 *)
+  exploit tr_funbody_inv; eauto. intros TR; inv TR.
+
+  exploit eval_addressing_inject.
+    eapply match_stacks_inside_globals; eauto.
+    eexact SP.
+    instantiate (2 := rs##args). instantiate (1 := rs'##(sregs ctx args)). eapply agree_val_regs; eauto.
+    eauto.
+  fold (saddr ctx addr). intros [a' [P Q]].
+
+  destruct (Mem.loadv chunk m' a') eqn:Hload'.
+  + left; econstructor; split.
+    eapply plus_one.
+    eapply exec_Iload; eauto.
+    try (rewrite <- P; apply eval_addressing_preserved; exact symbols_preserved).
+    econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+    
+  + left; econstructor; split.
+    eapply plus_one.
+    eapply exec_Iload_notrap2; eauto.
+    try (rewrite <- P; apply eval_addressing_preserved; exact symbols_preserved).
+    econstructor; eauto.
+  apply match_stacks_inside_set_reg; auto.
+  apply agree_set_reg; auto.
+  
 - (* store *)
   exploit tr_funbody_inv; eauto. intros TR; inv TR.
   exploit eval_addressing_inject.
@@ -1043,7 +1097,7 @@ Proof.
   eapply match_stacks_bound with (bound := sp').
   eapply match_stacks_invariant; eauto.
     intros. eapply Mem.perm_free_3; eauto.
-    intros. eapply Mem.perm_free_1; eauto.
+    intros. eapply Mem.perm_free_1; eauto with ordered_type.
     intros. eapply Mem.perm_free_3; eauto.
   erewrite Mem.nextblock_free; eauto. red in VB; xomega.
   eapply agree_val_regs; eauto.
@@ -1135,7 +1189,7 @@ Proof.
   eapply match_stacks_bound with (bound := sp').
   eapply match_stacks_invariant; eauto.
     intros. eapply Mem.perm_free_3; eauto.
-    intros. eapply Mem.perm_free_1; eauto.
+    intros. eapply Mem.perm_free_1; eauto with ordered_type.
     intros. eapply Mem.perm_free_3; eauto.
   erewrite Mem.nextblock_free; eauto. red in VB; xomega.
   destruct or; simpl. apply agree_val_reg; auto. auto.
@@ -1182,7 +1236,7 @@ Proof.
     subst b1. rewrite D in H8; inv H8. eelim Plt_strict; eauto.
     intros. eapply Mem.perm_alloc_1; eauto.
     intros. exploit Mem.perm_alloc_inv. eexact A. eauto.
-    rewrite dec_eq_false; auto.
+    rewrite dec_eq_false; auto with ordered_type.
   auto. auto. auto. eauto. auto.
   rewrite H5. apply agree_regs_init_regs. eauto. auto. inv H1; auto. congruence. auto.
   eapply Mem.valid_new_block; eauto.
diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v
index c345c942..eba026ec 100644
--- a/backend/Inliningspec.v
+++ b/backend/Inliningspec.v
@@ -270,10 +270,10 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop :=
       Ple res ctx.(mreg) ->
       c!(spc ctx pc) = Some (Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
       tr_instr ctx pc (Iop op args res s) c
-  | tr_load: forall ctx pc c chunk addr args res s,
+  | tr_load: forall ctx pc c trap chunk addr args res s,
       Ple res ctx.(mreg) ->
-      c!(spc ctx pc) = Some (Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
-      tr_instr ctx pc (Iload chunk addr args res s) c
+      c!(spc ctx pc) = Some (Iload trap chunk (saddr ctx addr) (sregs ctx args) (sreg ctx res) (spc ctx s)) ->
+      tr_instr ctx pc (Iload trap chunk addr args res s) c
   | tr_store: forall ctx pc c chunk addr args src s,
       c!(spc ctx pc) = Some (Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s)) ->
       tr_instr ctx pc (Istore chunk addr args src s) c
@@ -312,9 +312,9 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop :=
       match res with BR r => Ple r ctx.(mreg) | _ => True end ->
       c!(spc ctx pc) = Some (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s)) ->
       tr_instr ctx pc (Ibuiltin ef args res s) c
-  | tr_cond: forall ctx pc cond args s1 s2 c,
-      c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2)) ->
-      tr_instr ctx pc (Icond cond args s1 s2) c
+  | tr_cond: forall ctx pc cond args s1 s2 c i,
+      c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) i) ->
+      tr_instr ctx pc (Icond cond args s1 s2 i) c
   | tr_jumptable: forall ctx pc r tbl c,
       c!(spc ctx pc) = Some (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl)) ->
       tr_instr ctx pc (Ijumptable r tbl) c
diff --git a/backend/Json.ml b/backend/Json.ml
index b8f66c08..bd4d6ff9 100644
--- a/backend/Json.ml
+++ b/backend/Json.ml
@@ -10,7 +10,6 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-open Format
 open Camlcoq
 
 
@@ -18,16 +17,21 @@ open Camlcoq
 
 (* Print a string as json string *)
 let pp_jstring oc s =
-  fprintf oc "\"%s\"" s
+  output_string oc "\"";
+  output_string oc s;
+  output_string oc "\""
 
 (* Print a bool as json bool *)
-let pp_jbool oc = fprintf oc "%B"
+let pp_jbool oc b = output_string oc (string_of_bool b)
 
 (* Print an int as json int *)
-let pp_jint oc = fprintf oc "%d"
+let pp_jint oc i = output_string oc (string_of_int i)
 
 (* Print an int32 as json int *)
-let pp_jint32 oc = fprintf oc "%ld"
+let pp_jint32 oc i = output_string oc (Int32.to_string i)
+
+(* Print an int64 as json int *)
+let pp_jint64 oc i = output_string oc (Int64.to_string i)
 
 (* Print optional value *)
 let pp_jopt pp_elem oc = function
@@ -36,15 +40,19 @@ let pp_jopt pp_elem oc = function
 
 (* Print opening and closing curly braces for json dictionaries *)
 let pp_jobject_start pp =
-  fprintf pp "@[<v 1>{"
+  output_string pp "\n{"
 
 let pp_jobject_end pp =
-  fprintf pp "@;<0 -1>}@]"
+  output_string pp "}"
 
 (* Print a member of a json dictionary *)
 let pp_jmember ?(first=false) pp name pp_mem mem =
-  let sep = if first then "" else "," in
-  fprintf pp "%s@ \"%s\": %a" sep name pp_mem mem
+  if not first then output_string pp ",";
+  output_string pp " ";
+  pp_jstring pp name;
+  output_string pp " :";
+  pp_mem pp mem;
+  output_string pp "\n"
 
 (* Print singleton object *)
 let pp_jsingle_object pp name pp_mem mem =
@@ -54,29 +62,31 @@ let pp_jsingle_object pp name pp_mem mem =
 
 (* Print a list as json array *)
 let pp_jarray elem pp l =
-  match l with
-  | []  ->  fprintf pp "[]";
+  let pp_sep () = output_string pp ", " in
+  output_string pp "[";
+  begin match l with
+  | []  -> ()
   | hd::tail ->
-    fprintf pp "@[<v 1>[";
-    fprintf pp "%a" elem hd;
-    List.iter (fun l -> fprintf pp ",@ %a" elem l) tail;
-  fprintf pp "@;<0 -1>]@]"
+    elem pp hd;
+    List.iter (fun l -> pp_sep (); elem pp l) tail;
+  end;
+  output_string pp "]"
 
 (* Helper functions for printing coq integer and floats *)
 let pp_int pp i =
-  fprintf pp "%ld" (camlint_of_coqint i)
+  pp_jint32 pp (camlint_of_coqint i)
 
 let pp_int64 pp i =
-  fprintf pp "%Ld" (camlint64_of_coqint i)
+  pp_jint64 pp (camlint64_of_coqint i)
 
 let pp_float32 pp f =
-  fprintf pp "%ld" (camlint_of_coqint (Floats.Float32.to_bits f))
+  pp_jint32 pp (camlint_of_coqint (Floats.Float32.to_bits f))
 
 let pp_float64 pp f =
-  fprintf pp "%Ld" (camlint64_of_coqint (Floats.Float.to_bits f))
+  pp_jint64 pp (camlint64_of_coqint (Floats.Float.to_bits f))
 
 let pp_z pp z =
-  fprintf pp "%s" (Z.to_string z)
+  output_string pp (Z.to_string z)
 
 (* Helper functions for printing assembler constructs *)
 let pp_atom pp a =
@@ -106,4 +116,4 @@ let reset_id () =
 
 let pp_id_const pp () =
   let i = next_id () in
-  pp_jsingle_object pp "Integer" (fun pp i -> fprintf pp "%d" i) i
+  pp_jsingle_object pp "Integer" pp_jint i
diff --git a/backend/JsonAST.ml b/backend/JsonAST.ml
index 4e57106f..8905e252 100644
--- a/backend/JsonAST.ml
+++ b/backend/JsonAST.ml
@@ -15,7 +15,6 @@ open Asm
 open AST
 open C2C
 open Json
-open Format
 open Sections
 
 
@@ -54,8 +53,8 @@ let pp_section pp sec =
   | Section_ais_annotation -> () (* There should be no info in the debug sections *)
 
 let pp_int_opt pp = function
-  | None -> fprintf pp "0"
-  | Some i -> fprintf pp "%d" i
+  | None -> output_string pp "0"
+  | Some i -> pp_jint pp i
 
 let pp_fundef pp_inst pp (name,fn) =
   let alignment = atom_alignof name
@@ -119,9 +118,8 @@ let pp_program pp pp_inst prog =
   pp_jobject_end pp
 
 let pp_mnemonics pp mnemonic_names =
-  let mnemonic_names = List.sort (String.compare) mnemonic_names in
-  let new_line pp () = pp_print_string pp "\n" in
-  pp_print_list ~pp_sep:new_line pp_print_string pp mnemonic_names
+  let new_line pp () = Format.pp_print_string pp "\n" in
+  Format.pp_print_list ~pp_sep:new_line Format.pp_print_string pp mnemonic_names
 
 let jdump_magic_number = "CompCertJDUMPRelease: " ^ Version.version
 
@@ -153,4 +151,4 @@ let pp_ast pp pp_inst ast sourcename =
     pp_jmember pp "Compilation Unit" pp_jstring sourcename;
     pp_jmember pp "Asm Ast" (fun pp prog -> pp_program pp pp_inst prog) ast;
     pp_jobject_end pp;
-    Format.pp_print_flush pp ()
+    flush pp
diff --git a/backend/JsonAST.mli b/backend/JsonAST.mli
index 7afdce51..c32439e4 100644
--- a/backend/JsonAST.mli
+++ b/backend/JsonAST.mli
@@ -13,4 +13,4 @@
 
 
 val pp_mnemonics : Format.formatter -> string list -> unit
-val pp_ast : Format.formatter -> (Format.formatter  -> Asm.code -> unit) -> (Asm.coq_function AST.fundef, 'a) AST.program  -> string -> unit
+val pp_ast : out_channel -> (out_channel  -> Asm.code -> unit) -> (Asm.coq_function AST.fundef, 'a) AST.program  -> string -> unit
diff --git a/backend/LTL.v b/backend/LTL.v
index 5e7eec8c..3edd60a2 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -29,7 +29,7 @@ Definition node := positive.
 
 Inductive instruction: Type :=
   | Lop (op: operation) (args: list mreg) (res: mreg)
-  | Lload (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg)
+  | Lload (trap : trapping_mode) (chunk: memory_chunk) (addr: addressing) (args: list mreg) (dst: mreg)
   | Lgetstack (sl: slot) (ofs: Z) (ty: typ) (dst: mreg)
   | Lsetstack (src: mreg) (sl: slot) (ofs: Z) (ty: typ)
   | Lstore (chunk: memory_chunk) (addr: addressing) (args: list mreg) (src: mreg)
@@ -37,7 +37,7 @@ Inductive instruction: Type :=
   | Ltailcall (sg: signature) (ros: mreg + ident)
   | Lbuiltin (ef: external_function) (args: list (builtin_arg loc)) (res: builtin_res mreg)
   | Lbranch (s: node)
-  | Lcond (cond: condition) (args: list mreg) (s1 s2: node)
+  | Lcond (cond: condition) (args: list mreg) (s1 s2: node) (info: option bool)
   | Ljumptable (arg: mreg) (tbl: list node)
   | Lreturn.
 
@@ -209,11 +209,24 @@ Inductive step: state -> trace -> state -> Prop :=
       rs' = Locmap.set (R res) v (undef_regs (destroyed_by_op op) rs) ->
       step (Block s f sp (Lop op args res :: bb) rs m)
         E0 (Block s f sp bb rs' m)
-  | exec_Lload: forall s f sp chunk addr args dst bb rs m a v rs',
+  | exec_Lload: forall s f sp trap chunk addr args dst bb rs m a v rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
-      step (Block s f sp (Lload chunk addr args dst :: bb) rs m)
+      step (Block s f sp (Lload trap chunk addr args dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lload_notrap1: forall s f sp chunk addr args dst bb rs m rs',
+      eval_addressing ge sp addr (reglist rs args) = None ->
+      rs' = Locmap.set (R dst)  (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
+        E0 (Block s f sp bb rs' m)
+  | exec_Lload_notrap2: forall s f sp chunk addr args dst bb rs m a rs',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = Locmap.set (R dst) (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
         E0 (Block s f sp bb rs' m)
   | exec_Lgetstack: forall s f sp sl ofs ty dst bb rs m rs',
       rs' = Locmap.set (R dst) (rs (S sl ofs ty)) (undef_regs (destroyed_by_getstack sl) rs) ->
@@ -250,11 +263,11 @@ Inductive step: state -> trace -> state -> Prop :=
   | exec_Lbranch: forall s f sp pc bb rs m,
       step (Block s f sp (Lbranch pc :: bb) rs m)
         E0 (State s f sp pc rs m)
-  | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m,
+  | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m i,
       eval_condition cond (reglist rs args) m = Some b ->
       pc = (if b then pc1 else pc2) ->
       rs' = undef_regs (destroyed_by_cond cond) rs ->
-      step (Block s f sp (Lcond cond args pc1 pc2 :: bb) rs m)
+      step (Block s f sp (Lcond cond args pc1 pc2 i :: bb) rs m)
         E0 (State s f sp pc rs' m)
   | exec_Ljumptable: forall s f sp arg tbl bb rs m n pc rs',
       rs (R arg) = Vint n ->
@@ -315,7 +328,7 @@ Fixpoint successors_block (b: bblock) : list node :=
   | nil => nil                          (**r should never happen *)
   | Ltailcall _ _ :: _ => nil
   | Lbranch s :: _ => s :: nil
-  | Lcond _ _ s1 s2 :: _ => s1 :: s2 :: nil
+  | Lcond _ _ s1 s2 _ :: _ => s1 :: s2 :: nil
   | Ljumptable _ tbl :: _ => tbl
   | Lreturn :: _ => nil
   | instr :: b' => successors_block b'
diff --git a/backend/Linear.v b/backend/Linear.v
index 447c6ba6..1443f795 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -28,7 +28,7 @@ Inductive instruction: Type :=
   | Lgetstack: slot -> Z -> typ -> mreg -> instruction
   | Lsetstack: mreg -> slot -> Z -> typ -> instruction
   | Lop: operation -> list mreg -> mreg -> instruction
-  | Lload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
+  | Lload: trapping_mode -> memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Lcall: signature -> mreg + ident -> instruction
   | Ltailcall: signature -> mreg + ident -> instruction
@@ -160,11 +160,28 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Lop op args res :: b) rs m)
         E0 (State s f sp b rs' m)
   | exec_Lload:
-      forall s f sp chunk addr args dst b rs m a v rs',
+      forall s f sp trap chunk addr args dst b rs m a v rs',
       eval_addressing ge sp addr (reglist rs args) = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = Locmap.set (R dst) v (undef_regs (destroyed_by_load chunk addr) rs) ->
-      step (State s f sp (Lload chunk addr args dst :: b) rs m)
+      step (State s f sp (Lload trap chunk addr args dst :: b) rs m)
+        E0 (State s f sp b rs' m)
+  | exec_Lload_notrap1:
+      forall s f sp chunk addr args dst b rs m rs',
+      eval_addressing ge sp addr (reglist rs args) = None ->
+      rs' = Locmap.set (R dst)
+                       (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
+        E0 (State s f sp b rs' m)
+  | exec_Lload_notrap2:
+      forall s f sp chunk addr args dst b rs m a rs',
+      eval_addressing ge sp addr (reglist rs args) = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = Locmap.set (R dst)
+                       (default_notrap_load_value chunk)
+                       (undef_regs (destroyed_by_load chunk addr) rs) ->
+      step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
         E0 (State s f sp b rs' m)
   | exec_Lstore:
       forall s f sp chunk addr args src b rs m m' a rs',
diff --git a/backend/Linearize.v b/backend/Linearize.v
index 2cfa4d3c..66b36428 100644
--- a/backend/Linearize.v
+++ b/backend/Linearize.v
@@ -163,8 +163,8 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
   | nil => k
   | LTL.Lop op args res :: b' =>
       Lop op args res :: linearize_block b' k
-  | LTL.Lload chunk addr args dst :: b' =>
-      Lload chunk addr args dst :: linearize_block b' k
+  | LTL.Lload trap chunk addr args dst :: b' =>
+      Lload trap chunk addr args dst :: linearize_block b' k
   | LTL.Lgetstack sl ofs ty dst :: b' =>
       Lgetstack sl ofs ty dst :: linearize_block b' k
   | LTL.Lsetstack src sl ofs ty :: b' =>
@@ -179,7 +179,7 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
       Lbuiltin ef args res :: linearize_block b' k
   | LTL.Lbranch s :: b' =>
       add_branch s k
-  | LTL.Lcond cond args s1 s2 :: b' =>
+  | LTL.Lcond cond args s1 s2 _ :: b' =>
       if starts_with s1 k then
         Lcond (negate_condition cond) args s2 :: add_branch s1 k
       else
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index 902724e0..bfa056ca 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -1,4 +1,4 @@
-(* *********************************************************************)
+
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
@@ -12,7 +12,6 @@
 
 open LTL
 open Maps
-open Camlcoq
 
 (* Trivial enumeration, in decreasing order of PC *)
 
@@ -29,6 +28,8 @@ let enumerate_aux f reach =
 
 (* More clever enumeration that flattens basic blocks *)
 
+open Camlcoq
+
 module IntSet = Set.Make(struct type t = int let compare = compare end)
 
 (* Determine join points: reachable nodes that have > 1 predecessor *)
@@ -80,7 +81,7 @@ let basic_blocks f joins =
        | [] -> assert false
        | Lbranch s :: _ -> next_in_block blk minpc s
        | Ltailcall (sig0, ros) :: _ -> end_block blk minpc
-       | Lcond (cond, args, ifso, ifnot) :: _ ->
+       | Lcond (cond, args, ifso, ifnot, _) :: _ ->
              end_block blk minpc; start_block ifso; start_block ifnot
        | Ljumptable(arg, tbl) :: _ ->
              end_block blk minpc; List.iter start_block tbl
@@ -110,5 +111,405 @@ let flatten_blocks blks =
 
 (* Build the enumeration *)
 
-let enumerate_aux f reach =
+let enumerate_aux_flat f reach =
   flatten_blocks (basic_blocks f (join_points f))
+
+(**
+ * Enumeration based on traces as identified by Duplicate.v 
+ *
+ * The Duplicate phase heuristically identifies the most frequented paths. Each
+ * Icond is modified so that the preferred condition is a fallthrough (ifnot)
+ * rather than a branch (ifso).
+ *
+ * The enumeration below takes advantage of this - preferring to layout nodes
+ * following the fallthroughs of the Lcond branches.
+ *
+ * It is slightly adapted from the work of Petris and Hansen 90 on intraprocedural
+ * code positioning - only we do it on a broader grain, since we don't have the exact
+ * frequencies (we only know which branch is the preferred one)
+ *)
+
+let get_some = function
+| None -> failwith "Did not get some"
+| Some thing -> thing
+
+exception EmptyList
+
+let rec last_element = function
+  | [] -> raise EmptyList
+  | e :: [] -> e
+  | e' :: e :: l -> last_element (e::l)
+
+let print_plist l =
+  let rec f = function
+  | [] -> ()
+  | n :: l -> Printf.printf "%d, " (P.to_int n); f l
+  in begin
+    Printf.printf "[";
+    f l;
+    Printf.printf "]"
+  end
+
+(* adapted from the above join_points function, but with PTree *)
+let get_join_points code entry =
+  let reached = ref (PTree.map (fun n i -> false) code) in
+  let reached_twice = ref (PTree.map (fun n i -> false) code) in
+  let rec traverse pc =
+    if get_some @@ PTree.get pc !reached then begin
+      if not (get_some @@ PTree.get pc !reached_twice) then
+        reached_twice := PTree.set pc true !reached_twice
+    end else begin
+      reached := PTree.set pc true !reached;
+      traverse_succs (successors_block @@ get_some @@ PTree.get pc code)
+    end
+  and traverse_succs = function
+    | [] -> ()
+    | [pc] -> traverse pc
+    | pc :: l -> traverse pc; traverse_succs l
+  in traverse entry; !reached_twice
+
+let forward_sequences code entry =
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let join_points = get_join_points code entry in
+  (* returns the list of traversed nodes, and a list of nodes to start traversing next *)
+  let rec traverse_fallthrough code node =
+    (* Printf.printf "Traversing %d..\n" (P.to_int node); *)
+    if not (get_some @@ PTree.get node !visited) then begin
+      visited := PTree.set node true !visited;
+      match PTree.get node code with
+      | None -> failwith "No such node"
+      | Some bb ->
+          let ln, rem = match (last_element bb) with
+          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+          | Lbuiltin _ -> assert false
+          | Ltailcall _ | Lreturn -> begin (* Printf.printf "STOP tailcall/return\n"; *) ([], []) end
+          | Lbranch n ->
+              if get_some @@ PTree.get n join_points then ([], [n])
+              else let ln, rem = traverse_fallthrough code n in (ln, rem)
+          | Lcond (_, _, ifso, ifnot, info) -> (match info with
+            | None -> begin (* Printf.printf "STOP Lcond None\n"; *) ([], [ifso; ifnot]) end
+            | Some false ->
+                if get_some @@ PTree.get ifnot join_points then ([], [ifso; ifnot])
+                else let ln, rem = traverse_fallthrough code ifnot in (ln, [ifso] @ rem)
+            | Some true ->
+                let errstr = Printf.sprintf ("Inconsistency detected in node %d: ifnot is not the preferred branch") (P.to_int node) in
+                  failwith errstr)
+          | Ljumptable(_, ln) -> begin (* Printf.printf "STOP Ljumptable\n"; *) ([], ln) end
+          in ([node] @ ln, rem)
+      end
+    else ([], [])
+  in let rec f code = function
+  | [] -> []
+  | node :: ln ->
+      let fs, rem_from_node = traverse_fallthrough code node
+      in [fs] @ ((f code rem_from_node) @ (f code ln))
+  in (f code [entry])
+
+(** Unused code
+module PInt = struct
+  type t = P.t
+  let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+module LPInt = struct
+  type t = P.t list
+  let rec compare x y =
+    match x with
+    | [] -> ( match y with
+      | [] -> 0
+      | _ -> 1 )
+    | e :: l -> match y with
+      | [] -> -1
+      | e' :: l' ->
+          let e_cmp = PInt.compare e e' in
+          if e_cmp == 0 then compare l l' else e_cmp
+end
+
+module LPSet = Set.Make(LPInt)
+
+let iter_lpset f s = Seq.iter f (LPSet.to_seq s)
+
+let first_of = function
+  | [] -> None
+  | e :: l -> Some e
+
+let rec last_of = function
+  | [] -> None
+  | e :: l -> (match l with [] -> Some e | e :: l -> last_of l)
+
+let can_be_merged code s s' =
+  let last_s = get_some @@ last_of s in
+  let first_s' = get_some @@ first_of s' in
+  match get_some @@ PTree.get last_s code with
+  | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+  | Lbuiltin _ | Ltailcall _ | Lreturn -> false
+  | Lbranch n -> n == first_s'
+  | Lcond (_, _, ifso, ifnot, info) -> (match info with
+    | None -> false
+    | Some false -> ifnot == first_s'
+    | Some true -> failwith "Inconsistency detected - ifnot is not the preferred branch")
+  | Ljumptable (_, ln) ->
+      match ln with
+      | [] -> false
+      | n :: ln -> n == first_s'
+
+let merge s s' = Some s
+
+let try_merge code (fs: (BinNums.positive list) list) =
+  let seqs = ref (LPSet.of_list fs) in
+  let oldLength = ref (LPSet.cardinal !seqs) in
+  let continue = ref true in
+  let found = ref false in
+  while !continue do
+    begin
+      found := false;
+      iter_lpset (fun s ->
+        if !found then ()
+        else iter_lpset (fun s' ->
+          if (!found || s == s') then ()
+          else if (can_be_merged code s s') then
+            begin
+              seqs := LPSet.remove s !seqs;
+              seqs := LPSet.remove s' !seqs;
+              seqs := LPSet.add (get_some (merge s s')) !seqs;
+              found := true;
+            end
+          else ()
+        ) !seqs
+      ) !seqs;
+      if !oldLength == LPSet.cardinal !seqs then
+        continue := false
+      else
+        oldLength := LPSet.cardinal !seqs
+    end
+  done;
+  !seqs
+*)
+
+(** Code adapted from Duplicateaux.get_loop_headers
+  *
+  * Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come accross an edge to a Processed node, it's a loop!
+  *)
+type pos = BinNums.positive
+
+module PP = struct
+  type t = pos * pos
+  let compare a b =
+    let ax, ay = a in
+    let bx, by = b in
+    let dx = compare ax bx in
+    if (dx == 0) then compare ay by
+    else dx
+end
+
+module PPMap = Map.Make(PP)
+
+type vstate = Unvisited | Processed | Visited
+
+let get_loop_edges code entry =
+  let visited = ref (PTree.map (fun n i -> Unvisited) code) in
+  let is_loop_edge = ref PPMap.empty
+  in let rec dfs_visit code from = function
+  | [] -> ()
+  | node :: ln ->
+      match (get_some @@ PTree.get node !visited) with
+      | Visited -> ()
+      | Processed -> begin
+          let from_node = get_some from in
+          is_loop_edge := PPMap.add (from_node, node) true !is_loop_edge;
+          visited := PTree.set node Visited !visited
+        end
+      | Unvisited -> begin
+          visited := PTree.set node Processed !visited;
+          let bb = get_some @@ PTree.get node code in
+          let next_visits = (match (last_element bb) with
+          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+          | Lbuiltin _ -> assert false
+          | Ltailcall _ | Lreturn -> []
+          | Lbranch n -> [n]
+          | Lcond (_, _, ifso, ifnot, _) -> [ifso; ifnot]
+          | Ljumptable(_, ln) -> ln
+          ) in dfs_visit code (Some node) next_visits;
+          visited := PTree.set node Visited !visited;
+          dfs_visit code from ln
+        end
+  in begin
+    dfs_visit code None [entry];
+    !is_loop_edge
+  end
+
+let ppmap_is_true pp ppmap = PPMap.mem pp ppmap && PPMap.find pp ppmap
+
+module Int = struct
+  type t = int
+  let compare x y = compare x y
+end
+
+module ISet = Set.Make(Int)
+
+let print_iset s = begin
+  Printf.printf "{";
+  ISet.iter (fun e -> Printf.printf "%d, " e) s;
+  Printf.printf "}"
+end
+
+let print_depmap dm = begin
+  Printf.printf "[|";
+  Array.iter (fun s -> print_iset s; Printf.printf ", ") dm;
+  Printf.printf "|]\n"
+end
+
+let construct_depmap code entry fs =
+  let is_loop_edge = get_loop_edges code entry in
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let depmap = Array.map (fun e -> ISet.empty) fs in
+  let find_index_of_node n =
+    let index = ref 0 in
+    begin
+      Array.iteri (fun i s ->
+        match List.find_opt (fun e -> e == n) s with
+        | Some _ -> index := i
+        | None -> ()
+      ) fs;
+      !index
+    end
+  in let check_and_update_depmap from target =
+    (* Printf.printf "From %d to %d\n" (P.to_int from) (P.to_int target); *)
+    if not (ppmap_is_true (from, target) is_loop_edge) then
+      let in_index_fs = find_index_of_node from in
+      let out_index_fs = find_index_of_node target in
+      if out_index_fs != in_index_fs then
+        depmap.(out_index_fs) <- ISet.add in_index_fs depmap.(out_index_fs)
+      else ()
+    else ()
+  in let rec dfs_visit code = function
+  | [] -> ()
+  | node :: ln ->
+      begin
+        match (get_some @@ PTree.get node !visited) with
+        | true -> ()
+        | false -> begin
+            visited := PTree.set node true !visited;
+            let bb = get_some @@ PTree.get node code in
+            let next_visits =
+              match (last_element bb) with
+              | Ltailcall _ | Lreturn -> []
+              | Lbranch n -> (check_and_update_depmap node n; [n])
+              | Lcond (_, _, ifso, ifnot, _) -> begin
+                  check_and_update_depmap node ifso;
+                  check_and_update_depmap node ifnot;
+                  [ifso; ifnot]
+                end
+              | Ljumptable(_, ln) -> begin
+                  List.iter (fun n -> check_and_update_depmap node n) ln;
+                  ln
+                end
+              (* end of bblocks should not be another value than one of the above *)
+              | _ -> failwith "last_element gave an invalid output"
+            in dfs_visit code next_visits
+          end;
+        dfs_visit code ln
+      end
+  in begin
+    dfs_visit code [entry];
+    depmap
+  end
+
+let print_sequence s =
+  Printf.printf "[";
+  List.iter (fun n -> Printf.printf "%d, " (P.to_int n)) s;
+  Printf.printf "]\n"
+
+let print_ssequence ofs =
+  Printf.printf "[";
+  List.iter (fun s -> print_sequence s) ofs;
+  Printf.printf "]\n"
+
+let order_sequences code entry fs =
+  let fs_a = Array.of_list fs in
+  let depmap = construct_depmap code entry fs_a in
+  let fs_evaluated = Array.map (fun e -> false) fs_a in
+  let ordered_fs = ref [] in
+  let evaluate s_id =
+    begin
+      assert (not fs_evaluated.(s_id));
+      ordered_fs := fs_a.(s_id) :: !ordered_fs;
+      fs_evaluated.(s_id) <- true;
+      (* Printf.printf "++++++\n";
+      Printf.printf "Scheduling %d\n" s_id;
+      Printf.printf "Initial depmap: "; print_depmap depmap; *)
+      Array.iteri (fun i deps ->
+        depmap.(i) <- ISet.remove s_id deps
+      ) depmap;
+      (* Printf.printf "Final depmap: "; print_depmap depmap; *)
+    end
+  in let choose_best_of candidates =
+    let current_best_id = ref None in
+    let current_best_score = ref None in
+    begin
+      List.iter (fun id ->
+        match !current_best_id with
+        | None -> begin
+            current_best_id := Some id;
+            match fs_a.(id) with
+            | [] -> current_best_score := None
+            | n::l -> current_best_score := Some (P.to_int n)
+          end
+        | Some b -> begin
+            match fs_a.(id) with
+            | [] -> ()
+            | n::l -> let nscore = P.to_int n in
+              match !current_best_score with
+              | None -> (current_best_id := Some id; current_best_score := Some nscore)
+              | Some bs -> if nscore > bs then (current_best_id := Some id; current_best_score := Some nscore)
+          end
+      ) candidates;
+      !current_best_id
+    end
+  in let select_next () =
+    let candidates = ref [] in
+    begin
+      Array.iteri (fun i deps ->
+        begin
+          (* Printf.printf "Deps of %d: " i; print_iset deps; Printf.printf "\n"; *)
+          (* FIXME - if we keep it that way (no dependency check), remove all the unneeded stuff *)
+          if ((* deps == ISet.empty && *) not fs_evaluated.(i)) then
+            candidates := i :: !candidates
+        end
+      ) depmap;
+      if not (List.length !candidates > 0) then begin
+        Array.iteri (fun i deps ->
+          if (not fs_evaluated.(i)) then candidates := i :: !candidates
+        ) depmap;
+      end;
+      get_some (choose_best_of !candidates)
+    end
+  in begin
+    Printf.printf "-------------------------------\n";
+    Printf.printf "depmap: "; print_depmap depmap;
+    Printf.printf "forward sequences identified: "; print_ssequence fs;
+    while List.length !ordered_fs != List.length fs do
+      let next_id = select_next () in
+      evaluate next_id
+    done;
+    Printf.printf "forward sequences ordered: "; print_ssequence (List.rev (!ordered_fs));
+    List.rev (!ordered_fs)
+  end
+
+let enumerate_aux_trace f reach =
+  let code = f.fn_code in
+  let entry = f.fn_entrypoint in
+  let fs = forward_sequences code entry in
+  let ofs = order_sequences code entry fs in
+  List.flatten ofs
+
+let enumerate_aux f reach =
+  if !Clflags.option_ftracelinearize then enumerate_aux_trace f reach
+  else enumerate_aux_flat f reach
diff --git a/backend/Linearizeproof.v b/backend/Linearizeproof.v
index 10a3d8b2..18dc52a5 100644
--- a/backend/Linearizeproof.v
+++ b/backend/Linearizeproof.v
@@ -585,45 +585,61 @@ Proof.
   intros; eapply reachable_successors; eauto.
   eapply is_tail_lin_block; eauto. eapply is_tail_find_label; eauto.
 
-  (* Lop *)
+- (* Lop *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   instantiate (1 := v); rewrite <- H; apply eval_operation_preserved.
   exact symbols_preserved.
   econstructor; eauto.
 
-  (* Lload *)
+- (* Lload *)
   left; econstructor; split. simpl.
-  apply plus_one. econstructor.
+  apply plus_one. eapply exec_Lload.
   instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved.
   exact symbols_preserved. eauto. eauto.
   econstructor; eauto.
 
-  (* Lgetstack *)
+- (* Lload notrap1 *)
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lload_notrap1.
+  rewrite <- H.
+  apply eval_addressing_preserved.
+  exact symbols_preserved. eauto.
+  econstructor; eauto.
+  
+- (* Lload notrap2 *)
+  left; econstructor; split. simpl.
+  apply plus_one. eapply exec_Lload_notrap2.
+  rewrite <- H.
+  apply eval_addressing_preserved.
+  exact symbols_preserved. eauto. eauto.
+  econstructor; eauto.
+
+- (* Lgetstack *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   econstructor; eauto.
 
-  (* Lsetstack *)
+- (* Lsetstack *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   econstructor; eauto.
 
-  (* Lstore *)
+- (* Lstore *)
   left; econstructor; split. simpl.
   apply plus_one. econstructor.
   instantiate (1 := a). rewrite <- H; apply eval_addressing_preserved.
   exact symbols_preserved. eauto. eauto.
   econstructor; eauto.
 
-  (* Lcall *)
+- (* Lcall *)
   exploit find_function_translated; eauto. intros [tfd [A B]].
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
   symmetry; eapply sig_preserved; eauto.
   econstructor; eauto. constructor; auto. econstructor; eauto.
 
-  (* Ltailcall *)
+- (* Ltailcall *)
   exploit find_function_translated; eauto. intros [tfd [A B]].
   left; econstructor; split. simpl.
   apply plus_one. econstructor; eauto.
@@ -633,18 +649,18 @@ Proof.
   rewrite (match_parent_locset _ _ STACKS).
   econstructor; eauto.
 
-  (* Lbuiltin *)
+- (* Lbuiltin *)
   left; econstructor; split. simpl.
   apply plus_one. eapply exec_Lbuiltin; eauto.
   eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
   eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 
-  (* Lbranch *)
+- (* Lbranch *)
   assert ((reachable f)!!pc = true). apply REACH; simpl; auto.
   right; split. simpl; omega. split. auto. simpl. econstructor; eauto.
 
-  (* Lcond *)
+- (* Lcond *)
   assert (REACH1: (reachable f)!!pc1 = true) by (apply REACH; simpl; auto).
   assert (REACH2: (reachable f)!!pc2 = true) by (apply REACH; simpl; auto).
   simpl linearize_block.
@@ -670,18 +686,18 @@ Proof.
   apply plus_one. eapply exec_Lcond_false. eauto. eauto.
   econstructor; eauto.
 
-  (* Ljumptable *)
+- (* Ljumptable *)
   assert (REACH': (reachable f)!!pc = true).
     apply REACH. simpl. eapply list_nth_z_in; eauto.
   right; split. simpl; omega. split. auto. econstructor; eauto.
 
-  (* Lreturn *)
+- (* Lreturn *)
   left; econstructor; split.
   simpl. apply plus_one. econstructor; eauto.
   rewrite (stacksize_preserved _ _ TRF). eauto.
   rewrite (match_parent_locset _ _ STACKS). econstructor; eauto.
 
-  (* internal functions *)
+- (* internal functions *)
   assert (REACH: (reachable f)!!(LTL.fn_entrypoint f) = true).
     apply reachable_entrypoint.
   monadInv H7.
@@ -691,13 +707,13 @@ Proof.
   generalize EQ; intro EQ'; monadInv EQ'. simpl.
   econstructor; eauto. simpl. eapply is_tail_add_branch. constructor.
 
-  (* external function *)
+- (* external function *)
   monadInv H8. left; econstructor; split.
   apply plus_one. eapply exec_function_external; eauto.
   eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 
-  (* return *)
+- (* return *)
   inv H3. inv H1.
   left; econstructor; split.
   apply plus_one. econstructor.
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 1fe23a9d..3fe61470 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -76,7 +76,7 @@ Definition wt_instr (i: instruction) : bool :=
           let (targs, tres) := type_of_operation op in
           subtype tres (mreg_type res)
       end
-  | Lload chunk addr args dst =>
+  | Lload trap chunk addr args dst =>
       subtype (type_of_chunk chunk) (mreg_type dst)
   | Ltailcall sg ros =>
       zeq (size_arguments sg) 0
@@ -321,17 +321,34 @@ Local Opaque mreg_type.
   + (* other ops *)
     destruct (type_of_operation op) as [ty_args ty_res] eqn:TYOP. InvBooleans.
     econstructor; eauto.
+ 
     apply wt_setreg; auto; try (apply wt_undef_regs; auto).
     eapply Val.has_subtype; eauto.
+    
     change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto.
     red; intros; subst op. simpl in ISMOVE.
-    destruct args; try discriminate. destruct args; discriminate. 
+    destruct args; try discriminate. destruct args; discriminate.
+    (* no longer needed apply wt_undef_regs; auto. *)
 - (* load *)
   simpl in *; InvBooleans.
   econstructor; eauto.
   apply wt_setreg. eapply Val.has_subtype; eauto.
   destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
   apply wt_undef_regs; auto.
+- (* load notrap1 *)
+  simpl in *; InvBooleans.
+  econstructor; eauto.
+  apply wt_setreg. eapply Val.has_subtype; eauto.
+  unfold default_notrap_load_value.
+  constructor.
+  apply wt_undef_regs; auto.
+- (* load notrap2 *)
+  simpl in *; InvBooleans.
+  econstructor; eauto.
+  apply wt_setreg. eapply Val.has_subtype; eauto.
+  unfold default_notrap_load_value.
+  constructor.
+  apply wt_undef_regs; auto.
 - (* store *)
   simpl in *; InvBooleans.
   econstructor. eauto. eauto. eauto.
diff --git a/backend/Liveness.v b/backend/Liveness.v
index 16533158..9652b363 100644
--- a/backend/Liveness.v
+++ b/backend/Liveness.v
@@ -79,7 +79,7 @@ Definition transfer
             reg_list_live args (reg_dead res after)
           else
             after
-      | Iload chunk addr args dst s =>
+      | Iload trap chunk addr args dst s =>
           if Regset.mem dst after then
             reg_list_live args (reg_dead dst after)
           else
@@ -94,7 +94,7 @@ Definition transfer
       | Ibuiltin ef args res s =>
           reg_list_live (params_of_builtin_args args)
             (reg_list_dead (params_of_builtin_res res) after)
-      | Icond cond args ifso ifnot =>
+      | Icond cond args ifso ifnot _ =>
           reg_list_live args after
       | Ijumptable arg tbl =>
           reg_live arg after
diff --git a/backend/Mach.v b/backend/Mach.v
index 9fdee9eb..1c6fdb18 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -56,7 +56,7 @@ Inductive instruction: Type :=
   | Msetstack: mreg -> ptrofs -> typ -> instruction
   | Mgetparam: ptrofs -> typ -> mreg -> instruction
   | Mop: operation -> list mreg -> mreg -> instruction
-  | Mload: memory_chunk -> addressing -> list mreg -> mreg -> instruction
+  | Mload: trapping_mode -> memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction
   | Mcall: signature -> mreg + ident -> instruction
   | Mtailcall: signature -> mreg + ident -> instruction
@@ -321,11 +321,24 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp (Mop op args res :: c) rs m)
         E0 (State s f sp c rs' m)
   | exec_Mload:
-      forall s f sp chunk addr args dst c rs m a v rs',
+      forall s f sp trap chunk addr args dst c rs m a v rs',
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- v) ->
-      step (State s f sp (Mload chunk addr args dst :: c) rs m)
+      step (State s f sp (Mload trap chunk addr args dst :: c) rs m)
+        E0 (State s f sp c rs' m)
+  | exec_Mload_notrap1:
+      forall s f sp chunk addr args dst c rs m rs',
+      eval_addressing ge sp addr rs##args = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+      step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
+        E0 (State s f sp c rs' m)
+  | exec_Mload_notrap2:
+      forall s f sp chunk addr args dst c rs m a rs',
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+      step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
         E0 (State s f sp c rs' m)
   | exec_Mstore:
       forall s f sp chunk addr args src c rs m m' a rs',
diff --git a/backend/NeedDomain.v b/backend/NeedDomain.v
index b35c90b2..3c2d8e20 100644
--- a/backend/NeedDomain.v
+++ b/backend/NeedDomain.v
@@ -594,7 +594,8 @@ Proof.
 Qed.
 
 (** Modular arithmetic operations: add, mul, opposite.
-    (But not subtraction because of the pointer - pointer case. *)
+    Also subtraction, but only on 64-bit targets, otherwise
+    the pointer - pointer case does not fit. *)
 
 Definition modarith (x: nval) :=
   match x with
@@ -615,6 +616,19 @@ Proof.
 - inv H; auto. inv H0; auto. destruct w1; auto.
 Qed.
 
+Lemma sub_sound:
+  forall v1 w1 v2 w2 x,
+  vagree v1 w1 (modarith x) -> vagree v2 w2 (modarith x) ->
+  Archi.ptr64 = true ->
+  vagree (Val.sub v1 v2) (Val.sub w1 w2) x.
+Proof.
+  unfold modarith; intros. destruct x; simpl in *.
+- auto.
+- unfold Val.sub; rewrite H1; InvAgree.
+  apply eqmod_iagree. apply eqmod_sub; apply iagree_eqmod; auto.
+- inv H; auto. inv H0; auto. destruct w1; auto.
+Qed.
+
 Remark modarith_idem: forall nv, modarith (modarith nv) = modarith nv.
 Proof.
   destruct nv; simpl; auto. f_equal; apply complete_mask_idem.
@@ -680,7 +694,7 @@ Definition sign_ext (n: Z) (x: nval) :=
 Lemma sign_ext_sound:
   forall v w x n,
   vagree v w (sign_ext n x) ->
-  0 < n < Int.zwordsize ->
+  0 < n ->
   vagree (Val.sign_ext n v) (Val.sign_ext n w) x.
 Proof.
   unfold sign_ext; intros. destruct x; simpl in *.
@@ -889,7 +903,8 @@ Lemma default_needs_of_operation_sound:
   eval_operation ge (Vptr sp Ptrofs.zero) op args1 m1 = Some v1 ->
   vagree_list args1 args2 nil
   \/ vagree_list args1 args2 (default nv :: nil)
-  \/ vagree_list args1 args2 (default nv :: default nv :: nil) ->
+  \/ vagree_list args1 args2 (default nv :: default nv :: nil)
+  \/ vagree_list args1 args2 (default nv :: default nv :: default nv :: nil) ->
   nv <> Nothing ->
   exists v2,
      eval_operation ge (Vptr sp Ptrofs.zero) op args2 m2 = Some v2
@@ -901,7 +916,8 @@ Proof.
   {
     destruct H0. auto with na.
     destruct H0. inv H0; constructor; auto with na.
-    inv H0; constructor; auto with na. inv H8; constructor; auto with na.
+    destruct H0. inv H0. constructor. inv H8; constructor; auto with na. 
+    inv H0; constructor; auto with na. inv H8; constructor; auto with na. inv H9; constructor; auto with na.
   }
   exploit (@eval_operation_inj _ _ _ _ ge ge inject_id).
   eassumption. auto. auto. auto.
diff --git a/backend/OpHelpers.v b/backend/OpHelpers.v
index 53414dab..b9b97903 100644
--- a/backend/OpHelpers.v
+++ b/backend/OpHelpers.v
@@ -6,16 +6,16 @@ Require Import Op CminorSel.
   runtime library functions.  The following type class collects
   the names of these functions. *)
 
-Definition sig_l_l := mksignature (Tlong :: nil) (Some Tlong) cc_default.
-Definition sig_l_f := mksignature (Tlong :: nil) (Some Tfloat) cc_default.
-Definition sig_l_s := mksignature (Tlong :: nil) (Some Tsingle) cc_default.
-Definition sig_f_l := mksignature (Tfloat :: nil) (Some Tlong) cc_default.
-Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) (Some Tlong) cc_default.
-Definition sig_li_l := mksignature (Tlong :: Tint :: nil) (Some Tlong) cc_default.
-Definition sig_ii_l := mksignature (Tint :: Tint :: nil) (Some Tlong) cc_default.
-Definition sig_ii_i := mksignature (Tint :: Tint :: nil) (Some Tint) cc_default.
-Definition sig_ff_f := mksignature (Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default.
-Definition sig_ss_s := mksignature (Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default.
+Definition sig_l_l := mksignature (Tlong :: nil) Tlong cc_default.
+Definition sig_l_f := mksignature (Tlong :: nil) Tfloat cc_default.
+Definition sig_l_s := mksignature (Tlong :: nil) Tsingle cc_default.
+Definition sig_f_l := mksignature (Tfloat :: nil) Tlong cc_default.
+Definition sig_ll_l := mksignature (Tlong :: Tlong :: nil) Tlong cc_default.
+Definition sig_li_l := mksignature (Tlong :: Tint :: nil) Tlong cc_default.
+Definition sig_ii_l := mksignature (Tint :: Tint :: nil) Tlong cc_default.
+Definition sig_ii_i := mksignature (Tint :: Tint :: nil) Tint cc_default.
+Definition sig_ff_f := mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default.
+Definition sig_ss_s := mksignature (Tsingle :: Tsingle :: nil) Tsingle cc_default.
 
 Class helper_functions := mk_helper_functions {
   i64_dtos: ident;                      (**r float64 -> signed long *)
diff --git a/backend/PrintAsmaux.ml b/backend/PrintAsmaux.ml
index 8652b2c5..d82e6f84 100644
--- a/backend/PrintAsmaux.ml
+++ b/backend/PrintAsmaux.ml
@@ -99,7 +99,7 @@ let exists_constants () =
 
 let current_function_stacksize = ref 0l
 let current_function_sig =
-  ref { sig_args = []; sig_res = None; sig_cc = cc_default }
+  ref { sig_args = []; sig_res = Tvoid; sig_cc = cc_default }
 
 (* Functions for printing of symbol names *)
 let elf_symbol oc symb =
@@ -268,8 +268,8 @@ let re_asm_param_2 = Str.regexp "%\\([QR]?\\)\\([0-9]+\\)"
 let print_inline_asm print_preg oc txt sg args res =
   let (operands, ty_operands) =
     match sg.sig_res with
-    | None -> (args, sg.sig_args)
-    | Some tres -> (builtin_arg_of_res res :: args, tres :: sg.sig_args) in
+    | Tvoid -> (args, sg.sig_args)
+    | tres -> (builtin_arg_of_res res :: args, proj_rettype tres :: sg.sig_args) in
   let print_fragment = function
   | Str.Text s ->
       output_string oc s
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index 8c255a65..c9a6d399 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -16,7 +16,7 @@
 (** Pretty-printer for Cminor *)
 
 open Format
-open !Camlcoq
+open! Camlcoq
 open Integers
 open AST
 open PrintAST
@@ -193,9 +193,7 @@ let print_sig p sg =
   List.iter
     (fun t -> fprintf p "%s ->@ " (name_of_type t))
     sg.sig_args;
-  match sg.sig_res with
-  | None -> fprintf p "void"
-  | Some ty -> fprintf p "%s" (name_of_type ty)
+  fprintf p "%s" (name_of_rettype sg.sig_res)
 
 let rec just_skips s =
   match s with
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index 1c449e74..d8f2ac12 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -61,9 +61,10 @@ let print_succ pp s dfl =
 let print_instruction pp succ = function
   | Lop(op, args, res) ->
       fprintf pp "%a = %a" mreg res (print_operation mreg) (op, args)
-  | Lload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]"
-         mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args)
+  | Lload(trap,chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a]%a"
+        mreg dst (name_of_chunk chunk) (print_addressing mreg) (addr, args)
+        print_trapping_mode trap
   | Lgetstack(sl, ofs, ty, dst) ->
       fprintf pp "%a = %a" mreg dst slot (sl, ofs, ty)
   | Lsetstack(src, sl, ofs, ty) ->
@@ -82,10 +83,11 @@ let print_instruction pp succ = function
         (print_builtin_args loc) args
   | Lbranch s ->
       print_succ pp s succ
-  | Lcond(cond, args, s1, s2) ->
-      fprintf pp "if (%a) goto %d else goto %d"
+  | Lcond(cond, args, s1, s2, info) ->
+      fprintf pp "if (%a) goto %d else goto %d (prediction: %s)"
         (print_condition mreg) (cond, args)
         (P.to_int s1) (P.to_int s2)
+        (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
   | Ljumptable(arg, tbl) ->
       let tbl = Array.of_list tbl in
       fprintf pp "jumptable (%a)" mreg arg;
diff --git a/backend/PrintLTLin.ml b/backend/PrintLTLin.ml
deleted file mode 100644
index 4e8efd16..00000000
--- a/backend/PrintLTLin.ml
+++ /dev/null
@@ -1,115 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Pretty-printer for LTLin code *)
-
-open Format
-open Camlcoq
-open Datatypes
-open Maps
-open AST
-open Integers
-open Locations
-open Machregsaux
-open LTLin
-open PrintAST
-open PrintOp
-
-let reg pp loc =
-  match loc with
-  | R r ->
-      begin match name_of_register r with
-      | Some s -> fprintf pp "%s" s
-      | None -> fprintf pp "<unknown reg>"
-      end
-  | S (Local(ofs, ty)) ->
-      fprintf pp "local(%s,%ld)" (name_of_type ty) (camlint_of_coqint ofs)
-  | S (Incoming(ofs, ty)) ->
-      fprintf pp "incoming(%s,%ld)" (name_of_type ty) (camlint_of_coqint ofs)
-  | S (Outgoing(ofs, ty)) ->
-      fprintf pp "outgoing(%s,%ld)" (name_of_type ty) (camlint_of_coqint ofs)
-
-let rec regs pp = function
-  | [] -> ()
-  | [r] -> reg pp r
-  | r1::rl -> fprintf pp "%a, %a" reg r1 regs rl
-
-let ros pp = function
-  | Coq_inl r -> reg pp r
-  | Coq_inr s -> fprintf pp "\"%s\"" (extern_atom s)
-
-let print_instruction pp i =
-  match i with
-  | Lop(op, args, res) ->
-      fprintf pp "%a = %a@ "
-         reg res (PrintOp.print_operation reg) (op, args)
-  | Lload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]@ "
-         reg dst (name_of_chunk chunk)
-         (PrintOp.print_addressing reg) (addr, args)
-  | Lstore(chunk, addr, args, src) ->
-      fprintf pp "%s[%a] = %a@ "
-         (name_of_chunk chunk)
-         (PrintOp.print_addressing reg) (addr, args)
-         reg src
-  | Lcall(sg, fn, args, res) ->
-      fprintf pp "%a = %a(%a)@ "
-        reg res ros fn regs args
-  | Ltailcall(sg, fn, args) ->
-      fprintf pp "tailcall %a(%a)@ "
-        ros fn regs args
-  | Lbuiltin(ef, args, res) ->
-      fprintf pp "%a = builtin %s(%a)@ "
-        reg res (name_of_external ef) regs args
-  | Llabel lbl ->
-      fprintf pp "%ld:@ " (P.to_int32 lbl)
-  | Lgoto lbl ->
-      fprintf pp "goto %ld@ " (P.to_int32 lbl)
-  | Lcond(cond, args, lbl) ->
-      fprintf pp "if (%a) goto %ld@ "
-        (PrintOp.print_condition reg) (cond, args)
-        (P.to_int32 lbl)
-  | Ljumptable(arg, tbl) ->
-      let tbl = Array.of_list tbl in
-      fprintf pp "@[<v 2>jumptable (%a)" reg arg;
-      for i = 0 to Array.length tbl - 1 do
-        fprintf pp "@ case %d: goto %ld" i (P.to_int32 tbl.(i))
-      done;
-      fprintf pp "@]@ "
-  | Lreturn None ->
-      fprintf pp "return@ "
-  | Lreturn (Some arg) ->
-      fprintf pp "return %a@ " reg arg
-
-let print_function pp id f =
-  fprintf pp "@[<v 2>%s(%a) {@ " (extern_atom id) regs f.fn_params;
-  List.iter (print_instruction pp) f.fn_code;
-  fprintf pp "@;<0 -2>}@]@."
-
-let print_globdef pp (id, gd) =
-  match gd with
-  | Gfun(Internal f) -> print_function pp id f
-  | _ -> ()
-
-let print_program pp prog =
-  List.iter (print_globdef pp) prog.prog_defs
-
-let destination : string option ref = ref None
-
-let print_if prog =
-  match !destination with
-  | None -> ()
-  | Some f ->
-      let oc = open_out f in
-      let pp = formatter_of_out_channel oc in
-      print_program pp prog;
-      close_out oc
diff --git a/backend/PrintMach.ml b/backend/PrintMach.ml
index 517f3037..70e65832 100644
--- a/backend/PrintMach.ml
+++ b/backend/PrintMach.ml
@@ -48,10 +48,11 @@ let print_instruction pp i =
   | Mop(op, args, res) ->
       fprintf pp "\t%a = %a\n"
          reg res (PrintOp.print_operation reg) (op, args)
-  | Mload(chunk, addr, args, dst) ->
-      fprintf pp "\t%a = %s[%a]\n"
+  | Mload(trap, chunk, addr, args, dst) ->
+      fprintf pp "\t%a = %s[%a]%a\n"
          reg dst (name_of_chunk chunk)
          (PrintOp.print_addressing reg) (addr, args)
+         print_trapping_mode trap
   | Mstore(chunk, addr, args, src) ->
       fprintf pp "\t%s[%a] = %a\n"
          (name_of_chunk chunk)
diff --git a/backend/PrintRTL.ml b/backend/PrintRTL.ml
index 841540b6..b2ef05ca 100644
--- a/backend/PrintRTL.ml
+++ b/backend/PrintRTL.ml
@@ -50,10 +50,11 @@ let print_instruction pp (pc, i) =
       fprintf pp "%a = %a\n"
          reg res (PrintOp.print_operation reg) (op, args);
       print_succ pp s (pc - 1)
-  | Iload(chunk, addr, args, dst, s) ->
-      fprintf pp "%a = %s[%a]\n"
+  | Iload(trap, chunk, addr, args, dst, s) ->
+      fprintf pp "%a = %s[%a]%a\n"
          reg dst (name_of_chunk chunk)
-         (PrintOp.print_addressing reg) (addr, args);
+         (PrintOp.print_addressing reg) (addr, args)
+         print_trapping_mode trap;
       print_succ pp s (pc - 1)
   | Istore(chunk, addr, args, src, s) ->
       fprintf pp "%s[%a] = %a\n"
@@ -74,10 +75,11 @@ let print_instruction pp (pc, i) =
         (name_of_external ef)
         (print_builtin_args reg) args;
       print_succ pp s (pc - 1)
-  | Icond(cond, args, s1, s2) ->
-      fprintf pp "if (%a) goto %d else goto %d\n"
+  | Icond(cond, args, s1, s2, info) ->
+      fprintf pp "if (%a) goto %d else goto %d (prediction: %s)\n"
         (PrintOp.print_condition reg) (cond, args)
         (P.to_int s1) (P.to_int s2)
+        (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
   | Ijumptable(arg, tbl) ->
       let tbl = Array.of_list tbl in
       fprintf pp "jumptable (%a)\n" reg arg;
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index 6432682a..d1b79623 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -86,9 +86,10 @@ let print_instruction pp succ = function
       fprintf pp "(%a) = (%a) using %a, %a" vars dsts vars srcs var t1 var t2
   | Xop(op, args, res) ->
       fprintf pp "%a = %a" var res (print_operation var) (op, args)
-  | Xload(chunk, addr, args, dst) ->
-      fprintf pp "%a = %s[%a]"
-         var dst (name_of_chunk chunk) (print_addressing var) (addr, args)
+  | Xload(trap, chunk, addr, args, dst) ->
+      fprintf pp "%a = %s[%a]%a"
+        var dst (name_of_chunk chunk) (print_addressing var) (addr, args)
+        print_trapping_mode trap
   | Xstore(chunk, addr, args, src) ->
       fprintf pp "%s[%a] = %a"
          (name_of_chunk chunk) (print_addressing var) (addr, args) var src
@@ -103,7 +104,7 @@ let print_instruction pp succ = function
         (print_builtin_args var) args
   | Xbranch s ->
       print_succ pp s succ
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       fprintf pp "if (%a) goto %d else goto %d"
         (print_condition var) (cond, args)
         (P.to_int s1) (P.to_int s2)
diff --git a/backend/RTL.v b/backend/RTL.v
index 9599a24a..dec59ca2 100644
--- a/backend/RTL.v
+++ b/backend/RTL.v
@@ -43,11 +43,12 @@ Inductive instruction: Type :=
       (** [Iop op args dest succ] performs the arithmetic operation [op]
           over the values of registers [args], stores the result in [dest],
           and branches to [succ]. *)
-  | Iload: memory_chunk -> addressing -> list reg -> reg -> node -> instruction
-      (** [Iload chunk addr args dest succ] loads a [chunk] quantity from
+  | Iload: trapping_mode -> memory_chunk -> addressing -> list reg -> reg -> node -> instruction
+      (** [Iload trap chunk addr args dest succ] loads a [chunk] quantity from
           the address determined by the addressing mode [addr] and the
           values of the [args] registers, stores the quantity just read
-          into [dest], and branches to [succ]. *)
+          into [dest], and branches to [succ].
+          If trap=NOTRAP, then failures lead to a default value written to [dest]. *)
   | Istore: memory_chunk -> addressing -> list reg -> reg -> node -> instruction
       (** [Istore chunk addr args src succ] stores the value of register
           [src] in the [chunk] quantity at the
@@ -66,11 +67,12 @@ Inductive instruction: Type :=
       (** [Ibuiltin ef args dest succ] calls the built-in function
           identified by [ef], giving it the values of [args] as arguments.
           It stores the return value in [dest] and branches to [succ]. *)
-  | Icond: condition -> list reg -> node -> node -> instruction
-      (** [Icond cond args ifso ifnot] evaluates the boolean condition
+  | Icond: condition -> list reg -> node -> node -> option bool -> instruction
+      (** [Icond cond args ifso ifnot info] evaluates the boolean condition
           [cond] over the values of registers [args].  If the condition
           is true, it transitions to [ifso].  If the condition is false,
-          it transitions to [ifnot]. *)
+          it transitions to [ifnot]. [info] is a ghost field there to provide
+          information relative to branch prediction. *)
   | Ijumptable: reg -> list node -> instruction
       (** [Ijumptable arg tbl] transitions to the node that is the [n]-th
           element of the list [tbl], where [n] is the unsigned integer
@@ -212,12 +214,25 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp pc rs m)
         E0 (State s f sp pc' (rs#res <- v) m)
   | exec_Iload:
-      forall s f sp pc rs m chunk addr args dst pc' a v,
-      (fn_code f)!pc = Some(Iload chunk addr args dst pc') ->
+      forall s f sp pc rs m trap chunk addr args dst pc' a v,
+      (fn_code f)!pc = Some(Iload trap chunk addr args dst pc') ->
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
       step (State s f sp pc rs m)
         E0 (State s f sp pc' (rs#dst <- v) m)
+  | exec_Iload_notrap1:
+      forall s f sp pc rs m chunk addr args dst pc',
+      (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
+      eval_addressing ge sp addr rs##args = None ->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m)
+  | exec_Iload_notrap2:
+      forall s f sp pc rs m chunk addr args dst pc' a,
+      (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = None->
+      step (State s f sp pc rs m)
+        E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m)
   | exec_Istore:
       forall s f sp pc rs m chunk addr args src pc' a m',
       (fn_code f)!pc = Some(Istore chunk addr args src pc') ->
@@ -248,8 +263,8 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State s f sp pc rs m)
          t (State s f sp pc' (regmap_setres res vres rs) m')
   | exec_Icond:
-      forall s f sp pc rs m cond args ifso ifnot b pc',
-      (fn_code f)!pc = Some(Icond cond args ifso ifnot) ->
+      forall s f sp pc rs m cond args ifso ifnot b pc' predb,
+      (fn_code f)!pc = Some(Icond cond args ifso ifnot predb) ->
       eval_condition cond rs##args m = Some b ->
       pc' = (if b then ifso else ifnot) ->
       step (State s f sp pc rs m)
@@ -299,8 +314,8 @@ Proof.
 Qed.
 
 Lemma exec_Iload':
-  forall s f sp pc rs m chunk addr args dst pc' rs' a v,
-  (fn_code f)!pc = Some(Iload chunk addr args dst pc') ->
+  forall s f sp pc rs m trap chunk addr args dst pc' rs' a v,
+  (fn_code f)!pc = Some(Iload trap chunk addr args dst pc') ->
   eval_addressing ge sp addr rs##args = Some a ->
   Mem.loadv chunk m a = Some v ->
   rs' = (rs#dst <- v) ->
@@ -384,12 +399,12 @@ Definition successors_instr (i: instruction) : list node :=
   match i with
   | Inop s => s :: nil
   | Iop op args res s => s :: nil
-  | Iload chunk addr args dst s => s :: nil
+  | Iload trap chunk addr args dst s => s :: nil
   | Istore chunk addr args src s => s :: nil
   | Icall sig ros args res s => s :: nil
   | Itailcall sig ros args => nil
   | Ibuiltin ef args res s => s :: nil
-  | Icond cond args ifso ifnot => ifso :: ifnot :: nil
+  | Icond cond args ifso ifnot _ => ifso :: ifnot :: nil
   | Ijumptable arg tbl => tbl
   | Ireturn optarg => nil
   end.
@@ -403,14 +418,14 @@ Definition instr_uses (i: instruction) : list reg :=
   match i with
   | Inop s => nil
   | Iop op args res s => args
-  | Iload chunk addr args dst s => args
+  | Iload trap chunk addr args dst s => args
   | Istore chunk addr args src s => src :: args
   | Icall sig (inl r) args res s => r :: args
   | Icall sig (inr id) args res s => args
   | Itailcall sig (inl r) args => r :: args
   | Itailcall sig (inr id) args => args
   | Ibuiltin ef args res s => params_of_builtin_args args
-  | Icond cond args ifso ifnot => args
+  | Icond cond args ifso ifnot _ => args
   | Ijumptable arg tbl => arg :: nil
   | Ireturn None => nil
   | Ireturn (Some arg) => arg :: nil
@@ -422,13 +437,13 @@ Definition instr_defs (i: instruction) : option reg :=
   match i with
   | Inop s => None
   | Iop op args res s => Some res
-  | Iload chunk addr args dst s => Some dst
+  | Iload trap chunk addr args dst s => Some dst
   | Istore chunk addr args src s => None
   | Icall sig ros args res s => Some res
   | Itailcall sig ros args => None
   | Ibuiltin ef args res s =>
       match res with BR r => Some r | _ => None end
-  | Icond cond args ifso ifnot => None
+  | Icond cond args ifso ifnot _ => None
   | Ijumptable arg tbl => None
   | Ireturn optarg => None
   end.
@@ -462,7 +477,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) :=
   match i with
   | Inop s => m
   | Iop op args res s => fold_left Pos.max args (Pos.max res m)
-  | Iload chunk addr args dst s => fold_left Pos.max args (Pos.max dst m)
+  | Iload trap chunk addr args dst s => fold_left Pos.max args (Pos.max dst m)
   | Istore chunk addr args src s => fold_left Pos.max args (Pos.max src m)
   | Icall sig (inl r) args res s => fold_left Pos.max args (Pos.max r (Pos.max res m))
   | Icall sig (inr id) args res s => fold_left Pos.max args (Pos.max res m)
@@ -471,7 +486,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) :=
   | Ibuiltin ef args res s =>
       fold_left Pos.max (params_of_builtin_args args)
         (fold_left Pos.max (params_of_builtin_res res) m)
-  | Icond cond args ifso ifnot => fold_left Pos.max args m
+  | Icond cond args ifso ifnot _ => fold_left Pos.max args m
   | Ijumptable arg tbl => Pos.max arg m
   | Ireturn None => m
   | Ireturn (Some arg) => Pos.max arg m
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index 9d7a8506..ac98f3a1 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -410,12 +410,11 @@ Fixpoint convert_builtin_args {A: Type} (al: list (builtin_arg expr)) (rl: list
       a1' :: convert_builtin_args al rl1
   end.
 
-Definition convert_builtin_res (map: mapping) (oty: option typ) (r: builtin_res ident) : mon (builtin_res reg) :=
-  match r, oty with
-  | BR id, _ => do r <- find_var map id; ret (BR r)
-  | BR_none, None => ret BR_none
-  | BR_none, Some _ => do r <- new_reg; ret (BR r)
-  | _, _ => error (Errors.msg "RTLgen: bad builtin_res")
+Definition convert_builtin_res (map: mapping) (ty: rettype) (r: builtin_res ident) : mon (builtin_res reg) :=
+  match r with
+  | BR id => do r <- find_var map id; ret (BR r)
+  | BR_none => if rettype_eq ty Tvoid then ret BR_none else (do r <- new_reg; ret (BR r))
+  | _ => error (Errors.msg "RTLgen: bad builtin_res")
   end.
 
 (** Translation of an expression.  [transl_expr map a rd nd]
@@ -436,7 +435,7 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node)
       transl_exprlist map al rl no
   | Eload chunk addr al =>
       do rl <- alloc_regs map al;
-      do no <- add_instr (Iload chunk addr rl rd nd);
+      do no <- add_instr (Iload TRAP chunk addr rl rd nd);
          transl_exprlist map al rl no
   | Econdition a b c =>
       do nfalse <- transl_expr map c rd nd;
@@ -480,7 +479,7 @@ with transl_condexpr (map: mapping) (a: condexpr) (ntrue nfalse: node)
   match a with
   | CEcond c al =>
       do rl <- alloc_regs map al;
-      do nt <- add_instr (Icond c rl ntrue nfalse);
+      do nt <- add_instr (Icond c rl ntrue nfalse None);
          transl_exprlist map al rl nt
   | CEcondition a b c =>
       do nc <- transl_condexpr map c ntrue nfalse;
@@ -667,10 +666,7 @@ Fixpoint reserve_labels (s: stmt) (ms: labelmap * state)
 (** Translation of a CminorSel function. *)
 
 Definition ret_reg (sig: signature) (rd: reg) : option reg :=
-  match sig.(sig_res) with
-  | None => None
-  | Some ty => Some rd
-  end.
+  if rettype_eq sig.(sig_res) Tvoid then None else Some rd.
 
 Definition transl_fun (f: CminorSel.function) (ngoto: labelmap): mon (node * list reg) :=
   do (rparams, map1) <- add_vars init_mapping f.(CminorSel.fn_params);
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index 17022a7d..30ad7d82 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -639,8 +639,8 @@ Lemma new_reg_return_ok:
   map_valid map s1 ->
   return_reg_ok s2 map (ret_reg sig r).
 Proof.
-  intros. unfold ret_reg. destruct (sig_res sig); constructor.
-  eauto with rtlg. eauto with rtlg.
+  intros. unfold ret_reg.
+  destruct (rettype_eq (sig_res sig) Tvoid); constructor; eauto with rtlg.
 Qed.
 
 (** * Relational specification of the translation *)
@@ -707,7 +707,7 @@ Inductive tr_expr (c: code):
       tr_expr c map pr (Eop op al) ns nd rd dst
   | tr_Eload: forall map pr chunk addr al ns nd rd n1 rl dst,
       tr_exprlist c map pr al ns n1 rl ->
-      c!n1 = Some (Iload chunk addr rl rd nd) ->
+      c!n1 = Some (Iload TRAP chunk addr rl rd nd) ->
       reg_map_ok map rd dst -> ~In rd pr ->
       tr_expr c map pr (Eload chunk addr al) ns nd rd dst
   | tr_Econdition: forall map pr a ifso ifnot ns nd rd ntrue nfalse dst,
@@ -744,9 +744,9 @@ Inductive tr_expr (c: code):
 
 with tr_condition (c: code):
        mapping -> list reg -> condexpr -> node -> node -> node -> Prop :=
-  | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl,
+  | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl i,
       tr_exprlist c map pr bl ns n1 rl ->
-      c!n1 = Some (Icond cond rl ntrue nfalse) ->
+      c!n1 = Some (Icond cond rl ntrue nfalse i) ->
       tr_condition c map pr (CEcond cond bl) ns ntrue nfalse
   | tr_CEcondition: forall map pr a1 a2 a3 ns ntrue nfalse n2 n3,
       tr_condition c map pr a1 ns n2 n3 ->
@@ -1224,9 +1224,9 @@ Lemma convert_builtin_res_charact:
 Proof.
   destruct res; simpl; intros.
 - monadInv TR. constructor.  unfold find_var in EQ. destruct (map_vars map)!x; inv EQ; auto.
-- destruct oty; monadInv TR.
-+ constructor. eauto with rtlg.
+- destruct (rettype_eq oty Tvoid); monadInv TR.
 + constructor.
++ constructor. eauto with rtlg.
 - monadInv TR.
 Qed.
 
@@ -1350,7 +1350,7 @@ Proof.
   intros [C D].
   eapply tr_function_intro; eauto with rtlg.
   eapply transl_stmt_charact; eauto with rtlg.
-  unfold ret_reg. destruct (sig_res (CminorSel.fn_sig f)).
-  constructor. eauto with rtlg. eauto with rtlg.
+  unfold ret_reg. destruct (rettype_eq (sig_res (CminorSel.fn_sig f)) Tvoid).
   constructor.
+  constructor; eauto with rtlg.
 Qed.
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 8336d1bf..15ed6d8a 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -104,11 +104,11 @@ Inductive wt_instr : instruction -> Prop :=
       valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
-      forall chunk addr args dst s,
+      forall trap chunk addr args dst s,
       map env args = type_of_addressing addr ->
       env dst = type_of_chunk chunk ->
       valid_successor s ->
-      wt_instr (Iload chunk addr args dst s)
+      wt_instr (Iload trap chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
       map env args = type_of_addressing addr ->
@@ -139,11 +139,11 @@ Inductive wt_instr : instruction -> Prop :=
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
-      forall cond args s1 s2,
+      forall cond args s1 s2 i,
       map env args = type_of_condition cond ->
       valid_successor s1 ->
       valid_successor s2 ->
-      wt_instr (Icond cond args s1 s2)
+      wt_instr (Icond cond args s1 s2 i)
   | wt_Ijumptable:
       forall arg tbl,
       env arg = Tint ->
@@ -151,11 +151,12 @@ Inductive wt_instr : instruction -> Prop :=
       list_length_z tbl * 4 <= Int.max_unsigned ->
       wt_instr (Ijumptable arg tbl)
   | wt_Ireturn_none:
-      funct.(fn_sig).(sig_res) = None ->
+      funct.(fn_sig).(sig_res) = Tvoid ->
       wt_instr (Ireturn None)
   | wt_Ireturn_some:
       forall arg ty,
-      funct.(fn_sig).(sig_res) = Some ty ->
+      funct.(fn_sig).(sig_res) <> Tvoid ->
+      env arg = proj_sig_res funct.(fn_sig) ->
       env arg = ty ->
       wt_instr (Ireturn (Some arg)).
 
@@ -282,7 +283,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
       else
        (let (targs, tres) := type_of_operation op in
         do e1 <- S.set_list e args targs; S.set e1 res tres)
-  | Iload chunk addr args dst s =>
+  | Iload trap chunk addr args dst s =>
       do x <- check_successor s;
       do e1 <- S.set_list e args (type_of_addressing addr);
       S.set e1 dst (type_of_chunk chunk)
@@ -298,7 +299,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   | Itailcall sig ros args =>
       do e1 <- type_ros e ros;
       do e2 <- S.set_list e1 args sig.(sig_args);
-      if opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res) then
+      if rettype_eq sig.(sig_res) f.(fn_sig).(sig_res) then
         if tailcall_is_possible sig
         then OK e2
         else Error(msg "tailcall not possible")
@@ -312,7 +313,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
         | _ => type_builtin_args e args sig.(sig_args)
         end;
       type_builtin_res e1 res (proj_sig_res sig)
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 _ =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
       S.set_list e args (type_of_condition cond)
@@ -323,9 +324,9 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
       then OK e1
       else Error(msg "jumptable too big")
   | Ireturn optres =>
-      match optres, f.(fn_sig).(sig_res) with
-      | None, None => OK e
-      | Some r, Some t => S.set e r t
+      match optres, rettype_eq f.(fn_sig).(sig_res) Tvoid with
+      | None, left _ => OK e
+      | Some r, right _ => S.set e r (proj_sig_res f.(fn_sig))
       | _, _ => Error(msg "bad return")
       end
   end.
@@ -468,7 +469,7 @@ Proof.
   destruct l; try discriminate. destruct l; monadInv EQ0. eauto with ty.
   destruct (type_of_operation o) as [targs tres] eqn:TYOP. monadInv EQ0. eauto with ty.
 - (* tailcall *)
-  destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
+  destruct (rettype_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
   destruct (tailcall_is_possible s) eqn:TCIP; inv EQ2.
   eauto with ty.
 - (* builtin *)
@@ -477,7 +478,8 @@ Proof.
   destruct (zle (list_length_z l * 4) Int.max_unsigned); inv EQ2.
   eauto with ty.
 - (* return *)
-  simpl in H. destruct o as [r|] eqn: RET; destruct (sig_res (fn_sig f)) as [t|] eqn: RES; try discriminate.
+  simpl in H.
+  destruct o as [r|] eqn: RET; destruct (rettype_eq (sig_res (fn_sig f)) Tvoid); try discriminate.
   eauto with ty.
   inv H; auto with ty.
 Qed.
@@ -519,7 +521,7 @@ Proof.
   eapply S.set_sound; eauto with ty.
   eauto with ty.
 - (* tailcall *)
-  destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
+  destruct (rettype_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
   destruct (tailcall_is_possible s) eqn:TCIP; inv EQ2.
   constructor.
   eapply type_ros_sound; eauto with ty.
@@ -543,8 +545,9 @@ Proof.
   eapply check_successors_sound; eauto.
   auto.
 - (* return *)
-  simpl in H. destruct o as [r|] eqn: RET; destruct (sig_res (fn_sig f)) as [t|] eqn: RES; try discriminate.
-  econstructor. eauto. eapply S.set_sound; eauto with ty.
+  simpl in H.
+  destruct o as [r|] eqn: RET; destruct (rettype_eq (sig_res (fn_sig f)) Tvoid); try discriminate.
+  econstructor. auto. eapply S.set_sound; eauto with ty. eauto.
   inv H. constructor. auto.
 Qed.
 
@@ -721,9 +724,9 @@ Proof.
   rewrite check_successor_complete by auto; simpl.
   apply IHtbl0; intros; auto.
 - (* return none *)
-  rewrite H0. exists e; auto.
+  rewrite H0, dec_eq_true. exists e; auto.
 - (* return some *)
-  rewrite H0. apply S.set_complete; auto.
+  rewrite dec_eq_false by auto. apply S.set_complete; auto.
 Qed.
 
 Lemma type_code_complete:
@@ -841,14 +844,24 @@ Proof.
 Qed.
 
 Lemma wt_exec_Iload:
-  forall env f chunk addr args dst s m a v rs,
-  wt_instr f env (Iload chunk addr args dst s) ->
+  forall env f trap chunk addr args dst s m a v rs,
+  wt_instr f env (Iload trap chunk addr args dst s) ->
   Mem.loadv chunk m a = Some v ->
   wt_regset env rs ->
   wt_regset env (rs#dst <- v).
 Proof.
   intros. destruct a; simpl in H0; try discriminate. inv H.
-  eapply wt_regset_assign; eauto. rewrite H8; eapply Mem.load_type; eauto.
+  eapply wt_regset_assign; eauto. rewrite H9; eapply Mem.load_type; eauto.
+Qed.
+
+Lemma wt_exec_Iload_notrap:
+  forall env f chunk addr args dst s rs,
+  wt_instr f env (Iload NOTRAP chunk addr args dst s) ->
+  wt_regset env rs ->
+  wt_regset env (rs#dst <- (default_notrap_load_value chunk)).
+Proof.
+  intros. 
+  eapply wt_regset_assign; eauto. simpl. trivial.
 Qed.
 
 Lemma wt_exec_Ibuiltin:
@@ -872,7 +885,7 @@ Qed.
 
 Inductive wt_stackframes: list stackframe -> signature -> Prop :=
   | wt_stackframes_nil: forall sg,
-      sg.(sig_res) = Some Tint ->
+      sg.(sig_res) = Tint ->
       wt_stackframes nil sg
   | wt_stackframes_cons:
       forall s res f sp pc rs env sg,
@@ -930,6 +943,10 @@ Proof.
   econstructor; eauto. eapply wt_exec_Iop; eauto.
   (* Iload *)
   econstructor; eauto. eapply wt_exec_Iload; eauto.
+  (* Iload notrap1*)
+  econstructor; eauto. eapply wt_exec_Iload_notrap; eauto.
+  (* Iload notrap2*)
+  econstructor; eauto. eapply wt_exec_Iload_notrap; eauto.
   (* Istore *)
   econstructor; eauto.
   (* Icall *)
@@ -964,13 +981,13 @@ Proof.
   econstructor; eauto.
   (* Ireturn *)
   econstructor; eauto.
-  inv WTI; simpl. auto. unfold proj_sig_res; rewrite H2. auto.
+  inv WTI; simpl. auto. rewrite <- H3. auto.
   (* internal function *)
   simpl in *. inv H5.
   econstructor; eauto.
   inv H1. apply wt_init_regs; auto. rewrite wt_params0. auto.
   (* external function *)
-  econstructor; eauto. simpl.
+  econstructor; eauto.
   eapply external_call_well_typed; eauto.
   (* return *)
   inv H1. econstructor; eauto.
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index 7db8a866..ffe26933 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -249,18 +249,18 @@ let block_of_RTL_instr funsig tyenv = function
             else
               let t = new_temp (tyenv res) in (t :: args2', t) in
       movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s])
-  | RTL.Iload(chunk, addr, args, dst, s) ->
+  | RTL.Iload(trap, chunk, addr, args, dst, s) ->
       if Archi.splitlong && chunk = Mint64 then begin
         match offset_addressing addr (coqint_of_camlint 4l) with
         | None -> assert false
         | Some addr' ->
-            [Xload(Mint32, addr, vregs tyenv args,
+            [Xload(trap, Mint32, addr, vregs tyenv args,
                    V((if Archi.big_endian then dst else twin_reg dst), Tint));
-             Xload(Mint32, addr', vregs tyenv args,
+             Xload(trap, Mint32, addr', vregs tyenv args,
                    V((if Archi.big_endian then twin_reg dst else dst), Tint));
              Xbranch s]
       end else
-        [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
+        [Xload(trap, chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
   | RTL.Istore(chunk, addr, args, src, s) ->
       if Archi.splitlong && chunk = Mint64 then begin
         match offset_addressing addr (coqint_of_camlint 4l) with
@@ -295,8 +295,8 @@ let block_of_RTL_instr funsig tyenv = function
          (Xbuiltin(ef, args2, res2) ::
             movelist (params_of_builtin_res res2) (params_of_builtin_res res1)
                [Xbranch s])
-  | RTL.Icond(cond, args, s1, s2) ->
-      [Xcond(cond, vregs tyenv args, s1, s2)]
+  | RTL.Icond(cond, args, s1, s2, i) ->
+      [Xcond(cond, vregs tyenv args, s1, s2, i)]
   | RTL.Ijumptable(arg, tbl) ->
       [Xjumptable(vreg tyenv arg, tbl)]
   | RTL.Ireturn None ->
@@ -364,7 +364,7 @@ let live_before instr after =
       if VSet.mem res after
       then vset_addlist args (VSet.remove res after)
       else after
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       if VSet.mem dst after
       then vset_addlist args (VSet.remove dst after)
       else after
@@ -380,7 +380,7 @@ let live_before instr after =
       vset_addargs args (vset_removeres res after)
   | Xbranch s ->
       after
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       List.fold_right VSet.add args after
   | Xjumptable(arg, tbl) ->
       VSet.add arg after
@@ -459,7 +459,7 @@ let dce_instr instr after k =
       if VSet.mem res after
       then instr :: k
       else k
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       if VSet.mem dst after
       then instr :: k
       else k
@@ -550,7 +550,7 @@ let spill_costs f =
         (* temps must not be spilled *)
     | Xop(op, args, res) ->
         charge_list 10 1 args; charge 10 1 res
-    | Xload(chunk, addr, args, dst) ->
+    | Xload(trap, chunk, addr, args, dst) ->
         charge_list 10 1 args; charge 10 1 dst
     | Xstore(chunk, addr, args, src) ->
         charge_list 10 1 args; charge 10 1 src
@@ -575,7 +575,7 @@ let spill_costs f =
             charge_list 10 1 (params_of_builtin_res res)
         end
     | Xbranch _ -> ()
-    | Xcond(cond, args, _, _) ->
+    | Xcond(cond, args, _, _, _) ->
         charge_list 10 1 args
     | Xjumptable(arg, _) ->
         charge 10 1 arg
@@ -677,7 +677,7 @@ let add_interfs_instr g instr live =
             (vset_addlist (res :: argl) (VSet.remove res live))
       end;
       add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op)
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       add_interfs_def g dst live;
       add_interfs_destroyed g (VSet.remove dst live)
                               (destroyed_by_load chunk addr)
@@ -718,7 +718,7 @@ let add_interfs_instr g instr live =
       end
   | Xbranch s ->
       ()
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       add_interfs_destroyed g live (destroyed_by_cond cond)
   | Xjumptable(arg, tbl) ->
       add_interfs_destroyed g live destroyed_by_jumptable
@@ -782,7 +782,7 @@ let tospill_instr alloc instr ts =
       ts
   | Xop(op, args, res) ->
       addlist_tospill alloc args (add_tospill alloc res ts)
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       addlist_tospill alloc args (add_tospill alloc dst ts)
   | Xstore(chunk, addr, args, src) ->
       addlist_tospill alloc args (add_tospill alloc src ts)
@@ -797,7 +797,7 @@ let tospill_instr alloc instr ts =
          (addlist_tospill alloc (params_of_builtin_res res) ts)
   | Xbranch s ->
       ts
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       addlist_tospill alloc args ts
   | Xjumptable(arg, tbl) ->
       add_tospill alloc arg ts
@@ -964,10 +964,10 @@ let spill_instr tospill eqs instr =
                add res tmp (kill tmp (kill res eqs2)))
           end
       end
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       let (args', c1, eqs1) = reload_vars tospill eqs args in
       let (dst', c2, eqs2) = save_var tospill eqs1 dst in
-      (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2)
+      (c1 @ Xload(trap, chunk, addr, args', dst') :: c2, eqs2)
   | Xstore(chunk, addr, args, src) ->
       let (args', c1, eqs1) = reload_vars tospill eqs args in
       let (src', c2, eqs2) = reload_var tospill eqs1 src in
@@ -990,9 +990,9 @@ let spill_instr tospill eqs instr =
       (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2)
   | Xbranch s ->
       ([instr], eqs)
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, i) ->
      let (args', c1, eqs1) = reload_vars tospill eqs args in
-     (c1 @ [Xcond(cond, args', s1, s2)], eqs1)
+     (c1 @ [Xcond(cond, args', s1, s2, i)], eqs1)
   | Xjumptable(arg, tbl) ->
       let (arg', c1, eqs1) = reload_var tospill eqs arg in
       (c1 @ [Xjumptable(arg', tbl)], eqs1)
@@ -1115,8 +1115,8 @@ let transl_instr alloc instr k =
             LTL.Lop(Omove, [rarg1], rres) ::
             LTL.Lop(op, rres :: rargl, rres) :: k
       end
-  | Xload(chunk, addr, args, dst) ->
-      LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
+  | Xload(trap, chunk, addr, args, dst) ->
+      LTL.Lload(trap, chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
   | Xstore(chunk, addr, args, src) ->
       LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k
   | Xcall(sg, vos, args, res) ->
@@ -1128,8 +1128,8 @@ let transl_instr alloc instr k =
                        AST.map_builtin_res (mreg_of alloc) res) :: k
   | Xbranch s ->
       LTL.Lbranch s :: []
-  | Xcond(cond, args, s1, s2) ->
-      LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: []
+  | Xcond(cond, args, s1, s2, i) ->
+      LTL.Lcond(cond, mregs_of alloc args, s1, s2, i) :: []
   | Xjumptable(arg, tbl) ->
       LTL.Ljumptable(mreg_of alloc arg, tbl) :: []
   | Xreturn optarg ->
diff --git a/backend/Renumber.v b/backend/Renumber.v
index 10f58251..2727b979 100644
--- a/backend/Renumber.v
+++ b/backend/Renumber.v
@@ -43,12 +43,12 @@ Definition renum_instr (i: instruction) : instruction :=
   match i with
   | Inop s => Inop (renum_pc s)
   | Iop op args res s => Iop op args res (renum_pc s)
-  | Iload chunk addr args res s => Iload chunk addr args res (renum_pc s)
+  | Iload trap chunk addr args res s => Iload trap chunk addr args res (renum_pc s)
   | Istore chunk addr args src s => Istore chunk addr args src (renum_pc s)
   | Icall sg ros args res s => Icall sg ros args res (renum_pc s)
   | Itailcall sg ros args => i
   | Ibuiltin ef args res s => Ibuiltin ef args res (renum_pc s)
-  | Icond cond args s1 s2 => Icond cond args (renum_pc s1) (renum_pc s2)
+  | Icond cond args s1 s2 info => Icond cond args (renum_pc s1) (renum_pc s2) info
   | Ijumptable arg tbl => Ijumptable arg (List.map renum_pc tbl)
   | Ireturn or => i
   end.
diff --git a/backend/Renumberproof.v b/backend/Renumberproof.v
index 7cda9425..2e161965 100644
--- a/backend/Renumberproof.v
+++ b/backend/Renumberproof.v
@@ -175,6 +175,18 @@ Proof.
   rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
   eapply exec_Iload; eauto.
   constructor; auto. eapply reach_succ; eauto. simpl; auto.
+  (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = None).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto. eapply reach_succ; eauto. simpl; auto.
+  (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs ## args = Some a).
+  rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto. eapply reach_succ; eauto. simpl; auto.
 (* store *)
   econstructor; split.
   assert (eval_addressing tge sp addr rs ## args = Some a).
diff --git a/backend/SelectDivproof.v b/backend/SelectDivproof.v
index a8ee8453..1873da4d 100644
--- a/backend/SelectDivproof.v
+++ b/backend/SelectDivproof.v
@@ -764,8 +764,8 @@ Lemma eval_divlu_mull:
 Proof.
   intros. unfold divlu_mull. exploit (divlu_mul_shift x); eauto. intros [A B].
   assert (A0: eval_expr ge sp e m le (Eletvar O) (Vlong x)) by (constructor; auto).
-  exploit eval_mullhu. eauto. eexact A0. instantiate (1 := Int64.repr M). intros (v1 & A1 & B1).
-  exploit eval_shrluimm. eauto. eexact A1. instantiate (1 := Int.repr p). intros (v2 & A2 & B2).
+  exploit eval_mullhu. try apply HELPERS. eexact A0. instantiate (1 := Int64.repr M). intros (v1 & A1 & B1).
+  exploit eval_shrluimm. try apply HELPERS. eexact A1. instantiate (1 := Int.repr p). intros (v2 & A2 & B2).
   simpl in B1; inv B1. simpl in B2. replace (Int.ltu (Int.repr p) Int64.iwordsize') with true in B2. inv B2.
   rewrite B. assumption.
   unfold Int.ltu. rewrite Int.unsigned_repr. rewrite zlt_true; auto. tauto.
@@ -835,17 +835,17 @@ Proof.
   intros. unfold divls_mull.
   assert (A0: eval_expr ge sp e m le (Eletvar O) (Vlong x)).
   { constructor; auto. }
-  exploit eval_mullhs. eauto. eexact A0. instantiate (1 := Int64.repr M).  intros (v1 & A1 & B1).
-  exploit eval_addl; auto; try apply HELPERS. eexact A1. eexact A0. intros (v2 & A2 & B2).
-  exploit eval_shrluimm. eauto. eexact A0. instantiate (1 := Int.repr 63). intros (v3 & A3 & B3).
+  exploit eval_mullhs. try apply HELPERS. eexact A0. instantiate (1 := Int64.repr M).  intros (v1 & A1 & B1).
+  exploit eval_addl. auto. eexact A1. eexact A0. intros (v2 & A2 & B2).
+  exploit eval_shrluimm. try apply HELPERS. eexact A0. instantiate (1 := Int.repr 63). intros (v3 & A3 & B3).
   set (a4 := if zlt M Int64.half_modulus
              then mullhs (Eletvar 0) (Int64.repr M)
              else addl (mullhs (Eletvar 0) (Int64.repr M)) (Eletvar 0)).
   set (v4 := if zlt M Int64.half_modulus then v1 else v2).
   assert (A4: eval_expr ge sp e m le a4 v4).
   { unfold a4, v4; destruct (zlt M Int64.half_modulus); auto. }
-  exploit eval_shrlimm. eauto. eexact A4. instantiate (1 := Int.repr p). intros (v5 & A5 & B5).
-  exploit eval_addl; auto; try apply HELPERS. eexact A5. eexact A3. intros (v6 & A6 & B6).
+  exploit eval_shrlimm. try apply HELPERS. eexact A4. instantiate (1 := Int.repr p). intros (v5 & A5 & B5).
+  exploit eval_addl. auto. eexact A5. eexact A3. intros (v6 & A6 & B6).
   assert (RANGE: forall x, 0 <= x < 64 -> Int.ltu (Int.repr x) Int64.iwordsize' = true).
   { intros. unfold Int.ltu. rewrite Int.unsigned_repr. rewrite zlt_true by tauto. auto.
     assert (64 < Int.max_unsigned) by (compute; auto). omega. }
@@ -949,8 +949,7 @@ Proof.
   intros until y. unfold divf. destruct (divf_match b); intros.
 - unfold divfimm. destruct (Float.exact_inverse n2) as [n2' | ] eqn:EINV.
   + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
-    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-    simpl; eauto.
+    repeat (econstructor; eauto). 
     destruct x; simpl; auto. erewrite Float.div_mul_inverse; eauto.
   + apply eval_divf_base; trivial.
 - apply eval_divf_base; trivial.
@@ -965,8 +964,7 @@ Proof.
   intros until y. unfold divfs. destruct (divfs_match b); intros.
 - unfold divfsimm. destruct (Float32.exact_inverse n2) as [n2' | ] eqn:EINV.
   + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
-    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-    simpl; eauto.
+    repeat (econstructor; eauto). 
     destruct x; simpl; auto. erewrite Float32.div_mul_inverse; eauto.
   + apply eval_divfs_base; trivial.
 - apply eval_divfs_base; trivial.
diff --git a/backend/Selectionaux.ml b/backend/Selectionaux.ml
index 574c31f0..26a79fd7 100644
--- a/backend/Selectionaux.ml
+++ b/backend/Selectionaux.ml
@@ -68,6 +68,8 @@ let rec cost_expr = function
 
 let fast_cmove ty =
   match Configuration.arch, Configuration.model with
+  | "aarch64", _ ->
+      (match ty with Tint | Tlong | Tfloat | Tsingle -> true | _ -> false)
   | "arm", _ ->
       (match ty with Tint | Tfloat | Tsingle -> true | _ -> false)
   | "powerpc", "e5500" -> 
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 0be96167..9e0f22cc 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -1265,8 +1265,8 @@ Proof.
   econstructor; eauto.
   econstructor; eauto. apply set_var_lessdef; auto.
 - (* store *)
-  exploit sel_expr_correct. eauto. eauto. eexact H. eauto. eauto. intros [vaddr' [A B]].
-  exploit sel_expr_correct. eauto. eauto. eexact H0. eauto. eauto. intros [v' [C D]].
+  exploit sel_expr_correct. try apply LINK. try apply HF. eexact H. eauto. eauto. intros [vaddr' [A B]].
+  exploit sel_expr_correct. try apply LINK. try apply HF. eexact H0. eauto. eauto. intros [v' [C D]].
   exploit Mem.storev_extends; eauto. intros [m2' [P Q]].
   left; econstructor; split.
   eapply eval_store; eauto.
diff --git a/backend/Splitting.ml b/backend/Splitting.ml
index 40f09c3d..3ca45c3b 100644
--- a/backend/Splitting.ml
+++ b/backend/Splitting.ml
@@ -151,8 +151,8 @@ let ren_instr f maps pc i =
   | Inop s -> Inop s
   | Iop(op, args, res, s) ->
       Iop(op, ren_regs before args, ren_reg after res, s)
-  | Iload(chunk, addr, args, dst, s) ->
-      Iload(chunk, addr, ren_regs before args, ren_reg after dst, s)
+  | Iload(trap, chunk, addr, args, dst, s) ->
+      Iload(trap, chunk, addr, ren_regs before args, ren_reg after dst, s)
   | Istore(chunk, addr, args, src, s) ->
       Istore(chunk, addr, ren_regs before args, ren_reg before src, s)
   | Icall(sg, ros, args, res, s) ->
@@ -162,8 +162,8 @@ let ren_instr f maps pc i =
   | Ibuiltin(ef, args, res, s) ->
       Ibuiltin(ef, List.map (AST.map_builtin_arg (ren_reg before)) args,
                    AST.map_builtin_res (ren_reg after) res, s)
-  | Icond(cond, args, s1, s2) ->
-      Icond(cond, ren_regs before args, s1, s2)
+  | Icond(cond, args, s1, s2, i) ->
+      Icond(cond, ren_regs before args, s1, s2, i)
   | Ijumptable(arg, tbl) ->
       Ijumptable(ren_reg before arg, tbl)
   | Ireturn optarg ->
diff --git a/backend/Stacking.v b/backend/Stacking.v
index 7b382d05..0e3f2832 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -133,8 +133,8 @@ Definition transl_instr
       end
   | Lop op args res =>
       Mop (transl_op fe op) args res :: k
-  | Lload chunk addr args dst =>
-      Mload chunk (transl_addr fe addr) args dst :: k
+  | Lload trap chunk addr args dst =>
+      Mload trap chunk (transl_addr fe addr) args dst :: k
   | Lstore chunk addr args src =>
       Mstore chunk (transl_addr fe addr) args src :: k
   | Lcall sig ros =>
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 326fab61..d3fcdb91 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -1918,6 +1918,46 @@ Proof.
   apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
   apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
 
+- (* Lload notrap1*)
+  assert (eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = None) as Haddress.
+  eapply eval_addressing_inject_none; eauto.
+  eapply globalenv_inject_preserves_globals. eapply sep_proj2. eapply sep_proj2. eapply sep_proj2. eexact SEP.
+  eapply agree_reglist; eauto.
+  econstructor; split.
+  apply plus_one. apply exec_Mload_notrap1.
+  rewrite <- Haddress. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  
+- (* Lload notrap2 *)
+  assert (exists a',
+          eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
+       /\ Val.inject j a a').
+  eapply eval_addressing_inject; eauto.
+  eapply globalenv_inject_preserves_globals. eapply sep_proj2. eapply sep_proj2. eapply sep_proj2. eexact SEP.
+  eapply agree_reglist; eauto.
+  destruct H1 as [a' [A B]].
+
+  destruct ( Mem.loadv chunk m' a') as [v'|] eqn:Hloadv.
+  {
+  econstructor; split.
+  apply plus_one. apply exec_Mload with (a:=a') (v:=v'); eauto.
+  try (rewrite <- A; apply eval_addressing_preserved; auto; exact symbols_preserved).
+  econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  }
+  {
+  econstructor; split.
+  apply plus_one. apply exec_Mload_notrap2 with (a:=a'); eauto.
+  try (rewrite <- A; apply eval_addressing_preserved; auto; exact symbols_preserved).
+  
+  econstructor; eauto with coqlib.
+  apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto.
+  apply agree_locs_set_reg. apply agree_locs_undef_locs. auto. apply destroyed_by_load_caller_save. auto.
+  }
+
 - (* Lstore *)
   assert (exists a',
           eval_addressing ge (Vptr sp' Ptrofs.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
diff --git a/backend/Tailcall.v b/backend/Tailcall.v
index 939abeea..b7a62d74 100644
--- a/backend/Tailcall.v
+++ b/backend/Tailcall.v
@@ -82,7 +82,7 @@ Definition transf_instr (f: function) (pc: node) (instr: instruction) :=
   | Icall sig ros args res s =>
       if is_return niter f s res
       && tailcall_is_possible sig
-      && opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res)
+      && rettype_eq sig.(sig_res) f.(fn_sig).(sig_res)
       then Itailcall sig ros args
       else instr
   | _ => instr
diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v
index 06e314f3..79a5c1cf 100644
--- a/backend/Tailcallproof.v
+++ b/backend/Tailcallproof.v
@@ -157,12 +157,10 @@ Lemma transf_instr_charact:
   transf_instr_spec f instr (transf_instr f pc instr).
 Proof.
   intros. unfold transf_instr. destruct instr; try constructor.
-  caseEq (is_return niter f n r && tailcall_is_possible s &&
-          opt_typ_eq (sig_res s) (sig_res (fn_sig f))); intros.
-  destruct (andb_prop _ _ H0). destruct (andb_prop _ _ H1).
-  eapply transf_instr_tailcall; eauto.
-  eapply is_return_charact; eauto.
-  constructor.
+  destruct (is_return niter f n r && tailcall_is_possible s &&
+            rettype_eq (sig_res s) (sig_res (fn_sig f))) eqn:B.
+- InvBooleans. eapply transf_instr_tailcall; eauto. eapply is_return_charact; eauto.
+- constructor.
 Qed.
 
 Lemma transf_instr_lookup:
@@ -438,6 +436,43 @@ Proof.
   apply eval_addressing_preserved. exact symbols_preserved. eauto.
   econstructor; eauto. apply set_reg_lessdef; auto.
 
+- (* load notrap1 *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
+  left.
+  exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split.
+  eapply exec_Iload_notrap1.
+  eassumption.
+  eapply eval_addressing_lessdef_none. eassumption.
+  erewrite eval_addressing_preserved.
+  eassumption. exact symbols_preserved.
+
+  econstructor; eauto. apply set_reg_lessdef; auto.
+
+- (* load notrap2 *)
+  TransfInstr.
+  assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
+  left.
+
+  exploit eval_addressing_lessdef; eauto.
+  intros [a' [ADDR' ALD]].
+  
+  destruct (Mem.loadv chunk m' a') eqn:Echunk2.
+  + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- v) m'); split.
+    eapply exec_Iload with (a:=a'). eassumption.
+    erewrite eval_addressing_preserved.
+    eassumption.
+    exact symbols_preserved.
+    assumption.
+    econstructor; eauto. apply set_reg_lessdef; auto.
+  + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split.
+    eapply exec_Iload_notrap2. eassumption.
+    erewrite eval_addressing_preserved.
+    eassumption.
+    exact symbols_preserved.
+    assumption.
+    econstructor; eauto. apply set_reg_lessdef; auto.
+    
 - (* store *)
   TransfInstr.
   assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index da1ce45a..a4c4a195 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -78,11 +78,11 @@ Definition record_gotos (f: LTL.function) : U.t :=
 Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
   match i with
   | Lbranch s => Lbranch (U.repr uf s)
-  | Lcond cond args s1 s2 =>
+  | Lcond cond args s1 s2 info =>
       let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
       if peq s1' s2'
       then Lbranch s1'
-      else Lcond cond args s1' s2'
+      else Lcond cond args s1' s2' info
   | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
   | _ => i
   end.
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index 4f95ac9b..d3b8a9f0 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -441,6 +441,31 @@ Proof.
   rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
   eauto. eauto.
   econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap1 *)
+  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap1.
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap2 *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  destruct (Mem.loadv chunk tm ta) eqn:Htload.
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap2.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
 - (* Lgetstack *)
   left; simpl; econstructor; split.
   econstructor; eauto.
diff --git a/backend/Unusedglob.v b/backend/Unusedglob.v
index 8ac7c4ce..93ca7af4 100644
--- a/backend/Unusedglob.v
+++ b/backend/Unusedglob.v
@@ -46,14 +46,14 @@ Definition ref_instruction (i: instruction) : list ident :=
   match i with
   | Inop _ => nil
   | Iop op _ _ _ => globals_operation op
-  | Iload _ addr _ _ _ => globals_addressing addr
+  | Iload _ _ addr _ _ _ => globals_addressing addr
   | Istore _ addr _ _ _ => globals_addressing addr
   | Icall _ (inl r) _ _ _ => nil
   | Icall _ (inr id) _ _ _ => id :: nil
   | Itailcall _ (inl r) _ => nil
   | Itailcall _ (inr id) _ => id :: nil
   | Ibuiltin _ args _ _ => globals_of_builtin_args args
-  | Icond cond _ _ _ => nil
+  | Icond cond _ _ _ _ => nil
   | Ijumptable _ _ => nil
   | Ireturn _ => nil
   end.
diff --git a/backend/Unusedglobproof.v b/backend/Unusedglobproof.v
index 680daba7..fa120b6d 100644
--- a/backend/Unusedglobproof.v
+++ b/backend/Unusedglobproof.v
@@ -915,7 +915,7 @@ Proof.
             /\ Val.inject j a ta).
   { apply eval_addressing_inj with (ge1 := ge) (sp1 := Vptr sp0 Ptrofs.zero) (vl1 := rs##args).
     intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
-    apply KEPT. red. exists pc, (Iload chunk addr args dst pc'); auto.
+    apply KEPT. red. exists pc, (Iload trap chunk addr args dst pc'); auto.
     econstructor; eauto.
     apply regs_inject; auto.
     assumption. }
@@ -924,6 +924,36 @@ Proof.
   econstructor; split. eapply exec_Iload; eauto.
   econstructor; eauto. apply set_reg_inject; auto.
 
+- (* load notrap1 *)
+  assert (eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = None).
+  { eapply eval_addressing_inj_none.
+    intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
+    apply KEPT. red. exists pc, (Iload NOTRAP chunk addr args dst pc'); auto.
+    econstructor; eauto.
+    rewrite Ptrofs.add_zero; reflexivity.
+    apply regs_inject; auto.
+    eassumption.
+    assumption. }
+ 
+  econstructor; split. eapply exec_Iload_notrap1; eauto.
+  econstructor; eauto. apply set_reg_inject; auto.
+
+- (* load notrap2 *)
+  assert (A: exists ta,
+               eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = Some ta
+            /\ Val.inject j a ta).
+  { apply eval_addressing_inj with (ge1 := ge) (sp1 := Vptr sp0 Ptrofs.zero) (vl1 := rs##args).
+    intros. apply symbol_address_inject. eapply match_stacks_preserves_globals; eauto.
+    apply KEPT. red. exists pc, (Iload NOTRAP chunk addr args dst pc'); auto.
+    econstructor; eauto.
+    apply regs_inject; auto.
+    assumption. }
+  destruct A as (ta & B & C).
+  destruct (Mem.loadv chunk tm ta) eqn:Echunk2.
+  + econstructor; split. eapply exec_Iload; eauto.
+    econstructor; eauto. apply set_reg_inject; auto.
+  + econstructor; split. eapply exec_Iload_notrap2; eauto.
+    econstructor; eauto. apply set_reg_inject; auto.
 - (* store *)
   assert (A: exists ta,
                eval_addressing tge (Vptr tsp Ptrofs.zero) addr trs##args = Some ta
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index 8dbb67a7..2e79d1a9 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -139,9 +139,14 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) :
   | Some(Iop op args res s) =>
       let a := eval_static_operation op (aregs ae args) in
       VA.State (AE.set res a ae) am
-  | Some(Iload chunk addr args dst s) =>
+  | Some(Iload TRAP chunk addr args dst s) =>
       let a := loadv chunk rm am (eval_static_addressing addr (aregs ae args)) in
       VA.State (AE.set dst a ae) am
+
+  (* TODO: maybe a case analysis on the results of loadv? *)
+               
+  | Some(Iload NOTRAP chunk addr args dst s) =>
+      VA.State (AE.set dst Vtop ae) am
   | Some(Istore chunk addr args src s) =>
       let am' := storev chunk am (eval_static_addressing addr (aregs ae args)) (areg ae src) in
       VA.State ae am'
@@ -151,7 +156,7 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) :
       VA.Bot
   | Some(Ibuiltin ef args res s) =>
       transfer_builtin ae am rm ef args res
-  | Some(Icond cond args s1 s2) =>
+  | Some(Icond cond args s1 s2 _) =>
       VA.State ae am
   | Some(Ijumptable arg tbl) =>
       VA.State ae am
@@ -1039,9 +1044,8 @@ Proof.
   red; simpl; intros. destruct (plt b (Mem.nextblock m)).
   exploit RO; eauto. intros (R & P & Q).
   split; auto.
-  split. apply bmatch_incr with bc; auto. apply bmatch_inv with m; auto.
-  intros. eapply Mem.loadbytes_unchanged_on_1. eapply external_call_readonly; eauto.
-  auto. intros; red. apply Q.
+  split. apply bmatch_incr with bc; auto. apply bmatch_ext with m; auto.
+  intros. eapply external_call_readonly with (m2 := m'); eauto.
   intros; red; intros; elim (Q ofs).
   eapply external_call_max_perm with (m2 := m'); eauto.
   destruct (j' b); congruence.
@@ -1148,10 +1152,10 @@ Proof.
 - constructor.
 - assert (Plt sp bound') by eauto with va.
   eapply sound_stack_public_call; eauto. apply IHsound_stack; intros.
-  apply INV. xomega. rewrite SAME; auto. xomega. auto. auto.
+  apply INV. xomega. rewrite SAME; auto with ordered_type. xomega. auto. auto.
 - assert (Plt sp bound') by eauto with va.
   eapply sound_stack_private_call; eauto. apply IHsound_stack; intros.
-  apply INV. xomega. rewrite SAME; auto. xomega. auto. auto.
+  apply INV. xomega. rewrite SAME; auto with ordered_type. xomega. auto. auto.
   apply bmatch_ext with m; auto. intros. apply INV. xomega. auto. auto. auto.
 Qed.
 
@@ -1268,11 +1272,29 @@ Proof.
   apply ematch_update; auto. eapply eval_static_operation_sound; eauto with va.
 
 - (* load *)
+  destruct trap.
+  + eapply sound_succ_state; eauto. simpl; auto.
+    unfold transfer; rewrite H. eauto.
+    apply ematch_update; auto. eapply loadv_sound; eauto with va.
+    eapply eval_static_addressing_sound; eauto with va.
+  + eapply sound_succ_state; eauto. simpl; auto.
+    unfold transfer; rewrite H. eauto.
+    apply ematch_update; auto.
+    eapply vmatch_top.
+    eapply loadv_sound; try eassumption.
+    eapply eval_static_addressing_sound; eauto with va.
+- (* load notrap1 *)
   eapply sound_succ_state; eauto. simpl; auto.
   unfold transfer; rewrite H. eauto.
-  apply ematch_update; auto. eapply loadv_sound; eauto with va.
-  eapply eval_static_addressing_sound; eauto with va.
-
+  apply ematch_update; auto.
+  unfold default_notrap_load_value.
+  constructor.
+- (* load notrap2 *)
+  eapply sound_succ_state; eauto. simpl; auto.
+  unfold transfer; rewrite H. eauto.
+  apply ematch_update; auto.
+  unfold default_notrap_load_value.
+  constructor.
 - (* store *)
   exploit eval_static_addressing_sound; eauto with va. intros VMADDR.
   eapply sound_succ_state; eauto. simpl; auto.
@@ -1362,7 +1384,7 @@ Proof.
   apply sound_stack_exten with bc.
   apply sound_stack_inv with m. auto.
   intros. apply Q. red. eapply Plt_trans; eauto.
-  rewrite C; auto.
+  rewrite C; auto with ordered_type.
   exact AA.
 * (* public builtin call *)
   exploit anonymize_stack; eauto.
@@ -1381,7 +1403,7 @@ Proof.
   apply sound_stack_exten with bc.
   apply sound_stack_inv with m. auto.
   intros. apply Q. red. eapply Plt_trans; eauto.
-  rewrite C; auto.
+  rewrite C; auto with ordered_type.
   exact AA.
   }
   unfold transfer_builtin in TR.
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index fd3bd5ae..779e7bb9 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -2093,6 +2093,7 @@ Proof.
 Qed.
 
 Definition sign_ext (nbits: Z) (v: aval) :=
+  if zle nbits 0 then Uns (provenance v) 0 else
   match v with
   | I i => I (Int.sign_ext nbits i)
   | Uns p n => if zlt n nbits then Uns p n else sgn p nbits
@@ -2101,20 +2102,39 @@ Definition sign_ext (nbits: Z) (v: aval) :=
   end.
 
 Lemma sign_ext_sound:
-  forall nbits v x, 0 < nbits -> vmatch v x -> vmatch (Val.sign_ext nbits v) (sign_ext nbits x).
+  forall nbits v x, vmatch v x -> vmatch (Val.sign_ext nbits v) (sign_ext nbits x).
 Proof.
   assert (DFL: forall p nbits i, 0 < nbits -> vmatch (Vint (Int.sign_ext nbits i)) (sgn p nbits)).
   {
     intros. apply vmatch_sgn. apply is_sign_ext_sgn; auto with va.
   }
-  intros. inv H0; simpl; auto with va.
-- destruct (zlt n nbits); eauto with va.
+  intros. unfold sign_ext. destruct (zle nbits 0).
+- destruct v; simpl; auto with va. constructor. omega. 
+  rewrite Int.sign_ext_below by auto. red; intros; apply Int.bits_zero.
+- inv H; simpl; auto with va.
++ destruct (zlt n nbits); eauto with va.
   constructor; auto. eapply is_sign_ext_uns; eauto with va.
-- destruct (zlt n nbits); auto with va.
-- apply vmatch_sgn. apply is_sign_ext_sgn; auto with va.
++ destruct (zlt n nbits); auto with va.
++ apply vmatch_sgn. apply is_sign_ext_sgn; auto with va.
   apply Z.min_case; auto with va.
 Qed.
 
+Definition zero_ext_l (s: Z) := unop_long (Int64.zero_ext s).
+
+Lemma zero_ext_l_sound:
+  forall s v x, vmatch v x -> vmatch (Val.zero_ext_l s v) (zero_ext_l s x).
+Proof.
+  intros s. exact (unop_long_sound (Int64.zero_ext s)).
+Qed.
+
+Definition sign_ext_l (s: Z) := unop_long (Int64.sign_ext s).
+
+Lemma sign_ext_l_sound:
+  forall s v x, vmatch v x -> vmatch (Val.sign_ext_l s v) (sign_ext_l s x).
+Proof.
+  intros s. exact (unop_long_sound (Int64.sign_ext s)).
+Qed.
+
 Definition longofint (v: aval) :=
   match v with
   | I i => L (Int64.repr (Int.signed i))
@@ -3482,11 +3502,6 @@ Proof.
 - omegaContradiction.
 Qed.
 
-Lemma max_size_chunk: forall chunk, size_chunk chunk <= 8.
-Proof.
-  destruct chunk; simpl; omega.
-Qed.
-
 Remark inval_before_contents:
   forall i c chunk' av' j,
   (inval_before i (i - 7) c)##j = Some (ACval chunk' av') ->
@@ -4712,6 +4727,7 @@ Hint Resolve cnot_sound symbol_address_sound
        negfs_sound absfs_sound
        addfs_sound subfs_sound mulfs_sound divfs_sound
        zero_ext_sound sign_ext_sound longofint_sound longofintu_sound
+       zero_ext_l_sound sign_ext_l_sound
        singleoffloat_sound floatofsingle_sound
        intoffloat_sound intuoffloat_sound floatofint_sound floatofintu_sound
        intofsingle_sound intuofsingle_sound singleofint_sound singleofintu_sound
diff --git a/backend/XTL.ml b/backend/XTL.ml
index f10efeed..1d8e89c0 100644
--- a/backend/XTL.ml
+++ b/backend/XTL.ml
@@ -30,13 +30,13 @@ type instruction =
   | Xspill of var * var
   | Xparmove of var list * var list * var * var
   | Xop of operation * var list * var
-  | Xload of memory_chunk * addressing * var list * var
+  | Xload of trapping_mode * memory_chunk * addressing * var list * var
   | Xstore of memory_chunk * addressing * var list * var
   | Xcall of signature * (var, ident) sum * var list * var list
   | Xtailcall of signature * (var, ident) sum * var list
   | Xbuiltin of external_function * var builtin_arg list * var builtin_res
   | Xbranch of node
-  | Xcond of condition * var list * node * node
+  | Xcond of condition * var list * node * node * bool option
   | Xjumptable of var * node list
   | Xreturn of var list
 
@@ -105,7 +105,7 @@ let twin_reg r =
 let rec successors_block = function
   | Xbranch s :: _ -> [s]
   | Xtailcall(sg, vos, args) :: _ -> []
-  | Xcond(cond, args, s1, s2) :: _ -> [s1; s2]
+  | Xcond(cond, args, s1, s2, _) :: _ -> [s1; s2]
   | Xjumptable(arg, tbl) :: _ -> tbl
   | Xreturn  _:: _ -> []
   | instr :: blk -> successors_block blk
@@ -159,7 +159,7 @@ let type_instr = function
       let (targs, tres) = type_of_operation op in
       set_vars_type args targs;
       set_var_type res tres
-  | Xload(chunk, addr, args, dst) ->
+  | Xload(trap, chunk, addr, args, dst) ->
       set_vars_type args (type_of_addressing addr);
       set_var_type dst (type_of_chunk chunk)
   | Xstore(chunk, addr, args, src) ->
@@ -179,7 +179,7 @@ let type_instr = function
       type_builtin_res res (proj_sig_res sg)
   | Xbranch s ->
       ()
-  | Xcond(cond, args, s1, s2) ->
+  | Xcond(cond, args, s1, s2, _) ->
       set_vars_type args (type_of_condition cond)
   | Xjumptable(arg, tbl) ->
       set_var_type arg Tint
diff --git a/backend/XTL.mli b/backend/XTL.mli
index 54988d4b..7b7f7186 100644
--- a/backend/XTL.mli
+++ b/backend/XTL.mli
@@ -31,13 +31,13 @@ type instruction =
   | Xspill of var * var
   | Xparmove of var list * var list * var * var
   | Xop of operation * var list * var
-  | Xload of memory_chunk * addressing * var list * var
+  | Xload of trapping_mode * memory_chunk * addressing * var list * var
   | Xstore of memory_chunk * addressing * var list * var
   | Xcall of signature * (var, ident) sum * var list * var list
   | Xtailcall of signature * (var, ident) sum * var list
   | Xbuiltin of external_function * var builtin_arg list * var builtin_res
   | Xbranch of node
-  | Xcond of condition * var list * node * node
+  | Xcond of condition * var list * node * node * bool option
   | Xjumptable of var * node list
   | Xreturn of var list
 
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index 421c4b07..9f2c4604 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -33,6 +33,7 @@ type inline_status =
 
 type atom_info =
   { a_storage: C.storage;              (* storage class *)
+    a_size: int64 option;              (* size in bytes *)
     a_alignment: int option;           (* alignment *)
     a_sections: Sections.section_name list; (* in which section to put it *)
       (* 1 section for data, 3 sections (code/lit/jumptbl) for functions *)
@@ -61,15 +62,25 @@ let atom_alignof a =
   with Not_found ->
     None
 
+let atom_is_aligned a sz =
+  match atom_alignof a with
+  | None -> false
+  | Some align -> align mod (Z.to_int sz) = 0
+
 let atom_sections a =
   try
     (Hashtbl.find decl_atom a).a_sections
   with Not_found ->
     []
 
-let atom_is_small_data a ofs =
+let atom_is_small_data a ofs  =
   try
-    (Hashtbl.find decl_atom a).a_access = Sections.Access_near
+    let info = Hashtbl.find decl_atom a in
+    info.a_access = Sections.Access_near
+    && (match info.a_size with
+        | None -> false
+        | Some sz ->
+            let ofs = camlint64_of_ptrofs ofs in 0L <= ofs && ofs < sz)
   with Not_found ->
     false
 
@@ -372,6 +383,7 @@ let name_for_string_literal s =
     Hashtbl.add decl_atom id
       { a_storage = C.Storage_static;
         a_alignment = Some 1;
+        a_size = Some (Int64.of_int (String.length s + 1));
         a_sections = [Sections.for_stringlit()];
         a_access = Sections.Access_default;
         a_inline = No_specifier;
@@ -399,9 +411,12 @@ let name_for_wide_string_literal s =
     incr stringNum;
     let name = Printf.sprintf "__stringlit_%d" !stringNum in
     let id = intern_string name in
+    let wchar_size = Machine.((!config).sizeof_wchar) in
     Hashtbl.add decl_atom id
       { a_storage = C.Storage_static;
-        a_alignment = Some Machine.((!config).sizeof_wchar);
+        a_alignment = Some wchar_size;
+        a_size = Some (Int64.(mul (of_int (List.length s + 1))
+                                  (of_int wchar_size)));
         a_sections = [Sections.for_stringlit()];
         a_access = Sections.Access_default;
         a_inline = No_specifier;
@@ -1243,7 +1258,8 @@ let convertFundef loc env fd =
   Hashtbl.add decl_atom id'
     { a_storage = fd.fd_storage;
       a_alignment = None;
-      a_sections = Sections.for_function env id' fd.fd_attrib;
+      a_size = None;
+      a_sections = Sections.for_function env loc id' fd.fd_attrib;
       a_access = Sections.Access_default;
       a_inline = inline;
       a_loc = loc };
@@ -1328,7 +1344,7 @@ let convertGlobvar loc env (sto, id, ty, optinit) =
     | Some i ->
         convertInitializer env ty i in
   let (section, access) =
-    Sections.for_variable env id' ty (optinit <> None) in
+    Sections.for_variable env loc id' ty (optinit <> None) in
   if Z.gt sz (Z.of_uint64 0xFFFF_FFFFL) then
     error "'%s' is too big (%s bytes)"
                    id.name (Z.to_string sz);
@@ -1337,6 +1353,7 @@ let convertGlobvar loc env (sto, id, ty, optinit) =
   Hashtbl.add decl_atom id'
     { a_storage = sto;
       a_alignment = Some (Z.to_int al);
+      a_size = Some (Z.to_int64 sz);
       a_sections = [section];
       a_access = access;
       a_inline = No_specifier;
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index 2942080b..b08c3ad7 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -460,6 +460,14 @@ Definition do_ef_free
       check (zlt 0 (Ptrofs.unsigned sz));
       do m' <- Mem.free m b (Ptrofs.unsigned lo - size_chunk Mptr) (Ptrofs.unsigned lo + Ptrofs.unsigned sz);
       Some(w, E0, Vundef, m')
+  | Vint n :: nil =>
+      if Int.eq_dec n Int.zero && negb Archi.ptr64
+      then Some(w, E0, Vundef, m)
+      else None
+  | Vlong n :: nil =>
+      if Int64.eq_dec n Int64.zero && Archi.ptr64
+      then Some(w, E0, Vundef, m)
+      else None
   | _ => None
   end.
 
@@ -544,45 +552,51 @@ Proof with try congruence.
   - eapply do_external_function_sound; eauto.
   }
   destruct ef; simpl.
-(* EF_external *)
+- (* EF_external *)
   eapply do_external_function_sound; eauto.
-(* EF_builtin *)
+- (* EF_builtin *)
   eapply BF_EX; eauto.
-(* EF_runtime *)
+- (* EF_runtime *)
   eapply BF_EX; eauto.
-(* EF_vload *)
+- (* EF_vload *)
   unfold do_ef_volatile_load. destruct vargs... destruct v... destruct vargs...
   mydestr. destruct p as [[w'' t''] v]; mydestr.
   exploit do_volatile_load_sound; eauto. intuition. econstructor; eauto.
-  auto.
-(* EF_vstore *)
+- (* EF_vstore *)
   unfold do_ef_volatile_store. destruct vargs... destruct v... destruct vargs... destruct vargs...
   mydestr. destruct p as [[w'' t''] m'']. mydestr.
   exploit do_volatile_store_sound; eauto. intuition. econstructor; eauto.
-  auto.
-(* EF_malloc *)
+- (* EF_malloc *)
   unfold do_ef_malloc. destruct vargs... destruct vargs... mydestr.
   destruct (Mem.alloc m (- size_chunk Mptr) (Ptrofs.unsigned i)) as [m1 b] eqn:?. mydestr.
   split. apply SIZE in Heqo. subst v. econstructor; eauto. constructor.
-(* EF_free *)
-  unfold do_ef_free. destruct vargs... destruct v... destruct vargs...
-  mydestr. split. apply SIZE in Heqo0. econstructor; eauto. congruence. omega. constructor.
-(* EF_memcpy *)
+- (* EF_free *)
+  unfold do_ef_free. destruct vargs... destruct v... 
++ destruct vargs... mydestr; InvBooleans; subst i.
+  replace (Vint Int.zero) with Vnullptr. split; constructor.
+  apply negb_true_iff in H0. unfold Vnullptr; rewrite H0; auto.
++ destruct vargs... mydestr; InvBooleans; subst i.
+  replace (Vlong Int64.zero) with Vnullptr. split; constructor.
+  unfold Vnullptr; rewrite H0; auto.
++ destruct vargs... mydestr.
+  split. apply SIZE in Heqo0. econstructor; eauto. congruence. omega.
+  constructor.
+- (* EF_memcpy *)
   unfold do_ef_memcpy. destruct vargs... destruct v... destruct vargs...
   destruct v... destruct vargs... mydestr. 
   apply Decidable_sound in Heqb1. red in Heqb1.
   split. econstructor; eauto; tauto. constructor.
-(* EF_annot *)
+- (* EF_annot *)
   unfold do_ef_annot. mydestr.
   split. constructor. apply list_eventval_of_val_sound; auto.
   econstructor. constructor; eauto. constructor.
-(* EF_annot_val *)
+- (* EF_annot_val *)
   unfold do_ef_annot_val. destruct vargs... destruct vargs... mydestr.
   split. constructor. apply eventval_of_val_sound; auto.
   econstructor. constructor; eauto. constructor.
-(* EF_inline_asm *)
+- (* EF_inline_asm *)
   eapply do_inline_assembly_sound; eauto.
-(* EF_debug *)
+- (* EF_debug *)
   unfold do_ef_debug. mydestr. split; constructor.
 Qed.
 
@@ -605,37 +619,38 @@ Proof.
   - eapply do_external_function_complete; eauto.
   }
   destruct ef; simpl in *.
-(* EF_external *)
+- (* EF_external *)
   eapply do_external_function_complete; eauto.
-(* EF_builtin *)
+- (* EF_builtin *)
   eapply BF_EX; eauto.
-(* EF_runtime *)
+- (* EF_runtime *)
   eapply BF_EX; eauto.
-(* EF_vload *)
+- (* EF_vload *)
   inv H; unfold do_ef_volatile_load.
   exploit do_volatile_load_complete; eauto. intros EQ; rewrite EQ; auto.
-(* EF_vstore *)
+- (* EF_vstore *)
   inv H; unfold do_ef_volatile_store.
   exploit do_volatile_store_complete; eauto. intros EQ; rewrite EQ; auto.
-(* EF_malloc *)
+- (* EF_malloc *)
   inv H; unfold do_ef_malloc.
   inv H0. erewrite SIZE by eauto. rewrite H1, H2. auto.
-(* EF_free *)
+- (* EF_free *)
   inv H; unfold do_ef_free.
-  inv H0. rewrite H1. erewrite SIZE by eauto. rewrite zlt_true. rewrite H3. auto. omega.
-(* EF_memcpy *)
++ inv H0. rewrite H1. erewrite SIZE by eauto. rewrite zlt_true. rewrite H3. auto. omega.
++ inv H0. unfold Vnullptr; destruct Archi.ptr64; auto.
+- (* EF_memcpy *)
   inv H; unfold do_ef_memcpy.
   inv H0. rewrite Decidable_complete. rewrite H7; rewrite H8; auto.
   red. tauto.
-(* EF_annot *)
+- (* EF_annot *)
   inv H; unfold do_ef_annot. inv H0. inv H6. inv H4.
   rewrite (list_eventval_of_val_complete _ _ _ H1). auto.
-(* EF_annot_val *)
+- (* EF_annot_val *)
   inv H; unfold do_ef_annot_val. inv H0. inv H6. inv H4.
   rewrite (eventval_of_val_complete _ _ _ H1). auto.
-(* EF_inline_asm *)
+- (* EF_inline_asm *)
   eapply do_inline_assembly_complete; eauto.
-(* EF_debug *)
+- (* EF_debug *)
   inv H. inv H0. reflexivity.
 Qed.
 
diff --git a/cfrontend/Cop.v b/cfrontend/Cop.v
index aa73abb0..143e87a3 100644
--- a/cfrontend/Cop.v
+++ b/cfrontend/Cop.v
@@ -140,8 +140,8 @@ Definition classify_cast (tfrom tto: type) : classify_cast_cases :=
   | Tfloat F64 _, Tfloat F32 _ => cast_case_s2f
   | Tfloat F32 _, Tfloat F64 _ => cast_case_f2s
   (* To pointer types *)
-  | Tpointer _ _, Tint _ _ _ =>
-      if Archi.ptr64 then cast_case_i2l Unsigned else cast_case_pointer
+  | Tpointer _ _, Tint _ si _ =>
+      if Archi.ptr64 then cast_case_i2l si else cast_case_pointer
   | Tpointer _ _, Tlong _ _ =>
       if Archi.ptr64 then cast_case_pointer else cast_case_l2i I32 Unsigned
   | Tpointer _ _, (Tpointer _ _ | Tarray _ _ _ | Tfunction _ _ _) => cast_case_pointer
diff --git a/cfrontend/Csem.v b/cfrontend/Csem.v
index a76a14ba..6d2b470f 100644
--- a/cfrontend/Csem.v
+++ b/cfrontend/Csem.v
@@ -444,7 +444,7 @@ Lemma red_selection:
 Proof.
   intros. unfold Eselection.
   set (t := typ_of_type ty).
-  set (sg := mksignature (AST.Tint :: t :: t :: nil) (Some t) cc_default).
+  set (sg := mksignature (AST.Tint :: t :: t :: nil) t cc_default).
   assert (LK: lookup_builtin_function "__builtin_sel"%string sg = Some (BI_standard (BI_select t))).
   { unfold sg, t; destruct ty as   [ | ? ? ? | ? | [] ? | ? ? | ? ? ? | ? ? ? | ? ? | ? ? ];
     simpl; unfold Tptr; destruct Archi.ptr64; reflexivity. }
diff --git a/cfrontend/Cshmgen.v b/cfrontend/Cshmgen.v
index 792a73f9..5bd12d00 100644
--- a/cfrontend/Cshmgen.v
+++ b/cfrontend/Cshmgen.v
@@ -23,6 +23,7 @@
 Require Import Coqlib Maps Errors Integers Floats.
 Require Import AST Linking.
 Require Import Ctypes Cop Clight Cminor Csharpminor.
+Require Import Conventions1.
 
 Local Open Scope string_scope.
 Local Open Scope error_monad_scope.
@@ -558,6 +559,34 @@ Fixpoint typlist_of_arglist (al: list Clight.expr) (tyl: typelist)
       typ_of_type (default_argument_conversion (typeof a1)) :: typlist_of_arglist a2 Tnil
   end.
 
+(** Translate a function call.
+    Depending on the ABI, it may be necessary to normalize the value
+    returned by casting it to the return type of the function.
+    For example, in the x86 ABI, a return value of type "char" is
+    returned in register AL, leaving the top 24 bits of EAX
+    unspecified.  Hence, a cast to type "char" is needed to sign- or
+    zero-extend the returned integer before using it. *)
+
+Definition make_normalization (t: type) (a: expr) :=
+  match t with
+  | Tint IBool _ _ => Eunop Ocast8unsigned a
+  | Tint I8 Signed _ => Eunop Ocast8signed a
+  | Tint I8 Unsigned _ => Eunop Ocast8unsigned a
+  | Tint I16 Signed _ => Eunop Ocast16signed a
+  | Tint I16 Unsigned _ => Eunop Ocast16unsigned a
+  | _ => a
+  end.
+
+Definition make_funcall (x: option ident) (tres: type) (sg: signature)
+                      (fn: expr) (args: list expr): stmt :=
+  match x, return_value_needs_normalization sg.(sig_res) with
+  | Some id, true =>
+      Sseq (Scall x sg fn args)
+           (Sset id (make_normalization tres (Evar id)))
+  | _, _ =>
+      Scall x sg fn args
+  end.
+
 (** * Translation of statements *)
 
 (** [transl_statement nbrk ncnt s] returns a Csharpminor statement
@@ -601,10 +630,10 @@ Fixpoint transl_statement (ce: composite_env) (tyret: type) (nbrk ncnt: nat)
       | fun_case_f args res cconv =>
           do tb <- transl_expr ce b;
           do tcl <- transl_arglist ce cl args;
-          OK(Scall x {| sig_args := typlist_of_arglist cl args;
-                        sig_res  := opttyp_of_type res;
-                        sig_cc   := cconv |}
-                   tb tcl)
+          let sg := {| sig_args := typlist_of_arglist cl args;
+                       sig_res  := rettype_of_type res;
+                       sig_cc   := cconv |} in
+          OK (make_funcall x res sg tb tcl)
       | _ => Error(msg "Cshmgen.transl_stmt(call)")
       end
   | Clight.Sbuiltin x ef tyargs bl =>
@@ -667,7 +696,7 @@ Definition transl_var (ce: composite_env) (v: ident * type) :=
 
 Definition signature_of_function (f: Clight.function) :=
   {| sig_args := map typ_of_type (map snd (Clight.fn_params f));
-     sig_res  := opttyp_of_type (Clight.fn_return f);
+     sig_res  := rettype_of_type (Clight.fn_return f);
      sig_cc   := Clight.fn_callconv f |}.
 
 Definition transl_function (ce: composite_env) (f: Clight.function) : res function :=
diff --git a/cfrontend/Cshmgenproof.v b/cfrontend/Cshmgenproof.v
index 09e31cb2..1ceb8e4d 100644
--- a/cfrontend/Cshmgenproof.v
+++ b/cfrontend/Cshmgenproof.v
@@ -15,7 +15,7 @@
 Require Import Coqlib Errors Maps Integers Floats.
 Require Import AST Linking.
 Require Import Values Events Memory Globalenvs Smallstep.
-Require Import Ctypes Cop Clight Cminor Csharpminor.
+Require Import Ctypes Ctyping Cop Clight Cminor Csharpminor.
 Require Import Cshmgen.
 
 (** * Relational specification of the transformation *)
@@ -996,6 +996,26 @@ Proof.
   eapply make_memcpy_correct with (b := b) (v := Vptr b' ofs'); eauto.
 Qed.
 
+Lemma make_normalization_correct:
+  forall e le m a v t,
+  eval_expr ge e le m a v ->
+  wt_val v t ->
+  eval_expr ge e le m (make_normalization t a) v.
+Proof.
+  intros. destruct t; simpl; auto. inv H0.
+- destruct i; simpl in H3.
+  + destruct s; econstructor; eauto; simpl; congruence.
+  + destruct s; econstructor; eauto; simpl; congruence.
+  + auto.
+  + econstructor; eauto; simpl; congruence.
+- auto.
+- destruct i.
+  + destruct s; econstructor; eauto. 
+  + destruct s; econstructor; eauto.
+  + auto.
+  + econstructor; eauto.
+Qed.
+
 End CONSTRUCTORS.
 
 (** * Basic preservation invariants *)
@@ -1360,7 +1380,16 @@ Inductive match_cont: composite_env -> type -> nat -> nat -> Clight.cont -> Csha
       match_cont cu.(prog_comp_env) (Clight.fn_return f) nbrk' ncnt' k tk ->
       match_cont ce tyret nbrk ncnt
                  (Clight.Kcall id f e le k)
-                 (Kcall id tf te le tk).
+                 (Kcall id tf te le tk)
+  | match_Kcall_normalize: forall ce tyret nbrk ncnt nbrk' ncnt' f e k id a tf te le tk cu,
+      linkorder cu prog ->
+      transl_function cu.(prog_comp_env) f = OK tf ->
+      match_env e te ->
+      match_cont cu.(prog_comp_env) (Clight.fn_return f) nbrk' ncnt' k tk ->
+      (forall v e le m, wt_val v tyret -> le!id = Some v -> eval_expr tge e le m a v) ->
+      match_cont ce tyret nbrk ncnt
+                 (Clight.Kcall (Some id) f e le k)
+                 (Kcall (Some id) tf te le (Kseq (Sset id a) tk)).
 
 Inductive match_states: Clight.state -> Csharpminor.state -> Prop :=
   | match_state:
@@ -1377,14 +1406,15 @@ Inductive match_states: Clight.state -> Csharpminor.state -> Prop :=
       forall fd args k m tfd tk targs tres cconv cu ce
           (LINK: linkorder cu prog)
           (TR: match_fundef cu fd tfd)
-          (MK: match_cont ce Tvoid 0%nat 0%nat k tk)
+          (MK: match_cont ce tres 0%nat 0%nat k tk)
           (ISCC: Clight.is_call_cont k)
           (TY: type_of_fundef fd = Tfunction targs tres cconv),
       match_states (Clight.Callstate fd args k m)
                    (Callstate tfd args tk m)
   | match_returnstate:
-      forall res k m tk ce
-          (MK: match_cont ce Tvoid 0%nat 0%nat k tk),
+      forall res tres k m tk ce
+          (MK: match_cont ce tres 0%nat 0%nat k tk)
+          (WT: wt_val res tres),
       match_states (Clight.Returnstate res k m)
                    (Returnstate res tk m).
 
@@ -1442,7 +1472,9 @@ Proof.
 - (* set *)
   auto.
 - (* call *)
-  simpl in TR. destruct (classify_fun (typeof e)); monadInv TR. auto.
+  simpl in TR. destruct (classify_fun (typeof e)); monadInv TR.
+  unfold make_funcall.
+  destruct o; auto; destruct Conventions1.return_value_needs_normalization; auto.
 - (* builtin *)
   auto.
 - (* seq *)
@@ -1500,24 +1532,26 @@ End FIND_LABEL.
 (** Properties of call continuations *)
 
 Lemma match_cont_call_cont:
-  forall ce' tyret' nbrk' ncnt' ce tyret nbrk ncnt k tk,
+  forall ce' nbrk' ncnt' ce tyret nbrk ncnt k tk,
   match_cont ce tyret nbrk ncnt k tk ->
-  match_cont ce' tyret' nbrk' ncnt' (Clight.call_cont k) (call_cont tk).
+  match_cont ce' tyret nbrk' ncnt' (Clight.call_cont k) (call_cont tk).
 Proof.
   induction 1; simpl; auto.
-  constructor.
-  econstructor; eauto.
+- apply match_Kstop.
+- eapply match_Kcall; eauto.
+- eapply match_Kcall_normalize; eauto.
 Qed.
 
 Lemma match_cont_is_call_cont:
-  forall ce tyret nbrk ncnt k tk ce' tyret' nbrk' ncnt',
+  forall ce tyret nbrk ncnt k tk ce' nbrk' ncnt',
   match_cont ce tyret nbrk ncnt k tk ->
   Clight.is_call_cont k ->
-  match_cont ce' tyret' nbrk' ncnt' k tk /\ is_call_cont tk.
+  match_cont ce' tyret nbrk' ncnt' k tk /\ is_call_cont tk.
 Proof.
   intros. inv H; simpl in H0; try contradiction; simpl.
-  split; auto; constructor.
-  split; auto; econstructor; eauto.
+  split; auto; apply match_Kstop.
+  split; auto; eapply match_Kcall; eauto.
+  split; auto; eapply match_Kcall_normalize; eauto.
 Qed.
 
 (** The simulation proof *)
@@ -1549,19 +1583,44 @@ Proof.
 
 - (* call *)
   revert TR. simpl. case_eq (classify_fun (typeof a)); try congruence.
-  intros targs tres cc CF TR. monadInv TR. inv MTR.
+  intros targs tres cc CF TR. monadInv TR.
   exploit functions_translated; eauto. intros (cu' & tfd & FIND & TFD & LINK').
   rewrite H in CF. simpl in CF. inv CF.
-  econstructor; split.
-  apply plus_one. econstructor; eauto.
-  eapply transl_expr_correct with (cunit := cu); eauto.
-  eapply transl_arglist_correct with (cunit := cu); eauto.
-  erewrite typlist_of_arglist_eq by eauto.
-  eapply transl_fundef_sig1; eauto.
-  rewrite H3. auto.
-  econstructor; eauto.
-  eapply match_Kcall with (ce := prog_comp_env cu') (cu := cu); eauto.
-  simpl. auto.
+  set (sg := {| sig_args := typlist_of_arglist al targs;
+                sig_res := rettype_of_type tres;
+                sig_cc := cc |}) in *.
+  assert (SIG: funsig tfd = sg).
+  { unfold sg; erewrite typlist_of_arglist_eq by eauto.
+    eapply transl_fundef_sig1; eauto. rewrite H3; auto. }
+  assert (EITHER: tk' = tk /\ ts' = Scall optid sg x x0
+               \/ exists id, optid = Some id /\
+                  tk' = tk /\ ts' = Sseq (Scall optid sg x x0)
+                                         (Sset id (make_normalization tres (Evar id)))).
+  { unfold make_funcall in MTR.
+    destruct optid. destruct Conventions1.return_value_needs_normalization.
+    inv MTR. right; exists i; auto.
+    inv MTR; auto.
+    inv MTR; auto. }
+  destruct EITHER as [(EK & ES) | (id & EI & EK & ES)]; rewrite EK, ES.
+  + (* without normalization of return value *)
+    econstructor; split.
+    apply plus_one. eapply step_call; eauto.
+    eapply transl_expr_correct with (cunit := cu); eauto.
+    eapply transl_arglist_correct with (cunit := cu); eauto.
+    econstructor; eauto.
+    eapply match_Kcall with (ce := prog_comp_env cu') (cu := cu); eauto.
+    exact I.
+  + (* with normalization of return value *)
+    subst optid.
+    econstructor; split.
+    eapply plus_two. apply step_seq. eapply step_call; eauto. 
+    eapply transl_expr_correct with (cunit := cu); eauto.
+    eapply transl_arglist_correct with (cunit := cu); eauto.
+    traceEq.
+    econstructor; eauto.
+    eapply match_Kcall_normalize  with (ce := prog_comp_env cu') (cu := cu); eauto.
+    intros. eapply make_normalization_correct; eauto. constructor; eauto.
+    exact I.
 
 - (* builtin *)
   monadInv TR. inv MTR.
@@ -1658,6 +1717,7 @@ Proof.
   eapply match_env_free_blocks; eauto.
   eapply match_returnstate with (ce := prog_comp_env cu); eauto.
   eapply match_cont_call_cont. eauto.
+  constructor.
 
 - (* return some *)
   monadInv TR. inv MTR.
@@ -1667,6 +1727,7 @@ Proof.
   eapply match_env_free_blocks; eauto.
   eapply match_returnstate with (ce := prog_comp_env cu); eauto.
   eapply match_cont_call_cont. eauto.
+  apply wt_val_casted. eapply cast_val_is_casted; eauto.
 
 - (* skip call *)
   monadInv TR. inv MTR.
@@ -1675,6 +1736,7 @@ Proof.
   apply plus_one. apply step_skip_call. auto.
   eapply match_env_free_blocks; eauto.
   eapply match_returnstate with (ce := prog_comp_env cu); eauto.
+  constructor.
 
 - (* switch *)
   monadInv TR.
@@ -1738,20 +1800,33 @@ Proof.
   simpl. econstructor; eauto.
   unfold transl_function. rewrite EQ; simpl. rewrite EQ1; simpl. auto.
   constructor.
+  replace (fn_return f) with tres. eassumption.
+  simpl in TY. unfold type_of_function in TY. congruence. 
 
 - (* external function *)
   inv TR.
   exploit match_cont_is_call_cont; eauto. intros [A B].
   econstructor; split.
-  apply plus_one. constructor. eauto.
+  apply plus_one. constructor.
   eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   eapply match_returnstate with (ce := ce); eauto.
+  apply has_rettype_wt_val. 
+  replace (rettype_of_type tres0) with (sig_res (ef_sig ef)).
+  eapply external_call_well_typed_gen; eauto.
+  rewrite H5. simpl. simpl in TY. congruence.
 
 - (* returnstate *)
   inv MK.
-  econstructor; split.
-  apply plus_one. constructor.
-  econstructor; eauto. simpl; reflexivity. constructor.
+  + (* without normalization *)
+    econstructor; split.
+    apply plus_one. constructor.
+    econstructor; eauto. simpl; reflexivity. constructor.
+  + (* with normalization *)
+    econstructor; split.
+    eapply plus_three. econstructor. econstructor. constructor.
+    simpl. apply H13. eauto. apply PTree.gss.
+    traceEq.
+    simpl. rewrite PTree.set2. econstructor; eauto. simpl; reflexivity. constructor.
 Qed.
 
 Lemma transl_initial_states:
diff --git a/cfrontend/Csyntax.v b/cfrontend/Csyntax.v
index c34a5e13..e3e2c1e9 100644
--- a/cfrontend/Csyntax.v
+++ b/cfrontend/Csyntax.v
@@ -106,7 +106,7 @@ Definition Epreincr (id: incr_or_decr) (l: expr) (ty: type) :=
 
 Definition Eselection (r1 r2 r3: expr) (ty: type) :=
   let t := typ_of_type ty in
-  let sg := mksignature (AST.Tint :: t :: t :: nil) (Some t) cc_default in
+  let sg := mksignature (AST.Tint :: t :: t :: nil) t cc_default in
   Ebuiltin (EF_builtin "__builtin_sel"%string sg)
            (Tcons type_bool (Tcons ty (Tcons ty Tnil)))
            (Econs r1 (Econs r2 (Econs r3 Enil)))
diff --git a/cfrontend/Ctypes.v b/cfrontend/Ctypes.v
index bfc5daa9..664a60c5 100644
--- a/cfrontend/Ctypes.v
+++ b/cfrontend/Ctypes.v
@@ -732,8 +732,21 @@ Definition typ_of_type (t: type) : AST.typ :=
   | Tpointer _ _ | Tarray _ _ _ | Tfunction _ _ _ | Tstruct _ _ | Tunion _ _ => AST.Tptr
   end.
 
-Definition opttyp_of_type (t: type) : option AST.typ :=
-  if type_eq t Tvoid then None else Some (typ_of_type t).
+Definition rettype_of_type (t: type) : AST.rettype :=
+  match t with
+  | Tvoid => AST.Tvoid
+  | Tint I32 _ _ => AST.Tint
+  | Tint I8 Signed _ => AST.Tint8signed
+  | Tint I8 Unsigned _ => AST.Tint8unsigned
+  | Tint I16 Signed _ => AST.Tint16signed
+  | Tint I16 Unsigned _ => AST.Tint16unsigned
+  | Tint IBool _ _ => AST.Tint8unsigned
+  | Tlong _ _ => AST.Tlong
+  | Tfloat F32 _ => AST.Tsingle
+  | Tfloat F64 _ => AST.Tfloat
+  | Tpointer _ _ => AST.Tptr
+  | Tarray _ _ _ | Tfunction _ _ _ | Tstruct _ _ | Tunion _ _ => AST.Tvoid
+  end.
 
 Fixpoint typlist_of_typelist (tl: typelist) : list AST.typ :=
   match tl with
@@ -742,7 +755,7 @@ Fixpoint typlist_of_typelist (tl: typelist) : list AST.typ :=
   end.
 
 Definition signature_of_type (args: typelist) (res: type) (cc: calling_convention): signature :=
-  mksignature (typlist_of_typelist args) (opttyp_of_type res) cc.
+  mksignature (typlist_of_typelist args) (rettype_of_type res) cc.
 
 (** * Construction of the composite environment *)
 
diff --git a/cfrontend/Ctyping.v b/cfrontend/Ctyping.v
index b92a9bac..00fcf8ab 100644
--- a/cfrontend/Ctyping.v
+++ b/cfrontend/Ctyping.v
@@ -397,10 +397,10 @@ Inductive wt_rvalue : expr -> Prop :=
       wt_arguments rargs tyargs ->
       (* This typing rule is specialized to the builtin invocations generated
          by C2C, which are either __builtin_sel or builtins returning void. *)
-         (ty = Tvoid /\ sig_res (ef_sig ef) = None)
+         (ty = Tvoid /\ sig_res (ef_sig ef) = AST.Tvoid)
       \/ (tyargs = Tcons type_bool (Tcons ty (Tcons ty Tnil))
           /\ let t := typ_of_type ty in
-             let sg := mksignature (AST.Tint :: t :: t :: nil) (Some t) cc_default in
+             let sg := mksignature (AST.Tint :: t :: t :: nil) t cc_default in
              ef = EF_builtin "__builtin_sel"%string sg) ->
       wt_rvalue (Ebuiltin ef tyargs rargs ty)
   | wt_Eparen: forall r tycast ty,
@@ -521,11 +521,20 @@ Fixpoint bind_globdef (e: typenv) (l: list (ident * globdef fundef type)) : type
   | (id, Gvar v) :: l => bind_globdef (PTree.set id v.(gvar_info) e) l
   end.
 
+Inductive wt_fundef (ce: composite_env) (e: typenv) : fundef -> Prop :=
+  | wt_fundef_internal: forall f,
+      wt_function ce e f ->
+      wt_fundef ce e (Internal f)
+  | wt_fundef_external: forall ef targs tres cc,
+      (ef_sig ef).(sig_res) = rettype_of_type tres ->
+      wt_fundef ce e (External ef targs tres cc).
+
 Inductive wt_program : program -> Prop :=
   | wt_program_intro: forall p,
       let e := bind_globdef (PTree.empty _) p.(prog_defs) in
-      (forall id f, In (id, Gfun (Internal f)) p.(prog_defs) ->
-         wt_function p.(prog_comp_env) e f) ->
+      (forall id fd,
+         In (id, Gfun fd) p.(prog_defs) ->
+         wt_fundef p.(prog_comp_env) e fd) ->
       wt_program p.
 
 Hint Constructors wt_val wt_rvalue wt_lvalue wt_stmt wt_lblstmts: ty.
@@ -745,7 +754,7 @@ Definition ebuiltin (ef: external_function) (tyargs: typelist) (args: exprlist)
   do x1 <- check_rvals args;
   do x2 <- check_arguments args tyargs;
   if type_eq tyres Tvoid
-  && opt_typ_eq (sig_res (ef_sig ef)) None
+  && AST.rettype_eq (sig_res (ef_sig ef)) AST.Tvoid
   then OK (Ebuiltin ef tyargs args tyres)
   else Error (msg "builtin: wrong type decoration").
 
@@ -915,7 +924,8 @@ Definition retype_function (ce: composite_env) (e: typenv) (f: function) : res f
 Definition retype_fundef (ce: composite_env) (e: typenv) (fd: fundef) : res fundef :=
   match fd with
   | Internal f => do f' <- retype_function ce e f; OK (Internal f')
-  | External id args res cc => OK fd
+  | External ef args res cc =>
+      assertion (rettype_eq (ef_sig ef).(sig_res) (rettype_of_type res)); OK fd
   end.
 
 Definition typecheck_program (p: program) : res program :=
@@ -987,6 +997,7 @@ Proof.
           classify_cast (Tint i s a) t2 <> cast_case_default).
   {
     unfold classify_cast. destruct t2; try congruence. destruct f; congruence.
+    destruct Archi.ptr64; congruence.
   }
   destruct i; auto.
 Qed.
@@ -1240,7 +1251,7 @@ Lemma ebuiltin_sound:
 Proof.
   intros. monadInv H.
   destruct (type_eq tyres Tvoid); simpl in EQ2; try discriminate.
-  destruct (opt_typ_eq (sig_res (ef_sig ef)) None); inv EQ2.
+  destruct (rettype_eq (sig_res (ef_sig ef)) AST.Tvoid); inv EQ2.
   econstructor; eauto. eapply check_arguments_sound; eauto.
 Qed.
 
@@ -1372,6 +1383,14 @@ Proof.
   intros. monadInv H. constructor; simpl. eapply retype_stmt_sound; eauto.
 Qed.
 
+Lemma retype_fundef_sound:
+  forall ce e fd fd', retype_fundef ce e fd = OK fd' -> wt_fundef ce e fd'.
+Proof.
+  intros. destruct fd; monadInv H.
+- constructor; eapply retype_function_sound; eauto.
+- constructor; auto.
+Qed.
+
 Theorem typecheck_program_sound:
   forall p p', typecheck_program p = OK p' -> wt_program p'.
 Proof.
@@ -1394,11 +1413,11 @@ Proof.
     inv H1. simpl. auto.
   }
   rewrite ENVS.
-  intros id f. revert MATCH; generalize (prog_defs p) (AST.prog_defs tp).
+  intros id fd. revert MATCH; generalize (prog_defs p) (AST.prog_defs tp).
   induction 1; simpl; intros.
   contradiction.
   destruct H0; auto. subst b1; inv H. simpl in H1. inv H1. 
-  destruct f1; monadInv H4. eapply retype_function_sound; eauto.
+  eapply retype_fundef_sound; eauto.
 Qed.
 
 (** * Subject reduction *)
@@ -1710,6 +1729,26 @@ Proof.
   inv H; auto.
 Qed.
 
+Lemma has_rettype_wt_val:
+  forall v ty,
+  Val.has_rettype v (rettype_of_type ty) -> wt_val v ty.
+Proof.
+  unfold rettype_of_type, Val.has_rettype, Val.has_type; destruct ty; intros.
+- destruct v; contradiction || constructor.
+- destruct i.
+  + destruct s; destruct v; try contradiction; constructor; red; auto.
+  + destruct s; destruct v; try contradiction; constructor; red; auto.
+  + destruct v; try contradiction; constructor; auto.
+  + destruct v; try contradiction; constructor; red; auto.
+- destruct v; try contradiction; constructor; auto.
+- destruct f; destruct v; try contradiction; constructor.
+- unfold Tptr in *; destruct v; destruct Archi.ptr64 eqn:P64; try contradiction; constructor; auto.
+- destruct v; contradiction || constructor.
+- destruct v; contradiction || constructor.
+- destruct v; contradiction || constructor.
+- destruct v; contradiction || constructor.
+Qed.
+
 Lemma wt_rred:
   forall ge tenv a m t a' m',
   rred ge a m t a' m' -> wt_rvalue ge tenv a -> wt_rvalue ge tenv a'.
@@ -1749,7 +1788,7 @@ Proof.
 - (* builtin *) subst. destruct H7 as [(A & B) | (A & B)].
 + subst ty. auto with ty.
 + simpl in B. set (T := typ_of_type ty) in *. 
-  set (sg := mksignature (AST.Tint :: T :: T :: nil) (Some T) cc_default) in *.
+  set (sg := mksignature (AST.Tint :: T :: T :: nil) T cc_default) in *.
   assert (LK: lookup_builtin_function "__builtin_sel"%string sg = Some (BI_standard (BI_select T))).
   { unfold sg, T; destruct ty as   [ | ? ? ? | ? | [] ? | ? ? | ? ? ? | ? ? ? | ? ? | ? ? ];
     simpl; unfold Tptr; destruct Archi.ptr64; reflexivity. }
@@ -1895,12 +1934,6 @@ Hypothesis WTPROG: wt_program prog.
 Let ge := globalenv prog.
 Let gtenv := bind_globdef (PTree.empty _) prog.(prog_defs).
 
-Hypothesis WT_EXTERNAL:
-  forall id ef args res cc vargs m t vres m',
-  In (id, Gfun (External ef args res cc)) prog.(prog_defs) ->
-  external_call ef ge vargs m t vres m' ->
-  wt_val vres res.
-
 Inductive wt_expr_cont: typenv -> function -> cont -> Prop :=
   | wt_Kdo: forall te f k,
       wt_stmt_cont te f k ->
@@ -1999,12 +2032,6 @@ Proof.
   induction 1; simpl; auto; econstructor; eauto.
 Qed.
 
-Definition wt_fundef (fd: fundef) :=
-  match fd with
-  | Internal f => wt_function ge gtenv f
-  | External ef targs tres cc => True
-  end.
-
 Definition fundef_return (fd: fundef) : type :=
   match fd with
   | Internal f => f.(fn_return)
@@ -2012,10 +2039,10 @@ Definition fundef_return (fd: fundef) : type :=
   end.
 
 Lemma wt_find_funct:
-  forall v fd, Genv.find_funct ge v = Some fd -> wt_fundef fd.
+  forall v fd, Genv.find_funct ge v = Some fd -> wt_fundef ge gtenv fd.
 Proof.
   intros. apply Genv.find_funct_prop with (p := prog) (v := v); auto.
-  intros. inv WTPROG. destruct f; simpl; auto. apply H1 with id; auto.
+  intros. inv WTPROG. apply H1 with id; auto.
 Qed.
 
 Inductive wt_state: state -> Prop :=
@@ -2031,7 +2058,7 @@ Inductive wt_state: state -> Prop :=
       wt_state (ExprState f r k e m)
   | wt_call_state: forall b fd vargs k m
         (WTK: wt_call_cont k (fundef_return fd))
-        (WTFD: wt_fundef fd)
+        (WTFD: wt_fundef ge gtenv fd)
         (FIND: Genv.find_funct ge b = Some fd),
       wt_state (Callstate fd vargs k m)
   | wt_return_state: forall v k m ty
@@ -2088,7 +2115,6 @@ Qed.
 
 End WT_FIND_LABEL.
 
-
 Lemma preservation_estep:
   forall S t S', estep ge S t S' -> wt_state S -> wt_state S'.
 Proof.
@@ -2163,9 +2189,10 @@ Proof.
 - inv WTS; eauto with ty.
 - exploit wt_find_label. eexact WTB. eauto. eapply call_cont_wt'; eauto.
   intros [A B]. eauto with ty.
-- simpl in WTFD; inv WTFD. econstructor; eauto. apply wt_call_cont_stmt_cont; auto.
-- exploit (Genv.find_funct_inversion prog); eauto. intros (id & A).
-  econstructor; eauto.
+- inv WTFD. inv H3. econstructor; eauto. apply wt_call_cont_stmt_cont; auto.
+- inv WTFD. econstructor; eauto.
+  apply has_rettype_wt_val. simpl; rewrite <- H1.
+  eapply external_call_well_typed_gen; eauto.
 - inv WTK. eauto with ty.
 Qed.
 
@@ -2180,7 +2207,7 @@ Theorem wt_initial_state:
 Proof.
   intros. inv H. econstructor. constructor.
   apply Genv.find_funct_ptr_prop with (p := prog) (b := b); auto.
-  intros. inv WTPROG. destruct f0; simpl; auto. apply H4 with id; auto.
+  intros. inv WTPROG. apply H4 with id; auto.
   instantiate (1 := (Vptr b Ptrofs.zero)). rewrite Genv.find_funct_find_funct_ptr. auto.
 Qed.
 
diff --git a/cfrontend/PrintClight.ml b/cfrontend/PrintClight.ml
index ca378c11..0e735d2d 100644
--- a/cfrontend/PrintClight.ml
+++ b/cfrontend/PrintClight.ml
@@ -23,9 +23,18 @@ open Cop
 open PrintCsyntax
 open Clight
 
-(* Naming temporaries *)
+(* Naming temporaries.
+   Some temporaries are obtained by lifting variables in SimplLocals.
+   For these we use a meaningful name "$var", as found in the table of
+   atoms.  Other temporaries are generated during SimplExpr, and are
+   not in the table of atoms.  We print them as "$NNN" (a unique
+   integer). *)
 
-let temp_name (id: AST.ident) = "$" ^ Z.to_string (Z.Zpos id)
+let temp_name (id: AST.ident) =
+  try
+    "$" ^ Hashtbl.find string_of_atom id
+  with Not_found ->
+    Printf.sprintf "$%d" (P.to_int id)
 
 (* Declarator (identifier + type) -- reuse from PrintCsyntax *)
 
@@ -236,10 +245,20 @@ and print_stmt_for p s =
   | _ ->
       fprintf p "({ %a })" print_stmt s
 
-let print_function p id f =
+(* There are two versions of Clight, Clight1 and Clight2, that differ
+   only in the meaning of function parameters:
+ - in Clight1, function parameters are variables
+ - in Clight2, function parameters are temporaries.
+*)
+
+type clight_version = Clight1 | Clight2
+
+let name_param = function Clight1 -> extern_atom | Clight2 -> temp_name
+
+let print_function ver p id f =
   fprintf p "%s@ "
-            (name_cdecl (name_function_parameters (extern_atom id)
-                                                  f.fn_params f.fn_callconv)
+            (name_cdecl (name_function_parameters (name_param ver)
+                                 (extern_atom id) f.fn_params f.fn_callconv)
                         f.fn_return);
   fprintf p "@[<v 2>{@ ";
   List.iter
@@ -253,12 +272,12 @@ let print_function p id f =
   print_stmt p f.fn_body;
   fprintf p "@;<0 -2>}@]@ @ "
 
-let print_fundef p id fd =
+let print_fundef ver p id fd =
   match fd with
   | Ctypes.External(_, _, _, _) ->
       ()
   | Internal f ->
-      print_function p id f
+      print_function ver p id f
 
 let print_fundecl p id fd =
   match fd with
@@ -271,9 +290,9 @@ let print_fundecl p id fd =
       fprintf p "%s;@ "
                 (name_cdecl (extern_atom id) (Clight.type_of_function f))
 
-let print_globdef p (id, gd) =
+let print_globdef var p (id, gd) =
   match gd with
-  | AST.Gfun f -> print_fundef p id f
+  | AST.Gfun f -> print_fundef var p id f
   | AST.Gvar v -> print_globvar p id v  (* from PrintCsyntax *)
 
 let print_globdecl p (id, gd) =
@@ -281,20 +300,29 @@ let print_globdecl p (id, gd) =
   | AST.Gfun f -> print_fundecl p id f
   | AST.Gvar v -> ()
 
-let print_program p prog =
+let print_program ver p prog =
   fprintf p "@[<v 0>";
   List.iter (declare_composite p) prog.prog_types;
   List.iter (define_composite p) prog.prog_types;
   List.iter (print_globdecl p) prog.prog_defs;
-  List.iter (print_globdef p) prog.prog_defs;
+  List.iter (print_globdef ver p) prog.prog_defs;
   fprintf p "@]@."
 
 let destination : string option ref = ref None
 
-let print_if prog =
+let print_if_gen ver prog =
   match !destination with
   | None -> ()
   | Some f ->
       let oc = open_out f in
-      print_program (formatter_of_out_channel oc) prog;
+      print_program ver (formatter_of_out_channel oc) prog;
       close_out oc
+
+(* print_if is called from driver/Compiler.v between the SimplExpr
+   and SimplLocals passes.  It receives Clight1 syntax. *)
+let print_if prog = print_if_gen Clight1 prog
+
+(* print_if_2 is called from clightgen/Clightgen.ml, after the
+   SimplLocals pass.  It receives Clight2 syntax. *)
+let print_if_2 prog = print_if_gen Clight2 prog
+
diff --git a/cfrontend/PrintCsyntax.ml b/cfrontend/PrintCsyntax.ml
index 3a44796c..03dc5837 100644
--- a/cfrontend/PrintCsyntax.ml
+++ b/cfrontend/PrintCsyntax.ml
@@ -19,7 +19,7 @@ open Format
 open Camlcoq
 open Values
 open AST
-open Ctypes
+open! Ctypes
 open Cop
 open Csyntax
 
@@ -85,7 +85,7 @@ let name_optid id =
 
 let rec name_cdecl id ty =
   match ty with
-  | Tvoid ->
+  | Ctypes.Tvoid ->
       "void" ^ name_optid id
   | Ctypes.Tint(sz, sg, a) ->
       name_inttype sz sg ^ attributes a ^ name_optid id
@@ -391,7 +391,7 @@ and print_stmt_for p s =
   | _ ->
       fprintf p "({ %a })" print_stmt s
 
-let name_function_parameters fun_name params cconv =
+let name_function_parameters name_param fun_name params cconv =
   let b = Buffer.create 20 in
   Buffer.add_string b fun_name;
   Buffer.add_char b '(';
@@ -404,7 +404,7 @@ let name_function_parameters fun_name params cconv =
           if cconv.cc_vararg then Buffer.add_string b ",..."
       | (id, ty) :: rem ->
           if not first then Buffer.add_string b ", ";
-          Buffer.add_string b (name_cdecl (extern_atom id) ty);
+          Buffer.add_string b (name_cdecl (name_param id) ty);
           add_params false rem in
       add_params true params
   end;
@@ -413,8 +413,8 @@ let name_function_parameters fun_name params cconv =
 
 let print_function p id f =
   fprintf p "%s@ "
-            (name_cdecl (name_function_parameters (extern_atom id)
-                                                  f.fn_params f.fn_callconv)
+            (name_cdecl (name_function_parameters extern_atom
+                             (extern_atom id) f.fn_params f.fn_callconv)
                         f.fn_return);
   fprintf p "@[<v 2>{@ ";
   List.iter
diff --git a/common/AST.v b/common/AST.v
index a91138c9..eb34d675 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -17,7 +17,7 @@
   the abstract syntax trees of many of the intermediate languages. *)
 
 Require Import String.
-Require Import Coqlib Maps Errors Integers Floats.
+Require Import Coqlib Maps Errors Integers Floats BinPos.
 Require Archi.
 
 Set Implicit Arguments.
@@ -45,9 +45,6 @@ Lemma typ_eq: forall (t1 t2: typ), {t1=t2} + {t1<>t2}.
 Proof. decide equality. Defined.
 Global Opaque typ_eq.
 
-Definition opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}
-                     := option_eq typ_eq.
-
 Definition list_typ_eq: forall (l1 l2: list typ), {l1=l2} + {l1<>l2}
                      := list_eq_dec typ_eq.
 
@@ -91,10 +88,34 @@ Fixpoint subtype_list (tyl1 tyl2: list typ) : bool :=
   | _, _ => false
   end.
 
+(** To describe the values returned by functions, we use the more precise
+    types below. *)
+
+Inductive rettype : Type :=
+  | Tret (t: typ)                       (**r like type [t] *)
+  | Tint8signed                         (**r 8-bit signed integer *)
+  | Tint8unsigned                       (**r 8-bit unsigned integer *)
+  | Tint16signed                        (**r 16-bit signed integer *)
+  | Tint16unsigned                      (**r 16-bit unsigned integer *)
+  | Tvoid.                              (**r no value returned *)
+
+Coercion Tret: typ >-> rettype.
+
+Lemma rettype_eq: forall (t1 t2: rettype), {t1=t2} + {t1<>t2}.
+Proof. generalize typ_eq; decide equality. Defined.
+Global Opaque rettype_eq.
+
+Fixpoint proj_rettype (r: rettype) : typ :=
+  match r with
+  | Tret t => t
+  | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => Tint
+  | Tvoid => Tint
+  end.
+
 (** Additionally, function definitions and function calls are annotated
   by function signatures indicating:
 - the number and types of arguments;
-- the type of the returned value, if any;
+- the type of the returned value;
 - additional information on which calling convention to use.
 
 These signatures are used in particular to determine appropriate
@@ -117,24 +138,20 @@ Global Opaque calling_convention_eq.
 
 Record signature : Type := mksignature {
   sig_args: list typ;
-  sig_res: option typ;
+  sig_res: rettype;
   sig_cc: calling_convention
 }.
 
-Definition proj_sig_res (s: signature) : typ :=
-  match s.(sig_res) with
-  | None => Tint
-  | Some t => t
-  end.
+Definition proj_sig_res (s: signature) : typ := proj_rettype s.(sig_res).
 
 Definition signature_eq: forall (s1 s2: signature), {s1=s2} + {s1<>s2}.
 Proof.
-  generalize opt_typ_eq, list_typ_eq, calling_convention_eq; decide equality.
+  generalize rettype_eq, list_typ_eq, calling_convention_eq; decide equality.
 Defined.
 Global Opaque signature_eq.
 
 Definition signature_main :=
-  {| sig_args := nil; sig_res := Some Tint; sig_cc := cc_default |}.
+  {| sig_args := nil; sig_res := Tint; sig_cc := cc_default |}.
 
 (** Memory accesses (load and store instructions) are annotated by
   a ``memory chunk'' indicating the type, size and signedness of the
@@ -177,6 +194,28 @@ Definition type_of_chunk (c: memory_chunk) : typ :=
 Lemma type_of_Mptr: type_of_chunk Mptr = Tptr.
 Proof. unfold Mptr, Tptr; destruct Archi.ptr64; auto. Qed.
 
+(** Same, as a return type. *)
+
+Definition rettype_of_chunk (c: memory_chunk) : rettype :=
+  match c with
+  | Mint8signed => Tint8signed
+  | Mint8unsigned => Tint8unsigned
+  | Mint16signed => Tint16signed
+  | Mint16unsigned => Tint16unsigned
+  | Mint32 => Tint
+  | Mint64 => Tlong
+  | Mfloat32 => Tsingle
+  | Mfloat64 => Tfloat
+  | Many32 => Tany32
+  | Many64 => Tany64
+  end.
+
+Lemma proj_rettype_of_chunk:
+  forall chunk, proj_rettype (rettype_of_chunk chunk) = type_of_chunk chunk.
+Proof.
+  destruct chunk; auto.
+Qed.
+
 (** The chunk that is appropriate to store and reload a value of
   the given type, without losing information. *)
 
@@ -193,6 +232,16 @@ Definition chunk_of_type (ty: typ) :=
 Lemma chunk_of_Tptr: chunk_of_type Tptr = Mptr.
 Proof. unfold Mptr, Tptr; destruct Archi.ptr64; auto. Qed.
 
+(** Trapping mode: does undefined behavior result in a trap or an undefined value (e.g. for loads) *)
+Inductive trapping_mode : Type := TRAP | NOTRAP.
+
+Definition trapping_mode_eq : forall x y : trapping_mode,
+    { x=y } + { x <> y}.
+Proof.
+  decide equality.
+Defined.
+
+
 (** Initialization data for global variables. *)
 
 Inductive init_data: Type :=
@@ -477,15 +526,15 @@ Definition ef_sig (ef: external_function): signature :=
   | EF_external name sg => sg
   | EF_builtin name sg => sg
   | EF_runtime name sg => sg
-  | EF_vload chunk => mksignature (Tptr :: nil) (Some (type_of_chunk chunk)) cc_default
-  | EF_vstore chunk => mksignature (Tptr :: type_of_chunk chunk :: nil) None cc_default
-  | EF_malloc => mksignature (Tptr :: nil) (Some Tptr) cc_default
-  | EF_free => mksignature (Tptr :: nil) None cc_default
-  | EF_memcpy sz al => mksignature (Tptr :: Tptr :: nil) None cc_default
-  | EF_annot kind text targs => mksignature targs None cc_default
-  | EF_annot_val kind text targ => mksignature (targ :: nil) (Some targ) cc_default
+  | EF_vload chunk => mksignature (Tptr :: nil) (rettype_of_chunk chunk) cc_default
+  | EF_vstore chunk => mksignature (Tptr :: type_of_chunk chunk :: nil) Tvoid cc_default
+  | EF_malloc => mksignature (Tptr :: nil) Tptr cc_default
+  | EF_free => mksignature (Tptr :: nil) Tvoid cc_default
+  | EF_memcpy sz al => mksignature (Tptr :: Tptr :: nil) Tvoid cc_default
+  | EF_annot kind text targs => mksignature targs Tvoid cc_default
+  | EF_annot_val kind text targ => mksignature (targ :: nil) targ cc_default
   | EF_inline_asm text sg clob => sg
-  | EF_debug kind text targs => mksignature targs None cc_default
+  | EF_debug kind text targs => mksignature targs Tvoid cc_default
   end.
 
 (** Whether an external function should be inlined by the compiler. *)
@@ -630,11 +679,28 @@ Inductive builtin_arg (A: Type) : Type :=
   | BA_splitlong (hi lo: builtin_arg A)
   | BA_addptr (a1 a2: builtin_arg A).
 
+Definition builtin_arg_eq {A: Type}:
+  (forall x y : A, {x = y} + {x <> y}) ->
+  forall (ba1 ba2: (builtin_arg A)), {ba1=ba2} + {ba1<>ba2}.
+Proof.
+  intro. generalize Integers.int_eq int64_eq float_eq float32_eq chunk_eq ptrofs_eq ident_eq.
+  decide equality.
+Defined.
+Global Opaque builtin_arg_eq.
+
 Inductive builtin_res (A: Type) : Type :=
   | BR (x: A)
   | BR_none
   | BR_splitlong (hi lo: builtin_res A).
 
+Definition builtin_res_eq {A: Type}:
+  (forall x y : A, {x = y} + {x <> y}) ->
+  forall (a b: builtin_res A), {a=b} + {a<>b}.
+Proof.
+  intro. decide equality.
+Defined.
+Global Opaque builtin_res_eq.
+
 Fixpoint globals_of_builtin_arg (A: Type) (a: builtin_arg A) : list ident :=
   match a with
   | BA_loadglobal chunk id ofs => id :: nil
diff --git a/common/Builtins.v b/common/Builtins.v
index c9097e86..476b541e 100644
--- a/common/Builtins.v
+++ b/common/Builtins.v
@@ -29,7 +29,7 @@ Definition builtin_function_sig (b: builtin_function) : signature :=
   | BI_platform b => platform_builtin_sig b
   end.
 
-Definition builtin_function_sem (b: builtin_function) : builtin_sem (proj_sig_res (builtin_function_sig b)) :=
+Definition builtin_function_sem (b: builtin_function) : builtin_sem (sig_res (builtin_function_sig b)) :=
   match b with
   | BI_standard b => standard_builtin_sem b
   | BI_platform b => platform_builtin_sem b
diff --git a/common/Builtins0.v b/common/Builtins0.v
index 5061bf43..a3215545 100644
--- a/common/Builtins0.v
+++ b/common/Builtins0.v
@@ -26,8 +26,8 @@ Require Import AST Integers Floats Values Memdata.
   appropriate for the target.
 *)
 
-Definition val_opt_has_type (ov: option val) (t: typ) : Prop :=
-  match ov with Some v => Val.has_type v t | None => True end.
+Definition val_opt_has_rettype (ov: option val) (t: rettype) : Prop :=
+  match ov with Some v => Val.has_rettype v t | None => True end.
 
 Definition val_opt_inject (j: meminj) (ov ov': option val) : Prop :=
   match ov, ov' with
@@ -42,10 +42,10 @@ Definition val_opt_inject (j: meminj) (ov ov': option val) : Prop :=
   and be compatible with value injections.
 *)
 
-Record builtin_sem (tret: typ) : Type := mkbuiltin {
+Record builtin_sem (tret: rettype) : Type := mkbuiltin {
   bs_sem :> list val -> option val;
   bs_well_typed: forall vl, 
-    val_opt_has_type (bs_sem vl) tret;
+    val_opt_has_rettype (bs_sem vl) tret;
   bs_inject: forall j vl vl',
     Val.inject_list j vl vl' -> val_opt_inject j (bs_sem vl) (bs_sem vl')
 }.
@@ -60,8 +60,8 @@ Record builtin_sem (tret: typ) : Type := mkbuiltin {
 Local Unset Program Cases.
 
 Program Definition mkbuiltin_v1t 
-     (tret: typ) (f: val -> val)
-     (WT: forall v1, Val.has_type (f v1) tret)
+     (tret: rettype) (f: val -> val)
+     (WT: forall v1, Val.has_rettype (f v1) tret)
      (INJ: forall j v1 v1', Val.inject j v1 v1' -> Val.inject j (f v1) (f v1')) :=
   mkbuiltin tret (fun vl => match vl with v1 :: nil => Some (f v1) | _ => None end) _ _.
 Next Obligation.
@@ -72,8 +72,8 @@ Next Obligation.
 Qed.
 
 Program Definition mkbuiltin_v2t 
-     (tret: typ) (f: val -> val -> val)
-     (WT: forall v1 v2, Val.has_type (f v1 v2) tret)
+     (tret: rettype) (f: val -> val -> val)
+     (WT: forall v1 v2, Val.has_rettype (f v1 v2) tret)
      (INJ: forall j v1 v1' v2 v2',
            Val.inject j v1 v1' -> Val.inject j v2 v2' ->
            Val.inject j (f v1 v2) (f v1' v2')) :=
@@ -86,8 +86,8 @@ Next Obligation.
 Qed.
 
 Program Definition mkbuiltin_v3t 
-     (tret: typ) (f: val -> val -> val -> val)
-     (WT: forall v1 v2 v3, Val.has_type (f v1 v2 v3) tret)
+     (tret: rettype) (f: val -> val -> val -> val)
+     (WT: forall v1 v2 v3, Val.has_rettype (f v1 v2 v3) tret)
      (INJ: forall j v1 v1' v2 v2' v3 v3',
            Val.inject j v1 v1' -> Val.inject j v2 v2' -> Val.inject j v3 v3' ->
            Val.inject j (f v1 v2 v3) (f v1' v2' v3')) :=
@@ -100,8 +100,8 @@ Next Obligation.
 Qed.
 
 Program Definition mkbuiltin_v1p
-     (tret: typ) (f: val -> option val)
-     (WT: forall v1, val_opt_has_type (f v1) tret)
+     (tret: rettype) (f: val -> option val)
+     (WT: forall v1, val_opt_has_rettype (f v1) tret)
      (INJ: forall j v1 v1',
         Val.inject j v1 v1' -> val_opt_inject j (f v1) (f v1')) :=
   mkbuiltin tret (fun vl => match vl with v1 :: nil => f v1 | _ => None end) _ _.
@@ -113,8 +113,8 @@ Next Obligation.
 Qed.
 
 Program Definition mkbuiltin_v2p
-     (tret: typ) (f: val -> val -> option val)
-     (WT: forall v1 v2, val_opt_has_type (f v1 v2) tret)
+     (tret: rettype) (f: val -> val -> option val)
+     (WT: forall v1 v2, val_opt_has_rettype (f v1 v2) tret)
      (INJ: forall j v1 v1' v2 v2',
         Val.inject j v1 v1' -> Val.inject j v2 v2' ->
         val_opt_inject j (f v1 v2) (f v1' v2')) :=
@@ -171,7 +171,7 @@ Proof.
   destruct t; intros; constructor.
 Qed.
 
-Lemma inj_num_opt_wt: forall t x, val_opt_has_type (option_map (inj_num t) x) t.
+Lemma inj_num_opt_wt: forall t x, val_opt_has_rettype (option_map (inj_num t) x) t.
 Proof.
   intros. destruct x; simpl. apply inj_num_wt. auto. 
 Qed.
@@ -200,13 +200,13 @@ Proof.
 Qed.
 
 Lemma proj_num_opt_wt:
-  forall tres t k0 k1 v,
+  forall (tres: typ) t k0 k1 v,
   k0 = None \/ k0 = Some Vundef ->
-  (forall x, val_opt_has_type (k1 x) tres) ->
-  val_opt_has_type (proj_num t k0 v k1) tres.
+  (forall x, val_opt_has_rettype (k1 x) tres) ->
+  val_opt_has_rettype (proj_num t k0 v k1) tres.
 Proof.
   intros.
-  assert (val_opt_has_type k0 tres). { destruct H; subst k0; exact I. }
+  assert (val_opt_has_rettype k0 tres). { destruct H; subst k0; exact I. }
   destruct t; simpl; destruct v; auto.
 Qed.
 
@@ -370,11 +370,11 @@ Definition standard_builtin_table : list (string * standard_builtin) :=
  :: ("__builtin_addl", BI_addl)
  :: ("__builtin_subl", BI_subl)
  :: ("__builtin_mull", BI_mull)
+ :: ("__builtin_expect", BI_expect)
  :: ("__builtin_bswap16", BI_i16_bswap)
  :: ("__builtin_bswap", BI_i32_bswap)
  :: ("__builtin_bswap32", BI_i32_bswap)
  :: ("__builtin_bswap64", BI_i64_bswap)
- :: ("__builtin_expect", BI_expect)
  :: ("__compcert_i64_umulh", BI_i64_umulh)
  :: ("__compcert_i64_smulh", BI_i64_smulh)
  :: ("__compcert_i64_sdiv", BI_i64_sdiv)
@@ -395,35 +395,35 @@ Definition standard_builtin_table : list (string * standard_builtin) :=
 Definition standard_builtin_sig (b: standard_builtin) : signature :=
   match b with
   | BI_select t =>
-      mksignature (Tint :: t :: t :: nil) (Some t) cc_default
+      mksignature (Tint :: t :: t :: nil) t cc_default
   | BI_fabs | BI_fsqrt =>
-      mksignature (Tfloat :: nil) (Some Tfloat) cc_default
+      mksignature (Tfloat :: nil) Tfloat cc_default
   | BI_negl =>
-      mksignature (Tlong :: nil) (Some Tlong) cc_default
+      mksignature (Tlong :: nil) Tlong cc_default
   | BI_addl | BI_subl | BI_i64_umulh| BI_i64_smulh 
   | BI_i64_sdiv | BI_i64_udiv | BI_i64_smod | BI_i64_umod =>
-      mksignature (Tlong :: Tlong :: nil) (Some Tlong) cc_default
+      mksignature (Tlong :: Tlong :: nil) Tlong cc_default
   | BI_mull =>
-    mksignature (Tint :: Tint :: nil) (Some Tlong) cc_default
+      mksignature (Tint :: Tint :: nil) Tlong cc_default
   | BI_expect =>
-    mksignature (Tlong :: Tlong :: nil) (Some Tlong) cc_default
+    mksignature (Tlong :: Tlong :: nil) Tlong cc_default
   | BI_i32_bswap =>
-      mksignature (Tint :: nil) (Some Tint) cc_default
+      mksignature (Tint :: nil) Tint cc_default
   | BI_i64_bswap =>
-      mksignature (Tlong :: nil) (Some Tlong) cc_default
+      mksignature (Tlong :: nil) Tlong cc_default
   | BI_i16_bswap =>
-      mksignature (Tint :: nil) (Some Tint) cc_default
+      mksignature (Tint :: nil) Tint cc_default
   | BI_i64_shl  | BI_i64_shr | BI_i64_sar =>
-      mksignature (Tlong :: Tint :: nil) (Some Tlong) cc_default
+      mksignature (Tlong :: Tint :: nil) Tlong cc_default
   | BI_i64_dtos | BI_i64_dtou =>
-      mksignature (Tfloat :: nil) (Some Tlong) cc_default
+      mksignature (Tfloat :: nil) Tlong cc_default
   | BI_i64_stod | BI_i64_utod =>
-      mksignature (Tlong :: nil) (Some Tfloat) cc_default
+      mksignature (Tlong :: nil) Tfloat cc_default
   | BI_i64_stof | BI_i64_utof =>
-      mksignature (Tlong :: nil) (Some Tsingle) cc_default
+      mksignature (Tlong :: nil) Tsingle cc_default
   end.
 
-Program Definition standard_builtin_sem (b: standard_builtin) : builtin_sem (proj_sig_res (standard_builtin_sig b)) :=
+Program Definition standard_builtin_sem (b: standard_builtin) : builtin_sem (sig_res (standard_builtin_sig b)) :=
   match b with
   | BI_select t =>
       mkbuiltin t 
diff --git a/common/Errors.v b/common/Errors.v
index 28933313..6807735a 100644
--- a/common/Errors.v
+++ b/common/Errors.v
@@ -164,7 +164,7 @@ Ltac monadInv1 H :=
   | (match ?X with left _ => _ | right _ => assertion_failed end = OK _) =>
       destruct X; [try (monadInv1 H) | discriminate]
   | (match (negb ?X) with true => _ | false => assertion_failed end = OK _) =>
-      destruct X as [] eqn:?; [discriminate | try (monadInv1 H)]
+      destruct X as [] eqn:?; simpl negb in H; [discriminate | try (monadInv1 H)]
   | (match ?X with true => _ | false => assertion_failed end = OK _) =>
       destruct X as [] eqn:?; [try (monadInv1 H) | discriminate]
   | (mmap ?F ?L = OK ?M) =>
diff --git a/common/Events.v b/common/Events.v
index 3fb84f49..28bb992a 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -623,7 +623,7 @@ Record extcall_properties (sem: extcall_sem) (sg: signature) : Prop :=
   ec_well_typed:
     forall ge vargs m1 t vres m2,
     sem ge vargs m1 t vres m2 ->
-    Val.has_type vres (proj_sig_res sg);
+    Val.has_rettype vres sg.(sig_res);
 
 (** The semantics is invariant under change of global environment that preserves symbols. *)
   ec_symbols_preserved:
@@ -649,9 +649,12 @@ Record extcall_properties (sem: extcall_sem) (sg: signature) : Prop :=
 (** External call cannot modify memory unless they have [Max, Writable]
    permissions. *)
   ec_readonly:
-    forall ge vargs m1 t vres m2,
+    forall ge vargs m1 t vres m2 b ofs n bytes,
     sem ge vargs m1 t vres m2 ->
-    Mem.unchanged_on (loc_not_writable m1) m1 m2;
+    Mem.valid_block m1 b ->
+    Mem.loadbytes m2 b ofs n = Some bytes ->
+    (forall i, ofs <= i < ofs + n -> ~Mem.perm m1 b i Max Writable) ->
+    Mem.loadbytes m1 b ofs n = Some bytes;
 
 (** External calls must commute with memory extensions, in the
   following sense. *)
@@ -771,12 +774,12 @@ Qed.
 Lemma volatile_load_ok:
   forall chunk,
   extcall_properties (volatile_load_sem chunk)
-                     (mksignature (Tptr :: nil) (Some (type_of_chunk chunk)) cc_default).
+                     (mksignature (Tptr :: nil) (rettype_of_chunk chunk) cc_default).
 Proof.
   intros; constructor; intros.
 (* well typed *)
-- unfold proj_sig_res; simpl. inv H. inv H0. apply Val.load_result_type.
-  eapply Mem.load_type; eauto.
+- inv H. inv H0. apply Val.load_result_rettype.
+  eapply Mem.load_rettype; eauto.
 (* symbols *)
 - inv H0. constructor. eapply volatile_load_preserved; eauto.
 (* valid blocks *)
@@ -784,7 +787,7 @@ Proof.
 (* max perms *)
 - inv H; auto.
 (* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
 (* mem extends *)
 - inv H. inv H1. inv H6. inv H4.
   exploit volatile_load_extends; eauto. intros [v' [A B]].
@@ -833,14 +836,27 @@ Proof.
   rewrite C; auto.
 Qed.
 
+Lemma unchanged_on_readonly:
+  forall m1 m2 b ofs n bytes,
+  Mem.unchanged_on (loc_not_writable m1) m1 m2 ->
+  Mem.valid_block m1 b ->
+  Mem.loadbytes m2 b ofs n = Some bytes ->
+  (forall i, ofs <= i < ofs + n -> ~Mem.perm m1 b i Max Writable) ->
+  Mem.loadbytes m1 b ofs n = Some bytes.
+Proof.
+  intros.
+  rewrite <- H1. symmetry.
+  apply Mem.loadbytes_unchanged_on_1 with (P := loc_not_writable m1); auto.
+Qed.
+
 Lemma volatile_store_readonly:
   forall ge chunk1 m1 b1 ofs1 v t m2,
   volatile_store ge chunk1 m1 b1 ofs1 v t m2 ->
   Mem.unchanged_on (loc_not_writable m1) m1 m2.
 Proof.
   intros. inv H.
-  apply Mem.unchanged_on_refl.
-  eapply Mem.store_unchanged_on; eauto.
+- apply Mem.unchanged_on_refl.
+- eapply Mem.store_unchanged_on; eauto.
   exploit Mem.store_valid_access_3; eauto. intros [P Q].
   intros. unfold loc_not_writable. red; intros. elim H2.
   apply Mem.perm_cur_max. apply P. auto.
@@ -922,7 +938,7 @@ Qed.
 Lemma volatile_store_ok:
   forall chunk,
   extcall_properties (volatile_store_sem chunk)
-                     (mksignature (Tptr :: type_of_chunk chunk :: nil) None cc_default).
+                     (mksignature (Tptr :: type_of_chunk chunk :: nil) Tvoid cc_default).
 Proof.
   intros; constructor; intros.
 (* well typed *)
@@ -934,7 +950,7 @@ Proof.
 (* perms *)
 - inv H. inv H2. auto. eauto with mem.
 (* readonly *)
-- inv H. eapply volatile_store_readonly; eauto.
+- inv H. eapply unchanged_on_readonly; eauto. eapply volatile_store_readonly; eauto.
 (* mem extends*)
 - inv H. inv H1. inv H6. inv H7. inv H4.
   exploit volatile_store_extends; eauto. intros [m2' [A [B C]]].
@@ -967,7 +983,7 @@ Inductive extcall_malloc_sem (ge: Senv.t):
 
 Lemma extcall_malloc_ok:
   extcall_properties extcall_malloc_sem
-                     (mksignature (Tptr :: nil) (Some Tptr) cc_default).
+                     (mksignature (Tptr :: nil) Tptr cc_default).
 Proof.
   assert (UNCHANGED:
     forall (P: block -> Z -> Prop) m lo hi v m' b m'',
@@ -984,7 +1000,7 @@ Proof.
   }
   constructor; intros.
 (* well typed *)
-- inv H. unfold proj_sig_res, Tptr; simpl. destruct Archi.ptr64; auto.
+- inv H. simpl. unfold Tptr; destruct Archi.ptr64; auto.
 (* symbols preserved *)
 - inv H0; econstructor; eauto.
 (* valid block *)
@@ -994,7 +1010,7 @@ Proof.
   rewrite dec_eq_false. auto.
   apply Mem.valid_not_valid_diff with m1; eauto with mem.
 (* readonly *)
-- inv H. eapply UNCHANGED; eauto.
+- inv H. eapply unchanged_on_readonly; eauto. 
 (* mem extends *)
 - inv H. inv H1. inv H7.
   assert (SZ: v2 = Vptrofs sz).
@@ -1045,38 +1061,43 @@ Qed.
 
 Inductive extcall_free_sem (ge: Senv.t):
               list val -> mem -> trace -> val -> mem -> Prop :=
-  | extcall_free_sem_intro: forall b lo sz m m',
+  | extcall_free_sem_ptr: forall b lo sz m m',
       Mem.load Mptr m b (Ptrofs.unsigned lo - size_chunk Mptr) = Some (Vptrofs sz) ->
       Ptrofs.unsigned sz > 0 ->
       Mem.free m b (Ptrofs.unsigned lo - size_chunk Mptr) (Ptrofs.unsigned lo + Ptrofs.unsigned sz) = Some m' ->
-      extcall_free_sem ge (Vptr b lo :: nil) m E0 Vundef m'.
+      extcall_free_sem ge (Vptr b lo :: nil) m E0 Vundef m'
+  | extcall_free_sem_null: forall m,
+      extcall_free_sem ge (Vnullptr :: nil) m E0 Vundef m.
 
 Lemma extcall_free_ok:
   extcall_properties extcall_free_sem
-                     (mksignature (Tptr :: nil) None cc_default).
+                     (mksignature (Tptr :: nil) Tvoid cc_default).
 Proof.
   constructor; intros.
 (* well typed *)
-- inv H. unfold proj_sig_res. simpl. auto.
+- inv H; simpl; auto.
 (* symbols preserved *)
 - inv H0; econstructor; eauto.
 (* valid block *)
-- inv H. eauto with mem.
+- inv H; eauto with mem.
 (* perms *)
-- inv H. eapply Mem.perm_free_3; eauto.
+- inv H; eauto using Mem.perm_free_3.
 (* readonly *)
-- inv H. eapply Mem.free_unchanged_on; eauto.
-  intros. red; intros. elim H3.
+- eapply unchanged_on_readonly; eauto. inv H.
++ eapply Mem.free_unchanged_on; eauto.
+  intros. red; intros. elim H6.
   apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem.
   eapply Mem.free_range_perm; eauto.
++ apply Mem.unchanged_on_refl.
 (* mem extends *)
-- inv H. inv H1. inv H8. inv H6.
+- inv H.
++ inv H1. inv H8. inv H6.
   exploit Mem.load_extends; eauto. intros [v' [A B]].
   assert (v' = Vptrofs sz).
   { unfold Vptrofs in *; destruct Archi.ptr64; inv B; auto. }
   subst v'.
   exploit Mem.free_parallel_extends; eauto. intros [m2' [C D]].
-  exists Vundef; exists m2'; intuition.
+  exists Vundef; exists m2'; intuition auto.
   econstructor; eauto.
   eapply Mem.free_unchanged_on; eauto.
   unfold loc_out_of_bounds; intros.
@@ -1084,8 +1105,14 @@ Proof.
   { apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem.
     eapply Mem.free_range_perm. eexact H4. eauto. }
   tauto.
++ inv H1. inv H5. replace v2 with Vnullptr.
+  exists Vundef; exists m1'; intuition auto.
+  constructor.
+  apply Mem.unchanged_on_refl.
+  unfold Vnullptr in *; destruct Archi.ptr64; inv H3; auto.
 (* mem inject *)
-- inv H0. inv H2. inv H7. inv H9.
+- inv H0.
++ inv H2. inv H7. inv H9.
   exploit Mem.load_inject; eauto. intros [v' [A B]].
   assert (v' = Vptrofs sz).
   { unfold Vptrofs in *; destruct Archi.ptr64; inv B; auto. }
@@ -1099,7 +1126,7 @@ Proof.
   intro EQ.
   exploit Mem.free_parallel_inject; eauto. intros (m2' & C & D).
   exists f, Vundef, m2'; split.
-  apply extcall_free_sem_intro with (sz := sz) (m' := m2').
+  apply extcall_free_sem_ptr with (sz := sz) (m' := m2').
     rewrite EQ. rewrite <- A. f_equal. omega.
     auto. auto.
     rewrite ! EQ. rewrite <- C. f_equal; omega.
@@ -1112,14 +1139,19 @@ Proof.
     apply P. omega.
   split. auto.
   red; intros. congruence.
++ inv H2. inv H6. replace v' with Vnullptr.
+  exists f, Vundef, m1'; intuition auto using Mem.unchanged_on_refl.
+  constructor.
+  red; intros; congruence.
+  unfold Vnullptr in *; destruct Archi.ptr64; inv H4; auto.
 (* trace length *)
 - inv H; simpl; omega.
 (* receptive *)
-- assert (t1 = t2). inv H; inv H0; auto. subst t2.
+- assert (t1 = t2) by (inv H; inv H0; auto). subst t2.
   exists vres1; exists m1; auto.
 (* determ *)
-- inv H; inv H0.
-  assert (EQ1: Vptrofs sz0 = Vptrofs sz) by congruence.
+- inv H; inv H0; try (unfold Vnullptr in *; destruct Archi.ptr64; discriminate).
++ assert (EQ1: Vptrofs sz0 = Vptrofs sz) by congruence.
   assert (EQ2: sz0 = sz).
   { unfold Vptrofs in EQ1; destruct Archi.ptr64 eqn:SF.
     rewrite <- (Ptrofs.of_int64_to_int64 SF sz0), <- (Ptrofs.of_int64_to_int64 SF sz). congruence.
@@ -1127,6 +1159,7 @@ Proof.
   }
   subst sz0.
   split. constructor. intuition congruence.
++ split. constructor. intuition auto.
 Qed.
 
 (** ** Semantics of [memcpy] operations. *)
@@ -1147,11 +1180,11 @@ Inductive extcall_memcpy_sem (sz al: Z) (ge: Senv.t):
 Lemma extcall_memcpy_ok:
   forall sz al,
   extcall_properties (extcall_memcpy_sem sz al)
-                     (mksignature (Tptr :: Tptr :: nil) None cc_default).
+                     (mksignature (Tptr :: Tptr :: nil) Tvoid cc_default).
 Proof.
   intros. constructor.
 - (* return type *)
-  intros. inv H. constructor.
+  intros. inv H. exact I.
 - (* change of globalenv *)
   intros. inv H0. econstructor; eauto.
 - (* valid blocks *)
@@ -1159,8 +1192,9 @@ Proof.
 - (* perms *)
   intros. inv H. eapply Mem.perm_storebytes_2; eauto.
 - (* readonly *)
-  intros. inv H. eapply Mem.storebytes_unchanged_on; eauto.
-  intros; red; intros. elim H8.
+  intros. inv H. eapply unchanged_on_readonly; eauto. 
+  eapply Mem.storebytes_unchanged_on; eauto.
+  intros; red; intros. elim H11.
   apply Mem.perm_cur_max. eapply Mem.storebytes_range_perm; eauto.
 - (* extensions *)
   intros. inv H.
@@ -1258,7 +1292,7 @@ Inductive extcall_annot_sem (text: string) (targs: list typ) (ge: Senv.t):
 Lemma extcall_annot_ok:
   forall text targs,
   extcall_properties (extcall_annot_sem text targs)
-                     (mksignature targs None cc_default).
+                     (mksignature targs Tvoid cc_default).
 Proof.
   intros; constructor; intros.
 (* well typed *)
@@ -1271,7 +1305,7 @@ Proof.
 (* perms *)
 - inv H; auto.
 (* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
 (* mem extends *)
 - inv H.
   exists Vundef; exists m1'; intuition.
@@ -1303,11 +1337,11 @@ Inductive extcall_annot_val_sem (text: string) (targ: typ) (ge: Senv.t):
 Lemma extcall_annot_val_ok:
   forall text targ,
   extcall_properties (extcall_annot_val_sem text targ)
-                     (mksignature (targ :: nil) (Some targ) cc_default).
+                     (mksignature (targ :: nil) targ cc_default).
 Proof.
   intros; constructor; intros.
 (* well typed *)
-- inv H. unfold proj_sig_res; simpl. eapply eventval_match_type; eauto.
+- inv H. eapply eventval_match_type; eauto.
 (* symbols *)
 - destruct H as (A & B & C). inv H0. econstructor; eauto.
   eapply eventval_match_preserved; eauto.
@@ -1316,7 +1350,7 @@ Proof.
 (* perms *)
 - inv H; auto.
 (* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
 (* mem extends *)
 - inv H. inv H1. inv H6.
   exists v2; exists m1'; intuition.
@@ -1347,7 +1381,7 @@ Inductive extcall_debug_sem (ge: Senv.t):
 Lemma extcall_debug_ok:
   forall targs,
   extcall_properties extcall_debug_sem
-                     (mksignature targs None cc_default).
+                     (mksignature targs Tvoid cc_default).
 Proof.
   intros; constructor; intros.
 (* well typed *)
@@ -1359,7 +1393,7 @@ Proof.
 (* perms *)
 - inv H; auto.
 (* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
 (* mem extends *)
 - inv H.
   exists Vundef; exists m1'; intuition.
@@ -1396,7 +1430,8 @@ Proof.
   intros. set (bsem := builtin_function_sem bf). constructor; intros.
 (* well typed *)
 - inv H.
-  specialize (bs_well_typed  _ bsem vargs). unfold val_opt_has_type, bsem; rewrite H0.
+  specialize (bs_well_typed  _ bsem vargs).
+  unfold val_opt_has_rettype, bsem; rewrite H0.
   auto.
 (* symbols *)
 - inv H0. econstructor; eauto.
@@ -1405,7 +1440,7 @@ Proof.
 (* perms *)
 - inv H; auto.
 (* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
 (* mem extends *)
 - inv H. fold bsem in H2. apply val_inject_list_lessdef in H1.
   specialize (bs_inject _ bsem _ _ _ H1).
@@ -1516,7 +1551,7 @@ Proof.
   apply extcall_debug_ok.
 Qed.
 
-Definition external_call_well_typed ef := ec_well_typed (external_call_spec ef).
+Definition external_call_well_typed_gen ef := ec_well_typed (external_call_spec ef).
 Definition external_call_symbols_preserved ef := ec_symbols_preserved (external_call_spec ef).
 Definition external_call_valid_block ef := ec_valid_block (external_call_spec ef).
 Definition external_call_max_perm ef := ec_max_perm (external_call_spec ef).
@@ -1527,6 +1562,16 @@ Definition external_call_trace_length ef := ec_trace_length (external_call_spec
 Definition external_call_receptive ef := ec_receptive (external_call_spec ef).
 Definition external_call_determ ef := ec_determ (external_call_spec ef).
 
+(** Corollary of [external_call_well_typed_gen]. *)
+
+Lemma external_call_well_typed:
+  forall ef ge vargs m1 t vres m2,
+  external_call ef ge vargs m1 t vres m2 ->
+  Val.has_type vres (proj_sig_res (ef_sig ef)).
+Proof.
+  intros. apply Val.has_proj_rettype. eapply external_call_well_typed_gen; eauto.
+Qed.
+
 (** Corollary of [external_call_valid_block]. *)
 
 Lemma external_call_nextblock:
diff --git a/common/Memdata.v b/common/Memdata.v
index 7144d72c..a09b90f5 100644
--- a/common/Memdata.v
+++ b/common/Memdata.v
@@ -44,6 +44,13 @@ Definition size_chunk (chunk: memory_chunk) : Z :=
   | Many64 => 8
   end.
 
+Definition largest_size_chunk := 8.
+
+Lemma max_size_chunk: forall chunk, size_chunk chunk <= 8.
+Proof.
+  destruct chunk; simpl; omega.
+Qed.
+
 Lemma size_chunk_pos:
   forall chunk, size_chunk chunk > 0.
 Proof.
@@ -547,18 +554,26 @@ Proof.
   destruct v1; auto.
 Qed.
 
-Lemma decode_val_type:
+Lemma decode_val_rettype:
   forall chunk cl,
-  Val.has_type (decode_val chunk cl) (type_of_chunk chunk).
+  Val.has_rettype (decode_val chunk cl) (rettype_of_chunk chunk).
 Proof.
   intros. unfold decode_val.
   destruct (proj_bytes cl).
-  destruct chunk; simpl; auto.
-Local Opaque Val.load_result.
+- destruct chunk; simpl; rewrite ? Int.sign_ext_idem, ? Int.zero_ext_idem by omega; auto.
+- Local Opaque Val.load_result.
   destruct chunk; simpl;
   (exact I || apply Val.load_result_type || destruct Archi.ptr64; (exact I || apply Val.load_result_type)).
 Qed.
 
+Lemma decode_val_type:
+  forall chunk cl,
+  Val.has_type (decode_val chunk cl) (type_of_chunk chunk).
+Proof.
+  intros. rewrite <- proj_rettype_of_chunk.
+  apply Val.has_proj_rettype. apply decode_val_rettype.
+Qed.
+
 Lemma encode_val_int8_signed_unsigned:
   forall v, encode_val Mint8signed v = encode_val Mint8unsigned v.
 Proof.
@@ -607,11 +622,9 @@ Lemma decode_val_cast:
   | _ => True
   end.
 Proof.
-  unfold decode_val; intros; destruct chunk; auto; destruct (proj_bytes l); auto.
-  unfold Val.sign_ext. rewrite Int.sign_ext_idem; auto. omega.
-  unfold Val.zero_ext. rewrite Int.zero_ext_idem; auto. omega.
-  unfold Val.sign_ext. rewrite Int.sign_ext_idem; auto. omega.
-  unfold Val.zero_ext. rewrite Int.zero_ext_idem; auto. omega.
+  intros. 
+  assert (A: Val.has_rettype v (rettype_of_chunk chunk)) by apply decode_val_rettype.
+  destruct chunk; auto; simpl in A; destruct v; try contradiction; simpl; congruence.
 Qed.
 
 (** Pointers cannot be forged. *)
diff --git a/common/Memory.v b/common/Memory.v
index b68a5049..cd8a2001 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -38,6 +38,10 @@ Require Import Floats.
 Require Import Values.
 Require Export Memdata.
 Require Export Memtype.
+Require Import Lia.
+
+Definition default_notrap_load_value (chunk : memory_chunk) := Vundef.
+
 
 (* To avoid useless definitions of inductors in extracted code. *)
 Local Unset Elimination Schemes.
@@ -538,6 +542,48 @@ Proof.
   induction vl; simpl; intros. auto. rewrite IHvl. auto.
 Qed.
 
+Remark set_setN_swap_disjoint:
+  forall vl: list memval,
+  forall v: memval,
+  forall m : ZMap.t memval,
+  forall p pl: Z,
+    ~ (Intv.In p (pl, pl + Z.of_nat (length vl))) ->
+    (setN vl pl (ZMap.set p v m)) = (ZMap.set p v (setN vl pl m)).
+Proof.
+  induction vl; simpl; trivial.
+  intros.
+  unfold Intv.In in *; simpl in *.
+  rewrite ZMap.set_disjoint by lia.
+  apply IHvl.
+  lia.
+Qed.
+
+Lemma setN_swap_disjoint:
+  forall vl1 vl2: list memval,
+  forall m : ZMap.t memval,
+  forall p1 p2: Z,
+    Intv.disjoint (p1, p1 + Z.of_nat (length vl1))
+                  (p2, p2 + Z.of_nat (length vl2)) ->
+    (setN vl1 p1 (setN vl2 p2 m)) = (setN vl2 p2 (setN vl1 p1 m)).
+Proof.
+  induction vl1; simpl; trivial.
+  intros until p2. intro DISJOINT.
+  rewrite <- set_setN_swap_disjoint.
+  { rewrite IHvl1.
+    reflexivity.
+    unfold Intv.disjoint, Intv.In in *.
+    simpl in *.
+    intro.
+    intro BOUNDS.
+    apply DISJOINT.
+    lia.
+  }
+  unfold Intv.disjoint, Intv.In in *.
+  simpl in *.
+  apply DISJOINT.
+  lia.
+Qed.
+    
 (** [store chunk m b ofs v] perform a write in memory state [m].
   Value [v] is stored at address [b] and offset [ofs].
   Return the updated memory store, or [None] if the accessed bytes
@@ -682,6 +728,15 @@ Proof.
   apply decode_val_type.
 Qed.
 
+Theorem load_rettype:
+  forall m chunk b ofs v,
+  load chunk m b ofs = Some v ->
+  Val.has_rettype v (rettype_of_chunk chunk).
+Proof.
+  intros. exploit load_result; eauto; intros. rewrite H0.
+  apply decode_val_rettype.
+Qed.
+
 Theorem load_cast:
   forall m chunk b ofs v,
   load chunk m b ofs = Some v ->
@@ -1169,6 +1224,106 @@ Local Hint Resolve store_valid_block_1 store_valid_block_2: mem.
 Local Hint Resolve store_valid_access_1 store_valid_access_2
              store_valid_access_3: mem.
 
+Remark mem_same_proof_irr :
+  forall m1 m2 : mem,
+    (mem_contents m1) = (mem_contents m2) ->
+    (mem_access m1) = (mem_access m2) ->
+    (nextblock m1) = (nextblock m2) ->
+    m1 = m2.
+Proof.
+  destruct m1 as [contents1 access1 nextblock1 access_max1 nextblock_noaccess1 default1].
+  destruct m2 as [contents2 access2 nextblock2 access_max2 nextblock_noaccess2 default2].
+  simpl.
+  intros.
+  subst contents2.
+  subst access2.
+  subst nextblock2.
+  f_equal; apply proof_irr.
+Qed.
+
+Theorem store_store_other:
+  forall chunk b ofs v chunk' b' ofs' v' m0 m1 m1',
+     b' <> b
+  \/ ofs' + size_chunk chunk' <= ofs
+  \/ ofs  + size_chunk chunk  <= ofs' ->
+     store chunk m0 b ofs v = Some m1 ->
+     store chunk' m0 b' ofs' v' = Some m1' ->
+     store chunk' m1 b' ofs' v' =
+     store chunk m1' b ofs v.
+Proof.
+  intros until m1'.
+  intro DISJOINT.
+  intros W0 W0'.
+  assert (valid_access m1' chunk b ofs Writable) as WRITEABLE1' by eauto with mem.
+  (* {
+    eapply store_valid_access_1.
+    apply W0'.
+    eapply store_valid_access_3.
+    apply W0.
+  } *)
+  assert (valid_access m1 chunk' b' ofs' Writable) as WRITABLE1 by eauto with mem.
+  (* {
+    eapply store_valid_access_1.
+    apply W0.
+    eapply store_valid_access_3.
+    apply W0'.
+  } *)
+  unfold store in *.
+  destruct (valid_access_dec m0 chunk b ofs Writable).
+  2: congruence.
+  destruct (valid_access_dec m1 chunk' b' ofs' Writable).
+  2: contradiction.
+  destruct (valid_access_dec m0 chunk' b' ofs' Writable).
+  2: congruence.
+  destruct (valid_access_dec m1' chunk b ofs Writable).
+  2: contradiction.
+  f_equal.
+  inv W0; simpl in *.
+  inv W0'; simpl in *.
+  apply mem_same_proof_irr; simpl; trivial.
+  destruct (eq_block b b').
+  { subst b'.
+    rewrite PMap.gss.
+    rewrite PMap.gss.
+    rewrite PMap.set2.
+    rewrite PMap.set2.
+    f_equal.
+    apply setN_swap_disjoint.
+    unfold Intv.disjoint.
+    rewrite encode_val_length.
+    rewrite <- size_chunk_conv.
+    rewrite encode_val_length.
+    rewrite <- size_chunk_conv.
+    unfold Intv.In; simpl.
+    intros.
+    destruct DISJOINT. contradiction.
+    lia.
+  }
+  {
+    rewrite PMap.set_disjoint by congruence.
+    rewrite PMap.gso by congruence.
+    rewrite PMap.gso by congruence.
+    reflexivity.
+  }
+Qed.
+    
+Section STOREV.
+Variable chunk: memory_chunk.
+Variable m1: mem.
+Variables addr v: val.
+Variable m2: mem.
+Hypothesis STORE: storev chunk m1 addr v = Some m2.
+
+
+Theorem loadv_storev_same:
+  loadv chunk m2 addr = Some (Val.load_result chunk v).
+Proof.
+  destruct addr; simpl in *; try discriminate.
+  eapply load_store_same.
+  eassumption.
+Qed.
+End STOREV.
+
 Lemma load_store_overlap:
   forall chunk m1 b ofs v m2 chunk' ofs' v',
   store chunk m1 b ofs v = Some m2 ->
diff --git a/common/Memtype.v b/common/Memtype.v
index 53775d8b..ca9c6f1f 100644
--- a/common/Memtype.v
+++ b/common/Memtype.v
@@ -300,6 +300,11 @@ Axiom load_type:
   load chunk m b ofs = Some v ->
   Val.has_type v (type_of_chunk chunk).
 
+Axiom load_rettype:
+  forall m chunk b ofs v,
+  load chunk m b ofs = Some v ->
+  Val.has_rettype v (rettype_of_chunk chunk).
+
 (** For a small integer or float type, the value returned by [load]
   is invariant under the corresponding cast. *)
 Axiom load_cast:
diff --git a/common/PrintAST.ml b/common/PrintAST.ml
index e477957a..3f718428 100644
--- a/common/PrintAST.ml
+++ b/common/PrintAST.ml
@@ -27,6 +27,14 @@ let name_of_type = function
   | Tany32 -> "any32"
   | Tany64 -> "any64"
 
+let name_of_rettype = function
+  | Tret t -> name_of_type t
+  | Tvoid -> "void"
+  | Tint8signed -> "int8s"
+  | Tint8unsigned -> "int8u"
+  | Tint16signed -> "int16s"
+  | Tint16unsigned -> "int16u"
+
 let name_of_chunk = function
   | Mint8signed -> "int8s"
   | Mint8unsigned -> "int8u"
@@ -90,3 +98,7 @@ let rec print_builtin_res px oc = function
       fprintf oc "splitlong(%a, %a)"
                  (print_builtin_res px) hi (print_builtin_res px) lo
 
+let print_trapping_mode oc = function
+  | TRAP -> ()
+  | NOTRAP -> output_string oc " [notrap]"
+                
diff --git a/common/Sections.ml b/common/Sections.ml
index 30be9e69..839128a5 100644
--- a/common/Sections.ml
+++ b/common/Sections.ml
@@ -160,9 +160,22 @@ let gcc_section name readonly exec =
     sec_writable = not readonly; sec_executable = exec;
     sec_access = Access_default }
 
+(* Check and extract whether a section was given as attribute *)
+
+let get_attr_section loc attr =
+  match Cutil.find_custom_attributes ["section"; "__section__"] attr with
+  | [] -> None
+  | [[C.AString name]] -> Some name
+  | [[_]] ->
+    Diagnostics.error loc "'section' attribute requires a string";
+    None
+  | _ ->
+    Diagnostics.error loc "ambiguous 'section' attribute";
+    None
+
 (* Determine section for a variable definition *)
 
-let for_variable env id ty init =
+let for_variable env loc id ty init =
   let attr = Cutil.attributes_of_type env ty in
   let readonly = List.mem C.AConst attr && not(List.mem C.AVolatile attr) in
   let si =
@@ -170,11 +183,11 @@ let for_variable env id ty init =
       (* 1- Section explicitly associated with #use_section *)
       Hashtbl.find use_section_table id
     with Not_found ->
-      match Cutil.find_custom_attributes ["section"; "__section__"] attr with
-      | [[C.AString name]] ->
+      match get_attr_section loc attr with
+      | Some name ->
         (* 2- Section given as an attribute, gcc-style *)
         gcc_section name readonly false
-      | _ ->
+      | None ->
         (* 3- Default section appropriate for size and const-ness *)
         let size =
           match Cutil.sizeof env ty with Some sz -> sz | None -> max_int in
@@ -190,17 +203,17 @@ let for_variable env id ty init =
 
 (* Determine sections for a function definition *)
 
-let for_function env id attr =
+let for_function env loc id attr =
   let si_code =
     try
       (* 1- Section explicitly associated with #use_section *)
       Hashtbl.find use_section_table id
     with Not_found ->
-      match Cutil.find_custom_attributes ["section"; "__section__"] attr with
-      | [[C.AString name]] ->
+      match get_attr_section loc attr with
+      | Some name ->
           (* 2- Section given as an attribute, gcc-style *)
           gcc_section name true true
-      | _ ->
+      | None ->
           (* 3- Default section *)
           try
             Hashtbl.find current_section_table "CODE"
diff --git a/common/Sections.mli b/common/Sections.mli
index bc97814d..d9fd9239 100644
--- a/common/Sections.mli
+++ b/common/Sections.mli
@@ -46,7 +46,7 @@ val define_section:
          -> ?writable:bool -> ?executable:bool -> ?access:access_mode -> unit -> unit
 val use_section_for: AST.ident -> string -> bool
 
-val for_variable: Env.t -> AST.ident -> C.typ -> bool ->
+val for_variable: Env.t -> C.location -> AST.ident -> C.typ -> bool ->
                                           section_name * access_mode
-val for_function: Env.t -> AST.ident -> C.attributes -> section_name list
+val for_function: Env.t -> C.location -> AST.ident -> C.attributes -> section_name list
 val for_stringlit: unit -> section_name
diff --git a/common/Values.v b/common/Values.v
index 59e6388f..6401ba52 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -149,6 +149,23 @@ Proof.
   auto.
 Defined.
 
+Definition has_rettype (v: val) (r: rettype) : Prop :=
+  match r, v with
+  | Tret t, _ => has_type v t
+  | Tint8signed, Vint n => n = Int.sign_ext 8 n
+  | Tint8unsigned, Vint n => n = Int.zero_ext 8 n
+  | Tint16signed, Vint n => n = Int.sign_ext 16 n
+  | Tint16unsigned, Vint n => n = Int.zero_ext 16 n
+  | _, Vundef => True
+  | _, _ => False
+  end.
+
+Lemma has_proj_rettype: forall v r,
+  has_rettype v r -> has_type v (proj_rettype r).
+Proof.
+  destruct r; simpl; intros; auto; destruct v; try contradiction; exact I.
+Qed.
+
 (** Truth values.  Non-zero integers are treated as [True].
   The integer 0 (also used to represent the null pointer) is [False].
   Other values are neither true nor false. *)
@@ -783,6 +800,17 @@ Definition rolml (v: val) (amount: int) (mask: int64): val :=
   | _ => Vundef
   end.
 
+Definition zero_ext_l (nbits: Z) (v: val) : val :=
+  match v with
+  | Vlong n => Vlong(Int64.zero_ext nbits n)
+  | _ => Vundef
+  end.
+
+Definition sign_ext_l (nbits: Z) (v: val) : val :=
+  match v with
+  | Vlong n => Vlong(Int64.sign_ext nbits n)
+  | _ => Vundef
+  end.
 
 (** Comparisons *)
 
@@ -992,10 +1020,24 @@ Definition load_result (chunk: memory_chunk) (v: val) :=
   | _, _ => Vundef
   end.
 
+Lemma load_result_rettype:
+  forall chunk v, has_rettype (load_result chunk v) (rettype_of_chunk chunk).
+Proof.
+  intros. unfold has_rettype; destruct chunk; destruct v; simpl; auto.
+- rewrite Int.sign_ext_idem by omega; auto.
+- rewrite Int.zero_ext_idem by omega; auto.
+- rewrite Int.sign_ext_idem by omega; auto.
+- rewrite Int.zero_ext_idem by omega; auto.
+- destruct Archi.ptr64 eqn:SF; simpl; auto.
+- destruct Archi.ptr64 eqn:SF; simpl; auto.
+- destruct Archi.ptr64 eqn:SF; simpl; auto.
+Qed.
+
 Lemma load_result_type:
   forall chunk v, has_type (load_result chunk v) (type_of_chunk chunk).
 Proof.
-  intros. unfold has_type; destruct chunk; destruct v; simpl; auto; destruct Archi.ptr64 eqn:SF; simpl; auto.
+  intros. rewrite <- proj_rettype_of_chunk. apply has_proj_rettype.
+  apply load_result_rettype.
 Qed.
 
 Lemma load_result_same:
@@ -1428,6 +1470,60 @@ Proof.
   assert (32 < Int.max_unsigned) by reflexivity. omega.
 Qed.
 
+Theorem shrx1_shr:
+  forall x z,
+  shrx x (Vint (Int.repr 1)) = Some z ->
+  z = shr (add x (shru x (Vint (Int.repr 31)))) (Vint (Int.repr 1)).
+Proof.
+  intros. destruct x; simpl in H; try discriminate.
+  change (Int.ltu (Int.repr 1) (Int.repr 31)) with true in H; simpl in H.
+  inversion_clear H.
+  simpl.
+  change (Int.ltu (Int.repr 31) Int.iwordsize) with true; simpl.
+  change (Int.ltu (Int.repr 1) Int.iwordsize) with true; simpl.
+  f_equal.
+  rewrite Int.shrx1_shr by reflexivity.
+  reflexivity.
+Qed.
+
+Theorem shrx_shr_3:
+  forall n x z,
+  shrx x (Vint n) = Some z ->
+  z = (if Int.eq n Int.zero then x else
+         if Int.eq n Int.one
+         then shr (add x (shru x (Vint (Int.repr 31)))) (Vint Int.one)
+         else shr (add x (shru (shr x (Vint (Int.repr 31)))
+                    (Vint (Int.sub (Int.repr 32) n))))
+             (Vint n)).
+Proof.
+  intros. destruct x; simpl in H; try discriminate.
+  destruct (Int.ltu n (Int.repr 31)) eqn:LT; inv H.
+  exploit Int.ltu_inv; eauto. change (Int.unsigned (Int.repr 31)) with 31; intros LT'.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. unfold Int.shrx. rewrite Int.shl_zero. unfold Int.divs. change (Int.signed Int.one) with 1.
+  rewrite Z.quot_1_r. rewrite Int.repr_signed; auto.
+- predSpec Int.eq Int.eq_spec n Int.one.
+  * subst n. simpl.
+    change (Int.ltu (Int.repr 31) Int.iwordsize) with true. simpl.
+    change (Int.ltu Int.one Int.iwordsize) with true. simpl.
+    f_equal.
+    apply Int.shrx1_shr.
+    reflexivity.
+  * clear H0.
+    simpl. change (Int.ltu (Int.repr 31) Int.iwordsize) with true. simpl.
+    replace (Int.ltu (Int.sub (Int.repr 32) n) Int.iwordsize) with true. simpl.
+    replace (Int.ltu n Int.iwordsize) with true.
+    f_equal; apply Int.shrx_shr_2; assumption.
+    symmetry; apply zlt_true. change (Int.unsigned n < 32); omega.
+    symmetry; apply zlt_true. unfold Int.sub. change (Int.unsigned (Int.repr 32)) with 32.
+    assert (Int.unsigned n <> 0).
+    { red; intros; elim H.
+      rewrite <- (Int.repr_unsigned n), H0. auto. }
+    rewrite Int.unsigned_repr.
+    change (Int.unsigned Int.iwordsize) with 32; omega.
+    assert (32 < Int.max_unsigned) by reflexivity. omega.
+Qed.
+
 Theorem or_rolm:
   forall x n m1 m2,
   or (rolm x n m1) (rolm x n m2) = rolm x n (Int.or m1 m2).
@@ -1687,6 +1783,58 @@ Proof.
   assert (64 < Int.max_unsigned) by reflexivity. omega.
 Qed.
 
+Theorem shrxl1_shrl:
+  forall x z,
+  shrxl x (Vint (Int.repr 1)) = Some z ->
+  z = shrl (addl x (shrlu x (Vint (Int.repr 63)))) (Vint (Int.repr 1)).
+Proof.
+  intros. destruct x; simpl in H; try discriminate.
+  change (Int.ltu (Int.repr 1) (Int.repr 63)) with true in H; simpl in H.
+  inversion_clear H.
+  simpl.
+  change (Int.ltu (Int.repr 63) Int64.iwordsize') with true; simpl.
+  change (Int.ltu (Int.repr 1) Int64.iwordsize') with true; simpl.
+  f_equal.
+  rewrite Int64.shrx'1_shr' by reflexivity.
+  reflexivity.
+Qed.
+
+Theorem shrxl_shrl_3:
+  forall n x z,
+  shrxl x (Vint n) = Some z ->
+  z = (if Int.eq n Int.zero then x else
+         if Int.eq n Int.one
+         then shrl (addl x (shrlu x (Vint (Int.repr 63)))) (Vint Int.one)
+         else shrl (addl x (shrlu (shrl x (Vint (Int.repr 63)))
+                    (Vint (Int.sub (Int.repr 64) n))))
+             (Vint n)).
+Proof.
+  intros. destruct x; simpl in H; try discriminate.
+  destruct (Int.ltu n (Int.repr 63)) eqn:LT; inv H.
+  exploit Int.ltu_inv; eauto. change (Int.unsigned (Int.repr 63)) with 63; intros LT'.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. unfold Int64.shrx'. rewrite Int64.shl'_zero. unfold Int64.divs. change (Int64.signed Int64.one) with 1.
+  rewrite Z.quot_1_r. rewrite Int64.repr_signed; auto.
+- predSpec Int.eq Int.eq_spec n Int.one.
+  * subst n. simpl.
+    change (Int.ltu (Int.repr 63) Int64.iwordsize') with true. simpl.
+    change (Int.ltu Int.one Int64.iwordsize') with true. simpl.
+    f_equal.
+    apply Int64.shrx'1_shr'.
+    reflexivity.
+  * clear H0.
+simpl. change (Int.ltu (Int.repr 63) Int64.iwordsize') with true. simpl.
+  replace (Int.ltu (Int.sub (Int.repr 64) n) Int64.iwordsize') with true. simpl.
+  replace (Int.ltu n Int64.iwordsize') with true.
+  f_equal; apply Int64.shrx'_shr_2; assumption.
+  symmetry; apply zlt_true. change (Int.unsigned n < 64); omega.
+  symmetry; apply zlt_true. unfold Int.sub. change (Int.unsigned (Int.repr 64)) with 64.
+  assert (Int.unsigned n <> 0). { red; intros; elim H. rewrite <- (Int.repr_unsigned n), H0. auto. }
+  rewrite Int.unsigned_repr.
+  change (Int.unsigned Int64.iwordsize') with 64; omega.
+  assert (64 < Int.max_unsigned) by reflexivity. omega.
+Qed.
+
 Theorem negate_cmp_bool:
   forall c x y, cmp_bool (negate_comparison c) x y = option_map negb (cmp_bool c x y).
 Proof.
@@ -1899,10 +2047,18 @@ Qed.
 
 Lemma zero_ext_and:
   forall n v,
-  0 < n < Int.zwordsize ->
+  0 <= n ->
   Val.zero_ext n v = Val.and v (Vint (Int.repr (two_p n - 1))).
 Proof.
-  intros. destruct v; simpl; auto. decEq. apply Int.zero_ext_and; auto. omega.
+  intros. destruct v; simpl; auto. decEq. apply Int.zero_ext_and; auto.
+Qed.
+
+Lemma zero_ext_andl:
+  forall n v,
+  0 <= n ->
+  Val.zero_ext_l n v = Val.andl v (Vlong (Int64.repr (two_p n - 1))).
+Proof.
+  intros. destruct v; simpl; auto. decEq. apply Int64.zero_ext_and; auto.
 Qed.
 
 Lemma rolm_lt_zero:
diff --git a/config_aarch64.sh b/config_aarch64.sh
new file mode 100755
index 00000000..ded267bf
--- /dev/null
+++ b/config_aarch64.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh aarch64-linux --toolprefix aarch64-linux-gnu- "$@"
diff --git a/config_arm.sh b/config_arm.sh
new file mode 100755
index 00000000..1861e029
--- /dev/null
+++ b/config_arm.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh arm-linux --toolprefix arm-linux-gnueabi- "$@"
diff --git a/config_armhf.sh b/config_armhf.sh
new file mode 100755
index 00000000..8a1302bd
--- /dev/null
+++ b/config_armhf.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh arm-eabihf --toolprefix arm-linux-gnueabihf- "$@"
diff --git a/config_ia32.sh b/config_ia32.sh
new file mode 100755
index 00000000..b40f2b39
--- /dev/null
+++ b/config_ia32.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh ia32-linux "$@"
diff --git a/config_k1c.sh b/config_k1c.sh
new file mode 100755
index 00000000..20408397
--- /dev/null
+++ b/config_k1c.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh k1c-cos "$@"
diff --git a/config_ppc.sh b/config_ppc.sh
new file mode 100755
index 00000000..d597cda5
--- /dev/null
+++ b/config_ppc.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh ppc-linux --toolprefix powerpc-linux-gnu- "$@"
diff --git a/config_ppc64.sh b/config_ppc64.sh
new file mode 100755
index 00000000..df31c18f
--- /dev/null
+++ b/config_ppc64.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh ppc64-linux --toolprefix powerpc64-linux-gnu- "$@"
diff --git a/config_rv32.sh b/config_rv32.sh
new file mode 100755
index 00000000..a5a5cf1c
--- /dev/null
+++ b/config_rv32.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh rv32-linux --toolprefix riscv64-linux-gnu- "$@"
diff --git a/config_rv64.sh b/config_rv64.sh
new file mode 100755
index 00000000..0698c2ff
--- /dev/null
+++ b/config_rv64.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh rv64-linux --toolprefix riscv64-linux-gnu- "$@"
diff --git a/config_simple.sh b/config_simple.sh
new file mode 100755
index 00000000..e2d3844c
--- /dev/null
+++ b/config_simple.sh
@@ -0,0 +1,11 @@
+arch=$1
+shift
+version=`git rev-parse --short HEAD`
+branch=`git rev-parse --abbrev-ref HEAD`
+date=`date -I`
+
+if test "x$CCOMP_INSTALL_PREFIX" = "x" ;
+then CCOMP_INSTALL_PREFIX=/opt/CompCert ;
+fi
+
+./configure --prefix ${CCOMP_INSTALL_PREFIX}/${branch}/${date}_${version}/$arch "$@" $arch
diff --git a/config_x86_64.sh b/config_x86_64.sh
new file mode 100755
index 00000000..b18ec95b
--- /dev/null
+++ b/config_x86_64.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh x86_64-linux "$@"
diff --git a/configure b/configure
index 626bf3aa..f790281c 100755
--- a/configure
+++ b/configure
@@ -57,10 +57,12 @@ Supported targets:
   rv64-linux           (RISC-V 64 bits, Linux)
   k1c-mbr              (Kalray K1c, bare runtime)
   k1c-cos              (Kalray K1c, ClusterOS)
+  aarch64-linux        (AArch64, i.e. ARMv8 in 64-bit mode, Linux)
   manual               (edit configuration file by hand)
 
 For x86 targets, the "x86_32-" prefix can also be written "ia32-" or "i386-".
 For x86 targets, the "x86_64-" prefix can also be written "amd64-".
+For AArch64 targets, the "aarch64-" prefix can also be written "arm64-".
 
 For PowerPC targets, the "ppc-" prefix can be refined into:
   ppc64-               PowerPC 64 bits
@@ -179,6 +181,8 @@ case "$target" in
       arch="riscV"; model="64"; endianness="little"; bitsize=64;;
   k1c-*)
       arch="mppa_k1c"; model="64"; endianness="little"; bitsize=64;;
+  aarch64-*|arm64-*)
+      arch="aarch64"; model="default"; endianness="little"; bitsize=64;;
   manual)
       ;;
   "")
@@ -453,8 +457,8 @@ if test "$arch" = "mppa_k1c"; then
   fi
   osupper=`echo $os|tr a-z A-Z`
   k1base="k1-$os"
-  casm="$k1base-gcc"
-  casm_options="$model_options -c"
+  casm="k1-elf-as"
+  casm_options="$model_options"
   cc="$k1base-gcc $model_options"
   clinker="$k1base-gcc"
   bindir="$HOME/.usr/bin"
@@ -466,6 +470,29 @@ if test "$arch" = "mppa_k1c"; then
   system="linux"
 fi
 
+#
+# AArch64 (ARMv8 64 bits) Target Configuration
+#
+if test "$arch" = "aarch64"; then
+  case "$target" in
+    linux)
+        abi="standard"
+        casm="${toolprefix}gcc"
+        casm_options="-c"
+        cc="${toolprefix}gcc"
+        clinker="${toolprefix}gcc"
+        clinker_options=""
+        cprepro="${toolprefix}gcc"
+        cprepro_options="-std=c99 -U__GNUC__ -E"
+        libmath="-lm"
+        system="linux";;
+    *)
+        echo "Error: invalid eabi/system '$target' for architecture AArch64." 1>&2
+        echo "$usage" 1>&2
+        exit 2;;
+  esac
+fi
+
 
 #
 # Finalize Target Configuration
@@ -541,14 +568,14 @@ missingtools=false
 echo "Testing Coq... " | tr -d '\n'
 coq_ver=$(${COQBIN}coqc -v 2>/dev/null | sed -n -e 's/The Coq Proof Assistant, version \([^ ]*\).*$/\1/p')
 case "$coq_ver" in
-  8.7.0|8.7.1|8.7.2|8.8.0|8.8.1|8.8.2|8.9.0|8.9.1|8.10)
+  8.9.0|8.9.1|8.10.0|8.10.1|8.10.2|8.11.0)
         echo "version $coq_ver -- good!";;
   ?*)
         echo "version $coq_ver -- UNSUPPORTED"
         if $ignore_coq_version; then
             echo "Warning: this version of Coq is unsupported, proceed at your own risks."
         else
-            echo "Error: CompCert requires one of the following Coq versions: 8.10, 8.9.1, 8.9.0, 8.8.2, 8.8.1, 8.8.0, 8.7.2, 8.7.1, 8.7.0"
+            echo "Error: CompCert requires one of the following Coq versions: 8.11.0, 8.10.2, 8.10.1, 8.10.0, 8.9.1, 8.9.0, 8.8.2, 8.8.1, 8.8.0"
             missingtools=true
         fi;;
   "")
@@ -560,24 +587,19 @@ esac
 echo "Testing OCaml... " | tr -d '\n'
 ocaml_ver=`ocamlopt -version 2>/dev/null`
 case "$ocaml_ver" in
-  4.00.*|4.01.*)
+  4.00.*|4.01.*| 4.02.*|4.03.*|4.04.*)
         echo "version $ocaml_ver -- UNSUPPORTED"
-        echo "Error: CompCert requires OCaml version 4.02 or later."
+        echo "Error: CompCert requires OCaml version 4.05 or later."
         missingtools=true;;
-  4.02.*|4.03.*|4.04.*)
-      echo "version $ocaml_ver -- good!"
-      echo "WARNING: some Intel processors of the Skylake and Kaby Lake generations"
-      echo "have a hardware bug that can be triggered by this version of OCaml."
-      echo "To avoid this risk, it is recommended to use OCaml 4.05 or later.";;
-  4.0*)
+  4.*)
         echo "version $ocaml_ver -- good!";;
   ?.*)
         echo "version $ocaml_ver -- UNSUPPORTED"
-        echo "Error: CompCert requires OCaml version 4.02 or later."
+        echo "Error: CompCert requires OCaml version 4.05 or later."
         missingtools=true;;
   *)
         echo "NOT FOUND"
-        echo "Error: make sure OCaml version 4.02 or later is installed."
+        echo "Error: make sure OCaml version 4.05 or later is installed."
         missingtools=true;;
 esac
 
@@ -598,8 +620,8 @@ case "$menhir_ver" in
   20[0-9][0-9][0-9][0-9][0-9][0-9])
       if test "$menhir_ver" -ge $MENHIR_REQUIRED; then
           echo "version $menhir_ver -- good!"
-          menhir_include_dir=$(menhir --suggest-menhirLib | tr -d '\r' | tr '\\' '/')
-          if test -z "$menhir_include_dir"; then
+          menhir_dir=$(menhir --suggest-menhirLib | tr -d '\r' | tr '\\' '/')
+          if test -z "$menhir_dir"; then
               echo "Error: cannot determine the location of the Menhir API library."
               echo "This can be due to an incorrect Menhir package."
               echo "Consider using the OPAM package for Menhir."
@@ -693,7 +715,7 @@ MANDIR=$sharedir/man
 SHAREDIR=$sharedir
 COQDEVDIR=$coqdevdir
 OCAML_OPT_COMP=$ocaml_opt_comp
-MENHIR_INCLUDES=-I "$menhir_include_dir"
+MENHIR_DIR=$menhir_dir
 COMPFLAGS=-bin-annot
 EOF
 
@@ -728,6 +750,8 @@ cat >> Makefile.config <<'EOF'
 # ARCH=powerpc
 # ARCH=arm
 # ARCH=x86
+# ARCH=riscV
+# ARCH=aarch6
 ARCH=
 
 # Hardware variant
@@ -741,23 +765,24 @@ ARCH=
 # MODEL=armv7m      # for ARM
 # MODEL=32sse2      # for x86 in 32-bit mode
 # MODEL=64          # for x86 in 64-bit mode
+# MODEL=default     # for others
 MODEL=
 
 # Target ABI
 # ABI=eabi          # for PowerPC / Linux and other SVR4 or EABI platforms
 # ABI=eabi          # for ARM
 # ABI=hardfloat     # for ARM
-# ABI=standard      # for x86
+# ABI=standard      # for others
 ABI=
 
 # Target bit width
-# BITSIZE=64        # for x86 in 64-bit mode
+# BITSIZE=64        # for x86 in 64-bit mode, RiscV in 64-bit mode, AArch64
 # BITSIZE=32        # otherwise
 BITSIZE=
 
 # Target endianness
 # ENDIANNESS=big     # for ARM or PowerPC
-# ENDIANNESS=little  # for ARM or x86
+# ENDIANNESS=little  # for ARM or x86 or RiscV or AArch64
 ENDIANNESS=
 
 # Target operating system and development environment
@@ -766,7 +791,7 @@ ENDIANNESS=
 # SYSTEM=linux
 # SYSTEM=diab
 #
-# Possible choices for ARM:
+# Possible choices for ARM, AArch64, RiscV:
 # SYSTEM=linux
 #
 # Possible choices for x86:
@@ -820,7 +845,7 @@ EXECUTE=k1-cluster --syscall=libstd_scalls.so --
 CFLAGS= -D __K1C_COS__
 SIMU=k1-cluster -- 
 BACKENDLIB=Machblock.v Machblockgen.v Machblockgenproof.v\\
-    Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v Asmvliw.v\\
+    Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v Asmvliw.v Asmblockprops.v\\
     ForwardSimulationBlock.v PostpassScheduling.v PostpassSchedulingproof.v\\
     Asmblockdeps.v DecBoolOps.v Chunks.v Peephole.v ExtValues.v ExtFloats.v\\
     AbstractBasicBlocksDef.v SeqSimuTheory.v ImpSimuTest.v Parallelizability.v\\
@@ -881,6 +906,4 @@ cat <<EOF
     Coq development will not be installed
 EOF
 fi
-
 fi
-
diff --git a/coq b/coq
index 0b04a8c7..fcf744fd 100755
--- a/coq
+++ b/coq
@@ -12,4 +12,4 @@ make -q ${1}o || {
    done)
 }
 
-"${COQBIN}coqide" $INCLUDES $1 && make ${1}o
+"${COQBIN}coqide" -async-proofs off $INCLUDES $1 && make ${1}o
diff --git a/cparser/Ceval.ml b/cparser/Ceval.ml
index 58dea5f4..ecf83779 100644
--- a/cparser/Ceval.ml
+++ b/cparser/Ceval.ml
@@ -271,7 +271,7 @@ let constant_expr env ty e =
   try
     match unroll env ty, cast env ty (expr env e) with
     | TInt(ik, _), I n -> Some(CInt(n, ik, ""))
-    | TPtr(_, _), I n -> Some(CInt(n, IInt, ""))
+    | TPtr(_, _), I n -> Some(CInt(n, ptr_t_ikind (), ""))
     | (TArray(_, _, _) | TPtr(_, _)), S s -> Some(CStr s)
     | (TArray(_, _, _) | TPtr(_, _)), WS s -> Some(CWStr s)
     | TEnum(_, _), I n -> Some(CInt(n, enum_ikind, ""))
diff --git a/cparser/Cutil.ml b/cparser/Cutil.ml
index 7a2f4828..3467c092 100644
--- a/cparser/Cutil.ml
+++ b/cparser/Cutil.ml
@@ -836,6 +836,12 @@ let is_anonymous_composite = function
   | TUnion (id,_) -> id.C.name = ""
   | _ -> false
 
+let is_anonymous_type = function
+  | TEnum (id,_)
+  | TStruct (id,_)
+  | TUnion (id,_) -> id.C.name = ""
+  | _ -> false
+
 let is_function_pointer_type env t =
   match unroll env t with
   | TPtr (ty, _) -> is_function_type env ty
diff --git a/cparser/Cutil.mli b/cparser/Cutil.mli
index f6c4627d..2ddee78c 100644
--- a/cparser/Cutil.mli
+++ b/cparser/Cutil.mli
@@ -174,6 +174,8 @@ val is_function_pointer_type : Env.t -> typ -> bool
   (* Is type a pointer to function type? *)
 val is_anonymous_composite : typ -> bool
   (* Is type an anonymous composite? *)
+val is_anonymous_type : typ -> bool
+  (* Is the type an anonymous composite or enum *)
 val is_qualified_array : typ -> bool
   (* Does the type contain a qualified array type (e.g. int[const 5])? *)
 val pointer_arithmetic_ok : Env.t -> typ -> bool
diff --git a/cparser/Diagnostics.ml b/cparser/Diagnostics.ml
index 012e4b66..7957375c 100644
--- a/cparser/Diagnostics.ml
+++ b/cparser/Diagnostics.ml
@@ -104,30 +104,46 @@ type warning_type =
   | Reduced_alignment
   | Non_linear_cond_expr
 
+(* List of all warnings with default status.
+   "true" means the warning is active by default.
+   "false" means the warning is off by default. *)
+let all_warnings =
+  [ (Unnamed, true);
+    (Unknown_attribute, true);
+    (Zero_length_array, false);
+    (Celeven_extension, false);
+    (Gnu_empty_struct, true);
+    (Missing_declarations, true);
+    (Constant_conversion, true);
+    (Int_conversion, true);
+    (Varargs, true);
+    (Implicit_function_declaration, true);
+    (Pointer_type_mismatch, true);
+    (Compare_distinct_pointer_types, true);
+    (Implicit_int, true);
+    (Main_return_type, true);
+    (Invalid_noreturn, true);
+    (Return_type, true);
+    (Literal_range, true);
+    (Unknown_pragmas, false);
+    (CompCert_conformance, false);
+    (Inline_asm_sdump, true);
+    (Unused_variable, false);
+    (Unused_parameter, false);
+    (Wrong_ais_parameter, true);
+    (Unused_ais_parameter, true);
+    (Ignored_attributes, true);
+    (Extern_after_definition, true);
+    (Static_in_inline, true);
+    (Flexible_array_extensions, false);
+    (Tentative_incomplete_static, false);
+    (Reduced_alignment, false);
+    (Non_linear_cond_expr, false);
+  ]
+
 (* List of active warnings *)
-let active_warnings: warning_type list ref = ref [
-  Unnamed;
-  Unknown_attribute;
-  Gnu_empty_struct;
-  Missing_declarations;
-  Constant_conversion;
-  Int_conversion;
-  Varargs;
-  Implicit_function_declaration;
-  Pointer_type_mismatch;
-  Compare_distinct_pointer_types;
-  Implicit_int;
-  Main_return_type;
-  Invalid_noreturn;
-  Return_type;
-  Literal_range;
-  Inline_asm_sdump;
-  Wrong_ais_parameter;
-  Unused_ais_parameter;
-  Ignored_attributes;
-  Extern_after_definition;
-  Static_in_inline;
-]
+let active_warnings: warning_type list ref =
+  ref (List.map fst (List.filter snd all_warnings))
 
 (* List of errors treated as warning *)
 let error_warnings: warning_type list ref = ref []
@@ -188,76 +204,14 @@ let warning_not_as_error w () =
 
 (* Activate all warnings *)
 let wall () =
-  active_warnings:=[
-    Unnamed;
-    Unknown_attribute;
-    Zero_length_array;
-    Celeven_extension;
-    Gnu_empty_struct;
-    Missing_declarations;
-    Constant_conversion;
-    Int_conversion;
-    Varargs;
-    Implicit_function_declaration;
-    Pointer_type_mismatch;
-    Compare_distinct_pointer_types;
-    Implicit_int;
-    Main_return_type;
-    Invalid_noreturn;
-    Return_type;
-    Literal_range;
-    Unknown_pragmas;
-    CompCert_conformance;
-    Inline_asm_sdump;
-    Unused_variable;
-    Unused_parameter;
-    Wrong_ais_parameter;
-    Ignored_attributes;
-    Extern_after_definition;
-    Static_in_inline;
-    Flexible_array_extensions;
-    Tentative_incomplete_static;
-    Reduced_alignment;
-    Non_linear_cond_expr;
-  ]
+  active_warnings:= List.map fst all_warnings
 
 let wnothing () =
   active_warnings :=[]
 
 (* Make all warnings an error *)
 let werror () =
-  error_warnings:=[
-    Unnamed;
-    Unknown_attribute;
-    Zero_length_array;
-    Celeven_extension;
-    Gnu_empty_struct;
-    Missing_declarations;
-    Constant_conversion;
-    Int_conversion;
-    Varargs;
-    Implicit_function_declaration;
-    Pointer_type_mismatch;
-    Compare_distinct_pointer_types;
-    Implicit_int;
-    Main_return_type;
-    Invalid_noreturn;
-    Return_type;
-    Literal_range;
-    Unknown_pragmas;
-    CompCert_conformance;
-    Inline_asm_sdump;
-    Unused_variable;
-    Wrong_ais_parameter;
-    Unused_ais_parameter;
-    Ignored_attributes;
-    Extern_after_definition;
-    Static_in_inline;
-    Flexible_array_extensions;
-    Tentative_incomplete_static;
-    Reduced_alignment;
-    Non_linear_cond_expr;
-  ]
+  error_warnings:= List.map fst all_warnings
 
 (* Generate the warning key for the message *)
 let key_of_warning w =
@@ -411,37 +365,7 @@ let error_option w =
    Exact ("-Wno-error="^key), Unit ( warning_not_as_error w)]
 
 let warning_options =
-  error_option Unnamed @
-  error_option Unknown_attribute @
-  error_option Zero_length_array @
-  error_option Celeven_extension @
-  error_option Gnu_empty_struct @
-  error_option Missing_declarations @
-  error_option Constant_conversion @
-  error_option Int_conversion @
-  error_option Varargs @
-  error_option Implicit_function_declaration @
-  error_option Pointer_type_mismatch @
-  error_option Compare_distinct_pointer_types @
-  error_option Implicit_int @
-  error_option Main_return_type @
-  error_option Invalid_noreturn @
-  error_option Return_type @
-  error_option Literal_range @
-  error_option Unknown_pragmas @
-  error_option CompCert_conformance @
-  error_option Inline_asm_sdump @
-  error_option Unused_variable @
-  error_option Unused_parameter @
-  error_option Wrong_ais_parameter @
-  error_option Unused_ais_parameter @
-  error_option Ignored_attributes @
-  error_option Extern_after_definition @
-  error_option Static_in_inline @
-  error_option Flexible_array_extensions @
-  error_option Tentative_incomplete_static @
-  error_option Reduced_alignment @
-  error_option Non_linear_cond_expr @
+  List.concat (List.map (fun (w, active) -> error_option w) all_warnings) @
   [Exact ("-Wfatal-errors"), Set error_fatal;
    Exact ("-fdiagnostics-color"), Ignore; (* Either output supports it or no color *)
    Exact ("-fno-diagnostics-color"), Unset color_diagnostics;
diff --git a/cparser/Elab.ml b/cparser/Elab.ml
index 3797164d..9e17cb7e 100644
--- a/cparser/Elab.ml
+++ b/cparser/Elab.ml
@@ -21,7 +21,7 @@ open Machine
 open Cabs
 open C
 open Diagnostics
-open !Cutil
+open! Cutil
 
 (** * Utility functions *)
 
@@ -39,7 +39,16 @@ let warning loc =
 let print_typ env fmt ty =
   match ty with
   | TNamed _  ->
-      Format.fprintf fmt "'%a' (aka '%a')" Cprint.typ_raw ty Cprint.typ_raw (unroll env ty)
+    Format.fprintf fmt "'%a'" Cprint.typ_raw ty;
+    let ty' = unroll env ty in
+    if not (is_anonymous_type ty')
+    then Format.fprintf fmt " (aka '%a')" Cprint.typ_raw ty'
+  | TStruct (id,_) when id.C.name = "" ->
+    Format.fprintf fmt "'struct <anonymous>'"
+  | TUnion (id,_) when id.C.name = "" ->
+    Format.fprintf fmt "'union <anonymous>'"
+  | TEnum (id,_) when id.C.name = "" ->
+    Format.fprintf fmt "'enum <anonymous>'"
   | _ -> Format.fprintf fmt "'%a'" Cprint.typ_raw ty
 
 let pp_field fmt id =
@@ -172,7 +181,7 @@ let combine_toplevel_definitions loc env s old_sto old_ty sto ty =
 	error loc "static declaration of '%s' follows non-static declaration" s;
         sto
     | Storage_static,_ -> Storage_static (* Static stays static *)
-    | Storage_extern,_ -> sto
+    | Storage_extern,_ -> if is_function_type env new_ty then Storage_extern else sto
     | Storage_default,Storage_extern ->
       if is_global_defined s && is_function_type env ty then
         warning loc Extern_after_definition "this extern declaration follows a non-extern definition and is ignored";
@@ -443,7 +452,8 @@ let elab_constant loc = function
       let (v, fk) = elab_float_constant f in
       CFloat(v, fk)
   | CONST_CHAR(wide, s) ->
-      CInt(elab_char_constant loc wide s, IInt, "")
+      let ikind = if wide then wchar_ikind () else IInt in
+      CInt(elab_char_constant loc wide s, ikind, "")
   | CONST_STRING(wide, s) ->
       elab_string_literal loc wide s
 
@@ -1056,7 +1066,7 @@ and elab_struct_or_union_info kind loc env members attrs =
   | fld :: rem ->
       if wrap incomplete_type loc env' fld.fld_typ then
         (* Must be fatal otherwise we get problems constructing the init *)
-        fatal_error loc "member '%a' has incomplete type" pp_field fld.fld_name;
+        fatal_error loc "member '%a' has incomplete type %a" pp_field fld.fld_name (print_typ env) fld.fld_typ;
       if wrap contains_flex_array_mem loc env' fld.fld_typ && kind = Struct then
         warning loc Flexible_array_extensions "%a may not be used as a struct member due to flexible array member" (print_typ env) fld.fld_typ;
       check_reduced_alignment loc env' fld.fld_typ;
@@ -1611,7 +1621,7 @@ end;
 try
   elab_item (I.top env root ty_root) ie []
 with No_default_init ->
-  error loc "variable has incomplete type %a" Cprint.typ ty_root;
+  error loc "variable has incomplete type %a" (print_typ env) ty_root;
   raise Exit
 
 (* Elaboration of a top-level initializer *)
@@ -1844,7 +1854,12 @@ let elab_expr ctx loc env a =
            having declared it *)
         match a1 with
         | VARIABLE n when not (Env.ident_is_bound env n) ->
-            warning Implicit_function_declaration "implicit declaration of function '%s' is invalid in C99" n;
+            let is_builtin = String.length n > 10
+                           && String.sub n 0 10 = "__builtin_" in
+            if is_builtin then
+              error "use of unknown builtin '%s'" n
+            else
+              warning Implicit_function_declaration "implicit declaration of function '%s' is invalid in C99" n;
             let ty = TFun(TInt(IInt, []), None, false, []) in
             (* Check against other definitions and enter in env *)
             let (id, sto, env, ty, linkage) =
@@ -1909,6 +1924,8 @@ let elab_expr ctx loc env a =
 
   | CAST ((spec, dcl), ie) ->
       let (ty, env) = elab_type loc env spec dcl in
+      if not (is_array_type env ty) && incomplete_type env ty then
+        fatal_error "ill-formed compound literal with incomplete type %a" (print_typ env) ty;
       begin match elab_initializer loc env "<compound literal>" ty ie with
       | (ty', Some i) -> { edesc = ECompound(ty', i); etyp = ty' },env
       | (ty', None)   -> fatal_error "ill-formed compound literal"
@@ -2411,8 +2428,8 @@ let enter_typedef loc env sto (s, ty, init) =
       env
     end
     else begin
-      error loc "typedef redefinition with different types (%a vs %a)"
-        (print_typ env) ty (print_typ env) ty';
+      error loc "redefinition of typedef '%s' with different type (%a vs %a)"
+        s (print_typ env) ty (print_typ env) ty';
       env
     end
   | _ ->
@@ -2425,9 +2442,10 @@ let enter_typedef loc env sto (s, ty, init) =
 
 let enter_decdef local nonstatic_inline loc sto (decls, env) (s, ty, init) =
   let isfun = is_function_type env ty in
+  let has_init = init <> NO_INIT in
   if sto = Storage_register && has_std_alignas env ty then
     error loc "alignment specified for 'register' object '%s'" s;
-  if sto = Storage_extern && init <> NO_INIT then
+  if sto = Storage_extern && has_init then
     error loc "'extern' declaration variable has an initializer";
   if local && isfun then begin
     match sto with
@@ -2451,10 +2469,14 @@ let enter_decdef local nonstatic_inline loc sto (decls, env) (s, ty, init) =
      initializer can refer to the ident *)
   let (id, sto', env1, ty, linkage) =
     enter_or_refine_ident local loc env s sto1 ty in
-  if init <> NO_INIT && not local then
+  if has_init && not local then
     add_global_define loc s;
-  if not isfun && is_void_type env ty then
-    fatal_error loc "'%s' has incomplete type" s;
+  (* check if the type is void or incomplete and the declaration is initialized *)
+  if not isfun then begin
+    let incomplete_init = not (is_array_type env1 ty) && wrap incomplete_type loc env1 ty && has_init in
+    if is_void_type env1 ty || incomplete_init then
+      fatal_error loc "variable '%s' has incomplete type %a" s (print_typ env) ty;
+  end;
   (* process the initializer *)
   let (ty', init') = elab_initializer loc env1 s ty init in
   (* update environment with refined type *)
@@ -2465,7 +2487,7 @@ let enter_decdef local nonstatic_inline loc sto (decls, env) (s, ty, init) =
       warning loc Tentative_incomplete_static "tentative static definition with incomplete type";
     end
     else if local && sto' <> Storage_extern then
-      error loc "variable has incomplete type %a" (print_typ env) ty';
+      error loc "variable '%s' has incomplete type %a" s (print_typ env) ty';
   (* check if alignment is reduced *)
   check_reduced_alignment loc env ty';
   (* check for static variables in nonstatic inline functions *)
@@ -2659,10 +2681,10 @@ let elab_fundef genv spec name defs body loc =
      and additionally they should have an identifier. In both cases a fatal
      error is raised in order to avoid problems at later places. *)
   let add_param env (id, ty) =
-    if wrap incomplete_type loc env ty then
-      fatal_error loc "parameter has incomplete type";
     if id.C.name = "" then
       fatal_error loc "parameter name omitted";
+    if wrap incomplete_type loc env ty then
+      fatal_error loc "parameter '%s' has incomplete type %a" id.C.name (print_typ env) ty;
     Env.add_ident env id Storage_default ty
   in
   (* Enter parameters and extra declarations in the local environment.
diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll
index 346477b5..e44a330f 100644
--- a/cparser/Lexer.mll
+++ b/cparser/Lexer.mll
@@ -177,7 +177,7 @@ let identifier_nondigit =
 let identifier = identifier_nondigit (identifier_nondigit|digit)*
 
 (* Whitespaces *)
-let whitespace_char_no_newline = [' ' '\t' '\012' '\r']
+let whitespace_char_no_newline = [' ' '\t'  '\011' '\012' '\r']
 
 (* Integer constants *)
 let nonzero_digit = ['1'-'9']
diff --git a/cparser/Machine.ml b/cparser/Machine.ml
index ac34fa5f..193d83c4 100644
--- a/cparser/Machine.ml
+++ b/cparser/Machine.ml
@@ -18,6 +18,7 @@
 type struct_passing_style =
   | SP_ref_callee                       (* by reference, callee takes copy *)
   | SP_ref_caller                       (* by reference, caller takes copy *)
+  | SP_value32_ref_callee               (* by value if <= 32 bits, by ref_callee otherwise *)
   | SP_split_args                       (* by value, as a sequence of ints *)
 
 type struct_return_style =
@@ -268,8 +269,13 @@ let mppa_k1c =
     bigendian = false;
     bitfields_msb_first = false; (* TO CHECK *)
     supports_unaligned_accesses = true;
-    struct_passing_style = SP_split_args;
-    struct_return_style = SR_int1248 }
+    struct_passing_style = SP_value32_ref_callee;
+    struct_return_style = SR_int1to4 }
+
+let aarch64 =
+  { i32lpll64 with name = "aarch64";
+                   struct_passing_style = SP_ref_callee; (* Wrong *)
+                   struct_return_style = SR_ref } (* Wrong *)
 
 (* Add GCC extensions re: sizeof and alignof *)
 
diff --git a/cparser/Machine.mli b/cparser/Machine.mli
index 56d8d0b9..ea25c4f6 100644
--- a/cparser/Machine.mli
+++ b/cparser/Machine.mli
@@ -17,6 +17,7 @@
 type struct_passing_style =
   | SP_ref_callee                       (* by reference, callee takes copy *)
   | SP_ref_caller                       (* by reference, caller takes copy *)
+  | SP_value32_ref_callee               (* by value if <= 32 bits, by ref_callee otherwise *)
   | SP_split_args                       (* by value, as a sequence of ints *)
 
 type struct_return_style =
@@ -87,6 +88,7 @@ val arm_bigendian : t
 val rv32 : t
 val rv64 : t
 val mppa_k1c : t
+val aarch64 : t
 
 val gcc_extensions : t -> t
 val compcert_interpreter : t -> t
diff --git a/cparser/StructPassing.ml b/cparser/StructPassing.ml
index 5c6454f0..3aff090e 100644
--- a/cparser/StructPassing.ml
+++ b/cparser/StructPassing.ml
@@ -68,7 +68,16 @@ let classify_param env ty =
     match !struct_passing_style with
     | SP_ref_callee -> Param_unchanged
     | SP_ref_caller -> Param_ref_caller
-    | _ ->
+    | SP_value32_ref_callee ->
+      (match sizeof env ty, alignof env ty with
+      | Some sz, Some al ->
+          if (sz <= 4) then
+            Param_flattened ((sz+3)/4, sz, al) (* FIXME - why (sz+3)/4 ? *)
+          else
+            Param_unchanged
+      | _, _ -> Param_unchanged (* when parsing prototype with incomplete structure definition *)
+      )
+    | SP_split_args ->
       match sizeof env ty, alignof env ty with
       | Some sz, Some al ->
           Param_flattened ((sz + 3) / 4, sz, al)
diff --git a/debug/DwarfPrinter.ml b/debug/DwarfPrinter.ml
index 9a24041b..2cb8c7d9 100644
--- a/debug/DwarfPrinter.ml
+++ b/debug/DwarfPrinter.ml
@@ -602,8 +602,13 @@ module DwarfPrinter(Target: DWARF_TARGET):
       print_sleb128 oc "" 0;
       print_label oc debug_end (* End of the debug section *)
 
-    let print_location_entry oc c_low l =
+    let print_location_entry oc needs_base c_low l =
       print_label oc (loc_to_label l.loc_id);
+      (* If we have multiple ranges per compilation unit we need to specify a base address for the location *)
+      if needs_base then begin
+        fprintf oc "	%s		-1\n" address;
+        fprintf oc "	%s		%a\n" address label c_low;
+      end;
       List.iter (fun (b,e,loc) ->
         fprintf oc "	%s		%a-%a\n" address label b label c_low;
         fprintf oc "	%s		%a-%a\n" address label e label c_low;
@@ -621,11 +626,11 @@ module DwarfPrinter(Target: DWARF_TARGET):
       fprintf oc "	%s	0\n" address
 
 
-    let print_location_list oc (c_low,l) =
-      let f =  match c_low with
-      | Some s -> print_location_entry oc s
-      | None -> print_location_entry_abs oc in
-     List.iter f l
+    let print_location_list oc needs_base l =
+      let f l =  match l.loc_sec_begin  with
+        | Some s -> print_location_entry oc needs_base s l
+        | None -> print_location_entry_abs oc l in
+      List.iter f l
 
     let list_opt l f =
       match l with
@@ -635,15 +640,15 @@ module DwarfPrinter(Target: DWARF_TARGET):
     let print_diab_entries oc entries =
       let abbrev_start = new_label () in
       abbrev_start_addr := abbrev_start;
-      List.iter (fun e -> compute_abbrev e.entry) entries;
+      List.iter (fun e -> compute_abbrev e.diab_entry) entries;
       print_abbrev oc;
       List.iter (fun e ->
         let name = if e.section_name <> ".text" then Some e.section_name else None in
         section oc (Section_debug_info name);
-        print_debug_info oc e.start_label e.line_label e.entry) entries;
-      if List.exists (fun e -> match e.dlocs with _,[] -> false | _,_ -> true) entries then begin
+        print_debug_info oc e.start_label e.line_label e.diab_entry) entries;
+      if List.exists (fun e -> match e.diab_locs with [] -> false | _ -> true) entries then begin
         section oc Section_debug_loc;
-        List.iter (fun e -> print_location_list oc e.dlocs) entries
+        List.iter (fun e -> print_location_list oc false e.diab_locs) entries
       end
 
     let print_ranges oc r =
@@ -665,8 +670,8 @@ module DwarfPrinter(Target: DWARF_TARGET):
           fprintf oc "	%s	0\n" address;
           fprintf oc "	%s	0\n" address)  r
 
-    let print_gnu_entries oc cp (lpc,loc) s r =
-      compute_abbrev cp;
+    let print_gnu_entries oc entries =
+      compute_abbrev entries.gnu_entry;
       let line_start = new_label ()
       and start = new_label ()
       and  abbrev_start = new_label ()
@@ -674,18 +679,18 @@ module DwarfPrinter(Target: DWARF_TARGET):
       debug_ranges_addr := range_label;
       abbrev_start_addr := abbrev_start;
       section oc (Section_debug_info None);
-      print_debug_info oc start line_start cp;
+      print_debug_info oc start line_start entries.gnu_entry;
       print_abbrev oc;
-      list_opt loc (fun () ->
+      list_opt entries.gnu_locs (fun () ->
         section oc Section_debug_loc;
-        print_location_list oc (lpc,loc));
-      list_opt r (fun () ->
-        print_ranges oc r);
+        print_location_list oc entries.several_secs entries.gnu_locs);
+      list_opt entries.range_table (fun () ->
+        print_ranges oc entries.range_table);
       section oc (Section_debug_line None);
       print_label oc line_start;
-      list_opt s (fun () ->
+      list_opt entries.string_table (fun () ->
         section oc Section_debug_str;
-        let s = List.sort (fun (a,_) (b,_) -> compare a b) s in
+        let s = List.sort (fun (a,_) (b,_) -> compare a b) entries.string_table in
         List.iter (fun (id,s) ->
           print_label oc (loc_to_label id);
           fprintf oc "	.asciz		%S\n" s) s)
@@ -698,6 +703,6 @@ module DwarfPrinter(Target: DWARF_TARGET):
       Hashtbl.clear loc_labels;
       match debug with
       | Diab entries -> print_diab_entries oc entries
-      | Gnu (cp,loc,s,r) -> print_gnu_entries oc cp loc s r
+      | Gnu entries -> print_gnu_entries oc entries
 
   end
diff --git a/debug/DwarfPrinter.mli b/debug/DwarfPrinter.mli
index e1e10601..78dc05fb 100644
--- a/debug/DwarfPrinter.mli
+++ b/debug/DwarfPrinter.mli
@@ -12,7 +12,7 @@
 
 open DwarfTypes
 
-module DwarfPrinter: functor (Target: DWARF_TARGET) ->
+module DwarfPrinter: DWARF_TARGET ->
   sig
     val print_debug: out_channel -> debug_entries -> unit
   end
diff --git a/debug/DwarfTypes.mli b/debug/DwarfTypes.mli
index 5a2bce3b..567c65cd 100644
--- a/debug/DwarfTypes.mli
+++ b/debug/DwarfTypes.mli
@@ -266,11 +266,13 @@ type dw_entry =
 
 (* The type for the location list. *)
 type location_entry =
-    {
-     loc:     (address * address * location_value) list;
-     loc_id:  reference;
-   }
-type dw_locations = constant option * location_entry list
+  {
+    loc:     (address * address * location_value) list;
+    loc_id:  reference;
+    loc_sec_begin :  address option;
+  }
+
+type dw_locations = location_entry list
 
 type range_entry =
   | AddressRange  of (address * address) list
@@ -285,13 +287,20 @@ type diab_entry =
      section_name: string;
      start_label:  int;
      line_label:   int;
-     entry:        dw_entry;
-     dlocs:        dw_locations;
+     diab_entry:   dw_entry;
+     diab_locs:    dw_locations;
    }
 
 type diab_entries =  diab_entry list
 
-type gnu_entries = dw_entry * dw_locations * dw_string * dw_ranges
+type gnu_entries =
+  {
+    string_table: dw_string;
+    range_table:  dw_ranges;
+    gnu_locs:     dw_locations;
+    gnu_entry:    dw_entry;
+    several_secs: bool;
+  }
 
 type debug_entries =
   | Diab of diab_entries
diff --git a/debug/Dwarfgen.ml b/debug/Dwarfgen.ml
index e1b71f13..6c1d0846 100644
--- a/debug/Dwarfgen.ml
+++ b/debug/Dwarfgen.ml
@@ -408,7 +408,7 @@ module Dwarfgenaux (Target: TARGET) =
                   and lo = translate_label f_id lo in
                   hi,lo,range_entry_loc i.var_loc) l in
                 let id = next_id () in
-                Some (LocRef id),[{loc = l;loc_id = id;}]
+                Some (LocRef id),[{loc_sec_begin = !current_section_start; loc = l;loc_id = id;}]
           end
         with Not_found -> None,[]
       else
@@ -574,8 +574,8 @@ let diab_gen_compilation_section sec_name s defs acc =
     section_name = s;
     start_label = debug_start;
     line_label = line_start;
-    entry = cp;
-    dlocs = Some low_pc,accu.locs;
+    diab_entry = cp;
+    diab_locs = accu.locs;
   }::acc
 
 let gen_diab_debug_info sec_name var_section : debug_entries =
@@ -643,6 +643,12 @@ let gen_gnu_debug_info sec_name var_section : debug_entries =
   } in
   let cp = new_entry (next_id ()) (DW_TAG_compile_unit cp) in
   let cp = add_children cp (types@defs) in
-  let loc_pc = if StringSet.cardinal sec > 1 then None else low_pc in
   let string_table = Hashtbl.fold (fun s i acc -> (i,s)::acc) string_table [] in
-  Gnu (cp,(loc_pc,accu.locs),string_table,snd accu.ranges)
+  let cp = {
+    string_table = string_table;
+    range_table = snd accu.ranges;
+    gnu_locs = accu.locs;
+    gnu_entry = cp;
+    several_secs = StringSet.cardinal sec > 1}
+  in
+  Gnu cp
diff --git a/doc/index.html b/doc/index.html
index edb3accd..631c5d99 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -24,7 +24,7 @@ a:active {color : Red; text-decoration : underline; }
 
 <H1 align="center">The CompCert verified compiler</H1>
 <H2 align="center">Commented Coq development</H2>
-<H3 align="center">Version 3.5, 2019-02-27</H3>
+<H3 align="center">Version 3.7, 2020-03-31</H3>
 
 <H2>Introduction</H2>
 
@@ -101,6 +101,8 @@ See also: <A HREF="html/compcert.common.Memdata.html">Memdata</A> (in-memory rep
 <LI> <A HREF="html/compcert.common.Determinism.html">Determinism</A>: determinism properties of small-step semantics.
 <LI> <A HREF="html/compcert.powerpc.Op.html"><I>Op</I></A>: operators, addressing modes and their
 semantics.
+<LI> <A HREF="html/compcert.common.Builtins.html">Builtins</A>: semantics of built-in functions. <BR>
+See also: <A HREF="html/compcert.common.Builtins0.html">Builtins0</A> (target-independent part), <A HREF="html/compcert.powerpc.Builtins1.html"><I>Builtins1</I></A> (target-dependent part).
 <LI> <A HREF="html/compcert.common.Unityping.html">Unityping</A>: a solver for atomic unification constraints.
 </UL>
 
@@ -180,7 +182,8 @@ code.
 </TR>
 
 <TR valign="top">
-  <TD>Recognition of operators<br>and addressing modes</TD>
+  <TD>Recognition of operators<br>and addressing modes;<br>
+      if-conversion</TD>
   <TD>Cminor to CminorSel</TD>
   <TD><A HREF="html/compcert.backend.Selection.html">Selection</A><br>
       <A HREF="html/compcert.powerpc.SelectOp.html"><I>SelectOp</I></A><br>
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index cf1220d1..6986fb96 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -26,7 +26,11 @@ let option_ffloatconstprop = ref 2
 let option_ftailcalls = ref true
 let option_fconstprop = ref true
 let option_fcse = ref true
+let option_fcse2 = ref true
 let option_fredundancy = ref true
+let option_fduplicate = ref (-1)
+let option_finvertcond = ref true
+let option_ftracelinearize = ref false
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
 let option_fifconversion = ref true
@@ -74,4 +78,7 @@ let option_fglobaladdrtmp = ref false
 let option_fglobaladdroffset = ref false
 let option_fxsaddr = ref true  
 let option_faddx = ref false  
-let option_fcoalesce_mem = ref true  
+let option_fcoalesce_mem = ref true
+let option_fforward_moves = ref true
+let option_all_loads_nontrap = ref false
+let option_inline_auto_threshold = ref 0
diff --git a/driver/Compiler.v b/driver/Compiler.v
index c683c136..499feff2 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -38,10 +38,14 @@ Require RTLgen.
 Require Tailcall.
 Require Inlining.
 Require Renumber.
+Require Duplicate.
 Require Constprop.
 Require CSE.
+Require ForwardMoves.
+Require CSE2.
 Require Deadcode.
 Require Unusedglob.
+Require Allnontrap.
 Require Allocation.
 Require Tunneling.
 Require Linearize.
@@ -59,10 +63,14 @@ Require RTLgenproof.
 Require Tailcallproof.
 Require Inliningproof.
 Require Renumberproof.
+Require Duplicateproof.
 Require Constpropproof.
 Require CSEproof.
+Require ForwardMovesproof.
+Require CSE2proof.
 Require Deadcodeproof.
 Require Unusedglobproof.
+Require Allnontrapproof.
 Require Allocproof.
 Require Tunnelingproof.
 Require Linearizeproof.
@@ -126,16 +134,24 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
    @@ print (print_RTL 2)
    @@ time "Renumbering" Renumber.transf_program
    @@ print (print_RTL 3)
-   @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
+  @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program)
    @@ print (print_RTL 4)
-   @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program)
+   @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
    @@ print (print_RTL 5)
-  @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program)
+   @@ total_if Compopts.optim_constprop (time "Renumbering" Renumber.transf_program)
    @@ print (print_RTL 6)
-  @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program)
+  @@@ partial_if Compopts.optim_CSE (time "CSE" CSE.transf_program)
    @@ print (print_RTL 7)
-  @@@ time "Unused globals" Unusedglob.transform_program
+   @@ total_if Compopts.optim_CSE2 (time "CSE2" CSE2.transf_program)
    @@ print (print_RTL 8)
+   @@ total_if Compopts.optim_forward_moves ForwardMoves.transf_program
+   @@ print (print_RTL 9)
+  @@@ partial_if Compopts.optim_redundancy (time "Redundancy elimination" Deadcode.transf_program)
+   @@ print (print_RTL 10)
+   @@ total_if Compopts.all_loads_nontrap Allnontrap.transf_program
+   @@ print (print_RTL 11)
+  @@@ time "Unused globals" Unusedglob.transform_program
+   @@ print (print_RTL 12)
   @@@ time "Register allocation" Allocation.transf_program
    @@ print print_LTL
    @@ time "Branch tunneling" Tunneling.tunnel_program
@@ -238,10 +254,14 @@ Definition CompCert's_passes :=
   ::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog)
   ::: mkpass Inliningproof.match_prog
   ::: mkpass Renumberproof.match_prog
+  ::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog)
   ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog)
   ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog)
   ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog)
+  ::: mkpass (match_if Compopts.optim_CSE2 CSE2proof.match_prog)
+  ::: mkpass (match_if Compopts.optim_forward_moves ForwardMovesproof.match_prog)
   ::: mkpass (match_if Compopts.optim_redundancy Deadcodeproof.match_prog)
+  ::: mkpass (match_if Compopts.all_loads_nontrap Allnontrapproof.match_prog)
   ::: mkpass Unusedglobproof.match_prog
   ::: mkpass Allocproof.match_prog
   ::: mkpass Tunnelingproof.match_prog
@@ -281,17 +301,21 @@ Proof.
   set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *.
   destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; simpl in T; try discriminate.
   set (p9 := Renumber.transf_program p8) in *.
-  set (p10 := total_if optim_constprop Constprop.transf_program p9) in *.
-  set (p11 := total_if optim_constprop Renumber.transf_program p10) in *.
-  destruct (partial_if optim_CSE CSE.transf_program p11) as [p12|e] eqn:P12; simpl in T; try discriminate.
-  destruct (partial_if optim_redundancy Deadcode.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate.
-  destruct (Unusedglob.transform_program p13) as [p14|e] eqn:P14; simpl in T; try discriminate.
-  destruct (Allocation.transf_program p14) as [p15|e] eqn:P15; simpl in T; try discriminate.
-  set (p16 := Tunneling.tunnel_program p15) in *.
-  destruct (Linearize.transf_program p16) as [p17|e] eqn:P17; simpl in T; try discriminate.
-  set (p18 := CleanupLabels.transf_program p17) in *.
-  destruct (partial_if debug Debugvar.transf_program p18) as [p19|e] eqn:P19; simpl in T; try discriminate.
-  destruct (Stacking.transf_program p19) as [p20|e] eqn:P20; simpl in T; try discriminate.
+  destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
+  set (p11 := total_if optim_constprop Constprop.transf_program p10) in *.
+  set (p12 := total_if optim_constprop Renumber.transf_program p11) in *.
+  destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate.
+  set (p13bis := total_if optim_CSE2 CSE2.transf_program p13) in *.
+  set (p13ter := total_if optim_forward_moves ForwardMoves.transf_program p13bis) in *.
+  destruct (partial_if optim_redundancy Deadcode.transf_program p13ter) as [p14|e] eqn:P14; simpl in T; try discriminate.
+  set (p14bis := total_if all_loads_nontrap Allnontrap.transf_program p14) in *.
+  destruct (Unusedglob.transform_program p14bis) as [p15|e] eqn:P15; simpl in T; try discriminate.
+  destruct (Allocation.transf_program p15) as [p16|e] eqn:P16; simpl in T; try discriminate.
+  set (p17 := Tunneling.tunnel_program p16) in *.
+  destruct (Linearize.transf_program p17) as [p18|e] eqn:P18; simpl in T; try discriminate.
+  set (p19 := CleanupLabels.transf_program p18) in *.
+  destruct (partial_if debug Debugvar.transf_program p19) as [p20|e] eqn:P20; simpl in T; try discriminate.
+  destruct (Stacking.transf_program p20) as [p21|e] eqn:P21; simpl in T; try discriminate.
   unfold match_prog; simpl.
   exists p1; split. apply SimplExprproof.transf_program_match; auto.
   exists p2; split. apply SimplLocalsproof.match_transf_program; auto.
@@ -302,17 +326,21 @@ Proof.
   exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match.
   exists p8; split. apply Inliningproof.transf_program_match; auto.
   exists p9; split. apply Renumberproof.transf_program_match; auto.
-  exists p10; split. apply total_if_match. apply Constpropproof.transf_program_match.
-  exists p11; split. apply total_if_match. apply Renumberproof.transf_program_match.
-  exists p12; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match.
-  exists p13; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match.
-  exists p14; split. apply Unusedglobproof.transf_program_match; auto.
-  exists p15; split. apply Allocproof.transf_program_match; auto.
-  exists p16; split. apply Tunnelingproof.transf_program_match.
-  exists p17; split. apply Linearizeproof.transf_program_match; auto.
-  exists p18; split. apply CleanupLabelsproof.transf_program_match; auto.
-  exists p19; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match.
-  exists p20; split. apply Stackingproof.transf_program_match; auto.
+  exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto.
+  exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match.
+  exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match.
+  exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match.
+  exists p13bis; split. apply total_if_match. apply CSE2proof.transf_program_match.
+  exists p13ter; split. eapply total_if_match; eauto. apply ForwardMovesproof.transf_program_match.
+  exists p14; split. eapply partial_if_match; eauto. apply Deadcodeproof.transf_program_match.
+  exists p14bis; split. eapply total_if_match; eauto. apply Allnontrapproof.transf_program_match.
+  exists p15; split. apply Unusedglobproof.transf_program_match; auto.
+  exists p16; split. apply Allocproof.transf_program_match; auto.
+  exists p17; split. apply Tunnelingproof.transf_program_match.
+  exists p18; split. apply Linearizeproof.transf_program_match; auto.
+  exists p19; split. apply CleanupLabelsproof.transf_program_match; auto.
+  exists p20; split. eapply partial_if_match; eauto. apply Debugvarproof.transf_program_match.
+  exists p21; split. apply Stackingproof.transf_program_match; auto.
   exists tp; split. apply Asmgenproof.transf_program_match; auto.
   reflexivity.
 Qed.
@@ -364,7 +392,7 @@ Ltac DestructM :=
       destruct H as (p & M & MM); clear H
   end.
   repeat DestructM. subst tp.
-  assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p21)).
+  assert (F: forward_simulation (Cstrategy.semantics p) (Asm.semantics p25)).
   {
   eapply compose_forward_simulations.
     eapply SimplExprproof.transl_program_correct; eassumption.
@@ -384,14 +412,22 @@ Ltac DestructM :=
     eapply Inliningproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
+    eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct.
+  eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct.
   eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact Renumberproof.transf_program_correct.
   eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact CSEproof.transf_program_correct.
   eapply compose_forward_simulations.
+  eapply match_if_simulation. eassumption. exact CSE2proof.transf_program_correct.
+  eapply compose_forward_simulations.
+  eapply match_if_simulation. eassumption. exact ForwardMovesproof.transf_program_correct; eassumption.
+  eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact Deadcodeproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
+    eapply match_if_simulation. eassumption. exact Allnontrapproof.transf_program_correct.
+  eapply compose_forward_simulations.
     eapply Unusedglobproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply Allocproof.transf_program_correct; eassumption.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 9c6448b7..848657e5 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -27,6 +27,9 @@ Parameter generate_float_constants: unit -> bool.
 (** For value analysis.  Currently always false. *)
 Parameter va_strict: unit -> bool.
 
+(** Flag -fduplicate. Branch prediction annotation + tail duplication *)
+Parameter optim_duplicate: unit -> bool.
+
 (** Flag -ftailcalls.  For tail call optimization. *)
 Parameter optim_tailcalls: unit -> bool.
 
@@ -36,6 +39,9 @@ Parameter optim_constprop: unit -> bool.
 (** Flag -fcse.  For common subexpression elimination. *)
 Parameter optim_CSE: unit -> bool.
 
+(** Flag -fcse2.  For DMonniaux's common subexpression elimination. *)
+Parameter optim_CSE2: unit -> bool.
+
 (** Flag -fredundancy.  For dead code elimination. *)
 Parameter optim_redundancy: unit -> bool.
 
@@ -62,3 +68,13 @@ Parameter thumb: unit -> bool.
 
 (** Flag -g.  For insertion of debugging information. *)
 Parameter debug: unit -> bool.
+
+(** Flag -fall-loads-nontrap. Turn user loads into non trapping. *)
+Parameter all_loads_nontrap: unit -> bool.
+
+(** Flag -fforward-moves. Forward moves after CSE. *)
+Parameter optim_forward_moves: unit -> bool.
+
+(* TODO is there a more appropriate place? *)
+Require Import Coqlib.
+Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
diff --git a/driver/Configuration.ml b/driver/Configuration.ml
index dcd0d60b..08084720 100644
--- a/driver/Configuration.ml
+++ b/driver/Configuration.ml
@@ -123,7 +123,7 @@ let get_bool_config key =
 
 let arch =
   match get_config_string "arch" with
-  | "powerpc"|"arm"|"x86"|"riscV"|"mppa_k1c" as a -> a
+  | "powerpc"|"arm"|"x86"|"riscV"|"mppa_k1c"|"aarch64" as a -> a
   | v -> bad_config "arch" [v]
 let model = get_config_string "model"
 let abi = get_config_string "abi"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 288bb436..388482a0 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -190,15 +190,27 @@ Processing options:
   -Os            Optimize for code size in preference to code speed
   -Obranchless   Optimize to generate fewer conditional branches; try to produce
                  branch-free instruction sequences as much as possible
+  -finline-auto-threshold n   Inline functions under size n
   -ftailcalls    Optimize function calls in tail position [on]
   -fconst-prop   Perform global constant propagation  [on]
   -ffloat-const-prop <n>  Control constant propagation of floats
                    (<n>=0: none, <n>=1: limited, <n>=2: full; default is full)
   -fcse          Perform common subexpression elimination [on]
+  -fcse2         Perform inter-loop common subexpression elimination [on]
   -fredundancy   Perform redundancy elimination [on]
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on]
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+  -fduplicate <nb_nodes> Perform tail duplication to form superblocks on predicted traces
+    nb_nodes control the heuristic deciding to duplicate or not
+    A value of -1 desactivates the entire pass (including branch prediction)
+    A value of 0 desactivates the duplication (but activates the branch prediction)
+    FIXME : this is desactivated by default for now
+    -finvertcond    Invert conditions based on predicted paths (to prefer fallthrough).
+                    Requires -fduplicate to be also activated [on]
+    -ftracelinearize Linearizes based on the traces identified by duplicate phase
+                    It is heavily recommended to activate -finvertcond with this pass [off]
+  -fforward-moves   Forward moves after CSE
   -finline       Perform inlining of functions [on]
   -finline-functions-called-once Integrate functions only required by their
                  single caller [on]
@@ -257,8 +269,10 @@ let dump_mnemonics destfile =
   exit 0
 
 let optimization_options = [
-  option_ftailcalls; option_fifconversion; option_fconstprop; option_fcse;
-  option_fpostpass; option_fredundancy; option_finline_functions_called_once;
+    option_ftailcalls; option_fifconversion; option_fconstprop;
+    option_fcse; option_fcse2;
+    option_fpostpass;
+    option_fredundancy; option_finline; option_finline_functions_called_once;
 ]
 
 let set_all opts () = List.iter (fun r -> r := true) opts
@@ -313,6 +327,7 @@ let cmdline_actions =
   _Regexp "-O[123]$", Unit (set_all optimization_options);
   Exact "-Os", Set option_Osize;
   Exact "-Obranchless", Set option_Obranchless;
+  Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n);
   Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
   Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
   Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); 
@@ -381,8 +396,12 @@ let cmdline_actions =
   @ f_opt "if-conversion" option_fifconversion
   @ f_opt "const-prop" option_fconstprop
   @ f_opt "cse" option_fcse
+  @ f_opt "cse2" option_fcse2
   @ f_opt "redundancy" option_fredundancy
   @ f_opt "postpass" option_fpostpass
+  @ [ Exact "-fduplicate", Integer (fun n -> option_fduplicate := n) ]
+  @ f_opt "invertcond" option_finvertcond
+  @ f_opt "tracelinearize" option_ftracelinearize
   @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
   @ f_opt "inline" option_finline
   @ f_opt "inline-functions-called-once" option_finline_functions_called_once
@@ -391,6 +410,8 @@ let cmdline_actions =
   @ f_opt "xsaddr" option_fxsaddr
   @ f_opt "addx" option_faddx
   @ f_opt "coalesce-mem" option_fcoalesce_mem
+  @ f_opt "all-loads-nontrap" option_all_loads_nontrap
+  @ f_opt "forward-moves" option_fforward_moves
 (* Code generation options *)
   @ f_opt "fpu" option_ffpu
   @ f_opt "sse" option_ffpu (* backward compatibility *)
diff --git a/driver/Frontend.ml b/driver/Frontend.ml
index 2584db90..b9db0d23 100644
--- a/driver/Frontend.ml
+++ b/driver/Frontend.ml
@@ -117,6 +117,7 @@ let init () =
                    then Machine.rv64
                    else Machine.rv32
     | "mppa_k1c" -> Machine.mppa_k1c
+    | "aarch64" -> Machine.aarch64
     | _         -> assert false
   end;
   Env.set_builtins C2C.builtins;
diff --git a/driver/Interp.ml b/driver/Interp.ml
index a6841460..d4286779 100644
--- a/driver/Interp.ml
+++ b/driver/Interp.ml
@@ -15,12 +15,12 @@
 open Format
 open Camlcoq
 open AST
-open !Integers
+open! Integers
 open Values
 open Memory
 open Globalenvs
 open Events
-open Ctypes
+open! Ctypes
 open Csyntax
 open Csem
 
diff --git a/exportclight/Clightgen.ml b/exportclight/Clightgen.ml
index 4209975a..f7279a5e 100644
--- a/exportclight/Clightgen.ml
+++ b/exportclight/Clightgen.ml
@@ -45,7 +45,7 @@ let compile_c_ast sourcename csyntax ofile =
     | Errors.Error msg ->
       fatal_error loc "%a" print_error msg in
   (* Dump Clight in C syntax if requested *)
-  PrintClight.print_if clight;
+  PrintClight.print_if_2 clight;
   (* Print Clight in Coq syntax *)
   let oc = open_out ofile in
   ExportClight.print_program (Format.formatter_of_out_channel oc)
diff --git a/exportclight/ExportClight.ml b/exportclight/ExportClight.ml
index b124586a..c9d6fced 100644
--- a/exportclight/ExportClight.ml
+++ b/exportclight/ExportClight.ml
@@ -18,7 +18,7 @@
 open Format
 open Camlcoq
 open AST
-open Ctypes
+open! Ctypes
 open Cop
 open Clight
 
@@ -221,6 +221,14 @@ let asttype p t =
       | AST.Tany32 -> "AST.Tany32"
       | AST.Tany64 -> "AST.Tany64")
 
+let astrettype p = function
+  | AST.Tret t -> asttype p t
+  | AST.Tvoid -> fprintf p "AST.Tvoid"
+  | AST.Tint8signed -> fprintf p "AST.Tint8signed"
+  | AST.Tint8unsigned -> fprintf p "AST.Tint8unsigned"
+  | AST.Tint16signed -> fprintf p "AST.Tint16signed"
+  | AST.Tint16unsigned -> fprintf p "AST.Tint16unsigned"
+
 let name_of_chunk = function
   | Mint8signed -> "Mint8signed"
   | Mint8unsigned -> "Mint8unsigned"
@@ -236,7 +244,7 @@ let name_of_chunk = function
 let signatur p sg =
   fprintf p "@[<hov 2>(mksignature@ %a@ %a@ %a)@]"
      (print_list asttype) sg.sig_args
-     (print_option asttype) sg.sig_res
+     astrettype sg.sig_res
      callconv sg.sig_cc
 
 let assertions = ref ([]: (string * typ list) list)
@@ -381,7 +389,7 @@ and lblstmts p = function
               (print_option coqZ) lbl stmt s lblstmts ls
 
 let print_function p (id, f) =
-  fprintf p "Definition f_%s := {|@ " (extern_atom id);
+  fprintf p "Definition f_%s := {|@ " (sanitize (extern_atom id));
   fprintf p "  fn_return := %a;@ " typ f.fn_return;
   fprintf p "  fn_callconv := %a;@ " callconv f.fn_callconv;
   fprintf p "  fn_params := %a;@ " (print_list (print_pair ident typ)) f.fn_params;
@@ -402,7 +410,7 @@ let init_data p = function
   | Init_addrof(id,ofs) -> fprintf p "Init_addrof %a %a" ident id coqptrofs ofs
 
 let print_variable p (id, v) =
-  fprintf p "Definition v_%s := {|@ " (extern_atom id);
+  fprintf p "Definition v_%s := {|@ " (sanitize (extern_atom id));
   fprintf p "  gvar_info := %a;@ " typ v.gvar_info;
   fprintf p "  gvar_init := %a;@ " (print_list init_data) v.gvar_init;
   fprintf p "  gvar_readonly := %B;@ " v.gvar_readonly;
@@ -417,12 +425,12 @@ let print_globdef p (id, gd) =
 
 let print_ident_globdef p = function
   | (id, Gfun(Ctypes.Internal f)) ->
-      fprintf p "(%a, Gfun(Internal f_%s))" ident id (extern_atom id)
+      fprintf p "(%a, Gfun(Internal f_%s))" ident id (sanitize (extern_atom id))
   | (id, Gfun(Ctypes.External(ef, targs, tres, cc))) ->
       fprintf p "@[<hov 2>(%a,@ @[<hov 2>Gfun(External %a@ %a@ %a@ %a))@]@]"
         ident id external_function ef typlist targs typ tres callconv cc
   | (id, Gvar v) ->
-      fprintf p "(%a, Gvar v_%s)" ident id (extern_atom id)
+      fprintf p "(%a, Gvar v_%s)" ident id (sanitize (extern_atom id))
 
 (* Composite definitions *)
 
diff --git a/extraction/extraction.v b/extraction/extraction.v
index ee45b756..9b568951 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -105,10 +105,14 @@ Extract Constant Compopts.generate_float_constants =>
   "fun _ -> !Clflags.option_ffloatconstprop >= 2".
 Extract Constant Compopts.optim_tailcalls =>
   "fun _ -> !Clflags.option_ftailcalls".
+Extract Constant Compopts.optim_duplicate =>
+  "fun _ -> (if !Clflags.option_fduplicate = -1 then false else true)".
 Extract Constant Compopts.optim_constprop =>
   "fun _ -> !Clflags.option_fconstprop".
 Extract Constant Compopts.optim_CSE =>
   "fun _ -> !Clflags.option_fcse".
+Extract Constant Compopts.optim_CSE2 =>
+  "fun _ -> !Clflags.option_fcse2".
 Extract Constant Compopts.optim_redundancy =>
   "fun _ -> !Clflags.option_fredundancy".
 Extract Constant Compopts.optim_postpass =>
@@ -127,6 +131,12 @@ Extract Constant Compopts.optim_addx =>
   "fun _ -> !Clflags.option_faddx".
 Extract Constant Compopts.optim_coalesce_mem =>
   "fun _ -> !Clflags.option_fcoalesce_mem".
+Extract Constant Compopts.optim_forward_moves =>
+  "fun _ -> !Clflags.option_fforward_moves".
+Extract Constant Compopts.va_strict =>
+  "fun _ -> false".
+Extract Constant Compopts.all_loads_nontrap =>
+  "fun _ -> !Clflags.option_all_loads_nontrap".
 
 (* Compiler *)
 Extract Constant Compiler.print_Clight => "PrintClight.print_if".
@@ -136,7 +146,7 @@ Extract Constant Compiler.print_LTL => "PrintLTL.print_if".
 Extract Constant Compiler.print_Mach => "PrintMach.print_if".
 Extract Constant Compiler.print => "fun (f: 'a -> unit) (x: 'a) -> f x; x".
 Extract Constant Compiler.time  => "Timing.time_coq".
-Extract Constant Asmgen.time  => "Timing.time_coq".
+Extract Constant Compopts.time  => "Timing.time_coq".
 
 (*Extraction Inline Compiler.apply_total Compiler.apply_partial.*)
 
diff --git a/lib/BoolEqual.v b/lib/BoolEqual.v
index c9e7bad5..e8c1d831 100644
--- a/lib/BoolEqual.v
+++ b/lib/BoolEqual.v
@@ -106,8 +106,8 @@ Ltac bool_eq_refl_case :=
   end.
 
 Ltac bool_eq_refl :=
-  let H := fresh "Hrec" in let x := fresh "x" in
-  fix H 1; intros x; destruct x; simpl; bool_eq_refl_case.
+  let Hrec := fresh "Hrec" in let x := fresh "x" in
+  fix Hrec 1; intros x; destruct x; simpl; bool_eq_refl_case.
 
 Lemma false_not_true:
   forall (P: Prop), false = true -> P.
@@ -124,7 +124,6 @@ Qed.
 
 Ltac bool_eq_sound_case :=
   match goal with
-  | [ H: false = true |- _ ] => exact (false_not_true _ H)
   | [ H: _ && _ = true |- _ ] => apply andb_prop in H; destruct H; bool_eq_sound_case
   | [ H: proj_sumbool ?a = true |- _ ] => apply proj_sumbool_true in H; bool_eq_sound_case
   | [ |- ?C ?x1 ?x2 ?x3 ?x4 = ?C ?y1 ?y2 ?y3 ?y4 ] => apply f_equal4; auto
@@ -137,7 +136,9 @@ Ltac bool_eq_sound_case :=
 
 Ltac bool_eq_sound :=
   let Hrec := fresh "Hrec" in let x := fresh "x" in let y := fresh "y" in
-  fix Hrec 1; intros x y; destruct x, y; simpl; intro; bool_eq_sound_case.
+  let H := fresh "EQ" in
+  fix Hrec 1; intros x y; destruct x, y; intro H;
+  try (apply (false_not_true _ H)); simpl in H; bool_eq_sound_case.
 
 Lemma dec_eq_from_bool_eq:
   forall (A: Type) (f: A -> A -> bool)
diff --git a/lib/Floats.v b/lib/Floats.v
index 13350dd0..272efa52 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -16,7 +16,7 @@
 
 (** Formalization of floating-point numbers, using the Flocq library. *)
 
-Require Import Coqlib Zbits Integers.
+Require Import Coqlib Zbits Integers Axioms.
 (*From Flocq*)
 Require Import Binary Bits Core.
 Require Import IEEE754_extra.
@@ -27,8 +27,69 @@ Close Scope R_scope.
 Open Scope Z_scope.
 
 Definition float := binary64. (**r the type of IEE754 double-precision FP numbers *)
+
+Definition float_eq: forall (i1 i2: float), {i1=i2} + {i1<>i2}.
+Proof.
+  intros. destruct i1.
+(* B754_zero *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ.
+    + apply eqb_prop in BEQ. subst. left. reflexivity.
+    + apply eqb_false_iff in BEQ. right. intro. inv H. contradiction.
+(* B754_infinity *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ.
+    + apply eqb_prop in BEQ. subst. left. reflexivity.
+    + apply eqb_false_iff in BEQ. right. intro. inv H. contradiction.
+(* B754_nan *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ.
+    + generalize (Pos.eq_dec pl pl0). intro. inv H.
+      ++ left. apply eqb_prop in BEQ. subst.
+         assert (e = e0) by (apply proof_irr). congruence.
+      ++ right. intro. inv H. contradiction.
+    + apply eqb_false_iff in BEQ. right. intro. inv H. contradiction.
+(* B754_finite *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ; [apply eqb_prop in BEQ | apply eqb_false_iff in BEQ].
+    generalize (Pos.eq_dec m m0). intro. inv H.
+    generalize (Z.eq_dec e e1). intro. inv H.
+    1: { left. assert (e0 = e2) by (apply proof_irr). congruence. }
+    all: right; intro; inv H; contradiction.
+Qed.
+
 Definition float32 := binary32. (**r the type of IEE754 single-precision FP numbers *)
 
+Definition float32_eq: forall (i1 i2: float32), {i1=i2} + {i1<>i2}.
+Proof.
+  intros. destruct i1.
+(* B754_zero *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ.
+    + apply eqb_prop in BEQ. subst. left. reflexivity.
+    + apply eqb_false_iff in BEQ. right. intro. inv H. contradiction.
+(* B754_infinity *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ.
+    + apply eqb_prop in BEQ. subst. left. reflexivity.
+    + apply eqb_false_iff in BEQ. right. intro. inv H. contradiction.
+(* B754_nan *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ.
+    + generalize (Pos.eq_dec pl pl0). intro. inv H.
+      ++ left. apply eqb_prop in BEQ. subst.
+         assert (e = e0) by (apply proof_irr). congruence.
+      ++ right. intro. inv H. contradiction.
+    + apply eqb_false_iff in BEQ. right. intro. inv H. contradiction.
+(* B754_finite *)
+  - destruct i2; try (right; discriminate).
+    destruct (eqb s s0) eqn:BEQ; [apply eqb_prop in BEQ | apply eqb_false_iff in BEQ].
+    generalize (Pos.eq_dec m m0). intro. inv H.
+    generalize (Z.eq_dec e e1). intro. inv H.
+    1: { left. assert (e0 = e2) by (apply proof_irr). congruence. }
+    all: right; intro; inv H; contradiction.
+Qed.
+
 (** Boolean-valued comparisons *)
 
 Definition cmp_of_comparison (c: comparison) (x: option Datatypes.comparison) : bool :=
diff --git a/lib/Heaps.v b/lib/Heaps.v
index 9fa07a1d..85343998 100644
--- a/lib/Heaps.v
+++ b/lib/Heaps.v
@@ -256,14 +256,14 @@ Proof.
   eapply gt_heap_trans with y; eauto. red; auto.
 - intuition.
   eapply lt_heap_trans; eauto. red; auto.
-  eapply gt_heap_trans; eauto. red; auto.
+  eapply gt_heap_trans; eauto. red; auto with ordered_type.
 - intuition. eapply gt_heap_trans; eauto. red; auto.
 - rewrite e3 in *; simpl in *. intuition.
   eapply lt_heap_trans with y; eauto. red; auto.
   eapply gt_heap_trans; eauto. red; auto.
 - intuition.
   eapply lt_heap_trans with y; eauto. red; auto.
-  eapply gt_heap_trans; eauto. red; auto.
+  eapply gt_heap_trans; eauto. red; auto with ordered_type.
   eapply gt_heap_trans with x; eauto. red; auto.
 - rewrite e3 in *; simpl in *; intuition.
   eapply gt_heap_trans; eauto. red; auto.
@@ -308,7 +308,7 @@ Proof.
   intros. unfold insert.
   case_eq (partition x h). intros a b EQ; simpl.
   assert (E.eq y x \/ ~E.eq y x).
-    destruct (E.compare y x); auto.
+    destruct (E.compare y x); auto with ordered_type.
     right; red; intros. elim (E.lt_not_eq l). apply E.eq_sym; auto.
   destruct H0.
   tauto.
diff --git a/lib/Integers.v b/lib/Integers.v
index f4213332..246c708c 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -4,7 +4,7 @@
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
 (*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Copyright Institut National de Recherstestche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the GNU General Public License as published by  *)
 (*  the Free Software Foundation, either version 2 of the License, or  *)
@@ -16,7 +16,7 @@
 (** Formalizations of machine integers modulo $2^N$ #2<sup>N</sup>#. *)
 
 Require Import Eqdep_dec Zquot Zwf.
-Require Import Coqlib Zbits.
+Require Import Coqlib Zbits Axioms.
 Require Archi.
 
 (** * Comparisons *)
@@ -29,6 +29,11 @@ Inductive comparison : Type :=
   | Cgt : comparison               (**r greater than *)
   | Cge : comparison.              (**r greater than or equal *)
 
+Definition comparison_eq: forall (x y: comparison), {x = y} + {x <> y}.
+Proof.
+  decide equality.
+Defined.
+
 Definition negate_comparison (c: comparison): comparison :=
   match c with
   | Ceq => Cne
@@ -668,6 +673,11 @@ Proof.
   intros. generalize (eq_spec x y); case (eq x y); intros; congruence.
 Qed.
 
+Theorem same_if_eq: forall x y, eq x y = true -> x = y.
+Proof.
+  intros. generalize (eq_spec x y); rewrite H; auto.
+Qed.
+
 Theorem eq_signed:
   forall x y, eq x y = if zeq (signed x) (signed y) then true else false.
 Proof.
@@ -1139,6 +1149,12 @@ Proof.
   intros. apply Ztestbit_above with wordsize; auto. apply unsigned_range.
 Qed.
 
+Lemma bits_below:
+  forall x i, i < 0 -> testbit x i = false.
+Proof.
+  intros. apply Z.testbit_neg_r; auto.
+Qed.
+
 Lemma bits_zero:
   forall i, testbit zero i = false.
 Proof.
@@ -1178,6 +1194,34 @@ Proof.
   rewrite <- half_modulus_modulus. apply unsigned_range.
 Qed.
 
+Local Transparent repr.
+Lemma sign_bit_of_signed: forall x,
+    (testbit x (zwordsize - 1)) = lt x zero.
+Proof.
+  intro.
+  rewrite sign_bit_of_unsigned.
+  unfold lt.
+  unfold signed, unsigned.
+  simpl.
+  pose proof half_modulus_pos as HMOD.
+  destruct (zlt 0 half_modulus) as [HMOD' | HMOD'].
+  2: omega.
+  clear HMOD'.
+  destruct (zlt (intval x) half_modulus) as [ LOW | HIGH].
+  {
+    destruct x as [ix RANGE].
+    simpl in *.
+    destruct (zlt ix 0). omega.
+    reflexivity.
+  }
+  destruct (zlt _ _) as [LOW' | HIGH']; trivial.
+  destruct x as [ix RANGE].
+  simpl in *.
+  rewrite half_modulus_modulus in *.
+  omega.
+Qed.
+Local Opaque repr.
+
 Lemma bits_signed:
   forall x i, 0 <= i ->
   Z.testbit (signed x) i = testbit x (if zlt i zwordsize then i else zwordsize - 1).
@@ -2411,6 +2455,57 @@ Proof.
   bit_solve. destruct (zlt (i + unsigned (sub iwordsize y)) zwordsize); auto.
 Qed.
 
+Theorem shrx1_shr:
+  forall x,
+  ltu one (repr (zwordsize - 1)) = true ->
+  shrx x (repr 1) = shr (add x (shru x (repr (zwordsize - 1)))) (repr 1).
+Proof.
+  intros.
+  rewrite shrx_shr by assumption.
+  rewrite shl_mul_two_p.
+  rewrite mul_commut. rewrite mul_one.
+  change (repr 1) with one.
+  rewrite unsigned_one.
+  change (two_p 1) with 2.
+  unfold sub.
+  rewrite unsigned_one.
+  assert (0 <= 2 <= max_unsigned).
+  {
+    unfold max_unsigned, modulus.
+    unfold zwordsize in *.
+    unfold ltu in *.
+    rewrite unsigned_one in H.
+    rewrite unsigned_repr in H.
+    {
+      destruct (zlt 1 (Z.of_nat wordsize - 1)) as [ LT | NONE].
+      2: discriminate.
+      clear H.
+      rewrite two_power_nat_two_p.
+      split.
+      omega.
+      set (w := (Z.of_nat wordsize)) in *.
+      assert ((two_p 2) <= (two_p w)) as MONO.
+      {
+        apply two_p_monotone.
+        omega.
+      }
+      change (two_p 2) with 4 in MONO.
+      omega.
+    }
+    generalize wordsize_max_unsigned.
+    fold zwordsize.
+    generalize wordsize_pos.
+    omega.
+  }
+  rewrite unsigned_repr by assumption.
+  simpl.
+  rewrite shru_lt_zero.
+  destruct (lt x zero).
+  reflexivity.
+  rewrite add_zero.
+  reflexivity.
+Qed.
+
 Theorem shrx_carry:
   forall x y,
   ltu y (repr (zwordsize - 1)) = true ->
@@ -2511,12 +2606,11 @@ Proof.
 Qed.
 
 Lemma bits_sign_ext:
-  forall n x i, 0 <= i < zwordsize -> 0 < n ->
+  forall n x i, 0 <= i < zwordsize ->
   testbit (sign_ext n x) i = testbit x (if zlt i n then i else n - 1).
 Proof.
   intros. unfold sign_ext.
-  rewrite testbit_repr; auto. rewrite Zsign_ext_spec. destruct (zlt i n); auto.
-  omega. auto.
+  rewrite testbit_repr; auto. apply Zsign_ext_spec. omega. 
 Qed.
 
 Hint Rewrite bits_zero_ext bits_sign_ext: ints.
@@ -2528,12 +2622,24 @@ Proof.
   rewrite bits_zero_ext. apply zlt_true. omega. omega.
 Qed.
 
+Theorem zero_ext_below:
+  forall n x, n <= 0 -> zero_ext n x = zero.
+Proof.
+  intros. bit_solve. destruct (zlt i n); auto. apply bits_below; omega. omega.
+Qed.
+
 Theorem sign_ext_above:
   forall n x, n >= zwordsize -> sign_ext n x = x.
 Proof.
   intros. apply same_bits_eq; intros.
   unfold sign_ext; rewrite testbit_repr; auto.
-  rewrite Zsign_ext_spec. rewrite zlt_true. auto. omega. omega. omega.
+  rewrite Zsign_ext_spec. rewrite zlt_true. auto. omega. omega.
+Qed.
+
+Theorem sign_ext_below:
+  forall n x, n <= 0 -> sign_ext n x = zero.
+Proof.
+  intros. bit_solve. apply bits_below. destruct (zlt i n); omega.
 Qed.
 
 Theorem zero_ext_and:
@@ -2570,7 +2676,7 @@ Proof.
 Qed.
 
 Theorem sign_ext_widen:
-  forall x n n', 0 < n  <= n' ->
+  forall x n n', 0 < n <= n' ->
   sign_ext n' (sign_ext n x) = sign_ext n x.
 Proof.
   intros. destruct (zlt n' zwordsize).
@@ -2578,9 +2684,8 @@ Proof.
   auto.
   rewrite (zlt_false _ i n).
   destruct (zlt (n' - 1) n); f_equal; omega.
-  omega. omega.
+  omega.
   destruct (zlt i n'); omega.
-  omega. omega.
   apply sign_ext_above; auto.
 Qed.
 
@@ -2594,7 +2699,6 @@ Proof.
   auto.
   rewrite !zlt_false. auto. omega. omega. omega.
   destruct (zlt i n'); omega.
-  omega.
   apply sign_ext_above; auto.
 Qed.
 
@@ -2614,9 +2718,7 @@ Theorem sign_ext_narrow:
 Proof.
   intros. destruct (zlt n zwordsize).
   bit_solve. destruct (zlt i n); f_equal; apply zlt_true; omega.
-  omega.
   destruct (zlt i n); omega.
-  omega. omega.
   rewrite (sign_ext_above n'). auto. omega.
 Qed.
 
@@ -2628,7 +2730,7 @@ Proof.
   bit_solve.
   destruct (zlt i n); auto.
   rewrite zlt_true; auto. omega.
-  omega. omega. omega.
+  omega. omega.
   rewrite sign_ext_above; auto.
 Qed.
 
@@ -2643,7 +2745,7 @@ Theorem sign_ext_idem:
 Proof.
   intros. apply sign_ext_widen. omega.
 Qed.
-
+ 
 Theorem sign_ext_zero_ext:
   forall n x, 0 < n -> sign_ext n (zero_ext n x) = sign_ext n x.
 Proof.
@@ -2671,42 +2773,93 @@ Proof.
   rewrite <- (sign_ext_zero_ext n y H). congruence.
 Qed.
 
-Theorem zero_ext_shru_shl:
+Theorem shru_shl:
+  forall x y z, ltu y iwordsize = true -> ltu z iwordsize = true ->
+  shru (shl x y) z =
+  if ltu z y then shl (zero_ext (zwordsize - unsigned y) x) (sub y z)
+             else zero_ext (zwordsize - unsigned z) (shru x (sub z y)).
+Proof.
+  intros. apply ltu_iwordsize_inv in H; apply ltu_iwordsize_inv in H0.
+  unfold ltu. set (Y := unsigned y) in *; set (Z := unsigned z) in *.
+  apply same_bits_eq; intros. rewrite bits_shru by auto. fold Z.
+  destruct (zlt Z Y).
+- assert (A: unsigned (sub y z) = Y - Z).
+  { apply unsigned_repr. generalize wordsize_max_unsigned; omega. }
+  symmetry; rewrite bits_shl, A by omega.
+  destruct (zlt (i + Z) zwordsize).
++ rewrite bits_shl by omega. fold Y.
+  destruct (zlt i (Y - Z)); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+  rewrite bits_zero_ext by omega. rewrite zlt_true by omega. f_equal; omega.
++ rewrite bits_zero_ext by omega. rewrite ! zlt_false by omega. auto.
+- assert (A: unsigned (sub z y) = Z - Y).
+  { apply unsigned_repr. generalize wordsize_max_unsigned; omega. }
+  rewrite bits_zero_ext, bits_shru, A by omega.
+  destruct (zlt (i + Z) zwordsize); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+  rewrite bits_shl by omega. fold Y.
+  destruct (zlt (i + Z) Y).
++ rewrite zlt_false by omega. auto.
++ rewrite zlt_true by omega. f_equal; omega.
+Qed.
+
+Corollary zero_ext_shru_shl:
   forall n x,
   0 < n < zwordsize ->
   let y := repr (zwordsize - n) in
   zero_ext n x = shru (shl x y) y.
 Proof.
   intros.
-  assert (unsigned y = zwordsize - n).
-    unfold y. apply unsigned_repr. generalize wordsize_max_unsigned. omega.
-  apply same_bits_eq; intros.
-  rewrite bits_zero_ext.
-  rewrite bits_shru; auto.
-  destruct (zlt i n).
-  rewrite zlt_true. rewrite bits_shl. rewrite zlt_false. f_equal. omega.
-  omega. omega. omega.
-  rewrite zlt_false. auto. omega.
-  omega.
-Qed.
-
-Theorem sign_ext_shr_shl:
+  assert (A: unsigned y = zwordsize - n).
+  { unfold y. apply unsigned_repr. generalize wordsize_max_unsigned. omega. }
+  assert (B: ltu y iwordsize = true).
+  { unfold ltu; rewrite A, unsigned_repr_wordsize. apply zlt_true; omega. }
+  rewrite shru_shl by auto. unfold ltu; rewrite zlt_false by omega.
+  rewrite sub_idem, shru_zero. f_equal. rewrite A; omega.
+Qed.
+
+Theorem shr_shl:
+  forall x y z, ltu y iwordsize = true -> ltu z iwordsize = true ->
+  shr (shl x y) z =
+  if ltu z y then shl (sign_ext (zwordsize - unsigned y) x) (sub y z)
+             else sign_ext (zwordsize - unsigned z) (shr x (sub z y)).
+Proof.
+  intros. apply ltu_iwordsize_inv in H; apply ltu_iwordsize_inv in H0.
+  unfold ltu. set (Y := unsigned y) in *; set (Z := unsigned z) in *.
+  apply same_bits_eq; intros. rewrite bits_shr by auto. fold Z.
+  rewrite bits_shl by (destruct (zlt (i + Z) zwordsize); omega). fold Y.
+  destruct (zlt Z Y).
+- assert (A: unsigned (sub y z) = Y - Z).
+  { apply unsigned_repr. generalize wordsize_max_unsigned; omega. }
+  rewrite bits_shl, A by omega.
+  destruct (zlt i (Y - Z)).
++ apply zlt_true. destruct (zlt (i + Z) zwordsize); omega.
++ rewrite zlt_false by (destruct (zlt (i + Z) zwordsize); omega).
+  rewrite bits_sign_ext by omega. f_equal. 
+  destruct (zlt (i + Z) zwordsize).
+  rewrite zlt_true by omega. omega.
+  rewrite zlt_false by omega. omega.
+- assert (A: unsigned (sub z y) = Z - Y).
+  { apply unsigned_repr. generalize wordsize_max_unsigned; omega. }
+  rewrite bits_sign_ext by omega.
+  rewrite bits_shr by (destruct (zlt i (zwordsize - Z)); omega).
+  rewrite A. rewrite zlt_false by (destruct (zlt (i + Z) zwordsize); omega).
+  f_equal. destruct (zlt i (zwordsize - Z)).
++ rewrite ! zlt_true by omega. omega.
++ rewrite ! zlt_false by omega. rewrite zlt_true by omega. omega.
+Qed.
+
+Corollary sign_ext_shr_shl:
   forall n x,
   0 < n < zwordsize ->
   let y := repr (zwordsize - n) in
   sign_ext n x = shr (shl x y) y.
 Proof.
   intros.
-  assert (unsigned y = zwordsize - n).
-    unfold y. apply unsigned_repr. generalize wordsize_max_unsigned. omega.
-  apply same_bits_eq; intros.
-  rewrite bits_sign_ext.
-  rewrite bits_shr; auto.
-  destruct (zlt i n).
-  rewrite zlt_true. rewrite bits_shl. rewrite zlt_false. f_equal. omega.
-  omega. omega. omega.
-  rewrite zlt_false. rewrite bits_shl. rewrite zlt_false. f_equal. omega.
-  omega. omega. omega. omega. omega.
+  assert (A: unsigned y = zwordsize - n).
+  { unfold y. apply unsigned_repr. generalize wordsize_max_unsigned. omega. }
+  assert (B: ltu y iwordsize = true).
+  { unfold ltu; rewrite A, unsigned_repr_wordsize. apply zlt_true; omega. }
+  rewrite shr_shl by auto. unfold ltu; rewrite zlt_false by omega.
+  rewrite sub_idem, shr_zero. f_equal. rewrite A; omega.
 Qed.
 
 (** [zero_ext n x] is the unique integer congruent to [x] modulo [2^n]
@@ -2766,7 +2919,7 @@ Proof.
   apply eqmod_same_bits; intros.
   rewrite H0 in H1. rewrite H0.
   fold (testbit (sign_ext n x) i). rewrite bits_sign_ext.
-  rewrite zlt_true. auto. omega. omega. omega.
+  rewrite zlt_true. auto. omega. omega.
 Qed.
 
 Lemma eqmod_sign_ext:
@@ -2781,6 +2934,132 @@ Proof.
   apply eqmod_sign_ext'; auto.
 Qed.
 
+(** Combinations of shifts and zero/sign extensions *)
+
+Lemma shl_zero_ext:
+  forall n m x, 0 <= n ->
+  shl (zero_ext n x) m = zero_ext (n + unsigned m) (shl x m).
+Proof.
+  intros. apply same_bits_eq; intros.
+  rewrite bits_zero_ext, ! bits_shl by omega.
+  destruct (zlt i (unsigned m)).
+- rewrite zlt_true by omega; auto.
+- rewrite bits_zero_ext by omega.
+  destruct (zlt (i - unsigned m) n); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+Qed.
+
+Lemma shl_sign_ext:
+  forall n m x, 0 < n ->
+  shl (sign_ext n x) m = sign_ext (n + unsigned m) (shl x m).
+Proof.
+  intros. generalize (unsigned_range m); intros.
+  apply same_bits_eq; intros.
+  rewrite bits_sign_ext, ! bits_shl by omega.
+  destruct (zlt i (n + unsigned m)).
+- rewrite bits_shl by auto. destruct (zlt i (unsigned m)); auto.
+  rewrite bits_sign_ext by omega. f_equal. apply zlt_true. omega.
+- rewrite zlt_false by omega. rewrite bits_shl by omega. rewrite zlt_false by omega.
+  rewrite bits_sign_ext by omega. f_equal. rewrite zlt_false by omega. omega.
+Qed.
+
+Lemma shru_zero_ext:
+  forall n m x, 0 <= n ->
+  shru (zero_ext (n + unsigned m) x) m = zero_ext n (shru x m).
+Proof.
+  intros. bit_solve.
+- destruct (zlt (i + unsigned m) zwordsize).
+* destruct (zlt i n); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+* destruct (zlt i n); auto.
+- generalize (unsigned_range m); omega.
+- omega.
+Qed.
+
+Lemma shru_zero_ext_0:
+  forall n m x, n <= unsigned m ->
+  shru (zero_ext n x) m = zero.
+Proof.
+  intros. bit_solve.
+- destruct (zlt (i + unsigned m) zwordsize); auto.
+  apply zlt_false. omega.
+- generalize (unsigned_range m); omega.
+Qed.
+
+Lemma shr_sign_ext:
+  forall n m x, 0 < n -> n + unsigned m < zwordsize ->
+  shr (sign_ext (n + unsigned m) x) m = sign_ext n (shr x m).
+Proof.
+  intros. generalize (unsigned_range m); intros.
+  apply same_bits_eq; intros.
+  rewrite bits_sign_ext, bits_shr by auto.
+  rewrite bits_sign_ext, bits_shr.
+- f_equal.
+  destruct (zlt i n), (zlt (i + unsigned m) zwordsize).
++ apply zlt_true; omega.
++ apply zlt_true; omega.
++ rewrite zlt_false by omega. rewrite zlt_true by omega. omega.
++ rewrite zlt_false by omega. rewrite zlt_true by omega. omega.
+- destruct (zlt i n); omega.
+- destruct (zlt (i + unsigned m) zwordsize); omega.
+Qed.
+
+Lemma zero_ext_shru_min:
+  forall s x n, ltu n iwordsize = true ->
+  zero_ext s (shru x n) = zero_ext (Z.min s (zwordsize - unsigned n)) (shru x n).
+Proof.
+  intros. apply ltu_iwordsize_inv in H.
+  apply Z.min_case_strong; intros; auto.
+  bit_solve; try omega.
+  destruct (zlt i (zwordsize - unsigned n)).
+  rewrite zlt_true by omega. auto.
+  destruct (zlt i s); auto. rewrite zlt_false by omega; auto.
+Qed.
+
+Lemma sign_ext_shr_min:
+  forall s x n, ltu n iwordsize = true ->
+  sign_ext s (shr x n) = sign_ext (Z.min s (zwordsize - unsigned n)) (shr x n).
+Proof.
+  intros. apply ltu_iwordsize_inv in H.
+  rewrite Z.min_comm. 
+  destruct (Z.min_spec (zwordsize - unsigned n) s) as [[A B] | [A B]]; rewrite B; auto.
+  apply same_bits_eq; intros. rewrite ! bits_sign_ext by auto.
+  destruct (zlt i (zwordsize - unsigned n)). 
+  rewrite zlt_true by omega. auto.
+  assert (C: testbit (shr x n) (zwordsize - unsigned n - 1) = testbit x (zwordsize - 1)).
+  { rewrite bits_shr by omega. rewrite zlt_true by omega. f_equal; omega. }
+  rewrite C. destruct (zlt i s); rewrite bits_shr by omega.
+  rewrite zlt_false by omega. auto.
+  rewrite zlt_false by omega. auto.
+Qed.
+
+Lemma shl_zero_ext_min:
+  forall s x n, ltu n iwordsize = true ->
+  shl (zero_ext s x) n = shl (zero_ext (Z.min s (zwordsize - unsigned n)) x) n.
+Proof.
+  intros. apply ltu_iwordsize_inv in H.
+  apply Z.min_case_strong; intros; auto.
+  apply same_bits_eq; intros. rewrite ! bits_shl by auto.
+  destruct (zlt i (unsigned n)); auto.
+  rewrite ! bits_zero_ext by omega.
+  destruct (zlt (i - unsigned n) s).
+  rewrite zlt_true by omega; auto.
+  rewrite zlt_false by omega; auto.
+Qed.
+
+Lemma shl_sign_ext_min:
+  forall s x n, ltu n iwordsize = true ->
+  shl (sign_ext s x) n = shl (sign_ext (Z.min s (zwordsize - unsigned n)) x) n.
+Proof.
+  intros. apply ltu_iwordsize_inv in H.
+  rewrite Z.min_comm. 
+  destruct (Z.min_spec (zwordsize - unsigned n) s) as [[A B] | [A B]]; rewrite B; auto.
+  apply same_bits_eq; intros. rewrite ! bits_shl by auto.
+  destruct (zlt i (unsigned n)); auto.
+  rewrite ! bits_sign_ext by omega. f_equal.
+  destruct (zlt (i - unsigned n) s).
+  rewrite zlt_true by omega; auto.
+  omegaContradiction.
+Qed.
+
 (** ** Properties of [one_bits] (decomposition in sum of powers of two) *)
 
 Theorem one_bits_range:
@@ -3127,10 +3406,11 @@ Proof.
   assert (0 <= Z.min (size a) (size b)).
     generalize (size_range a) (size_range b). zify; omega.
   apply bits_size_3. auto. intros.
-  rewrite bits_and. zify. subst z z0. destruct H1.
-  rewrite (bits_size_2 a). auto. omega.
-  rewrite (bits_size_2 b). apply andb_false_r. omega.
-  omega.
+  rewrite bits_and by omega.
+  rewrite andb_false_iff.
+  generalize (bits_size_2 a i).
+  generalize (bits_size_2 b i).
+  zify; intuition.
 Qed.
 
 Corollary and_interval:
@@ -3392,6 +3672,104 @@ Proof.
   unfold ltu. apply zlt_true. change (unsigned z < 63). rewrite A; omega.
 Qed.
 
+Lemma shr'63:
+  forall x, (shr' x (Int.repr 63)) = if lt x zero then mone else zero.
+Proof.
+  intro.
+  unfold shr', mone, zero.
+  rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; omega).
+  apply same_bits_eq.
+  intros i BIT.
+  rewrite testbit_repr by assumption.
+  rewrite Z.shiftr_spec by omega.
+  rewrite bits_signed by omega.
+  simpl.
+  change zwordsize with 64 in *.
+  destruct (zlt _ _) as [LT | GE].
+  {
+    replace i with 0 in * by omega.
+    change (0 + 63) with (zwordsize - 1).
+    rewrite  sign_bit_of_signed.
+    destruct (lt x _).
+    all: rewrite testbit_repr by (change zwordsize with 64 in *; omega).
+    all: simpl; reflexivity.
+  }
+  change (64 - 1) with (zwordsize - 1).
+  rewrite  sign_bit_of_signed.
+  destruct (lt x _).
+  all: rewrite testbit_repr by (change zwordsize with 64 in *; omega).
+  { symmetry.
+    apply Ztestbit_m1.
+    tauto.
+  }
+  symmetry.
+  apply Ztestbit_0.
+Qed.
+
+Lemma shru'63:
+  forall x, (shru' x (Int.repr 63)) = if lt x zero then one else zero.
+Proof.
+  intro.
+  unfold shru'.
+  rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; omega).
+  apply same_bits_eq.
+  intros i BIT.
+  rewrite testbit_repr by assumption.
+  rewrite Z.shiftr_spec by omega.
+  unfold lt.
+  rewrite signed_zero.
+  unfold one, zero.
+  destruct (zlt _ 0) as [LT | GE].
+  {
+    rewrite testbit_repr by assumption.
+    destruct (zeq i 0) as [IZERO | INONZERO].
+    { subst i.
+      change (Z.testbit (unsigned x) (0 + 63)) with (testbit x (zwordsize - 1)).
+      rewrite sign_bit_of_signed.
+      unfold lt.
+      rewrite signed_zero.
+      destruct (zlt _ _); try omega.
+      reflexivity.
+    }
+    change (Z.testbit (unsigned x) (i + 63)) with (testbit x (i+63)).
+    rewrite bits_above by (change zwordsize with 64; omega).
+    rewrite Ztestbit_1.
+    destruct (zeq i 0); trivial.
+    subst i.
+    omega.
+  }
+  destruct (zeq i 0) as [IZERO | INONZERO].
+  { subst i.
+    change (Z.testbit (unsigned x) (0 + 63)) with (testbit x (zwordsize - 1)).
+    rewrite sign_bit_of_signed.
+    unfold lt.
+    rewrite signed_zero.
+    rewrite bits_zero.
+    destruct (zlt _ _); try omega.
+    reflexivity.
+  }
+  change (Z.testbit (unsigned x) (i + 63)) with (testbit x (i + 63)).
+  rewrite bits_zero.
+  apply bits_above.
+  change zwordsize with 64.
+  omega.
+Qed.
+  
+Theorem shrx'1_shr':
+  forall x,
+  Int.ltu Int.one (Int.repr (zwordsize - 1)) = true ->
+  shrx' x (Int.repr 1) = shr' (add x (shru' x (Int.repr (Int64.zwordsize - 1)))) (Int.repr 1).
+Proof.
+  intros.
+  rewrite shrx'_shr_2 by reflexivity.
+  change (Int.sub (Int.repr 64) (Int.repr 1)) with (Int.repr 63).
+  f_equal. f_equal.
+  rewrite shr'63.
+  rewrite shru'63.
+  rewrite shru'63.
+  destruct (lt x zero); reflexivity.
+Qed.  
+
 Remark int_ltu_2_inv:
   forall y z,
   Int.ltu y iwordsize' = true ->
@@ -3499,6 +3877,190 @@ Proof.
   unfold shr, shr'; rewrite <- A; auto.
 Qed.
 
+Theorem shru'_shl':
+  forall x y z, Int.ltu y iwordsize' = true -> Int.ltu z iwordsize' = true ->
+  shru' (shl' x y) z =
+  if Int.ltu z y then shl' (zero_ext (zwordsize - Int.unsigned y) x) (Int.sub y z)
+                 else zero_ext (zwordsize - Int.unsigned z) (shru' x (Int.sub z y)).
+Proof.
+  intros. apply Int.ltu_inv in H; apply Int.ltu_inv in H0.
+  change (Int.unsigned iwordsize') with zwordsize in *.
+  unfold Int.ltu. set (Y := Int.unsigned y) in *; set (Z := Int.unsigned z) in *.
+  apply same_bits_eq; intros. rewrite bits_shru' by auto. fold Z.
+  destruct (zlt Z Y).
+- assert (A: Int.unsigned (Int.sub y z) = Y - Z).
+  { apply Int.unsigned_repr. assert (zwordsize < Int.max_unsigned) by reflexivity. omega. }
+  symmetry; rewrite bits_shl', A by omega.
+  destruct (zlt (i + Z) zwordsize).
++ rewrite bits_shl' by omega. fold Y.
+  destruct (zlt i (Y - Z)); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+  rewrite bits_zero_ext by omega. rewrite zlt_true by omega. f_equal; omega.
++ rewrite bits_zero_ext by omega. rewrite ! zlt_false by omega. auto.
+- assert (A: Int.unsigned (Int.sub z y) = Z - Y).
+  { apply Int.unsigned_repr. assert (zwordsize < Int.max_unsigned) by reflexivity. omega. }
+  rewrite bits_zero_ext, bits_shru', A by omega.
+  destruct (zlt (i + Z) zwordsize); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+  rewrite bits_shl' by omega. fold Y.
+  destruct (zlt (i + Z) Y).
++ rewrite zlt_false by omega. auto.
++ rewrite zlt_true by omega. f_equal; omega.
+Qed.
+
+Theorem shr'_shl':
+  forall x y z, Int.ltu y iwordsize' = true -> Int.ltu z iwordsize' = true ->
+  shr' (shl' x y) z =
+  if Int.ltu z y then shl' (sign_ext (zwordsize - Int.unsigned y) x) (Int.sub y z)
+                 else sign_ext (zwordsize - Int.unsigned z) (shr' x (Int.sub z y)).
+Proof.
+  intros. apply Int.ltu_inv in H; apply Int.ltu_inv in H0.
+  change (Int.unsigned iwordsize') with zwordsize in *.
+  unfold Int.ltu. set (Y := Int.unsigned y) in *; set (Z := Int.unsigned z) in *.
+  apply same_bits_eq; intros. rewrite bits_shr' by auto. fold Z.
+  rewrite bits_shl' by (destruct (zlt (i + Z) zwordsize); omega). fold Y.
+  destruct (zlt Z Y).
+- assert (A: Int.unsigned (Int.sub y z) = Y - Z).
+  { apply Int.unsigned_repr. assert (zwordsize < Int.max_unsigned) by reflexivity. omega. }
+  rewrite bits_shl', A by omega.
+  destruct (zlt i (Y - Z)).
++ apply zlt_true. destruct (zlt (i + Z) zwordsize); omega.
++ rewrite zlt_false by (destruct (zlt (i + Z) zwordsize); omega).
+  rewrite bits_sign_ext by omega. f_equal. 
+  destruct (zlt (i + Z) zwordsize).
+  rewrite zlt_true by omega. omega.
+  rewrite zlt_false by omega. omega.
+- assert (A: Int.unsigned (Int.sub z y) = Z - Y).
+  { apply Int.unsigned_repr. assert (zwordsize < Int.max_unsigned) by reflexivity. omega. }
+  rewrite bits_sign_ext by omega.
+  rewrite bits_shr' by (destruct (zlt i (zwordsize - Z)); omega).
+  rewrite A. rewrite zlt_false by (destruct (zlt (i + Z) zwordsize); omega).
+  f_equal. destruct (zlt i (zwordsize - Z)).
++ rewrite ! zlt_true by omega. omega.
++ rewrite ! zlt_false by omega. rewrite zlt_true by omega. omega.
+Qed.
+
+Lemma shl'_zero_ext:
+  forall n m x, 0 <= n ->
+  shl' (zero_ext n x) m = zero_ext (n + Int.unsigned m) (shl' x m).
+Proof.
+  intros. apply same_bits_eq; intros.
+  rewrite bits_zero_ext, ! bits_shl' by omega.
+  destruct (zlt i (Int.unsigned m)).
+- rewrite zlt_true by omega; auto.
+- rewrite bits_zero_ext by omega.
+  destruct (zlt (i - Int.unsigned m) n); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+Qed.
+
+Lemma shl'_sign_ext:
+  forall n m x, 0 < n ->
+  shl' (sign_ext n x) m = sign_ext (n + Int.unsigned m) (shl' x m).
+Proof.
+  intros. generalize (Int.unsigned_range m); intros.
+  apply same_bits_eq; intros.
+  rewrite bits_sign_ext, ! bits_shl' by omega.
+  destruct (zlt i (n + Int.unsigned m)).
+- rewrite bits_shl' by auto. destruct (zlt i (Int.unsigned m)); auto.
+  rewrite bits_sign_ext by omega. f_equal. apply zlt_true. omega.
+- rewrite zlt_false by omega. rewrite bits_shl' by omega. rewrite zlt_false by omega.
+  rewrite bits_sign_ext by omega. f_equal. rewrite zlt_false by omega. omega.
+Qed.
+
+Lemma shru'_zero_ext:
+  forall n m x, 0 <= n ->
+  shru' (zero_ext (n + Int.unsigned m) x) m = zero_ext n (shru' x m).
+Proof.
+  intros. generalize (Int.unsigned_range m); intros.
+  bit_solve; [|omega]. rewrite bits_shru', bits_zero_ext, bits_shru' by omega.
+  destruct (zlt (i + Int.unsigned m) zwordsize).
+* destruct (zlt i n); [rewrite zlt_true by omega|rewrite zlt_false by omega]; auto.
+* destruct (zlt i n); auto.
+Qed.
+
+Lemma shru'_zero_ext_0:
+  forall n m x, n <= Int.unsigned m ->
+  shru' (zero_ext n x) m = zero.
+Proof.
+  intros. generalize (Int.unsigned_range m); intros.
+  bit_solve. rewrite bits_shru', bits_zero_ext by omega.
+  destruct (zlt (i + Int.unsigned m) zwordsize); auto.
+  apply zlt_false. omega.
+Qed.
+
+Lemma shr'_sign_ext:
+  forall n m x, 0 < n -> n + Int.unsigned m < zwordsize ->
+  shr' (sign_ext (n + Int.unsigned m) x) m = sign_ext n (shr' x m).
+Proof.
+  intros. generalize (Int.unsigned_range m); intros.
+  apply same_bits_eq; intros.
+  rewrite bits_sign_ext, bits_shr' by auto.
+  rewrite bits_sign_ext, bits_shr'.
+- f_equal.
+  destruct (zlt i n), (zlt (i + Int.unsigned m) zwordsize).
++ apply zlt_true; omega.
++ apply zlt_true; omega.
++ rewrite zlt_false by omega. rewrite zlt_true by omega. omega.
++ rewrite zlt_false by omega. rewrite zlt_true by omega. omega.
+- destruct (zlt i n); omega.
+- destruct (zlt (i + Int.unsigned m) zwordsize); omega.
+Qed.
+
+Lemma zero_ext_shru'_min:
+  forall s x n, Int.ltu n iwordsize' = true ->
+  zero_ext s (shru' x n) = zero_ext (Z.min s (zwordsize - Int.unsigned n)) (shru' x n).
+Proof.
+  intros. apply Int.ltu_inv in H. change (Int.unsigned iwordsize') with zwordsize in H.
+  apply Z.min_case_strong; intros; auto.
+  bit_solve; try omega. rewrite ! bits_shru' by omega. 
+  destruct (zlt i (zwordsize - Int.unsigned n)).
+  rewrite zlt_true by omega. auto.
+  destruct (zlt i s); auto. rewrite zlt_false by omega; auto.
+Qed.
+
+Lemma sign_ext_shr'_min:
+  forall s x n, Int.ltu n iwordsize' = true ->
+  sign_ext s (shr' x n) = sign_ext (Z.min s (zwordsize - Int.unsigned n)) (shr' x n).
+Proof.
+  intros. apply Int.ltu_inv in H. change (Int.unsigned iwordsize') with zwordsize in H.
+  rewrite Z.min_comm. 
+  destruct (Z.min_spec (zwordsize - Int.unsigned n) s) as [[A B] | [A B]]; rewrite B; auto.
+  apply same_bits_eq; intros. rewrite ! bits_sign_ext by auto.
+  destruct (zlt i (zwordsize - Int.unsigned n)). 
+  rewrite zlt_true by omega. auto.
+  assert (C: testbit (shr' x n) (zwordsize - Int.unsigned n - 1) = testbit x (zwordsize - 1)).
+  { rewrite bits_shr' by omega. rewrite zlt_true by omega. f_equal; omega. }
+  rewrite C. destruct (zlt i s); rewrite bits_shr' by omega.
+  rewrite zlt_false by omega. auto.
+  rewrite zlt_false by omega. auto.
+Qed.
+
+Lemma shl'_zero_ext_min:
+  forall s x n, Int.ltu n iwordsize' = true ->
+  shl' (zero_ext s x) n = shl' (zero_ext (Z.min s (zwordsize - Int.unsigned n)) x) n.
+Proof.
+  intros. apply Int.ltu_inv in H. change (Int.unsigned iwordsize') with zwordsize in H.
+  apply Z.min_case_strong; intros; auto.
+  apply same_bits_eq; intros. rewrite ! bits_shl' by auto.
+  destruct (zlt i (Int.unsigned n)); auto.
+  rewrite ! bits_zero_ext by omega.
+  destruct (zlt (i - Int.unsigned n) s).
+  rewrite zlt_true by omega; auto.
+  rewrite zlt_false by omega; auto.
+Qed.
+
+Lemma shl'_sign_ext_min:
+  forall s x n, Int.ltu n iwordsize' = true ->
+  shl' (sign_ext s x) n = shl' (sign_ext (Z.min s (zwordsize - Int.unsigned n)) x) n.
+Proof.
+  intros. apply Int.ltu_inv in H. change (Int.unsigned iwordsize') with zwordsize in H.
+  rewrite Z.min_comm. 
+  destruct (Z.min_spec (zwordsize - Int.unsigned n) s) as [[A B] | [A B]]; rewrite B; auto.
+  apply same_bits_eq; intros. rewrite ! bits_shl' by auto.
+  destruct (zlt i (Int.unsigned n)); auto.
+  rewrite ! bits_sign_ext by omega. f_equal.
+  destruct (zlt (i - Int.unsigned n) s).
+  rewrite zlt_true by omega; auto.
+  omegaContradiction.
+Qed.
+
 (** Powers of two with exponents given as 32-bit ints *)
 
 Definition one_bits' (x: int) : list Int.int :=
@@ -4156,8 +4718,26 @@ End Int64.
 
 Strategy 0 [Wordsize_64.wordsize].
 
+Definition int_eq: forall (i1 i2: int), {i1=i2} + {i1<>i2}.
+Proof.
+  generalize Z.eq_dec. intros.
+  destruct i1. destruct i2. generalize (H intval intval0). intro.
+  inversion H0.
+  - subst. left. assert (intrange = intrange0) by (apply proof_irr). congruence.
+  - right. intro. inversion H2. contradiction.
+Qed.
+
 Notation int64 := Int64.int.
 
+Definition int64_eq: forall (i1 i2: int64), {i1=i2} + {i1<>i2}.
+Proof.
+  generalize Z.eq_dec. intros.
+  destruct i1. destruct i2. generalize (H intval intval0). intro.
+  inversion H0.
+  - subst. left. assert (intrange = intrange0) by (apply proof_irr). congruence.
+  - right. intro. inversion H2. contradiction.
+Qed.
+
 Global Opaque Int.repr Int64.repr Byte.repr.
 
 (** * Specialization to offsets in pointer values *)
@@ -4434,6 +5014,15 @@ Strategy 0 [Wordsize_Ptrofs.wordsize].
 
 Notation ptrofs := Ptrofs.int.
 
+Definition ptrofs_eq: forall (i1 i2: ptrofs), {i1=i2} + {i1<>i2}.
+Proof.
+  generalize Z.eq_dec. intros.
+  destruct i1. destruct i2. generalize (H intval intval0). intro.
+  inversion H0.
+  - subst. left. assert (intrange = intrange0) by (apply proof_irr). congruence.
+  - right. intro. inversion H2. contradiction.
+Qed.
+
 Global Opaque Ptrofs.repr.
 
 Hint Resolve Int.modulus_pos Int.eqm_refl Int.eqm_refl2 Int.eqm_sym Int.eqm_trans
diff --git a/lib/IntvSets.v b/lib/IntvSets.v
index 78c20cc5..b97d9882 100644
--- a/lib/IntvSets.v
+++ b/lib/IntvSets.v
@@ -102,7 +102,7 @@ Proof.
   simpl. rewrite IHok. tauto.
   destruct (zlt h0 l).
   simpl. tauto.
-  rewrite IHok. intuition.
+  rewrite IHok. intuition idtac.
   assert (l0 <= x < h0 \/ l <= x < h) by xomega. tauto.
   left; xomega.
   left; xomega.
diff --git a/lib/Maps.v b/lib/Maps.v
index 9e44a7fe..8de3c892 100644
--- a/lib/Maps.v
+++ b/lib/Maps.v
@@ -116,6 +116,19 @@ Module Type TREE.
     forall (m1: t A) (m2: t B) (i: elt),
     get i (combine f m1 m2) = f (get i m1) (get i m2).
 
+  Parameter combine_null :
+    forall (A B C: Type) (f: A -> B -> option C),
+    t A -> t B -> t C.
+
+  Axiom gcombine_null:
+    forall (A B C: Type) (f: A -> B -> option C),
+    forall (m1: t A) (m2: t B) (i: elt),
+      get i (combine_null f m1 m2) =
+      match (get i m1), (get i m2) with
+      | (Some x1), (Some x2) => f x1 x2
+      | _, _ => None
+      end.
+
   (** Enumerating the bindings of a tree. *)
   Parameter elements:
     forall (A: Type), t A -> list (elt * A).
@@ -151,6 +164,12 @@ Module Type TREE.
     forall (A B: Type) (f: B -> A -> B) (v: B) (m: t A),
     fold1 f m v =
     List.fold_left (fun a p => f a (snd p)) (elements m) v.
+
+  Parameter bempty_canon :
+    forall (A : Type), t A -> bool.
+  Axiom bempty_canon_correct:
+    forall (A : Type) (tr : t A) (i : elt),
+      bempty_canon tr = true -> get i tr = None.
 End TREE.
 
 (** * The abstract signatures of maps *)
@@ -261,6 +280,12 @@ Module PTree <: TREE.
     induction i; simpl; auto.
   Qed.
 
+  Definition bempty_canon (A : Type) (tr : t A) : bool :=
+    match tr with
+    | Leaf => true
+    | _ => false
+    end.
+
   Theorem gss:
     forall (A: Type) (i: positive) (x: A) (m: t A), get i (set i x m) = Some x.
   Proof.
@@ -269,7 +294,16 @@ Module PTree <: TREE.
 
     Lemma gleaf : forall (A : Type) (i : positive), get i (Leaf : t A) = None.
     Proof. exact gempty. Qed.
-
+    
+  Lemma bempty_canon_correct:
+    forall (A : Type) (tr : t A) (i : elt),
+      bempty_canon tr = true -> get i tr = None.
+  Proof.
+    destruct tr; intros.
+    - rewrite gleaf; trivial.
+    - discriminate.
+  Qed.
+  
   Theorem gso:
     forall (A: Type) (i j: positive) (x: A) (m: t A),
     i <> j -> get i (set j x m) = get i m.
@@ -625,7 +659,81 @@ Module PTree <: TREE.
      auto.
   Qed.
 
-    Fixpoint xelements (A : Type) (m : t A) (i : positive)
+  Section COMBINE_NULL.
+
+  Variables A B C: Type.
+  Variable f: A -> B -> option C.
+
+  
+  Fixpoint combine_null (m1: t A) (m2: t B) {struct m1} : t C :=
+    match m1, m2 with
+    | (Node l1 o1 r1), (Node l2 o2 r2) =>
+      Node' (combine_null l1 l2)
+            (match o1, o2 with
+             | (Some x1), (Some x2) => f x1 x2
+             | _, _ => None
+             end)
+            (combine_null r1 r2)
+    | _, _ => Leaf
+    end.
+
+  Theorem gcombine_null:
+      forall (m1: t A) (m2: t B) (i: positive),
+        get i (combine_null m1 m2) =
+        match (get i m1), (get i m2) with
+        | (Some x1), (Some x2) => f x1 x2
+        | _, _ => None
+        end.
+  Proof.
+    induction m1; intros; simpl.
+    - rewrite gleaf. rewrite gleaf.
+      reflexivity.
+    - destruct m2; simpl.
+      + rewrite gleaf. rewrite gleaf.
+        destruct get; reflexivity.
+      + rewrite gnode'.
+        destruct i; simpl; try rewrite IHm1_1; try rewrite IHm1; trivial.
+  Qed.
+
+  End COMBINE_NULL.
+
+  Section REMOVE_TREE.
+
+  Variables A B: Type.
+
+  Fixpoint remove_t (m1: t A) (m2: t B) {struct m1} : t A :=
+    match m1, m2 with
+    | Leaf, _ | _, Leaf => m1
+    | (Node l1 o1 r1), (Node l2 o2 r2) =>
+      Node' (remove_t l1 l2)
+            (match o2 with
+             | Some _ => None
+             | None => o1
+             end)
+            (remove_t r1 r2)
+    end.
+
+  Theorem gremove_t:
+    forall m1 : t A,
+    forall m2 : t B,
+    forall i : positive,
+      get i (remove_t m1 m2) = match get i m2 with
+                               | None => get i m1
+                               | Some _ => None
+                               end.
+  Proof.
+    induction m1; intros; simpl.
+    - rewrite gleaf.
+      destruct get; reflexivity.
+    - destruct m2; simpl.
+      + rewrite gleaf.
+        reflexivity.
+      + rewrite gnode'.
+        destruct i; simpl; try rewrite IHm1_1; try rewrite IHm1; trivial.
+  Qed.      
+  End REMOVE_TREE.
+  
+  Fixpoint xelements (A : Type) (m : t A) (i : positive)
                        (k: list (positive * A)) {struct m}
                        : list (positive * A) :=
       match m with
@@ -958,6 +1066,36 @@ Module PTree <: TREE.
     intros. apply fold1_xelements with (l := @nil (positive * A)).
   Qed.
 
+  Local Open Scope positive.
+  Lemma set_disjoint1:
+    forall (A: Type)(i d : elt) (m: t A)  (x y: A),
+      set (i + d) y (set i x m) = set i x (set (i + d) y m).
+  Proof.
+    induction i; destruct d; destruct m; intro; simpl; trivial;
+      intro; congruence.
+  Qed.
+  
+  Local Open Scope positive.
+  Lemma set_disjoint:
+    forall (A: Type)(i j : elt) (m: t A)  (x y: A),
+      i <> j ->
+      set j y (set i x m) = set i x (set j y m).
+  Proof.
+    intros.
+    destruct (Pos.compare_spec i j) as [Heq | Hlt | Hlt].
+    { congruence. }
+    {
+      rewrite (Pos.lt_iff_add i j) in Hlt.
+      destruct Hlt as [d Hd].
+      subst j.
+      apply set_disjoint1.
+    }
+      rewrite (Pos.lt_iff_add j i) in Hlt.
+      destruct Hlt as [d Hd].
+      subst i.
+      symmetry.
+      apply set_disjoint1.
+  Qed.
 End PTree.
 
 (** * An implementation of maps over type [positive] *)
@@ -1035,6 +1173,15 @@ Module PMap <: MAP.
     intros. unfold set. simpl. decEq. apply PTree.set2.
   Qed.
 
+  Local Open Scope positive.
+  Lemma set_disjoint:
+    forall (A: Type) (i j : elt) (x y: A) (m: t A),
+      i <> j ->
+      set j y (set i x m) = set i x (set j y m).
+  Proof.
+    intros. unfold set. decEq. apply PTree.set_disjoint. assumption.
+  Qed.
+
 End PMap.
 
 (** * An implementation of maps over any type that injects into type [positive] *)
@@ -1102,6 +1249,16 @@ Module IMap(X: INDEXED_TYPE).
     intros. unfold set. apply PMap.set2.
   Qed.
 
+  Lemma set_disjoint:
+    forall (A: Type) (i j : elt) (x y: A) (m: t A),
+      i <> j ->
+      set j y (set i x m) = set i x (set j y m).
+  Proof.
+    intros. unfold set. apply PMap.set_disjoint.
+    intro INEQ.
+    assert (i = j) by (apply X.index_inj; auto).
+    auto.
+  Qed.
 End IMap.
 
 Module ZIndexed.
diff --git a/lib/Ordered.v b/lib/Ordered.v
index bcf24cbd..1adbd330 100644
--- a/lib/Ordered.v
+++ b/lib/Ordered.v
@@ -21,6 +21,8 @@ Require Import Coqlib.
 Require Import Maps.
 Require Import Integers.
 
+Create HintDb ordered_type.
+
 (** The ordered type of positive numbers *)
 
 Module OrderedPositive <: OrderedType.
@@ -173,17 +175,17 @@ Definition eq (x y: t) :=
 
 Lemma eq_refl : forall x : t, eq x x.
 Proof.
-  intros; split; auto.
+  intros; split; auto with ordered_type.
 Qed.
 
 Lemma eq_sym : forall x y : t, eq x y -> eq y x.
 Proof.
-  unfold eq; intros. intuition auto.
+  unfold eq; intros. intuition auto with ordered_type.
 Qed.
 
 Lemma eq_trans : forall x y z : t, eq x y -> eq y z -> eq x z.
 Proof.
-  unfold eq; intros. intuition eauto.
+  unfold eq; intros. intuition eauto with ordered_type.
 Qed.
 
 Definition lt (x y: t) :=
@@ -201,7 +203,7 @@ Proof.
   case (A.compare (fst x) (fst z)); intro.
   assumption.
   generalize (A.lt_not_eq H2); intro. elim H5.
-  apply A.eq_trans with (fst z). auto. auto.
+  apply A.eq_trans with (fst z). auto. auto with ordered_type.
   generalize (@A.lt_not_eq (fst z) (fst y)); intro.
   elim H5. apply A.lt_trans with (fst x); auto.
   apply A.eq_sym; auto.
diff --git a/lib/Zbits.v b/lib/Zbits.v
index dca2a5a2..27586aff 100644
--- a/lib/Zbits.v
+++ b/lib/Zbits.v
@@ -557,7 +557,7 @@ Definition Zzero_ext (n: Z) (x: Z) : Z :=
 Definition Zsign_ext (n: Z) (x: Z) : Z :=
   Z.iter (Z.pred n)
     (fun rec x => Zshiftin (Z.odd x) (rec (Z.div2 x)))
-    (fun x => if Z.odd x then -1 else 0)
+    (fun x => if Z.odd x && zlt 0 n then -1 else 0)
     x.
 
 Lemma Ziter_base:
@@ -606,32 +606,34 @@ Proof.
 Qed.
 
 Lemma Zsign_ext_spec:
-  forall n x i, 0 <= i -> 0 < n ->
+  forall n x i, 0 <= i ->
   Z.testbit (Zsign_ext n x) i = Z.testbit x (if zlt i n then i else n - 1).
 Proof.
-  intros n0 x i I0 N0.
-  revert x i I0. pattern n0. apply Zlt_lower_bound_ind with (z := 1).
-  - unfold Zsign_ext. intros.
-    destruct (zeq x 1).
-    + subst x; simpl.
-      replace (if zlt i 1 then i else 0) with 0.
-      rewrite Ztestbit_base.
-      destruct (Z.odd x0).
-      apply Ztestbit_m1; auto.
-      apply Ztestbit_0.
-      destruct (zlt i 1); omega.
-    + set (x1 := Z.pred x). replace x1 with (Z.succ (Z.pred x1)).
-      rewrite Ziter_succ. rewrite Ztestbit_shiftin.
-      destruct (zeq i 0).
-      * subst i. rewrite zlt_true. rewrite Ztestbit_base; auto. omega.
-      * rewrite H. unfold x1. destruct (zlt (Z.pred i) (Z.pred x)).
-        rewrite zlt_true. rewrite (Ztestbit_eq i x0); auto. rewrite zeq_false; auto. omega.
-        rewrite zlt_false. rewrite (Ztestbit_eq (x - 1) x0). rewrite zeq_false; auto.
-        omega. omega. omega. unfold x1; omega. omega.
-      * omega.
-      * unfold x1; omega.
-      * omega.
-  - omega.
+  intros n0 x i I0. unfold Zsign_ext. 
+  unfold proj_sumbool; destruct (zlt 0 n0) as [N0|N0]; simpl.
+- revert x i I0. pattern n0. apply Zlt_lower_bound_ind with (z := 1); [ | omega ].
+  unfold Zsign_ext. intros.
+  destruct (zeq x 1).
+  + subst x; simpl.
+    replace (if zlt i 1 then i else 0) with 0.
+    rewrite Ztestbit_base.
+    destruct (Z.odd x0); [ apply Ztestbit_m1; auto | apply Ztestbit_0 ].
+    destruct (zlt i 1); omega.
+  + set (x1 := Z.pred x). replace x1 with (Z.succ (Z.pred x1)) by omega.
+    rewrite Ziter_succ by (unfold x1; omega). rewrite Ztestbit_shiftin by auto.
+    destruct (zeq i 0).
+    * subst i. rewrite zlt_true. rewrite Ztestbit_base; auto. omega.
+    * rewrite H by (unfold x1; omega).
+      unfold x1; destruct (zlt (Z.pred i) (Z.pred x)).
+      ** rewrite zlt_true by omega.
+         rewrite (Ztestbit_eq i x0) by omega.
+         rewrite zeq_false by omega. auto.
+      ** rewrite zlt_false by omega.
+         rewrite (Ztestbit_eq (x - 1) x0) by omega.
+         rewrite zeq_false by omega. auto.
+- rewrite Ziter_base by omega. rewrite andb_false_r. 
+  rewrite Z.testbit_0_l, Z.testbit_neg_r. auto.
+  destruct (zlt i n0); omega.
 Qed.
 
 (** [Zzero_ext n x] is [x modulo 2^n] *)
@@ -661,7 +663,7 @@ Qed.
 (** Relation between [Zsign_ext n x] and (Zzero_ext n x] *)
 
 Lemma Zsign_ext_zero_ext:
-  forall n, 0 < n -> forall x,
+  forall n, 0 <= n -> forall x,
   Zsign_ext n x = Zzero_ext n x - (if Z.testbit x (n - 1) then two_p n else 0).
 Proof.
   intros. apply equal_same_bits; intros.
@@ -698,12 +700,12 @@ Proof.
   assert (C: two_p n = 2 * two_p (n - 1)).
   { rewrite <- two_p_S by omega. f_equal; omega. }
   rewrite Zzero_ext_spec, zlt_true in B by omega.
-  rewrite Zsign_ext_zero_ext by auto. rewrite B.
+  rewrite Zsign_ext_zero_ext by omega. rewrite B.
   destruct (zlt (Zzero_ext n x) (two_p (n - 1))); omega.
 Qed.
 
 Lemma eqmod_Zsign_ext:
-  forall n x, 0 < n ->
+  forall n x, 0 <= n ->
   eqmod (two_p n) (Zsign_ext n x) x.
 Proof.
   intros. rewrite Zsign_ext_zero_ext by auto. 
@@ -822,6 +824,14 @@ Proof.
   apply Z.log2_nonneg.
 - reflexivity.
 Qed.
+
+Lemma Z_is_power2_nonneg:
+  forall x i, Z_is_power2 x = Some i -> 0 <= i.
+Proof.
+  unfold Z_is_power2; intros. destruct x; try discriminate.
+  destruct (P_is_power2 p) eqn:P; try discriminate.
+  replace i with (Z.log2 (Z.pos p)) by congruence. apply Z.log2_nonneg.
+Qed.
  
 Lemma Z_is_power2_sound:
   forall x i, Z_is_power2 x = Some i -> x = two_p i /\ i = Z.log2 x.
@@ -857,6 +867,12 @@ Qed.
 
 Definition Z_is_power2m1 (x: Z) : option Z := Z_is_power2 (Z.succ x).
 
+Lemma Z_is_power2m1_nonneg:
+  forall x i, Z_is_power2m1 x = Some i -> 0 <= i.
+Proof.
+  unfold Z_is_power2m1; intros. eapply Z_is_power2_nonneg; eauto.
+Qed.
+
 Lemma Z_is_power2m1_sound:
   forall x i, Z_is_power2m1 x = Some i -> x = two_p i - 1.
 Proof.
@@ -1026,3 +1042,60 @@ Proof.
   exploit (Zsize_interval_1 y). omega.
   omega.
 Qed.
+
+(** ** Bit insertion, bit extraction *)
+
+(** Extract and optionally sign-extend bits [from...from+len-1] of [x] *)
+Definition Zextract_u (x: Z) (from: Z) (len: Z) : Z :=
+  Zzero_ext len (Z.shiftr x from).
+
+Definition Zextract_s (x: Z) (from: Z) (len: Z) : Z :=
+  Zsign_ext len (Z.shiftr x from).
+
+Lemma Zextract_u_spec:
+  forall x from len i,
+  0 <= from -> 0 <= len -> 0 <= i ->
+  Z.testbit (Zextract_u x from len) i =
+  if zlt i len then Z.testbit x (from + i) else false.
+Proof.
+  unfold Zextract_u; intros. rewrite Zzero_ext_spec, Z.shiftr_spec by auto.
+  rewrite Z.add_comm. auto.
+Qed.
+
+Lemma Zextract_s_spec:
+  forall x from len i,
+  0 <= from -> 0 < len -> 0 <= i ->
+  Z.testbit (Zextract_s x from len) i =
+  Z.testbit x (from + (if zlt i len then i else len - 1)).
+Proof.
+  unfold Zextract_s; intros. rewrite Zsign_ext_spec by auto. rewrite Z.shiftr_spec.
+  rewrite Z.add_comm. auto.
+  destruct (zlt i len); omega.
+Qed.
+
+(** Insert bits [0...len-1] of [y] into bits [to...to+len-1] of [x] *)
+
+Definition Zinsert (x y: Z) (to: Z) (len: Z) : Z :=
+  let mask := Z.shiftl (two_p len - 1) to in
+  Z.lor (Z.land (Z.shiftl y to) mask) (Z.ldiff x mask).
+
+Lemma Zinsert_spec:
+  forall x y to len i,
+  0 <= to -> 0 <= len -> 0 <= i ->
+  Z.testbit (Zinsert x y to len) i =
+    if zle to i && zlt i (to + len)
+    then Z.testbit y (i - to)
+    else Z.testbit x i.
+Proof.
+  unfold Zinsert; intros. set (mask := two_p len - 1).
+  assert (M: forall j, 0 <= j -> Z.testbit mask j = if zlt j len then true else false).
+  { intros; apply Ztestbit_two_p_m1; auto. }
+  rewrite Z.lor_spec, Z.land_spec, Z.ldiff_spec by auto. 
+  destruct (zle to i).
+- rewrite ! Z.shiftl_spec by auto. rewrite ! M by omega. 
+  unfold proj_sumbool; destruct (zlt (i - to) len); simpl;
+  rewrite andb_true_r, andb_false_r.
++ rewrite zlt_true by omega. apply orb_false_r.
++ rewrite zlt_false by omega; auto.
+- rewrite ! Z.shiftl_spec_low by omega. simpl. apply andb_true_r.
+Qed.
diff --git a/mppa_k1c/Archi.v b/mppa_k1c/Archi.v
index cdcf58c3..69b32c7c 100644
--- a/mppa_k1c/Archi.v
+++ b/mppa_k1c/Archi.v
@@ -17,7 +17,6 @@
 (** Architecture-dependent parameters for MPPA K1c. Mostly copied from the Risc-V backend *)
 
 Require Import ZArith List.
-(*From Flocq*)
 Require Import Binary Bits.
 
 Definition ptr64 := true.
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index f09aa99c..189e0c76 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -1,753 +1,753 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*           Prashanth Mundkur, SRI International                      *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(*  The contributions by Prashanth Mundkur are reused and adapted      *)
-(*  under the terms of a Contributor License Agreement between         *)
-(*  SRI International and INRIA.                                       *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** * Abstract syntax for K1c textual assembly language.
-
-    Each emittable instruction is defined here. ';;' is also defined as an instruction.
-    The goal of this representation is to stay compatible with the rest of the generic backend of CompCert
-    We define [unfold : list bblock -> list instruction]
-    An Asm function is then defined as : [fn_sig], [fn_blocks], [fn_code], and a proof of [unfold fn_blocks = fn_code]
-    [fn_code] has no semantic. Instead, the semantic of Asm is given by using the AsmVLIW semantic on [fn_blocks] *)
-
-Require Import Coqlib.
-Require Import Maps.
-Require Import AST.
-Require Import Integers.
-Require Import Floats.
-Require Import Values.
-Require Import ExtValues.
-Require Import Memory.
-Require Import Events.
-Require Import Globalenvs.
-Require Import Smallstep.
-Require Import Locations.
-Require Stacklayout.
-Require Import Conventions.
-Require Import Asmvliw.
-Require Import Linking.
-Require Import Errors.
-
-(** Definitions for OCaml code *)
-Definition label := positive.
-Definition preg := preg.
-
-Inductive addressing : Type :=
-  | AOff (ofs: offset)
-  | AReg (ro: ireg)
-  | ARegXS (ro: ireg)
-.
-
-(** Syntax *)
-Inductive instruction : Type :=
-  (** pseudo instructions *)
-  | Pallocframe (sz: Z) (pos: ptrofs)               (**r allocate new stack frame *)
-  | Pfreeframe  (sz: Z) (pos: ptrofs)               (**r deallocate stack frame and restore previous frame *)
-  | Plabel  (lbl: label)                            (**r define a code label *)
-  | Ploadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the address of a symbol *)
-  | Pbuiltin: external_function -> list (builtin_arg preg)
-              -> builtin_res preg -> instruction   (**r built-in function (pseudo) *)
-  | Psemi                                           (**r semi colon separating bundles *)
-  | Pnop                                            (**r instruction that does nothing *)
-
-  (** Control flow instructions *)
-  | Pget    (rd: ireg) (rs: preg)                   (**r get system register *)
-  | Pset    (rd: preg) (rs: ireg)                   (**r set system register *)
-  | Pret                                            (**r return *)
-  | Pcall   (l: label)                              (**r function call *)
-  | Picall  (rs: ireg)                              (**r function call on register *)
-  (* Pgoto is for tailcalls, Pj_l is for jumping to a particular label *)
-  | Pgoto   (l: label)                              (**r goto *)
-  | Pigoto  (rs: ireg)                              (**r goto from register *)
-  | Pj_l    (l: label)                              (**r jump to label *)
-  | Pcb     (bt: btest) (r: ireg) (l: label)        (**r branch based on btest *)
-  | Pcbu    (bt: btest) (r: ireg) (l: label)        (**r branch based on btest with unsigned semantics *)
-  | Pjumptable (r: ireg) (labels: list label)
-
-  (* For builtins *)
-  | Ploopdo (count: ireg) (loopend: label)
-  | Pgetn   (n: int) (dst: ireg)
-  | Psetn   (n: int) (src: ireg)
-  | Pwfxl   (n: int) (src: ireg)
-  | Pwfxm   (n: int) (src: ireg)
-  | Pldu    (dst: ireg) (addr: ireg)
-  | Plbzu    (dst: ireg) (addr: ireg)
-  | Plhzu    (dst: ireg) (addr: ireg)
-  | Plwzu    (dst: ireg) (addr: ireg)
-  | Pawait
-  | Psleep
-  | Pstop
-  | Pbarrier
-  | Pfence
-  | Pdinval
-  | Pdinvall (addr: ireg)
-  | Pdtouchl (addr: ireg)
-  | Piinval
-  | Piinvals (addr: ireg)
-  | Pitouchl (addr: ireg)
-  | Pdzerol (addr: ireg)
-(*| Pafaddd (addr: ireg) (incr_res: ireg)
-  | Pafaddw (addr: ireg) (incr_res: ireg) *) (* see #157 *)
-  | Palclrd (dst: ireg) (addr: ireg)
-  | Palclrw (dst: ireg) (addr: ireg)
-  | Pclzll (rd rs: ireg)
-  | Pstsud (rd rs1 rs2: ireg)
-            
-  (** Loads **)
-  | Plb     (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load byte *)
-  | Plbu    (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load byte unsigned *)
-  | Plh     (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load half word *)
-  | Plhu    (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load half word unsigned *)
-  | Plw     (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load int32 *)
-  | Plw_a   (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load any32 *)
-  | Pld     (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load int64 *)
-  | Pld_a   (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load any64 *)
-  | Pfls    (rd: freg) (ra: ireg) (ofs: addressing)     (**r load float *)
-  | Pfld    (rd: freg) (ra: ireg) (ofs: addressing)    (**r load 64-bit float *)
-  | Plq     (rs: gpreg_q) (ra: ireg) (ofs: addressing)  (**r load 2*64-bit *)
-  | Plo     (rs: gpreg_o) (ra: ireg) (ofs: addressing)  (**r load 4*64-bit *)
-
-  (** Stores **)
-  | Psb     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store byte *)
-  | Psh     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store half byte *)
-  | Psw     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store int32 *)
-  | Psw_a   (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store any32 *)
-  | Psd     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store int64 *)
-  | Psd_a   (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store any64 *)
-  | Pfss    (rs: freg) (ra: ireg) (ofs: addressing)     (**r store float *)
-  | Pfsd    (rs: freg) (ra: ireg) (ofs: addressing)    (**r store 64-bit float *)
-
-  | Psq     (rs: gpreg_q) (ra: ireg) (ofs: addressing)  (**r store 2*64-bit *)
-  | Pso     (rs: gpreg_o) (ra: ireg) (ofs: addressing)  (**r store 2*64-bit *)
-
-  (** Arith RR *)
-  | Pmv     (rd rs: ireg)                           (**r register move *)
-  | Pnegw   (rd rs: ireg)                           (**r negate word *)
-  | Pnegl   (rd rs: ireg)                           (**r negate long *)
-  | Pcvtl2w (rd rs: ireg)                           (**r Convert Long to Word *)
-  | Psxwd   (rd rs: ireg)                           (**r Sign Extend Word to Double Word *)
-  | Pzxwd   (rd rs: ireg)                           (**r Zero Extend Word to Double Word *)
-
-  | Pextfz  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *)
-  | Pextfs  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *)
-
-  | Pextfzl  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *)
-  | Pextfsl  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *)
-
-  | Pinsf  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *)
-  | Pinsfl  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *)
-
-  | Pfabsd  (rd rs: ireg)                           (**r float absolute double *)
-  | Pfabsw  (rd rs: ireg)                           (**r float absolute word *)
-  | Pfnegd  (rd rs: ireg)                           (**r float negate double *)
-  | Pfnegw  (rd rs: ireg)                           (**r float negate word *)
-  | Pfnarrowdw (rd rs: ireg)                        (**r float narrow 64 -> 32 bits *)
-  | Pfwidenlwd (rd rs: ireg)                        (**r float widen 32 -> 64 bits *)
-  | Pfloatwrnsz (rd rs: ireg)                       (**r Floating Point Conversion from integer (32 -> 32) *)
-  | Pfloatuwrnsz (rd rs: ireg)                      (**r Floating Point Conversion from integer (u32 -> 32) *)
-  | Pfloatudrnsz (rd rs: ireg)                       (**r Floating Point Conversion from unsigned integer (64 bits) *)
-  | Pfloatdrnsz (rd rs: ireg)                       (**r Floating Point Conversion from integer (64 bits) *)
-  | Pfixedwrzz (rd rs: ireg)                        (**r Integer conversion from floating point *)
-  | Pfixeduwrzz (rd rs: ireg)                        (**r Integer conversion from floating point (f32 -> 32 bits unsigned *)
-  | Pfixeddrzz (rd rs: ireg)                        (**r Integer conversion from floating point (i64 -> 64 bits) *)
-  | Pfixeddrzz_i32 (rd rs: ireg)                        (**r Integer conversion from floating point (i32 -> f64) *)
-  | Pfixedudrzz (rd rs: ireg)                        (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *)
-  | Pfixedudrzz_i32 (rd rs: ireg)                        (**r unsigned Integer conversion from floating point (u32 -> 64 bits) *)
-
-  (** Arith RI32 *)
-  | Pmake   (rd: ireg) (imm: int)                   (**r load immediate *)
-
-  (** Arith RI64 *)
-  | Pmakel  (rd: ireg) (imm: int64)                 (**r load immediate long *)
-
-  (** Arith RF32 *)
-  | Pmakefs (rd: ireg) (imm: float32)
-
-  (** Arith RF64 *)
-  | Pmakef  (rd: ireg) (imm: float)
-
-  (** Arith RRR *)
-  | Pcompw  (it: itest) (rd rs1 rs2: ireg)          (**r comparison word *)
-  | Pcompl  (it: itest) (rd rs1 rs2: ireg)          (**r comparison long *)
-  | Pfcompw (ft: ftest) (rd rs1 rs2: ireg)          (**r comparison float *)
-  | Pfcompl (ft: ftest) (rd rs1 rs2: ireg)          (**r comparison float64 *)
-
-  | Paddw               (rd rs1 rs2: ireg)          (**r add word *)
-  | Paddxw (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r add word *)
-  | Psubw               (rd rs1 rs2: ireg)          (**r sub word *)
-  | Prevsubxw (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r add word *)
-  | Pmulw               (rd rs1 rs2: ireg)          (**r mul word *)
-  | Pandw               (rd rs1 rs2: ireg)          (**r and word *)
-  | Pnandw              (rd rs1 rs2: ireg)          (**r nand word *)
-  | Porw                (rd rs1 rs2: ireg)          (**r or word *)
-  | Pnorw               (rd rs1 rs2: ireg)          (**r nor word *)
-  | Pxorw               (rd rs1 rs2: ireg)          (**r xor word *)
-  | Pnxorw              (rd rs1 rs2: ireg)          (**r xor word *)
-  | Pandnw              (rd rs1 rs2: ireg)          (**r andn word *)
-  | Pornw               (rd rs1 rs2: ireg)          (**r orn word *)
-  | Psraw               (rd rs1 rs2: ireg)          (**r shift right arithmetic word *)
-  | Psrxw               (rd rs1 rs2: ireg)          (**r shift right arithmetic word round to 0*)
-  | Psrlw               (rd rs1 rs2: ireg)          (**r shift right logical word *)
-  | Psllw               (rd rs1 rs2: ireg)          (**r shift left logical word *)
-  | Pmaddw              (rd rs1 rs2: ireg)          (**r multiply-add words *)
-  | Pmsubw              (rd rs1 rs2: ireg)          (**r multiply-add words *)
-  | Pfmaddfw            (rd rs1 rs2: ireg)          (**r float fused multiply-add words *)
-  | Pfmsubfw            (rd rs1 rs2: ireg)          (**r float fused multiply-subtract words *)
-  | Pfmaddfl            (rd rs1 rs2: ireg)          (**r float fused multiply-add longs *)
-  | Pfmsubfl            (rd rs1 rs2: ireg)          (**r float fused multiply-subtract longs *)
-
-  | Paddl               (rd rs1 rs2: ireg)          (**r add long *)
-  | Paddxl (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r add long shift *)
-  | Psubl               (rd rs1 rs2: ireg)          (**r sub long *)
-  | Prevsubxl (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r sub long shift *)
-  | Pandl               (rd rs1 rs2: ireg)          (**r and long *)
-  | Pnandl              (rd rs1 rs2: ireg)          (**r nand long *)
-  | Porl                (rd rs1 rs2: ireg)          (**r or long *)
-  | Pnorl               (rd rs1 rs2: ireg)          (**r nor long *)
-  | Pxorl               (rd rs1 rs2: ireg)          (**r xor long *)
-  | Pnxorl              (rd rs1 rs2: ireg)          (**r nxor long *)
-  | Pandnl              (rd rs1 rs2: ireg)          (**r andn long *)
-  | Pornl               (rd rs1 rs2: ireg)          (**r orn long *)
-  | Pmull               (rd rs1 rs2: ireg)          (**r mul long (low part) *)
-  | Pslll               (rd rs1 rs2: ireg)          (**r shift left logical long *)
-  | Psrll               (rd rs1 rs2: ireg)          (**r shift right logical long *)
-  | Psral               (rd rs1 rs2: ireg)          (**r shift right arithmetic long *)
-  | Psrxl               (rd rs1 rs2: ireg)          (**r shift right arithmetic long round to 0*)
-  | Pmaddl              (rd rs1 rs2: ireg)          (**r multiply-add long *)
-  | Pmsubl              (rd rs1 rs2: ireg)          (**r multiply-add long *)
-
-  | Pfaddd              (rd rs1 rs2: ireg)          (**r Float addition double *)
-  | Pfaddw              (rd rs1 rs2: ireg)          (**r Float addition word *)
-  | Pfsbfd              (rd rs1 rs2: ireg)          (**r Float sub double *)
-  | Pfsbfw              (rd rs1 rs2: ireg)          (**r Float sub word *)
-  | Pfmuld              (rd rs1 rs2: ireg)          (**r Float mul double *)
-  | Pfmulw              (rd rs1 rs2: ireg)          (**r Float mul word *)
-  | Pfmind              (rd rs1 rs2: ireg)          (**r Float min double *)
-  | Pfminw              (rd rs1 rs2: ireg)          (**r Float min word *)
-  | Pfmaxd              (rd rs1 rs2: ireg)          (**r Float max double *)
-  | Pfmaxw              (rd rs1 rs2: ireg)          (**r Float max word *)
-  | Pfinvw              (rd rs1: ireg)              (**r Float invert word *)
-                        
-  (** Arith RRI32 *)
-  | Pcompiw (it: itest) (rd rs: ireg) (imm: int)    (**r comparison imm word *)
-
-  | Paddiw              (rd rs: ireg) (imm: int)    (**r add imm word *)
-  | Paddxiw (shift : shift1_4) (rd rs: ireg) (imm: int)    (**r add imm word *)
-  | Prevsubiw              (rd rs: ireg) (imm: int)    (**r subtract imm word *)
-  | Prevsubxiw (shift : shift1_4) (rd rs: ireg) (imm: int)    (**r subtract imm word *)
-  | Pmuliw              (rd rs: ireg) (imm: int)    (**r mul imm word *)
-  | Pandiw              (rd rs: ireg) (imm: int)    (**r and imm word *)
-  | Pnandiw             (rd rs: ireg) (imm: int)    (**r nand imm word *)
-  | Poriw               (rd rs: ireg) (imm: int)    (**r or imm word *)
-  | Pnoriw              (rd rs: ireg) (imm: int)    (**r nor imm word *)
-  | Pxoriw              (rd rs: ireg) (imm: int)    (**r xor imm word *)
-  | Pnxoriw             (rd rs: ireg) (imm: int)    (**r nxor imm word *)
-  | Pandniw             (rd rs: ireg) (imm: int)    (**r andn imm word *)
-  | Porniw              (rd rs: ireg) (imm: int)    (**r orn imm word *)
-  | Psraiw              (rd rs: ireg) (imm: int)    (**r shift right arithmetic imm word *)
-  | Psrxiw              (rd rs: ireg) (imm: int)    (**r shift right arithmetic imm word round to 0*)
-  | Psrliw              (rd rs: ireg) (imm: int)    (**r shift right logical imm word *)
-  | Pslliw              (rd rs: ireg) (imm: int)    (**r shift left logical imm word *)
-  | Proriw              (rd rs: ireg) (imm: int)    (**r rotate right imm word *) 
-  | Pmaddiw             (rd rs: ireg) (imm: int)    (**r multiply add imm word *)
-  | Psllil              (rd rs: ireg) (imm: int)    (**r shift left logical immediate long *)
-  | Psrxil              (rd rs: ireg) (imm: int)    (**r shift right arithmetic imm word round to 0*)
-  | Psrlil              (rd rs: ireg) (imm: int)    (**r shift right logical immediate long *)
-  | Psrail              (rd rs: ireg) (imm: int)    (**r shift right arithmetic immediate long *)
-
-  (** Arith RRI64 *)
-  | Pcompil (it: itest) (rd rs: ireg) (imm: int64)  (**r comparison imm long *)
-  | Paddil              (rd rs: ireg) (imm: int64)  (**r add immediate long *) 
-  | Paddxil (shift : shift1_4) (rd rs: ireg) (imm: int64)  (**r add immediate long *) 
-  | Prevsubil              (rd rs: ireg) (imm: int64)  (**r subtract imm long *)
-  | Prevsubxil (shift : shift1_4) (rd rs: ireg) (imm: int64)  (**r subtract imm long *)
-  | Pmulil              (rd rs: ireg) (imm: int64)  (**r add immediate long *) 
-  | Pandil              (rd rs: ireg) (imm: int64)  (**r and immediate long *) 
-  | Pnandil             (rd rs: ireg) (imm: int64)  (**r and immediate long *) 
-  | Poril               (rd rs: ireg) (imm: int64)  (**r or immediate long *) 
-  | Pnoril              (rd rs: ireg) (imm: int64)  (**r and immediate long *) 
-  | Pxoril              (rd rs: ireg) (imm: int64)  (**r xor immediate long *)
-  | Pnxoril             (rd rs: ireg) (imm: int64)  (**r xor immediate long *)
-  | Pandnil             (rd rs: ireg) (imm: int64)  (**r andn long *)
-  | Pornil              (rd rs: ireg) (imm: int64)  (**r orn long *)
-  | Pmaddil             (rd rs: ireg) (imm: int64)  (**r multiply add imm long *)
-  | Pcmove (bt: btest) (rcond rd rs : ireg) (** conditional move *) 
-  | Pcmoveu (bt: btest) (rcond rd rs : ireg) (** conditional move, unsigned semantics *) 
-  | Pcmoveiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move *) 
-  | Pcmoveuiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move, unsigned semantics *) 
-  | Pcmoveil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move *) 
-  | Pcmoveuil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move, unsigned semantics *) 
-.
-
-(** Correspondance between Asmblock and Asm *)
-
-Definition control_to_instruction (c: control) :=
-  match c with
-  | PExpand (Asmvliw.Pbuiltin ef args res) => Pbuiltin ef args res
-  | PCtlFlow Asmvliw.Pret                  => Pret
-  | PCtlFlow (Asmvliw.Pcall l)             => Pcall l
-  | PCtlFlow (Asmvliw.Picall r)            => Picall r
-  | PCtlFlow (Asmvliw.Pgoto l)             => Pgoto l
-  | PCtlFlow (Asmvliw.Pigoto l)             => Pigoto l
-  | PCtlFlow (Asmvliw.Pj_l l)              => Pj_l l
-  | PCtlFlow (Asmvliw.Pcb bt r l)          => Pcb bt r l
-  | PCtlFlow (Asmvliw.Pcbu bt r l)         => Pcbu bt r l
-  | PCtlFlow (Asmvliw.Pjumptable r label)  => Pjumptable r label
-  end.
-
-Definition basic_to_instruction (b: basic) :=
-  match b with
-  (** Special basics *)
-  | Asmvliw.Pget rd rs         => Pget rd rs
-  | Asmvliw.Pset rd rs         => Pset rd rs
-  | Asmvliw.Pnop               => Pnop
-  | Asmvliw.Pallocframe sz pos => Pallocframe sz pos
-  | Asmvliw.Pfreeframe sz pos  => Pfreeframe sz pos
-
-  (** PArith basics *)
-  (* R *)
-  | PArithR (Asmvliw.Ploadsymbol id ofs) r => Ploadsymbol r id ofs
-
-  (* RR *)
-  | PArithRR Asmvliw.Pmv rd rs     => Pmv rd rs
-  | PArithRR Asmvliw.Pnegw rd rs   => Pnegw rd rs
-  | PArithRR Asmvliw.Pnegl rd rs   => Pnegl rd rs
-  | PArithRR Asmvliw.Pcvtl2w rd rs => Pcvtl2w rd rs
-  | PArithRR Asmvliw.Psxwd rd rs  => Psxwd rd rs
-  | PArithRR Asmvliw.Pzxwd rd rs  => Pzxwd rd rs
-  | PArithRR (Asmvliw.Pextfz stop start) rd rs => Pextfz rd rs stop start
-  | PArithRR (Asmvliw.Pextfs stop start) rd rs => Pextfs rd rs stop start
-  | PArithRR (Asmvliw.Pextfzl stop start) rd rs => Pextfzl rd rs stop start
-  | PArithRR (Asmvliw.Pextfsl stop start) rd rs => Pextfsl rd rs stop start
-  | PArithRR Asmvliw.Pfabsd rd rs => Pfabsd rd rs
-  | PArithRR Asmvliw.Pfabsw rd rs => Pfabsw rd rs
-  | PArithRR Asmvliw.Pfnegd rd rs  => Pfnegd rd rs
-  | PArithRR Asmvliw.Pfnegw rd rs => Pfnegw rd rs
-  | PArithRR Asmvliw.Pfinvw rd rs => Pfinvw rd rs
-  | PArithRR Asmvliw.Pfnarrowdw rd rs => Pfnarrowdw rd rs
-  | PArithRR Asmvliw.Pfwidenlwd rd rs => Pfwidenlwd rd rs
-  | PArithRR Asmvliw.Pfloatuwrnsz rd rs => Pfloatuwrnsz rd rs
-  | PArithRR Asmvliw.Pfloatwrnsz rd rs => Pfloatwrnsz rd rs
-  | PArithRR Asmvliw.Pfloatudrnsz rd rs => Pfloatudrnsz rd rs
-  | PArithRR Asmvliw.Pfloatdrnsz rd rs => Pfloatdrnsz rd rs
-  | PArithRR Asmvliw.Pfixedwrzz rd rs => Pfixedwrzz rd rs
-  | PArithRR Asmvliw.Pfixeduwrzz rd rs => Pfixeduwrzz rd rs
-  | PArithRR Asmvliw.Pfixeddrzz rd rs => Pfixeddrzz rd rs
-  | PArithRR Asmvliw.Pfixedudrzz rd rs => Pfixedudrzz rd rs
-  | PArithRR Asmvliw.Pfixeddrzz_i32 rd rs => Pfixeddrzz_i32 rd rs
-  | PArithRR Asmvliw.Pfixedudrzz_i32 rd rs => Pfixedudrzz_i32 rd rs
-
-  (* RI32 *)
-  | PArithRI32 Asmvliw.Pmake rd imm  => Pmake rd imm
-
-  (* RI64 *)
-  | PArithRI64 Asmvliw.Pmakel rd imm => Pmakel rd imm
-
-  (* RF32 *)
-  | PArithRF32 Asmvliw.Pmakefs rd imm => Pmakefs rd imm
-
-  (* RF64 *)
-  | PArithRF64 Asmvliw.Pmakef rd imm => Pmakef rd imm
-
-  (* RRR *)
-  | PArithRRR (Asmvliw.Pcompw it) rd rs1 rs2 => Pcompw it rd rs1 rs2
-  | PArithRRR (Asmvliw.Pcompl it) rd rs1 rs2 => Pcompl it rd rs1 rs2
-  | PArithRRR (Asmvliw.Pfcompw ft) rd rs1 rs2 => Pfcompw ft rd rs1 rs2
-  | PArithRRR (Asmvliw.Pfcompl ft) rd rs1 rs2 => Pfcompl ft rd rs1 rs2
-  | PArithRRR Asmvliw.Paddw rd rs1 rs2       => Paddw rd rs1 rs2
-  | PArithRRR (Asmvliw.Paddxw shift) rd rs1 rs2 => Paddxw shift rd rs1 rs2
-  | PArithRRR Asmvliw.Psubw rd rs1 rs2       => Psubw rd rs1 rs2
-  | PArithRRR (Asmvliw.Prevsubxw shift) rd rs1 rs2 => Prevsubxw shift rd rs1 rs2
-  | PArithRRR Asmvliw.Pmulw rd rs1 rs2       => Pmulw rd rs1 rs2
-  | PArithRRR Asmvliw.Pandw rd rs1 rs2       => Pandw rd rs1 rs2
-  | PArithRRR Asmvliw.Pnandw rd rs1 rs2      => Pnandw rd rs1 rs2
-  | PArithRRR Asmvliw.Porw rd rs1 rs2        => Porw rd rs1 rs2
-  | PArithRRR Asmvliw.Pnorw rd rs1 rs2       => Pnorw rd rs1 rs2
-  | PArithRRR Asmvliw.Pxorw rd rs1 rs2       => Pxorw rd rs1 rs2
-  | PArithRRR Asmvliw.Pnxorw rd rs1 rs2      => Pnxorw rd rs1 rs2
-  | PArithRRR Asmvliw.Pandnw rd rs1 rs2      => Pandnw rd rs1 rs2
-  | PArithRRR Asmvliw.Pornw rd rs1 rs2       => Pornw rd rs1 rs2
-  | PArithRRR Asmvliw.Psraw rd rs1 rs2       => Psraw rd rs1 rs2
-  | PArithRRR Asmvliw.Psrxw rd rs1 rs2       => Psrxw rd rs1 rs2
-  | PArithRRR Asmvliw.Psrlw rd rs1 rs2       => Psrlw rd rs1 rs2
-  | PArithRRR Asmvliw.Psllw rd rs1 rs2       => Psllw rd rs1 rs2
-
-  | PArithRRR Asmvliw.Paddl rd rs1 rs2       => Paddl rd rs1 rs2
-  | PArithRRR (Asmvliw.Paddxl shift) rd rs1 rs2 => Paddxl shift rd rs1 rs2
-  | PArithRRR Asmvliw.Psubl rd rs1 rs2       => Psubl rd rs1 rs2
-  | PArithRRR (Asmvliw.Prevsubxl shift) rd rs1 rs2 => Prevsubxl shift rd rs1 rs2
-  | PArithRRR Asmvliw.Pandl rd rs1 rs2       => Pandl rd rs1 rs2
-  | PArithRRR Asmvliw.Pnandl rd rs1 rs2      => Pnandl rd rs1 rs2
-  | PArithRRR Asmvliw.Porl rd rs1 rs2        => Porl rd rs1 rs2
-  | PArithRRR Asmvliw.Pnorl rd rs1 rs2       => Pnorl rd rs1 rs2
-  | PArithRRR Asmvliw.Pxorl rd rs1 rs2       => Pxorl rd rs1 rs2
-  | PArithRRR Asmvliw.Pnxorl rd rs1 rs2      => Pnxorl rd rs1 rs2
-  | PArithRRR Asmvliw.Pandnl rd rs1 rs2      => Pandnl rd rs1 rs2
-  | PArithRRR Asmvliw.Pornl rd rs1 rs2       => Pornl rd rs1 rs2
-  | PArithRRR Asmvliw.Pmull rd rs1 rs2       => Pmull rd rs1 rs2
-  | PArithRRR Asmvliw.Pslll rd rs1 rs2       => Pslll rd rs1 rs2
-  | PArithRRR Asmvliw.Psrll rd rs1 rs2       => Psrll rd rs1 rs2
-  | PArithRRR Asmvliw.Psral rd rs1 rs2       => Psral rd rs1 rs2
-  | PArithRRR Asmvliw.Psrxl rd rs1 rs2       => Psrxl rd rs1 rs2
-
-  | PArithRRR Asmvliw.Pfaddd rd rs1 rs2      => Pfaddd rd rs1 rs2
-  | PArithRRR Asmvliw.Pfaddw rd rs1 rs2      => Pfaddw rd rs1 rs2
-  | PArithRRR Asmvliw.Pfsbfd rd rs1 rs2      => Pfsbfd rd rs1 rs2
-  | PArithRRR Asmvliw.Pfsbfw rd rs1 rs2      => Pfsbfw rd rs1 rs2
-  | PArithRRR Asmvliw.Pfmuld rd rs1 rs2      => Pfmuld rd rs1 rs2
-  | PArithRRR Asmvliw.Pfmulw rd rs1 rs2      => Pfmulw rd rs1 rs2
-  | PArithRRR Asmvliw.Pfmind rd rs1 rs2      => Pfmind rd rs1 rs2
-  | PArithRRR Asmvliw.Pfminw rd rs1 rs2      => Pfminw rd rs1 rs2
-  | PArithRRR Asmvliw.Pfmaxd rd rs1 rs2      => Pfmaxd rd rs1 rs2
-  | PArithRRR Asmvliw.Pfmaxw rd rs1 rs2      => Pfmaxw rd rs1 rs2
-
-  (* RRI32 *)
-  | PArithRRI32 (Asmvliw.Pcompiw it) rd rs imm => Pcompiw it rd rs imm
-  | PArithRRI32 Asmvliw.Paddiw rd rs imm       => Paddiw rd rs imm
-  | PArithRRI32 (Asmvliw.Paddxiw shift) rd rs imm => Paddxiw shift rd rs imm
-  | PArithRRI32 Asmvliw.Prevsubiw rd rs imm       => Prevsubiw rd rs imm
-  | PArithRRI32 (Asmvliw.Prevsubxiw shift) rd rs imm => Prevsubxiw shift rd rs imm
-  | PArithRRI32 Asmvliw.Pmuliw rd rs imm       => Pmuliw rd rs imm
-  | PArithRRI32 Asmvliw.Pandiw rd rs imm       => Pandiw rd rs imm
-  | PArithRRI32 Asmvliw.Pnandiw rd rs imm      => Pnandiw rd rs imm
-  | PArithRRI32 Asmvliw.Poriw rd rs imm        => Poriw rd rs imm
-  | PArithRRI32 Asmvliw.Pnoriw rd rs imm       => Pnoriw rd rs imm
-  | PArithRRI32 Asmvliw.Pxoriw rd rs imm       => Pxoriw rd rs imm
-  | PArithRRI32 Asmvliw.Pnxoriw rd rs imm      => Pnxoriw rd rs imm
-  | PArithRRI32 Asmvliw.Pandniw rd rs imm      => Pandniw rd rs imm
-  | PArithRRI32 Asmvliw.Porniw rd rs imm       => Porniw rd rs imm
-  | PArithRRI32 Asmvliw.Psraiw rd rs imm       => Psraiw rd rs imm
-  | PArithRRI32 Asmvliw.Psrxiw rd rs imm       => Psrxiw rd rs imm
-  | PArithRRI32 Asmvliw.Psrliw rd rs imm       => Psrliw rd rs imm
-  | PArithRRI32 Asmvliw.Pslliw rd rs imm       => Pslliw rd rs imm
-  | PArithRRI32 Asmvliw.Proriw rd rs imm       => Proriw rd rs imm
-  | PArithRRI32 Asmvliw.Psllil rd rs imm       => Psllil rd rs imm
-  | PArithRRI32 Asmvliw.Psrlil rd rs imm       => Psrlil rd rs imm
-  | PArithRRI32 Asmvliw.Psrxil rd rs imm       => Psrxil rd rs imm
-  | PArithRRI32 Asmvliw.Psrail rd rs imm       => Psrail rd rs imm
-
-  (* RRI64 *)
-  | PArithRRI64 (Asmvliw.Pcompil it) rd rs imm => Pcompil it rd rs imm
-  | PArithRRI64 Asmvliw.Paddil rd rs imm       => Paddil rd rs imm
-  | PArithRRI64 (Asmvliw.Paddxil shift) rd rs imm => Paddxil shift rd rs imm
-  | PArithRRI64 Asmvliw.Prevsubil rd rs imm       => Prevsubil rd rs imm
-  | PArithRRI64 (Asmvliw.Prevsubxil shift) rd rs imm => Prevsubxil shift rd rs imm
-  | PArithRRI64 Asmvliw.Pmulil rd rs imm       => Pmulil rd rs imm
-  | PArithRRI64 Asmvliw.Pandil rd rs imm       => Pandil rd rs imm
-  | PArithRRI64 Asmvliw.Pnandil rd rs imm      => Pnandil rd rs imm
-  | PArithRRI64 Asmvliw.Poril rd rs imm        => Poril rd rs imm
-  | PArithRRI64 Asmvliw.Pnoril rd rs imm       => Pnoril rd rs imm
-  | PArithRRI64 Asmvliw.Pxoril rd rs imm       => Pxoril rd rs imm
-  | PArithRRI64 Asmvliw.Pnxoril rd rs imm      => Pnxoril rd rs imm
-  | PArithRRI64 Asmvliw.Pandnil rd rs imm      => Pandnil rd rs imm
-  | PArithRRI64 Asmvliw.Pornil rd rs imm       => Pornil rd rs imm
-
-  (** ARRR *)
-  | PArithARRR Asmvliw.Pmaddw rd rs1 rs2       => Pmaddw rd rs1 rs2
-  | PArithARRR Asmvliw.Pmaddl rd rs1 rs2       => Pmaddl rd rs1 rs2
-  | PArithARRR Asmvliw.Pmsubw rd rs1 rs2       => Pmsubw rd rs1 rs2
-  | PArithARRR Asmvliw.Pmsubl rd rs1 rs2       => Pmsubl rd rs1 rs2
-  | PArithARRR Asmvliw.Pfmaddfw rd rs1 rs2     => Pfmaddfw rd rs1 rs2
-  | PArithARRR Asmvliw.Pfmaddfl rd rs1 rs2     => Pfmaddfl rd rs1 rs2
-  | PArithARRR Asmvliw.Pfmsubfw rd rs1 rs2     => Pfmsubfw rd rs1 rs2
-  | PArithARRR Asmvliw.Pfmsubfl rd rs1 rs2     => Pfmsubfl rd rs1 rs2
-  | PArithARRR (Asmvliw.Pcmove cond) rd rs1 rs2=> Pcmove cond rd rs1 rs2
-  | PArithARRR (Asmvliw.Pcmoveu cond) rd rs1 rs2=> Pcmoveu cond rd rs1 rs2
-
-  (** ARR *)
-  | PArithARR (Asmvliw.Pinsf stop start) rd rs  => Pinsf rd rs stop start
-  | PArithARR (Asmvliw.Pinsfl stop start) rd rs  => Pinsfl rd rs stop start
-
-  (** ARRI32 *)
-  | PArithARRI32 Asmvliw.Pmaddiw rd rs1 imm    => Pmaddiw rd rs1 imm
-  | PArithARRI32 (Asmvliw.Pcmoveiw cond) rd rs1 imm => Pcmoveiw cond rd rs1 imm
-  | PArithARRI32 (Asmvliw.Pcmoveuiw cond) rd rs1 imm => Pcmoveuiw cond rd rs1 imm
-
-  (** ARRI64 *)
-  | PArithARRI64 Asmvliw.Pmaddil rd rs1 imm    => Pmaddil rd rs1 imm
-  | PArithARRI64 (Asmvliw.Pcmoveil cond) rd rs1 imm => Pcmoveil cond rd rs1 imm
-  | PArithARRI64 (Asmvliw.Pcmoveuil cond) rd rs1 imm => Pcmoveuil cond rd rs1 imm                                                          
-  (** Load *)
-  | PLoadRRO Asmvliw.Plb rd ra ofs   => Plb rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Plbu rd ra ofs  => Plbu rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Plh rd ra ofs   => Plh rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Plhu rd ra ofs  => Plhu rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Plw rd ra ofs   => Plw rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Plw_a rd ra ofs => Plw_a rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Pld rd ra ofs   => Pld rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Pld_a rd ra ofs => Pld_a rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Pfls rd ra ofs  => Pfls rd ra (AOff ofs)
-  | PLoadRRO Asmvliw.Pfld rd ra ofs  => Pfld rd ra (AOff ofs)
-
-  | PLoadQRRO qrs ra ofs => Plq qrs ra (AOff ofs)
-  | PLoadORRO qrs ra ofs => Plo qrs ra (AOff ofs)
-
-  | PLoadRRR Asmvliw.Plb rd ra ro   => Plb rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Plbu rd ra ro  => Plbu rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Plh rd ra ro   => Plh rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Plhu rd ra ro  => Plhu rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Plw rd ra ro   => Plw rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Plw_a rd ra ro => Plw_a rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Pld rd ra ro   => Pld rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Pld_a rd ra ro => Pld_a rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Pfls rd ra ro  => Pfls rd ra (AReg ro)
-  | PLoadRRR Asmvliw.Pfld rd ra ro  => Pfld rd ra (AReg ro)
-
-  | PLoadRRRXS Asmvliw.Plb rd ra ro   => Plb rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Plbu rd ra ro  => Plbu rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Plh rd ra ro   => Plh rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Plhu rd ra ro  => Plhu rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Plw rd ra ro   => Plw rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Plw_a rd ra ro => Plw_a rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Pld rd ra ro   => Pld rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Pld_a rd ra ro => Pld_a rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Pfls rd ra ro  => Pfls rd ra (ARegXS ro)
-  | PLoadRRRXS Asmvliw.Pfld rd ra ro  => Pfld rd ra (ARegXS ro)
-
-  (** Store *)
-  | PStoreRRO Asmvliw.Psb rd ra ofs  => Psb rd ra (AOff ofs)
-  | PStoreRRO Asmvliw.Psh rd ra ofs => Psh rd ra (AOff ofs)
-  | PStoreRRO Asmvliw.Psw rd ra ofs => Psw rd ra (AOff ofs)
-  | PStoreRRO Asmvliw.Psw_a rd ra ofs => Psw_a rd ra (AOff ofs)
-  | PStoreRRO Asmvliw.Psd rd ra ofs => Psd rd ra (AOff ofs)
-  | PStoreRRO Asmvliw.Psd_a rd ra ofs => Psd_a rd ra (AOff ofs)
-  | PStoreRRO Asmvliw.Pfss rd ra ofs => Pfss rd ra (AOff ofs)
-  | PStoreRRO Asmvliw.Pfsd rd ra ofs => Pfsd rd ra (AOff ofs)
-
-  | PStoreRRR Asmvliw.Psb rd ra ro  => Psb rd ra (AReg ro)
-  | PStoreRRR Asmvliw.Psh rd ra ro => Psh rd ra (AReg ro)
-  | PStoreRRR Asmvliw.Psw rd ra ro => Psw rd ra (AReg ro)
-  | PStoreRRR Asmvliw.Psw_a rd ra ro => Psw_a rd ra (AReg ro)
-  | PStoreRRR Asmvliw.Psd rd ra ro => Psd rd ra (AReg ro)
-  | PStoreRRR Asmvliw.Psd_a rd ra ro => Psd_a rd ra (AReg ro)
-  | PStoreRRR Asmvliw.Pfss rd ra ro => Pfss rd ra (AReg ro)
-  | PStoreRRR Asmvliw.Pfsd rd ra ro => Pfsd rd ra (AReg ro)
-
-  | PStoreRRRXS Asmvliw.Psb rd ra ro  => Psb rd ra (ARegXS ro)
-  | PStoreRRRXS Asmvliw.Psh rd ra ro => Psh rd ra (ARegXS ro)
-  | PStoreRRRXS Asmvliw.Psw rd ra ro => Psw rd ra (ARegXS ro)
-  | PStoreRRRXS Asmvliw.Psw_a rd ra ro => Psw_a rd ra (ARegXS ro)
-  | PStoreRRRXS Asmvliw.Psd rd ra ro => Psd rd ra (ARegXS ro)
-  | PStoreRRRXS Asmvliw.Psd_a rd ra ro => Psd_a rd ra (ARegXS ro)
-  | PStoreRRRXS Asmvliw.Pfss rd ra ro => Pfss rd ra (ARegXS ro)
-  | PStoreRRRXS Asmvliw.Pfsd rd ra ro => Pfsd rd ra (ARegXS ro)
-
-  | PStoreQRRO qrs ra ofs => Psq qrs ra (AOff ofs)
-  | PStoreORRO qrs ra ofs => Pso qrs ra (AOff ofs)
-  end.
-
-Section RELSEM.
-
-Definition code := list instruction.
-
-Fixpoint unfold_label (ll: list label) :=
-  match ll with
-  | nil => nil
-  | l :: ll => Plabel l :: unfold_label ll
-  end.
-
-Fixpoint unfold_body (lb: list basic) :=
-  match lb with
-  | nil => nil
-  | b :: lb => basic_to_instruction b :: unfold_body lb
-  end.
-
-Definition unfold_exit (oc: option control) :=
-  match oc with
-  | None => nil
-  | Some c => control_to_instruction c :: nil
-  end.
-
-Definition unfold_bblock (b: bblock) := unfold_label (header b) ++
-  (match (body b), (exit b) with
-   | (((Asmvliw.Pfreeframe _ _ | Asmvliw.Pallocframe _ _)::nil) as bo), None =>
-     unfold_body bo
-   | bo, ex => unfold_body bo ++ unfold_exit ex ++ Psemi :: nil
-  end).
-
-Fixpoint unfold (lb: bblocks) :=
-  match lb with
-  | nil => nil
-  | b :: lb => (unfold_bblock b) ++ unfold lb
-  end.
-
-Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks; fn_code: code; 
-                                       correct: unfold fn_blocks = fn_code }.
-
-Definition fundef := AST.fundef function.
-Definition program := AST.program fundef unit.
-Definition genv := Genv.t fundef unit.
-
-Definition function_proj (f: function) := Asmvliw.mkfunction (fn_sig f) (fn_blocks f).
-
-Definition fundef_proj (fu: fundef) : Asmvliw.fundef := 
-  match fu with
-  | Internal f => Internal (function_proj f)
-  | External ef => External ef
-  end.
-
-Definition globdef_proj (gd: globdef fundef unit) : globdef Asmvliw.fundef unit :=
-  match gd with
-  | Gfun f => Gfun (fundef_proj f)
-  | Gvar gu => Gvar gu
-  end.
-
-Program Definition genv_trans (ge: genv) : Asmvliw.genv :=
-  {|  Genv.genv_public := Genv.genv_public ge;
-      Genv.genv_symb := Genv.genv_symb ge;
-      Genv.genv_defs := PTree.map1 globdef_proj (Genv.genv_defs ge);
-      Genv.genv_next := Genv.genv_next ge |}.
-Next Obligation.
-  destruct ge. simpl in *. eauto.
-Qed. Next Obligation.
-  destruct ge; simpl in *.
-  rewrite PTree.gmap1 in H.
-  destruct (genv_defs ! b) eqn:GEN.
-  - eauto.
-  - discriminate.
-Qed. Next Obligation.
-  destruct ge; simpl in *.
-  eauto.
-Qed.
-
-Fixpoint prog_defs_proj (l: list (ident * globdef fundef unit))
-                          : list (ident * globdef Asmvliw.fundef unit) :=
-  match l with
-  | nil => nil
-  | (i, gd) :: l => (i, globdef_proj gd) :: prog_defs_proj l
-  end.
-
-Definition program_proj (p: program) : Asmvliw.program :=
-  {|  prog_defs := prog_defs_proj (prog_defs p);
-      prog_public := prog_public p;
-      prog_main := prog_main p
-  |}.
-
-End RELSEM.
-
-Definition semantics (p: program) := Asmvliw.semantics (program_proj p).
-
-(** Determinacy of the [Asm] semantics. *)
-
-Lemma semantics_determinate: forall p, determinate (semantics p).
-Proof.
-  intros. apply semantics_determinate.
-Qed.
-
-(** transf_program *)
-
-Program Definition transf_function (f: Asmvliw.function) : function :=
-     {| fn_sig := Asmvliw.fn_sig f; fn_blocks := Asmvliw.fn_blocks f; 
-        fn_code := unfold (Asmvliw.fn_blocks f) |}.
-
-Lemma transf_function_proj: forall f, function_proj (transf_function f) = f.
-Proof.
-  intros f. destruct f as [sig blks]. unfold function_proj. simpl. auto.
-Qed.
-
-Definition transf_fundef : Asmvliw.fundef -> fundef := AST.transf_fundef transf_function.
-
-Lemma transf_fundef_proj: forall f, fundef_proj (transf_fundef f) = f.
-Proof.
-  intros f. destruct f as [f|e]; simpl; auto.
-  rewrite transf_function_proj. auto.
-Qed.
-
-Definition transf_program : Asmvliw.program -> program := transform_program transf_fundef.
-
-Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
-  prog_defs p1 = prog_defs p2 ->
-  prog_public p1 = prog_public p2 ->
-  prog_main p1 = prog_main p2 ->
-  p1 = p2.
-Proof.
-  intros. destruct p1. destruct p2. simpl in *. subst. auto.
-Qed.
-
-Lemma transf_program_proj: forall p, program_proj (transf_program p) = p.
-Proof.
-  intros p. destruct p as [defs pub main]. unfold program_proj. simpl.
-  apply program_equals; simpl; auto.
-  induction defs.
-  - simpl; auto.
-  - simpl. rewrite IHdefs. 
-    destruct a as [id gd]; simpl.
-    destruct gd as [f|v]; simpl; auto.
-    rewrite transf_fundef_proj. auto.
-Qed.
-
-Definition match_prog (p: Asmvliw.program) (tp: program) :=
-  match_program (fun _ f tf => tf = transf_fundef f) eq p tp.
-
-Lemma transf_program_match:
-  forall p tp, transf_program p = tp -> match_prog p tp.
-Proof.
-  intros. rewrite <- H. eapply match_transform_program; eauto.
-Qed.
-
-Lemma cons_extract {A: Type} : forall (l: list A) a b, a = b -> a::l = b::l.
-Proof.
-  intros. congruence.
-Qed.
-
-Lemma match_program_transf:
-  forall p tp, match_prog p tp -> transf_program p = tp.
-Proof.
-  intros p tp H. inversion_clear H. inv H1.
-  destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. simpl in *.
-  subst. unfold transf_program. unfold transform_program. simpl.
-  apply program_equals; simpl; auto.
-  induction H0; simpl; auto.
-  rewrite IHlist_forall2. apply cons_extract.
-  destruct a1 as [ida gda]. destruct b1 as [idb gdb].
-  simpl in *.
-  inv H. inv H2.
-  - simpl in *. subst. auto.
-  - simpl in *. subst. inv H. auto.
-Qed.
-
-Section PRESERVATION.
-
-Variable prog: Asmvliw.program.
-Variable tprog: program.
-Hypothesis TRANSF: match_prog prog tprog.
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-Definition match_states (s1 s2: state) := s1 = s2.
-
-Lemma symbols_preserved:
-  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof (Genv.find_symbol_match TRANSF).
-
-Lemma senv_preserved:
-  Senv.equiv ge tge.
-Proof (Genv.senv_match TRANSF).
-
-
-Theorem transf_program_correct:
-  forward_simulation (Asmvliw.semantics prog) (semantics tprog).
-Proof.
-  pose proof (match_program_transf prog tprog TRANSF) as TR.
-  subst. unfold semantics. rewrite transf_program_proj.
-
-  eapply forward_simulation_step with (match_states := match_states); simpl; auto.
-  - intros. exists s1. split; auto. congruence.
-  - intros. inv H. auto.
-  - intros. exists s1'. inv H0. split; auto. congruence.
-Qed.
-
-End PRESERVATION.
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** * Abstract syntax for K1c textual assembly language.
+
+    Each emittable instruction is defined here. ';;' is also defined as an instruction.
+    The goal of this representation is to stay compatible with the rest of the generic backend of CompCert
+    We define [unfold : list bblock -> list instruction]
+    An Asm function is then defined as : [fn_sig], [fn_blocks], [fn_code], and a proof of [unfold fn_blocks = fn_code]
+    [fn_code] has no semantic. Instead, the semantic of Asm is given by using the AsmVLIW semantic on [fn_blocks] *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import ExtValues.
+Require Import Memory.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Locations.
+Require Stacklayout.
+Require Import Conventions.
+Require Import Asmvliw.
+Require Import Linking.
+Require Import Errors.
+
+(** Definitions for OCaml code *)
+Definition label := positive.
+Definition preg := preg.
+
+Inductive addressing : Type :=
+  | AOff (ofs: offset)
+  | AReg (ro: ireg)
+  | ARegXS (ro: ireg)
+.
+
+(** Syntax *)
+Inductive instruction : Type :=
+  (** pseudo instructions *)
+  | Pallocframe (sz: Z) (pos: ptrofs)               (**r allocate new stack frame *)
+  | Pfreeframe  (sz: Z) (pos: ptrofs)               (**r deallocate stack frame and restore previous frame *)
+  | Plabel  (lbl: label)                            (**r define a code label *)
+  | Ploadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the address of a symbol *)
+  | Pbuiltin: external_function -> list (builtin_arg preg)
+              -> builtin_res preg -> instruction   (**r built-in function (pseudo) *)
+  | Psemi                                           (**r semi colon separating bundles *)
+  | Pnop                                            (**r instruction that does nothing *)
+
+  (** Control flow instructions *)
+  | Pget    (rd: ireg) (rs: preg)                   (**r get system register *)
+  | Pset    (rd: preg) (rs: ireg)                   (**r set system register *)
+  | Pret                                            (**r return *)
+  | Pcall   (l: label)                              (**r function call *)
+  | Picall  (rs: ireg)                              (**r function call on register *)
+  (* Pgoto is for tailcalls, Pj_l is for jumping to a particular label *)
+  | Pgoto   (l: label)                              (**r goto *)
+  | Pigoto  (rs: ireg)                              (**r goto from register *)
+  | Pj_l    (l: label)                              (**r jump to label *)
+  | Pcb     (bt: btest) (r: ireg) (l: label)        (**r branch based on btest *)
+  | Pcbu    (bt: btest) (r: ireg) (l: label)        (**r branch based on btest with unsigned semantics *)
+  | Pjumptable (r: ireg) (labels: list label)
+
+  (* For builtins *)
+  | Ploopdo (count: ireg) (loopend: label)
+  | Pgetn   (n: int) (dst: ireg)
+  | Psetn   (n: int) (src: ireg)
+  | Pwfxl   (n: int) (src: ireg)
+  | Pwfxm   (n: int) (src: ireg)
+  | Pldu    (dst: ireg) (addr: ireg)
+  | Plbzu    (dst: ireg) (addr: ireg)
+  | Plhzu    (dst: ireg) (addr: ireg)
+  | Plwzu    (dst: ireg) (addr: ireg)
+  | Pawait
+  | Psleep
+  | Pstop
+  | Pbarrier
+  | Pfence
+  | Pdinval
+  | Pdinvall (addr: ireg)
+  | Pdtouchl (addr: ireg)
+  | Piinval
+  | Piinvals (addr: ireg)
+  | Pitouchl (addr: ireg)
+  | Pdzerol (addr: ireg)
+(*| Pafaddd (addr: ireg) (incr_res: ireg)
+  | Pafaddw (addr: ireg) (incr_res: ireg) *) (* see #157 *)
+  | Palclrd (dst: ireg) (addr: ireg)
+  | Palclrw (dst: ireg) (addr: ireg)
+  | Pclzll (rd rs: ireg)
+  | Pstsud (rd rs1 rs2: ireg)
+            
+  (** Loads **)
+  | Plb     (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load byte *)
+  | Plbu    (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load byte unsigned *)
+  | Plh     (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load half word *)
+  | Plhu    (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load half word unsigned *)
+  | Plw     (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load int32 *)
+  | Plw_a   (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load any32 *)
+  | Pld     (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load int64 *)
+  | Pld_a   (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load any64 *)
+  | Pfls    (trap: trapping_mode) (rd: freg) (ra: ireg) (ofs: addressing)     (**r load float *)
+  | Pfld    (trap: trapping_mode) (rd: freg) (ra: ireg) (ofs: addressing)    (**r load 64-bit float *)
+  | Plq     (rs: gpreg_q) (ra: ireg) (ofs: addressing)  (**r load 2*64-bit *)
+  | Plo     (rs: gpreg_o) (ra: ireg) (ofs: addressing)  (**r load 4*64-bit *)
+
+  (** Stores **)
+  | Psb     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store byte *)
+  | Psh     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store half byte *)
+  | Psw     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store int32 *)
+  | Psw_a   (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store any32 *)
+  | Psd     (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store int64 *)
+  | Psd_a   (rs: ireg) (ra: ireg) (ofs: addressing)     (**r store any64 *)
+  | Pfss    (rs: freg) (ra: ireg) (ofs: addressing)     (**r store float *)
+  | Pfsd    (rs: freg) (ra: ireg) (ofs: addressing)    (**r store 64-bit float *)
+
+  | Psq     (rs: gpreg_q) (ra: ireg) (ofs: addressing)  (**r store 2*64-bit *)
+  | Pso     (rs: gpreg_o) (ra: ireg) (ofs: addressing)  (**r store 2*64-bit *)
+
+  (** Arith RR *)
+  | Pmv     (rd rs: ireg)                           (**r register move *)
+  | Pnegw   (rd rs: ireg)                           (**r negate word *)
+  | Pnegl   (rd rs: ireg)                           (**r negate long *)
+  | Pcvtl2w (rd rs: ireg)                           (**r Convert Long to Word *)
+  | Psxwd   (rd rs: ireg)                           (**r Sign Extend Word to Double Word *)
+  | Pzxwd   (rd rs: ireg)                           (**r Zero Extend Word to Double Word *)
+
+  | Pextfz  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *)
+  | Pextfs  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *)
+
+  | Pextfzl  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *)
+  | Pextfsl  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *)
+
+  | Pinsf  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *)
+  | Pinsfl  (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *)
+
+  | Pfabsd  (rd rs: ireg)                           (**r float absolute double *)
+  | Pfabsw  (rd rs: ireg)                           (**r float absolute word *)
+  | Pfnegd  (rd rs: ireg)                           (**r float negate double *)
+  | Pfnegw  (rd rs: ireg)                           (**r float negate word *)
+  | Pfnarrowdw (rd rs: ireg)                        (**r float narrow 64 -> 32 bits *)
+  | Pfwidenlwd (rd rs: ireg)                        (**r float widen 32 -> 64 bits *)
+  | Pfloatwrnsz (rd rs: ireg)                       (**r Floating Point Conversion from integer (32 -> 32) *)
+  | Pfloatuwrnsz (rd rs: ireg)                      (**r Floating Point Conversion from integer (u32 -> 32) *)
+  | Pfloatudrnsz (rd rs: ireg)                       (**r Floating Point Conversion from unsigned integer (64 bits) *)
+  | Pfloatdrnsz (rd rs: ireg)                       (**r Floating Point Conversion from integer (64 bits) *)
+  | Pfixedwrzz (rd rs: ireg)                        (**r Integer conversion from floating point *)
+  | Pfixeduwrzz (rd rs: ireg)                        (**r Integer conversion from floating point (f32 -> 32 bits unsigned *)
+  | Pfixeddrzz (rd rs: ireg)                        (**r Integer conversion from floating point (i64 -> 64 bits) *)
+  | Pfixeddrzz_i32 (rd rs: ireg)                        (**r Integer conversion from floating point (i32 -> f64) *)
+  | Pfixedudrzz (rd rs: ireg)                        (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *)
+  | Pfixedudrzz_i32 (rd rs: ireg)                        (**r unsigned Integer conversion from floating point (u32 -> 64 bits) *)
+
+  (** Arith RI32 *)
+  | Pmake   (rd: ireg) (imm: int)                   (**r load immediate *)
+
+  (** Arith RI64 *)
+  | Pmakel  (rd: ireg) (imm: int64)                 (**r load immediate long *)
+
+  (** Arith RF32 *)
+  | Pmakefs (rd: ireg) (imm: float32)
+
+  (** Arith RF64 *)
+  | Pmakef  (rd: ireg) (imm: float)
+
+  (** Arith RRR *)
+  | Pcompw  (it: itest) (rd rs1 rs2: ireg)          (**r comparison word *)
+  | Pcompl  (it: itest) (rd rs1 rs2: ireg)          (**r comparison long *)
+  | Pfcompw (ft: ftest) (rd rs1 rs2: ireg)          (**r comparison float *)
+  | Pfcompl (ft: ftest) (rd rs1 rs2: ireg)          (**r comparison float64 *)
+
+  | Paddw               (rd rs1 rs2: ireg)          (**r add word *)
+  | Paddxw (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r add word *)
+  | Psubw               (rd rs1 rs2: ireg)          (**r sub word *)
+  | Prevsubxw (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r add word *)
+  | Pmulw               (rd rs1 rs2: ireg)          (**r mul word *)
+  | Pandw               (rd rs1 rs2: ireg)          (**r and word *)
+  | Pnandw              (rd rs1 rs2: ireg)          (**r nand word *)
+  | Porw                (rd rs1 rs2: ireg)          (**r or word *)
+  | Pnorw               (rd rs1 rs2: ireg)          (**r nor word *)
+  | Pxorw               (rd rs1 rs2: ireg)          (**r xor word *)
+  | Pnxorw              (rd rs1 rs2: ireg)          (**r xor word *)
+  | Pandnw              (rd rs1 rs2: ireg)          (**r andn word *)
+  | Pornw               (rd rs1 rs2: ireg)          (**r orn word *)
+  | Psraw               (rd rs1 rs2: ireg)          (**r shift right arithmetic word *)
+  | Psrxw               (rd rs1 rs2: ireg)          (**r shift right arithmetic word round to 0*)
+  | Psrlw               (rd rs1 rs2: ireg)          (**r shift right logical word *)
+  | Psllw               (rd rs1 rs2: ireg)          (**r shift left logical word *)
+  | Pmaddw              (rd rs1 rs2: ireg)          (**r multiply-add words *)
+  | Pmsubw              (rd rs1 rs2: ireg)          (**r multiply-add words *)
+  | Pfmaddfw            (rd rs1 rs2: ireg)          (**r float fused multiply-add words *)
+  | Pfmsubfw            (rd rs1 rs2: ireg)          (**r float fused multiply-subtract words *)
+  | Pfmaddfl            (rd rs1 rs2: ireg)          (**r float fused multiply-add longs *)
+  | Pfmsubfl            (rd rs1 rs2: ireg)          (**r float fused multiply-subtract longs *)
+
+  | Paddl               (rd rs1 rs2: ireg)          (**r add long *)
+  | Paddxl (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r add long shift *)
+  | Psubl               (rd rs1 rs2: ireg)          (**r sub long *)
+  | Prevsubxl (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r sub long shift *)
+  | Pandl               (rd rs1 rs2: ireg)          (**r and long *)
+  | Pnandl              (rd rs1 rs2: ireg)          (**r nand long *)
+  | Porl                (rd rs1 rs2: ireg)          (**r or long *)
+  | Pnorl               (rd rs1 rs2: ireg)          (**r nor long *)
+  | Pxorl               (rd rs1 rs2: ireg)          (**r xor long *)
+  | Pnxorl              (rd rs1 rs2: ireg)          (**r nxor long *)
+  | Pandnl              (rd rs1 rs2: ireg)          (**r andn long *)
+  | Pornl               (rd rs1 rs2: ireg)          (**r orn long *)
+  | Pmull               (rd rs1 rs2: ireg)          (**r mul long (low part) *)
+  | Pslll               (rd rs1 rs2: ireg)          (**r shift left logical long *)
+  | Psrll               (rd rs1 rs2: ireg)          (**r shift right logical long *)
+  | Psral               (rd rs1 rs2: ireg)          (**r shift right arithmetic long *)
+  | Psrxl               (rd rs1 rs2: ireg)          (**r shift right arithmetic long round to 0*)
+  | Pmaddl              (rd rs1 rs2: ireg)          (**r multiply-add long *)
+  | Pmsubl              (rd rs1 rs2: ireg)          (**r multiply-add long *)
+
+  | Pfaddd              (rd rs1 rs2: ireg)          (**r Float addition double *)
+  | Pfaddw              (rd rs1 rs2: ireg)          (**r Float addition word *)
+  | Pfsbfd              (rd rs1 rs2: ireg)          (**r Float sub double *)
+  | Pfsbfw              (rd rs1 rs2: ireg)          (**r Float sub word *)
+  | Pfmuld              (rd rs1 rs2: ireg)          (**r Float mul double *)
+  | Pfmulw              (rd rs1 rs2: ireg)          (**r Float mul word *)
+  | Pfmind              (rd rs1 rs2: ireg)          (**r Float min double *)
+  | Pfminw              (rd rs1 rs2: ireg)          (**r Float min word *)
+  | Pfmaxd              (rd rs1 rs2: ireg)          (**r Float max double *)
+  | Pfmaxw              (rd rs1 rs2: ireg)          (**r Float max word *)
+  | Pfinvw              (rd rs1: ireg)              (**r Float invert word *)
+                        
+  (** Arith RRI32 *)
+  | Pcompiw (it: itest) (rd rs: ireg) (imm: int)    (**r comparison imm word *)
+
+  | Paddiw              (rd rs: ireg) (imm: int)    (**r add imm word *)
+  | Paddxiw (shift : shift1_4) (rd rs: ireg) (imm: int)    (**r add imm word *)
+  | Prevsubiw              (rd rs: ireg) (imm: int)    (**r subtract imm word *)
+  | Prevsubxiw (shift : shift1_4) (rd rs: ireg) (imm: int)    (**r subtract imm word *)
+  | Pmuliw              (rd rs: ireg) (imm: int)    (**r mul imm word *)
+  | Pandiw              (rd rs: ireg) (imm: int)    (**r and imm word *)
+  | Pnandiw             (rd rs: ireg) (imm: int)    (**r nand imm word *)
+  | Poriw               (rd rs: ireg) (imm: int)    (**r or imm word *)
+  | Pnoriw              (rd rs: ireg) (imm: int)    (**r nor imm word *)
+  | Pxoriw              (rd rs: ireg) (imm: int)    (**r xor imm word *)
+  | Pnxoriw             (rd rs: ireg) (imm: int)    (**r nxor imm word *)
+  | Pandniw             (rd rs: ireg) (imm: int)    (**r andn imm word *)
+  | Porniw              (rd rs: ireg) (imm: int)    (**r orn imm word *)
+  | Psraiw              (rd rs: ireg) (imm: int)    (**r shift right arithmetic imm word *)
+  | Psrxiw              (rd rs: ireg) (imm: int)    (**r shift right arithmetic imm word round to 0*)
+  | Psrliw              (rd rs: ireg) (imm: int)    (**r shift right logical imm word *)
+  | Pslliw              (rd rs: ireg) (imm: int)    (**r shift left logical imm word *)
+  | Proriw              (rd rs: ireg) (imm: int)    (**r rotate right imm word *) 
+  | Pmaddiw             (rd rs: ireg) (imm: int)    (**r multiply add imm word *)
+  | Psllil              (rd rs: ireg) (imm: int)    (**r shift left logical immediate long *)
+  | Psrxil              (rd rs: ireg) (imm: int)    (**r shift right arithmetic imm word round to 0*)
+  | Psrlil              (rd rs: ireg) (imm: int)    (**r shift right logical immediate long *)
+  | Psrail              (rd rs: ireg) (imm: int)    (**r shift right arithmetic immediate long *)
+
+  (** Arith RRI64 *)
+  | Pcompil (it: itest) (rd rs: ireg) (imm: int64)  (**r comparison imm long *)
+  | Paddil              (rd rs: ireg) (imm: int64)  (**r add immediate long *) 
+  | Paddxil (shift : shift1_4) (rd rs: ireg) (imm: int64)  (**r add immediate long *) 
+  | Prevsubil              (rd rs: ireg) (imm: int64)  (**r subtract imm long *)
+  | Prevsubxil (shift : shift1_4) (rd rs: ireg) (imm: int64)  (**r subtract imm long *)
+  | Pmulil              (rd rs: ireg) (imm: int64)  (**r add immediate long *) 
+  | Pandil              (rd rs: ireg) (imm: int64)  (**r and immediate long *) 
+  | Pnandil             (rd rs: ireg) (imm: int64)  (**r and immediate long *) 
+  | Poril               (rd rs: ireg) (imm: int64)  (**r or immediate long *) 
+  | Pnoril              (rd rs: ireg) (imm: int64)  (**r and immediate long *) 
+  | Pxoril              (rd rs: ireg) (imm: int64)  (**r xor immediate long *)
+  | Pnxoril             (rd rs: ireg) (imm: int64)  (**r xor immediate long *)
+  | Pandnil             (rd rs: ireg) (imm: int64)  (**r andn long *)
+  | Pornil              (rd rs: ireg) (imm: int64)  (**r orn long *)
+  | Pmaddil             (rd rs: ireg) (imm: int64)  (**r multiply add imm long *)
+  | Pcmove (bt: btest) (rcond rd rs : ireg) (** conditional move *) 
+  | Pcmoveu (bt: btest) (rcond rd rs : ireg) (** conditional move, unsigned semantics *) 
+  | Pcmoveiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move *) 
+  | Pcmoveuiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move, unsigned semantics *) 
+  | Pcmoveil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move *) 
+  | Pcmoveuil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move, unsigned semantics *) 
+.
+
+(** Correspondance between Asmblock and Asm *)
+
+Definition control_to_instruction (c: control) :=
+  match c with
+  | PExpand (Asmvliw.Pbuiltin ef args res) => Pbuiltin ef args res
+  | PCtlFlow Asmvliw.Pret                  => Pret
+  | PCtlFlow (Asmvliw.Pcall l)             => Pcall l
+  | PCtlFlow (Asmvliw.Picall r)            => Picall r
+  | PCtlFlow (Asmvliw.Pgoto l)             => Pgoto l
+  | PCtlFlow (Asmvliw.Pigoto l)             => Pigoto l
+  | PCtlFlow (Asmvliw.Pj_l l)              => Pj_l l
+  | PCtlFlow (Asmvliw.Pcb bt r l)          => Pcb bt r l
+  | PCtlFlow (Asmvliw.Pcbu bt r l)         => Pcbu bt r l
+  | PCtlFlow (Asmvliw.Pjumptable r label)  => Pjumptable r label
+  end.
+
+Definition basic_to_instruction (b: basic) :=
+  match b with
+  (** Special basics *)
+  | Asmvliw.Pget rd rs         => Pget rd rs
+  | Asmvliw.Pset rd rs         => Pset rd rs
+  | Asmvliw.Pnop               => Pnop
+  | Asmvliw.Pallocframe sz pos => Pallocframe sz pos
+  | Asmvliw.Pfreeframe sz pos  => Pfreeframe sz pos
+
+  (** PArith basics *)
+  (* R *)
+  | PArithR (Asmvliw.Ploadsymbol id ofs) r => Ploadsymbol r id ofs
+
+  (* RR *)
+  | PArithRR Asmvliw.Pmv rd rs     => Pmv rd rs
+  | PArithRR Asmvliw.Pnegw rd rs   => Pnegw rd rs
+  | PArithRR Asmvliw.Pnegl rd rs   => Pnegl rd rs
+  | PArithRR Asmvliw.Pcvtl2w rd rs => Pcvtl2w rd rs
+  | PArithRR Asmvliw.Psxwd rd rs  => Psxwd rd rs
+  | PArithRR Asmvliw.Pzxwd rd rs  => Pzxwd rd rs
+  | PArithRR (Asmvliw.Pextfz stop start) rd rs => Pextfz rd rs stop start
+  | PArithRR (Asmvliw.Pextfs stop start) rd rs => Pextfs rd rs stop start
+  | PArithRR (Asmvliw.Pextfzl stop start) rd rs => Pextfzl rd rs stop start
+  | PArithRR (Asmvliw.Pextfsl stop start) rd rs => Pextfsl rd rs stop start
+  | PArithRR Asmvliw.Pfabsd rd rs => Pfabsd rd rs
+  | PArithRR Asmvliw.Pfabsw rd rs => Pfabsw rd rs
+  | PArithRR Asmvliw.Pfnegd rd rs  => Pfnegd rd rs
+  | PArithRR Asmvliw.Pfnegw rd rs => Pfnegw rd rs
+  | PArithRR Asmvliw.Pfinvw rd rs => Pfinvw rd rs
+  | PArithRR Asmvliw.Pfnarrowdw rd rs => Pfnarrowdw rd rs
+  | PArithRR Asmvliw.Pfwidenlwd rd rs => Pfwidenlwd rd rs
+  | PArithRR Asmvliw.Pfloatuwrnsz rd rs => Pfloatuwrnsz rd rs
+  | PArithRR Asmvliw.Pfloatwrnsz rd rs => Pfloatwrnsz rd rs
+  | PArithRR Asmvliw.Pfloatudrnsz rd rs => Pfloatudrnsz rd rs
+  | PArithRR Asmvliw.Pfloatdrnsz rd rs => Pfloatdrnsz rd rs
+  | PArithRR Asmvliw.Pfixedwrzz rd rs => Pfixedwrzz rd rs
+  | PArithRR Asmvliw.Pfixeduwrzz rd rs => Pfixeduwrzz rd rs
+  | PArithRR Asmvliw.Pfixeddrzz rd rs => Pfixeddrzz rd rs
+  | PArithRR Asmvliw.Pfixedudrzz rd rs => Pfixedudrzz rd rs
+  | PArithRR Asmvliw.Pfixeddrzz_i32 rd rs => Pfixeddrzz_i32 rd rs
+  | PArithRR Asmvliw.Pfixedudrzz_i32 rd rs => Pfixedudrzz_i32 rd rs
+
+  (* RI32 *)
+  | PArithRI32 Asmvliw.Pmake rd imm  => Pmake rd imm
+
+  (* RI64 *)
+  | PArithRI64 Asmvliw.Pmakel rd imm => Pmakel rd imm
+
+  (* RF32 *)
+  | PArithRF32 Asmvliw.Pmakefs rd imm => Pmakefs rd imm
+
+  (* RF64 *)
+  | PArithRF64 Asmvliw.Pmakef rd imm => Pmakef rd imm
+
+  (* RRR *)
+  | PArithRRR (Asmvliw.Pcompw it) rd rs1 rs2 => Pcompw it rd rs1 rs2
+  | PArithRRR (Asmvliw.Pcompl it) rd rs1 rs2 => Pcompl it rd rs1 rs2
+  | PArithRRR (Asmvliw.Pfcompw ft) rd rs1 rs2 => Pfcompw ft rd rs1 rs2
+  | PArithRRR (Asmvliw.Pfcompl ft) rd rs1 rs2 => Pfcompl ft rd rs1 rs2
+  | PArithRRR Asmvliw.Paddw rd rs1 rs2       => Paddw rd rs1 rs2
+  | PArithRRR (Asmvliw.Paddxw shift) rd rs1 rs2 => Paddxw shift rd rs1 rs2
+  | PArithRRR Asmvliw.Psubw rd rs1 rs2       => Psubw rd rs1 rs2
+  | PArithRRR (Asmvliw.Prevsubxw shift) rd rs1 rs2 => Prevsubxw shift rd rs1 rs2
+  | PArithRRR Asmvliw.Pmulw rd rs1 rs2       => Pmulw rd rs1 rs2
+  | PArithRRR Asmvliw.Pandw rd rs1 rs2       => Pandw rd rs1 rs2
+  | PArithRRR Asmvliw.Pnandw rd rs1 rs2      => Pnandw rd rs1 rs2
+  | PArithRRR Asmvliw.Porw rd rs1 rs2        => Porw rd rs1 rs2
+  | PArithRRR Asmvliw.Pnorw rd rs1 rs2       => Pnorw rd rs1 rs2
+  | PArithRRR Asmvliw.Pxorw rd rs1 rs2       => Pxorw rd rs1 rs2
+  | PArithRRR Asmvliw.Pnxorw rd rs1 rs2      => Pnxorw rd rs1 rs2
+  | PArithRRR Asmvliw.Pandnw rd rs1 rs2      => Pandnw rd rs1 rs2
+  | PArithRRR Asmvliw.Pornw rd rs1 rs2       => Pornw rd rs1 rs2
+  | PArithRRR Asmvliw.Psraw rd rs1 rs2       => Psraw rd rs1 rs2
+  | PArithRRR Asmvliw.Psrxw rd rs1 rs2       => Psrxw rd rs1 rs2
+  | PArithRRR Asmvliw.Psrlw rd rs1 rs2       => Psrlw rd rs1 rs2
+  | PArithRRR Asmvliw.Psllw rd rs1 rs2       => Psllw rd rs1 rs2
+
+  | PArithRRR Asmvliw.Paddl rd rs1 rs2       => Paddl rd rs1 rs2
+  | PArithRRR (Asmvliw.Paddxl shift) rd rs1 rs2 => Paddxl shift rd rs1 rs2
+  | PArithRRR Asmvliw.Psubl rd rs1 rs2       => Psubl rd rs1 rs2
+  | PArithRRR (Asmvliw.Prevsubxl shift) rd rs1 rs2 => Prevsubxl shift rd rs1 rs2
+  | PArithRRR Asmvliw.Pandl rd rs1 rs2       => Pandl rd rs1 rs2
+  | PArithRRR Asmvliw.Pnandl rd rs1 rs2      => Pnandl rd rs1 rs2
+  | PArithRRR Asmvliw.Porl rd rs1 rs2        => Porl rd rs1 rs2
+  | PArithRRR Asmvliw.Pnorl rd rs1 rs2       => Pnorl rd rs1 rs2
+  | PArithRRR Asmvliw.Pxorl rd rs1 rs2       => Pxorl rd rs1 rs2
+  | PArithRRR Asmvliw.Pnxorl rd rs1 rs2      => Pnxorl rd rs1 rs2
+  | PArithRRR Asmvliw.Pandnl rd rs1 rs2      => Pandnl rd rs1 rs2
+  | PArithRRR Asmvliw.Pornl rd rs1 rs2       => Pornl rd rs1 rs2
+  | PArithRRR Asmvliw.Pmull rd rs1 rs2       => Pmull rd rs1 rs2
+  | PArithRRR Asmvliw.Pslll rd rs1 rs2       => Pslll rd rs1 rs2
+  | PArithRRR Asmvliw.Psrll rd rs1 rs2       => Psrll rd rs1 rs2
+  | PArithRRR Asmvliw.Psral rd rs1 rs2       => Psral rd rs1 rs2
+  | PArithRRR Asmvliw.Psrxl rd rs1 rs2       => Psrxl rd rs1 rs2
+
+  | PArithRRR Asmvliw.Pfaddd rd rs1 rs2      => Pfaddd rd rs1 rs2
+  | PArithRRR Asmvliw.Pfaddw rd rs1 rs2      => Pfaddw rd rs1 rs2
+  | PArithRRR Asmvliw.Pfsbfd rd rs1 rs2      => Pfsbfd rd rs1 rs2
+  | PArithRRR Asmvliw.Pfsbfw rd rs1 rs2      => Pfsbfw rd rs1 rs2
+  | PArithRRR Asmvliw.Pfmuld rd rs1 rs2      => Pfmuld rd rs1 rs2
+  | PArithRRR Asmvliw.Pfmulw rd rs1 rs2      => Pfmulw rd rs1 rs2
+  | PArithRRR Asmvliw.Pfmind rd rs1 rs2      => Pfmind rd rs1 rs2
+  | PArithRRR Asmvliw.Pfminw rd rs1 rs2      => Pfminw rd rs1 rs2
+  | PArithRRR Asmvliw.Pfmaxd rd rs1 rs2      => Pfmaxd rd rs1 rs2
+  | PArithRRR Asmvliw.Pfmaxw rd rs1 rs2      => Pfmaxw rd rs1 rs2
+
+  (* RRI32 *)
+  | PArithRRI32 (Asmvliw.Pcompiw it) rd rs imm => Pcompiw it rd rs imm
+  | PArithRRI32 Asmvliw.Paddiw rd rs imm       => Paddiw rd rs imm
+  | PArithRRI32 (Asmvliw.Paddxiw shift) rd rs imm => Paddxiw shift rd rs imm
+  | PArithRRI32 Asmvliw.Prevsubiw rd rs imm       => Prevsubiw rd rs imm
+  | PArithRRI32 (Asmvliw.Prevsubxiw shift) rd rs imm => Prevsubxiw shift rd rs imm
+  | PArithRRI32 Asmvliw.Pmuliw rd rs imm       => Pmuliw rd rs imm
+  | PArithRRI32 Asmvliw.Pandiw rd rs imm       => Pandiw rd rs imm
+  | PArithRRI32 Asmvliw.Pnandiw rd rs imm      => Pnandiw rd rs imm
+  | PArithRRI32 Asmvliw.Poriw rd rs imm        => Poriw rd rs imm
+  | PArithRRI32 Asmvliw.Pnoriw rd rs imm       => Pnoriw rd rs imm
+  | PArithRRI32 Asmvliw.Pxoriw rd rs imm       => Pxoriw rd rs imm
+  | PArithRRI32 Asmvliw.Pnxoriw rd rs imm      => Pnxoriw rd rs imm
+  | PArithRRI32 Asmvliw.Pandniw rd rs imm      => Pandniw rd rs imm
+  | PArithRRI32 Asmvliw.Porniw rd rs imm       => Porniw rd rs imm
+  | PArithRRI32 Asmvliw.Psraiw rd rs imm       => Psraiw rd rs imm
+  | PArithRRI32 Asmvliw.Psrxiw rd rs imm       => Psrxiw rd rs imm
+  | PArithRRI32 Asmvliw.Psrliw rd rs imm       => Psrliw rd rs imm
+  | PArithRRI32 Asmvliw.Pslliw rd rs imm       => Pslliw rd rs imm
+  | PArithRRI32 Asmvliw.Proriw rd rs imm       => Proriw rd rs imm
+  | PArithRRI32 Asmvliw.Psllil rd rs imm       => Psllil rd rs imm
+  | PArithRRI32 Asmvliw.Psrlil rd rs imm       => Psrlil rd rs imm
+  | PArithRRI32 Asmvliw.Psrxil rd rs imm       => Psrxil rd rs imm
+  | PArithRRI32 Asmvliw.Psrail rd rs imm       => Psrail rd rs imm
+
+  (* RRI64 *)
+  | PArithRRI64 (Asmvliw.Pcompil it) rd rs imm => Pcompil it rd rs imm
+  | PArithRRI64 Asmvliw.Paddil rd rs imm       => Paddil rd rs imm
+  | PArithRRI64 (Asmvliw.Paddxil shift) rd rs imm => Paddxil shift rd rs imm
+  | PArithRRI64 Asmvliw.Prevsubil rd rs imm       => Prevsubil rd rs imm
+  | PArithRRI64 (Asmvliw.Prevsubxil shift) rd rs imm => Prevsubxil shift rd rs imm
+  | PArithRRI64 Asmvliw.Pmulil rd rs imm       => Pmulil rd rs imm
+  | PArithRRI64 Asmvliw.Pandil rd rs imm       => Pandil rd rs imm
+  | PArithRRI64 Asmvliw.Pnandil rd rs imm      => Pnandil rd rs imm
+  | PArithRRI64 Asmvliw.Poril rd rs imm        => Poril rd rs imm
+  | PArithRRI64 Asmvliw.Pnoril rd rs imm       => Pnoril rd rs imm
+  | PArithRRI64 Asmvliw.Pxoril rd rs imm       => Pxoril rd rs imm
+  | PArithRRI64 Asmvliw.Pnxoril rd rs imm      => Pnxoril rd rs imm
+  | PArithRRI64 Asmvliw.Pandnil rd rs imm      => Pandnil rd rs imm
+  | PArithRRI64 Asmvliw.Pornil rd rs imm       => Pornil rd rs imm
+
+  (** ARRR *)
+  | PArithARRR Asmvliw.Pmaddw rd rs1 rs2       => Pmaddw rd rs1 rs2
+  | PArithARRR Asmvliw.Pmaddl rd rs1 rs2       => Pmaddl rd rs1 rs2
+  | PArithARRR Asmvliw.Pmsubw rd rs1 rs2       => Pmsubw rd rs1 rs2
+  | PArithARRR Asmvliw.Pmsubl rd rs1 rs2       => Pmsubl rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmaddfw rd rs1 rs2     => Pfmaddfw rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmaddfl rd rs1 rs2     => Pfmaddfl rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmsubfw rd rs1 rs2     => Pfmsubfw rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmsubfl rd rs1 rs2     => Pfmsubfl rd rs1 rs2
+  | PArithARRR (Asmvliw.Pcmove cond) rd rs1 rs2=> Pcmove cond rd rs1 rs2
+  | PArithARRR (Asmvliw.Pcmoveu cond) rd rs1 rs2=> Pcmoveu cond rd rs1 rs2
+
+  (** ARR *)
+  | PArithARR (Asmvliw.Pinsf stop start) rd rs  => Pinsf rd rs stop start
+  | PArithARR (Asmvliw.Pinsfl stop start) rd rs  => Pinsfl rd rs stop start
+
+  (** ARRI32 *)
+  | PArithARRI32 Asmvliw.Pmaddiw rd rs1 imm    => Pmaddiw rd rs1 imm
+  | PArithARRI32 (Asmvliw.Pcmoveiw cond) rd rs1 imm => Pcmoveiw cond rd rs1 imm
+  | PArithARRI32 (Asmvliw.Pcmoveuiw cond) rd rs1 imm => Pcmoveuiw cond rd rs1 imm
+
+  (** ARRI64 *)
+  | PArithARRI64 Asmvliw.Pmaddil rd rs1 imm    => Pmaddil rd rs1 imm
+  | PArithARRI64 (Asmvliw.Pcmoveil cond) rd rs1 imm => Pcmoveil cond rd rs1 imm
+  | PArithARRI64 (Asmvliw.Pcmoveuil cond) rd rs1 imm => Pcmoveuil cond rd rs1 imm                                                          
+  (** Load *)
+  | PLoadRRO trap Asmvliw.Plb rd ra ofs   => Plb trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Plbu rd ra ofs  => Plbu trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Plh rd ra ofs   => Plh trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Plhu rd ra ofs  => Plhu trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Plw rd ra ofs   => Plw trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Plw_a rd ra ofs => Plw_a trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Pld rd ra ofs   => Pld trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Pld_a rd ra ofs => Pld_a trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Pfls rd ra ofs  => Pfls trap rd ra (AOff ofs)
+  | PLoadRRO trap Asmvliw.Pfld rd ra ofs  => Pfld trap rd ra (AOff ofs)
+
+  | PLoadQRRO qrs ra ofs => Plq qrs ra (AOff ofs)
+  | PLoadORRO qrs ra ofs => Plo qrs ra (AOff ofs)
+
+  | PLoadRRR trap Asmvliw.Plb rd ra ro   => Plb trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Plbu rd ra ro  => Plbu trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Plh rd ra ro   => Plh trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Plhu rd ra ro  => Plhu trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Plw rd ra ro   => Plw trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Plw_a rd ra ro => Plw_a trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Pld rd ra ro   => Pld trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Pld_a rd ra ro => Pld_a trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Pfls rd ra ro  => Pfls trap rd ra (AReg ro)
+  | PLoadRRR trap Asmvliw.Pfld rd ra ro  => Pfld trap rd ra (AReg ro)
+
+  | PLoadRRRXS trap Asmvliw.Plb rd ra ro   => Plb trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Plbu rd ra ro  => Plbu trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Plh rd ra ro   => Plh trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Plhu rd ra ro  => Plhu trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Plw rd ra ro   => Plw trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Plw_a rd ra ro => Plw_a trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Pld rd ra ro   => Pld trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Pld_a rd ra ro => Pld_a trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Pfls rd ra ro  => Pfls trap rd ra (ARegXS ro)
+  | PLoadRRRXS trap Asmvliw.Pfld rd ra ro  => Pfld trap rd ra (ARegXS ro)
+
+  (** Store *)
+  | PStoreRRO Asmvliw.Psb rd ra ofs  => Psb rd ra (AOff ofs)
+  | PStoreRRO Asmvliw.Psh rd ra ofs => Psh rd ra (AOff ofs)
+  | PStoreRRO Asmvliw.Psw rd ra ofs => Psw rd ra (AOff ofs)
+  | PStoreRRO Asmvliw.Psw_a rd ra ofs => Psw_a rd ra (AOff ofs)
+  | PStoreRRO Asmvliw.Psd rd ra ofs => Psd rd ra (AOff ofs)
+  | PStoreRRO Asmvliw.Psd_a rd ra ofs => Psd_a rd ra (AOff ofs)
+  | PStoreRRO Asmvliw.Pfss rd ra ofs => Pfss rd ra (AOff ofs)
+  | PStoreRRO Asmvliw.Pfsd rd ra ofs => Pfsd rd ra (AOff ofs)
+
+  | PStoreRRR Asmvliw.Psb rd ra ro  => Psb rd ra (AReg ro)
+  | PStoreRRR Asmvliw.Psh rd ra ro => Psh rd ra (AReg ro)
+  | PStoreRRR Asmvliw.Psw rd ra ro => Psw rd ra (AReg ro)
+  | PStoreRRR Asmvliw.Psw_a rd ra ro => Psw_a rd ra (AReg ro)
+  | PStoreRRR Asmvliw.Psd rd ra ro => Psd rd ra (AReg ro)
+  | PStoreRRR Asmvliw.Psd_a rd ra ro => Psd_a rd ra (AReg ro)
+  | PStoreRRR Asmvliw.Pfss rd ra ro => Pfss rd ra (AReg ro)
+  | PStoreRRR Asmvliw.Pfsd rd ra ro => Pfsd rd ra (AReg ro)
+
+  | PStoreRRRXS Asmvliw.Psb rd ra ro  => Psb rd ra (ARegXS ro)
+  | PStoreRRRXS Asmvliw.Psh rd ra ro => Psh rd ra (ARegXS ro)
+  | PStoreRRRXS Asmvliw.Psw rd ra ro => Psw rd ra (ARegXS ro)
+  | PStoreRRRXS Asmvliw.Psw_a rd ra ro => Psw_a rd ra (ARegXS ro)
+  | PStoreRRRXS Asmvliw.Psd rd ra ro => Psd rd ra (ARegXS ro)
+  | PStoreRRRXS Asmvliw.Psd_a rd ra ro => Psd_a rd ra (ARegXS ro)
+  | PStoreRRRXS Asmvliw.Pfss rd ra ro => Pfss rd ra (ARegXS ro)
+  | PStoreRRRXS Asmvliw.Pfsd rd ra ro => Pfsd rd ra (ARegXS ro)
+
+  | PStoreQRRO qrs ra ofs => Psq qrs ra (AOff ofs)
+  | PStoreORRO qrs ra ofs => Pso qrs ra (AOff ofs)
+  end.
+
+Section RELSEM.
+
+Definition code := list instruction.
+
+Fixpoint unfold_label (ll: list label) :=
+  match ll with
+  | nil => nil
+  | l :: ll => Plabel l :: unfold_label ll
+  end.
+
+Fixpoint unfold_body (lb: list basic) :=
+  match lb with
+  | nil => nil
+  | b :: lb => basic_to_instruction b :: unfold_body lb
+  end.
+
+Definition unfold_exit (oc: option control) :=
+  match oc with
+  | None => nil
+  | Some c => control_to_instruction c :: nil
+  end.
+
+Definition unfold_bblock (b: bblock) := unfold_label (header b) ++
+  (match (body b), (exit b) with
+   | (((Asmvliw.Pfreeframe _ _ | Asmvliw.Pallocframe _ _)::nil) as bo), None =>
+     unfold_body bo
+   | bo, ex => unfold_body bo ++ unfold_exit ex ++ Psemi :: nil
+  end).
+
+Fixpoint unfold (lb: bblocks) :=
+  match lb with
+  | nil => nil
+  | b :: lb => (unfold_bblock b) ++ unfold lb
+  end.
+
+Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks; fn_code: code; 
+                                       correct: unfold fn_blocks = fn_code }.
+
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+Definition genv := Genv.t fundef unit.
+
+Definition function_proj (f: function) := Asmvliw.mkfunction (fn_sig f) (fn_blocks f).
+
+Definition fundef_proj (fu: fundef) : Asmvliw.fundef := 
+  match fu with
+  | Internal f => Internal (function_proj f)
+  | External ef => External ef
+  end.
+
+Definition globdef_proj (gd: globdef fundef unit) : globdef Asmvliw.fundef unit :=
+  match gd with
+  | Gfun f => Gfun (fundef_proj f)
+  | Gvar gu => Gvar gu
+  end.
+
+Program Definition genv_trans (ge: genv) : Asmvliw.genv :=
+  {|  Genv.genv_public := Genv.genv_public ge;
+      Genv.genv_symb := Genv.genv_symb ge;
+      Genv.genv_defs := PTree.map1 globdef_proj (Genv.genv_defs ge);
+      Genv.genv_next := Genv.genv_next ge |}.
+Next Obligation.
+  destruct ge. simpl in *. eauto.
+Qed. Next Obligation.
+  destruct ge; simpl in *.
+  rewrite PTree.gmap1 in H.
+  destruct (genv_defs ! b) eqn:GEN.
+  - eauto.
+  - discriminate.
+Qed. Next Obligation.
+  destruct ge; simpl in *.
+  eauto.
+Qed.
+
+Fixpoint prog_defs_proj (l: list (ident * globdef fundef unit))
+                          : list (ident * globdef Asmvliw.fundef unit) :=
+  match l with
+  | nil => nil
+  | (i, gd) :: l => (i, globdef_proj gd) :: prog_defs_proj l
+  end.
+
+Definition program_proj (p: program) : Asmvliw.program :=
+  {|  prog_defs := prog_defs_proj (prog_defs p);
+      prog_public := prog_public p;
+      prog_main := prog_main p
+  |}.
+
+End RELSEM.
+
+Definition semantics (p: program) := Asmvliw.semantics (program_proj p).
+
+(** Determinacy of the [Asm] semantics. *)
+
+Lemma semantics_determinate: forall p, determinate (semantics p).
+Proof.
+  intros. apply semantics_determinate.
+Qed.
+
+(** transf_program *)
+
+Program Definition transf_function (f: Asmvliw.function) : function :=
+     {| fn_sig := Asmvliw.fn_sig f; fn_blocks := Asmvliw.fn_blocks f; 
+        fn_code := unfold (Asmvliw.fn_blocks f) |}.
+
+Lemma transf_function_proj: forall f, function_proj (transf_function f) = f.
+Proof.
+  intros f. destruct f as [sig blks]. unfold function_proj. simpl. auto.
+Qed.
+
+Definition transf_fundef : Asmvliw.fundef -> fundef := AST.transf_fundef transf_function.
+
+Lemma transf_fundef_proj: forall f, fundef_proj (transf_fundef f) = f.
+Proof.
+  intros f. destruct f as [f|e]; simpl; auto.
+  rewrite transf_function_proj. auto.
+Qed.
+
+Definition transf_program : Asmvliw.program -> program := transform_program transf_fundef.
+
+Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
+  prog_defs p1 = prog_defs p2 ->
+  prog_public p1 = prog_public p2 ->
+  prog_main p1 = prog_main p2 ->
+  p1 = p2.
+Proof.
+  intros. destruct p1. destruct p2. simpl in *. subst. auto.
+Qed.
+
+Lemma transf_program_proj: forall p, program_proj (transf_program p) = p.
+Proof.
+  intros p. destruct p as [defs pub main]. unfold program_proj. simpl.
+  apply program_equals; simpl; auto.
+  induction defs.
+  - simpl; auto.
+  - simpl. rewrite IHdefs. 
+    destruct a as [id gd]; simpl.
+    destruct gd as [f|v]; simpl; auto.
+    rewrite transf_fundef_proj. auto.
+Qed.
+
+Definition match_prog (p: Asmvliw.program) (tp: program) :=
+  match_program (fun _ f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p tp, transf_program p = tp -> match_prog p tp.
+Proof.
+  intros. rewrite <- H. eapply match_transform_program; eauto.
+Qed.
+
+Lemma cons_extract {A: Type} : forall (l: list A) a b, a = b -> a::l = b::l.
+Proof.
+  intros. congruence.
+Qed.
+
+Lemma match_program_transf:
+  forall p tp, match_prog p tp -> transf_program p = tp.
+Proof.
+  intros p tp H. inversion_clear H. inv H1.
+  destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. simpl in *.
+  subst. unfold transf_program. unfold transform_program. simpl.
+  apply program_equals; simpl; auto.
+  induction H0; simpl; auto.
+  rewrite IHlist_forall2. apply cons_extract.
+  destruct a1 as [ida gda]. destruct b1 as [idb gdb].
+  simpl in *.
+  inv H. inv H2.
+  - simpl in *. subst. auto.
+  - simpl in *. subst. inv H. auto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: Asmvliw.program.
+Variable tprog: program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Definition match_states (s1 s2: state) := s1 = s2.
+
+Lemma symbols_preserved:
+  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_match TRANSF).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSF).
+
+
+Theorem transf_program_correct:
+  forward_simulation (Asmvliw.semantics prog) (semantics tprog).
+Proof.
+  pose proof (match_program_transf prog tprog TRANSF) as TR.
+  subst. unfold semantics. rewrite transf_program_proj.
+
+  eapply forward_simulation_step with (match_states := match_states); simpl; auto.
+  - intros. exists s1. split; auto. congruence.
+  - intros. inv H. auto.
+  - intros. exists s1'. inv H0. split; auto. congruence.
+Qed.
+
+End PRESERVATION.
diff --git a/mppa_k1c/Asmaux.v b/mppa_k1c/Asmaux.v
index 94b39f4e..891d1068 100644
--- a/mppa_k1c/Asmaux.v
+++ b/mppa_k1c/Asmaux.v
@@ -2,4 +2,4 @@ Require Import Asm.
 Require Import AST.
 
 (** Constant only needed by Asmexpandaux.ml *)
-Program Definition dummy_function := {| fn_code := nil; fn_sig := signature_main; fn_blocks := nil |}.
+Program Definition dummy_function := {| fn_code := nil; fn_sig := signature_main; fn_blocks := nil |}.
diff --git a/mppa_k1c/Asmblock.v b/mppa_k1c/Asmblock.v
index 9b4489c5..a05d4726 100644
--- a/mppa_k1c/Asmblock.v
+++ b/mppa_k1c/Asmblock.v
@@ -33,6 +33,19 @@ Require Import Conventions.
 Require Import Errors.
 Require Export Asmvliw.
 
+(* Notations necessary to hook Asmvliw definitions *)
+Notation undef_caller_save_regs := Asmvliw.undef_caller_save_regs.
+Notation regset := Asmvliw.regset.
+Notation extcall_arg := Asmvliw.extcall_arg.
+Notation extcall_arg_pair := Asmvliw.extcall_arg_pair.
+Notation extcall_arguments := Asmvliw.extcall_arguments.
+Notation set_res := Asmvliw.set_res.
+Notation function := Asmvliw.function.
+Notation bblocks := Asmvliw.bblocks.
+Notation header := Asmvliw.header.
+Notation body := Asmvliw.body.
+Notation exit := Asmvliw.exit.
+Notation correct := Asmvliw.correct.
 
 (** * Auxiliary utilies on basic blocks *)
 
@@ -260,11 +273,11 @@ Definition exec_arith_instr (ai: ar_instruction) (rs: regset): regset := parexec
 
 (** Auxiliaries for memory accesses *)
 
-Definition exec_load_offset (chunk: memory_chunk) (rs: regset) (m: mem) (d a: ireg) (ofs: offset) := parexec_load_offset chunk rs rs m m d a ofs.
+Definition exec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a: ireg) (ofs: offset) := parexec_load_offset trap chunk rs rs m m d a ofs.
 
-Definition exec_load_reg (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_reg chunk rs rs m m d a ro.
+Definition exec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_reg trap chunk rs rs m m d a ro.
 
-Definition exec_load_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_regxs chunk rs rs m m d a ro.
+Definition exec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_regxs trap chunk rs rs m m d a ro.
 
 Definition exec_load_q_offset (rs: regset) (m: mem) (d : gpreg_q) (a: ireg) (ofs: offset) := parexec_load_q_offset rs rs m m d a ofs.
 
@@ -294,6 +307,31 @@ Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
      end
   end.
 
+
+Theorem builtin_body_nil:
+  forall bb ef args res, exit bb = Some (PExpand (Pbuiltin ef args res)) -> body bb = nil.
+Proof.
+  intros. destruct bb as [hd bdy ex WF]. simpl in *.
+  apply wf_bblock_refl in WF. inv WF. unfold builtin_alone in H1.
+  eapply H1; eauto.
+Qed.
+
+Theorem exec_body_app:
+  forall l l' rs m rs'' m'',
+  exec_body (l ++ l') rs m = Next rs'' m'' ->
+  exists rs' m',
+       exec_body l rs m = Next rs' m'
+    /\ exec_body l' rs' m' = Next rs'' m''.
+Proof.
+  induction l.
+  - intros. simpl in H. repeat eexists. auto.
+  - intros. rewrite <- app_comm_cons in H. simpl in H.
+    destruct (exec_basic_instr a rs m) eqn:EXEBI.
+    + apply IHl in H. destruct H as (rs1 & m1 & EXEB1 & EXEB2).
+      repeat eexists. simpl. rewrite EXEBI. eauto. auto.
+    + discriminate.
+Qed.
+
 (** Position corresponding to a label *)
 
 Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome := par_goto_label f lbl rs rs m.
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index c4c1bbf1..01eda623 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -7,7 +7,7 @@
 
 Require Import AST.
 Require Import Asmblock.
-Require Import Asmblockgenproof0.
+Require Import Asmblockgenproof0 Asmblockprops.
 Require Import Values.
 Require Import Globalenvs.
 Require Import Memory.
@@ -22,6 +22,8 @@ Require Import Parallelizability.
 Require Import Asmvliw Permutation.
 Require Import Chunks.
 
+Require Import Lia.
+
 Open Scope impure.
 
 (** Definition of L *)
@@ -83,9 +85,9 @@ Coercion OArithRRI32: arith_name_rri32 >-> Funclass.
 Coercion OArithRRI64: arith_name_rri64 >-> Funclass.
 
 Inductive load_op :=
-  | OLoadRRO (n: load_name) (ofs: offset)
-  | OLoadRRR (n: load_name)
-  | OLoadRRRXS (n: load_name)
+  | OLoadRRO (n: load_name) (trap: trapping_mode) (ofs: offset)
+  | OLoadRRR (n: load_name) (trap: trapping_mode) 
+  | OLoadRRRXS (n: load_name) (trap: trapping_mode)
 .
 
 Coercion OLoadRRO: load_name >-> Funclass.
@@ -142,33 +144,39 @@ Definition arith_eval (ao: arith_op) (l: list value) :=
   | _, _ => None
   end.
 
-Definition exec_load_deps_offset (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) :=
+Definition exec_incorrect_load trap chunk :=
+  match trap with
+  | TRAP => None
+  | NOTRAP => Some (Val (concrete_default_notrap_load_value chunk))
+  end.
+
+Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) :=
   let (ge, fn) := Ge in
   match (eval_offset ofs) with
   | OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with 
-              | None => None
+              | None => exec_incorrect_load trap chunk
               | Some vl => Some (Val vl)
               end
   | _ => None
   end.
 
-Definition exec_load_deps_reg (chunk: memory_chunk) (m: mem) (v vo: val) :=
+Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
   match Mem.loadv chunk m (Val.addl v vo) with
-  | None => None
+  | None => exec_incorrect_load trap chunk
   | Some vl => Some (Val vl)
   end.
 
-Definition exec_load_deps_regxs (chunk: memory_chunk) (m: mem) (v vo: val) :=
+Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
   match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with
-  | None => None
+  | None => exec_incorrect_load trap chunk
   | Some vl => Some (Val vl)
   end.
 
 Definition load_eval (lo: load_op) (l: list value) :=
   match lo, l with
-  | OLoadRRO n ofs, [Val v; Memstate m] => exec_load_deps_offset (load_chunk n) m v ofs
-  | OLoadRRR n, [Val v; Val vo; Memstate m] => exec_load_deps_reg (load_chunk n) m v vo
-  | OLoadRRRXS n, [Val v; Val vo; Memstate m] => exec_load_deps_regxs (load_chunk n) m v vo
+  | OLoadRRO n trap ofs, [Val v; Memstate m] => exec_load_deps_offset trap (load_chunk n) m v ofs
+  | OLoadRRR n trap, [Val v; Val vo; Memstate m] => exec_load_deps_reg trap (load_chunk n) m v vo
+  | OLoadRRRXS n trap, [Val v; Val vo; Memstate m] => exec_load_deps_regxs trap (load_chunk n) m v vo
   | _, _ => None
   end.
 
@@ -202,6 +210,136 @@ Definition store_eval (so: store_op) (l: list value) :=
   | _, _ => None
   end.
 
+Local Open Scope Z.
+
+Remark size_chunk_positive: forall chunk,
+    (size_chunk chunk) > 0.
+Proof.
+  destruct chunk; simpl; lia.
+Qed.
+
+Remark size_chunk_small: forall chunk,
+    (size_chunk chunk) <= 8.
+Proof.
+  destruct chunk; simpl; lia.
+Qed.
+
+Definition disjoint_chunks
+           (ofs1 : offset) (chunk1 : memory_chunk)
+           (ofs2 : offset) (chunk2 : memory_chunk) :=
+  Intv.disjoint ((Ptrofs.unsigned ofs1),
+                 ((Ptrofs.unsigned ofs1) + (size_chunk chunk1)))
+                ((Ptrofs.unsigned ofs2),
+                 ((Ptrofs.unsigned ofs2) + (size_chunk chunk2))).
+
+Definition small_offset_threshold := 18446744073709551608.
+
+Lemma store_store_disjoint_offsets :
+  forall n1 n2 ofs1 ofs2 vs1 vs2 va m0 m1 m2 m1' m2',
+    (disjoint_chunks ofs1 (store_chunk n1) ofs2 (store_chunk n2)) ->
+    (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+    (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+    store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m0] = Some (Memstate m1) ->
+    store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m1] = Some (Memstate m2) ->
+    store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m0] = Some (Memstate m1') ->
+    store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m1'] = Some (Memstate m2') ->
+    m2 = m2'.
+Proof.
+  intros until m2'.
+  intros DISJOINT SMALL1 SMALL2 STORE0 STORE1 STORE0' STORE1'.
+  unfold disjoint_chunks in DISJOINT.
+  destruct vs1 as [v1 | ]; simpl in STORE0, STORE1'; try congruence.
+  destruct vs2 as [v2 | ]; simpl in STORE1, STORE0'; try congruence.
+  destruct va as [base | ]; try congruence.
+  unfold exec_store_deps_offset in *.
+  destruct Ge.
+  unfold eval_offset in *; simpl in *.
+  unfold Mem.storev in *.
+  unfold Val.offset_ptr in *.
+  destruct base as [ | | | | | wblock wpofs] in * ; try congruence.
+  destruct (Mem.store _ _ _ _ _) eqn:E0; try congruence.
+  inv STORE0.
+  destruct (Mem.store (store_chunk n2) _ _ _ _) eqn:E1; try congruence.
+  inv STORE1.
+  destruct (Mem.store (store_chunk n2) m0 _ _ _) eqn:E0'; try congruence.
+  inv STORE0'.
+  destruct (Mem.store _ m1' _ _ _) eqn:E1'; try congruence.
+  inv STORE1'.
+  assert (Some m2 = Some m2').
+  2: congruence.
+  rewrite <- E1.
+  rewrite <- E1'.
+  eapply Mem.store_store_other.
+  2, 3: eassumption.
+
+  right.
+  pose proof (size_chunk_positive (store_chunk n1)).
+  pose proof (size_chunk_positive (store_chunk n2)).
+  pose proof (size_chunk_small (store_chunk n1)).
+  pose proof (size_chunk_small (store_chunk n2)).
+  destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]];
+    unfold Intv.empty in DIS; simpl in DIS.
+  1, 2: lia.
+  pose proof (Ptrofs.unsigned_range ofs1).
+  pose proof (Ptrofs.unsigned_range ofs2).
+  unfold small_offset_threshold in *.
+  destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1;
+    destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+      change Ptrofs.modulus with 18446744073709551616 in *; 
+      lia.
+Qed.
+
+Lemma load_store_disjoint_offsets :
+  forall n1 n2 tm ofs1 ofs2 vs va m0 m1,
+    (disjoint_chunks ofs1 (store_chunk n1) ofs2 (load_chunk n2)) ->
+    (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+    (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+    store_eval (OStoreRRO n1 ofs1) [vs; va; Memstate m0] = Some (Memstate m1) ->
+    load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m1] =
+    load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m0].
+Proof.
+  intros until m1.
+  intros DISJOINT SMALL1 SMALL2 STORE0.
+  destruct vs as [v | ]; simpl in STORE0; try congruence.
+  destruct va as [base | ]; try congruence.
+  unfold exec_store_deps_offset in *.
+  unfold eval_offset in *; simpl in *.
+  unfold exec_load_deps_offset.
+  unfold Mem.storev, Mem.loadv in *.
+  destruct Ge in *.
+  unfold eval_offset in *.
+  unfold Val.offset_ptr in *.
+  destruct base as [ | | | | | wblock wpofs] in * ; try congruence.
+  destruct (Mem.store _ _ _ _) eqn:E0; try congruence.
+  inv STORE0.
+  assert (
+    (Mem.load (load_chunk n2) m1 wblock
+      (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) =
+    (Mem.load (load_chunk n2) m0 wblock
+              (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) ) as LOADS.
+  {
+    eapply Mem.load_store_other.
+    eassumption.
+    right.
+    pose proof (size_chunk_positive (store_chunk n1)).
+    pose proof (size_chunk_positive (load_chunk n2)).
+    pose proof (size_chunk_small (store_chunk n1)).
+    pose proof (size_chunk_small (load_chunk n2)).
+    destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]];
+      unfold Intv.empty in DIS; simpl in DIS.
+    1,2: lia.
+    
+    pose proof (Ptrofs.unsigned_range ofs1).
+    pose proof (Ptrofs.unsigned_range ofs2).
+    unfold small_offset_threshold in *.
+    destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1;
+      destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+        change Ptrofs.modulus with 18446744073709551616 in *; 
+        lia.
+  }
+  destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence.
+Qed.
+
 Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
   match label_pos lbl 0 (fn_blocks f) with
   | None => None
@@ -364,24 +502,47 @@ Proof.
 Qed.
 Hint Resolve offset_eq_correct: wlp.
 
+Definition trapping_mode_eq trap1 trap2 :=
+  RET (match trap1, trap2 with
+       | TRAP, TRAP | NOTRAP, NOTRAP => true
+       | TRAP, NOTRAP | NOTRAP, TRAP => false
+      end).
+Lemma trapping_mode_eq_correct t1 t2:
+  WHEN trapping_mode_eq t1 t2 ~> b THEN b = true -> t1 = t2.
+Proof.
+  wlp_simplify.
+  destruct t1; destruct t2; trivial; discriminate.
+Qed.
+Hint Resolve trapping_mode_eq_correct: wlp.
+
 Definition load_op_eq (o1 o2: load_op): ?? bool :=
   match o1 with
-  | OLoadRRO n1 ofs1 =>
-     match o2 with OLoadRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end
-  | OLoadRRR n1 =>
-     match o2 with OLoadRRR n2 => phys_eq n1 n2 | _ => RET false end
-  | OLoadRRRXS n1 =>
-     match o2 with OLoadRRRXS n2 => phys_eq n1 n2 | _ => RET false end
+  | OLoadRRO n1 trap ofs1 =>
+    match o2 with
+    | OLoadRRO n2 trap2 ofs2 => iandb (phys_eq n1 n2) (iandb (offset_eq ofs1 ofs2) (trapping_mode_eq trap trap2))
+    | _ => RET false
+    end
+  | OLoadRRR n1 trap =>
+    match o2 with
+    | OLoadRRR n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2) 
+    | _ => RET false
+    end
+  | OLoadRRRXS n1 trap =>
+    match o2 with
+    | OLoadRRRXS n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2)
+    | _ => RET false
+    end
   end.
 
 Lemma load_op_eq_correct o1 o2:
   WHEN load_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
 Proof.
   destruct o1, o2; wlp_simplify; try discriminate.
-  - f_equal. pose (Ptrofs.eq_spec ofs ofs0).
-    rewrite H in *. trivial.
-  - congruence.
-  - congruence.
+  { f_equal.
+    destruct trap, trap0; simpl in *; trivial; discriminate.
+    pose (Ptrofs.eq_spec ofs ofs0).
+    rewrite H in *. trivial. }
+  all: destruct trap, trap0; simpl in *; trivial; discriminate.
 Qed.
 Hint Resolve load_op_eq_correct: wlp.
 Opaque load_op_eq_correct.
@@ -617,21 +778,21 @@ Definition trans_arith (ai: ar_instruction) : inst :=
 Definition trans_basic (b: basic) : inst :=
   match b with
   | PArith ai => trans_arith ai
-  | PLoadRRO n d a ofs => [(#d, Op (Load (OLoadRRO n ofs)) (PReg (#a) @ PReg pmem @ Enil))]
-  | PLoadRRR n d a ro =>  [(#d, Op (Load (OLoadRRR n)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
-  | PLoadRRRXS n d a ro =>  [(#d, Op (Load (OLoadRRRXS n)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+  | PLoadRRO trap n d a ofs => [(#d, Op (Load (OLoadRRO n trap ofs)) (PReg (#a) @ PReg pmem @ Enil))]
+  | PLoadRRR trap n d a ro =>  [(#d, Op (Load (OLoadRRR n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+  | PLoadRRRXS trap n d a ro =>  [(#d, Op (Load (OLoadRRRXS n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
   | PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))]
   | PLoadQRRO qd a ofs =>
     let (d0, d1) := gpreg_q_expand qd in
-      [(#d0, Op (Load (OLoadRRO Pld_a ofs)) (PReg (#a) @ PReg pmem @ Enil));
-       (#d1, Op (Load (OLoadRRO Pld_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
+      [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil));
+       (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
   | PLoadORRO od a ofs =>
     match gpreg_o_expand od with
     | (d0, d1, d2, d3) =>
-        [(#d0, Op (Load (OLoadRRO Pld_a ofs)) (PReg (#a) @ PReg pmem @ Enil));
-         (#d1, Op (Load (OLoadRRO Pld_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
-         (#d2, Op (Load (OLoadRRO Pld_a (Ptrofs.add ofs (Ptrofs.repr 16)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
-         (#d3, Op (Load (OLoadRRO Pld_a (Ptrofs.add ofs (Ptrofs.repr 24)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
+        [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil));
+         (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
+         (#d2, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 16)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
+         (#d3, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 24)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
     end
   | PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
   | PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
@@ -844,7 +1005,7 @@ Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi:
 Proof.
 
 (* a little tactic to automate reasoning on preg_eq *)
-Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr.
+Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
 Local Ltac preg_eq_discr r rd :=
   destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto);
   rewrite (assign_diff _ (#rd) (#r) _); auto;
@@ -861,21 +1022,21 @@ Local Ltac preg_eq_discr r rd :=
       unfold parexec_load_offset; simpl; unfold exec_load_deps_offset; erewrite GENV, H, H0;
       unfold eval_offset;
       simpl; auto;
-      destruct (Mem.loadv _ _ _) eqn:MEML; simpl; auto;
+      destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
       eexists; split; try split; Simpl;
       intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
 
     (* Load Reg *)
     + destruct i; simpl load_chunk. all:
       unfold parexec_load_reg; simpl; unfold exec_load_deps_reg; rewrite H, H0; rewrite (H0 rofs);
-      destruct (Mem.loadv _ _ _) eqn:MEML; simpl; auto;
+      destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
       eexists; split; try split; Simpl;
       intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
 
     (* Load Reg XS *)
     + destruct i; simpl load_chunk. all:
       unfold parexec_load_regxs; simpl; unfold exec_load_deps_regxs; rewrite H, H0; rewrite (H0 rofs);
-      destruct (Mem.loadv _ _ _) eqn:MEML; simpl; auto;
+      destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
       eexists; split; try split; Simpl;
       intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
 
@@ -892,7 +1053,7 @@ Local Ltac preg_eq_discr r rd :=
         preg_eq_discr r rd0. }
 
     (* Load Octuple word *)
-    + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr.
+    + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
       unfold parexec_load_o_offset.
       destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]; destruct Ge; simpl.
       rewrite H0, H.
@@ -1262,13 +1423,13 @@ Section SECT_BBLOCK_EQUIV.
 
 Variable Ge: genv.
 
-Local Hint Resolve trans_state_match.
+Local Hint Resolve trans_state_match: core.
 
 Lemma bblock_simu_reduce:
   forall p1 p2 ge fn,
   Ge = Genv ge fn ->
   L.bblock_simu Ge (trans_block p1) (trans_block p2) ->
-  Asmblockgenproof0.bblock_simu ge fn p1 p2.
+  Asmblockprops.bblock_simu ge fn p1 p2.
 Proof.
   unfold bblock_simu, res_eq; intros p1 p2 ge fn H1 H2 rs m DONTSTUCK.
   generalize (H2 (trans_state (State rs m))); clear H2.
@@ -1537,9 +1698,9 @@ Definition string_of_load_name (n: load_name) : pstring :=
 
 Definition string_of_load (op: load_op): pstring :=
   match op with
-  | OLoadRRO n _ => string_of_load_name n
-  | OLoadRRR n => string_of_load_name n
-  | OLoadRRRXS n => string_of_load_name n
+  | OLoadRRO n _ _ => string_of_load_name n
+  | OLoadRRR n _ => string_of_load_name n
+  | OLoadRRRXS n _ => string_of_load_name n
   end.
 
 Definition string_of_store_name (n: store_name) : pstring :=
@@ -1626,7 +1787,7 @@ Definition bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock) : ?? bool :=
 Local Hint Resolve IST.bblock_simu_test_correct bblock_simu_reduce IST.verb_bblock_simu_test_correct: wlp.
 
 Theorem bblock_simu_test_correct verb p1 p2 :
-  WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn, Asmblockgenproof0.bblock_simu ge fn p1 p2.
+  WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn, Asmblockprops.bblock_simu ge fn p1 p2.
 Proof.
   wlp_simplify.
 Qed.
@@ -1636,17 +1797,23 @@ Hint Resolve bblock_simu_test_correct: wlp.
 
 Import UnsafeImpure.
 
-Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock): bool := unsafe_coerce (bblock_simu_test verb p1 p2).
+Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock): bool := 
+  match unsafe_coerce (bblock_simu_test verb p1 p2) with
+  | Some b => b
+  | None => false
+  end.
 
-Theorem pure_bblock_simu_test_correct verb p1 p2 ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockgenproof0.bblock_simu ge fn p1 p2.
+Theorem pure_bblock_simu_test_correct verb p1 p2 ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2.
 Proof.
-   intros; unfold pure_bblock_simu_test. intros; eapply bblock_simu_test_correct; eauto.
+   unfold pure_bblock_simu_test. 
+   destruct (unsafe_coerce (bblock_simu_test verb p1 p2)) eqn: UNSAFE; try discriminate.
+   intros; subst. eapply bblock_simu_test_correct; eauto.
    apply unsafe_coerce_not_really_correct; eauto.
 Qed.
 
 Definition bblock_simub: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bblock_simu_test true.
 
-Lemma bblock_simub_correct p1 p2 ge fn: bblock_simub p1 p2 = true -> Asmblockgenproof0.bblock_simu ge fn p1 p2.
+Lemma bblock_simub_correct p1 p2 ge fn: bblock_simub p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2.
 Proof.
  eapply (pure_bblock_simu_test_correct true).
 Qed.
diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v
index ade84e7b..36269954 100644
--- a/mppa_k1c/Asmblockgen.v
+++ b/mppa_k1c/Asmblockgen.v
@@ -28,6 +28,8 @@ Require Import Chunks.
 Local Open Scope string_scope.
 Local Open Scope error_monad_scope.
 
+Import PArithCoercions.
+
 (** The code generation functions take advantage of several
   characteristics of the [Mach] code generated by earlier passes of the
   compiler, mostly that argument and result registers are of the correct
@@ -912,12 +914,12 @@ end.
 
 Definition loadind (base: ireg) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) :=
   match ty, preg_of dst with
-  | Tint,    IR rd => OK (indexed_memory_access (PLoadRRO Plw rd) base ofs ::i k)
-  | Tlong,   IR rd => OK (indexed_memory_access (PLoadRRO Pld rd) base ofs ::i k)
-  | Tsingle, IR rd => OK (indexed_memory_access (PLoadRRO Pfls rd) base ofs ::i k)
-  | Tfloat,  IR rd => OK (indexed_memory_access (PLoadRRO Pfld rd) base ofs ::i k)
-  | Tany32,  IR rd => OK (indexed_memory_access (PLoadRRO Plw_a rd) base ofs ::i k)
-  | Tany64,  IR rd => OK (indexed_memory_access (PLoadRRO Pld_a rd) base ofs ::i k)
+  | Tint,    IR rd => OK (indexed_memory_access (PLoadRRO TRAP Plw rd) base ofs ::i k)
+  | Tlong,   IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pld rd) base ofs ::i k)
+  | Tsingle, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pfls rd) base ofs ::i k)
+  | Tfloat,  IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pfld rd) base ofs ::i k)
+  | Tany32,  IR rd => OK (indexed_memory_access (PLoadRRO TRAP Plw_a rd) base ofs ::i k)
+  | Tany64,  IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pld_a rd) base ofs ::i k)
   | _, _           => Error (msg "Asmblockgen.loadind")
   end.
 
@@ -933,7 +935,7 @@ Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: bcode)
   end.
 
 Definition loadind_ptr (base: ireg) (ofs: ptrofs) (dst: ireg) :=
-  indexed_memory_access (PLoadRRO Pld dst) base ofs.
+  indexed_memory_access (PLoadRRO TRAP Pld dst) base ofs.
 
 Definition storeind_ptr (src: ireg) (base: ireg) (ofs: ptrofs) :=
   indexed_memory_access (PStoreRRO Psd src) base ofs.
@@ -993,27 +995,28 @@ Definition chunk2load (chunk: memory_chunk) :=
   | Many64 => Pld_a
   end.
 
-Definition transl_load_rro (chunk: memory_chunk) (addr: addressing)
+Definition transl_load_rro (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing)
            (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
   do r <- ireg_of dst;
-  transl_memory_access (PLoadRRO (chunk2load chunk) r) addr args k.
+  transl_memory_access (PLoadRRO trap (chunk2load chunk) r) addr args k.
 
-Definition transl_load_rrr (chunk: memory_chunk) (addr: addressing)
+Definition transl_load_rrr (trap: trapping_mode)  (chunk: memory_chunk) (addr: addressing)
            (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
   do r <- ireg_of dst;
-  transl_memory_access2 (PLoadRRR (chunk2load chunk) r) addr args k.
+  transl_memory_access2 (PLoadRRR trap (chunk2load chunk) r) addr args k.
 
-Definition transl_load_rrrXS (chunk: memory_chunk) (scale : Z)
+Definition transl_load_rrrXS (trap: trapping_mode) (chunk: memory_chunk) (scale : Z)
            (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
   do r <- ireg_of dst;
-  transl_memory_access2XS chunk (PLoadRRRXS (chunk2load chunk) r) scale args k.
+  transl_memory_access2XS chunk (PLoadRRRXS trap (chunk2load chunk) r) scale args k.
 
-Definition transl_load (chunk: memory_chunk) (addr: addressing)
+Definition transl_load (trap : trapping_mode)
+           (chunk: memory_chunk) (addr: addressing)
            (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
   match addr with
-  | Aindexed2XS scale => transl_load_rrrXS chunk scale args dst k
-  | Aindexed2 => transl_load_rrr chunk addr args dst k
-  | _ => transl_load_rro chunk addr args dst k
+    | Aindexed2XS scale => transl_load_rrrXS trap chunk scale args dst k
+    | Aindexed2 => transl_load_rrr trap chunk addr args dst k
+    | _ => transl_load_rro trap chunk addr args dst k
   end.
 
 Definition chunk2store (chunk: memory_chunk) :=
@@ -1073,8 +1076,8 @@ Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst)
                 else (loadind_ptr SP f.(fn_link_ofs) FP) ::i c)
   | MBop op args res =>
       transl_op op args res k
-  | MBload chunk addr args dst =>
-      transl_load chunk addr args dst k
+  | MBload trap chunk addr args dst =>
+      transl_load trap chunk addr args dst k
   | MBstore chunk addr args src =>
       transl_store chunk addr args src k
   end.
@@ -1111,10 +1114,12 @@ Definition transl_instr_control (f: Machblock.function) (oi: option Machblock.co
 
 Definition fp_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
   match i with
+  | MBgetstack ofs ty dst => before && negb (mreg_eq dst MFP)
   | MBsetstack src ofs ty => before
   | MBgetparam ofs ty dst => negb (mreg_eq dst MFP)
   | MBop op args res => before && negb (mreg_eq res MFP)
-  | _ => false
+  | MBload trapping_mode chunk addr args dst => before && negb (mreg_eq dst MFP)
+  | MBstore chunk addr args res => before
   end.
 
 (** This is the naive definition, which is not tail-recursive unlike the other backends *)
diff --git a/mppa_k1c/Asmblockgenproof.v b/mppa_k1c/Asmblockgenproof.v
index c44ef3ff..1a427112 100644
--- a/mppa_k1c/Asmblockgenproof.v
+++ b/mppa_k1c/Asmblockgenproof.v
@@ -1,1797 +1,1804 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for RISC-V generation: main proof. *)
-
-Require Import Coqlib Errors.
-Require Import Integers Floats AST Linking.
-Require Import Values Memory Events Globalenvs Smallstep.
-Require Import Op Locations Machblock Conventions Asmblock.
-Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1.
-Require Import Axioms.
-
-Module MB := Machblock.
-Module AB := Asmvliw.
-
-Definition match_prog (p: Machblock.program) (tp: Asmvliw.program) :=
-  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
-
-Lemma transf_program_match:
-  forall p tp, transf_program p = OK tp -> match_prog p tp.
-Proof.
-  intros. eapply match_transform_partial_program; eauto.
-Qed.
-
-Section PRESERVATION.
-
-Variable prog: Machblock.program.
-Variable tprog: Asmvliw.program.
-Hypothesis TRANSF: match_prog prog tprog.
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-Lemma symbols_preserved:
-  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof (Genv.find_symbol_match TRANSF).
-
-Lemma senv_preserved:
-  Senv.equiv ge tge.
-Proof (Genv.senv_match TRANSF).
-
-
-Lemma functions_translated:
-  forall b f,
-  Genv.find_funct_ptr ge b = Some f ->
-  exists tf,
-  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
-Proof (Genv.find_funct_ptr_transf_partial TRANSF).
-
-Lemma functions_transl:
-  forall fb f tf,
-  Genv.find_funct_ptr ge fb = Some (Internal f) ->
-  transf_function f = OK tf ->
-  Genv.find_funct_ptr tge fb = Some (Internal tf).
-Proof.
-  intros. exploit functions_translated; eauto. intros [tf' [A B]].
-  monadInv B. rewrite H0 in EQ; inv EQ; auto.
-Qed.
-
-(** * Properties of control flow *)
-
-Lemma transf_function_no_overflow:
-  forall f tf,
-  transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
-Proof.
-  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
-  omega.
-Qed.
-
-(** The following lemmas show that the translation from Mach to Asm
-  preserves labels, in the sense that the following diagram commutes:
-<<
-                          translation
-        Mach code ------------------------ Asm instr sequence
-            |                                          |
-            | Mach.find_label lbl       find_label lbl |
-            |                                          |
-            v                                          v
-        Mach code tail ------------------- Asm instr seq tail
-                          translation
->>
-  The proof demands many boring lemmas showing that Asm constructor
-  functions do not introduce new labels.
-*)
-
-Section TRANSL_LABEL.
-
-Lemma gen_bblocks_label:
-  forall hd bdy ex tbb tc,
-  gen_bblocks hd bdy ex = tbb::tc ->
-  header tbb = hd.
-Proof.
-  intros until tc. intros GENB. unfold gen_bblocks in GENB.
-  destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy.
-  all: inv GENB; simpl; auto.
-Qed.
-
-Lemma gen_bblocks_label2:
-  forall hd bdy ex tbb1 tbb2,
-  gen_bblocks hd bdy ex = tbb1::tbb2::nil ->
-  header tbb2 = nil.
-Proof.
-  intros until tbb2. intros GENB. unfold gen_bblocks in GENB.
-  destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy.
-  all: inv GENB; simpl; auto.
-Qed.
-
-Lemma in_dec_transl:
-  forall lbl hd,
-  (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false).
-Proof.
-  intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto.
-Qed.
-
-Lemma transl_is_label:
-  forall lbl bb tbb f ep tc,
-  transl_block f bb ep = OK (tbb::tc) ->
-  is_label lbl tbb = MB.is_label lbl bb.
-Proof.
-  intros until tc. intros TLB.
-  destruct tbb as [thd tbdy tex]; simpl in *.
-  monadInv TLB.
-  unfold is_label. simpl.
-  apply gen_bblocks_label in H0. simpl in H0. subst.
-  rewrite in_dec_transl. auto.
-Qed.
-
-Lemma transl_is_label_false2:
-  forall lbl bb f ep tbb1 tbb2,
-  transl_block f bb ep = OK (tbb1::tbb2::nil) ->
-  is_label lbl tbb2 = false.
-Proof.
-  intros until tbb2. intros TLB.
-  destruct tbb2 as [thd tbdy tex]; simpl in *.
-  monadInv TLB. apply gen_bblocks_label2 in H0. simpl in H0. subst.
-  apply is_label_correct_false. simpl. auto.
-Qed.
-
-Lemma transl_is_label2:
-  forall f bb ep tbb1 tbb2 lbl,
-  transl_block f bb ep = OK (tbb1::tbb2::nil) ->
-     is_label lbl tbb1 = MB.is_label lbl bb
-  /\ is_label lbl tbb2 = false.
-Proof.
-  intros. split. eapply transl_is_label; eauto. eapply transl_is_label_false2; eauto.
-Qed.
-
-Lemma transl_block_nonil:
-  forall f c ep tc,
-  transl_block f c ep = OK tc ->
-  tc <> nil.
-Proof.
-  intros. monadInv H. unfold gen_bblocks.
-  destruct (extract_ctl x0); try destruct c0; try destruct x; try destruct i.
-  all: discriminate.
-Qed.
-
-Lemma transl_block_limit: forall f bb ep tbb1 tbb2 tbb3 tc,
-  ~transl_block f bb ep = OK (tbb1 :: tbb2 :: tbb3 :: tc).
-Proof.
-  intros. intro. monadInv H.
-  unfold gen_bblocks in H0.
-  destruct (extract_ctl x0); try destruct x; try destruct c; try destruct i.
-  all: discriminate.
-Qed.
-
-Lemma find_label_transl_false:
-  forall x f lbl bb ep x',
-  transl_block f bb ep = OK x ->
-  MB.is_label lbl bb = false ->
-  find_label lbl (x++x') = find_label lbl x'.
-Proof.
-  intros until x'. intros TLB MBis; simpl; auto.
-  destruct x as [|x0 x1]; simpl; auto.
-  destruct x1 as [|x1 x2]; simpl; auto.
-  - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto.
-  - destruct x2 as [|x2 x3]; simpl; auto.
-    + erewrite <- transl_is_label in MBis; eauto. rewrite MBis.
-      erewrite transl_is_label_false2; eauto.
-    + apply transl_block_limit in TLB. destruct TLB.
-Qed.
-
-Lemma transl_blocks_label:
-  forall lbl f c tc ep,
-  transl_blocks f c ep = OK tc ->
-  match MB.find_label lbl c with
-  | None => find_label lbl tc = None
-  | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc'
-  end.
-Proof.
-  induction c; simpl; intros.
-  inv H. auto.
-  monadInv H.
-  destruct (MB.is_label lbl a) eqn:MBis.
-  - destruct x as [|tbb tc]. { apply transl_block_nonil in EQ. contradiction. }
-    simpl find_label. exploit transl_is_label; eauto. intros ABis. rewrite MBis in ABis.
-    rewrite ABis.
-    eexists. eexists. split; eauto. simpl transl_blocks.
-    assert (MB.header a <> nil).
-    { apply MB.is_label_correct_true in MBis.
-      destruct (MB.header a). contradiction. discriminate. }
-    destruct (MB.header a); try contradiction.
-    rewrite EQ. simpl. rewrite EQ1. simpl. auto.
-  - apply IHc in EQ1. destruct (MB.find_label lbl c).
-    + destruct EQ1 as (tc' & FIND & TLBS). exists tc'; eexists; auto.
-      erewrite find_label_transl_false; eauto.
-    + erewrite find_label_transl_false; eauto.
-Qed.
-
-Lemma find_label_nil:
-  forall bb lbl c,
-  header bb = nil ->
-  find_label lbl (bb::c) = find_label lbl c.
-Proof.
-  intros. destruct bb as [hd bdy ex]; simpl in *. subst.
-  assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
-  { erewrite <- is_label_correct_false. simpl. auto. }
-  rewrite H. auto.
-Qed.
-
-Lemma transl_find_label:
-  forall lbl f tf,
-  transf_function f = OK tf ->
-  match MB.find_label lbl f.(MB.fn_code) with
-  | None => find_label lbl tf.(fn_blocks) = None
-  | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc
-  end.
-Proof.
-  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g.
-  monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto.
-  eapply transl_blocks_label; eauto.
-Qed.
-
-End TRANSL_LABEL.
-
-(** A valid branch in a piece of Mach code translates to a valid ``go to''
-  transition in the generated Asm code. *)
-
-Lemma find_label_goto_label:
-  forall f tf lbl rs m c' b ofs,
-  Genv.find_funct_ptr ge b = Some (Internal f) ->
-  transf_function f = OK tf ->
-  rs PC = Vptr b ofs ->
-  MB.find_label lbl f.(MB.fn_code) = Some c' ->
-  exists tc', exists rs',
-    goto_label tf lbl rs m = Next rs' m
-  /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
-  /\ forall r, r <> PC -> rs'#r = rs#r.
-Proof.
-  intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
-  intros (tc & A & B).
-  exploit label_pos_code_tail; eauto. instantiate (1 := 0).
-  intros [pos' [P [Q R]]].
-  exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
-  split. unfold goto_label. unfold par_goto_label. rewrite P. rewrite H1. auto.
-  split. rewrite Pregmap.gss. constructor; auto.
-  rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
-  auto. omega.
-  generalize (transf_function_no_overflow _ _ H0). omega.
-  intros. apply Pregmap.gso; auto.
-Qed.
-
-(** Existence of return addresses *)
-
-(* NB: the hypothesis in comment on [b] is not needed in the proof ! 
-*)
-Lemma return_address_exists:
-  forall b f (* sg ros *) c, (* b.(MB.exit) = Some (MBcall sg ros) -> *) is_tail (b :: c) f.(MB.fn_code) ->
-  exists ra, return_address_offset f c ra.
-Proof.
-  intros. eapply Asmblockgenproof0.return_address_exists; eauto.
-
-- intros. monadInv H0.
-  destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl.
-(*   rewrite transl_code'_transl_code in EQ0. *)
-  exists x; exists true; split; auto. (*  unfold fn_code. *)
-  repeat constructor.
-  - exact transf_function_no_overflow.
-Qed.
-
-(** * Proof of semantic preservation *)
-
-(** Semantic preservation is proved using simulation diagrams
-  of the following form.
-<<
-           st1 --------------- st2
-            |                   |
-           t|                  *|t
-            |                   |
-            v                   v
-           st1'--------------- st2'
->>
-  The invariant is the [match_states] predicate below, which includes:
-- The Asm code pointed by the PC register is the translation of
-  the current Mach code sequence.
-- Mach register values and Asm register values agree.
-*)
-
-(** We need to show that, in the simulation diagram, we cannot
-  take infinitely many Mach transitions that correspond to zero
-  transitions on the Asm side.  Actually, all Mach transitions
-  correspond to at least one Asm transition, except the
-  transition from [Machsem.Returnstate] to [Machsem.State].
-  So, the following integer measure will suffice to rule out
-  the unwanted behaviour. *)
-
-
-Remark preg_of_not_FP: forall r, negb (mreg_eq r MFP) = true -> IR FP <> preg_of r.
-Proof.
-  intros. change (IR FP) with (preg_of MFP). red; intros.
-  exploit preg_of_injective; eauto. intros; subst r; discriminate.
-Qed.
-
-Inductive match_states: Machblock.state -> Asmvliw.state -> Prop :=
-  | match_states_intro:
-      forall s fb sp c ep ms m m' rs f tf tc
-        (STACKS: match_stack ge s)
-        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
-        (MEXT: Mem.extends m m')
-        (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
-        (AG: agree ms sp rs)
-        (DXP: ep = true -> rs#FP = parent_sp s),
-      match_states (Machblock.State s fb sp c ms m)
-                   (Asmvliw.State rs m')
-  | match_states_call:
-      forall s fb ms m m' rs
-        (STACKS: match_stack ge s)
-        (MEXT: Mem.extends m m')
-        (AG: agree ms (parent_sp s) rs)
-        (ATPC: rs PC = Vptr fb Ptrofs.zero)
-        (ATLR: rs RA = parent_ra s),
-      match_states (Machblock.Callstate s fb ms m)
-                   (Asmvliw.State rs m')
-  | match_states_return:
-      forall s ms m m' rs
-        (STACKS: match_stack ge s)
-        (MEXT: Mem.extends m m')
-        (AG: agree ms (parent_sp s) rs)
-        (ATPC: rs PC = parent_ra s),
-      match_states (Machblock.Returnstate s ms m)
-                   (Asmvliw.State rs m').
-
-Record codestate :=
-  Codestate {     pstate: state;
-                  pheader: list label;
-                  pbody1: list basic;
-                  pbody2: list basic;
-                  pctl: option control;
-                  fpok: bool;
-                  rem: list AB.bblock;
-                  cur: option bblock }.
-
-(*   | Codestate: state -> list AB.bblock -> option bblock -> codestate. *)
-
-Inductive match_codestate fb: Machblock.state -> codestate -> Prop :=
-  | match_codestate_intro:
-      forall s sp ms m rs0 m0 f tc ep c bb tbb tbc tbi
-        (STACKS: match_stack ge s)
-        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
-        (MEXT: Mem.extends m m0)
-        (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc)
-        (TIC: transl_instr_control f (MB.exit bb) = OK tbi)
-        (TBLS: transl_blocks f c false = OK tc)
-(*         (TRANS: transl_blocks f (bb::c) ep = OK (tbb::tc)) *)
-        (AG: agree ms sp rs0)
-        (DXP: (if MB.header bb then ep else false) = true -> rs0#FP = parent_sp s)
-        ,
-      match_codestate fb (Machblock.State s fb sp (bb::c) ms m)
-        {|  pstate := (Asmvliw.State rs0 m0);
-            pheader := (MB.header bb);
-            pbody1 := tbc;
-            pbody2 := (extract_basic tbi);
-            pctl := extract_ctl tbi;
-            fpok := ep;
-            rem := tc;
-            cur := Some tbb
-        |}
-.
-
-Inductive match_asmstate fb: codestate -> Asmvliw.state -> Prop :=
-  | match_asmstate_some:
-      forall rs f tf tc m tbb ofs ep tbdy tex lhd
-        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
-        (TRANSF: transf_function f = OK tf)
-        (PCeq: rs PC = Vptr fb ofs)
-        (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc))
-(*         (HDROK: header tbb = lhd) *)
-        ,
-      match_asmstate fb 
-        {|  pstate := (Asmvliw.State rs m);
-            pheader := lhd;
-            pbody1 := tbdy;
-            pbody2 := extract_basic tex;
-            pctl := extract_ctl tex;
-            fpok := ep;
-            rem := tc;
-            cur := Some tbb |}
-        (Asmvliw.State rs m)
-.
-
-Ltac exploreInst :=
-  repeat match goal with
-  | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var
-  | [ H : OK _ = OK _ |- _ ] => monadInv H
-  | [ |- context[if ?b then _ else _] ] => destruct b
-  | [ |- context[match ?m with | _ => _ end] ] => destruct m
-  | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m
-  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
-  | [ H : Error _ = OK _ |- _ ] => inversion H
-  end.
-
-Lemma transl_blocks_nonil:
-  forall f bb c tc ep,
-  transl_blocks f (bb::c) ep = OK tc ->
-  exists tbb tc', tc = tbb :: tc'.
-Proof.
-  intros until ep. intros TLBS. monadInv TLBS. monadInv EQ. unfold gen_bblocks.
-  destruct (extract_ctl x2).
-  - destruct c0; destruct i; simpl; eauto. destruct x1; simpl; eauto.
-  - destruct x1; simpl; eauto.
-Qed.
-
-Lemma no_builtin_preserved:
-  forall f ex x2,
-  (forall ef args res, ex <> Some (MBbuiltin ef args res)) ->
-  transl_instr_control f ex = OK x2 ->
-  (exists i, extract_ctl x2 = Some (PCtlFlow i))
-    \/ extract_ctl x2 = None.
-Proof.
-  intros until x2. intros Hbuiltin TIC.
-  destruct ex.
-  - destruct c.
-    (* MBcall *)
-    + simpl in TIC. exploreInst; simpl; eauto.
-    (* MBtailcall *)
-    + simpl in TIC. exploreInst; simpl; eauto.
-    (* MBbuiltin *)
-    + assert (H: Some (MBbuiltin e l b) <>  Some (MBbuiltin e l b)).
-        apply Hbuiltin. contradict H; auto.
-    (* MBgoto *)
-    + simpl in TIC. exploreInst; simpl; eauto.
-    (* MBcond *)
-    + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; simpl; eauto.
-      * unfold transl_opt_compuimm. exploreInst; simpl; eauto.
-      * unfold transl_opt_compluimm. exploreInst; simpl; eauto.
-      * unfold transl_comp_float64. exploreInst; simpl; eauto.
-      * unfold transl_comp_notfloat64. exploreInst; simpl; eauto.
-      * unfold transl_comp_float32. exploreInst; simpl; eauto.
-      * unfold transl_comp_notfloat32. exploreInst; simpl; eauto.
-    (* MBjumptable *)
-    + simpl in TIC. exploreInst; simpl; eauto.
-    (* MBreturn *)
-    + simpl in TIC. monadInv TIC. simpl. eauto.
-  - monadInv TIC. simpl; auto.
-Qed.
-
-Lemma transl_blocks_distrib:
-  forall c f bb tbb tc ep,
-  transl_blocks f (bb::c) ep = OK (tbb::tc)
-  -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res))
-  -> transl_block f bb (if MB.header bb then ep else false) = OK (tbb :: nil)
-  /\ transl_blocks f c false = OK tc.
-Proof.
-  intros until ep. intros TLBS Hbuiltin.
-  destruct bb as [hd bdy ex].
-  monadInv TLBS. monadInv EQ.
-  exploit no_builtin_preserved; eauto. intros Hectl. destruct Hectl.
-  - destruct H as [i Hectl].
-  unfold gen_bblocks in H0. rewrite Hectl in H0. inv H0.
-  simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
-  unfold gen_bblocks. rewrite Hectl. auto.
-  - unfold gen_bblocks in H0. rewrite H in H0.
-    destruct x1 as [|bi x1].
-    + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
-      unfold gen_bblocks. rewrite H. auto.
-    + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
-      unfold gen_bblocks. rewrite H. auto.
-Qed.
-
-Lemma gen_bblocks_nobuiltin:
-  forall thd tbdy tex tbb,
-  (tbdy <> nil \/ extract_ctl tex <> None) ->
-  (forall ef args res, extract_ctl tex <> Some (PExpand (Pbuiltin ef args res))) ->
-  gen_bblocks thd tbdy tex = tbb :: nil ->
-     header tbb = thd
-  /\ body tbb = tbdy ++ extract_basic tex
-  /\ exit tbb = extract_ctl tex.
-Proof.
-  intros until tbb. intros Hnonil Hnobuiltin GENB. unfold gen_bblocks in GENB.
-  destruct (extract_ctl tex) eqn:ECTL.
-  - destruct c.
-    + destruct i; try (inv GENB; simpl; auto; fail).
-      assert False. eapply Hnobuiltin. eauto. destruct H.
-    + inv GENB. simpl. auto.
-  - inversion Hnonil.
-    + destruct tbdy as [|bi tbdy]; try (contradict H; simpl; auto; fail). inv GENB. auto.
-    + contradict H; simpl; auto.
-Qed.
-
-Lemma transl_instr_basic_nonil:
-  forall k f bi ep x,
-  transl_instr_basic f bi ep k = OK x ->
-  x <> nil.
-Proof.
-  intros until x. intros TIB.
-  destruct bi.
-  - simpl in TIB. unfold loadind in TIB. exploreInst; try discriminate.
-  - simpl in TIB. unfold storeind in TIB. exploreInst; try discriminate.
-  - simpl in TIB. monadInv TIB. unfold loadind in EQ. exploreInst; try discriminate.
-  - simpl in TIB. unfold transl_op in TIB. exploreInst; try discriminate.
-    unfold transl_cond_op in EQ0. exploreInst; try discriminate.
-    unfold transl_cond_float64. exploreInst; try discriminate.
-    unfold transl_cond_notfloat64. exploreInst; try discriminate.
-    unfold transl_cond_float32. exploreInst; try discriminate.
-    unfold transl_cond_notfloat32. exploreInst; try discriminate.
-  - simpl in TIB. unfold transl_load in TIB. exploreInst; try discriminate.
-    all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate.
-  - simpl in TIB. unfold transl_store in TIB. exploreInst; try discriminate.
-    all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate. 
-Qed.
-
-Lemma transl_basic_code_nonil:
-  forall bdy f x ep,
-  bdy <> nil ->
-  transl_basic_code f bdy ep = OK x ->
-  x <> nil.
-Proof.
-  induction bdy as [|bi bdy].
-    intros. contradict H0; auto.
-  destruct bdy as [|bi2 bdy].
-  - clear IHbdy. intros f x b _ TBC. simpl in TBC. eapply transl_instr_basic_nonil; eauto.
-  - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'.
-    monadInv TBC. 
-    assert (x0 <> nil).
-      eapply IHbdy; eauto. subst bdy'. discriminate.
-    eapply transl_instr_basic_nonil; eauto.
-Qed.
-
-Lemma transl_instr_control_nonil:
-  forall ex f x,
-  ex <> None ->
-  transl_instr_control f ex = OK x ->
-  extract_ctl x <> None.
-Proof.
-  intros ex f x Hnonil TIC.
-  destruct ex as [ex|].
-  - clear Hnonil. destruct ex.
-    all: try (simpl in TIC; exploreInst; discriminate).
-    + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; try discriminate.
-      * unfold transl_opt_compuimm. exploreInst; try discriminate.
-      * unfold transl_opt_compluimm. exploreInst; try discriminate.
-      * unfold transl_comp_float64. exploreInst; try discriminate.
-      * unfold transl_comp_notfloat64. exploreInst; try discriminate.
-      * unfold transl_comp_float32. exploreInst; try discriminate.
-      * unfold transl_comp_notfloat32. exploreInst; try discriminate.
-  - contradict Hnonil; auto.
-Qed.
-
-Lemma transl_instr_control_nobuiltin:
-  forall f ex x,
-  (forall ef args res, ex <> Some (MBbuiltin ef args res)) ->
-  transl_instr_control f ex = OK x ->
-  (forall ef args res, extract_ctl x <> Some (PExpand (Pbuiltin ef args res))).
-Proof.
-  intros until x. intros Hnobuiltin TIC. intros until res.
-  unfold transl_instr_control in TIC. exploreInst.
-  all: try discriminate.
-  - assert False. eapply Hnobuiltin; eauto. destruct H.
-  - unfold transl_cbranch in TIC. exploreInst.
-    all: try discriminate.
-    * unfold transl_opt_compuimm. exploreInst. all: try discriminate.
-    * unfold transl_opt_compluimm. exploreInst. all: try discriminate.
-    * unfold transl_comp_float64. exploreInst; try discriminate.
-    * unfold transl_comp_notfloat64. exploreInst; try discriminate.
-    * unfold transl_comp_float32. exploreInst; try discriminate.
-    * unfold transl_comp_notfloat32. exploreInst; try discriminate.
-Qed.
-
-Theorem match_state_codestate:
-  forall mbs abs s fb sp bb c ms m,
-  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
-  (MB.body bb <> nil \/ MB.exit bb <> None) ->
-  mbs = (Machblock.State s fb sp (bb::c) ms m) ->
-  match_states mbs abs ->
-  exists cs fb f tbb tc ep,
-    match_codestate fb mbs cs /\ match_asmstate fb cs abs
-    /\ Genv.find_funct_ptr ge fb = Some (Internal f)
-    /\ transl_blocks f (bb::c) ep = OK (tbb::tc)
-    /\ body tbb = pbody1 cs ++ pbody2 cs
-    /\ exit tbb = pctl cs
-    /\ cur cs = Some tbb /\ rem cs = tc
-    /\ pstate cs = abs.
-Proof.
-  intros until m. intros Hnobuiltin Hnotempty Hmbs MS. subst. inv MS.
-  inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst.
-  exploit transl_blocks_distrib; eauto. intros (TLB & TLBS). clear H2.
-  monadInv TLB. exploit gen_bblocks_nobuiltin; eauto.
-    { inversion Hnotempty.
-      - destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail).
-        left. eapply transl_basic_code_nonil; eauto.
-      - destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail).
-        right. eapply transl_instr_control_nonil; eauto. }
-    eapply transl_instr_control_nobuiltin; eauto.
-  intros (Hth & Htbdy & Htexit).
-  exists {| pstate := (State rs m'); pheader := (Machblock.header bb); pbody1 := x; pbody2 := extract_basic x0;
-            pctl := extract_ctl x0; fpok := ep; rem := tc'; cur := Some tbb |}, fb, f, tbb, tc', ep.
-  repeat split. 1-2: econstructor; eauto.
-  { destruct (MB.header bb). eauto. discriminate. } eauto.
-  unfold transl_blocks. fold transl_blocks. unfold transl_block. rewrite EQ. simpl. rewrite EQ1; simpl.
-  rewrite TLBS. simpl. rewrite H2.
-  all: simpl; auto.
-Qed.
-
-Definition mb_remove_body (bb: MB.bblock) := 
-  {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}.
-
-Lemma exec_straight_pnil:
-  forall c rs1 m1 rs2 m2,
-  exec_straight tge c rs1 m1 (Pnop::gnil) rs2 m2 ->
-  exec_straight tge c rs1 m1 nil rs2 m2.
-Proof.
-  intros. eapply exec_straight_trans. eapply H. econstructor; eauto.
-Qed.
-
-Lemma transl_block_nobuiltin:
-  forall f bb ep tbb,
-  (MB.body bb <> nil \/ MB.exit bb <> None) ->
-  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
-  transl_block f bb ep = OK (tbb :: nil) ->
-  exists c c',
-     transl_basic_code f (MB.body bb) ep = OK c
-  /\ transl_instr_control f (MB.exit bb) = OK c'
-  /\ body tbb = c ++ extract_basic c'
-  /\ exit tbb = extract_ctl c'.
-Proof.
-  intros until tbb. intros Hnonil Hnobuiltin TLB. monadInv TLB. destruct Hnonil.
-  - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto.
-    left. eapply transl_basic_code_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto.
-  - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto.
-    right. eapply transl_instr_control_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto.
-Qed.
-
-Lemma nextblock_preserves: 
-  forall rs rs' bb r,
-  rs' = nextblock bb rs ->
-  data_preg r = true ->
-  rs r = rs' r.
-Proof.
-  intros. destruct r; try discriminate.
-  subst. Simpl.
-(*   - subst. Simpl. *)
-Qed.
-
-Lemma cons3_app {A: Type}:
-  forall a b c (l: list A),
-  a :: b :: c :: l = (a :: b :: c :: nil) ++ l.
-Proof.
-  intros. simpl. auto.
-Qed.
-
-Lemma exec_straight_opt_body2:
-  forall c rs1 m1 c' rs2 m2,
-  exec_straight_opt tge c rs1 m1 c' rs2 m2 ->
-  exists body,
-     exec_body tge body rs1 m1 = Next rs2 m2
-  /\ (basics_to_code body) ++g c' = c.
-Proof.
-  intros until m2. intros EXES.
-  inv EXES.
-  - exists nil. split; auto.
-  - eapply exec_straight_body2. auto.
-Qed.
-
-Lemma extract_basics_to_code:
-  forall lb c,
-  extract_basic (basics_to_code lb ++ c) = lb ++ extract_basic c.
-Proof.
-  induction lb; intros; simpl; congruence.
-Qed.
-
-Lemma extract_ctl_basics_to_code:
-  forall lb c,
-  extract_ctl (basics_to_code lb ++ c) = extract_ctl c.
-Proof.
-  induction lb; intros; simpl; congruence.
-Qed.
-
-(* Lemma goto_label_inv:
-  forall fn tbb l rs m b ofs,
-  rs PC = Vptr b ofs ->
-  goto_label fn l rs m = goto_label fn l (nextblock tbb rs) m.
-Proof.
-  intros.
-  unfold goto_label. rewrite nextblock_pc. unfold Val.offset_ptr. rewrite H.
-  exploreInst; auto.
-  unfold nextblock. rewrite Pregmap.gss.
-  
-Qed.
-
-
-Lemma exec_control_goto_label_inv:
-  exec_control tge fn (Some ctl) rs m = goto_label fn l rs m ->
-  exec_control tge fn (Some ctl) (nextblock tbb rs) m = goto_label fn l (nextblock tbb rs) m.
-Proof.
-Qed. *)
-
-Theorem step_simu_control:
-  forall bb' fb fn s sp c ms' m' rs2 m2 E0 S'' rs1 m1 tbb tbdy2 tex cs2,
-  MB.body bb' = nil ->
-  (forall ef args res, MB.exit bb' <> Some (MBbuiltin ef args res)) ->
-  Genv.find_funct_ptr tge fb = Some (Internal fn) ->
-  pstate cs2 = (Asmvliw.State rs2 m2) ->
-  pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex ->
-  cur cs2 = Some tbb ->
-  match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 ->
-  match_asmstate fb cs2 (Asmvliw.State rs1 m1) ->
-  exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') E0 S'' ->
-  (exists rs3 m3 rs4 m4,
-      exec_body tge tbdy2 rs2 m2 = Next rs3 m3
-  /\  exec_control_rel tge fn tex tbb rs3 m3 rs4 m4
-  /\  match_states S'' (State rs4 m4)).
-Proof.
-  intros until cs2. intros Hbody Hbuiltin FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP.
-  inv ESTEP.
-  - inv MCS. inv MAS. simpl in *.
-    inv Hcur. inv Hpstate.
-    destruct ctl.
-    + (* MBcall *)
-      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
-      inv TBC. inv TIC. inv H0.
-
-      assert (f0 = f) by congruence. subst f0.
-      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
-        eapply transf_function_no_overflow; eauto.
-      destruct s1 as [rf|fid]; simpl in H7.
-      * (* Indirect call *)
-        monadInv H1.
-        assert (ms' rf = Vptr f' Ptrofs.zero).
-        { unfold find_function_ptr in H14. destruct (ms' rf); try discriminate.
-          revert H14; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
-        assert (rs2 x = Vptr f' Ptrofs.zero).
-        { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
-        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
-        remember (Ptrofs.add _ _) as ofs'.
-        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
-        { econstructor; eauto. }
-        assert (f1 = f) by congruence. subst f1.
-        exploit return_address_offset_correct; eauto. intros; subst ra.
-
-        repeat eexists.
-          rewrite H6. econstructor; eauto.
-          rewrite H7. econstructor; eauto.
-        econstructor; eauto.
-          econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl.
-        simpl. Simpl. rewrite PCeq. rewrite Heqofs'. simpl. auto.
-
-      * (* Direct call *)
-        monadInv H1.
-        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
-        remember (Ptrofs.add _ _) as ofs'.
-        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
-          econstructor; eauto.
-        assert (f1 = f) by congruence. subst f1.
-        exploit return_address_offset_correct; eauto. intros; subst ra.
-        repeat eexists.
-          rewrite H6. econstructor; eauto.
-          rewrite H7. econstructor; eauto.
-        econstructor; eauto.
-          econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl.
-        Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. simpl in H14. rewrite H14. auto.
-        Simpl. simpl. subst. Simpl. simpl. unfold Val.offset_ptr. rewrite PCeq. auto.
-    + (* MBtailcall *)
-      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
-      inv TBC. inv TIC. inv H0.
-
-      assert (f0 = f) by congruence.  subst f0.
-      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
-        eapply transf_function_no_overflow; eauto.
-      exploit Mem.loadv_extends. eauto. eexact H15. auto. simpl. intros [parent' [A B]].
-      destruct s1 as [rf|fid]; simpl in H13. 
-      * monadInv H1.
-        assert (ms' rf = Vptr f' Ptrofs.zero).
-          { destruct (ms' rf); try discriminate. revert H13. predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
-        assert (rs2 x = Vptr f' Ptrofs.zero).
-          { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
-
-        assert (f = f1) by congruence. subst f1. clear FIND1. clear H14.
-        exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
-        exploit exec_straight_body; eauto.
-          { simpl. eauto. }
-        intros EXEB.
-        repeat eexists.
-          rewrite H6. simpl extract_basic. eauto.
-          rewrite H7. simpl extract_ctl. simpl. reflexivity.
-        econstructor; eauto.
-          { apply agree_set_other.
-            - econstructor; auto with asmgen.
-              + apply V.
-              + intro r. destruct r; apply V; auto.
-            - eauto with asmgen. }
-        assert (IR x <> IR GPR12 /\ IR x <> IR GPR32 /\ IR x <> IR GPR16).
-          { clear - EQ. destruct x; repeat split; try discriminate.
-            all: unfold ireg_of in EQ; destruct rf; try discriminate. }
-        Simpl. inv H1. inv H3. rewrite Z; auto; try discriminate.
-      * monadInv H1. assert (f = f1) by congruence. subst f1. clear FIND1. clear H14.
-        exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
-        exploit exec_straight_body; eauto.
-          simpl. eauto.
-        intros EXEB.
-        repeat eexists.
-          rewrite H6. simpl extract_basic. eauto.
-          rewrite H7. simpl extract_ctl. simpl. reflexivity.
-        econstructor; eauto.
-        { apply agree_set_other.
-          - econstructor; auto with asmgen.
-            + apply V.
-            + intro r. destruct r; apply V; auto.
-          - eauto with asmgen. }
-        { Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H13. auto. }
-    + (* MBbuiltin (contradiction) *)
-      assert (MB.exit bb' <> Some (MBbuiltin e l b)) by (apply Hbuiltin).
-      rewrite <- H in H1. contradict H1; auto.
-    + (* MBgoto *)
-      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
-      inv TBC. inv TIC. inv H0.
-
-      assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence. subst f1. clear H11.
-      remember (nextblock tbb rs2) as rs2'.
-      (* inv AT. monadInv H4. *)
-      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'.
-      assert (tf = fn) by congruence. subst tf.
-      exploit find_label_goto_label.
-        eauto. eauto.
-        instantiate (2 := rs2').
-        { subst. unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. eauto. }
-        eauto.
-      intros (tc' & rs' & GOTO & AT2 & INV).
-
-      eexists. eexists. repeat eexists. repeat split.
-        rewrite H6. simpl extract_basic. simpl. eauto.
-        rewrite H7. simpl extract_ctl. simpl. rewrite <- Heqrs2'. eauto.
-      econstructor; eauto.
-        rewrite Heqrs2' in INV. unfold nextblock, incrPC in INV.
-        eapply agree_exten; eauto with asmgen.
-        assert (forall r : preg, r <> PC -> rs' r = rs2 r).
-        { intros. destruct r.
-          - destruct g. all: rewrite INV; Simpl; auto.
-(*           - destruct g. all: rewrite INV; Simpl; auto. *)
-          - rewrite INV; Simpl; auto.
-          - contradiction. }
-        eauto with asmgen.
-        congruence.
-    + (* MBcond *)
-      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
-      inv TBC. inv TIC. inv H0.
-
-      * (* MBcond true *)
-        assert (f0 = f) by congruence. subst f0.
-        exploit eval_condition_lessdef.
-          eapply preg_vals; eauto.
-          all: eauto.
-        intros EC.
-        exploit transl_cbranch_correct_true; eauto. intros (rs' & jmp & A & B & C).
-        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
-        assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. }
-        rewrite PCeq' in PCeq.
-        assert (f1 = f) by congruence. subst f1.
-        exploit find_label_goto_label.
-          4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs')). rewrite nextblock_pc.
-          unfold Val.offset_ptr. rewrite PCeq. eauto.
-        intros (tc' & rs3 & GOTOL & TLPC & Hrs3).
-        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
-        assert (tf = fn) by congruence. subst tf.
-
-        repeat eexists.
-          rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto.
-          rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto.
-
-        econstructor; eauto.
-          eapply agree_exten with rs2; eauto with asmgen.
-          { intros. destruct r; try destruct g; try discriminate.
-            all: rewrite Hrs3; try discriminate; unfold nextblock, incrPC; Simpl. }
-        intros. discriminate.
-
-      * (* MBcond false *)
-        assert (f0 = f) by congruence. subst f0.
-        exploit eval_condition_lessdef.
-          eapply preg_vals; eauto.
-          all: eauto.
-        intros EC.
-
-        exploit transl_cbranch_correct_false; eauto. intros (rs' & jmp & A & B & C).
-        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
-        assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. }
-        rewrite PCeq' in PCeq.
-        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
-        assert (tf = fn) by congruence. subst tf.
-
-        assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned).
-          eapply transf_function_no_overflow; eauto.
-        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
-
-        repeat eexists.
-          rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto.
-          rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto.
-
-        econstructor; eauto.
-          unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. econstructor; eauto.
-          eapply agree_exten with rs2; eauto with asmgen.
-          { intros. destruct r; try destruct g; try discriminate.
-            all: rewrite <- C; try discriminate; unfold nextblock, incrPC; Simpl. }
-        intros. discriminate.
-    + (* MBjumptable *)
-      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
-      inv TBC. inv TIC. inv H0.
-
-      assert (f0 = f) by congruence. subst f0.
-      monadInv H1.
-      generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV.
-      assert (f1 = f) by congruence. subst f1.
-      exploit find_label_goto_label. 4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs2) # GPR62 <- Vundef # GPR63 <- Vundef).
-        unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity.
-      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3. assert (fn = tf) by congruence. subst fn.
-
-      intros [tc' [rs' [A [B C]]]].
-      exploit ireg_val; eauto. rewrite H13. intros LD; inv LD.
-      
-      repeat eexists.
-        rewrite H6. simpl extract_basic. simpl. eauto.
-        rewrite H7. simpl extract_ctl. simpl. Simpl. rewrite <- H1. unfold Mach.label in H14. unfold label. rewrite H14. eapply A.
-      econstructor; eauto.
-        eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen.
-        { assert (destroyed_by_jumptable = R62 :: R63 :: nil) by auto. rewrite H2 in H0. simpl in H0. inv H0.
-          destruct (preg_eq r' GPR63). subst. contradiction.
-          destruct (preg_eq r' GPR62). subst. contradiction.
-          destruct r'; Simpl. }
-        discriminate.
-    + (* MBreturn *)
-      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
-      inv TBC. inv TIC. inv H0.
-
-      assert (f0 = f) by congruence. subst f0.
-      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
-        eapply transf_function_no_overflow; eauto.
-      exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
-      exploit exec_straight_body; eauto.
-        simpl. eauto.
-      intros EXEB.
-      assert (f1 = f) by congruence. subst f1.
-      
-      repeat eexists.
-        rewrite H6. simpl extract_basic. eauto.
-        rewrite H7. simpl extract_ctl. simpl. reflexivity.
-      econstructor; eauto.
-        unfold nextblock, incrPC. repeat apply agree_set_other; auto with asmgen.
-
-  - inv MCS. inv MAS. simpl in *. subst. inv Hpstate. inv Hcur.
-(*     exploit transl_blocks_distrib; eauto. (* rewrite <- H2. discriminate. *)
-    intros (TLB & TLBS).
- *) destruct bb' as [hd' bdy' ex']; simpl in *. subst.
-(*     unfold transl_block in TLB. simpl in TLB. unfold gen_bblocks in TLB; simpl in TLB. inv TLB. *)
-    monadInv TBC. monadInv TIC. simpl in *. rewrite H5. rewrite H6.
-    simpl. repeat eexists.
-    econstructor. 4: instantiate (3 := false). all:eauto.
-      unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq.
-      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
-        eapply transf_function_no_overflow; eauto.
-      assert (f = f0) by congruence. subst f0. econstructor; eauto.
-      generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto.
-    eapply agree_exten; eauto. intros. Simpl.
-    discriminate.
-Qed.
-
-Definition mb_remove_first (bb: MB.bblock) := 
-  {| MB.header := MB.header bb; MB.body := tail (MB.body bb); MB.exit := MB.exit bb |}.
-
-Lemma exec_straight_body:
-  forall c c' lc rs1 m1 rs2 m2,
-  exec_straight tge c rs1 m1 c' rs2 m2 ->
-  code_to_basics c = Some lc ->
-  exists l ll,
-     c = l ++ c'
-  /\ code_to_basics l = Some ll
-  /\ exec_body tge ll rs1 m1 = Next rs2 m2.
-Proof.
-  induction c; try (intros; inv H; fail).
-  intros until m2. intros EXES CTB. inv EXES.
-  - exists (i1 ::g nil),(i1::nil). repeat (split; simpl; auto). rewrite H6. auto.
-  - inv CTB. destruct (code_to_basics c); try discriminate. inv H0.
-    eapply IHc in H7; eauto. destruct H7 as (l' & ll & Hc & CTB & EXECB). subst.
-    exists (i ::g l'),(i::ll). repeat (split; simpl; auto).
-      rewrite CTB. auto.
-      rewrite H1. auto.
-Qed.
-
-Lemma basics_to_code_app:
-  forall c l x ll,
-  basics_to_code c = l ++ basics_to_code x ->
-  code_to_basics l = Some ll ->
-  c = ll ++ x.
-Proof.
-  intros. apply (f_equal code_to_basics) in H.
-  erewrite code_to_basics_dist in H; eauto. 2: eapply code_to_basics_id.
-  rewrite code_to_basics_id in H. inv H. auto.
-Qed.
-
-Lemma basics_to_code_app2:
-  forall i c l x ll,
-  (PBasic i) :: basics_to_code c = l ++ basics_to_code x ->
-  code_to_basics l = Some ll ->
-  i :: c = ll ++ x.
-Proof.
-  intros until ll. intros.
-  exploit basics_to_code_app. instantiate (3 := (i::c)). simpl.
-  all: eauto.
-Qed.
-
-Lemma step_simu_basic:
-  forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy,
-  MB.header bb = nil -> MB.body bb = bi::(bdy) -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
-  bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} ->
-  basic_step ge s fb sp ms m bi ms' m' ->
-  pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy ->
-  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
-  (exists rs2 m2 l cs2 tbdy',
-       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1;
-                pctl := pctl cs1; fpok := fp_is_parent (fpok cs1) bi; rem := rem cs1; cur := cur cs1 |}
-    /\ tbdy = l ++ tbdy'
-    /\ exec_body tge l rs1 m1 = Next rs2 m2
-    /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2).
-Proof.
-  intros until bdy. intros Hheader Hbody Hnobuiltin (* Hnotempty *) Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS.
-  simpl in *. inv Hpstate.
-  rewrite Hbody in TBC. monadInv TBC.
-  inv BSTEP.
-  - (* MBgetstack *)
-    simpl in EQ0.
-    unfold Mach.load_stack in H.
-    exploit Mem.loadv_extends; eauto. intros [v' [A B]].
-    rewrite (sp_val _ _ _ AG) in A.
-    exploit loadind_correct; eauto with asmgen.
-    intros (rs2 & EXECS & Hrs'1 & Hrs'2).
-    eapply exec_straight_body in EXECS.
-      2: eapply code_to_basics_id; eauto.
-    destruct EXECS as (l & Hlbi & BTC & CTB & EXECB).
-    exists rs2, m1, Hlbi.
-    eexists. eexists. split. instantiate (1 := x). eauto.
-    repeat (split; auto).
-      eapply basics_to_code_app; eauto.
-    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
-(*     assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
-    rewrite <- Hheadereq. *) subst.
-
-    eapply match_codestate_intro; eauto. simpl. simpl in EQ. (*  { destruct (MB.header bb); auto. } *)
-    eapply agree_set_mreg; eauto with asmgen.
-    intro Hep. simpl in Hep. inv Hep.
-  - (* MBsetstack *)
-    simpl in EQ0.
-    unfold Mach.store_stack in H.
-    assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. }
-    exploit Mem.storev_extends; eauto. intros [m2' [A B]].
-    exploit storeind_correct; eauto with asmgen.
-    rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]].
-
-    eapply exec_straight_body in P.
-      2: eapply code_to_basics_id; eauto.
-    destruct P as (l & ll & TBC & CTB & EXECB).
-    exists rs', m2', ll.
-    eexists. eexists. split. instantiate (1 := x). eauto.
-    repeat (split; auto).
-      eapply basics_to_code_app; eauto.
-    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
-(*     assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
-    rewrite <- Hheadereq. *) subst.
-    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
-
-    eapply agree_undef_regs; eauto with asmgen.
-    simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto.
-  - (* MBgetparam *)
-    simpl in EQ0.
-
-    assert (f0 = f) by congruence; subst f0.
-    unfold Mach.load_stack in *.
-    exploit Mem.loadv_extends. eauto. eexact H0. auto.
-    intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A.
-    exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
-    exploit Mem.loadv_extends. eauto. eexact H1. auto.
-    intros [v' [C D]].
-
-  (* Opaque loadind. *)
-(*     left; eapply exec_straight_steps; eauto; intros. monadInv TR. *)
-    monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP.
-    destruct ep eqn:EPeq.
-  (* RTMP contains parent *)
-    + exploit loadind_correct. eexact EQ1.
-      instantiate (2 := rs1). rewrite DXP; eauto.
-      intros [rs2 [P [Q R]]].
-
-      eapply exec_straight_body in P.
-        2: eapply code_to_basics_id; eauto.
-      destruct P as (l & ll & BTC & CTB & EXECB).
-      exists rs2, m1, ll. eexists.
-      eexists. split. instantiate (1 := x). eauto.
-      repeat (split; auto).
-      { eapply basics_to_code_app; eauto. }
-      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
-      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
-      (* rewrite <- Hheadereq.  *)subst.
-      eapply match_codestate_intro; eauto.
-
-      eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
-      simpl; intros. rewrite R; auto with asmgen.
-      apply preg_of_not_FP; auto.
-
-  (* GPR11 does not contain parent *)
-    + rewrite chunk_of_Tptr in A. 
-      exploit loadind_ptr_correct. eexact A. intros [rs2 [P [Q R]]].
-      exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto.
-      intros [rs3 [S [T U]]].
-
-      exploit exec_straight_trans.
-        eapply P.
-        eapply S.
-      intros EXES.
-
-      eapply exec_straight_body in EXES.
-        2: simpl. 2: erewrite code_to_basics_id; eauto.
-      destruct EXES as (l & ll & BTC & CTB & EXECB).
-      exists rs3, m1, ll.
-      eexists. eexists. split. instantiate (1 := x). eauto.
-      repeat (split; auto).
-        eapply basics_to_code_app2; eauto.
-      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
-      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
-      subst.
-      eapply match_codestate_intro; eauto.
-      eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
-      instantiate (1 := rs2#FP <- (rs3#FP)). intros.
-      rewrite Pregmap.gso; auto with asmgen.
-      congruence.
-      intros. unfold Pregmap.set. destruct (PregEq.eq r' FP). congruence. auto with asmgen.
-      simpl; intros. rewrite U; auto with asmgen.
-      apply preg_of_not_FP; auto.
-  - (* MBop *)
-    simpl in EQ0. rewrite Hheader in DXP.
-    
-    assert (eval_operation tge sp op (map ms args) m' = Some v).
-      rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
-    exploit eval_operation_lessdef.
-      eapply preg_vals; eauto.
-      2: eexact H0.
-      all: eauto.
-    intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
-    exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
-
-    eapply exec_straight_body in P.
-      2: eapply code_to_basics_id; eauto.
-    destruct P as (l & ll & TBC & CTB & EXECB).
-    exists rs2, m1, ll.
-    eexists. eexists. split. instantiate (1 := x). eauto.
-    repeat (split; auto).
-      eapply basics_to_code_app; eauto.
-    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
-(*     assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
-    rewrite <- Hheadereq. *) subst.
-    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
-    apply agree_set_undef_mreg with rs1; auto. 
-    apply Val.lessdef_trans with v'; auto.
-    simpl; intros. destruct (andb_prop _ _ H1); clear H1.
-    rewrite R; auto. apply preg_of_not_FP; auto.
-Local Transparent destroyed_by_op.
-    destruct op; simpl; auto; congruence.
-  - (* MBload *)
-    simpl in EQ0. rewrite Hheader in DXP.
-
-    assert (eval_addressing tge sp addr (map ms args) = Some a).
-      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
-    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
-    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
-    exploit Mem.loadv_extends; eauto. intros [v' [C D]].
-    exploit transl_load_correct; eauto.
-    intros [rs2 [P [Q R]]].
-
-    eapply exec_straight_body in P.
-      2: eapply code_to_basics_id; eauto.
-    destruct P as (l & ll & TBC & CTB & EXECB).
-    exists rs2, m1, ll.
-    eexists. eexists. split. instantiate (1 := x). eauto.
-    repeat (split; auto).
-      eapply basics_to_code_app; eauto.
-    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
-(*     assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
-    rewrite <- Hheadereq. *) subst.
-    eapply match_codestate_intro; eauto. simpl. simpl in EQ.
-
-    eapply agree_set_undef_mreg; eauto. intros; auto with asmgen.
-    simpl; congruence.
-
-  - (* MBstore *)
-    simpl in EQ0. rewrite Hheader in DXP.
-
-    assert (eval_addressing tge sp addr (map ms args) = Some a).
-      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
-    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
-    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
-    assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto.
-    exploit Mem.storev_extends; eauto. intros [m2' [C D]].
-    exploit transl_store_correct; eauto. intros [rs2 [P Q]].
-
-    eapply exec_straight_body in P.
-      2: eapply code_to_basics_id; eauto.
-    destruct P as (l & ll & TBC & CTB & EXECB).
-    exists rs2, m2', ll.
-    eexists. eexists. split. instantiate (1 := x). eauto.
-    repeat (split; auto).
-      eapply basics_to_code_app; eauto.
-    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
-    subst.
-    eapply match_codestate_intro; eauto. simpl. simpl in EQ.
-
-    eapply agree_undef_regs; eauto with asmgen.
-    simpl; congruence.
-Qed.
-
-Lemma exec_body_trans:
-  forall l l' rs0 m0 rs1 m1 rs2 m2,
-  exec_body tge l rs0 m0 = Next rs1 m1 ->
-  exec_body tge l' rs1 m1 = Next rs2 m2 ->
-  exec_body tge (l++l') rs0 m0 = Next rs2 m2.
-Proof.
-  induction l.
-  - simpl. congruence.
-  - intros until m2. intros EXEB1 EXEB2.
-    inv EXEB1. destruct (exec_basic_instr _) eqn:EBI; try discriminate.
-    simpl. rewrite EBI. eapply IHl; eauto.
-Qed.
-
-Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}.
-
-Program Definition remove_header tbb := {| header := nil; body := body tbb; exit := exit tbb |}.
-Next Obligation.
-  destruct tbb. simpl. auto.
-Qed.
-
-Inductive exec_header: codestate -> codestate -> Prop :=
-  | exec_header_cons: forall cs1,
-      exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1;
-                          pctl := pctl cs1; fpok := (if pheader cs1 then fpok cs1 else false); rem := rem cs1;
-                          (* cur := match cur cs1 with None => None | Some bcur => Some (remove_header bcur) end *)
-                          cur := cur cs1 |}.
-
-Lemma step_simu_header:
-  forall bb s fb sp c ms m rs1 m1 cs1,
-(*   (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) -> *)
-  pstate cs1 = (State rs1 m1) ->
-  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
-  (exists cs1',
-       exec_header cs1 cs1'
-    /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1').
-Proof.
-  intros until cs1. intros Hpstate MCS.
-  eexists. split; eauto.
-  econstructor; eauto.
-  inv MCS. simpl in *. inv Hpstate.
-  econstructor; eauto.
-Qed.
-
-Lemma step_matchasm_header:
-  forall fb cs1 cs1' s1,
-  match_asmstate fb cs1 s1 ->
-  exec_header cs1 cs1' ->
-  match_asmstate fb cs1' s1.
-Proof.
-  intros until s1. intros MAS EXH.
-  inv MAS. inv EXH.
-  simpl. econstructor; eauto.
-Qed.
-
-Lemma step_simu_body:
-  forall bb s fb sp c ms m rs1 m1 ms' cs1 m',
-  MB.header bb = nil ->
-  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
-  body_step ge s fb sp (MB.body bb) ms m ms' m' ->
-  pstate cs1 = (State rs1 m1) ->
-  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
-  (exists rs2 m2 cs2 ep,
-       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1;
-                pctl := pctl cs1; fpok := ep; rem := rem cs1; cur := cur cs1 |}
-    /\ exec_body tge (pbody1 cs1) rs1 m1 = Next rs2 m2
-    /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2).
-Proof.
-  intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy].
-  - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS.
-    inv BSTEP.
-    exists rs1, m1, cs1, (fpok cs1).
-    inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto).
-    econstructor; eauto.
-  - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS. inv BSTEP.
-    rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'.
-    exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto.
-    intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS').
-    simpl in *.
-    exploit IHbdy. auto. 2: eapply H6. 3: eapply MCS'. all: eauto. subst; eauto. simpl; auto.
-    intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS'').
-    exists rs3, m3, cs3, ep.
-    repeat (split; simpl; auto). subst. simpl in *. auto.
-    rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto.
-Qed.
-
-(* Lemma exec_body_straight:
-  forall l rs0 m0 rs1 m1,
-  l <> nil ->
-  exec_body tge l rs0 m0 = Next rs1 m1 ->
-  exec_straight tge l rs0 m0 nil rs1 m1.
-Proof.
-  induction l as [|i1 l].
-    intros. contradict H; auto.
-  destruct l as [|i2 l].
-  - intros until m1. intros _ EXEB. simpl in EXEB.
-    destruct (exec_basic_instr _ _ _) eqn:EBI; try discriminate.
-    inv EXEB. econstructor; eauto.
-  - intros until m1. intros _ EXEB. simpl in EXEB. simpl in IHl.
-    destruct (exec_basic_instr tge i1 rs0 m0) eqn:EBI; try discriminate.
-    econstructor; eauto. eapply IHl; eauto. discriminate.
-Qed. *)
-
-Lemma exec_body_pc:
-  forall l rs1 m1 rs2 m2,
-  exec_body tge l rs1 m1 = Next rs2 m2 ->
-  rs2 PC = rs1 PC.
-Proof.
-  induction l.
-  - intros. inv H. auto.
-  - intros until m2. intro EXEB.
-    inv EXEB. destruct (exec_basic_instr _ _ _) eqn:EBI; try discriminate.
-    eapply IHl in H0. rewrite H0.
-    erewrite exec_basic_instr_pc; eauto.
-Qed.
-
-Lemma exec_body_control:
-  forall b rs1 m1 rs2 m2 rs3 m3 fn,
-  exec_body tge (body b) rs1 m1 = Next rs2 m2 ->
-  exec_control_rel tge fn (exit b) b rs2 m2 rs3 m3 ->
-  exec_bblock_rel tge fn b rs1 m1 rs3 m3.
-Proof.
-  intros until fn. intros EXEB EXECTL.
-  econstructor; eauto. inv EXECTL.
-  unfold exec_bblock. rewrite EXEB. auto.
-Qed.
-
-Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat.
-
-Lemma mbsize_eqz:
-  forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None.
-Proof.
-  intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H.
-  remember (length _) as a. remember (length_opt _) as b.
-  assert (a = 0%nat) by omega. assert (b = 0%nat) by omega. subst. clear H.
-  inv H0. inv H1. destruct bdy; destruct ex; auto.
-  all: try discriminate.
-Qed.
-
-Lemma mbsize_neqz:
-  forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None).
-Proof.
-  intros. destruct bb as [hd bdy ex]; simpl in *.
-  destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate).
-  contradict H. unfold mbsize. simpl. auto.
-Qed.
-
-(* Alternative form of step_simulation_bblock, easier to prove *)
-Lemma step_simulation_bblock':
-  forall sf f sp bb bb' bb'' rs m rs' m' s'' c S1,
-  bb' = mb_remove_header bb ->
-  body_step ge sf f sp (Machblock.body bb') rs m rs' m' ->
-  bb'' = mb_remove_body bb' ->
-  (forall ef args res, MB.exit bb'' <> Some (MBbuiltin ef args res)) ->
-  exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') E0 s'' ->
-  match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
-  exists S2 : state, plus step tge S1 E0 S2 /\ match_states s'' S2.
-Proof.
-  intros until S1. intros Hbb' BSTEP Hbb'' Hbuiltin ESTEP MS.
-  destruct (mbsize bb) eqn:SIZE.
-  - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit).
-    destruct bb as [hd bdy ex]; simpl in *; subst.
-    inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc.
-    monadInv H2. simpl in *. inv ESTEP. inv BSTEP.
-    eexists. split. eapply plus_one.
-    exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'.
-    assert (x = tf) by congruence. subst x.
-    eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto.
-    unfold exec_bblock. simpl. eauto.
-    econstructor. eauto. eauto. eauto.
-    unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite <- H.
-    assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
-      eapply transf_function_no_overflow; eauto.
-    econstructor; eauto.
-      generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto.
-    eapply agree_exten; eauto. intros. Simpl.
-    intros. discriminate.
-  - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. }
-    intros Hnotempty.
-
-    (* initial setting *)
-    exploit match_state_codestate.
-      2: eapply Hnotempty.
-      all: eauto.
-    intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate).
-
-    (* step_simu_header part *)
-    assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. }
-    destruct H as (rs1 & m1 & Hpstate2). subst.
-    assert (f = fb). { inv MCS. auto. } subst fb.
-    exploit step_simu_header.
-      2: eapply MCS.
-      all: eauto.
-    intros (cs1' & EXEH & MCS2).
-
-    (* step_simu_body part *)
-(*     assert (MB.body bb = MB.body (mb_remove_header bb)). { destruct bb; simpl; auto. }
-    rewrite H in BSTEP. clear H. *)
-    assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. }
-    exploit step_simu_body.
-      3: eapply BSTEP.
-      4: eapply MCS2.
-      all: eauto. rewrite Hpstate'. eauto.
-    intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS').
-
-    (* step_simu_control part *)
-    assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)).
-    { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. }
-    destruct H as (tf & FIND').
-    assert (exists tex, pbody2 cs1 = extract_basic tex /\ pctl cs1 = extract_ctl tex).
-    { inv MAS. simpl in *. eauto. }
-    destruct H as (tex & Hpbody2 & Hpctl).
-    inv EXEH. simpl in *.
-    subst. exploit step_simu_control.
-      9: eapply MCS'. all: simpl.
-      10: eapply ESTEP.
-      all: simpl; eauto.
-      rewrite Hpbody2. rewrite Hpctl. rewrite Hcur.
-      { inv MAS; simpl in *. inv Hcur. inv Hpstate2. eapply match_asmstate_some; eauto.
-        erewrite exec_body_pc; eauto. }
-    intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS').
-
-    (* bringing the pieces together *)
-    exploit exec_body_trans.
-      eapply EXEB.
-      eauto.
-    intros EXEB2.
-    exploit exec_body_control; eauto.
-    rewrite <- Hpbody2 in EXEB2. rewrite <- Hbody in EXEB2. eauto.
-    rewrite Hexit. rewrite Hpctl. eauto.
-    intros EXECB. inv EXECB.
-    exists (State rs4 m4).
-    split; auto. eapply plus_one. rewrite Hpstate2.
-    assert (exists ofs, rs1 PC = Vptr f ofs).
-    { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. }
-    destruct H0 as (ofs & Hrs1pc).
-    eapply exec_step_internal; eauto.
-
-    (* proving the initial find_bblock *)
-    rewrite Hpstate2 in MAS. inv MAS. simpl in *. 
-    assert (f1 = f0) by congruence. subst f0.
-    rewrite PCeq in Hrs1pc. inv Hrs1pc.
-    exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''.
-    inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ. inv Hcur.
-    eapply find_bblock_tail; eauto.
-Qed.
-
-Lemma step_simulation_bblock:
-  forall sf f sp bb ms m ms' m' S2 c,
-  body_step ge sf f sp (Machblock.body bb) ms m ms' m' ->
-  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
-  exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') E0 S2 ->
-  forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
-  exists S2' : state, plus step tge S1' E0 S2' /\ match_states S2 S2'.
-Proof.
-  intros until c. intros BSTEP Hbuiltin ESTEP S1' MS.
-  eapply step_simulation_bblock'; eauto.
-  all: destruct bb as [hd bdy ex]; simpl in *; eauto.
-  inv ESTEP.
-  - econstructor. inv H; try (econstructor; eauto; fail).
-  - econstructor.
-Qed.
-
-Definition measure (s: MB.state) : nat :=
-  match s with
-  | MB.State _ _ _ _ _ _ => 0%nat
-  | MB.Callstate _ _ _ _ => 0%nat
-  | MB.Returnstate _ _ _ => 1%nat
-  end.
-
-Definition split (c: MB.code) :=
-  match c with
-  | nil => nil
-  | bb::c => {| MB.header := MB.header bb; MB.body := MB.body bb; MB.exit := None |}
-              :: {| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |} :: c
-  end.
-
-Lemma cons_ok_eq3 {A: Type} :
-  forall (x:A) y z x' y' z',
-  x = x' -> y = y' -> z = z' ->
-  OK (x::y::z) = OK (x'::y'::z').
-Proof.
-  intros. subst. auto.
-Qed.
-
-Lemma transl_blocks_split_builtin:
-  forall bb c ep f ef args res,
-  MB.exit bb = Some (MBbuiltin ef args res) -> MB.body bb <> nil ->
-  transl_blocks f (split (bb::c)) ep = transl_blocks f (bb::c) ep.
-Proof.
-  intros until res. intros Hexit Hbody. simpl split.
-  unfold transl_blocks. fold transl_blocks. unfold transl_block.
-  simpl. remember (transl_basic_code _ _ _) as tbc. remember (transl_instr_control _ _) as tbi.
-  remember (transl_blocks _ _ _) as tlbs.
-  destruct tbc; destruct tbi; destruct tlbs.
-  all: try simpl; auto.
-  - simpl. rewrite Hexit in Heqtbi. simpl in Heqtbi. monadInv Heqtbi. simpl.
-    unfold gen_bblocks. simpl. destruct l.
-    + exploit transl_basic_code_nonil; eauto. intro. destruct H.
-    + simpl. rewrite app_nil_r. apply cons_ok_eq3. all: try eapply bblock_equality. all: simpl; auto.
-Qed.
-
-Lemma transl_code_at_pc_split_builtin:
-  forall rs f f0 bb c ep tf tc ef args res,
-  MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
-  transl_code_at_pc ge (rs PC) f f0 (bb :: c) ep tf tc ->
-  transl_code_at_pc ge (rs PC) f f0 (split (bb :: c)) ep tf tc.
-Proof.
-  intros until res. intros Hbody Hexit AT. inv AT.
-  econstructor; eauto. erewrite transl_blocks_split_builtin; eauto.
-Qed.
-
-Theorem match_states_split_builtin:
-  forall sf f sp bb c rs m ef args res S1,
-  MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
-  match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
-  match_states (Machblock.State sf f sp (split (bb::c)) rs m) S1.
-Proof.
-  intros until S1. intros Hbody Hexit MS.
-  inv MS.
-  econstructor; eauto.
-  eapply transl_code_at_pc_split_builtin; eauto.
-Qed.
-
-Lemma step_simulation_builtin:
-  forall ef args res bb sf f sp c ms m t S2,
-  MB.body bb = nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
-  exit_step return_address_offset ge (MB.exit bb) (Machblock.State sf f sp (bb :: c) ms m) t S2 ->
-  forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
-  exists S2' : state, plus step tge S1' t S2' /\ match_states S2 S2'.
-Proof.
-  intros until S2. intros Hbody Hexit ESTEP S1' MS.
-  inv MS. inv AT. monadInv H2. monadInv EQ.
-  rewrite Hbody in EQ0. monadInv EQ0.
-  rewrite Hexit in EQ. monadInv EQ.
-  rewrite Hexit in ESTEP. inv ESTEP. inv H4.
-
-  exploit functions_transl; eauto. intro FN.
-  generalize (transf_function_no_overflow _ _ H1); intro NOOV.
-  exploit builtin_args_match; eauto. intros [vargs' [P Q]].
-  exploit external_call_mem_extends; eauto.
-  intros [vres' [m2' [A [B [C D]]]]].
-  econstructor; split. apply plus_one.
-  simpl in H3.
-  eapply exec_step_builtin. eauto. eauto.
-    eapply find_bblock_tail; eauto.
-    simpl. eauto.
-    erewrite <- sp_val by eauto.
-    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
-    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-    eauto.
-  econstructor; eauto.
-    instantiate (2 := tf); instantiate (1 := x0).
-    unfold nextblock, incrPC. rewrite Pregmap.gss.
-    rewrite set_res_other. rewrite undef_regs_other_2. rewrite Pregmap.gso by congruence. 
-    rewrite <- H. simpl. econstructor; eauto.
-    eapply code_tail_next_int; eauto.
-    rewrite preg_notin_charact. intros. auto with asmgen.
-    auto with asmgen.
-    apply agree_nextblock. eapply agree_set_res; auto.
-    eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto.
-    apply Pregmap.gso; auto with asmgen.
-    congruence.
-Qed.
-
-Lemma next_sep:
-  forall rs m rs' m', rs = rs' -> m = m' -> Next rs m = Next rs' m'.
-Proof.
-  congruence.
-Qed.
-
-Theorem step_simulation:
-  forall S1 t S2, MB.step return_address_offset ge S1 t S2 ->
-  forall S1' (MS: match_states S1 S1'),
-  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
-  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
-Proof.
-  induction 1; intros.
-
-- (* bblock *)
-  left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0.
-  all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock; 
-            try (rewrite MBE; try discriminate); eauto).
-  + (* MBbuiltin *)
-    destruct (MB.body bb) eqn:MBB.
-    * inv H. eapply step_simulation_builtin; eauto. rewrite MBE. eauto.
-    * eapply match_states_split_builtin in MS; eauto.
-        2: rewrite MBB; discriminate.
-      simpl split in MS.
-      rewrite <- MBB in H.
-      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb1.
-      assert (MB.body bb = MB.body bb1). { subst. simpl. auto. }
-      rewrite H1 in H. subst.
-      exploit step_simulation_bblock. eapply H.
-        discriminate.
-        simpl. constructor.
-        eauto.
-      intros (S2' & PLUS1 & MS').
-      rewrite MBE in MS'.
-      assert (exit_step return_address_offset ge (Some (MBbuiltin e l b)) 
-              (MB.State sf f sp ({| MB.header := nil; MB.body := nil; MB.exit := Some (MBbuiltin e l b) |}::c) 
-                rs' m') t s').
-      { inv H0. inv H3. econstructor. econstructor; eauto. }
-      exploit step_simulation_builtin.
-        4: eapply MS'.
-        all: simpl; eauto.
-      intros (S3' & PLUS'' & MS'').
-      exists S3'. split; eauto.
-      eapply plus_trans. eapply PLUS1. eapply PLUS''. eauto.
-  + inversion H0. subst. eapply step_simulation_bblock; try (rewrite MBE; try discriminate); eauto.
-
-- (* internal function *)
-  inv MS.
-  exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
-  generalize EQ; intros EQ'. monadInv EQ'.
-  destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks))); inversion EQ1. clear EQ1. subst x0.
-  unfold Mach.store_stack in *.
-  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros [m1' [C D]].
-  exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
-  intros [m2' [F G]].
-  simpl chunk_of_type in F.
-  exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
-  intros [m3' [P Q]].
-  (* Execution of function prologue *)
-  monadInv EQ0. (* rewrite transl_code'_transl_code in EQ1. *)
-  set (tfbody := make_prologue f x0) in *.
-  set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}) in *.
-  set (rs2 := rs0#FP <- (parent_sp s) #SP <- sp #RTMP <- Vundef).
-  exploit (Pget_correct tge GPRA RA nil rs2 m2'); auto.
-  intros (rs' & U' & V').
-(*   exploit (exec_straight_through_singleinst); eauto.
-  intro W'. remember (nextblock _ rs') as rs''. *)
-  exploit (storeind_ptr_correct tge SP (fn_retaddr_ofs f) GPRA nil rs' m2').
-    rewrite chunk_of_Tptr in P.
-    assert (rs' GPRA = rs0 RA). { apply V'. }
-    assert (rs' SP = rs2 SP). { apply V'; discriminate. }
-    rewrite H4. rewrite H3.
-    (* change (rs' GPRA) with (rs0 RA). *)
-    rewrite ATLR.
-    change (rs2 SP) with sp. eexact P.
-  intros (rs3 & U & V).
-(*   exploit (exec_straight_through_singleinst); eauto.
-  intro W. *)
-  assert (EXEC_PROLOGUE: exists rs3',
-            exec_straight_blocks tge tf
-              tf.(fn_blocks) rs0 m'
-              x0 rs3' m3'
-          /\ forall r, r <> PC -> rs3' r = rs3 r).
-  { eexists. split.
-    - change (fn_blocks tf) with tfbody; unfold tfbody.
-      econstructor; eauto. unfold exec_bblock. simpl exec_body.
-      rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F. simpl in F. rewrite F.
-      Simpl. unfold parexec_store_offset. rewrite Ptrofs.of_int64_to_int64. unfold eval_offset.
-      rewrite chunk_of_Tptr in P. Simpl. rewrite ATLR. unfold Mptr in P. assert (Archi.ptr64 = true) by auto. 2: auto. rewrite H3 in P. rewrite P.
-      simpl. apply next_sep; eauto. reflexivity.
-    - intros. destruct V' as (V'' & V'). destruct r.
-      + Simpl.
-        destruct (gpreg_eq g0 GPR16). { subst. Simpl. rewrite V; try discriminate. rewrite V''. subst rs2. Simpl. }
-        destruct (gpreg_eq g0 GPR32). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
-        destruct (gpreg_eq g0 GPR12). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
-        destruct (gpreg_eq g0 GPR17). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
-        Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. { destruct g0; try discriminate. contradiction. }
-      + Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl.
-      + contradiction.
-  } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3').
-  exploit exec_straight_steps_2; eauto using functions_transl.
-  simpl fn_blocks. simpl fn_blocks in g. omega. constructor.
-  intros (ofs' & X & Y).                    
-  left; exists (State rs3' m3'); split.
-  eapply exec_straight_steps_1; eauto.
-  simpl fn_blocks. simpl fn_blocks in g. omega.
-  constructor.
-  econstructor; eauto.
-  rewrite X; econstructor; eauto. 
-  apply agree_exten with rs2; eauto with asmgen.
-  unfold rs2. 
-  apply agree_set_other; auto with asmgen.
-  apply agree_change_sp with (parent_sp s). 
-  apply agree_undef_regs with rs0. auto.
-Local Transparent destroyed_at_function_entry.
-  simpl; intros; Simpl.
-  unfold sp; congruence.
-
-  intros.
-  assert (r <> RTMP). { contradict H3; rewrite H3; unfold data_preg; auto. }
-  rewrite Heqrs3'. Simpl. rewrite V. inversion V'. rewrite H6. auto.
-  assert (r <> GPRA). { contradict H3; rewrite H3; unfold data_preg; auto. }
-  assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. }
-  (* rewrite H8; auto. *)
-  contradict H3; rewrite H3; unfold data_preg; auto.
-  contradict H3; rewrite H3; unfold data_preg; auto.
-  contradict H3; rewrite H3; unfold data_preg; auto.
-  contradict H3; rewrite H3; unfold data_preg; auto.
-  intros. rewrite Heqrs3'. rewrite V by auto with asmgen.
-  assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. }
-  rewrite H4 by auto with asmgen. reflexivity. discriminate.
-- (* external function *)
-  inv MS.
-  exploit functions_translated; eauto.
-  intros [tf [A B]]. simpl in B. inv B.
-  exploit extcall_arguments_match; eauto.
-  intros [args' [C D]].
-  exploit external_call_mem_extends; eauto.
-  intros [res' [m2' [P [Q [R S]]]]].
-  left; econstructor; split.
-  apply plus_one. eapply exec_step_external; eauto.
-  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  econstructor; eauto.
-  unfold loc_external_result.
-  apply agree_set_other; auto.
-  apply agree_set_pair; auto.
-  apply agree_undef_caller_save_regs; auto.
-
-- (* return *) 
-  inv MS.
-  inv STACKS. simpl in *.
-  right. split. omega. split. auto.
-  rewrite <- ATPC in H5.
-  econstructor; eauto. congruence.
-Qed.
-
-Lemma transf_initial_states:
-  forall st1, MB.initial_state prog st1 ->
-  exists st2, AB.initial_state tprog st2 /\ match_states st1 st2.
-Proof.
-  intros. inversion H. unfold ge0 in *.
-  econstructor; split.
-  econstructor.
-  eapply (Genv.init_mem_transf_partial TRANSF); eauto.
-  replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
-     with (Vptr fb Ptrofs.zero).
-  econstructor; eauto.
-  constructor.
-  apply Mem.extends_refl.
-  split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
-  intros. rewrite Mach.Regmap.gi. auto.
-  unfold Genv.symbol_address.
-  rewrite (match_program_main TRANSF).
-  rewrite symbols_preserved.
-  unfold ge; rewrite H1. auto.
-Qed.
-
-Lemma transf_final_states:
-  forall st1 st2 r,
-  match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r.
-Proof.
-  intros. inv H0. inv H. constructor. assumption.
-  compute in H1. inv H1.
-  generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
-Qed.
-
-Definition return_address_offset : Machblock.function -> Machblock.code -> ptrofs -> Prop := 
-  Asmblockgenproof0.return_address_offset.
-
-Theorem transf_program_correct:
-  forward_simulation (MB.semantics return_address_offset prog) (Asmblock.semantics tprog).
-Proof.
-  eapply forward_simulation_star with (measure := measure).
-  - apply senv_preserved.
-  - eexact transf_initial_states.
-  - eexact transf_final_states.
-  - exact step_simulation.
-Qed.
-
-End PRESERVATION.
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for RISC-V generation: main proof. *)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock.
+Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops.
+Require Import Axioms.
+
+Module MB := Machblock.
+Module AB := Asmvliw.
+
+Definition match_prog (p: Machblock.program) (tp: Asmvliw.program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+  intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: Machblock.program.
+Variable tprog: Asmvliw.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_match TRANSF).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSF).
+
+Lemma functions_translated:
+  forall b f,
+  Genv.find_funct_ptr ge b = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSF).
+
+Lemma functions_transl:
+  forall fb f tf,
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  transf_function f = OK tf ->
+  Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+  intros. exploit functions_translated; eauto. intros [tf' [A B]].
+  monadInv B. rewrite H0 in EQ; inv EQ; auto.
+Qed.
+
+Lemma transf_function_no_overflow:
+  forall f tf,
+  transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
+Proof.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
+  omega.
+Qed.
+
+Section TRANSL_LABEL. (* Lemmas on translation of MB.is_label into AB.is_label *)
+
+Lemma gen_bblocks_label:
+  forall hd bdy ex tbb tc,
+  gen_bblocks hd bdy ex = tbb::tc ->
+  header tbb = hd.
+Proof.
+  intros until tc. intros GENB. unfold gen_bblocks in GENB.
+  destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy.
+  all: inv GENB; simpl; auto.
+Qed.
+
+Lemma gen_bblocks_label2:
+  forall hd bdy ex tbb1 tbb2,
+  gen_bblocks hd bdy ex = tbb1::tbb2::nil ->
+  header tbb2 = nil.
+Proof.
+  intros until tbb2. intros GENB. unfold gen_bblocks in GENB.
+  destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy.
+  all: inv GENB; simpl; auto.
+Qed.
+
+Remark in_dec_transl:
+  forall lbl hd,
+  (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false).
+Proof.
+  intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto.
+Qed.
+
+Lemma transl_is_label:
+  forall lbl bb tbb f ep tc,
+  transl_block f bb ep = OK (tbb::tc) ->
+  is_label lbl tbb = MB.is_label lbl bb.
+Proof.
+  intros until tc. intros TLB.
+  destruct tbb as [thd tbdy tex]; simpl in *.
+  monadInv TLB.
+  unfold is_label. simpl.
+  apply gen_bblocks_label in H0. simpl in H0. subst.
+  rewrite in_dec_transl. auto.
+Qed.
+
+Lemma transl_is_label_false2:
+  forall lbl bb f ep tbb1 tbb2,
+  transl_block f bb ep = OK (tbb1::tbb2::nil) ->
+  is_label lbl tbb2 = false.
+Proof.
+  intros until tbb2. intros TLB.
+  destruct tbb2 as [thd tbdy tex]; simpl in *.
+  monadInv TLB. apply gen_bblocks_label2 in H0. simpl in H0. subst.
+  apply is_label_correct_false. simpl. auto.
+Qed.
+
+Lemma transl_is_label2:
+  forall f bb ep tbb1 tbb2 lbl,
+  transl_block f bb ep = OK (tbb1::tbb2::nil) ->
+     is_label lbl tbb1 = MB.is_label lbl bb
+  /\ is_label lbl tbb2 = false.
+Proof.
+  intros. split. eapply transl_is_label; eauto. eapply transl_is_label_false2; eauto.
+Qed.
+
+Lemma transl_block_nonil:
+  forall f c ep tc,
+  transl_block f c ep = OK tc ->
+  tc <> nil.
+Proof.
+  intros. monadInv H. unfold gen_bblocks.
+  destruct (extract_ctl x0); try destruct c0; try destruct x; try destruct i.
+  all: discriminate.
+Qed.
+
+Lemma transl_block_limit: forall f bb ep tbb1 tbb2 tbb3 tc,
+  ~transl_block f bb ep = OK (tbb1 :: tbb2 :: tbb3 :: tc).
+Proof.
+  intros. intro. monadInv H.
+  unfold gen_bblocks in H0.
+  destruct (extract_ctl x0); try destruct x; try destruct c; try destruct i.
+  all: discriminate.
+Qed.
+
+Lemma find_label_transl_false:
+  forall x f lbl bb ep x',
+  transl_block f bb ep = OK x ->
+  MB.is_label lbl bb = false ->
+  find_label lbl (x++x') = find_label lbl x'.
+Proof.
+  intros until x'. intros TLB MBis; simpl; auto.
+  destruct x as [|x0 x1]; simpl; auto.
+  destruct x1 as [|x1 x2]; simpl; auto.
+  - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto.
+  - destruct x2 as [|x2 x3]; simpl; auto.
+    + erewrite <- transl_is_label in MBis; eauto. rewrite MBis.
+      erewrite transl_is_label_false2; eauto.
+    + apply transl_block_limit in TLB. destruct TLB.
+Qed.
+
+Lemma transl_blocks_label:
+  forall lbl f c tc ep,
+  transl_blocks f c ep = OK tc ->
+  match MB.find_label lbl c with
+  | None => find_label lbl tc = None
+  | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc'
+  end.
+Proof.
+  induction c; simpl; intros.
+  inv H. auto.
+  monadInv H.
+  destruct (MB.is_label lbl a) eqn:MBis.
+  - destruct x as [|tbb tc]. { apply transl_block_nonil in EQ. contradiction. }
+    simpl find_label. exploit transl_is_label; eauto. intros ABis. rewrite MBis in ABis.
+    rewrite ABis.
+    eexists. eexists. split; eauto. simpl transl_blocks.
+    assert (MB.header a <> nil).
+    { apply MB.is_label_correct_true in MBis.
+      destruct (MB.header a). contradiction. discriminate. }
+    destruct (MB.header a); try contradiction.
+    rewrite EQ. simpl. rewrite EQ1. simpl. auto.
+  - apply IHc in EQ1. destruct (MB.find_label lbl c).
+    + destruct EQ1 as (tc' & FIND & TLBS). exists tc'; eexists; auto.
+      erewrite find_label_transl_false; eauto.
+    + erewrite find_label_transl_false; eauto.
+Qed.
+
+Lemma find_label_nil:
+  forall bb lbl c,
+  header bb = nil ->
+  find_label lbl (bb::c) = find_label lbl c.
+Proof.
+  intros. destruct bb as [hd bdy ex]; simpl in *. subst.
+  assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
+  { erewrite <- is_label_correct_false. simpl. auto. }
+  rewrite H. auto.
+Qed.
+
+Theorem transl_find_label:
+  forall lbl f tf,
+  transf_function f = OK tf ->
+  match MB.find_label lbl f.(MB.fn_code) with
+  | None => find_label lbl tf.(fn_blocks) = None
+  | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc
+  end.
+Proof.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g.
+  monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto.
+  eapply transl_blocks_label; eauto.
+Qed.
+
+End TRANSL_LABEL.
+
+(** A valid branch in a piece of Machblock code translates to a valid ``go to''
+  transition in the generated Asmblock code. *)
+
+Lemma find_label_goto_label:
+  forall f tf lbl rs m c' b ofs,
+  Genv.find_funct_ptr ge b = Some (Internal f) ->
+  transf_function f = OK tf ->
+  rs PC = Vptr b ofs ->
+  MB.find_label lbl f.(MB.fn_code) = Some c' ->
+  exists tc', exists rs',
+    goto_label tf lbl rs m = Next rs' m
+  /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
+  /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+  intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
+  intros (tc & A & B).
+  exploit label_pos_code_tail; eauto. instantiate (1 := 0).
+  intros [pos' [P [Q R]]].
+  exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
+  split. unfold goto_label. unfold par_goto_label. rewrite P. rewrite H1. auto.
+  split. rewrite Pregmap.gss. constructor; auto.
+  rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
+  auto. omega.
+  generalize (transf_function_no_overflow _ _ H0). omega.
+  intros. apply Pregmap.gso; auto.
+Qed.
+
+(** Existence of return addresses *)
+
+Lemma return_address_exists:
+  forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+  exists ra, return_address_offset f c ra.
+Proof.
+  intros. eapply Asmblockgenproof0.return_address_exists; eauto.
+
+- intros. monadInv H0.
+  destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl.
+  exists x; exists true; split; auto.
+  repeat constructor.
+- exact transf_function_no_overflow.
+Qed.
+
+(** * Proof of semantic preservation *)
+
+(** Semantic preservation is proved using a complex simulation diagram
+  of the following form.
+<<
+                                     MB.step
+                      ---------------------------------------->
+                      header      body          exit
+                  st1 -----> st2 -----> st3 ------------------> st4
+                   |          |          |                       |
+                   |   (A)    |   (B)    |         (C)           |
+   match_codestate |          |          |                       |
+                   |  header  |   body1  |  body2                |  match_states
+                  cs1 -----> cs2 -----> cs3 ------> cs4          |
+                   |                  /                \  exit   |
+   match_asmstate  |   ---------------                  --->---  |
+                   |  /   match_asmstate                       \ |
+                  st'1 ---------------------------------------> st'2
+                                     AB.step                  *
+>>
+  The invariant between each MB.step/AB.step is the [match_states] predicate below.
+  However, we also need to introduce an intermediary state [Codestate] which allows
+  us to reason on a finer grain, executing header, body and exit separately.
+
+  This [Codestate] consists in a state like [Asmblock.State], except that the
+  code is directly stored in the state, much like [Machblock.State]. It also features
+  additional useful elements to keep track of while executing a bblock.
+*)
+
+Remark preg_of_not_FP: forall r, negb (mreg_eq r MFP) = true -> IR FP <> preg_of r.
+Proof.
+  intros. change (IR FP) with (preg_of MFP). red; intros.
+  exploit preg_of_injective; eauto. intros; subst r; discriminate.
+Qed.
+
+Inductive match_states: Machblock.state -> Asmvliw.state -> Prop :=
+  | match_states_intro:
+      forall s fb sp c ep ms m m' rs f tf tc
+        (STACKS: match_stack ge s)
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (MEXT: Mem.extends m m')
+        (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
+        (AG: agree ms sp rs)
+        (DXP: ep = true -> rs#FP = parent_sp s),
+      match_states (Machblock.State s fb sp c ms m)
+                   (Asmvliw.State rs m')
+  | match_states_call:
+      forall s fb ms m m' rs
+        (STACKS: match_stack ge s)
+        (MEXT: Mem.extends m m')
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = Vptr fb Ptrofs.zero)
+        (ATLR: rs RA = parent_ra s),
+      match_states (Machblock.Callstate s fb ms m)
+                   (Asmvliw.State rs m')
+  | match_states_return:
+      forall s ms m m' rs
+        (STACKS: match_stack ge s)
+        (MEXT: Mem.extends m m')
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = parent_ra s),
+      match_states (Machblock.Returnstate s ms m)
+                   (Asmvliw.State rs m').
+
+Record codestate :=
+  Codestate {     pstate: state;        (**r projection to Asmblock.state *)
+                  pheader: list label;
+                  pbody1: list basic;   (**r list of basic instructions coming from the translation of the Machblock body *)
+                  pbody2: list basic;   (**r list of basic instructions coming from the translation of the Machblock exit *)
+                  pctl: option control; (**r exit instruction, coming from the translation of the Machblock exit *)
+                  ep: bool;             (**r reflects the [ep] variable used in the translation *)
+                  rem: list AB.bblock;  (**r remaining bblocks to execute *)
+                  cur: bblock           (**r current bblock to execute - to keep track of its size when incrementing PC *)
+            }.
+
+(* The part that deals with Machblock <-> Codestate agreement
+ * Note about DXP: the property of [ep] only matters if the current block doesn't have a header, hence the condition *)
+Inductive match_codestate fb: Machblock.state -> codestate -> Prop :=
+  | match_codestate_intro:
+      forall s sp ms m rs0 m0 f tc ep c bb tbb tbc tbi
+        (STACKS: match_stack ge s)
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (MEXT: Mem.extends m m0)
+        (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc)
+        (TIC: transl_instr_control f (MB.exit bb) = OK tbi)
+        (TBLS: transl_blocks f c false = OK tc)
+        (AG: agree ms sp rs0)
+        (DXP: (if MB.header bb then ep else false) = true -> rs0#FP = parent_sp s)
+        ,
+      match_codestate fb (Machblock.State s fb sp (bb::c) ms m)
+        {|  pstate := (Asmvliw.State rs0 m0);
+            pheader := (MB.header bb);
+            pbody1 := tbc;
+            pbody2 := extract_basic tbi;
+            pctl := extract_ctl tbi;
+            ep := ep;
+            rem := tc;
+            cur := tbb
+        |}
+.
+
+(* The part ensuring that the code in Codestate actually resides at [rs PC] *)
+Inductive match_asmstate fb: codestate -> Asmvliw.state -> Prop :=
+  | match_asmstate_some:
+      forall rs f tf tc m tbb ofs ep tbdy tex lhd
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (TRANSF: transf_function f = OK tf)
+        (PCeq: rs PC = Vptr fb ofs)
+        (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc))
+        ,
+      match_asmstate fb 
+        {|  pstate := (Asmvliw.State rs m);
+            pheader := lhd;
+            pbody1 := tbdy;
+            pbody2 := extract_basic tex;
+            pctl := extract_ctl tex;
+            ep := ep;
+            rem := tc;
+            cur := tbb |}
+        (Asmvliw.State rs m)
+.
+
+(* Useful for dealing with the many cases in some proofs *)
+Ltac exploreInst :=
+  repeat match goal with
+  | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var
+  | [ H : OK _ = OK _ |- _ ] => monadInv H
+  | [ |- context[if ?b then _ else _] ] => destruct b
+  | [ |- context[match ?m with | _ => _ end] ] => destruct m
+  | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m
+  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H : Error _ = OK _ |- _ ] => inversion H
+  end.
+
+(** Some translation properties *)
+
+Lemma transl_blocks_nonil:
+  forall f bb c tc ep,
+  transl_blocks f (bb::c) ep = OK tc ->
+  exists tbb tc', tc = tbb :: tc'.
+Proof.
+  intros until ep0. intros TLBS. monadInv TLBS. monadInv EQ. unfold gen_bblocks.
+  destruct (extract_ctl x2).
+  - destruct c0; destruct i; simpl; eauto. destruct x1; simpl; eauto.
+  - destruct x1; simpl; eauto.
+Qed.
+
+Lemma no_builtin_preserved:
+  forall f ex x2,
+  (forall ef args res, ex <> Some (MBbuiltin ef args res)) ->
+  transl_instr_control f ex = OK x2 ->
+  (exists i, extract_ctl x2 = Some (PCtlFlow i))
+    \/ extract_ctl x2 = None.
+Proof.
+  intros until x2. intros Hbuiltin TIC.
+  destruct ex.
+  - destruct c.
+    (* MBcall *)
+    + simpl in TIC. exploreInst; simpl; eauto.
+    (* MBtailcall *)
+    + simpl in TIC. exploreInst; simpl; eauto.
+    (* MBbuiltin *)
+    + assert (H: Some (MBbuiltin e l b) <>  Some (MBbuiltin e l b)).
+        apply Hbuiltin. contradict H; auto.
+    (* MBgoto *)
+    + simpl in TIC. exploreInst; simpl; eauto.
+    (* MBcond *)
+    + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; simpl; eauto.
+      * unfold transl_opt_compuimm. exploreInst; simpl; eauto.
+      * unfold transl_opt_compluimm. exploreInst; simpl; eauto.
+      * unfold transl_comp_float64. exploreInst; simpl; eauto.
+      * unfold transl_comp_notfloat64. exploreInst; simpl; eauto.
+      * unfold transl_comp_float32. exploreInst; simpl; eauto.
+      * unfold transl_comp_notfloat32. exploreInst; simpl; eauto.
+    (* MBjumptable *)
+    + simpl in TIC. exploreInst; simpl; eauto.
+    (* MBreturn *)
+    + simpl in TIC. monadInv TIC. simpl. eauto.
+  - monadInv TIC. simpl; auto.
+Qed.
+
+Lemma transl_blocks_distrib:
+  forall c f bb tbb tc ep,
+  transl_blocks f (bb::c) ep = OK (tbb::tc)
+  -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res))
+  -> transl_block f bb (if MB.header bb then ep else false) = OK (tbb :: nil)
+  /\ transl_blocks f c false = OK tc.
+Proof.
+  intros until ep0. intros TLBS Hbuiltin.
+  destruct bb as [hd bdy ex].
+  monadInv TLBS. monadInv EQ.
+  exploit no_builtin_preserved; eauto. intros Hectl. destruct Hectl.
+  - destruct H as [i Hectl].
+  unfold gen_bblocks in H0. rewrite Hectl in H0. inv H0.
+  simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
+  unfold gen_bblocks. rewrite Hectl. auto.
+  - unfold gen_bblocks in H0. rewrite H in H0.
+    destruct x1 as [|bi x1].
+    + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
+      unfold gen_bblocks. rewrite H. auto.
+    + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
+      unfold gen_bblocks. rewrite H. auto.
+Qed.
+
+Lemma gen_bblocks_nobuiltin:
+  forall thd tbdy tex tbb,
+  (tbdy <> nil \/ extract_ctl tex <> None) ->
+  (forall ef args res, extract_ctl tex <> Some (PExpand (Pbuiltin ef args res))) ->
+  gen_bblocks thd tbdy tex = tbb :: nil ->
+     header tbb = thd
+  /\ body tbb = tbdy ++ extract_basic tex
+  /\ exit tbb = extract_ctl tex.
+Proof.
+  intros until tbb. intros Hnonil Hnobuiltin GENB. unfold gen_bblocks in GENB.
+  destruct (extract_ctl tex) eqn:ECTL.
+  - destruct c.
+    + destruct i; try (inv GENB; simpl; auto; fail).
+      assert False. eapply Hnobuiltin. eauto. destruct H.
+    + inv GENB. simpl. auto.
+  - inversion Hnonil.
+    + destruct tbdy as [|bi tbdy]; try (contradict H; simpl; auto; fail). inv GENB. auto.
+    + contradict H; simpl; auto.
+Qed.
+
+Lemma transl_instr_basic_nonil:
+  forall k f bi ep x,
+  transl_instr_basic f bi ep k = OK x ->
+  x <> nil.
+Proof.
+  intros until x. intros TIB.
+  destruct bi.
+  - simpl in TIB. unfold loadind in TIB. exploreInst; try discriminate.
+  - simpl in TIB. unfold storeind in TIB. exploreInst; try discriminate.
+  - simpl in TIB. monadInv TIB. unfold loadind in EQ. exploreInst; try discriminate.
+  - simpl in TIB. unfold transl_op in TIB. exploreInst; try discriminate.
+    unfold transl_cond_op in EQ0. exploreInst; try discriminate.
+    unfold transl_cond_float64. exploreInst; try discriminate.
+    unfold transl_cond_notfloat64. exploreInst; try discriminate.
+    unfold transl_cond_float32. exploreInst; try discriminate.
+    unfold transl_cond_notfloat32. exploreInst; try discriminate.
+  - simpl in TIB. unfold transl_load in TIB. exploreInst; try discriminate.
+    all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate.
+  - simpl in TIB. unfold transl_store in TIB. exploreInst; try discriminate.
+    all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate. 
+Qed.
+
+Lemma transl_basic_code_nonil:
+  forall bdy f x ep,
+  bdy <> nil ->
+  transl_basic_code f bdy ep = OK x ->
+  x <> nil.
+Proof.
+  induction bdy as [|bi bdy].
+    intros. contradict H0; auto.
+  destruct bdy as [|bi2 bdy].
+  - clear IHbdy. intros f x b _ TBC. simpl in TBC. eapply transl_instr_basic_nonil; eauto.
+  - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'.
+    monadInv TBC. 
+    assert (x0 <> nil).
+      eapply IHbdy; eauto. subst bdy'. discriminate.
+    eapply transl_instr_basic_nonil; eauto.
+Qed.
+
+Lemma transl_instr_control_nonil:
+  forall ex f x,
+  ex <> None ->
+  transl_instr_control f ex = OK x ->
+  extract_ctl x <> None.
+Proof.
+  intros ex f x Hnonil TIC.
+  destruct ex as [ex|].
+  - clear Hnonil. destruct ex.
+    all: try (simpl in TIC; exploreInst; discriminate).
+    + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; try discriminate.
+      * unfold transl_opt_compuimm. exploreInst; try discriminate.
+      * unfold transl_opt_compluimm. exploreInst; try discriminate.
+      * unfold transl_comp_float64. exploreInst; try discriminate.
+      * unfold transl_comp_notfloat64. exploreInst; try discriminate.
+      * unfold transl_comp_float32. exploreInst; try discriminate.
+      * unfold transl_comp_notfloat32. exploreInst; try discriminate.
+  - contradict Hnonil; auto.
+Qed.
+
+Lemma transl_instr_control_nobuiltin:
+  forall f ex x,
+  (forall ef args res, ex <> Some (MBbuiltin ef args res)) ->
+  transl_instr_control f ex = OK x ->
+  (forall ef args res, extract_ctl x <> Some (PExpand (Pbuiltin ef args res))).
+Proof.
+  intros until x. intros Hnobuiltin TIC. intros until res.
+  unfold transl_instr_control in TIC. exploreInst.
+  all: try discriminate.
+  - assert False. eapply Hnobuiltin; eauto. destruct H.
+  - unfold transl_cbranch in TIC. exploreInst.
+    all: try discriminate.
+    * unfold transl_opt_compuimm. exploreInst. all: try discriminate.
+    * unfold transl_opt_compluimm. exploreInst. all: try discriminate.
+    * unfold transl_comp_float64. exploreInst; try discriminate.
+    * unfold transl_comp_notfloat64. exploreInst; try discriminate.
+    * unfold transl_comp_float32. exploreInst; try discriminate.
+    * unfold transl_comp_notfloat32. exploreInst; try discriminate.
+Qed.
+
+(* Proving that one can decompose a [match_state] relation into a [match_codestate]
+   and a [match_asmstate], along with some helpful properties tying both relations together *)
+
+Theorem match_state_codestate:
+  forall mbs abs s fb sp bb c ms m,
+  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+  (MB.body bb <> nil \/ MB.exit bb <> None) ->
+  mbs = (Machblock.State s fb sp (bb::c) ms m) ->
+  match_states mbs abs ->
+  exists cs fb f tbb tc ep,
+    match_codestate fb mbs cs /\ match_asmstate fb cs abs
+    /\ Genv.find_funct_ptr ge fb = Some (Internal f)
+    /\ transl_blocks f (bb::c) ep = OK (tbb::tc)
+    /\ body tbb = pbody1 cs ++ pbody2 cs
+    /\ exit tbb = pctl cs
+    /\ cur cs = tbb /\ rem cs = tc
+    /\ pstate cs = abs.
+Proof.
+  intros until m. intros Hnobuiltin Hnotempty Hmbs MS. subst. inv MS.
+  inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst.
+  exploit transl_blocks_distrib; eauto. intros (TLB & TLBS). clear H2.
+  monadInv TLB. exploit gen_bblocks_nobuiltin; eauto.
+    { inversion Hnotempty.
+      - destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail).
+        left. eapply transl_basic_code_nonil; eauto.
+      - destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail).
+        right. eapply transl_instr_control_nonil; eauto. }
+    eapply transl_instr_control_nobuiltin; eauto.
+  intros (Hth & Htbdy & Htexit).
+  exists {| pstate := (State rs m'); pheader := (Machblock.header bb); pbody1 := x; pbody2 := extract_basic x0;
+            pctl := extract_ctl x0; ep := ep0; rem := tc'; cur := tbb |}, fb, f, tbb, tc', ep0.
+  repeat split. 1-2: econstructor; eauto.
+  { destruct (MB.header bb). eauto. discriminate. } eauto.
+  unfold transl_blocks. fold transl_blocks. unfold transl_block. rewrite EQ. simpl. rewrite EQ1; simpl.
+  rewrite TLBS. simpl. rewrite H2.
+  all: simpl; auto.
+Qed.
+
+Definition mb_remove_body (bb: MB.bblock) := 
+  {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}.
+
+Lemma exec_straight_pnil:
+  forall c rs1 m1 rs2 m2,
+  exec_straight tge c rs1 m1 (Pnop ::g nil) rs2 m2 ->
+  exec_straight tge c rs1 m1 nil rs2 m2.
+Proof.
+  intros. eapply exec_straight_trans. eapply H. econstructor; eauto.
+Qed.
+
+Lemma transl_block_nobuiltin:
+  forall f bb ep tbb,
+  (MB.body bb <> nil \/ MB.exit bb <> None) ->
+  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+  transl_block f bb ep = OK (tbb :: nil) ->
+  exists c c',
+     transl_basic_code f (MB.body bb) ep = OK c
+  /\ transl_instr_control f (MB.exit bb) = OK c'
+  /\ body tbb = c ++ extract_basic c'
+  /\ exit tbb = extract_ctl c'.
+Proof.
+  intros until tbb. intros Hnonil Hnobuiltin TLB. monadInv TLB. destruct Hnonil.
+  - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto.
+    left. eapply transl_basic_code_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto.
+  - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto.
+    right. eapply transl_instr_control_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto.
+Qed.
+
+Lemma nextblock_preserves: 
+  forall rs rs' bb r,
+  rs' = nextblock bb rs ->
+  data_preg r = true ->
+  rs r = rs' r.
+Proof.
+  intros. destruct r; try discriminate.
+  subst. Simpl.
+Qed.
+
+Remark cons3_app {A: Type}:
+  forall a b c (l: list A),
+  a :: b :: c :: l = (a :: b :: c :: nil) ++ l.
+Proof.
+  intros. simpl. auto.
+Qed.
+
+Lemma exec_straight_opt_body2:
+  forall c rs1 m1 c' rs2 m2,
+  exec_straight_opt tge c rs1 m1 c' rs2 m2 ->
+  exists body,
+     exec_body tge body rs1 m1 = Next rs2 m2
+  /\ (basics_to_code body) ++g c' = c.
+Proof.
+  intros until m2. intros EXES.
+  inv EXES.
+  - exists nil. split; auto.
+  - eapply exec_straight_body2. auto.
+Qed.
+
+Lemma extract_basics_to_code:
+  forall lb c,
+  extract_basic (basics_to_code lb ++ c) = lb ++ extract_basic c.
+Proof.
+  induction lb; intros; simpl; congruence.
+Qed.
+
+Lemma extract_ctl_basics_to_code:
+  forall lb c,
+  extract_ctl (basics_to_code lb ++ c) = extract_ctl c.
+Proof.
+  induction lb; intros; simpl; congruence.
+Qed.
+
+(* See (C) in the diagram. The proofs are mostly adapted from the previous Mach->Asm proofs, but are
+   unfortunately quite cumbersome. To reproduce them, it's best to have a Coq IDE with you and see by
+   yourself the steps *)
+Theorem step_simu_control:
+  forall bb' fb fn s sp c ms' m' rs2 m2 t S'' rs1 m1 tbb tbdy2 tex cs2,
+  MB.body bb' = nil ->
+  (forall ef args res, MB.exit bb' <> Some (MBbuiltin ef args res)) ->
+  Genv.find_funct_ptr tge fb = Some (Internal fn) ->
+  pstate cs2 = (Asmvliw.State rs2 m2) ->
+  pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex ->
+  cur cs2 = tbb ->
+  match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 ->
+  match_asmstate fb cs2 (Asmvliw.State rs1 m1) ->
+  exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') t S'' ->
+  (exists rs3 m3 rs4 m4,
+      exec_body tge tbdy2 rs2 m2 = Next rs3 m3
+  /\  exec_control_rel tge fn tex tbb rs3 m3 rs4 m4
+  /\  match_states S'' (State rs4 m4)).
+Proof.
+  intros until cs2. intros Hbody Hbuiltin FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP.
+  inv ESTEP.
+  - inv MCS. inv MAS. simpl in *.
+    inv Hpstate.
+    destruct ctl.
+    + (* MBcall *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      destruct s1 as [rf|fid]; simpl in H7.
+      * (* Indirect call *)
+        monadInv H1.
+        assert (ms' rf = Vptr f' Ptrofs.zero).
+        { unfold find_function_ptr in H14. destruct (ms' rf); try discriminate.
+          revert H14; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+        assert (rs2 x = Vptr f' Ptrofs.zero).
+        { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+        remember (Ptrofs.add _ _) as ofs'.
+        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+        { econstructor; eauto. }
+        assert (f1 = f) by congruence. subst f1.
+        exploit return_address_offset_correct; eauto. intros; subst ra.
+
+        repeat eexists.
+          rewrite H6. econstructor; eauto.
+          rewrite H7. econstructor; eauto.
+        econstructor; eauto.
+          econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl.
+        simpl. Simpl. rewrite PCeq. rewrite Heqofs'. simpl. auto.
+
+      * (* Direct call *)
+        monadInv H1.
+        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+        remember (Ptrofs.add _ _) as ofs'.
+        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+          econstructor; eauto.
+        assert (f1 = f) by congruence. subst f1.
+        exploit return_address_offset_correct; eauto. intros; subst ra.
+        repeat eexists.
+          rewrite H6. econstructor; eauto.
+          rewrite H7. econstructor; eauto.
+        econstructor; eauto.
+          econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl.
+        Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. simpl in H14. rewrite H14. auto.
+        Simpl. simpl. subst. Simpl. simpl. unfold Val.offset_ptr. rewrite PCeq. auto.
+    + (* MBtailcall *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence.  subst f0.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      exploit Mem.loadv_extends. eauto. eexact H15. auto. simpl. intros [parent' [A B]].
+      destruct s1 as [rf|fid]; simpl in H13. 
+      * monadInv H1.
+        assert (ms' rf = Vptr f' Ptrofs.zero).
+          { destruct (ms' rf); try discriminate. revert H13. predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+        assert (rs2 x = Vptr f' Ptrofs.zero).
+          { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+
+        assert (f = f1) by congruence. subst f1. clear FIND1. clear H14.
+        exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+        exploit exec_straight_body; eauto.
+          { simpl. eauto. }
+        intros EXEB.
+        repeat eexists.
+          rewrite H6. simpl extract_basic. eauto.
+          rewrite H7. simpl extract_ctl. simpl. reflexivity.
+        econstructor; eauto.
+          { apply agree_set_other.
+            - econstructor; auto with asmgen.
+              + apply V.
+              + intro r. destruct r; apply V; auto.
+            - eauto with asmgen. }
+        assert (IR x <> IR GPR12 /\ IR x <> IR GPR32 /\ IR x <> IR GPR16).
+          { clear - EQ. destruct x; repeat split; try discriminate.
+            all: unfold ireg_of in EQ; destruct rf; try discriminate. }
+        Simpl. inv H1. inv H3. rewrite Z; auto; try discriminate.
+      * monadInv H1. assert (f = f1) by congruence. subst f1. clear FIND1. clear H14.
+        exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+        exploit exec_straight_body; eauto.
+          simpl. eauto.
+        intros EXEB.
+        repeat eexists.
+          rewrite H6. simpl extract_basic. eauto.
+          rewrite H7. simpl extract_ctl. simpl. reflexivity.
+        econstructor; eauto.
+        { apply agree_set_other.
+          - econstructor; auto with asmgen.
+            + apply V.
+            + intro r. destruct r; apply V; auto.
+          - eauto with asmgen. }
+        { Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H13. auto. }
+    + (* MBbuiltin (contradiction) *)
+      assert (MB.exit bb' <> Some (MBbuiltin e l b)) by (apply Hbuiltin).
+      rewrite <- H in H1. contradict H1; auto.
+    + (* MBgoto *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence. subst f1. clear H11.
+      remember (nextblock tbb rs2) as rs2'.
+      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'.
+      assert (tf = fn) by congruence. subst tf.
+      exploit find_label_goto_label.
+        eauto. eauto.
+        instantiate (2 := rs2').
+        { subst. unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. eauto. }
+        eauto.
+      intros (tc' & rs' & GOTO & AT2 & INV).
+
+      eexists. eexists. repeat eexists. repeat split.
+        rewrite H6. simpl extract_basic. simpl. eauto.
+        rewrite H7. simpl extract_ctl. simpl. rewrite <- Heqrs2'. eauto.
+      econstructor; eauto.
+        rewrite Heqrs2' in INV. unfold nextblock, incrPC in INV.
+        eapply agree_exten; eauto with asmgen.
+        assert (forall r : preg, r <> PC -> rs' r = rs2 r).
+        { intros. destruct r.
+          - destruct g. all: rewrite INV; Simpl; auto.
+          - rewrite INV; Simpl; auto.
+          - contradiction. }
+        eauto with asmgen.
+        congruence.
+    + (* MBcond *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      * (* MBcond true *)
+        assert (f0 = f) by congruence. subst f0.
+        exploit eval_condition_lessdef.
+          eapply preg_vals; eauto.
+          all: eauto.
+        intros EC.
+        exploit transl_cbranch_correct_true; eauto. intros (rs' & jmp & A & B & C).
+        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+        assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. }
+        rewrite PCeq' in PCeq.
+        assert (f1 = f) by congruence. subst f1.
+        exploit find_label_goto_label.
+          4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs')). rewrite nextblock_pc.
+          unfold Val.offset_ptr. rewrite PCeq. eauto.
+        intros (tc' & rs3 & GOTOL & TLPC & Hrs3).
+        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+        assert (tf = fn) by congruence. subst tf.
+
+        repeat eexists.
+          rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto.
+          rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto.
+
+        econstructor; eauto.
+          eapply agree_exten with rs2; eauto with asmgen.
+          { intros. destruct r; try destruct g; try discriminate.
+            all: rewrite Hrs3; try discriminate; unfold nextblock, incrPC; Simpl. }
+        intros. discriminate.
+
+      * (* MBcond false *)
+        assert (f0 = f) by congruence. subst f0.
+        exploit eval_condition_lessdef.
+          eapply preg_vals; eauto.
+          all: eauto.
+        intros EC.
+
+        exploit transl_cbranch_correct_false; eauto. intros (rs' & jmp & A & B & C).
+        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+        assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. }
+        rewrite PCeq' in PCeq.
+        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+        assert (tf = fn) by congruence. subst tf.
+
+        assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned).
+          eapply transf_function_no_overflow; eauto.
+        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+
+        repeat eexists.
+          rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto.
+          rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto.
+
+        econstructor; eauto.
+          unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. econstructor; eauto.
+          eapply agree_exten with rs2; eauto with asmgen.
+          { intros. destruct r; try destruct g; try discriminate.
+            all: rewrite <- C; try discriminate; unfold nextblock, incrPC; Simpl. }
+        intros. discriminate.
+    + (* MBjumptable *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0.
+      monadInv H1.
+      generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV.
+      assert (f1 = f) by congruence. subst f1.
+      exploit find_label_goto_label. 4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs2) # GPR62 <- Vundef # GPR63 <- Vundef).
+        unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity.
+      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3. assert (fn = tf) by congruence. subst fn.
+
+      intros [tc' [rs' [A [B C]]]].
+      exploit ireg_val; eauto. rewrite H13. intros LD; inv LD.
+      
+      repeat eexists.
+        rewrite H6. simpl extract_basic. simpl. eauto.
+        rewrite H7. simpl extract_ctl. simpl. Simpl. rewrite <- H1. unfold Mach.label in H14. unfold label. rewrite H14. eapply A.
+      econstructor; eauto.
+        eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen.
+        { assert (destroyed_by_jumptable = R62 :: R63 :: nil) by auto. rewrite H2 in H0. simpl in H0. inv H0.
+          destruct (preg_eq r' GPR63). subst. contradiction.
+          destruct (preg_eq r' GPR62). subst. contradiction.
+          destruct r'; Simpl. }
+        discriminate.
+    + (* MBreturn *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+      exploit exec_straight_body; eauto.
+        simpl. eauto.
+      intros EXEB.
+      assert (f1 = f) by congruence. subst f1.
+      
+      repeat eexists.
+        rewrite H6. simpl extract_basic. eauto.
+        rewrite H7. simpl extract_ctl. simpl. reflexivity.
+      econstructor; eauto.
+        unfold nextblock, incrPC. repeat apply agree_set_other; auto with asmgen.
+
+  - inv MCS. inv MAS. simpl in *. subst. inv Hpstate.
+    destruct bb' as [hd' bdy' ex']; simpl in *. subst.
+    monadInv TBC. monadInv TIC. simpl in *. rewrite H5. rewrite H6.
+    simpl. repeat eexists.
+    econstructor. 4: instantiate (3 := false). all:eauto.
+      unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      assert (f = f0) by congruence. subst f0. econstructor; eauto.
+      generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto.
+    eapply agree_exten; eauto. intros. Simpl.
+    discriminate.
+Qed.
+
+Definition mb_remove_first (bb: MB.bblock) := 
+  {| MB.header := MB.header bb; MB.body := tail (MB.body bb); MB.exit := MB.exit bb |}.
+
+Lemma exec_straight_body:
+  forall c c' lc rs1 m1 rs2 m2,
+  exec_straight tge c rs1 m1 c' rs2 m2 ->
+  code_to_basics c = Some lc ->
+  exists l ll,
+     c = l ++ c'
+  /\ code_to_basics l = Some ll
+  /\ exec_body tge ll rs1 m1 = Next rs2 m2.
+Proof.
+  induction c; try (intros; inv H; fail).
+  intros until m2. intros EXES CTB. inv EXES.
+  - exists (i1 ::g nil),(i1::nil). repeat (split; simpl; auto). rewrite H6. auto.
+  - inv CTB. destruct (code_to_basics c); try discriminate. inv H0.
+    eapply IHc in H7; eauto. destruct H7 as (l' & ll & Hc & CTB & EXECB). subst.
+    exists (i ::g l'),(i::ll). repeat (split; simpl; auto).
+      rewrite CTB. auto.
+      rewrite H1. auto.
+Qed.
+
+Lemma basics_to_code_app:
+  forall c l x ll,
+  basics_to_code c = l ++ basics_to_code x ->
+  code_to_basics l = Some ll ->
+  c = ll ++ x.
+Proof.
+  intros. apply (f_equal code_to_basics) in H.
+  erewrite code_to_basics_dist in H; eauto. 2: eapply code_to_basics_id.
+  rewrite code_to_basics_id in H. inv H. auto.
+Qed.
+
+Lemma basics_to_code_app2:
+  forall i c l x ll,
+  (PBasic i) :: basics_to_code c = l ++ basics_to_code x ->
+  code_to_basics l = Some ll ->
+  i :: c = ll ++ x.
+Proof.
+  intros until ll. intros.
+  exploit basics_to_code_app. instantiate (3 := (i::c)). simpl.
+  all: eauto.
+Qed.
+
+(* Handling the individual instructions of theorem (B) in the above diagram. A bit less cumbersome, but still tough *)
+Theorem step_simu_basic:
+  forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy,
+  MB.header bb = nil -> MB.body bb = bi::(bdy) -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+  bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} ->
+  basic_step ge s fb sp ms m bi ms' m' ->
+  pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy ->
+  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+  (exists rs2 m2 l cs2 tbdy',
+       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1;
+                pctl := pctl cs1; ep := fp_is_parent (ep cs1) bi; rem := rem cs1; cur := cur cs1 |}
+    /\ tbdy = l ++ tbdy'
+    /\ exec_body tge l rs1 m1 = Next rs2 m2
+    /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2).
+Proof.
+  intros until bdy. intros Hheader Hbody Hnobuiltin (* Hnotempty *) Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS.
+  simpl in *. inv Hpstate.
+  rewrite Hbody in TBC. monadInv TBC.
+  inv BSTEP.
+
+  - (* MBgetstack *)
+    simpl in EQ0.
+    unfold Mach.load_stack in H.
+    exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+    rewrite (sp_val _ _ _ AG) in A.
+    exploit loadind_correct; eauto with asmgen.
+    intros (rs2 & EXECS & Hrs'1 & Hrs'2).
+    eapply exec_straight_body in EXECS.
+      2: eapply code_to_basics_id; eauto.
+    destruct EXECS as (l & Hlbi & BTC & CTB & EXECB).
+    exists rs2, m1, Hlbi.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+      eapply basics_to_code_app; eauto.
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+    subst. simpl in Hheadereq.
+
+    eapply match_codestate_intro; eauto.
+      { simpl. simpl in EQ. rewrite <- Hheadereq in EQ. assumption. }
+    eapply agree_set_mreg; eauto with asmgen.
+    intro Hep. simpl in Hep. 
+    destruct (andb_prop _ _ Hep). clear Hep.
+    rewrite <- Hheadereq in DXP. subst. rewrite <- DXP. rewrite Hrs'2. reflexivity.
+    discriminate. apply preg_of_not_FP; assumption. reflexivity.
+
+  - (* MBsetstack *)
+    simpl in EQ0.
+    unfold Mach.store_stack in H.
+    assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. }
+    exploit Mem.storev_extends; eauto. intros [m2' [A B]].
+    exploit storeind_correct; eauto with asmgen.
+    rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]].
+
+    eapply exec_straight_body in P.
+      2: eapply code_to_basics_id; eauto.
+    destruct P as (l & ll & TBC & CTB & EXECB).
+    exists rs', m2', ll.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+      eapply basics_to_code_app; eauto.
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    subst.
+    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+
+    eapply agree_undef_regs; eauto with asmgen.
+    simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto.
+  - (* MBgetparam *)
+    simpl in EQ0.
+
+    assert (f0 = f) by congruence; subst f0.
+    unfold Mach.load_stack in *.
+    exploit Mem.loadv_extends. eauto. eexact H0. auto.
+    intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
+    exploit Mem.loadv_extends. eauto. eexact H1. auto.
+    intros [v' [C D]].
+
+    monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP.
+    destruct ep0 eqn:EPeq.
+
+  (* RTMP contains parent *)
+    + exploit loadind_correct. eexact EQ1.
+      instantiate (2 := rs1). rewrite DXP; eauto.
+      intros [rs2 [P [Q R]]].
+
+      eapply exec_straight_body in P.
+        2: eapply code_to_basics_id; eauto.
+      destruct P as (l & ll & BTC & CTB & EXECB).
+      exists rs2, m1, ll. eexists.
+      eexists. split. instantiate (1 := x). eauto.
+      repeat (split; auto).
+      { eapply basics_to_code_app; eauto. }
+      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+      subst.
+      eapply match_codestate_intro; eauto.
+
+      eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
+      simpl; intros. rewrite R; auto with asmgen.
+      apply preg_of_not_FP; auto.
+
+  (* RTMP does not contain parent *)
+    + rewrite chunk_of_Tptr in A. 
+      exploit loadind_ptr_correct. eexact A. intros [rs2 [P [Q R]]].
+      exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto.
+      intros [rs3 [S [T U]]].
+
+      exploit exec_straight_trans.
+        eapply P.
+        eapply S.
+      intros EXES.
+
+      eapply exec_straight_body in EXES.
+        2: simpl. 2: erewrite code_to_basics_id; eauto.
+      destruct EXES as (l & ll & BTC & CTB & EXECB).
+      exists rs3, m1, ll.
+      eexists. eexists. split. instantiate (1 := x). eauto.
+      repeat (split; auto).
+        eapply basics_to_code_app2; eauto.
+      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+      subst.
+      eapply match_codestate_intro; eauto.
+      eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
+      instantiate (1 := rs2#FP <- (rs3#FP)). intros.
+      rewrite Pregmap.gso; auto with asmgen.
+      congruence.
+      intros. unfold Pregmap.set. destruct (PregEq.eq r' FP). congruence. auto with asmgen.
+      simpl; intros. rewrite U; auto with asmgen.
+      apply preg_of_not_FP; auto.
+  - (* MBop *)
+    simpl in EQ0. rewrite Hheader in DXP.
+    
+    assert (eval_operation tge sp op (map ms args) m' = Some v).
+      rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
+    exploit eval_operation_lessdef.
+      eapply preg_vals; eauto.
+      2: eexact H0.
+      all: eauto.
+    intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
+
+    eapply exec_straight_body in P.
+      2: eapply code_to_basics_id; eauto.
+    destruct P as (l & ll & TBC & CTB & EXECB).
+    exists rs2, m1, ll.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+      eapply basics_to_code_app; eauto.
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    subst.
+    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+    apply agree_set_undef_mreg with rs1; auto. 
+    apply Val.lessdef_trans with v'; auto.
+    simpl; intros. destruct (andb_prop _ _ H1); clear H1.
+    rewrite R; auto. apply preg_of_not_FP; auto.
+Local Transparent destroyed_by_op.
+    destruct op; simpl; auto; congruence.
+  - (* MBload *)
+    simpl in EQ0. rewrite Hheader in DXP.
+
+    assert (eval_addressing tge sp addr (map ms args) = Some a).
+      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    exploit Mem.loadv_extends; eauto. intros [v' [C D]].
+    exploit transl_load_correct; eauto.
+    intros [rs2 [P [Q R]]].
+
+    eapply exec_straight_body in P.
+      2: eapply code_to_basics_id; eauto.
+    destruct P as (l & ll & TBC & CTB & EXECB).
+    exists rs2, m1, ll.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+      eapply basics_to_code_app; eauto.
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+    subst.
+    eapply match_codestate_intro; eauto. simpl. simpl in EQ.
+    rewrite <- Hheadereq in EQ. assumption.
+    eapply agree_set_mreg; eauto with asmgen.
+    intro Hep. simpl in Hep. 
+    destruct (andb_prop _ _ Hep). clear Hep.
+    subst. rewrite <- DXP. rewrite R; try discriminate. reflexivity.
+    apply preg_of_not_FP; assumption. reflexivity.
+
+  - (* notrap1 cannot happen *)
+    simpl in EQ0. unfold transl_load in EQ0.
+    destruct addr; simpl in H.
+    all: unfold transl_load_rrrXS, transl_load_rrr, transl_load_rro in EQ0;
+    monadInv EQ0; unfold transl_memory_access2XS, transl_memory_access2, transl_memory_access in EQ2;
+    destruct args as [|h0 t0]; try discriminate;
+    destruct t0 as [|h1 t1]; try discriminate;
+    destruct t1 as [|h2 t2]; try discriminate.
+    
+  - (* MBload notrap2 TODO *)
+    simpl in EQ0. rewrite Hheader in DXP.
+
+    assert (eval_addressing tge sp addr (map ms args) = Some a).
+      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+
+    destruct (Mem.loadv chunk m1 a') as [v' | ] eqn:Hload.
+    {
+    exploit transl_load_correct; eauto.
+    intros [rs2 [P [Q R]]].
+
+    eapply exec_straight_body in P.
+      2: eapply code_to_basics_id; eauto.
+    destruct P as (l & ll & TBC & CTB & EXECB).
+    exists rs2, m1, ll.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+    eapply basics_to_code_app; eauto.
+    eapply match_codestate_intro; eauto. simpl. rewrite Hheader in *.
+    simpl in EQ. assumption.
+
+    eapply agree_set_undef_mreg; eauto. intros; auto with asmgen.
+
+    simpl. intro.
+    rewrite R; try congruence.
+    apply DXP.
+    destruct ep0; simpl in *; congruence.
+    apply preg_of_not_FP.
+    destruct ep0; simpl in *; congruence.
+    }
+    { 
+    exploit transl_load_correct_notrap2; eauto.
+    intros [rs2 [P [Q R]]].
+
+    eapply exec_straight_body in P.
+      2: eapply code_to_basics_id; eauto.
+    destruct P as (l & ll & TBC & CTB & EXECB).
+    exists rs2, m1, ll.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+      eapply basics_to_code_app; eauto.
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+(*     assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+    rewrite <- Hheadereq. *) subst.
+    eapply match_codestate_intro; eauto. simpl. rewrite Hheader in *. simpl in EQ. assumption.
+
+    eapply agree_set_undef_mreg; eauto. intros; auto with asmgen.
+    simpl. intro.
+    rewrite R; try congruence.
+    apply DXP.
+    destruct ep0; simpl in *; congruence.
+    apply preg_of_not_FP.
+    destruct ep0; simpl in *; congruence.
+    }
+  - (* MBstore *)
+    simpl in EQ0. rewrite Hheader in DXP.
+
+    assert (eval_addressing tge sp addr (map ms args) = Some a).
+      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto.
+    exploit Mem.storev_extends; eauto. intros [m2' [C D]].
+    exploit transl_store_correct; eauto. intros [rs2 [P Q]].
+
+    eapply exec_straight_body in P.
+      2: eapply code_to_basics_id; eauto.
+    destruct P as (l & ll & TBC & CTB & EXECB).
+    exists rs2, m2', ll.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+      eapply basics_to_code_app; eauto.
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+    subst.
+    eapply match_codestate_intro; eauto. simpl. simpl in EQ.
+    rewrite <- Hheadereq in EQ. assumption.
+    eapply agree_undef_regs; eauto with asmgen.
+    intro Hep. simpl in Hep.
+    subst. rewrite <- DXP. rewrite Q; try discriminate. reflexivity. reflexivity.
+Qed.
+
+Lemma exec_body_trans:
+  forall l l' rs0 m0 rs1 m1 rs2 m2,
+  exec_body tge l rs0 m0 = Next rs1 m1 ->
+  exec_body tge l' rs1 m1 = Next rs2 m2 ->
+  exec_body tge (l++l') rs0 m0 = Next rs2 m2.
+Proof.
+  induction l.
+  - simpl. congruence.
+  - intros until m2. intros EXEB1 EXEB2.
+    inv EXEB1. destruct (exec_basic_instr _) eqn:EBI; try discriminate.
+    simpl. rewrite EBI. eapply IHl; eauto.
+Qed.
+
+Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}.
+
+Program Definition remove_header tbb := {| header := nil; body := body tbb; exit := exit tbb |}.
+Next Obligation.
+  destruct tbb. simpl. auto.
+Qed.
+
+Inductive exec_header: codestate -> codestate -> Prop :=
+  | exec_header_cons: forall cs1,
+      exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1;
+                          pctl := pctl cs1; ep := (if pheader cs1 then ep cs1 else false); rem := rem cs1;
+                          cur := cur cs1 |}.
+
+(* Theorem (A) in the diagram, the easiest of all *)
+Theorem step_simu_header:
+  forall bb s fb sp c ms m rs1 m1 cs1,
+  pstate cs1 = (State rs1 m1) ->
+  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+  (exists cs1',
+       exec_header cs1 cs1'
+    /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1').
+Proof.
+  intros until cs1. intros Hpstate MCS.
+  eexists. split; eauto.
+  econstructor; eauto.
+  inv MCS. simpl in *. inv Hpstate.
+  econstructor; eauto.
+Qed.
+
+Lemma step_matchasm_header:
+  forall fb cs1 cs1' s1,
+  match_asmstate fb cs1 s1 ->
+  exec_header cs1 cs1' ->
+  match_asmstate fb cs1' s1.
+Proof.
+  intros until s1. intros MAS EXH.
+  inv MAS. inv EXH.
+  simpl. econstructor; eauto.
+Qed.
+
+(* Theorem (B) in the diagram, using step_simu_basic + induction on the Machblock body *)
+Theorem step_simu_body:
+  forall bb s fb sp c ms m rs1 m1 ms' cs1 m',
+  MB.header bb = nil ->
+  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+  body_step ge s fb sp (MB.body bb) ms m ms' m' ->
+  pstate cs1 = (State rs1 m1) ->
+  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+  (exists rs2 m2 cs2 ep,
+       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1;
+                pctl := pctl cs1; ep := ep; rem := rem cs1; cur := cur cs1 |}
+    /\ exec_body tge (pbody1 cs1) rs1 m1 = Next rs2 m2
+    /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2).
+Proof.
+  intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy].
+  - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS.
+    inv BSTEP.
+    exists rs1, m1, cs1, (ep cs1).
+    inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto).
+    econstructor; eauto.
+  - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS. inv BSTEP.
+    rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'.
+    exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto.
+    intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS').
+    simpl in *.
+    exploit IHbdy. auto. 2: eapply H6. 3: eapply MCS'. all: eauto. subst; eauto. simpl; auto.
+    intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS'').
+    exists rs3, m3, cs3, ep.
+    repeat (split; simpl; auto). subst. simpl in *. auto.
+    rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto.
+Qed.
+
+Lemma exec_body_control:
+  forall b rs1 m1 rs2 m2 rs3 m3 fn,
+  exec_body tge (body b) rs1 m1 = Next rs2 m2 ->
+  exec_control_rel tge fn (exit b) b rs2 m2 rs3 m3 ->
+  exec_bblock_rel tge fn b rs1 m1 rs3 m3.
+Proof.
+  intros until fn. intros EXEB EXECTL.
+  econstructor; eauto. inv EXECTL.
+  unfold exec_bblock. rewrite EXEB. auto.
+Qed.
+
+Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat.
+
+Lemma mbsize_eqz:
+  forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None.
+Proof.
+  intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H.
+  remember (length _) as a. remember (length_opt _) as b.
+  assert (a = 0%nat) by omega. assert (b = 0%nat) by omega. subst. clear H.
+  inv H0. inv H1. destruct bdy; destruct ex; auto.
+  all: try discriminate.
+Qed.
+
+Lemma mbsize_neqz:
+  forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None).
+Proof.
+  intros. destruct bb as [hd bdy ex]; simpl in *.
+  destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate).
+  contradict H. unfold mbsize. simpl. auto.
+Qed.
+
+(* Bringing theorems (A), (B) and (C) together, for the case of the absence of builtin instruction *)
+(* This more general form is easier to prove, but the actual theorem is step_simulation_bblock further below *)
+Lemma step_simulation_bblock':
+  forall sf f sp bb bb' bb'' rs m rs' m' s'' c S1,
+  bb' = mb_remove_header bb ->
+  body_step ge sf f sp (Machblock.body bb') rs m rs' m' ->
+  bb'' = mb_remove_body bb' ->
+  (forall ef args res, MB.exit bb'' <> Some (MBbuiltin ef args res)) ->
+  exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') E0 s'' ->
+  match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
+  exists S2 : state, plus step tge S1 E0 S2 /\ match_states s'' S2.
+Proof.
+  intros until S1. intros Hbb' BSTEP Hbb'' Hbuiltin ESTEP MS.
+  destruct (mbsize bb) eqn:SIZE.
+  - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit).
+    destruct bb as [hd bdy ex]; simpl in *; subst.
+    inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc.
+    monadInv H2. simpl in *. inv ESTEP. inv BSTEP.
+    eexists. split. eapply plus_one.
+    exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'.
+    assert (x = tf) by congruence. subst x.
+    eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto.
+    unfold exec_bblock. simpl. eauto.
+    econstructor. eauto. eauto. eauto.
+    unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite <- H.
+    assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+      eapply transf_function_no_overflow; eauto.
+    econstructor; eauto.
+      generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto.
+    eapply agree_exten; eauto. intros. Simpl.
+    intros. discriminate.
+  - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. }
+    intros Hnotempty.
+
+    (* initial setting *)
+    exploit match_state_codestate.
+      2: eapply Hnotempty.
+      all: eauto.
+    intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate).
+
+    (* step_simu_header part *)
+    assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. }
+    destruct H as (rs1 & m1 & Hpstate2). subst.
+    assert (f = fb). { inv MCS. auto. } subst fb.
+    exploit step_simu_header.
+      2: eapply MCS.
+      all: eauto.
+    intros (cs1' & EXEH & MCS2).
+
+    (* step_simu_body part *)
+    assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. }
+    exploit step_simu_body.
+      3: eapply BSTEP.
+      4: eapply MCS2.
+      all: eauto. rewrite Hpstate'. eauto.
+    intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS').
+
+    (* step_simu_control part *)
+    assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)).
+    { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. }
+    destruct H as (tf & FIND').
+    assert (exists tex, pbody2 cs1 = extract_basic tex /\ pctl cs1 = extract_ctl tex).
+    { inv MAS. simpl in *. eauto. }
+    destruct H as (tex & Hpbody2 & Hpctl).
+    inv EXEH. simpl in *.
+    subst. exploit step_simu_control.
+      9: eapply MCS'. all: simpl.
+      10: eapply ESTEP.
+      all: simpl; eauto.
+      rewrite Hpbody2. rewrite Hpctl.
+      { inv MAS; simpl in *. inv Hpstate2. eapply match_asmstate_some; eauto.
+        erewrite exec_body_pc; eauto. }
+    intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS').
+
+    (* bringing the pieces together *)
+    exploit exec_body_trans.
+      eapply EXEB.
+      eauto.
+    intros EXEB2.
+    exploit exec_body_control; eauto.
+    rewrite <- Hpbody2 in EXEB2. rewrite <- Hbody in EXEB2. eauto.
+    rewrite Hexit. rewrite Hpctl. eauto.
+    intros EXECB. inv EXECB.
+    exists (State rs4 m4).
+    split; auto. eapply plus_one. rewrite Hpstate2.
+    assert (exists ofs, rs1 PC = Vptr f ofs).
+    { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. }
+    destruct H0 as (ofs & Hrs1pc).
+    eapply exec_step_internal; eauto.
+
+    (* proving the initial find_bblock *)
+    rewrite Hpstate2 in MAS. inv MAS. simpl in *. 
+    assert (f1 = f0) by congruence. subst f0.
+    rewrite PCeq in Hrs1pc. inv Hrs1pc.
+    exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''.
+    inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ.
+    eapply find_bblock_tail; eauto.
+Qed.
+
+Theorem step_simulation_bblock:
+  forall sf f sp bb ms m ms' m' S2 c,
+  body_step ge sf f sp (Machblock.body bb) ms m ms' m' ->
+  (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+  exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') E0 S2 ->
+  forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
+  exists S2' : state, plus step tge S1' E0 S2' /\ match_states S2 S2'.
+Proof.
+  intros until c. intros BSTEP Hbuiltin ESTEP S1' MS.
+  eapply step_simulation_bblock'; eauto.
+  all: destruct bb as [hd bdy ex]; simpl in *; eauto.
+  inv ESTEP.
+  - econstructor. inv H; try (econstructor; eauto; fail).
+  - econstructor.
+Qed.
+
+(** Dealing now with the builtin case *)
+
+Definition split (c: MB.code) :=
+  match c with
+  | nil => nil
+  | bb::c => {| MB.header := MB.header bb; MB.body := MB.body bb; MB.exit := None |}
+              :: {| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |} :: c
+  end.
+
+Lemma cons_ok_eq3 {A: Type} :
+  forall (x:A) y z x' y' z',
+  x = x' -> y = y' -> z = z' ->
+  OK (x::y::z) = OK (x'::y'::z').
+Proof.
+  intros. subst. auto.
+Qed.
+
+Lemma transl_blocks_split_builtin:
+  forall bb c ep f ef args res,
+  MB.exit bb = Some (MBbuiltin ef args res) -> MB.body bb <> nil ->
+  transl_blocks f (split (bb::c)) ep = transl_blocks f (bb::c) ep.
+Proof.
+  intros until res. intros Hexit Hbody. simpl split.
+  unfold transl_blocks. fold transl_blocks. unfold transl_block.
+  simpl. remember (transl_basic_code _ _ _) as tbc. remember (transl_instr_control _ _) as tbi.
+  remember (transl_blocks _ _ _) as tlbs.
+  destruct tbc; destruct tbi; destruct tlbs.
+  all: try simpl; auto.
+  - simpl. rewrite Hexit in Heqtbi. simpl in Heqtbi. monadInv Heqtbi. simpl.
+    unfold gen_bblocks. simpl. destruct l.
+    + exploit transl_basic_code_nonil; eauto. intro. destruct H.
+    + simpl. rewrite app_nil_r. apply cons_ok_eq3. all: try eapply bblock_equality. all: simpl; auto.
+Qed.
+
+Lemma transl_code_at_pc_split_builtin:
+  forall rs f f0 bb c ep tf tc ef args res,
+  MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
+  transl_code_at_pc ge (rs PC) f f0 (bb :: c) ep tf tc ->
+  transl_code_at_pc ge (rs PC) f f0 (split (bb :: c)) ep tf tc.
+Proof.
+  intros until res. intros Hbody Hexit AT. inv AT.
+  econstructor; eauto. erewrite transl_blocks_split_builtin; eauto.
+Qed.
+
+Theorem match_states_split_builtin:
+  forall sf f sp bb c rs m ef args res S1,
+  MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
+  match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
+  match_states (Machblock.State sf f sp (split (bb::c)) rs m) S1.
+Proof.
+  intros until S1. intros Hbody Hexit MS.
+  inv MS.
+  econstructor; eauto.
+  eapply transl_code_at_pc_split_builtin; eauto.
+Qed.
+
+Theorem step_simulation_builtin:
+  forall ef args res bb sf f sp c ms m t S2,
+  MB.body bb = nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
+  exit_step return_address_offset ge (MB.exit bb) (Machblock.State sf f sp (bb :: c) ms m) t S2 ->
+  forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
+  exists S2' : state, plus step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  intros until S2. intros Hbody Hexit ESTEP S1' MS.
+  inv MS. inv AT. monadInv H2. monadInv EQ.
+  rewrite Hbody in EQ0. monadInv EQ0.
+  rewrite Hexit in EQ. monadInv EQ.
+  rewrite Hexit in ESTEP. inv ESTEP. inv H4.
+
+  exploit functions_transl; eauto. intro FN.
+  generalize (transf_function_no_overflow _ _ H1); intro NOOV.
+  exploit builtin_args_match; eauto. intros [vargs' [P Q]].
+  exploit external_call_mem_extends; eauto.
+  intros [vres' [m2' [A [B [C D]]]]].
+  econstructor; split. apply plus_one.
+  simpl in H3.
+  eapply exec_step_builtin. eauto. eauto.
+    eapply find_bblock_tail; eauto.
+    simpl. eauto.
+    erewrite <- sp_val by eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+    eauto.
+  econstructor; eauto.
+    instantiate (2 := tf); instantiate (1 := x0).
+    unfold nextblock, incrPC. rewrite Pregmap.gss.
+    rewrite set_res_other. rewrite undef_regs_other_2. rewrite Pregmap.gso by congruence. 
+    rewrite <- H. simpl. econstructor; eauto.
+    eapply code_tail_next_int; eauto.
+    rewrite preg_notin_charact. intros. auto with asmgen.
+    auto with asmgen.
+    apply agree_nextblock. eapply agree_set_res; auto.
+    eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto.
+    apply Pregmap.gso; auto with asmgen.
+    congruence.
+Qed.
+
+Lemma next_sep:
+  forall rs m rs' m', rs = rs' -> m = m' -> Next rs m = Next rs' m'.
+Proof.
+  congruence.
+Qed.
+
+(* Measure to prove finite stuttering, see the other backends *)
+Definition measure (s: MB.state) : nat :=
+  match s with
+  | MB.State _ _ _ _ _ _ => 0%nat
+  | MB.Callstate _ _ _ _ => 0%nat
+  | MB.Returnstate _ _ _ => 1%nat
+  end.
+
+(* The actual MB.step/AB.step simulation, using the above theorems, plus extra proofs
+   for the internal and external function cases *)
+Theorem step_simulation:
+  forall S1 t S2, MB.step return_address_offset ge S1 t S2 ->
+  forall S1' (MS: match_states S1 S1'),
+  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
+  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
+Proof.
+  induction 1; intros.
+
+- (* bblock *)
+  left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0.
+  all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock; 
+            try (rewrite MBE; try discriminate); eauto).
+  + (* MBbuiltin *)
+    destruct (MB.body bb) eqn:MBB.
+    * inv H. eapply step_simulation_builtin; eauto. rewrite MBE. eauto.
+    * eapply match_states_split_builtin in MS; eauto.
+        2: rewrite MBB; discriminate.
+      simpl split in MS.
+      rewrite <- MBB in H.
+      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb1.
+      assert (MB.body bb = MB.body bb1). { subst. simpl. auto. }
+      rewrite H1 in H. subst.
+      exploit step_simulation_bblock. eapply H.
+        discriminate.
+        simpl. constructor.
+        eauto.
+      intros (S2' & PLUS1 & MS').
+      rewrite MBE in MS'.
+      assert (exit_step return_address_offset ge (Some (MBbuiltin e l b)) 
+              (MB.State sf f sp ({| MB.header := nil; MB.body := nil; MB.exit := Some (MBbuiltin e l b) |}::c) 
+                rs' m') t s').
+      { inv H0. inv H3. econstructor. econstructor; eauto. }
+      exploit step_simulation_builtin.
+        4: eapply MS'.
+        all: simpl; eauto.
+      intros (S3' & PLUS'' & MS'').
+      exists S3'. split; eauto.
+      eapply plus_trans. eapply PLUS1. eapply PLUS''. eauto.
+  + inversion H0. subst. eapply step_simulation_bblock; try (rewrite MBE; try discriminate); eauto.
+
+- (* internal function *)
+  inv MS.
+  exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
+  generalize EQ; intros EQ'. monadInv EQ'.
+  destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks))); inversion EQ1. clear EQ1. subst x0.
+  unfold Mach.store_stack in *.
+  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+  intros [m1' [C D]].
+  exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
+  intros [m2' [F G]].
+  simpl chunk_of_type in F.
+  exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
+  intros [m3' [P Q]].
+  (* Execution of function prologue *)
+  monadInv EQ0.
+  set (tfbody := make_prologue f x0) in *.
+  set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}) in *.
+  set (rs2 := rs0#FP <- (parent_sp s) #SP <- sp #RTMP <- Vundef).
+  exploit (Pget_correct tge GPRA RA nil rs2 m2'); auto.
+  intros (rs' & U' & V').
+  exploit (storeind_ptr_correct tge SP (fn_retaddr_ofs f) GPRA nil rs' m2').
+  { rewrite chunk_of_Tptr in P.
+    assert (rs' GPRA = rs0 RA). { apply V'. }
+    assert (rs' SP = rs2 SP). { apply V'; discriminate. }
+    rewrite H4. rewrite H3.
+    rewrite ATLR.
+    change (rs2 SP) with sp. eexact P. }
+  intros (rs3 & U & V).
+  assert (EXEC_PROLOGUE: exists rs3',
+            exec_straight_blocks tge tf
+              tf.(fn_blocks) rs0 m'
+              x0 rs3' m3'
+          /\ forall r, r <> PC -> rs3' r = rs3 r).
+  { eexists. split.
+    - change (fn_blocks tf) with tfbody; unfold tfbody.
+      econstructor; eauto. unfold exec_bblock. simpl exec_body.
+      rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F. simpl in F. rewrite F.
+      Simpl. unfold parexec_store_offset. rewrite Ptrofs.of_int64_to_int64. unfold eval_offset.
+      rewrite chunk_of_Tptr in P. Simpl. rewrite ATLR. unfold Mptr in P. assert (Archi.ptr64 = true) by auto. 2: auto. rewrite H3 in P. rewrite P.
+      simpl. apply next_sep; eauto. reflexivity.
+    - intros. destruct V' as (V'' & V'). destruct r.
+      + Simpl.
+        destruct (gpreg_eq g0 GPR16). { subst. Simpl. rewrite V; try discriminate. rewrite V''. subst rs2. Simpl. }
+        destruct (gpreg_eq g0 GPR32). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
+        destruct (gpreg_eq g0 GPR12). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
+        destruct (gpreg_eq g0 GPR17). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
+        Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. { destruct g0; try discriminate. contradiction. }
+      + Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl.
+      + contradiction.
+  } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3').
+  exploit exec_straight_steps_2; eauto using functions_transl.
+  simpl fn_blocks. simpl fn_blocks in g. omega. constructor.
+  intros (ofs' & X & Y).                    
+  left; exists (State rs3' m3'); split.
+  eapply exec_straight_steps_1; eauto.
+  simpl fn_blocks. simpl fn_blocks in g. omega.
+  constructor.
+  econstructor; eauto.
+  rewrite X; econstructor; eauto. 
+  apply agree_exten with rs2; eauto with asmgen.
+  unfold rs2. 
+  apply agree_set_other; auto with asmgen.
+  apply agree_change_sp with (parent_sp s). 
+  apply agree_undef_regs with rs0. auto.
+Local Transparent destroyed_at_function_entry.
+  simpl; intros; Simpl.
+  unfold sp; congruence.
+
+  intros.
+  assert (r <> RTMP). { contradict H3; rewrite H3; unfold data_preg; auto. }
+  rewrite Heqrs3'. Simpl. rewrite V. inversion V'. rewrite H6. auto.
+  assert (r <> GPRA). { contradict H3; rewrite H3; unfold data_preg; auto. }
+  assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. }
+  contradict H3; rewrite H3; unfold data_preg; auto.
+  contradict H3; rewrite H3; unfold data_preg; auto.
+  contradict H3; rewrite H3; unfold data_preg; auto.
+  contradict H3; rewrite H3; unfold data_preg; auto.
+  intros. rewrite Heqrs3'. rewrite V by auto with asmgen.
+  assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. }
+  rewrite H4 by auto with asmgen. reflexivity. discriminate.
+
+- (* external function *)
+  inv MS.
+  exploit functions_translated; eauto.
+  intros [tf [A B]]. simpl in B. inv B.
+  exploit extcall_arguments_match; eauto.
+  intros [args' [C D]].
+  exploit external_call_mem_extends; eauto.
+  intros [res' [m2' [P [Q [R S]]]]].
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_external; eauto.
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  econstructor; eauto.
+  unfold loc_external_result.
+  apply agree_set_other; auto.
+  apply agree_set_pair; auto.
+  apply agree_undef_caller_save_regs; auto.
+
+- (* return *) 
+  inv MS.
+  inv STACKS. simpl in *.
+  right. split. omega. split. auto.
+  rewrite <- ATPC in H5.
+  econstructor; eauto. congruence.
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, MB.initial_state prog st1 ->
+  exists st2, AB.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H. unfold ge0 in *.
+  econstructor; split.
+  econstructor.
+  eapply (Genv.init_mem_transf_partial TRANSF); eauto.
+  replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
+     with (Vptr fb Ptrofs.zero).
+  econstructor; eauto.
+  constructor.
+  apply Mem.extends_refl.
+  split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
+  intros. rewrite Mach.Regmap.gi. auto.
+  unfold Genv.symbol_address.
+  rewrite (match_program_main TRANSF).
+  rewrite symbols_preserved.
+  unfold ge; rewrite H1. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r,
+  match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. constructor. assumption.
+  compute in H1. inv H1.
+  generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
+Qed.
+
+Definition return_address_offset : Machblock.function -> Machblock.code -> ptrofs -> Prop := 
+  Asmblockgenproof0.return_address_offset.
+
+Theorem transf_program_correct:
+  forward_simulation (MB.semantics return_address_offset prog) (Asmblock.semantics tprog).
+Proof.
+  eapply forward_simulation_star with (measure := measure).
+  - apply senv_preserved.
+  - eexact transf_initial_states.
+  - eexact transf_final_states.
+  - exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/mppa_k1c/Asmblockgenproof1.v b/mppa_k1c/Asmblockgenproof1.v
index e1e2b0b0..00df01e3 100644
--- a/mppa_k1c/Asmblockgenproof1.v
+++ b/mppa_k1c/Asmblockgenproof1.v
@@ -20,9 +20,11 @@
 Require Import Coqlib Errors Maps.
 Require Import AST Integers Floats Values Memory Globalenvs.
 Require Import Op Locations Machblock Conventions.
-Require Import Asmblock Asmblockgen Asmblockgenproof0.
+Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops.
 Require Import Chunks.
 
+Import PArithCoercions.
+
 (** Decomposition of integer constants. *)
 
 Lemma make_immed32_sound:
@@ -859,7 +861,7 @@ Proof.
     destruct cmp; discriminate.
 Qed.
 
-Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct.
+Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct: core.
 
 Lemma transl_cbranch_correct_1:
   forall cond args lbl k c m ms b sp rs m' tbb,
@@ -1163,7 +1165,7 @@ Proof.
   split; intros; Simpl.
 Qed.
 
-Local Hint Resolve Val_cmpu_correct Val_cmplu_correct.
+Local Hint Resolve Val_cmpu_correct Val_cmplu_correct: core.
 
 Lemma transl_condimm_int32u_correct:
   forall cmp rd r1 n k rs m,
@@ -1481,6 +1483,8 @@ Proof.
   destruct (Z_lt_dec _ _); destruct (Z.eq_dec _ _); trivial; omega.
 Qed.
 
+Ltac splitall := repeat match goal with |- _ /\ _ => split end.
+
 Lemma transl_op_correct:
   forall op args res k (rs: regset) m v c,
   transl_op op args res k = OK c ->
@@ -1515,21 +1519,21 @@ Opaque Int.eq.
 - (* Ocast8signed *)
   econstructor; split.
   eapply exec_straight_two. simpl;eauto. simpl;eauto.
-  split; intros; simpl; Simpl.
+  repeat split; intros; simpl; Simpl.
   assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto.
   destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A.
   apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity.
 - (* Ocast16signed *)
   econstructor; split.
   eapply exec_straight_two. simpl;eauto. simpl;eauto.
-  split; intros; Simpl.
+  repeat split; intros; Simpl.
   assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto.
   destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A. 
   apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity.
 - (* Oshrximm *)
   econstructor; split.
   + apply exec_straight_one. simpl. eauto.
-  + split.
+  + repeat split.
     * rewrite Pregmap.gss.
       subst v.
       destruct (rs x0); simpl; trivial.
@@ -1540,7 +1544,7 @@ Opaque Int.eq.
 - (* Oshrxlimm *)
   econstructor; split.
   + apply exec_straight_one. simpl. eauto.
-  + split.
+  + repeat split.
     * rewrite Pregmap.gss.
       subst v.
       destruct (rs x0); simpl; trivial.
@@ -1551,7 +1555,7 @@ Opaque Int.eq.
       
 - (* Ocmp *)
   exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; split. eexact A. eauto with asmgen.
+  exists rs'; repeat split; eauto with asmgen.
   
 - (* Osel *)
   unfold conditional_move in *.
@@ -1570,72 +1574,73 @@ Opaque Int.eq.
 
   destruct c0; simpl in *.
 
-  all:
-    destruct c; simpl in *; inv EQ2;
-    econstructor; split; try (apply exec_straight_one; constructor);
-    split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption);
-    unfold Val.select; simpl;
-    unfold cmove, cmoveu;   
-    rewrite Pregmap.gss;
-    destruct (rs x1); simpl; trivial;
-    try rewrite int_ltu_to_neq;
-    try rewrite int64_ltu_to_neq;
-    try change (Int64.eq Int64.zero Int64.zero) with true;
-    try destruct Archi.ptr64;
-    repeat rewrite if_neg;
-    simpl;
-    trivial;
-    try destruct (_ || _);
-    trivial;
-    try apply Val.lessdef_normalize.
+    all: destruct c.
+    all: simpl in *.
+    all: inv EQ2.
+    all: econstructor; splitall.
+    all: try apply exec_straight_one.
+    all: intros; simpl; trivial.
+    all: unfold Val.select, cmove, cmoveu; simpl.
+    all: destruct (rs x1); simpl; trivial.
+    all: try rewrite int_ltu_to_neq.
+    all: try rewrite int64_ltu_to_neq.
+    all: try change (Int64.eq Int64.zero Int64.zero) with true.
+    all: try destruct Archi.ptr64.
+    all: try rewrite Pregmap.gss.
+    all: repeat rewrite if_neg.
+    all: simpl.
+    all: try destruct (_ || _).
+    all: try apply Val.lessdef_normalize.
+    all: trivial. (* no more lessdef *)
+    all: apply Pregmap.gso; congruence.
 
 - (* Oselimm *)
   unfold conditional_move_imm32 in *.
   destruct c0; simpl in *.
 
-  all:
-    destruct c; simpl in *; inv EQ0;
-    econstructor; split; try (apply exec_straight_one; constructor);
-    split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption);
-    unfold Val.select; simpl;
-    unfold cmove, cmoveu;   
-    rewrite Pregmap.gss;
-    destruct (rs x0); simpl; trivial;
-    try rewrite int_ltu_to_neq;
-    try rewrite int64_ltu_to_neq;
-    try change (Int64.eq Int64.zero Int64.zero) with true;
-    try destruct Archi.ptr64;
-    repeat rewrite if_neg;
-    simpl;
-    trivial;
-    try destruct (_ || _);
-    trivial;
-    try apply Val.lessdef_normalize.
-
+    all: destruct c.
+    all: simpl in *.
+    all: inv EQ0.
+    all: econstructor; splitall.
+    all: try apply exec_straight_one.
+    all: intros; simpl; trivial.
+    all: unfold Val.select, cmove, cmoveu; simpl.
+    all: destruct (rs x0); simpl; trivial.
+    all: try rewrite int_ltu_to_neq.
+    all: try rewrite int64_ltu_to_neq.
+    all: try change (Int64.eq Int64.zero Int64.zero) with true.
+    all: try destruct Archi.ptr64.
+    all: try rewrite Pregmap.gss.
+    all: repeat rewrite if_neg.
+    all: simpl.
+    all: try destruct (_ || _).
+    all: try apply Val.lessdef_normalize.
+    all: trivial. (* no more lessdef *)
+    all: apply Pregmap.gso; congruence.
 
 - (* Osellimm *)
   unfold conditional_move_imm64 in *.
   destruct c0; simpl in *.
 
-  all:
-    destruct c; simpl in *; inv EQ0;
-    econstructor; split; try (apply exec_straight_one; constructor);
-    split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption);
-    unfold Val.select; simpl;
-    unfold cmove, cmoveu;   
-    rewrite Pregmap.gss;
-    destruct (rs x0); simpl; trivial;
-    try rewrite int_ltu_to_neq;
-    try rewrite int64_ltu_to_neq;
-    try change (Int64.eq Int64.zero Int64.zero) with true;
-    try destruct Archi.ptr64;
-    repeat rewrite if_neg;
-    simpl;
-    trivial;
-    try destruct (_ || _);
-    trivial;
-    try apply Val.lessdef_normalize.
-
+    all: destruct c.
+    all: simpl in *.
+    all: inv EQ0.
+    all: econstructor; splitall.
+    all: try apply exec_straight_one.
+    all: intros; simpl; trivial.
+    all: unfold Val.select, cmove, cmoveu; simpl.
+    all: destruct (rs x0); simpl; trivial.
+    all: try rewrite int_ltu_to_neq.
+    all: try rewrite int64_ltu_to_neq.
+    all: try change (Int64.eq Int64.zero Int64.zero) with true.
+    all: try destruct Archi.ptr64.
+    all: try rewrite Pregmap.gss.
+    all: repeat rewrite if_neg.
+    all: simpl.
+    all: try destruct (_ || _).
+    all: try apply Val.lessdef_normalize.
+    all: trivial. (* no more lessdef *)
+    all: apply Pregmap.gso; congruence.
 Qed.
 
 (** Memory accesses *)
@@ -1661,9 +1666,9 @@ Qed.
 
 
 Lemma indexed_load_access_correct:
-  forall chunk (mk_instr: ireg -> offset -> basic) rd m,
+  forall trap chunk (mk_instr: ireg -> offset -> basic) rd m,
   (forall base ofs rs,
-     exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset chunk rs m rd base ofs) ->
+     exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs) ->
   forall (base: ireg) ofs k (rs: regset) v,
   Mem.loadv chunk m (Val.offset_ptr rs#base ofs) = Some v ->
   exists rs',
@@ -1716,7 +1721,7 @@ Proof.
              /\ c = indexed_memory_access mk_instr base ofs :: k
              /\ forall base' ofs' rs',
                    exec_basic_instr ge (mk_instr base' ofs') rs' m =
-                   exec_load_offset (chunk_of_type ty) rs' m rd base' ofs').
+                   exec_load_offset TRAP (chunk_of_type ty) rs' m rd base' ofs').
   { unfold loadind in TR.
     destruct ty, (preg_of dst); inv TR; econstructor; esplit; eauto. }
   destruct A as (mk_instr & rd & rdEq & B & C). subst c. rewrite rdEq.
@@ -1784,7 +1789,9 @@ Lemma loadind_ptr_correct:
   /\ forall r, r <> PC -> r <> dst -> rs'#r = rs#r.
 Proof.
   intros. eapply indexed_load_access_correct; eauto with asmgen.
-  intros. unfold Mptr. assert (Archi.ptr64 = true). auto. rewrite H0. auto.
+  intros. unfold Mptr. assert (Archi.ptr64 = true). auto. rewrite H0.
+  instantiate (1 := TRAP).
+  auto.
 Qed.
 
 Lemma storeind_ptr_correct:
@@ -1877,11 +1884,11 @@ Proof.
 Qed.
 
 Lemma transl_load_access2_correct:
-  forall chunk (mk_instr: ireg -> ireg -> basic) addr args k c rd (rs: regset) m v mro mr1 ro v',
+  forall trap chunk (mk_instr: ireg -> ireg -> basic) addr args k c rd (rs: regset) m v mro mr1 ro v',
   args = mr1 :: mro :: nil ->
   ireg_of mro = OK ro ->
   (forall base rs,
-     exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg chunk rs m rd base ro) ->
+     exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg trap chunk rs m rd base ro) ->
   transl_memory_access2 mk_instr addr args k = OK c ->
   eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
   Mem.loadv chunk m v = Some v' ->
@@ -1899,12 +1906,35 @@ Proof.
   split; intros; Simpl. auto.
 Qed.
 
+Lemma transl_load_access2_correct_notrap2:
+  forall chunk (mk_instr: ireg -> ireg -> basic) addr args k c rd (rs: regset) m v mro mr1 ro,
+  args = mr1 :: mro :: nil ->
+  ireg_of mro = OK ro ->
+  (forall base rs,
+     exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg NOTRAP chunk rs m rd base ro) ->
+  transl_memory_access2 mk_instr addr args k = OK c ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+  Mem.loadv chunk m v = None ->
+  exists rs',
+     exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+  /\ rs'#rd = concrete_default_notrap_load_value chunk
+  /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros until ro; intros ARGS IREGE INSTR TR EV LOAD. 
+  exploit transl_memory_access2_correct; eauto.
+  intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS.
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+  rewrite INSTR. unfold exec_load_reg. unfold parexec_load_reg. rewrite B, LOAD. reflexivity. Simpl. 
+  split; intros; Simpl. auto.
+Qed.
+
 Lemma transl_load_access2XS_correct:
-  forall chunk (mk_instr: ireg -> ireg -> basic) (scale : Z) args k c rd (rs: regset) m v mro mr1 ro v',
+  forall trap chunk (mk_instr: ireg -> ireg -> basic) (scale : Z) args k c rd (rs: regset) m v mro mr1 ro v',
   args = mr1 :: mro :: nil ->
   ireg_of mro = OK ro ->
   (forall base rs,
-     exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs chunk rs m rd base ro) ->
+     exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs trap chunk rs m rd base ro) ->
   transl_memory_access2XS chunk mk_instr scale args k = OK c ->
   eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v ->
   Mem.loadv chunk m v = Some v' ->
@@ -1922,13 +1952,39 @@ Proof.
   unfold scale_of_chunk.
   subst scale.
   rewrite B, LOAD. reflexivity. Simpl. 
-  split; intros; Simpl. auto.
+  split. trivial. intros. Simpl.
+Qed.
+
+Lemma transl_load_access2XS_correct_notrap2:
+  forall chunk (mk_instr: ireg -> ireg -> basic) (scale : Z) args k c rd (rs: regset) m v mro mr1 ro,
+  args = mr1 :: mro :: nil ->
+  ireg_of mro = OK ro ->
+  (forall base rs,
+     exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs NOTRAP chunk rs m rd base ro) ->
+  transl_memory_access2XS chunk mk_instr scale args k = OK c ->
+  eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v ->
+  Mem.loadv chunk m v = None ->
+  exists rs',
+     exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+  /\ rs'#rd = concrete_default_notrap_load_value chunk
+  /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros until ro; intros ARGS IREGE INSTR TR EV LOAD. 
+  exploit transl_memory_access2XS_correct; eauto.
+  intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C & D). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS.
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+  rewrite INSTR. unfold exec_load_regxs. unfold parexec_load_regxs.
+  unfold scale_of_chunk.
+  subst scale.
+  rewrite B, LOAD. reflexivity. Simpl. 
+  split. trivial. intros. Simpl.
 Qed.
 
 Lemma transl_load_access_correct:
-  forall chunk (mk_instr: ireg -> offset -> basic) addr args k c rd (rs: regset) m v v',
+  forall trap chunk (mk_instr: ireg -> offset -> basic) addr args k c rd (rs: regset) m v v',
   (forall base ofs rs,
-     exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset chunk rs m rd base ofs) ->
+     exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs) ->
   transl_memory_access mk_instr addr args k = OK c ->
   eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
   Mem.loadv chunk m v = Some v' ->
@@ -1946,54 +2002,119 @@ Proof.
   split; intros; Simpl. auto.
 Qed.
 
+Lemma transl_load_access_correct_notrap2:
+  forall chunk (mk_instr: ireg -> offset -> basic) addr args k c rd (rs: regset) m v,
+  (forall base ofs rs,
+     exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset NOTRAP chunk rs m rd base ofs) ->
+  transl_memory_access mk_instr addr args k = OK c ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+  Mem.loadv chunk m v = None ->
+  exists rs',
+     exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+  /\ rs'#rd = concrete_default_notrap_load_value chunk
+  /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros until v; intros INSTR TR EV LOAD. 
+  exploit transl_memory_access_correct; eauto.
+  intros (base & ofs & rs' & ptr & A & PtrEq & B & C).
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. 
+  rewrite INSTR. unfold exec_load_offset. unfold parexec_load_offset. rewrite PtrEq, B, LOAD. reflexivity. Simpl. 
+  split. trivial. intros. Simpl.
+Qed.
+
 Lemma transl_load_memory_access_ok:
-  forall addr chunk args dst k c rs a v m,
+  forall addr trap chunk args dst k c rs a v m,
   (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) ->
-  transl_load chunk addr args dst k = OK c ->
+  transl_load trap chunk addr args dst k = OK c ->
   eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists mk_instr rd,
      preg_of dst = IR rd
   /\ transl_memory_access mk_instr addr args k = OK c
   /\ forall base ofs rs,
-        exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset chunk rs m rd base ofs.
+        exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs.
 Proof.
   intros until m. intros ADDR TR ? ?.
   unfold transl_load in TR. destruct addr; try contradiction.
   - monadInv TR. destruct chunk; ArgsInv; econstructor; (esplit; eauto).
   - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
-    [ instantiate (1 := (PLoadRRO _ x)); simpl; reflexivity
+    [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
     | eauto ].
   - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
-    [ instantiate (1 := (PLoadRRO _ x)); simpl; reflexivity
+    [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
     | eauto ].
 Qed.
 
-Lemma transl_load_memory_access2_ok:
-  forall addr chunk args dst k c rs a v m,
-  addr = Aindexed2 ->
-  transl_load chunk addr args dst k = OK c ->
+Lemma transl_load_memory_access_ok_notrap2:
+  forall addr chunk args dst k c rs a m,
+  (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) ->
+  transl_load NOTRAP chunk addr args dst k = OK c ->
   eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a ->
+  Mem.loadv chunk m a = None ->
+  exists mk_instr rd,
+     preg_of dst = IR rd
+  /\ transl_memory_access mk_instr addr args k = OK c
+  /\ forall base ofs rs,
+        exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset NOTRAP chunk rs m rd base ofs.
+Proof.
+  intros until m. intros ADDR TR ? ?.
+  unfold transl_load in TR. destruct addr; try contradiction.
+  - monadInv TR. destruct chunk; ArgsInv; econstructor; (esplit; eauto).
+  - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
+    [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
+    | eauto ].
+  - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
+    [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
+    | eauto ].
+Qed.
+
+Lemma transl_load_memory_access2_ok:
+  forall trap chunk args dst k c rs a v m,
+  transl_load trap chunk Aindexed2 args dst k = OK c ->
+  eval_addressing ge (rs (IR SP)) Aindexed2 (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists mk_instr mr0 mro rd ro,
       args = mr0 :: mro :: nil
    /\ preg_of dst = IR rd
    /\ preg_of mro = IR ro
-   /\ transl_memory_access2 mk_instr addr args k = OK c
+   /\ transl_memory_access2 mk_instr Aindexed2 args k = OK c
+   /\ forall base rs,
+      exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg trap chunk rs m rd base ro.
+Proof.
+  intros until m. intros TR ? ?.
+  unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
+  unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+  [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
+  | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRR _ _ x)); simpl; reflexivity
+  | eauto].
+Qed.
+
+
+Lemma transl_load_memory_access2_ok_notrap2:
+  forall chunk args dst k c rs a m,
+  transl_load NOTRAP chunk Aindexed2 args dst k = OK c ->
+  eval_addressing ge (rs (IR SP)) Aindexed2 (map rs (map preg_of args)) = Some a ->
+  Mem.loadv chunk m a = None ->
+  exists mk_instr mr0 mro rd ro,
+      args = mr0 :: mro :: nil
+   /\ preg_of dst = IR rd
+   /\ preg_of mro = IR ro
+   /\ transl_memory_access2 mk_instr Aindexed2 args k = OK c
    /\ forall base rs,
-      exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg chunk rs m rd base ro.
+      exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg NOTRAP chunk rs m rd base ro.
 Proof.
-  intros until m. intros ? TR ? ?.
+  intros until m. intros TR ? ?.
   unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
   unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
   [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
-  | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRR _ x)); simpl; reflexivity
+  | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRR _ _ x)); simpl; reflexivity
   | eauto].
 Qed.
 
 Lemma transl_load_memory_access2XS_ok:
-  forall scale chunk args dst k c rs a v m,
-  transl_load chunk (Aindexed2XS scale) args dst k = OK c ->
+  forall scale trap chunk args dst k c rs a v m,
+  transl_load trap chunk (Aindexed2XS scale) args dst k = OK c ->
   eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists mk_instr mr0 mro rd ro,
@@ -2002,19 +2123,41 @@ Lemma transl_load_memory_access2XS_ok:
    /\ preg_of mro = IR ro
    /\ transl_memory_access2XS chunk mk_instr scale args k = OK c
    /\ forall base rs,
-      exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs chunk rs m rd base ro.
+      exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs trap chunk rs m rd base ro.
 Proof.
   intros until m. intros TR ? ?.
   unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
   unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
   [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
-  | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRRXS _ x)); simpl; rewrite Heqb; eauto
+  | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRRXS _ _ x)); simpl; rewrite Heqb; eauto
+  | eauto].
+Qed.
+
+
+Lemma transl_load_memory_access2XS_ok_notrap2:
+  forall scale chunk args dst k c rs a m,
+  transl_load NOTRAP chunk (Aindexed2XS scale) args dst k = OK c ->
+  eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a ->
+  Mem.loadv chunk m a = None ->
+  exists mk_instr mr0 mro rd ro,
+      args = mr0 :: mro :: nil
+   /\ preg_of dst = IR rd
+   /\ preg_of mro = IR ro
+   /\ transl_memory_access2XS chunk mk_instr scale args k = OK c
+   /\ forall base rs,
+      exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs NOTRAP chunk rs m rd base ro.
+Proof.
+  intros until m. intros TR ? ?.
+  unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
+  unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+  [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
+  | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRRXS _ _ x)); simpl; rewrite Heqb; eauto
   | eauto].
 Qed.
 
 Lemma transl_load_correct:
-  forall chunk addr args dst k c (rs: regset) m a v,
-  transl_load chunk addr args dst k = OK c ->
+  forall trap chunk addr args dst k c (rs: regset) m a v,
+  transl_load trap chunk addr args dst k = OK c ->
   eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists rs',
@@ -2038,6 +2181,32 @@ Proof.
       eapply transl_load_access_correct; eauto with asmgen.
 Qed.
 
+Lemma transl_load_correct_notrap2:
+  forall chunk addr args dst k c (rs: regset) m a,
+  transl_load NOTRAP chunk addr args dst k = OK c ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
+  Mem.loadv chunk m a = None ->
+  exists rs',
+     exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+  /\ rs'#(preg_of dst) = (concrete_default_notrap_load_value chunk)
+  /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r.
+Proof.
+  intros until a; intros TR EV LOAD. destruct addr.
+  - exploit transl_load_memory_access2XS_ok_notrap2; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C).
+    rewrite rdEq. eapply transl_load_access2XS_correct_notrap2; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity.
+  - exploit transl_load_memory_access2_ok_notrap2; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C).
+    rewrite rdEq. eapply transl_load_access2_correct_notrap2; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity.
+  - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity).
+    intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+      eapply transl_load_access_correct_notrap2; eauto with asmgen.
+  - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity).
+    intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+      eapply transl_load_access_correct_notrap2; eauto with asmgen.
+  - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity).
+    intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+      eapply transl_load_access_correct_notrap2; eauto with asmgen.
+Qed.
+
 Lemma transl_store_access2_correct:
   forall chunk (mk_instr: ireg -> ireg -> basic) addr args k c r1 (rs: regset) m v mr1 mro ro m',
   args = mr1 :: mro :: nil ->
diff --git a/mppa_k1c/Asmblockprops.v b/mppa_k1c/Asmblockprops.v
new file mode 100644
index 00000000..3c6ba534
--- /dev/null
+++ b/mppa_k1c/Asmblockprops.v
@@ -0,0 +1,343 @@
+(** Common definition and proofs on Asmblock required by various modules *)
+
+Require Import Coqlib.
+Require Import Integers.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Values.
+Require Import Asmblock.
+Require Import Axioms.
+
+Definition bblock_simu (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
+  forall rs m,
+    exec_bblock ge f bb rs m <> Stuck ->
+    exec_bblock ge f bb rs m = exec_bblock ge f bb' rs m.
+    
+Hint Extern 2 (_ <> _) => congruence: asmgen.
+
+Lemma preg_of_data:
+  forall r, data_preg (preg_of r) = true.
+Proof.
+  intros. destruct r; reflexivity.
+Qed.
+Hint Resolve preg_of_data: asmgen.
+
+Lemma data_diff:
+  forall r r',
+  data_preg r = true -> data_preg r' = false -> r <> r'.
+Proof.
+  congruence.
+Qed.
+Hint Resolve data_diff: asmgen.
+
+Lemma preg_of_not_PC:
+  forall r, preg_of r <> PC.
+Proof.
+  intros. apply data_diff; auto with asmgen.
+Qed.
+
+Lemma preg_of_not_SP:
+  forall r, preg_of r <> SP.
+Proof.
+  intros. unfold preg_of; destruct r; simpl; congruence.
+Qed.
+
+Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
+
+
+Lemma nextblock_pc:
+  forall b rs, (nextblock b rs)#PC = Val.offset_ptr rs#PC (Ptrofs.repr (size b)).
+Proof.
+  intros. apply Pregmap.gss.
+Qed.
+
+Lemma nextblock_inv:
+  forall b r rs, r <> PC -> (nextblock b rs)#r = rs#r.
+Proof.
+  intros. unfold nextblock. apply Pregmap.gso. red; intro; subst. auto.
+Qed.
+
+Lemma nextblock_inv1:
+  forall b r rs, data_preg r = true -> (nextblock b rs)#r = rs#r.
+Proof.
+  intros. apply nextblock_inv. red; intro; subst; discriminate.
+Qed.
+
+Ltac Simplif :=
+  ((rewrite nextblock_inv by eauto with asmgen)
+  || (rewrite nextblock_inv1 by eauto with asmgen)
+  || (rewrite Pregmap.gss)
+  || (rewrite nextblock_pc)
+  || (rewrite Pregmap.gso by eauto with asmgen)
+  ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+(* For Asmblockgenproof0 *)
+
+Theorem exec_basic_instr_pc:
+  forall ge b rs1 m1 rs2 m2,
+  exec_basic_instr ge b rs1 m1 = Next rs2 m2 ->
+  rs2 PC = rs1 PC.
+Proof.
+  intros. destruct b; try destruct i; try destruct i.
+  all: try (inv H; Simpl).
+  1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail.
+
+  1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail.
+
+  { (* PLoadQRRO *)
+    unfold  parexec_load_q_offset in H1.
+    destruct (gpreg_q_expand _) as [r0 r1] in H1.
+    destruct (Mem.loadv _ _ _) in H1; try discriminate.
+    destruct (Mem.loadv _ _ _) in H1; try discriminate.
+    inv H1. Simpl. }
+  { (* PLoadORRO *)
+    unfold  parexec_load_o_offset in H1.
+    destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
+    destruct (Mem.loadv _ _ _) in H1; try discriminate.
+    destruct (Mem.loadv _ _ _) in H1; try discriminate.
+    destruct (Mem.loadv _ _ _) in H1; try discriminate.
+    destruct (Mem.loadv _ _ _) in H1; try discriminate.
+    inv H1. Simpl. }
+  1-8: unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]; fail.
+  1-8: unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail.
+  1-8: unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail.
+  
+  { (* PStoreQRRO *)
+    unfold  parexec_store_q_offset in H1.
+    destruct (gpreg_q_expand _) as [r0 r1] in H1.
+    unfold eval_offset in H1; try discriminate.
+    destruct (Mem.storev _ _ _) in H1; try discriminate.
+    destruct (Mem.storev _ _ _) in H1; try discriminate.
+    inv H1. Simpl. reflexivity. }
+  { (* PStoreORRO *)
+    unfold  parexec_store_o_offset in H1.
+    destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
+    unfold eval_offset in H1; try discriminate.
+    destruct (Mem.storev _ _ _) in H1; try discriminate.
+    destruct (Mem.storev _ _ _) in H1; try discriminate.
+    destruct (Mem.storev _ _ _) in H1; try discriminate.
+    destruct (Mem.storev _ _ _) in H1; try discriminate.
+    inv H1. Simpl. reflexivity. }
+  - destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate.
+  - destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate.
+    destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate.
+  - destruct rs; try discriminate. inv H1. Simpl.
+  - destruct rd; try discriminate. inv H1; Simpl.
+  - reflexivity.
+Qed.
+
+(* For PostpassSchedulingproof *)
+
+Lemma regset_double_set:
+  forall r1 r2 (rs: regset) v1 v2,
+  r1 <> r2 ->
+  (rs # r1 <- v1 # r2 <- v2) = (rs # r2 <- v2 # r1 <- v1).
+Proof.
+  intros. apply functional_extensionality. intros r. destruct (preg_eq r r1).
+  - subst. rewrite Pregmap.gso; auto. repeat (rewrite Pregmap.gss). auto.
+  - destruct (preg_eq r r2).
+    + subst. rewrite Pregmap.gss. rewrite Pregmap.gso; auto. rewrite Pregmap.gss. auto.
+    + repeat (rewrite Pregmap.gso; auto).
+Qed.
+
+Lemma next_eq:
+  forall (rs rs': regset) m m',
+  rs = rs' -> m = m' -> Next rs m = Next rs' m'.
+Proof.
+  intros; apply f_equal2; auto.
+Qed.
+
+Lemma exec_load_offset_pc_var:
+  forall trap t rs m rd ra ofs rs' m' v,
+  exec_load_offset trap t rs m rd ra ofs = Next rs' m' ->
+  exec_load_offset trap t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_load_offset in *. unfold parexec_load_offset in *. rewrite Pregmap.gso; try discriminate. destruct (eval_offset ofs); try discriminate.
+  destruct (Mem.loadv _ _ _).
+  - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+  - unfold parexec_incorrect_load in *.
+    destruct trap; try discriminate.
+    inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_reg_pc_var:
+  forall trap t rs m rd ra ro rs' m' v,
+    exec_load_reg trap t rs m rd ra ro = Next rs' m' ->
+    exec_load_reg trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_load_reg in *. unfold parexec_load_reg in *. rewrite Pregmap.gso; try discriminate.
+  destruct (Mem.loadv _ _ _).
+  - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+  - unfold parexec_incorrect_load in *.
+    destruct trap; try discriminate.
+    inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_regxs_pc_var:
+  forall trap t rs m rd ra ro rs' m' v,
+  exec_load_regxs trap t rs m rd ra ro = Next rs' m' ->
+  exec_load_regxs trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_load_regxs in *. unfold parexec_load_regxs in *. rewrite Pregmap.gso; try discriminate.
+  destruct (Mem.loadv _ _ _).
+  - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+  - unfold parexec_incorrect_load in *.
+    destruct trap; try discriminate.
+    inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_offset_q_pc_var:
+  forall rs m rd ra ofs rs' m' v,
+  exec_load_q_offset rs m rd ra ofs = Next rs' m' ->
+  exec_load_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_load_q_offset in *. unfold parexec_load_q_offset in *.
+  destruct (gpreg_q_expand rd) as [rd0 rd1].
+  (* destruct (ireg_eq rd0 ra); try discriminate. *)
+  rewrite Pregmap.gso; try discriminate.
+  destruct (Mem.loadv _ _ _); try discriminate.
+  inv H.
+  destruct (Mem.loadv _ _ _); try discriminate.
+  inv H1. f_equal.
+  rewrite (regset_double_set PC rd0) by discriminate.
+  rewrite (regset_double_set PC rd1) by discriminate.
+  reflexivity.
+Qed.
+
+Lemma exec_load_offset_o_pc_var:
+  forall rs m rd ra ofs rs' m' v,
+  exec_load_o_offset rs m rd ra ofs = Next rs' m' ->
+  exec_load_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_load_o_offset in *. unfold parexec_load_o_offset in *.
+  destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3].
+(*
+  destruct (ireg_eq rd0 ra); try discriminate.
+  destruct (ireg_eq rd1 ra); try discriminate.
+  destruct (ireg_eq rd2 ra); try discriminate.
+*)
+  rewrite Pregmap.gso; try discriminate.
+  simpl in *.
+  destruct (Mem.loadv _ _ _); try discriminate.
+  destruct (Mem.loadv _ _ _); try discriminate.
+  destruct (Mem.loadv _ _ _); try discriminate.
+  destruct (Mem.loadv _ _ _); try discriminate.
+  rewrite (regset_double_set PC rd0) by discriminate.
+  rewrite (regset_double_set PC rd1) by discriminate.
+  rewrite (regset_double_set PC rd2) by discriminate.
+  rewrite (regset_double_set PC rd3) by discriminate.
+  inv H.
+  trivial.
+Qed.
+
+Lemma exec_store_offset_pc_var:
+  forall t rs m rd ra ofs rs' m' v,
+  exec_store_offset t rs m rd ra ofs = Next rs' m' ->
+  exec_store_offset t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_store_offset in *. unfold parexec_store_offset in *. rewrite Pregmap.gso; try discriminate.
+  destruct (eval_offset ofs); try discriminate.
+  destruct (Mem.storev _ _ _).
+  - inv H. apply next_eq; auto.
+  - discriminate.
+Qed.
+
+Lemma exec_store_q_offset_pc_var:
+  forall rs m rd ra ofs rs' m' v,
+  exec_store_q_offset rs m rd ra ofs = Next rs' m' ->
+  exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate.
+  simpl in *.
+  destruct (gpreg_q_expand _) as [s0 s1].
+  destruct (Mem.storev _ _ _); try discriminate.
+  destruct (Mem.storev _ _ _); try discriminate.
+  inv H. apply next_eq; auto.
+Qed.
+
+Lemma exec_store_o_offset_pc_var:
+  forall rs m rd ra ofs rs' m' v,
+  exec_store_o_offset rs m rd ra ofs = Next rs' m' ->
+  exec_store_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+  intros.
+  unfold exec_store_o_offset in *. unfold parexec_store_o_offset in *.
+  destruct (gpreg_o_expand _) as [[[s0 s1] s2] s3].
+  destruct (Mem.storev _ _ _); try discriminate.
+  destruct (Mem.storev _ _ _); try discriminate.
+  destruct (Mem.storev _ _ _); try discriminate.
+  destruct (Mem.storev _ _ _); try discriminate.
+  inv H.
+  trivial.
+Qed.
+  
+Lemma exec_store_reg_pc_var:
+  forall t rs m rd ra ro rs' m' v,
+  exec_store_reg t rs m rd ra ro = Next rs' m' ->
+  exec_store_reg t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_store_reg in *. unfold parexec_store_reg in *. rewrite Pregmap.gso; try discriminate.
+  destruct (Mem.storev _ _ _).
+  - inv H. apply next_eq; auto.
+  - discriminate.
+Qed.
+
+Lemma exec_store_regxs_pc_var:
+  forall t rs m rd ra ro rs' m' v,
+  exec_store_regxs t rs m rd ra ro = Next rs' m' ->
+  exec_store_regxs t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+  intros. unfold exec_store_regxs in *. unfold parexec_store_regxs in *. rewrite Pregmap.gso; try discriminate.
+  destruct (Mem.storev _ _ _).
+  - inv H. apply next_eq; auto.
+  - discriminate.
+Qed.
+
+Theorem exec_basic_instr_pc_var:
+  forall ge i rs m rs' m' v,
+  exec_basic_instr ge i rs m = Next rs' m' ->
+  exec_basic_instr ge i (rs # PC <- v) m = Next (rs' # PC <- v) m'.
+Proof.
+  intros. unfold exec_basic_instr in *. unfold bstep in *. destruct i.
+  - unfold exec_arith_instr in *. destruct i; destruct i.
+      all: try (exploreInst; inv H; apply next_eq; auto;
+      apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate).
+(* 
+      (* Some cases treated seperately because exploreInst destructs too much *)
+      all: try (inv H; apply next_eq; auto; apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate). *)
+  - destruct i.
+    + exploreInst; apply exec_load_offset_pc_var; auto.
+    + exploreInst; apply exec_load_reg_pc_var; auto.
+    + exploreInst; apply exec_load_regxs_pc_var; auto.
+    + apply exec_load_offset_q_pc_var; auto.
+    + apply exec_load_offset_o_pc_var; auto.
+  - destruct i.
+    + exploreInst; apply exec_store_offset_pc_var; auto.
+    + exploreInst; apply exec_store_reg_pc_var; auto.
+    + exploreInst; apply exec_store_regxs_pc_var; auto.
+    + apply exec_store_q_offset_pc_var; auto.
+    + apply exec_store_o_offset_pc_var; auto.
+  - destruct (Mem.alloc _ _ _) as (m1 & stk). repeat (rewrite Pregmap.gso; try discriminate).
+    destruct (Mem.storev _ _ _ _); try discriminate.
+    inv H. apply next_eq; auto. apply functional_extensionality. intros.
+    rewrite (regset_double_set GPR32 PC); try discriminate.
+    rewrite (regset_double_set GPR12 PC); try discriminate.
+    rewrite (regset_double_set FP PC); try discriminate. reflexivity.
+  - repeat (rewrite Pregmap.gso; try discriminate).
+    destruct (Mem.loadv _ _ _); try discriminate.
+    destruct (rs GPR12); try discriminate.
+    destruct (Mem.free _ _ _ _); try discriminate.
+    inv H. apply next_eq; auto.
+    rewrite (regset_double_set GPR32 PC).
+    rewrite (regset_double_set GPR12 PC). reflexivity.
+    all: discriminate.
+  - destruct rs0; try discriminate. inv H. apply next_eq; auto.
+    repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
+  - destruct rd; try discriminate. inv H. apply next_eq; auto.
+    repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
+  - inv H. apply next_eq; auto.
+Qed.
+
+
diff --git a/mppa_k1c/Asmexpand.ml b/mppa_k1c/Asmexpand.ml
index 67ef6f52..8ab10bc5 100644
--- a/mppa_k1c/Asmexpand.ml
+++ b/mppa_k1c/Asmexpand.ml
@@ -190,10 +190,10 @@ let expand_builtin_memcpy_big sz al src dst =
         end);
     
       cpy tmpbuf2 16L (fun x y z -> Plq(x, y, z)) (fun x y z -> Psq(x, y, z));
-      cpy tmpbuf 8L (fun x y z -> Pld(x, y, z)) (fun x y z -> Psd(x, y, z));
-      cpy tmpbuf 4L (fun x y z -> Plw(x, y, z)) (fun x y z -> Psw(x, y, z));
-      cpy tmpbuf 2L (fun x y z -> Plh(x, y, z)) (fun x y z -> Psh(x, y, z));
-      cpy tmpbuf 1L (fun x y z -> Plb(x, y, z)) (fun x y z -> Psb(x, y, z));
+      cpy tmpbuf 8L (fun x y z -> Pld(TRAP, x, y, z)) (fun x y z -> Psd(x, y, z));
+      cpy tmpbuf 4L (fun x y z -> Plw(TRAP, x, y, z)) (fun x y z -> Psw(x, y, z));
+      cpy tmpbuf 2L (fun x y z -> Plh(TRAP, x, y, z)) (fun x y z -> Psh(x, y, z));
+      cpy tmpbuf 1L (fun x y z -> Plb(TRAP, x, y, z)) (fun x y z -> Psb(x, y, z));
       assert (!remaining = 0L)
     end
   else
@@ -203,7 +203,7 @@ let expand_builtin_memcpy_big sz al src dst =
       let lbl = new_label() in
       emit (Ploopdo (tmpbuf, lbl));
       emit Psemi;
-      emit (Plb (tmpbuf, srcptr, AOff Z.zero));
+      emit (Plb (TRAP, tmpbuf, srcptr, AOff Z.zero));
       emit (Paddil (srcptr, srcptr, Z.one));
       emit Psemi;
       emit (Psb (tmpbuf, dstptr, AOff Z.zero));
@@ -223,30 +223,30 @@ let expand_builtin_memcpy  sz al args =
 let expand_builtin_vload_common chunk base ofs res =
   match chunk, res with
   | Mint8unsigned, BR(Asmvliw.IR res) ->
-     emit (Plbu (res, base, AOff ofs))
+     emit (Plbu (TRAP, res, base, AOff ofs))
   | Mint8signed, BR(Asmvliw.IR res) ->
-     emit (Plb  (res, base, AOff ofs))
+     emit (Plb  (TRAP, res, base, AOff ofs))
   | Mint16unsigned, BR(Asmvliw.IR res) ->
-     emit (Plhu (res, base, AOff ofs))
+     emit (Plhu (TRAP, res, base, AOff ofs))
   | Mint16signed, BR(Asmvliw.IR res) ->
-     emit (Plh  (res, base, AOff ofs))
+     emit (Plh  (TRAP, res, base, AOff ofs))
   | Mint32, BR(Asmvliw.IR res) ->
-     emit (Plw  (res, base, AOff ofs))
+     emit (Plw  (TRAP, res, base, AOff ofs))
   | Mint64, BR(Asmvliw.IR res) ->
-     emit (Pld  (res, base, AOff ofs))
+     emit (Pld  (TRAP, res, base, AOff ofs))
   | Mint64, BR_splitlong(BR(Asmvliw.IR res1), BR(Asmvliw.IR res2)) ->
      let ofs' = Integers.Ptrofs.add ofs _4 in
      if base <> res2 then begin
-         emit (Plw (res2, base, AOff ofs));
-         emit (Plw (res1, base, AOff ofs'))
+         emit (Plw (TRAP, res2, base, AOff ofs));
+         emit (Plw (TRAP, res1, base, AOff ofs'))
        end else begin
-         emit (Plw (res1, base, AOff ofs'));
-         emit (Plw (res2, base, AOff ofs))
+         emit (Plw (TRAP, res1, base, AOff ofs'));
+         emit (Plw (TRAP, res2, base, AOff ofs))
        end
   | Mfloat32, BR(Asmvliw.IR res) ->
-     emit (Pfls (res, base, AOff ofs))
+     emit (Pfls (TRAP, res, base, AOff ofs))
   | Mfloat64, BR(Asmvliw.IR res) ->
-     emit (Pfld (res, base, AOff ofs))
+     emit (Pfld (TRAP, res, base, AOff ofs))
   | _ ->
      assert false
 
@@ -514,8 +514,8 @@ let expand_instruction instr =
       end else begin
         let below = Integers.Ptrofs.repr (Z.neg sz) in
         expand_addptrofs stack_pointer stack_pointer below;
+        emit Psemi; (* Psemi required to fit in resource constraints *)
         expand_storeind_ptr stack_pointer stack_pointer (Integers.Ptrofs.add ofs below);
-        (* DM we don't need it emit Psemi; *)
         vararg_start_ofs := None
       end
   | Pfreeframe (sz, ofs) ->
diff --git a/mppa_k1c/Asmgen.v b/mppa_k1c/Asmgen.v
index c3588871..8875a4ac 100644
--- a/mppa_k1c/Asmgen.v
+++ b/mppa_k1c/Asmgen.v
@@ -19,10 +19,11 @@ Require Import Integers.
 Require Import Mach Asm Asmblock Asmblockgen Machblockgen.
 Require Import PostpassScheduling.
 Require Import Errors String.
+Require Compopts.
 
 Local Open Scope error_monad_scope.
 
-Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
+Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := Compopts.time name f.
 
 Definition transf_program (p: Mach.program) : res Asm.program :=
   let mbp := (time "Machblock generation" Machblockgen.transf_program) p in
diff --git a/mppa_k1c/Asmgenproof.v b/mppa_k1c/Asmgenproof.v
index 5d7bb81f..7388f6da 100644
--- a/mppa_k1c/Asmgenproof.v
+++ b/mppa_k1c/Asmgenproof.v
@@ -35,7 +35,7 @@ Proof.
   intros p tp H.
   unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H.
   inversion_clear H. apply bind_inversion in H1. destruct H1.
-  inversion_clear H. inversion H2. unfold time in *. remember (Machblockgen.transf_program p) as mbp.
+  inversion_clear H. inversion H2. unfold time, Compopts.time in *. remember (Machblockgen.transf_program p) as mbp.
   unfold match_prog; simpl.
   exists mbp; split. apply Machblockgenproof.transf_program_match; auto.
   exists x; split. apply Asmblockgenproof.transf_program_match; auto.
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index 54e9c847..946007c1 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -308,6 +308,16 @@ Inductive cf_instruction : Type :=
 .
 
 (** Loads **)
+Definition concrete_default_notrap_load_value (chunk : memory_chunk) :=
+  match chunk with
+  | Mint8signed | Mint8unsigned | Mint16signed | Mint16unsigned
+  | Mint32 => Vint Int.zero
+  | Mint64 => Vlong Int64.zero
+  | Many32 | Many64 => Vundef
+  | Mfloat32 => Vsingle Float32.zero
+  | Mfloat64 => Vfloat Float.zero
+  end.
+
 Inductive load_name : Type :=
   | Plb                                             (**r load byte *)
   | Plbu                                            (**r load byte unsigned *)
@@ -322,9 +332,9 @@ Inductive load_name : Type :=
 .
 
 Inductive ld_instruction : Type :=
-  | PLoadRRO   (i: load_name) (rd: ireg) (ra: ireg) (ofs: offset)
-  | PLoadRRR   (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg)
-  | PLoadRRRXS (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg)
+  | PLoadRRO   (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (ofs: offset)
+  | PLoadRRR   (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg)
+  | PLoadRRRXS (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg)
   | PLoadQRRO  (rd: gpreg_q) (ra: ireg) (ofs: offset)
   | PLoadORRO  (rd: gpreg_o) (ra: ireg) (ofs: offset)
 .
@@ -545,6 +555,8 @@ Inductive ar_instruction : Type :=
   | PArithARRI64 (i: arith_name_arri64) (rd rs: ireg) (imm: int64)
 .
 
+Module PArithCoercions.
+
 Coercion PArithR:       arith_name_r        >-> Funclass.
 Coercion PArithRR:      arith_name_rr       >-> Funclass.
 Coercion PArithRI32:    arith_name_ri32     >-> Funclass.
@@ -559,6 +571,8 @@ Coercion PArithARR:     arith_name_arr      >-> Funclass.
 Coercion PArithARRI32:   arith_name_arri32    >-> Funclass.
 Coercion PArithARRI64:   arith_name_arri64    >-> Funclass.
 
+End PArithCoercions.
+
 Inductive basic : Type :=
   | PArith          (i: ar_instruction)
   | PLoad           (i: ld_instruction)
@@ -1202,10 +1216,16 @@ Definition eval_offset (ofs: offset) : res ptrofs := OK ofs.
 
 (** * load/store *)
 
-Definition parexec_load_offset (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) :=
+Definition parexec_incorrect_load trap chunk d rsw mw :=
+  match trap with
+  | TRAP => Stuck
+  | NOTRAP => Next (rsw#d <- (concrete_default_notrap_load_value chunk)) mw
+  end.
+
+Definition parexec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) :=
   match (eval_offset ofs) with
   | OK ptr => match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with
-              | None => Stuck
+              | None => parexec_incorrect_load trap chunk d rsw mw
               | Some v => Next (rsw#d <- v) mw
               end
   | _ => Stuck
@@ -1250,15 +1270,15 @@ Definition parexec_load_o_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_o) (a
     end
   end.
 
-Definition parexec_load_reg (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
+Definition parexec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
   match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with
-  | None => Stuck
+  | None => parexec_incorrect_load trap chunk d rsw mw
   | Some v => Next (rsw#d <- v) mw
   end.
 
-Definition parexec_load_regxs (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
+Definition parexec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
   match Mem.loadv chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) with
-  | None => Stuck
+  | None => parexec_incorrect_load trap chunk d rsw mw
   | Some v => Next (rsw#d <- v) mw
   end.
 
@@ -1271,7 +1291,8 @@ Definition parexec_store_offset (chunk: memory_chunk) (rsr rsw: regset) (mr mw:
   | _ => Stuck
   end.
 
-Definition parexec_store_reg (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a ro: ireg) :=
+Definition parexec_store_reg
+           (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a ro: ireg) :=
   match Mem.storev chunk mr (Val.addl (rsr a) (rsr ro)) (rsr s) with
   | None => Stuck
   | Some m' => Next rsw m'
@@ -1329,7 +1350,7 @@ Definition load_chunk n :=
   | Pfls => Mfloat32
   | Pfld => Mfloat64
   end.
-
+              
 Definition store_chunk n :=
   match n with
   | Psb => Mint8unsigned
@@ -1348,12 +1369,12 @@ Definition bstep (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
   match bi with
   | PArith ai => Next (parexec_arith_instr ai rsr rsw) mw
 
-  | PLoadRRO n d a ofs => parexec_load_offset (load_chunk n) rsr rsw mr mw d a ofs
-  | PLoadRRR n d a ro => parexec_load_reg (load_chunk n) rsr rsw mr mw d a ro
-  | PLoadRRRXS n d a ro => parexec_load_regxs (load_chunk n) rsr rsw mr mw d a ro
-  | PLoadQRRO d a ofs => 
+  | PLoad (PLoadRRO trap n d a ofs) => parexec_load_offset trap (load_chunk n) rsr rsw mr mw d a ofs
+  | PLoad (PLoadRRR trap n d a ro) => parexec_load_reg trap (load_chunk n) rsr rsw mr mw d a ro
+  | PLoad (PLoadRRRXS trap n d a ro) => parexec_load_regxs trap (load_chunk n) rsr rsw mr mw d a ro
+  | PLoad (PLoadQRRO d a ofs) => 
     parexec_load_q_offset rsr rsw mr mw d a ofs
-  | PLoadORRO d a ofs => 
+  | PLoad (PLoadORRO d a ofs) => 
     parexec_load_o_offset rsr rsw mr mw d a ofs
 
   | PStoreRRO n s a ofs => parexec_store_offset (store_chunk n) rsr rsw mr mw s a ofs
@@ -1692,7 +1713,7 @@ Proof.
 Qed.
 
 
-Local Hint Resolve parexec_bblock_write_in_order.
+Local Hint Resolve parexec_bblock_write_in_order: core.
 
 Lemma det_parexec_write_in_order f b rs m rs' m':
    det_parexec f b rs m rs' m' -> parexec_wio_bblock f b rs m = Next rs' m'.
diff --git a/mppa_k1c/Builtins1.v b/mppa_k1c/Builtins1.v
index 6186961f..3b5cd419 100644
--- a/mppa_k1c/Builtins1.v
+++ b/mppa_k1c/Builtins1.v
@@ -43,18 +43,18 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
   match b with
   | BI_fmin | BI_fmax =>
-      mksignature (Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default
+      mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default
   | BI_fminf | BI_fmaxf =>
-      mksignature (Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default
+      mksignature (Tsingle :: Tsingle :: nil) Tsingle cc_default
   | BI_fabsf =>
-      mksignature (Tsingle :: nil) (Some Tsingle) cc_default
+      mksignature (Tsingle :: nil) Tsingle cc_default
   | BI_fma =>
-      mksignature (Tfloat :: Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default
+      mksignature (Tfloat :: Tfloat :: Tfloat :: nil) Tfloat cc_default
   | BI_fmaf =>
-      mksignature (Tsingle :: Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default
+      mksignature (Tsingle :: Tsingle :: Tsingle :: nil) Tsingle cc_default
   end.
 
-Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
   match b with
   | BI_fmin => mkbuiltin_n2t Tfloat Tfloat Tfloat ExtFloat.min
   | BI_fmax => mkbuiltin_n2t Tfloat Tfloat Tfloat ExtFloat.max
diff --git a/mppa_k1c/CSE2deps.v b/mppa_k1c/CSE2deps.v
new file mode 100644
index 00000000..8ab9242a
--- /dev/null
+++ b/mppa_k1c/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+     (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+  && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+  && ((ofsw + size_chunk chunkw <=? ofsr) ||
+      (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+  match addr, addr', args, args' with
+  | (Aindexed ofs), (Aindexed ofs'),
+    (base :: nil), (base' :: nil) =>
+    if peq base base'
+    then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+    else true  | _, _, _, _ => true
+  end.
diff --git a/mppa_k1c/CSE2depsproof.v b/mppa_k1c/CSE2depsproof.v
new file mode 100644
index 00000000..a3811e78
--- /dev/null
+++ b/mppa_k1c/CSE2depsproof.v
@@ -0,0 +1,127 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+  Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat.
+Proof.
+  unfold Ptrofs.wordsize.
+  unfold Wordsize_Ptrofs.wordsize.
+  trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+  Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
+Proof.
+  unfold Ptrofs.modulus.
+  rewrite ptrofs_size.
+  destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section MEMORY_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  Variable base : val.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Aindexed ofsw) (base :: nil) = Some addrw. 
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Aindexed ofsr) (base :: nil) = Some addrr.
+
+  Lemma load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                          \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+    destruct base; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: try rewrite ptrofs_modulus in *.
+    all: destruct Archi.ptr64.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Lemma may_overlap_sound:
+  forall m m' : mem,
+  forall chunk addr args chunk' addr' args' v a a' rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+    (may_overlap chunk addr args chunk' addr' args') = false ->
+    (Mem.storev chunk m a v) = Some m' ->
+    (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m  a').
+Proof.
+  intros until rs.
+  intros ADDR ADDR' OVERLAP STORE.
+  destruct addr; destruct addr'; try discriminate.
+  { (* Aindexed / Aindexed *)
+  destruct args as [ | base [ | ]]. 1,3: discriminate.
+  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+  simpl in OVERLAP.
+  destruct (peq base base'). 2: discriminate.
+  subst base'.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  simpl in *.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  }
+Qed.
+
+End SOUNDNESS.
diff --git a/mppa_k1c/Conventions1.v b/mppa_k1c/Conventions1.v
index d41f1095..48346a6d 100644
--- a/mppa_k1c/Conventions1.v
+++ b/mppa_k1c/Conventions1.v
@@ -90,12 +90,17 @@ Definition is_float_reg (r: mreg) := false.
   returned value.  We treat a function without result as a function
   with one integer result. *)
 
+
 Definition loc_result (s: signature) : rpair mreg :=
-   match s.(sig_res) with
-   | None => One R0
-   | Some (Tint | Tany32) => One R0
-   | Some (Tfloat | Tsingle | Tany64) => One R0
-   | Some Tlong => if Archi.ptr64 then One R0 else One R0
+  match s.(sig_res) with
+   | Tvoid => One R0
+   | Tint8signed => One R0
+   | Tint8unsigned => One R0
+   | Tint16signed => One R0
+   | Tint16unsigned => One R0
+   | Tint | Tany32 => One R0
+   | Tfloat | Tsingle | Tany64 => One R0
+   | Tlong => if Archi.ptr64 then One R0 else One R0
    end.
 
 (** The result registers have types compatible with that given in the signature. *)
@@ -104,8 +109,8 @@ Lemma loc_result_type:
   forall sig,
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
-  intros. unfold proj_sig_res, loc_result, mreg_type;
-  destruct (sig_res sig) as [[]|]; auto; destruct Archi.ptr64; auto.
+  intros. unfold proj_sig_res, loc_result, mreg_type.
+  destruct (sig_res sig); try destruct Archi.ptr64; simpl; trivial; destruct t; trivial.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -115,7 +120,7 @@ Lemma loc_result_caller_save:
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
   intros. unfold loc_result, is_callee_save;
-  destruct (sig_res s) as [[]|]; simpl; auto; destruct Archi.ptr64; simpl; auto.
+            destruct (sig_res s); simpl; auto; try destruct Archi.ptr64; simpl; auto; try destruct t; simpl; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -125,14 +130,15 @@ Lemma loc_result_pair:
   match loc_result sg with
   | One _ => True
   | Twolong r1 r2 =>
-       r1 <> r2 /\ sg.(sig_res) = Some Tlong
+       r1 <> r2 /\ proj_sig_res sg = Tlong
     /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true 
     /\ Archi.ptr64 = false
   end.
 Proof.
   intros.
-  unfold loc_result; destruct (sig_res sg) as [[]|]; auto.
-  unfold mreg_type; destruct Archi.ptr64; auto.
+  unfold loc_result; destruct (sig_res sg); auto;
+    unfold mreg_type; try destruct Archi.ptr64; auto;
+      destruct t; auto.
 Qed.
 
 (** The location of the result depends only on the result part of the signature *)
@@ -409,3 +415,6 @@ Lemma loc_arguments_main:
 Proof.
   reflexivity.
 Qed.
+
+
+Definition return_value_needs_normalization (t: rettype) : bool := false.
diff --git a/mppa_k1c/DuplicateOpcodeHeuristic.ml b/mppa_k1c/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..2ec314c1
--- /dev/null
+++ b/mppa_k1c/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,27 @@
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+  match cond with
+  | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccompf c | Ccompfs c -> (match c with
+      | Ceq -> Some false
+      | Cne -> Some true
+      | _ -> None
+      )
+  | Cnotcompf c | Cnotcompfs c -> (match c with
+      | Ceq -> Some true
+      | Cne -> Some false
+      | _ -> None
+      )
+  | _ -> None
diff --git a/mppa_k1c/NeedOp.v b/mppa_k1c/NeedOp.v
index d2d4d5f5..7111c48b 100644
--- a/mppa_k1c/NeedOp.v
+++ b/mppa_k1c/NeedOp.v
@@ -383,20 +383,6 @@ Proof.
   rewrite modarith_idem.
   apply mul_sound;
   rewrite modarith_idem; trivial.
-  (* maddl *)
-- apply addl_sound; trivial.
-  apply mull_sound; trivial.
-  rewrite default_idem; trivial.
-  rewrite default_idem; trivial.
-  (* msubl *)
-- apply subl_sound; trivial.
-  apply mull_sound; trivial.
-  rewrite default_idem; trivial.
-  rewrite default_idem; trivial.
-- apply vagree_triple_op_float; assumption.
-- apply vagree_triple_op_float; assumption.
-- apply vagree_triple_op_single; assumption.
-- apply vagree_triple_op_single; assumption.
 - destruct (eval_condition0 _ _ _) as [b|] eqn:EC.
   erewrite needs_of_condition0_sound by eauto.
   apply select_sound; auto.
diff --git a/mppa_k1c/Op.v b/mppa_k1c/Op.v
index f9a774e8..92061d04 100644
--- a/mppa_k1c/Op.v
+++ b/mppa_k1c/Op.v
@@ -1030,6 +1030,34 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
     + constructor.
 Qed.
 
+Definition is_trapping_op (op : operation) :=
+  match op with
+  | Odiv | Odivl | Odivu | Odivlu
+  | Omod | Omodl | Omodu | Omodlu
+  | Oshrximm _ | Oshrxlimm _
+  | Ointoffloat | Ointuoffloat
+  | Ointofsingle | Ointuofsingle
+  | Olongoffloat | Olonguoffloat
+  | Olongofsingle | Olonguofsingle
+  | Osingleofint | Osingleofintu
+  | Osingleoflong | Osingleoflongu
+  | Ofloatoflong | Ofloatoflongu => true
+  | _ => false
+  end.
+
+Lemma is_trapping_op_sound:
+  forall op vl sp m,
+    op <> Omove ->
+    is_trapping_op op = false ->
+    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    eval_operation genv sp op vl m <> None.
+Proof.
+  destruct op; intros; simpl in *; try congruence.
+  all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+  all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+  all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+  all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
@@ -1706,6 +1734,27 @@ Proof.
   - apply Val.offset_ptr_inject; auto. 
 Qed.
 
+Lemma eval_addressing_inj_none:
+  forall addr sp1 vl1 sp2 vl2,
+  (forall id ofs,
+      In id (globals_addressing addr) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing ge1 sp1 addr vl1 = None ->
+  eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+  intros until vl2. intros Hglobal Hinjsp Hinjvl.
+  destruct addr; simpl in *.
+  1,2: inv Hinjvl; trivial;
+       inv H0; trivial;
+       inv H2; trivial;
+       discriminate.
+  2,3: inv Hinjvl; trivial; discriminate.
+  inv Hinjvl; trivial; inv H0; trivial;
+    inv H; trivial; discriminate.
+Qed.
+  
 End EVAL_COMPAT.
 
 (** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
@@ -1812,6 +1861,24 @@ Proof.
   destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
 Qed.
 
+
+Lemma eval_addressing_lessdef_none:
+  forall sp addr vl1 vl2,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = None ->
+  eval_addressing genv sp addr vl2 = None.
+Proof.
+  intros until vl2. intros Hlessdef Heval1.
+  destruct addr; simpl in *.
+  1, 2, 4, 5: inv Hlessdef; trivial;
+  inv H0; trivial;
+  inv H2; trivial;
+    discriminate.
+  inv Hlessdef; trivial.
+  inv H0; trivial.
+  discriminate.
+Qed.
+  
 End EVAL_LESSDEF.
 
 (** Compatibility of the evaluation functions with memory injections. *)
@@ -1864,6 +1931,19 @@ Proof.
   econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
 Qed.
 
+Lemma eval_addressing_inject_none:
+  forall addr vl1 vl2,
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+  eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+  intros.
+  rewrite eval_shift_stack_addressing.
+  eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+  
 Lemma eval_operation_inject:
   forall op vl1 vl2 v1 m1 m2,
   Val.inject_list f vl1 vl2 ->
diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v
index 7c8f65a8..0611fdda 100644
--- a/mppa_k1c/Peephole.v
+++ b/mppa_k1c/Peephole.v
@@ -2,6 +2,7 @@ Require Import Coqlib.
 Require Import Asmvliw.
 Require Import Values.
 Require Import Integers.
+Require Import AST.
 Require Compopts.
 
 Definition gpreg_q_list : list gpreg_q :=
@@ -89,8 +90,8 @@ Fixpoint coalesce_mem (insns : list basic) : list basic :=
           | None => h0 :: (coalesce_mem t0)
           end
             
-        | (PLoadRRO Pld_a rd0 ra0 ofs0),
-          (PLoadRRO Pld_a rd1 ra1 ofs1) =>
+        | (PLoad (PLoadRRO TRAP Pld_a rd0 ra0 ofs0)),
+          (PLoad (PLoadRRO TRAP Pld_a rd1 ra1 ofs1)) =>
           match gpreg_q_search rd0 rd1 with
           | Some rd0rd1 =>
             let zofs0 := Ptrofs.signed ofs0 in
@@ -100,8 +101,8 @@ Fixpoint coalesce_mem (insns : list basic) : list basic :=
               if coalesce_octuples
               then
                 match t1 with
-                | (PLoadRRO Pld_a rd2 ra2 ofs2) :: 
-                  (PLoadRRO Pld_a rd3 ra3 ofs3) :: t3 =>
+                | (PLoad (PLoadRRO TRAP Pld_a rd2 ra2 ofs2)) :: 
+                  (PLoad (PLoadRRO TRAP Pld_a rd3 ra3 ofs3)) :: t3 =>
                   match gpreg_o_search rd0 rd1 rd2 rd3 with
                   | Some octuple =>
                     let zofs2 := Ptrofs.signed ofs2 in
diff --git a/mppa_k1c/PostpassScheduling.v b/mppa_k1c/PostpassScheduling.v
index 8b6de1e2..31180cea 100644
--- a/mppa_k1c/PostpassScheduling.v
+++ b/mppa_k1c/PostpassScheduling.v
@@ -12,7 +12,7 @@
 
 Require Import Coqlib Errors AST Integers.
 Require Import Asmblock Axioms Memory Globalenvs.
-Require Import Asmblockdeps Asmblockgenproof0.
+Require Import Asmblockdeps Asmblockgenproof0 Asmblockprops.
 Require Peephole.
 
 Local Open Scope error_monad_scope.
diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml
index 327901f3..686979a6 100644
--- a/mppa_k1c/PostpassSchedulingOracle.ml
+++ b/mppa_k1c/PostpassSchedulingOracle.ml
@@ -302,7 +302,7 @@ let arith_rec i =
   | PArithR (i, rd) -> arith_r_rec i (IR rd)
 
 let load_rec i = match i with
-  | PLoadRRO (i, rs1, rs2, imm) ->
+  | PLoadRRO (trap, i, rs1, rs2, imm) ->
       { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;
         read_at_id = []; read_at_e1 = [] }
   | PLoadQRRO(rs, ra, imm) ->
@@ -313,7 +313,7 @@ let load_rec i = match i with
      let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
      {  inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];
         imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}
-  | PLoadRRR (i, rs1, rs2, rs3) | PLoadRRRXS (i, rs1, rs2, rs3) ->
+  | PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) ->
       { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;
         read_at_id = []; read_at_e1 = [] }
 
@@ -442,7 +442,9 @@ let encode_imm (imm:int64) =
     else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm
 
 (** Resources *)
-let resource_names = ["ISSUE"; "TINY"; "LITE"; "ALU"; "LSU"; "MAU"; "BCU"; "ACC"; "DATA"; "TCA"; "BRE"; "BRO"; "NOP"]
+type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop
+
+let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop]
 
 let rec find_index elt l =
   match l with
@@ -454,98 +456,110 @@ let resource_id resource : int = find_index resource resource_names
 
 let resource_bound resource : int =
   match resource with
-  | "ISSUE" -> 8
-  | "TINY" -> 4
-  | "LITE" -> 2
-  | "ALU" -> 1
-  | "LSU" -> 1
-  | "MAU" -> 1
-  | "BCU" -> 1
-  | "ACC" -> 1
-  | "DATA" -> 1
-  | "TCA" -> 1
-  | "BRE" -> 1
-  | "BRO" -> 1
-  | "NOP" -> 4
-  | _ -> raise Not_found
+  | Rissue -> 8
+  | Rtiny -> 4
+  | Rlite -> 2
+  | Rfull -> 1
+  | Rlsu -> 1
+  | Rmau -> 1
+  | Rbcu -> 1
+  | Rtca -> 1
+  | Rauxr -> 1
+  | Rauxw -> 1
+  | Rcrrp -> 1
+  | Rcrwl -> 1
+  | Rcrwh -> 1
+  | Rnop -> 4
 
 let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names)
 
 (** Reservation tables *)
-let alu_tiny : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 1 | "TINY" -> 1 | _ -> 0 
-  in Array.of_list (List.map resmap resource_names)
-
-let alu_tiny_x : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 2 | "TINY" -> 1 | _ -> 0 
-  in Array.of_list (List.map resmap resource_names)
-
-let alu_tiny_y : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 3 | "TINY" -> 1 | _ -> 0 
+let alu_full : int array = let resmap = fun r -> match r with
+  | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
 let alu_lite : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 1 | "TINY" -> 1 | "LITE" -> 1 |  _ -> 0 
+  | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 |  _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
 let alu_lite_x : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 2 | "TINY" -> 1 | "LITE" -> 1 |  _ -> 0 
+  | Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 |  _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
 let alu_lite_y : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 3 | "TINY" -> 1 | "LITE" -> 1 |  _ -> 0 
-  in Array.of_list (List.map resmap resource_names)
-
-let alu_full : int array = let resmap = fun r -> match r with
-  | "ISSUE" -> 1 | "TINY" -> 1 | "LITE" -> 1 | "ALU" -> 1 | _ -> 0
+  | Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 |  _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
 let alu_nop : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 1 | "NOP" -> 1 | _ -> 0 
+  | Rissue -> 1 | Rnop -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let mau : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 1 | "TINY" -> 1 | "MAU" -> 1 |  _ -> 0 
+let alu_tiny : int array = let resmap = fun r -> match r with
+  | Rissue -> 1 | Rtiny -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let mau_x : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 2 | "TINY" -> 1 | "MAU" -> 1 | _ -> 0 
+let alu_tiny_x : int array = let resmap = fun r -> match r with
+  | Rissue -> 2 | Rtiny -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let mau_y : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 3 | "TINY" -> 1 | "MAU" -> 1 | _ -> 0 
+let alu_tiny_y : int array = let resmap = fun r -> match r with
+  | Rissue -> 3 | Rtiny -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
 let bcu : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 1 | "BCU" -> 1 | _ -> 0 
+  | Rissue -> 1 | Rbcu -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
 let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with 
-  | "ISSUE" -> 1 | "TINY" -> 2 | "MAU" -> 1 | "BCU" -> 1 | "NOP" -> 4 | _ -> 0 
+  | Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0
+  in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr : int array = let resmap = fun r -> match r with
+  | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+  in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr_x : int array = let resmap = fun r -> match r with
+  | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+  in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr_y : int array = let resmap = fun r -> match r with
+  | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+  in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw : int array = let resmap = fun r -> match r with
+  | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+  in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw_x : int array = let resmap = fun r -> match r with
+  | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+  in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw_y : int array = let resmap = fun r -> match r with
+  | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let lsu_acc : int array = let resmap = fun r -> match r with
-  | "ISSUE" -> 1 | "TINY" -> 1 | "LSU" -> 1 | "ACC" -> 1 | _ -> 0
+let mau : int array = let resmap = fun r -> match r with
+  | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 |  _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let lsu_acc_x : int array = let resmap = fun r -> match r with
-  | "ISSUE" -> 2 | "TINY" -> 1 | "LSU" -> 1 | "ACC" -> 1 | _ -> 0
+let mau_x : int array = let resmap = fun r -> match r with
+  | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let lsu_acc_y : int array = let resmap = fun r -> match r with
-  | "ISSUE" -> 3 | "TINY" -> 1 | "LSU" -> 1 | "ACC" -> 1 | _ -> 0
+let mau_y : int array = let resmap = fun r -> match r with
+  | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let lsu_data : int array = let resmap = fun r -> match r with
-  | "ISSUE" -> 1 | "TINY" -> 1 | "LSU" -> 1 | "DATA" -> 1 | _ -> 0
+let mau_auxr : int array = let resmap = fun r -> match r with
+  | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let lsu_data_x : int array = let resmap = fun r -> match r with
-  | "ISSUE" -> 2 | "TINY" -> 1 | "LSU" -> 1 | "DATA" -> 1 | _ -> 0
+let mau_auxr_x : int array = let resmap = fun r -> match r with
+  | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
-let lsu_data_y : int array = let resmap = fun r -> match r with
-  | "ISSUE" -> 3 | "TINY" -> 1 | "LSU" -> 1 | "DATA" -> 1 | _ -> 0
+let mau_auxr_y : int array = let resmap = fun r -> match r with
+  | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
   in Array.of_list (List.map resmap resource_names)
 
 (** Real instructions *)
@@ -599,10 +613,16 @@ let rec_to_usage r =
                           | Some U27L5 | Some U27L10 -> alu_tiny_x 
                           | Some E27U27L10 -> alu_tiny_y 
                           | _ -> raise InvalidEncoding)
-  | Mulw| Maddw | Msbfw -> (match encoding with None -> mau
+  | Maddw -> (match encoding with None -> mau_auxr
+                | Some U6 | Some S10 | Some U27L5 -> mau_auxr_x
+                | _ -> raise InvalidEncoding)
+  | Maddd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr
+                | Some U27L5 | Some U27L10 -> mau_auxr_x
+                | Some E27U27L10 -> mau_auxr_y)
+  | Mulw| Msbfw -> (match encoding with None -> mau
                                 | Some U6 | Some S10 | Some U27L5 -> mau_x
                                 | _ -> raise InvalidEncoding)
-  | Muld | Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau
+  | Muld | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau
                                 | Some U27L5 | Some U27L10 -> mau_x
                                 | Some E27U27L10 -> mau_y)
   | Nop -> alu_nop
@@ -612,13 +632,13 @@ let rec_to_usage r =
   | Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)
   | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau
   | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 
-      (match encoding with None | Some U6 | Some S10 -> lsu_data 
-                          | Some U27L5 | Some U27L10 -> lsu_data_x 
-                          | Some E27U27L10 -> lsu_data_y)
+      (match encoding with None | Some U6 | Some S10 -> lsu_auxw
+                          | Some U27L5 | Some U27L10 -> lsu_auxw_x
+                          | Some E27U27L10 -> lsu_auxw_y)
   | Sb | Sh | Sw | Sd | Sq | So ->
-      (match encoding with None | Some U6 | Some S10 -> lsu_acc 
-                          | Some U27L5 | Some U27L10 -> lsu_acc_x 
-                          | Some E27U27L10 -> lsu_acc_y)
+      (match encoding with None | Some U6 | Some S10 -> lsu_auxr
+                          | Some U27L5 | Some U27L10 -> lsu_auxr_x
+                          | Some E27U27L10 -> lsu_auxr_y)
   | Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu
   | Get -> bcu_tiny_tiny_mau_xnop
   | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd
diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v
index 21af276b..8cc7f0ab 100644
--- a/mppa_k1c/PostpassSchedulingproof.v
+++ b/mppa_k1c/PostpassSchedulingproof.v
@@ -14,7 +14,7 @@ Require Import Coqlib Errors.
 Require Import Integers Floats AST Linking.
 Require Import Values Memory Events Globalenvs Smallstep.
 Require Import Op Locations Machblock Conventions Asmblock.
-Require Import Asmblockgenproof0.
+Require Import Asmblockgenproof0 Asmblockprops.
 Require Import PostpassScheduling.
 Require Import Asmblockgenproof.
 Require Import Axioms.
@@ -30,62 +30,6 @@ Proof.
   intros. eapply match_transform_partial_program; eauto.
 Qed.
 
-Remark builtin_body_nil:
-  forall bb ef args res, exit bb = Some (PExpand (Pbuiltin ef args res)) -> body bb = nil.
-Proof.
-  intros. destruct bb as [hd bdy ex WF]. simpl in *.
-  apply wf_bblock_refl in WF. inv WF. unfold builtin_alone in H1.
-  eapply H1; eauto.
-Qed.
-
-Lemma exec_body_app:
-  forall l l' ge rs m rs'' m'',
-  exec_body ge (l ++ l') rs m = Next rs'' m'' ->
-  exists rs' m',
-       exec_body ge l rs m = Next rs' m'
-    /\ exec_body ge l' rs' m' = Next rs'' m''.
-Proof.
-  induction l.
-  - intros. simpl in H. repeat eexists. auto.
-  - intros. rewrite <- app_comm_cons in H. simpl in H.
-    destruct (exec_basic_instr ge a rs m) eqn:EXEBI.
-    + apply IHl in H. destruct H as (rs1 & m1 & EXEB1 & EXEB2).
-      repeat eexists. simpl. rewrite EXEBI. eauto. auto.
-    + discriminate.
-Qed.
-
-Lemma exec_body_pc:
-  forall l ge rs1 m1 rs2 m2,
-  exec_body ge l rs1 m1 = Next rs2 m2 ->
-  rs2 PC = rs1 PC.
-Proof.
-  induction l.
-  - intros. inv H. auto.
-  - intros until m2. intro EXEB.
-    inv EXEB. destruct (exec_basic_instr _ _ _) eqn:EBI; try discriminate.
-    eapply IHl in H0. rewrite H0.
-    erewrite exec_basic_instr_pc; eauto.
-Qed.
-
-Lemma next_eq:
-  forall (rs rs': regset) m m',
-  rs = rs' -> m = m' -> Next rs m = Next rs' m'.
-Proof.
-  intros; apply f_equal2; auto.
-Qed.
-
-Lemma regset_double_set:
-  forall r1 r2 (rs: regset) v1 v2,
-  r1 <> r2 ->
-  (rs # r1 <- v1 # r2 <- v2) = (rs # r2 <- v2 # r1 <- v1).
-Proof.
-  intros. apply functional_extensionality. intros r. destruct (preg_eq r r1).
-  - subst. rewrite Pregmap.gso; auto. repeat (rewrite Pregmap.gss). auto.
-  - destruct (preg_eq r r2).
-    + subst. rewrite Pregmap.gss. rewrite Pregmap.gso; auto. rewrite Pregmap.gss. auto.
-    + repeat (rewrite Pregmap.gso; auto).
-Qed.
-
 Lemma regset_double_set_id:
   forall r (rs: regset) v1 v2,
   (rs # r <- v1 # r <- v2) = (rs # r <- v2).
@@ -95,191 +39,6 @@ Proof.
   - repeat (rewrite Pregmap.gso); auto.
 Qed.
 
-Lemma exec_load_offset_pc_var:
-  forall t rs m rd ra ofs rs' m' v,
-  exec_load_offset t rs m rd ra ofs = Next rs' m' ->
-  exec_load_offset t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_load_offset in *. unfold parexec_load_offset in *. rewrite Pregmap.gso; try discriminate. destruct (eval_offset ofs); try discriminate.
-  destruct (Mem.loadv _ _ _).
-  - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
-  - discriminate.
-Qed.
-
-Lemma exec_load_reg_pc_var:
-  forall t rs m rd ra ro rs' m' v,
-  exec_load_reg t rs m rd ra ro = Next rs' m' ->
-  exec_load_reg t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_load_reg in *. unfold parexec_load_reg in *. rewrite Pregmap.gso; try discriminate.
-  destruct (Mem.loadv _ _ _).
-  - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
-  - discriminate.
-Qed.
-
-Lemma exec_load_regxs_pc_var:
-  forall t rs m rd ra ro rs' m' v,
-  exec_load_regxs t rs m rd ra ro = Next rs' m' ->
-  exec_load_regxs t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_load_regxs in *. unfold parexec_load_regxs in *. rewrite Pregmap.gso; try discriminate.
-  destruct (Mem.loadv _ _ _).
-  - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
-  - discriminate.
-Qed.
-
-Lemma exec_load_offset_q_pc_var:
-  forall rs m rd ra ofs rs' m' v,
-  exec_load_q_offset rs m rd ra ofs = Next rs' m' ->
-  exec_load_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_load_q_offset in *. unfold parexec_load_q_offset in *.
-  destruct (gpreg_q_expand rd) as [rd0 rd1].
-  (* destruct (ireg_eq rd0 ra); try discriminate. *)
-  rewrite Pregmap.gso; try discriminate.
-  destruct (Mem.loadv _ _ _); try discriminate.
-  inv H.
-  destruct (Mem.loadv _ _ _); try discriminate.
-  inv H1. f_equal.
-  rewrite (regset_double_set PC rd0) by discriminate.
-  rewrite (regset_double_set PC rd1) by discriminate.
-  reflexivity.
-Qed.
-
-Lemma exec_load_offset_o_pc_var:
-  forall rs m rd ra ofs rs' m' v,
-  exec_load_o_offset rs m rd ra ofs = Next rs' m' ->
-  exec_load_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_load_o_offset in *. unfold parexec_load_o_offset in *.
-  destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3].
-(*
-  destruct (ireg_eq rd0 ra); try discriminate.
-  destruct (ireg_eq rd1 ra); try discriminate.
-  destruct (ireg_eq rd2 ra); try discriminate.
-*)
-  rewrite Pregmap.gso; try discriminate.
-  simpl in *.
-  destruct (Mem.loadv _ _ _); try discriminate.
-  destruct (Mem.loadv _ _ _); try discriminate.
-  destruct (Mem.loadv _ _ _); try discriminate.
-  destruct (Mem.loadv _ _ _); try discriminate.
-  rewrite (regset_double_set PC rd0) by discriminate.
-  rewrite (regset_double_set PC rd1) by discriminate.
-  rewrite (regset_double_set PC rd2) by discriminate.
-  rewrite (regset_double_set PC rd3) by discriminate.
-  inv H.
-  trivial.
-Qed.
-
-Lemma exec_store_offset_pc_var:
-  forall t rs m rd ra ofs rs' m' v,
-  exec_store_offset t rs m rd ra ofs = Next rs' m' ->
-  exec_store_offset t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_store_offset in *. unfold parexec_store_offset in *. rewrite Pregmap.gso; try discriminate.
-  destruct (eval_offset ofs); try discriminate.
-  destruct (Mem.storev _ _ _).
-  - inv H. apply next_eq; auto.
-  - discriminate.
-Qed.
-
-Lemma exec_store_q_offset_pc_var:
-  forall rs m rd ra ofs rs' m' v,
-  exec_store_q_offset rs m rd ra ofs = Next rs' m' ->
-  exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate.
-  simpl in *.
-  destruct (gpreg_q_expand _) as [s0 s1].
-  destruct (Mem.storev _ _ _); try discriminate.
-  destruct (Mem.storev _ _ _); try discriminate.
-  inv H. apply next_eq; auto.
-Qed.
-
-Lemma exec_store_o_offset_pc_var:
-  forall rs m rd ra ofs rs' m' v,
-  exec_store_o_offset rs m rd ra ofs = Next rs' m' ->
-  exec_store_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
-Proof.
-  intros.
-  unfold exec_store_o_offset in *. unfold parexec_store_o_offset in *.
-  destruct (gpreg_o_expand _) as [[[s0 s1] s2] s3].
-  destruct (Mem.storev _ _ _); try discriminate.
-  destruct (Mem.storev _ _ _); try discriminate.
-  destruct (Mem.storev _ _ _); try discriminate.
-  destruct (Mem.storev _ _ _); try discriminate.
-  inv H.
-  trivial.
-Qed.
-  
-Lemma exec_store_reg_pc_var:
-  forall t rs m rd ra ro rs' m' v,
-  exec_store_reg t rs m rd ra ro = Next rs' m' ->
-  exec_store_reg t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_store_reg in *. unfold parexec_store_reg in *. rewrite Pregmap.gso; try discriminate.
-  destruct (Mem.storev _ _ _).
-  - inv H. apply next_eq; auto.
-  - discriminate.
-Qed.
-
-Lemma exec_store_regxs_pc_var:
-  forall t rs m rd ra ro rs' m' v,
-  exec_store_regxs t rs m rd ra ro = Next rs' m' ->
-  exec_store_regxs t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
-Proof.
-  intros. unfold exec_store_regxs in *. unfold parexec_store_regxs in *. rewrite Pregmap.gso; try discriminate.
-  destruct (Mem.storev _ _ _).
-  - inv H. apply next_eq; auto.
-  - discriminate.
-Qed.
-
-Lemma exec_basic_instr_pc_var:
-  forall ge i rs m rs' m' v,
-  exec_basic_instr ge i rs m = Next rs' m' ->
-  exec_basic_instr ge i (rs # PC <- v) m = Next (rs' # PC <- v) m'.
-Proof.
-  intros. unfold exec_basic_instr in *. unfold bstep in *. destruct i.
-  - unfold exec_arith_instr in *. destruct i; destruct i.
-      all: try (exploreInst; inv H; apply next_eq; auto;
-      apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate).
-(* 
-      (* Some cases treated seperately because exploreInst destructs too much *)
-      all: try (inv H; apply next_eq; auto; apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate). *)
-  - destruct i.
-    + exploreInst; apply exec_load_offset_pc_var; auto.
-    + exploreInst; apply exec_load_reg_pc_var; auto.
-    + exploreInst; apply exec_load_regxs_pc_var; auto.
-    + apply exec_load_offset_q_pc_var; auto.
-    + apply exec_load_offset_o_pc_var; auto.
-  - destruct i.
-    + exploreInst; apply exec_store_offset_pc_var; auto.
-    + exploreInst; apply exec_store_reg_pc_var; auto.
-    + exploreInst; apply exec_store_regxs_pc_var; auto.
-    + apply exec_store_q_offset_pc_var; auto.
-    + apply exec_store_o_offset_pc_var; auto.
-  - destruct (Mem.alloc _ _ _) as (m1 & stk). repeat (rewrite Pregmap.gso; try discriminate).
-    destruct (Mem.storev _ _ _ _); try discriminate.
-    inv H. apply next_eq; auto. apply functional_extensionality. intros.
-    rewrite (regset_double_set GPR32 PC); try discriminate.
-    rewrite (regset_double_set GPR12 PC); try discriminate.
-    rewrite (regset_double_set FP PC); try discriminate. reflexivity.
-  - repeat (rewrite Pregmap.gso; try discriminate).
-    destruct (Mem.loadv _ _ _); try discriminate.
-    destruct (rs GPR12); try discriminate.
-    destruct (Mem.free _ _ _ _); try discriminate.
-    inv H. apply next_eq; auto.
-    rewrite (regset_double_set GPR32 PC).
-    rewrite (regset_double_set GPR12 PC). reflexivity.
-    all: discriminate.
-  - destruct rs0; try discriminate. inv H. apply next_eq; auto.
-    repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
-  - destruct rd; try discriminate. inv H. apply next_eq; auto.
-    repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
-  - inv H. apply next_eq; auto.
-Qed.
-
 Lemma exec_body_pc_var:
   forall l ge rs m rs' m' v,
   exec_body ge l rs m = Next rs' m' ->
@@ -302,9 +61,9 @@ Proof.
   - subst. repeat (rewrite Pregmap.gss); auto.
     destruct v; simpl; auto.
     rewrite Ptrofs.add_assoc.
-    cutrewrite (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)); auto.
+    enough (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)) as ->; auto.
     unfold Ptrofs.add.
-    cutrewrite (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)); auto.
+    enough (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)) as ->; auto.
     repeat (rewrite Ptrofs.unsigned_repr); auto.
   - repeat (rewrite Pregmap.gso; auto).
 Qed.
@@ -461,7 +220,8 @@ Proof.
     destruct (zeq pos 0).
     + inv H. exists lbb. constructor; auto.
     + apply IHlbb in H. destruct H as (c & TAIL). exists c.
-      cutrewrite (pos = pos - size a + size a). apply code_tail_S; auto.
+      enough (pos = pos - size a + size a) as ->.
+      apply code_tail_S; auto.
       omega.
 Qed.
 
@@ -776,12 +536,8 @@ Qed.
 
 End PRESERVATION_ASMBLOCK.
 
-
-
-
 Require Import Asmvliw.
 
-
 Lemma verified_par_checks_alls_bundles lb x: forall bundle,
   verify_par lb = OK x ->
   List.In bundle lb -> verify_par_bblock bundle = OK tt.
@@ -792,7 +548,6 @@ Proof.
   destruct x0; auto.
 Qed.
 
-
 Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
   verified_schedule_nob bb = OK lb ->
   List.In bundle lb -> verify_par_bblock bundle = OK tt.
@@ -813,7 +568,7 @@ Proof.
   unfold builtin_alone in H0. erewrite H0; eauto.
 Qed.
 
-Local Hint Resolve verified_schedule_nob_checks_alls_bundles.
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core.
 
 Lemma verified_schedule_checks_alls_bundles bb lb bundle:
   verified_schedule bb = OK lb ->
@@ -914,9 +669,6 @@ Qed.
 
 End PRESERVATION_ASMVLIW.
 
-
-
-
 Section PRESERVATION.
 
 Variables prog tprog: program.
diff --git a/mppa_k1c/SelectOpproof.v b/mppa_k1c/SelectOpproof.v
index 583fb545..d3eb1dde 100644
--- a/mppa_k1c/SelectOpproof.v
+++ b/mppa_k1c/SelectOpproof.v
@@ -1204,7 +1204,8 @@ Qed.
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
   red; intros until x. unfold cast8unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto.
+
+  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate.
 Qed.
 
 Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
@@ -1217,7 +1218,7 @@ Qed.
 Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
 Proof.
   red; intros until x. unfold cast8unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto.
+  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate.
 Qed.
 
 Theorem eval_intoffloat:
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 0c179a07..930b1c51 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -251,6 +251,10 @@ module Target (*: TARGET*) =
     | ARegXS _ -> fprintf oc ".xs"
     | _ -> ()
 
+    let lsvariant oc = function
+      | TRAP -> ()
+      | NOTRAP -> output_string oc ".s"
+      
     let icond_name = let open Asmvliw in function
       | ITne | ITneu -> "ne"
       | ITeq | ITequ -> "eq"
@@ -420,18 +424,18 @@ module Target (*: TARGET*) =
          section oc Section_text
 
       (* Load/Store instructions *)
-      | Plb(rd, ra, adr) ->
-         fprintf oc "	lbs%a	%a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra
-      | Plbu(rd, ra, adr) ->
-         fprintf oc "	lbz%a	%a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra
-      | Plh(rd, ra, adr) ->
-         fprintf oc "	lhs%a	%a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra
-      | Plhu(rd, ra, adr) ->
-         fprintf oc "	lhz%a	%a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra
-      | Plw(rd, ra, adr) | Plw_a(rd, ra, adr) | Pfls(rd, ra, adr) ->
-         fprintf oc "	lws%a	%a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra
-      | Pld(rd, ra, adr) | Pfld(rd, ra, adr) | Pld_a(rd, ra, adr) -> assert Archi.ptr64;
-         fprintf oc "	ld%a	%a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra
+      | Plb(trap, rd, ra, adr) ->
+         fprintf oc "	lbs%a%a	%a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+      | Plbu(trap, rd, ra, adr) ->
+         fprintf oc "	lbz%a%a	%a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+      | Plh(trap, rd, ra, adr) ->
+         fprintf oc "	lhs%a%a	%a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+      | Plhu(trap, rd, ra, adr) ->
+         fprintf oc "	lhz%a%a	%a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+      | Plw(trap, rd, ra, adr) | Plw_a(trap, rd, ra, adr) | Pfls(trap, rd, ra, adr) ->
+         fprintf oc "	lws%a%a	%a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+      | Pld(trap, rd, ra, adr) | Pfld(trap, rd, ra, adr) | Pld_a(trap, rd, ra, adr) -> assert Archi.ptr64;
+         fprintf oc "	ld%a%a	%a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
       | Plq(rd, ra, adr) ->
          fprintf oc "	lq%a	%a = %a[%a]\n" xscale adr gpreg_q rd addressing adr ireg ra
       | Plo(rd, ra, adr) ->
diff --git a/mppa_k1c/ValueAOp.v b/mppa_k1c/ValueAOp.v
index 2c9bdf3e..7d84447e 100644
--- a/mppa_k1c/ValueAOp.v
+++ b/mppa_k1c/ValueAOp.v
@@ -472,6 +472,24 @@ Proof.
   rewrite Ptrofs.add_zero_l; eauto with va.
 Qed.
 
+Theorem eval_static_addressing_sound_none:
+  forall addr vargs aargs,
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = None ->
+  list_forall2 (vmatch bc) vargs aargs ->
+  (eval_static_addressing addr aargs) = Vbot.
+Proof.
+  unfold eval_addressing, eval_static_addressing.
+  intros until aargs. intros Heval_none Hlist.
+  inv Hlist.
+  destruct addr; trivial; discriminate.
+  inv H0.
+  destruct addr; trivial; discriminate.
+  inv H2.
+  destruct addr; trivial; discriminate.
+  inv H3;
+  destruct addr; trivial; discriminate.
+Qed.
+
 Theorem eval_static_operation_sound:
   forall op vargs m vres aargs,
   eval_operation ge (Vptr sp Ptrofs.zero) op vargs m = Some vres ->
diff --git a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
index 5c94d435..cf46072f 100644
--- a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
+++ b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
@@ -403,7 +403,7 @@ Proof.
     * eapply H2; eauto. intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff; auto.
     * intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff, <- in_rev; auto.
 Qed.
-Local Hint Resolve app_fail_allvalid_correct.
+Local Hint Resolve app_fail_allvalid_correct: core.
 
 Lemma app_fail_correct l pt t1 t2: 
   match_pt t1 pt -> 
diff --git a/mppa_k1c/abstractbb/ImpSimuTest.v b/mppa_k1c/abstractbb/ImpSimuTest.v
index ea55b735..7a77ec15 100644
--- a/mppa_k1c/abstractbb/ImpSimuTest.v
+++ b/mppa_k1c/abstractbb/ImpSimuTest.v
@@ -304,12 +304,12 @@ Proof.
       rewrite <- EQT; eauto.
     + exploit smem_valid_set_decompose_1; eauto.
   - clear DM0. unfold hsmem_post_eval, hsmem_post_eval in * |- *; simpl.
-    Local Hint Resolve smem_valid_set_decompose_1.
+    Local Hint Resolve smem_valid_set_decompose_1: core.
     intros; case (R.eq_dec x x0).
     + intros; subst; rewrite !Dict.set_spec_eq; simpl; eauto.
     + intros; rewrite !Dict.set_spec_diff; simpl; eauto.
 Qed.
-Local Hint Resolve naive_set_correct.
+Local Hint Resolve naive_set_correct: core.
 
 Definition equiv_hsmem ge (hd1 hd2: hsmem) := 
       (forall m, allvalid ge hd1.(hpre) m <-> allvalid ge hd2.(hpre) m)
@@ -523,7 +523,7 @@ Lemma hinst_smem_correct i: forall hd hod,
   WHEN hinst_smem i hd hod ~> hd' THEN
     forall ge od d, smem_model ge od hod -> smem_model ge d hd -> (forall m, smem_valid ge d m -> smem_valid ge od m) -> smem_model ge (inst_smem i d od) hd'.
 Proof.
-  Local Hint Resolve smem_valid_set_proof.
+  Local Hint Resolve smem_valid_set_proof: core.
   induction i; simpl; wlp_simplify; eauto 15 with wlp.
 Qed.
 Global Opaque hinst_smem.
@@ -563,7 +563,7 @@ Definition bblock_hsmem: bblock -> ?? hsmem
 Lemma bblock_hsmem_correct p:
   WHEN bblock_hsmem p ~> hd THEN forall ge, smem_model ge (bblock_smem p) hd.
 Proof.
-  Local Hint Resolve hsmem_empty_correct.
+  Local Hint Resolve hsmem_empty_correct: core.
   wlp_simplify.
 Qed.
 Global Opaque bblock_hsmem.
@@ -775,7 +775,7 @@ Proof.
   intro H; erewrite <- list_term_eval_set_hid; rewrite H. apply list_term_eval_set_hid.
 Qed.
 
-Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv.
+Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv: core.
 
 Program Definition bblock_simu_test (p1 p2: bblock): ?? bool :=
   DO log <~ count_logger ();;
@@ -802,7 +802,7 @@ Obligation 2.
   wlp_simplify.
 Qed.
 
-Local Hint Resolve g_bblock_simu_test_correct.
+Local Hint Resolve g_bblock_simu_test_correct: core.
 
 Theorem bblock_simu_test_correct p1 p2:
   WHEN bblock_simu_test p1 p2 ~> b THEN b=true -> forall ge, bblock_simu ge p1 p2.
@@ -1123,7 +1123,7 @@ Definition get {A} (d:t A) (x:R.t): option A
 Definition set {A} (d:t A) (x:R.t) (v:A): t A
  := PositiveMap.add x v d.
 
-Local Hint Unfold PositiveMap.E.eq.
+Local Hint Unfold PositiveMap.E.eq: core.
 
 Lemma set_spec_eq A d x (v: A):
   get (set d x v) x = Some v.
diff --git a/mppa_k1c/abstractbb/Impure/ImpConfig.v b/mppa_k1c/abstractbb/Impure/ImpConfig.v
index 1bd93d4c..e49a4611 100644
--- a/mppa_k1c/abstractbb/Impure/ImpConfig.v
+++ b/mppa_k1c/abstractbb/Impure/ImpConfig.v
@@ -22,9 +22,9 @@ Module Type ImpureView.
 (* START COMMENT *)
  Module UnsafeImpure.
 
- Parameter unsafe_coerce: forall {A}, t A -> A.
+ Parameter unsafe_coerce: forall {A}, t A -> option A.
 
- Parameter unsafe_coerce_not_really_correct: forall A (k: t A) (x:A), (unsafe_coerce k)=x -> mayRet k x. 
+ Parameter unsafe_coerce_not_really_correct: forall A (k: t A) (x:A), (unsafe_coerce k)=Some x -> mayRet k x.
 
  Extraction Inline unsafe_coerce.
 
@@ -41,11 +41,11 @@ Module Impure: ImpureView.
 
   Module UnsafeImpure.
 
-  Definition unsafe_coerce {A} (x:t A) := x.
+  Definition unsafe_coerce {A} (x:t A) := Some x.
 
-  Lemma unsafe_coerce_not_really_correct: forall A (k: t A) x, (unsafe_coerce k)=x -> mayRet k x.
+  Lemma unsafe_coerce_not_really_correct: forall A (k: t A) x, (unsafe_coerce k)=Some x -> mayRet k x.
   Proof.
-    unfold unsafe_coerce, mayRet; auto.
+    unfold unsafe_coerce, mayRet; congruence.
   Qed.
 
   End UnsafeImpure.
diff --git a/mppa_k1c/abstractbb/Impure/ImpHCons.v b/mppa_k1c/abstractbb/Impure/ImpHCons.v
index d8002375..637116cc 100644
--- a/mppa_k1c/abstractbb/Impure/ImpHCons.v
+++ b/mppa_k1c/abstractbb/Impure/ImpHCons.v
@@ -95,7 +95,7 @@ Proof.
   wlp_simplify.
 Qed.
 Global Opaque assert_list_incl.
-Hint Resolve assert_list_incl_correct.
+Hint Resolve assert_list_incl_correct: wlp.
 
 End Sets.
 
@@ -165,7 +165,7 @@ Lemma hConsV_correct A (hasheq: A -> A -> ?? bool):
     (forall x y, WHEN hasheq x y ~> b THEN b=true -> x=y) -> 
     forall x, WHEN hco.(hC) x ~> x' THEN x.(hdata).(data)=x'.(data).
 Proof.
-  Local Hint Resolve f_equal2.
+  Local Hint Resolve f_equal2: core.
   wlp_simplify.
   exploit H; eauto.
   + wlp_simplify.
diff --git a/mppa_k1c/abstractbb/Parallelizability.v b/mppa_k1c/abstractbb/Parallelizability.v
index 22809095..30904b5d 100644
--- a/mppa_k1c/abstractbb/Parallelizability.v
+++ b/mppa_k1c/abstractbb/Parallelizability.v
@@ -332,7 +332,7 @@ Fixpoint bblock_wframe(p:bblock): list R.t :=
   | i::p' => (inst_wframe i)++(bblock_wframe p') 
   end.
 
-Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm.
+Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm: core.
 
 Lemma bblock_wframe_Permutation p p': 
  Permutation p p' -> Permutation (bblock_wframe p)  (bblock_wframe p').
@@ -620,7 +620,7 @@ Include ParallelizablityChecking L.
 Section PARALLEL2.
 Variable ge: genv.
 
-Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame.
+Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame: core.
 
 (** Now, refinement of each operation toward parallelizable *)
 
@@ -659,14 +659,14 @@ Fixpoint inst_sframe (i: inst): S.t :=
   | a::i' => S.add (fst a) (S.union (exp_sframe (snd a)) (inst_sframe i'))
   end.
 
-Local Hint Resolve exp_sframe_correct.
+Local Hint Resolve exp_sframe_correct: core.
 
 Lemma inst_sframe_correct i: S.match_frame (inst_sframe i) (inst_frame i).
 Proof.
   induction i as [|[y e] i']; simpl; auto.
 Qed.
 
-Local Hint Resolve inst_wsframe_correct inst_sframe_correct.
+Local Hint Resolve inst_wsframe_correct inst_sframe_correct: core.
 
 Fixpoint is_pararec (p: bblock) (wsframe: S.t): bool :=
   match p with
diff --git a/mppa_k1c/abstractbb/SeqSimuTheory.v b/mppa_k1c/abstractbb/SeqSimuTheory.v
index 649dd083..e234883f 100644
--- a/mppa_k1c/abstractbb/SeqSimuTheory.v
+++ b/mppa_k1c/abstractbb/SeqSimuTheory.v
@@ -102,9 +102,6 @@ Fixpoint bblock_smem_rec (p: bblock) (d: smem): smem :=
      let d':=inst_smem i d d in
      bblock_smem_rec p' d'
   end.
-(*
-Local Hint Resolve smem_eval_empty.
-*)
 
 Definition bblock_smem: bblock -> smem
  := fun p => bblock_smem_rec p smem_empty.
@@ -124,7 +121,7 @@ Proof.
   intros d a H; eapply inst_smem_pre_monotonic; eauto.
 Qed.
 
-Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic.
+Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic: core.
 
 Lemma term_eval_exp e (od:smem) m0 old:
   (forall x, term_eval ge (od x) m0 = Some (old x)) ->
@@ -185,7 +182,7 @@ Lemma bblocks_smem_rec_Some_correct1 p m0: forall (m1 m2: mem) (d: smem),
   (forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
   forall x, term_eval ge (bblock_smem_rec p d x) m0 = Some (m2 x).
 Proof.
-  Local Hint Resolve inst_smem_Some_correct1.
+  Local Hint Resolve inst_smem_Some_correct1: core.
   induction p as [ | i p]; simpl; intros m1 m2 d H.
   - inversion_clear H; eauto.
   - intros H0 x0.
@@ -299,7 +296,7 @@ Lemma block_smem_rec_valid p m0: forall (m1 m2: mem) (d:smem),
   (forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
   pre (bblock_smem_rec p d) ge m0.
 Proof.
-  Local Hint Resolve inst_valid.
+  Local Hint Resolve inst_valid: core.
   induction p as [ | i p]; simpl; intros m1 d H; auto.
   intros H0 H1.
   destruct (inst_run ge i m1 m1) eqn: Heqov; eauto.
@@ -326,7 +323,7 @@ Theorem bblock_smem_simu p1 p2:
    smem_simu (bblock_smem p1) (bblock_smem p2) ->
    bblock_simu ge p1 p2.
 Proof.
-  Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1.
+  Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1: core.
   intros (INCL & EQUIV) m DONTFAIL; unfold smem_valid in * |-.
   destruct (run ge p1 m) as [m1|] eqn: RUN1; simpl; try congruence.
   assert (X: forall x, term_eval ge (bblock_smem p1 x) m = Some (m1 x)); eauto.
diff --git a/mppa_k1c/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v
index decc3e2e..58455ada 100644
--- a/mppa_k1c/Asmblockgenproof0.v
+++ b/mppa_k1c/lib/Asmblockgenproof0.v
@@ -22,16 +22,10 @@ Require Import Asmblockgen.
 Require Import Conventions1.
 Require Import Axioms.
 Require Import Machblockgenproof. (* FIXME: only use to import [is_tail_app] and [is_tail_app_inv] *)
+Require Import Asmblockprops.
 
 Module MB:=Machblock.
-Module AB:=Asmvliw.
-
-Hint Extern 2 (_ <> _) => congruence: asmgen.
-
-Definition bblock_simu (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
-  forall rs m,
-    exec_bblock ge f bb rs m <> Stuck ->
-    exec_bblock ge f bb rs m = exec_bblock ge f bb' rs m.
+Module AB:=Asmblock.
 
 Lemma ireg_of_eq:
   forall r r', ireg_of r = OK r' -> preg_of r = IR r'.
@@ -51,53 +45,6 @@ Proof.
   destruct r1; destruct r2; simpl; intros; reflexivity || discriminate.
 Qed.
 
-Lemma preg_of_data:
-  forall r, data_preg (preg_of r) = true.
-Proof.
-  intros. destruct r; reflexivity.
-Qed.
-Hint Resolve preg_of_data: asmgen.
-
-Lemma data_diff:
-  forall r r',
-  data_preg r = true -> data_preg r' = false -> r <> r'.
-Proof.
-  congruence.
-Qed.
-Hint Resolve data_diff: asmgen.
-
-Lemma preg_of_not_SP:
-  forall r, preg_of r <> SP.
-Proof.
-  intros. unfold preg_of; destruct r; simpl; congruence.
-Qed.
-
-Lemma preg_of_not_PC:
-  forall r, preg_of r <> PC.
-Proof.
-  intros. apply data_diff; auto with asmgen.
-Qed.
-
-Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
-
-Lemma nextblock_pc:
-  forall b rs, (nextblock b rs)#PC = Val.offset_ptr rs#PC (Ptrofs.repr (size b)).
-Proof.
-  intros. apply Pregmap.gss.
-Qed.
-
-Lemma nextblock_inv:
-  forall b r rs, r <> PC -> (nextblock b rs)#r = rs#r.
-Proof.
-  intros. unfold nextblock. apply Pregmap.gso. red; intro; subst. auto.
-Qed.
-
-Lemma nextblock_inv1:
-  forall b r rs, data_preg r = true -> (nextblock b rs)#r = rs#r.
-Proof.
-  intros. apply nextblock_inv. red; intro; subst; discriminate.
-Qed.
-
 Lemma undef_regs_other:
   forall r rl rs,
   (forall r', In r' rl -> r <> r') ->
@@ -294,9 +241,9 @@ Qed.
 Lemma agree_undef_caller_save_regs:
   forall ms sp rs,
   agree ms sp rs ->
-  agree (Mach.undef_caller_save_regs ms) sp (Asmvliw.undef_caller_save_regs rs).
+  agree (Mach.undef_caller_save_regs ms) sp (undef_caller_save_regs rs).
 Proof.
-  intros. destruct H. unfold Mach.undef_caller_save_regs, Asmvliw.undef_caller_save_regs; split.
+  intros. destruct H. unfold Mach.undef_caller_save_regs, undef_caller_save_regs; split.
 - unfold proj_sumbool; rewrite dec_eq_true. auto.
 - auto.
 - intros. unfold proj_sumbool. rewrite dec_eq_false by (apply preg_of_not_SP). 
@@ -467,7 +414,7 @@ Proof.
 Qed.
 
 
-Local Hint Resolve code_tail_0 code_tail_S.
+Local Hint Resolve code_tail_0 code_tail_S: core.
 
 Lemma code_tail_next:
   forall fn ofs c0,
@@ -511,7 +458,7 @@ Proof.
   omega.
 Qed.
 
-Local Hint Resolve code_tail_next.
+Local Hint Resolve code_tail_next: core.
 
 Lemma code_tail_next_int:
   forall fn ofs bi c,
@@ -752,6 +699,19 @@ Proof.
   intros. destruct H. auto.
 Qed.
 
+Lemma exec_body_pc:
+  forall ge l rs1 m1 rs2 m2,
+  exec_body ge l rs1 m1 = Next rs2 m2 ->
+  rs2 PC = rs1 PC.
+Proof.
+  induction l.
+  - intros. inv H. auto.
+  - intros until m2. intro EXEB.
+    inv EXEB. destruct (exec_basic_instr _ _ _ _) eqn:EBI; try discriminate.
+    eapply IHl in H0. rewrite H0.
+    erewrite exec_basic_instr_pc; eauto.
+Qed.
+
 Section STRAIGHTLINE.
 
 Variable ge: genv.
@@ -880,67 +840,6 @@ Qed.
 
 (** Linking exec_straight with exec_straight_blocks *)
 
-Ltac Simplif :=
-  ((rewrite nextblock_inv by eauto with asmgen)
-  || (rewrite nextblock_inv1 by eauto with asmgen)
-  || (rewrite Pregmap.gss)
-  || (rewrite nextblock_pc)
-  || (rewrite Pregmap.gso by eauto with asmgen)
-  ); auto with asmgen.
-
-Ltac Simpl := repeat Simplif.
-
-Lemma exec_basic_instr_pc:
-  forall b rs1 m1 rs2 m2,
-  exec_basic_instr ge b rs1 m1 = Next rs2 m2 ->
-  rs2 PC = rs1 PC.
-Proof.
-  intros. destruct b; try destruct i; try destruct i.
-  all: try (inv H; Simpl).
-  1-10: try (unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); [inv H1; Simpl | discriminate]).
-  1-10: try (unfold parexec_load_reg in H1; destruct (Mem.loadv _ _ _); [inv H1; Simpl | discriminate]).
-  1-10: try (unfold parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); [inv H1; Simpl | discriminate]).
-  1-10: try (unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]).
-  1-10: try (unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto.
-  1-10: try (unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto.
-  - (* PLoadQRRO *)
-    unfold  parexec_load_q_offset in H1.
-    destruct (gpreg_q_expand _) as [r0 r1] in H1.
-    destruct (Mem.loadv _ _ _) in H1; try discriminate.
-    destruct (Mem.loadv _ _ _) in H1; try discriminate.
-    inv H1. Simpl.
-  - (* PLoadORRO *)
-    unfold  parexec_load_o_offset in H1.
-    destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
-    destruct (Mem.loadv _ _ _) in H1; try discriminate.
-    destruct (Mem.loadv _ _ _) in H1; try discriminate.
-    destruct (Mem.loadv _ _ _) in H1; try discriminate.
-    destruct (Mem.loadv _ _ _) in H1; try discriminate.
-    inv H1. Simpl.
-  - (* PStoreQRRO *)
-    unfold  parexec_store_q_offset in H1.
-    destruct (gpreg_q_expand _) as [r0 r1] in H1.
-    unfold eval_offset in H1; try discriminate.
-    destruct (Mem.storev _ _ _) in H1; try discriminate.
-    destruct (Mem.storev _ _ _) in H1; try discriminate.
-    inv H1. Simpl. reflexivity.
-  - (* PStoreORRO *)
-    unfold  parexec_store_o_offset in H1.
-    destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
-    unfold eval_offset in H1; try discriminate.
-    destruct (Mem.storev _ _ _) in H1; try discriminate.
-    destruct (Mem.storev _ _ _) in H1; try discriminate.
-    destruct (Mem.storev _ _ _) in H1; try discriminate.
-    destruct (Mem.storev _ _ _) in H1; try discriminate.
-    inv H1. Simpl. reflexivity.
-  - destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate.
-  - destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate.
-    destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate.
-  - destruct rs; try discriminate. inv H1. Simpl.
-  - destruct rd; try discriminate. inv H1; Simpl.
-  - reflexivity.
-Qed.
-
 Lemma exec_straight_pc:
   forall c c' rs1 m1 rs2 m2,
   exec_straight c rs1 m1 c' rs2 m2 ->
diff --git a/mppa_k1c/lib/ForwardSimulationBlock.v b/mppa_k1c/lib/ForwardSimulationBlock.v
index 39dd2234..224eda0a 100644
--- a/mppa_k1c/lib/ForwardSimulationBlock.v
+++ b/mppa_k1c/lib/ForwardSimulationBlock.v
@@ -21,7 +21,7 @@ Section starN_lemma.
 
 Variable L: semantics.
 
-Local Hint Resolve starN_refl starN_step Eapp_assoc.
+Local Hint Resolve starN_refl starN_step Eapp_assoc: core.
 
 Lemma starN_split n s t s':
   starN (step L) (globalenv L) n s t s' ->
@@ -93,7 +93,7 @@ Hypothesis simu_end_block:
 
 (** Introduction d'une sémantique par bloc sur L1 appelée "memoL1" *)
 
-Local Hint Resolve starN_refl starN_step.
+Local Hint Resolve starN_refl starN_step: core.
 
 Definition follows_in_block (head current: state L1): Prop :=
   dist_end_block head >= dist_end_block current 
@@ -164,7 +164,7 @@ Inductive is_well_memorized (s s': memostate): Prop :=
     memorized s' = None ->
     is_well_memorized s s'.
 
-Local Hint Resolve StartBloc MidBloc ExitBloc.
+Local Hint Resolve StartBloc MidBloc ExitBloc: core.
 
 Definition memoL1 := {| 
   state := memostate;
diff --git a/mppa_k1c/lib/Machblock.v b/mppa_k1c/lib/Machblock.v
index 2759c49d..5a7f1782 100644
--- a/mppa_k1c/lib/Machblock.v
+++ b/mppa_k1c/lib/Machblock.v
@@ -20,7 +20,7 @@ Inductive basic_inst: Type :=
   | MBsetstack: mreg -> ptrofs -> typ -> basic_inst
   | MBgetparam: ptrofs -> typ -> mreg -> basic_inst
   | MBop: operation -> list mreg -> mreg -> basic_inst
-  | MBload: memory_chunk -> addressing -> list mreg -> mreg -> basic_inst
+  | MBload: trapping_mode -> memory_chunk -> addressing -> list mreg -> mreg -> basic_inst
   | MBstore: memory_chunk -> addressing -> list mreg -> mreg -> basic_inst
   .
 
@@ -207,11 +207,22 @@ Inductive basic_step (s: list stackframe) (fb: block) (sp: val) (rs: regset) (m:
       rs' = ((undef_regs (destroyed_by_op op) rs)#res <- v) ->
       basic_step s fb sp rs m (MBop op args res) rs' m
   | exec_MBload:
-      forall addr args a v rs' chunk dst,
+      forall addr args a v rs' trap chunk dst,
       eval_addressing ge sp addr rs##args = Some a ->
       Mem.loadv chunk m a = Some v ->
       rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- v) ->
-      basic_step s fb sp rs m (MBload chunk addr args dst) rs' m
+      basic_step s fb sp rs m (MBload trap chunk addr args dst) rs' m
+  | exec_MBload_notrap1:
+      forall addr args rs' chunk dst,
+      eval_addressing ge sp addr rs##args = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+      basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m
+  | exec_MBload_notrap2:
+      forall addr args a rs' chunk dst,
+      eval_addressing ge sp addr rs##args = Some a ->
+      Mem.loadv chunk m a = None ->
+      rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+      basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m
   | exec_MBstore:
       forall chunk addr args src m' a rs',
       eval_addressing ge sp addr rs##args = Some a ->
diff --git a/mppa_k1c/lib/Machblockgen.v b/mppa_k1c/lib/Machblockgen.v
index db48934e..2ba42814 100644
--- a/mppa_k1c/lib/Machblockgen.v
+++ b/mppa_k1c/lib/Machblockgen.v
@@ -33,7 +33,7 @@ Definition trans_inst (i:Mach.instruction) : Machblock_inst :=
   | Msetstack src ofs ty          => MB_basic (MBsetstack src ofs ty)
   | Mgetparam ofs ty dst          => MB_basic (MBgetparam ofs ty dst)
   | Mop       op args res         => MB_basic (MBop       op args res)
-  | Mload     chunk addr args dst => MB_basic (MBload     chunk addr args dst)
+  | Mload trap chunk addr args dst=> MB_basic (MBload trap chunk addr args dst)
   | Mstore    chunk addr args src => MB_basic (MBstore    chunk addr args src)
   | Mlabel l => MB_label l
   end.
@@ -105,7 +105,7 @@ Inductive is_end_block: Machblock_inst -> code -> Prop :=
   | End_basic bi bh bl: header bh <> nil -> is_end_block (MB_basic bi) (bh::bl)
   | End_cfi cfi bl: bl <> nil -> is_end_block (MB_cfi cfi) bl. 
 
-Local Hint Resolve End_empty End_basic End_cfi.
+Local Hint Resolve End_empty End_basic End_cfi: core.
 
 Inductive is_trans_code: Mach.code -> code -> Prop :=
   | Tr_nil: is_trans_code nil nil
@@ -123,7 +123,7 @@ Inductive is_trans_code: Mach.code -> code -> Prop :=
       header bh = nil ->
       is_trans_code (i::c) (add_basic bi bh::bl).
 
-Local Hint Resolve Tr_nil Tr_end_block.
+Local Hint Resolve Tr_nil Tr_end_block: core.
 
 Lemma add_to_code_is_trans_code i c bl:
   is_trans_code c bl ->
@@ -145,7 +145,7 @@ Proof.
       rewrite <- Heqti. eapply End_cfi. congruence.
 Qed.
 
-Local Hint Resolve add_to_code_is_trans_code.
+Local Hint Resolve add_to_code_is_trans_code: core.
 
 Lemma trans_code_is_trans_code_rev c1: forall c2 mbi, 
   is_trans_code c2 mbi ->
@@ -185,7 +185,7 @@ Proof.
   exists mbi1. split; congruence.
 Qed.
 
-Local Hint Resolve trans_code_is_trans_code.
+Local Hint Resolve trans_code_is_trans_code: core.
 
 Theorem is_trans_code_inv c bl: is_trans_code c bl <-> bl=(trans_code c).
 Proof.
diff --git a/mppa_k1c/lib/Machblockgenproof.v b/mppa_k1c/lib/Machblockgenproof.v
index ab7fff74..0de2df52 100644
--- a/mppa_k1c/lib/Machblockgenproof.v
+++ b/mppa_k1c/lib/Machblockgenproof.v
@@ -72,7 +72,7 @@ Proof.
   apply match_states_trans_state.
 Qed.
 
-Local Hint Resolve match_states_trans_state.
+Local Hint Resolve match_states_trans_state: core.
 
 Lemma symbols_preserved:
   forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
@@ -284,7 +284,7 @@ Proof.
 Qed.
 
 Local Hint Resolve symbols_preserved senv_preserved init_mem_preserved prog_main_preserved functions_translated
-                   parent_sp_preserved.
+                   parent_sp_preserved: core.
 
 
 Definition dist_end_block_code (c: Mach.code) := 
@@ -299,8 +299,8 @@ Definition dist_end_block (s: Mach.state): nat :=
   | _ => 0
   end.
 
-Local Hint Resolve exec_nil_body exec_cons_body.
-Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore.
+Local Hint Resolve exec_nil_body exec_cons_body: core.
+Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore: core.
 
 Lemma size_add_label l bh: size (add_label l bh) = size bh + 1.
 Proof.
@@ -336,7 +336,7 @@ Proof.
     omega.
 Qed.
 
-Local Hint Resolve dist_end_block_code_simu_mid_block.
+Local Hint Resolve dist_end_block_code_simu_mid_block: core.
 
 
 Lemma size_nonzero c b bl:
@@ -392,8 +392,8 @@ destruct i; congruence.
 Qed.
 
 
-Local Hint Resolve Mlabel_is_not_cfi.
-Local Hint Resolve MBbasic_is_not_cfi.
+Local Hint Resolve Mlabel_is_not_cfi: core.
+Local Hint Resolve MBbasic_is_not_cfi: core.
 
 Lemma add_to_new_block_is_label i:
   header (add_to_new_bblock (trans_inst i)) <> nil -> exists l, i = Mlabel l.
@@ -408,7 +408,7 @@ Proof.
   + unfold cfi_bblock in H; simpl in H; congruence.
 Qed.
 
-Local Hint Resolve Mlabel_is_not_basic.
+Local Hint Resolve Mlabel_is_not_basic: core.
 
 Lemma trans_code_decompose c: forall b bl,
   is_trans_code c (b::bl) ->
@@ -483,6 +483,10 @@ Proof.
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
   - eapply exec_MBload; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
+  - eapply exec_MBload_notrap1; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
+    unfold Genv.symbol_address; rewrite symbols_preserved; auto.
+  - eapply exec_MBload_notrap2; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
+    unfold Genv.symbol_address; rewrite symbols_preserved; auto.
   - eapply exec_MBstore; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
 Qed.
@@ -506,8 +510,8 @@ Proof.
     rewrite Hs2, Hb2; eauto.
     Qed. 
 
-Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit.
-Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same.
+Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit: core.
+Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same: core.
 
 
 Lemma match_states_concat_trans_code st f sp c rs m h: 
@@ -719,13 +723,13 @@ Proof.
     exists nil; simpl; eexists. eapply Tr_add_label; eauto.
   - (*i=basic*)
     destruct i'.
-    Focus 10. exists (add_to_new_bblock (MB_basic bi)::nil).  exists b. 
+    10: { exists (add_to_new_bblock (MB_basic bi)::nil).  exists b. 
       cutrewrite ((add_to_new_bblock (MB_basic bi) :: nil) ++ (b::l)=(add_to_new_bblock (MB_basic bi) :: (b::l)));eauto.
       rewrite Heqti.        
       eapply  Tr_end_block; eauto.
       rewrite <-Heqti.
       eapply End_basic. inversion H; try(simpl; congruence).
-      simpl in H5; congruence.
+      simpl in H5; congruence. }
     all: try(exists nil; simpl; eexists;  eapply  Tr_add_basic; eauto; inversion H; try(eauto || congruence)).
   - (*i=cfi*)
     destruct i; try(simpl in Heqti; congruence).
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index ab348c14..10f38391 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -30,6 +30,10 @@ Definition align_float64 := 8%Z.
 (** Can we use the 64-bit extensions to the PowerPC architecture? *)
 Parameter ppc64 : bool.
 
+(** Should single-precision FP arguments passed on stack be passed 
+    as singles or use double FP format. *)
+Parameter single_passed_as_single : bool.
+
 Definition splitlong := negb ppc64.
 
 Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index b9300fd7..4fb38ff8 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -864,7 +864,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pisel rd r1 r2 bit =>
       let v :=
           match rs#(reg_of_crbit bit) with
-          | Vint n => if Int.eq n Int.zero then rs#r2 else rs#r1
+          | Vint n => if Int.eq n Int.zero then rs#r2 else (gpr_or_zero rs r1)
           | _ => Vundef
           end in
       Next (nextinstr (rs #rd <- v #GPR0 <- Vundef)) m
diff --git a/powerpc/AsmToJSON.ml b/powerpc/AsmToJSON.ml
index 99c51e43..f4d4285a 100644
--- a/powerpc/AsmToJSON.ml
+++ b/powerpc/AsmToJSON.ml
@@ -17,12 +17,10 @@ open AST
 open BinNums
 open Camlcoq
 open Json
-open Format
 open JsonAST
 
 let pp_reg pp t n =
-  let s = sprintf "%s%s" t n in
-  pp_jsingle_object pp "Register" pp_jstring s
+  pp_jsingle_object pp "Register" pp_jstring (t ^ n)
 
 let pp_ireg pp reg =
   pp_reg pp "r" (TargetPrinter.int_reg_name reg)
@@ -31,8 +29,8 @@ let pp_freg pp reg =
   pp_reg pp "f" (TargetPrinter.float_reg_name reg)
 
 let preg_annot = function
-  | IR r -> sprintf "r%s" (TargetPrinter.int_reg_name r)
-  | FR r -> sprintf "f%s" (TargetPrinter.float_reg_name r)
+  | IR r -> "r" ^ (TargetPrinter.int_reg_name r)
+  | FR r -> "f" ^ (TargetPrinter.float_reg_name r)
   | _    -> assert false
 
 let pp_constant pp c =
@@ -86,28 +84,31 @@ let pp_arg pp = function
   | Atom a -> pp_atom_constant pp a
   | String s -> pp_jsingle_object pp "String" pp_jstring s
 
-let mnemonic_names  =["Padd"; "Paddc"; "Padde"; "Paddi"; "Paddic"; "Paddis"; "Paddze"; "Pand_";
-                      "Pandc"; "Pandi_"; "Pandis_"; "Pannot"; "Pb"; "Pbctr"; "Pbctrl"; "Pbdnz";
-                      "Pbf"; "Pbl"; "Pblr"; "Pbs"; "Pbt"; "Pbtbl"; "Pcmpb"; "Pcmpd"; "Pcmpdi";
-                      "Pcmpld"; "Pcmpldi"; "Pcmplw"; "Pcmplwi"; "Pcmpw"; "Pcmpwi"; "Pcntlzd";
-                      "Pcntlzw"; "Pcreqv"; "Pcror"; "Pcrxor"; "Pdcbf"; "Pdcbi"; "Pdcbt";
-                      "Pdcbtls"; "Pdcbtst"; "Pdcbz"; "Pdivd"; "Pdivdu"; "Pdivw"; "Pdivwu";
-                      "Peieio"; "Peqv"; "Pextsb"; "Pextsh"; "Pextsw"; "Pfabs"; "Pfadd"; "Pfadds";
-                      "Pfcfid"; "Pfcmpu"; "Pfctidz"; "Pfctiw"; "Pfctiwz"; "Pfdiv"; "Pfdivs";
-                      "Pfmadd"; "Pfmr"; "Pfmsub"; "Pfmul"; "Pfmuls"; "Pfneg"; "Pfnmadd";
-                      "Pfnmsub"; "Pfres"; "Pfrsp"; "Pfrsqrte"; "Pfsel"; "Pfsqrt"; "Pfsub";
-                      "Pfsubs"; "Picbi"; "Picbtls"; "Pinlineasm"; "Pisel"; "Pisync"; "Plabel";
-                      "Plbz"; "Plbzx"; "Pld"; "Pldbrx"; "Pldi"; "Pldx"; "Plfd"; "Plfdx"; "Plfi"; "Plfis";
-                      "Plfs"; "Plfsx"; "Plha"; "Plhax"; "Plhbrx"; "Plhz"; "Plhzx"; "Plwarx";
-                      "Plwbrx"; "Plwsync"; "Plwz"; "Plwzu"; "Plwzx"; "Pmbar"; "Pmfcr"; "Pmflr";
-                      "Pmfspr"; "Pmr"; "Pmtctr"; "Pmtlr"; "Pmtspr"; "Pmulhd"; "Pmulhdu"; "Pmulhw";
-                      "Pmulhwu"; "Pmulld"; "Pmulli"; "Pmullw"; "Pnand"; "Pnor"; "Por"; "Porc";
-                      "Pori"; "Poris"; "Prldicl"; "Prldimi"; "Prldinm"; "Prlwimi"; "Prlwinm";
-                      "Psld"; "Pslw"; "Psrad"; "Psradi"; "Psraw"; "Psrawi"; "Psrd"; "Psrw";
-                      "Pstb"; "Pstbx"; "Pstd"; "Pstdbrx"; "Pstdu"; "Pstdx"; "Pstfd"; "Pstfdu"; "Pstfdx";
-                      "Pstfs"; "Pstfsx"; "Psth"; "Psthbrx"; "Psthx"; "Pstw"; "Pstwbrx"; "Pstwcx_";
-                      "Pstwu"; "Pstwux"; "Pstwx"; "Psubfc"; "Psubfe"; "Psubfic"; "Psubfze";
-                      "Psync"; "Ptrap"; "Pxor"; "Pxori"; "Pxoris"]
+module StringSet = Set.Make(String)
+
+let mnemonic_names = StringSet.of_list
+    ["Padd"; "Paddc"; "Padde"; "Paddi"; "Paddic"; "Paddis"; "Paddze"; "Pand_";
+     "Pandc"; "Pandi_"; "Pandis_"; "Pannot"; "Pb"; "Pbctr"; "Pbctrl"; "Pbdnz";
+     "Pbf"; "Pbl"; "Pblr"; "Pbs"; "Pbt"; "Pbtbl"; "Pcmpb"; "Pcmpd"; "Pcmpdi";
+     "Pcmpld"; "Pcmpldi"; "Pcmplw"; "Pcmplwi"; "Pcmpw"; "Pcmpwi"; "Pcntlzd";
+     "Pcntlzw"; "Pcreqv"; "Pcror"; "Pcrxor"; "Pdcbf"; "Pdcbi"; "Pdcbt";
+     "Pdcbtls"; "Pdcbtst"; "Pdcbz"; "Pdivd"; "Pdivdu"; "Pdivw"; "Pdivwu";
+     "Peieio"; "Peqv"; "Pextsb"; "Pextsh"; "Pextsw"; "Pfabs"; "Pfadd"; "Pfadds";
+     "Pfcfid"; "Pfcmpu"; "Pfctidz"; "Pfctiw"; "Pfctiwz"; "Pfdiv"; "Pfdivs";
+     "Pfmadd"; "Pfmr"; "Pfmsub"; "Pfmul"; "Pfmuls"; "Pfneg"; "Pfnmadd";
+     "Pfnmsub"; "Pfres"; "Pfrsp"; "Pfrsqrte"; "Pfsel"; "Pfsqrt"; "Pfsub";
+     "Pfsubs"; "Picbi"; "Picbtls"; "Pinlineasm"; "Pisel"; "Pisync"; "Plabel";
+     "Plbz"; "Plbzx"; "Pld"; "Pldbrx"; "Pldi"; "Pldx"; "Plfd"; "Plfdx"; "Plfi";
+     "Plfis"; "Plfs"; "Plfsx"; "Plha"; "Plhax"; "Plhbrx"; "Plhz"; "Plhzx";
+     "Plwarx"; "Plwbrx"; "Plwsync"; "Plwz"; "Plwzu"; "Plwzx"; "Pmbar"; "Pmfcr";
+     "Pmflr"; "Pmfspr"; "Pmr"; "Pmtctr"; "Pmtlr"; "Pmtspr"; "Pmulhd"; "Pmulhdu";
+     "Pmulhw"; "Pmulhwu"; "Pmulld"; "Pmulli"; "Pmullw"; "Pnand"; "Pnor"; "Por";
+     "Porc"; "Pori"; "Poris"; "Prldicl"; "Prldimi"; "Prldinm"; "Prlwimi";
+     "Prlwinm"; "Psld"; "Pslw"; "Psrad"; "Psradi"; "Psraw"; "Psrawi"; "Psrd";
+     "Psrw"; "Pstb"; "Pstbx"; "Pstd"; "Pstdbrx"; "Pstdu"; "Pstdx"; "Pstfd";
+     "Pstfdu"; "Pstfdx"; "Pstfs"; "Pstfsx"; "Psth"; "Psthbrx"; "Psthx"; "Pstw";
+     "Pstwbrx"; "Pstwcx_"; "Pstwu"; "Pstwux"; "Pstwx"; "Psubfc"; "Psubfe";
+     "Psubfic"; "Psubfze"; "Psync"; "Ptrap"; "Pxor"; "Pxori"; "Pxoris"]
 
 let pp_instructions pp ic =
   let ic = List.filter (fun s -> match s with
@@ -126,7 +127,7 @@ let pp_instructions pp ic =
       | Pcfi_rel_offset _ -> false
       | _ -> true) ic in
   let instruction pp n args =
-    assert (List.mem n mnemonic_names);
+    assert (StringSet.mem n mnemonic_names);
     pp_jobject_start pp;
     pp_jmember ~first:true pp "Instruction Name" pp_jstring n;
     pp_jmember pp "Args" (pp_jarray pp_arg) args;
@@ -251,7 +252,7 @@ let pp_instructions pp ic =
   | Plhbrx (ir1,ir2,ir3) -> instruction pp "Plhbrx" [Ireg ir1; Ireg ir2; Ireg ir3]
   | Plhz (ir1,c,ir2) -> instruction pp "Plhz" [Ireg ir1; Constant c; Ireg ir2]
   | Plhzx (ir1,ir2,ir3) -> instruction pp "Plhzx" [Ireg ir1; Ireg ir2; Ireg ir3]
-  | Pldi (ir,c) -> instruction pp "Pldi" [Ireg ir; Long c] (* FIXME Cint is too small, we need Clong *)
+  | Pldi (ir,c) -> instruction pp "Pldi" [Ireg ir; Long c]
   | Plmake _ (* Should not occur *)
   | Pllo _ (* Should not occur *)
   | Plhi _ -> assert false (* Should not occur *)
@@ -385,8 +386,8 @@ let print_if prog sourcename =
   | Some f ->
     let f = Filename.concat !sdump_folder f in
     let oc = open_out_bin f in
-    pp_ast (formatter_of_out_channel oc) pp_instructions prog sourcename;
+    pp_ast oc pp_instructions prog sourcename;
     close_out oc
 
 let pp_mnemonics pp =
-  pp_mnemonics pp mnemonic_names
+  pp_mnemonics pp (StringSet.elements mnemonic_names)
diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml
index 704b0aba..ce88778c 100644
--- a/powerpc/Asmexpand.ml
+++ b/powerpc/Asmexpand.ml
@@ -852,7 +852,7 @@ let expand_instruction instr =
       if variadic then begin
         emit (Pmflr GPR0);
         emit (Pbl(intern_string "__compcert_va_saveregs",
-                  {sig_args = []; sig_res = None; sig_cc = cc_default}));
+                  {sig_args = []; sig_res = Tvoid; sig_cc = cc_default}));
         emit (Pmtlr GPR0)
       end;
       current_function_stacksize := sz;
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index a686414a..29e2c028 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -783,8 +783,13 @@ Definition transl_memory_access
       Error(msg "Asmgen.transl_memory_access")
   end.
 
-Definition transl_load (chunk: memory_chunk) (addr: addressing)
-                       (args: list mreg) (dst: mreg) (k: code) :=
+Definition transl_load
+           (trap : trapping_mode)
+           (chunk: memory_chunk) (addr: addressing)
+           (args: list mreg) (dst: mreg) (k: code) :=
+  match trap with
+  | NOTRAP => Error (msg "Asmgen.transl_load non-trapping loads unsupported on PPC")
+  | TRAP =>
   match chunk with
   | Mint8signed =>
       do r <- ireg_of dst;
@@ -812,6 +817,7 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
       transl_memory_access (Plfd r) (Plfdx r) addr args GPR12 k
   | _ =>
       Error (msg "Asmgen.transl_load")
+  end
   end.
 
 Definition transl_store (chunk: memory_chunk) (addr: addressing)
@@ -869,8 +875,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
          loadind GPR1 f.(fn_link_ofs) Tint R11 k1)
   | Mop op args res =>
       transl_op op args res k
-  | Mload chunk addr args dst =>
-      transl_load chunk addr args dst k
+  | Mload trap chunk addr args dst =>
+      transl_load trap chunk addr args dst k
   | Mstore chunk addr args src =>
       transl_store chunk addr args src k
   | Mcall sig (inl r) =>
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index d653633c..21d5ce48 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -328,6 +328,7 @@ Proof.
   eapply loadind_label; eauto.
   eapply tail_nolabel_trans; eapply loadind_label; eauto.
   eapply transl_op_label; eauto.
+  destruct t; try discriminate.
   destruct m; monadInv H; (eapply tail_nolabel_trans; [eapply transl_memory_access_label; TailNoLabel|TailNoLabel]).
   destruct m; monadInv H; eapply transl_memory_access_label; TailNoLabel.
   destruct s0; monadInv H; TailNoLabel.
@@ -657,6 +658,13 @@ Opaque loadind.
   split. simpl; congruence.
   apply R; auto with asmgen.
 
+
+- (* Mload notrap *) (* isn't there a nicer way? *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
+- (* Mload notrap *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
 - (* Mstore *)
   assert (eval_addressing tge sp addr rs##args = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index 884d5366..1b797999 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -1284,7 +1284,9 @@ Proof.
       reflexivity.
     + Simpl.
       rewrite <- (C r1), <- (C r2) by auto.
-      rewrite B. destruct dir; destruct ob as [[]|]; simpl; auto using Val.lessdef_normalize.
+      rewrite B, gpr_or_zero_not_zero.
+      destruct dir; destruct ob as [[]|]; simpl; auto using Val.lessdef_normalize.
+      destruct dir; intros e; subst; discriminate.
     + intros. Simpl.
 Qed.
 
@@ -1677,8 +1679,8 @@ Qed.
 (** Translation of loads *)
 
 Lemma transl_load_correct:
-  forall chunk addr args dst k c (rs: regset) m a v,
-  transl_load chunk addr args dst k = OK c ->
+  forall trap chunk addr args dst k c (rs: regset) m a v,
+  transl_load trap chunk addr args dst k = OK c ->
   eval_addressing ge (rs#GPR1) addr (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists rs',
@@ -1687,6 +1689,7 @@ Lemma transl_load_correct:
   /\ forall r, r <> PC -> r <> GPR12 -> r <> GPR0 -> r <> preg_of dst -> rs' r = rs r.
 Proof.
   intros.
+  destruct trap; try discriminate.
   assert (LD: forall v, Val.lessdef a v -> v = a).
   { intros. inv H2; auto. discriminate H1. }
   assert (BASE: forall mk1 mk2 k' chunk' v',
diff --git a/powerpc/Builtins1.v b/powerpc/Builtins1.v
index f6e643d2..53c83d7e 100644
--- a/powerpc/Builtins1.v
+++ b/powerpc/Builtins1.v
@@ -29,5 +29,5 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
   match b with end.
 
-Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
   match b with end.
diff --git a/powerpc/CSE2deps.v b/powerpc/CSE2deps.v
new file mode 100644
index 00000000..9db51bbb
--- /dev/null
+++ b/powerpc/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+     (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+  && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+  && ((ofsw + size_chunk chunkw <=? ofsr) ||
+      (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+  match addr, addr', args, args' with
+  | (Aindexed ofs), (Aindexed ofs'),
+    (base :: nil), (base' :: nil) =>
+    if peq base base'
+    then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
+    else true  | _, _, _, _ => true
+  end.
diff --git a/powerpc/CSE2depsproof.v b/powerpc/CSE2depsproof.v
new file mode 100644
index 00000000..fdded9b6
--- /dev/null
+++ b/powerpc/CSE2depsproof.v
@@ -0,0 +1,135 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+  Ptrofs.wordsize = if Archi.ptr64 then 64%nat else 32%nat.
+Proof.
+  unfold Ptrofs.wordsize.
+  unfold Wordsize_Ptrofs.wordsize.
+  trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+  Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
+Proof.
+  unfold Ptrofs.modulus.
+  rewrite ptrofs_size.
+  destruct Archi.ptr64; reflexivity.
+Qed.
+
+Lemma ptrofs_max_unsigned :
+  Ptrofs.max_unsigned = if Archi.ptr64 then 18446744073709551615 else 4294967295.
+Proof.
+  unfold Ptrofs.max_unsigned.
+  rewrite ptrofs_modulus.
+  destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section MEMORY_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  Variable base : val.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : int.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Aindexed ofsw) (base :: nil) = Some addrw. 
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Aindexed ofsr) (base :: nil) = Some addrr.
+
+  Lemma load_store_away1 :
+    forall RANGEW : 0 <= Int.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Int.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Int.unsigned ofsw + size_chunk chunkw <= Int.unsigned ofsr
+                       \/ Int.unsigned ofsr + size_chunk chunkr <= Int.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+    destruct base; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsr)) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsw)) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: unfold Ptrofs.of_int.
+
+    all: repeat rewrite Ptrofs.unsigned_repr by (unfold Ptrofs.max_unsigned; rewrite ptrofs_modulus; destruct Archi.ptr64; lia).
+    all: repeat rewrite ptrofs_modulus.
+    all: destruct Archi.ptr64; intuition lia.
+  Qed.
+  
+  Theorem load_store_away :
+    can_swap_accesses_ofs (Int.unsigned ofsr) chunkr (Int.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Lemma may_overlap_sound:
+  forall m m' : mem,
+  forall chunk addr args chunk' addr' args' v a a' rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+    (may_overlap chunk addr args chunk' addr' args') = false ->
+    (Mem.storev chunk m a v) = Some m' ->
+    (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m  a').
+Proof.
+  intros until rs.
+  intros ADDR ADDR' OVERLAP STORE.
+  destruct addr; destruct addr'; try discriminate.
+  { (* Aindexed / Aindexed *)
+  destruct args as [ | base [ | ]]. 1,3: discriminate.
+  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+  simpl in OVERLAP.
+  destruct (peq base base'). 2: discriminate.
+  subst base'.
+  destruct (can_swap_accesses_ofs (Int.unsigned i0) chunk' (Int.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  simpl in *.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  }
+Qed.
+
+End SOUNDNESS.
diff --git a/powerpc/Conventions1.v b/powerpc/Conventions1.v
index 1de55c1a..5c9cbd4f 100644
--- a/powerpc/Conventions1.v
+++ b/powerpc/Conventions1.v
@@ -117,18 +117,16 @@ Definition dummy_float_reg := F0.   (**r Used in [Coloring]. *)
   We treat a function without result as a function with one integer result. *)
 
 Definition loc_result_32 (s: signature) : rpair mreg :=
-  match s.(sig_res) with
-  | None => One R3
-  | Some (Tint | Tany32) => One R3
-  | Some (Tfloat | Tsingle | Tany64) => One F1
-  | Some Tlong => Twolong R3 R4
+  match proj_sig_res s with
+  | Tint | Tany32 => One R3
+  | Tfloat | Tsingle | Tany64 => One F1
+  | Tlong => Twolong R3 R4
   end.
 
 Definition loc_result_64 (s: signature) : rpair mreg :=
-  match s.(sig_res) with
-  | None => One R3
-  | Some (Tint | Tlong | Tany32 | Tany64) => One R3
-  | Some (Tfloat | Tsingle) => One F1
+  match proj_sig_res s with
+  | Tint | Tlong | Tany32 | Tany64 => One R3
+  | Tfloat | Tsingle => One F1
   end.
 
 Definition loc_result :=
@@ -140,8 +138,8 @@ Lemma loc_result_type:
   forall sig,
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
-  intros. unfold proj_sig_res, loc_result, loc_result_32, loc_result_64, mreg_type.
-  destruct Archi.ptr64 eqn:?; destruct (sig_res sig) as [[]|]; destruct Archi.ppc64; simpl; auto.
+  intros. unfold loc_result, loc_result_32, loc_result_64, mreg_type.
+  destruct Archi.ptr64 eqn:?; destruct (proj_sig_res sig); destruct Archi.ppc64; simpl; auto.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -151,7 +149,7 @@ Lemma loc_result_caller_save:
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
   intros. unfold loc_result, loc_result_32, loc_result_64, is_callee_save;
-  destruct Archi.ptr64; destruct (sig_res s) as [[]|]; simpl; auto.
+  destruct Archi.ptr64; destruct (proj_sig_res s); simpl; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -161,13 +159,13 @@ Lemma loc_result_pair:
   match loc_result sg with
   | One _ => True
   | Twolong r1 r2 =>
-        r1 <> r2 /\ sg.(sig_res) = Some Tlong
+        r1 <> r2 /\ proj_sig_res sg = Tlong
      /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
      /\ Archi.ptr64 = false
   end.
 Proof.
   intros; unfold loc_result, loc_result_32, loc_result_64, mreg_type;
-  destruct Archi.ptr64; destruct (sig_res sg) as [[]|]; destruct Archi.ppc64; simpl; auto.
+  destruct Archi.ptr64; destruct (proj_sig_res sg); destruct Archi.ppc64; simpl; auto.
   split; auto. congruence.
   split; auto. congruence.
 Qed.
@@ -177,7 +175,7 @@ Qed.
 Lemma loc_result_exten:
   forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
 Proof.
-  intros. unfold loc_result, loc_result_32, loc_result_64.
+  intros. unfold loc_result, loc_result_32, loc_result_64, proj_sig_res.
   destruct Archi.ptr64; rewrite H; auto.
 Qed.
 
@@ -210,7 +208,16 @@ Fixpoint loc_arguments_rec
       | Some ireg =>
           One (R ireg) :: loc_arguments_rec tys (ir + 1) fr ofs
       end
-  | (Tfloat | Tsingle | Tany64) as ty :: tys =>
+  | Tsingle as ty :: tys =>
+      match list_nth_z float_param_regs fr with
+      | None =>
+          let ty := if Archi.single_passed_as_single then Tsingle else Tany64 in
+          let ofs := align ofs (typesize ty) in
+          One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + (typesize ty))
+      | Some freg =>
+          One (R freg) :: loc_arguments_rec tys ir (fr + 1) ofs
+      end
+  | (Tfloat | Tany64) as ty :: tys =>
       match list_nth_z float_param_regs fr with
       | None =>
           let ofs := align ofs 2 in
@@ -238,33 +245,6 @@ Fixpoint loc_arguments_rec
 Definition loc_arguments (s: signature) : list (rpair loc) :=
   loc_arguments_rec s.(sig_args) 0 0 0.
 
-(** [size_arguments s] returns the number of [Outgoing] slots used
-  to call a function with signature [s]. *)
-
-Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
-  match tyl with
-  | nil => ofs
-  | (Tint | Tany32) :: tys =>
-      match list_nth_z int_param_regs ir with
-      | None => size_arguments_rec tys ir fr (ofs + 1)
-      | Some ireg => size_arguments_rec tys (ir + 1) fr ofs
-      end
-  | (Tfloat | Tsingle | Tany64) :: tys =>
-      match list_nth_z float_param_regs fr with
-      | None => size_arguments_rec tys ir fr (align ofs 2 + 2)
-      | Some freg => size_arguments_rec tys ir (fr + 1) ofs
-      end
-  | Tlong :: tys =>
-      let ir := align ir 2 in
-      match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with
-      | Some r1, Some r2 => size_arguments_rec tys (ir + 2) fr ofs
-      | _, _ => size_arguments_rec tys ir fr (align ofs 2 + 2)
-      end
-  end.
-
-Definition size_arguments (s: signature) : Z :=
-  size_arguments_rec s.(sig_args) 0 0 0.
-
 (** Argument locations are either caller-save registers or [Outgoing]
   stack slots at nonnegative offsets. *)
 
@@ -324,12 +304,14 @@ Opaque list_nth_z.
   apply align_divides; omega. apply Z.divide_1_l. apply Z.divide_1_l.
   eapply Y; eauto. omega.
 - (* single *)
+  assert (ofs <= align ofs 1) by (apply align_le; omega).
   assert (ofs <= align ofs 2) by (apply align_le; omega).
   destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H.
   subst. right. eapply list_nth_z_in; eauto.
   eapply IHtyl; eauto.
-  subst. split. omega. apply Z.divide_1_l.
-  eapply Y; eauto. omega.
+  subst. split. destruct Archi.single_passed_as_single; simpl; omega.
+  destruct Archi.single_passed_as_single; simpl; apply Z.divide_1_l.
+  eapply Y; eauto. destruct Archi.single_passed_as_single; simpl; omega.
 - (* any32 *)
   destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H.
   subst. left. eapply list_nth_z_in; eauto.
@@ -361,107 +343,14 @@ Qed.
 
 Hint Resolve loc_arguments_acceptable: locs.
 
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark size_arguments_rec_above:
-  forall tyl ir fr ofs0,
-  ofs0 <= size_arguments_rec tyl ir fr ofs0.
-Proof.
-  induction tyl; simpl; intros.
-  omega.
-  destruct a.
-  destruct (list_nth_z int_param_regs ir); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
-  destruct (list_nth_z float_param_regs fr); eauto.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-  set (ir' := align ir 2).
-  destruct (list_nth_z int_param_regs ir'); eauto.
-  destruct (list_nth_z int_param_regs (ir' + 1)); eauto.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-  destruct (list_nth_z float_param_regs fr); eauto.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-  destruct (list_nth_z int_param_regs ir); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
-  destruct (list_nth_z float_param_regs fr); eauto.
-  apply Z.le_trans with (align ofs0 2). apply align_le; omega.
-  apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-Qed.
-
-Lemma size_arguments_above:
-  forall s, size_arguments s >= 0.
-Proof.
-  intros; unfold size_arguments. apply Z.le_ge.
-  apply size_arguments_rec_above.
-Qed.
-
-Lemma loc_arguments_bounded:
-  forall (s: signature) (ofs: Z) (ty: typ),
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
-  ofs + typesize ty <= size_arguments s.
-Proof.
-  intros.
-  assert (forall tyl ir fr ofs0,
-    In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_rec tyl ir fr ofs0)) ->
-    ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0).
-{
-  induction tyl; simpl; intros.
-  elim H0.
-  destruct a.
-- (* int *)
-  destruct (list_nth_z int_param_regs ir); destruct H0.
-  congruence.
-  eauto.
-  inv H0. apply size_arguments_rec_above.
-  eauto.
-- (* float *)
-  destruct (list_nth_z float_param_regs fr); destruct H0.
-  congruence.
-  eauto.
-  inv H0. apply size_arguments_rec_above. eauto.
-- (* long *)
-  set (ir' := align ir 2) in *.
-  assert (DFL:
-    In (S Outgoing ofs ty) (regs_of_rpairs
-                             ((if Archi.ptr64
-                               then One (S Outgoing (align ofs0 2) Tlong)
-                               else Twolong (S Outgoing (align ofs0 2) Tint)
-                                            (S Outgoing (align ofs0 2 + 1) Tint))
-                          :: loc_arguments_rec tyl ir' fr (align ofs0 2 + 2))) ->
-    ofs + typesize ty <= size_arguments_rec tyl ir' fr (align ofs0 2 + 2)).
-  { destruct Archi.ptr64; intros IN.
-  - destruct IN. inv H1. apply size_arguments_rec_above. auto.
-  - destruct IN. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
-    destruct H1. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
-    auto. }
-  destruct (list_nth_z int_param_regs ir'); auto.
-  destruct (list_nth_z int_param_regs (ir' + 1)); auto.
-  destruct H0. congruence. destruct H0. congruence. eauto.
-- (* single *)
-  destruct (list_nth_z float_param_regs fr); destruct H0.
-  congruence.
-  eauto.
-  inv H0. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
-  eauto.
-- (* any32 *)
-  destruct (list_nth_z int_param_regs ir); destruct H0.
-  congruence.
-  eauto.
-  inv H0. apply size_arguments_rec_above.
-  eauto.
-- (* any64 *)
-  destruct (list_nth_z float_param_regs fr); destruct H0.
-  congruence.
-  eauto.
-  inv H0. apply size_arguments_rec_above. eauto.
-  }
-  eauto.
-Qed.
-
 Lemma loc_arguments_main:
   loc_arguments signature_main = nil.
 Proof.
   reflexivity.
 Qed.
+
+(** ** Normalization of function results *)
+
+(** No normalization needed. *)
+
+Definition return_value_needs_normalization (t: rettype) := false.
diff --git a/powerpc/DuplicateOpcodeHeuristic.ml b/powerpc/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..33be79e8
--- /dev/null
+++ b/powerpc/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,27 @@
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+  match cond with
+  | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccompf c -> (match c with
+      | Ceq -> Some false
+      | Cne -> Some true
+      | _ -> None
+      )
+  | Cnotcompf c -> (match c with
+      | Ceq -> Some true
+      | Cne -> Some false
+      | _ -> None
+      )
+  | _ -> None
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 0f082c1f..b73cb14b 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -581,6 +581,30 @@ Proof with (try exact I; try reflexivity).
   unfold Val.select. destruct (eval_condition c vl m). apply Val.normalize_type. exact I.
 Qed.
 
+Definition is_trapping_op (op : operation) :=
+  match op with
+  | Odiv | Odivl | Odivu | Odivlu
+  | Oshrximm _ | Oshrxlimm _
+  | Ointoffloat | Ointuoffloat
+  | Ofloatofint | Ofloatofintu
+  | Olongoffloat
+  | Ofloatoflong => true
+  | _ => false
+  end.
+
+Lemma is_trapping_op_sound:
+  forall op vl sp m,
+    op <> Omove ->
+    is_trapping_op op = false ->
+    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    eval_operation genv sp op vl m <> None.
+Proof.
+  destruct op; intros; simpl in *; try congruence.
+  all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+  all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+  all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+  all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
@@ -1032,6 +1056,21 @@ Proof.
   apply Val.add_inject; auto. apply H; simpl; auto.
 Qed.
 
+
+Lemma eval_addressing_inj_none:
+  forall addr sp1 vl1 sp2 vl2,
+  (forall id ofs,
+      In id (globals_addressing addr) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing ge1 sp1 addr vl1 = None ->
+  eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+  intros until vl2. intros Hglobal Hinjsp Hinjvl.
+  destruct addr; simpl in *;
+  inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate.
+Qed.
 End EVAL_COMPAT.
 
 (** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
@@ -1098,6 +1137,20 @@ Proof.
   rewrite <- val_inject_list_lessdef. eauto. auto.
 Qed.
 
+
+Lemma eval_addressing_lessdef_none:
+  forall sp addr vl1 vl2,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = None ->
+  eval_addressing genv sp addr vl2 = None.
+Proof.
+  intros until vl2. intros Hlessdef Heval1.
+  destruct addr; simpl in *;
+  inv Hlessdef; trivial; try discriminate;
+  inv H0; trivial; try discriminate;
+  inv H2; trivial; try discriminate.
+Qed.
+
 Lemma eval_operation_lessdef:
   forall sp op vl1 vl2 v1 m1 m2,
   Val.lessdef_list vl1 vl2 ->
@@ -1189,6 +1242,19 @@ Proof.
   econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
+Lemma eval_addressing_inject_none:
+  forall addr vl1 vl2,
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+  eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+  intros.
+  rewrite eval_shift_stack_addressing.
+  eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+
 Lemma eval_operation_inject:
   forall op vl1 vl2 v1 m1 m2,
   Val.inject_list f vl1 vl2 ->
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index b0cd70a4..8135bad6 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -809,7 +809,7 @@ Qed.
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
   red; intros. unfold cast8unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm; auto. compute; auto.
+  rewrite Val.zero_ext_and. apply eval_andimm; auto. omega.
 Qed.
 
 Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
@@ -822,7 +822,7 @@ Qed.
 Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
 Proof.
   red; intros. unfold cast16unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm; auto. compute; auto.
+  rewrite Val.zero_ext_and. apply eval_andimm; auto. omega.
 Qed.
 
 Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
diff --git a/powerpc/extractionMachdep.v b/powerpc/extractionMachdep.v
index 7482435f..a3e945bf 100644
--- a/powerpc/extractionMachdep.v
+++ b/powerpc/extractionMachdep.v
@@ -34,3 +34,6 @@ Extract Constant Archi.ppc64 =>
    | ""e5500"" -> true
    | _ -> false
    end".
+
+(* Choice of passing of single *)
+Extract Constant Archi.single_passed_as_single => "Configuration.gnu_toolchain".
diff --git a/riscV/Asmexpand.ml b/riscV/Asmexpand.ml
index 3e734747..7e36abf8 100644
--- a/riscV/Asmexpand.ml
+++ b/riscV/Asmexpand.ml
@@ -23,7 +23,7 @@ open Asm
 open Asmexpandaux
 open AST
 open Camlcoq
-open !Integers
+open! Integers
 
 exception Error of string
 
@@ -63,44 +63,44 @@ let expand_storeind_ptr src base ofs =
 let int_param_regs   = [| X10; X11; X12; X13; X14; X15; X16; X17 |]
 let float_param_regs = [| F10; F11; F12; F13; F14; F15; F16; F17 |]
 
-let rec fixup_variadic_call pos tyl =
-  if pos < 8 then
+let rec fixup_variadic_call ri rf tyl =
+  if ri < 8 then
     match tyl with
     | [] ->
         ()
     | (Tint | Tany32) :: tyl ->
-        fixup_variadic_call (pos + 1) tyl
+        fixup_variadic_call (ri + 1) rf tyl
     | Tsingle :: tyl ->
-        let rs =float_param_regs.(pos)
-        and rd = int_param_regs.(pos) in
+        let rs = float_param_regs.(rf)
+        and rd = int_param_regs.(ri) in
         emit (Pfmvxs(rd, rs));
-        fixup_variadic_call (pos + 1) tyl
+        fixup_variadic_call (ri + 1) (rf + 1) tyl
     | Tlong :: tyl ->
-        let pos' = if Archi.ptr64 then pos + 1 else align pos 2 + 2 in
-        fixup_variadic_call pos' tyl
+        let ri' = if Archi.ptr64 then ri + 1 else align ri 2 + 2 in
+        fixup_variadic_call ri' rf tyl
     | (Tfloat | Tany64) :: tyl ->
         if Archi.ptr64 then begin
-          let rs = float_param_regs.(pos)
-          and rd = int_param_regs.(pos) in
+          let rs = float_param_regs.(rf)
+          and rd = int_param_regs.(ri) in
           emit (Pfmvxd(rd, rs));
-          fixup_variadic_call (pos + 1) tyl
+          fixup_variadic_call (ri + 1) (rf + 1) tyl
         end else begin
-          let pos = align pos 2 in
-          if pos < 8 then begin
-            let rs = float_param_regs.(pos)
-            and rd1 = int_param_regs.(pos)
-            and rd2 = int_param_regs.(pos + 1) in
+          let ri = align ri 2 in
+          if ri < 8 then begin
+            let rs = float_param_regs.(rf)
+            and rd1 = int_param_regs.(ri)
+            and rd2 = int_param_regs.(ri + 1) in
             emit (Paddiw(X2, X X2, Integers.Int.neg _16));
             emit (Pfsd(rs, X2, Ofsimm _0));
             emit (Plw(rd1, X2, Ofsimm _0));
             emit (Plw(rd2, X2, Ofsimm _4));
             emit (Paddiw(X2, X X2, _16));
-            fixup_variadic_call (pos + 2) tyl
+            fixup_variadic_call (ri + 2) (rf + 1) tyl
           end
         end
         
 let fixup_call sg =
-  if sg.sig_cc.cc_vararg then fixup_variadic_call 0 sg.sig_args
+  if sg.sig_cc.cc_vararg then fixup_variadic_call 0 0 sg.sig_args
 
 (* Handling of annotations *)
 
@@ -483,7 +483,7 @@ let expand_instruction instr =
       emit (Pmv (X30, X2));
       if sg.sig_cc.cc_vararg then begin
         let n = arguments_size sg in
-        let extra_sz = if n >= 8 then 0 else align 16 ((8 - n) * wordsize) in
+        let extra_sz = if n >= 8 then 0 else align ((8 - n) * wordsize) 16 in
         let full_sz = Z.add sz (Z.of_uint extra_sz) in
         expand_addptrofs X2 X2 (Ptrofs.repr (Z.neg full_sz));
         expand_storeind_ptr X30 X2 ofs;
@@ -501,7 +501,7 @@ let expand_instruction instr =
      let extra_sz =
       if sg.sig_cc.cc_vararg then begin
         let n = arguments_size sg in
-        if n >= 8 then 0 else align 16 ((8 - n) * wordsize)
+        if n >= 8 then 0 else align ((8 - n) * wordsize) 16
       end else 0 in
      expand_addptrofs X2 X2 (Ptrofs.repr (Z.add sz (Z.of_uint extra_sz)))
 
diff --git a/riscV/Asmgen.v b/riscV/Asmgen.v
index a704ed74..b431d63d 100644
--- a/riscV/Asmgen.v
+++ b/riscV/Asmgen.v
@@ -25,6 +25,8 @@ Require Import Op Locations Mach Asm.
 Local Open Scope string_scope.
 Local Open Scope error_monad_scope.
 
+Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
+
 (** The code generation functions take advantage of several
   characteristics of the [Mach] code generated by earlier passes of the
   compiler, mostly that argument and result registers are of the correct
@@ -503,11 +505,16 @@ Definition transl_op
       OK (Psrliw rd rs n :: k)
   | Oshrximm n, a1 :: nil =>
       do rd <- ireg_of res; do rs <- ireg_of a1;
-      OK (if Int.eq n Int.zero then Pmv rd rs :: k else
-          Psraiw X31 rs (Int.repr 31) ::
-          Psrliw X31 X31 (Int.sub Int.iwordsize n) ::
-          Paddw X31 rs X31 ::
-          Psraiw rd X31 n :: k)  
+        OK (if Int.eq n Int.zero
+            then Pmv rd rs :: k
+            else if Int.eq n Int.one
+                 then Psrliw X31 rs (Int.repr 31) ::
+                      Paddw X31 rs X31 ::
+                      Psraiw rd X31 Int.one :: k
+                 else Psraiw X31 rs (Int.repr 31) ::
+                      Psrliw X31 X31 (Int.sub Int.iwordsize n) ::
+                      Paddw X31 rs X31 ::
+                      Psraiw rd X31 n :: k)  
 
   (* [Omakelong], [Ohighlong]  should not occur *)
   | Olowlong, a1 :: nil =>
@@ -592,11 +599,16 @@ Definition transl_op
       OK (Psrlil rd rs n :: k)
   | Oshrxlimm n, a1 :: nil =>
       do rd <- ireg_of res; do rs <- ireg_of a1;
-      OK (if Int.eq n Int.zero then Pmv rd rs :: k else
-          Psrail X31 rs (Int.repr 63) ::
-          Psrlil X31 X31 (Int.sub Int64.iwordsize' n) ::
-          Paddl X31 rs X31 ::
-          Psrail rd X31 n :: k)  
+        OK (if Int.eq n Int.zero
+            then Pmv rd rs :: k
+            else if Int.eq n Int.one
+                 then Psrlil X31 rs (Int.repr 63) ::
+                      Paddl X31 rs X31 ::
+                      Psrail rd X31 Int.one :: k
+                 else Psrail X31 rs (Int.repr 63) ::
+                      Psrlil X31 X31 (Int.sub Int64.iwordsize' n) ::
+                      Paddl X31 rs X31 ::
+                      Psrail rd X31 n :: k)  
 
   | Onegf, a1 :: nil =>
       do rd <- freg_of res; do rs <- freg_of a1;
@@ -770,9 +782,13 @@ Definition transl_memory_access
       Error(msg "Asmgen.transl_memory_access")
   end.
 
-Definition transl_load (chunk: memory_chunk) (addr: addressing)
+Definition transl_load (trap : trapping_mode)
+           (chunk: memory_chunk) (addr: addressing)
            (args: list mreg) (dst: mreg) (k: code) :=
-  match chunk with
+  match trap with
+  | NOTRAP => Error (msg "Asmgen.transl_load non-trapping loads unsupported on Arm")
+  | TRAP =>
+    match chunk with
   | Mint8signed =>
       do r <- ireg_of dst;
       transl_memory_access (Plb r)  addr args k
@@ -799,6 +815,7 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
       transl_memory_access (Pfld r) addr args k
   | _ =>
       Error (msg "Asmgen.transl_load")
+    end
   end.
 
 Definition transl_store (chunk: memory_chunk) (addr: addressing)
@@ -848,8 +865,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
                 else loadind_ptr SP f.(fn_link_ofs) X30 c)
   | Mop op args res =>
       transl_op op args res k
-  | Mload chunk addr args dst =>
-      transl_load chunk addr args dst k
+  | Mload trap chunk addr args dst =>
+      transl_load trap chunk addr args dst k
   | Mstore chunk addr args src =>
       transl_store chunk addr args src k
   | Mcall sig (inl r) =>
diff --git a/riscV/Asmgenproof.v b/riscV/Asmgenproof.v
index 5ec57886..8e9f022c 100644
--- a/riscV/Asmgenproof.v
+++ b/riscV/Asmgenproof.v
@@ -285,12 +285,12 @@ Opaque Int.eq.
 - apply opimm32_label; intros; exact I.
 - apply opimm32_label; intros; exact I.
 - apply opimm32_label; intros; exact I.
-- destruct (Int.eq n Int.zero); TailNoLabel.
+- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
 - apply opimm64_label; intros; exact I.
 - apply opimm64_label; intros; exact I.
 - apply opimm64_label; intros; exact I.
 - apply opimm64_label; intros; exact I.
-- destruct (Int.eq n Int.zero); TailNoLabel.
+- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
 - eapply transl_cond_op_label; eauto.
 Qed.
 
@@ -359,7 +359,7 @@ Proof.
 - destruct ep. eapply loadind_label; eauto.
   eapply tail_nolabel_trans. apply loadind_ptr_label. eapply loadind_label; eauto. 
 - eapply transl_op_label; eauto.
-- destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I.
+- destruct t; (try discriminate); destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I.
 - destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I.
 - destruct s0; monadInv H; TailNoLabel.
 - destruct s0; monadInv H; (eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel]).
@@ -725,6 +725,12 @@ Local Transparent destroyed_by_op.
   intros; auto with asmgen.
   simpl; congruence.
 
+- (* Mload notrap *) (* isn't there a nicer way? *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
+- (* Mload notrap *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
 - (* Mstore *)
   assert (eval_addressing tge sp addr (map rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
diff --git a/riscV/Asmgenproof1.v b/riscV/Asmgenproof1.v
index 98d5bd33..8678a5dc 100644
--- a/riscV/Asmgenproof1.v
+++ b/riscV/Asmgenproof1.v
@@ -400,22 +400,6 @@ Ltac ArgsInv :=
   | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
   end).
 
-Inductive exec_straight_opt: code -> regset -> mem -> code -> regset -> mem -> Prop :=
-  | exec_straight_opt_refl: forall c rs m,
-      exec_straight_opt c rs m c rs m
-  | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2,
-      exec_straight ge fn c1 rs1 m1 c2 rs2 m2 ->
-      exec_straight_opt c1 rs1 m1 c2 rs2 m2.
-
-Remark exec_straight_opt_right:
-  forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
-  exec_straight_opt c1 rs1 m1 c2 rs2 m2 ->
-  exec_straight ge fn c2 rs2 m2 c3 rs3 m3 ->
-  exec_straight ge fn c1 rs1 m1 c3 rs3 m3.
-Proof.
-  destruct 1; intros. auto. eapply exec_straight_trans; eauto. 
-Qed.
-
 Lemma transl_cbranch_correct_1:
   forall cond args lbl k c m ms b sp rs m',
   transl_cbranch cond args lbl k = OK c ->
@@ -423,7 +407,7 @@ Lemma transl_cbranch_correct_1:
   agree ms sp rs ->
   Mem.extends m m' ->
   exists rs', exists insn,
-     exec_straight_opt c rs m' (insn :: k) rs' m'
+     exec_straight_opt ge fn c rs m' (insn :: k) rs' m'
   /\ exec_instr ge fn insn rs' m' = eval_branch fn lbl rs' m' (Some b)
   /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
@@ -518,7 +502,7 @@ Lemma transl_cbranch_correct_true:
   agree ms sp rs ->
   Mem.extends m m' ->
   exists rs', exists insn,
-     exec_straight_opt c rs m' (insn :: k) rs' m'
+     exec_straight_opt ge fn c rs m' (insn :: k) rs' m'
   /\ exec_instr ge fn insn rs' m' = goto_label fn lbl rs' m'
   /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
@@ -1051,17 +1035,23 @@ Opaque Int.eq.
   intros (rs' & A & B & C).
   exists rs'; split; eauto. rewrite B; auto with asmgen.
 - (* shrximm *)
-  clear H. exploit Val.shrx_shr_2; eauto. intros E; subst v; clear EV.
+  clear H. exploit Val.shrx_shr_3; eauto. intros E; subst v; clear EV.
   destruct (Int.eq n Int.zero).
 + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
   split; intros; Simpl. 
-+ change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
-  econstructor; split.
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  apply exec_straight_one. simpl; reflexivity. auto. 
-  split; intros; Simpl.
++ destruct (Int.eq n Int.one).
+  * econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    apply exec_straight_one. simpl; reflexivity. auto.
+    split; intros; Simpl.
+  * change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
+    econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    apply exec_straight_one. simpl; reflexivity. auto. 
+    split; intros; Simpl.
 - (* longofintu *)
   econstructor; split.
   eapply exec_straight_three. simpl; eauto. simpl; eauto. simpl; eauto. auto. auto. auto.
@@ -1086,17 +1076,24 @@ Opaque Int.eq.
   intros (rs' & A & B & C).
   exists rs'; split; eauto. rewrite B; auto with asmgen.
 - (* shrxlimm *)
-  clear H. exploit Val.shrxl_shrl_2; eauto. intros E; subst v; clear EV.
+  clear H. exploit Val.shrxl_shrl_3; eauto. intros E; subst v; clear EV.
   destruct (Int.eq n Int.zero).
 + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
   split; intros; Simpl. 
-+ change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
-  econstructor; split.
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  eapply exec_straight_step. simpl; reflexivity. auto. 
-  apply exec_straight_one. simpl; reflexivity. auto. 
-  split; intros; Simpl.
++ destruct (Int.eq n Int.one).
+  * econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    eapply exec_straight_step. simpl; reflexivity. auto.
+    apply exec_straight_one. simpl; reflexivity. auto.
+    split; intros; Simpl.
+
+  * change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
+    econstructor; split.
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    eapply exec_straight_step. simpl; reflexivity. auto. 
+    apply exec_straight_one. simpl; reflexivity. auto. 
+    split; intros; Simpl.
 - (* cond *)
   exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
   exists rs'; split. eexact A. eauto with asmgen.
@@ -1108,7 +1105,7 @@ Lemma indexed_memory_access_correct:
   forall mk_instr base ofs k rs m,
   base <> X31 ->
   exists base' ofs' rs',
-     exec_straight_opt (indexed_memory_access mk_instr base ofs k) rs m
+     exec_straight_opt ge fn (indexed_memory_access mk_instr base ofs k) rs m
                        (mk_instr base' ofs' :: k) rs' m
   /\ Val.offset_ptr rs'#base' (eval_offset ge ofs') = Val.offset_ptr rs#base ofs
   /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
@@ -1258,7 +1255,7 @@ Lemma transl_memory_access_correct:
   transl_memory_access mk_instr addr args k = OK c ->
   eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
   exists base ofs rs',
-     exec_straight_opt c rs m (mk_instr base ofs :: k) rs' m
+     exec_straight_opt ge fn c rs m (mk_instr base ofs :: k) rs' m
   /\ Val.offset_ptr rs'#base (eval_offset ge ofs) = v
   /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
@@ -1318,8 +1315,8 @@ Proof.
 Qed.
 
 Lemma transl_load_correct:
-  forall chunk addr args dst k c (rs: regset) m a v,
-  transl_load chunk addr args dst k = OK c ->
+  forall trap chunk addr args dst k c (rs: regset) m a v,
+  transl_load trap chunk addr args dst k = OK c ->
   eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists rs',
@@ -1327,7 +1324,8 @@ Lemma transl_load_correct:
   /\ rs'#(preg_of dst) = v
   /\ forall r, r <> PC -> r <> X31 -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
-  intros until v; intros TR EV LOAD. 
+  intros until v; intros TR EV LOAD.
+  destruct trap; try (simpl in *; discriminate).
   assert (A: exists mk_instr,
       transl_memory_access mk_instr addr args k = OK c
    /\ forall base ofs rs,
diff --git a/riscV/Builtins1.v b/riscV/Builtins1.v
index f6e643d2..53c83d7e 100644
--- a/riscV/Builtins1.v
+++ b/riscV/Builtins1.v
@@ -29,5 +29,5 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
   match b with end.
 
-Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
   match b with end.
diff --git a/riscV/CSE2deps.v b/riscV/CSE2deps.v
new file mode 100644
index 00000000..8ab9242a
--- /dev/null
+++ b/riscV/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+     (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+  && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+  && ((ofsw + size_chunk chunkw <=? ofsr) ||
+      (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+  match addr, addr', args, args' with
+  | (Aindexed ofs), (Aindexed ofs'),
+    (base :: nil), (base' :: nil) =>
+    if peq base base'
+    then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+    else true  | _, _, _, _ => true
+  end.
diff --git a/riscV/CSE2depsproof.v b/riscV/CSE2depsproof.v
new file mode 100644
index 00000000..a3811e78
--- /dev/null
+++ b/riscV/CSE2depsproof.v
@@ -0,0 +1,127 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+  Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat.
+Proof.
+  unfold Ptrofs.wordsize.
+  unfold Wordsize_Ptrofs.wordsize.
+  trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+  Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
+Proof.
+  unfold Ptrofs.modulus.
+  rewrite ptrofs_size.
+  destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section MEMORY_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  Variable base : val.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Aindexed ofsw) (base :: nil) = Some addrw. 
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Aindexed ofsr) (base :: nil) = Some addrr.
+
+  Lemma load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                          \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+    destruct base; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: try rewrite ptrofs_modulus in *.
+    all: destruct Archi.ptr64.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Lemma may_overlap_sound:
+  forall m m' : mem,
+  forall chunk addr args chunk' addr' args' v a a' rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+    (may_overlap chunk addr args chunk' addr' args') = false ->
+    (Mem.storev chunk m a v) = Some m' ->
+    (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m  a').
+Proof.
+  intros until rs.
+  intros ADDR ADDR' OVERLAP STORE.
+  destruct addr; destruct addr'; try discriminate.
+  { (* Aindexed / Aindexed *)
+  destruct args as [ | base [ | ]]. 1,3: discriminate.
+  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+  simpl in OVERLAP.
+  destruct (peq base base'). 2: discriminate.
+  subst base'.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  simpl in *.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  }
+Qed.
+
+End SOUNDNESS.
diff --git a/riscV/Conventions1.v b/riscV/Conventions1.v
index df7ddfd2..17326139 100644
--- a/riscV/Conventions1.v
+++ b/riscV/Conventions1.v
@@ -105,7 +105,9 @@ Definition is_float_reg (r: mreg) :=
   of function arguments), but this leaves much liberty in choosing actual
   locations.  To ensure binary interoperability of code generated by our
   compiler with libraries compiled by another compiler, we
-  implement the standard RISC-V conventions.  *)
+  implement the standard RISC-V conventions as found here:
+  https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md
+*)
 
 (** ** Location of function result *)
 
@@ -115,11 +117,10 @@ Definition is_float_reg (r: mreg) :=
   with one integer result. *)
 
 Definition loc_result (s: signature) : rpair mreg :=
-  match s.(sig_res) with
-  | None => One R10
-  | Some (Tint | Tany32) => One R10
-  | Some (Tfloat | Tsingle | Tany64) => One F10
-  | Some Tlong => if Archi.ptr64 then One R10 else Twolong R11 R10
+  match proj_sig_res s with
+  | Tint | Tany32 => One R10
+  | Tfloat | Tsingle | Tany64 => One F10
+  | Tlong => if Archi.ptr64 then One R10 else Twolong R11 R10
   end.
 
 (** The result registers have types compatible with that given in the signature. *)
@@ -128,8 +129,8 @@ Lemma loc_result_type:
   forall sig,
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
-  intros. unfold proj_sig_res, loc_result, mreg_type;
-  destruct (sig_res sig) as [[]|]; auto; destruct Archi.ptr64; auto.
+  intros. unfold loc_result, mreg_type;
+  destruct (proj_sig_res sig); auto; destruct Archi.ptr64; auto.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -139,7 +140,7 @@ Lemma loc_result_caller_save:
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
   intros. unfold loc_result, is_callee_save;
-  destruct (sig_res s) as [[]|]; simpl; auto; destruct Archi.ptr64; simpl; auto.
+  destruct (proj_sig_res s); simpl; auto; destruct Archi.ptr64; simpl; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -149,13 +150,13 @@ Lemma loc_result_pair:
   match loc_result sg with
   | One _ => True
   | Twolong r1 r2 =>
-       r1 <> r2 /\ sg.(sig_res) = Some Tlong
+       r1 <> r2 /\ proj_sig_res sg = Tlong
     /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true 
     /\ Archi.ptr64 = false
   end.
 Proof.
   intros.
-  unfold loc_result; destruct (sig_res sg) as [[]|]; auto.
+  unfold loc_result; destruct (proj_sig_res sg); auto.
   unfold mreg_type; destruct Archi.ptr64; auto.
   split; auto. congruence.
 Qed.
@@ -165,43 +166,37 @@ Qed.
 Lemma loc_result_exten:
   forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
 Proof.
-  intros. unfold loc_result. rewrite H; auto.  
+  intros. unfold loc_result, proj_sig_res. rewrite H; auto.  
 Qed.
 
 (** ** Location of function arguments *)
 
-(** The RISC-V ABI states the following convention for passing arguments
+(** The RISC-V ABI states the following conventions for passing arguments
   to a function:
 
-- Arguments are passed in registers when possible.
-
-- Up to eight integer registers (ai: int_param_regs) and up to eight
-  floating-point registers (fai: float_param_regs) are used for this
-  purpose.
-
-- If the arguments to a function are conceptualized as fields of a C
-  struct, each with pointer alignment, the argument registers are a
-  shadow of the first eight pointer-words of that struct. If argument
-  i < 8 is a floating-point type, it is passed in floating-point
-  register fa_i; otherwise, it is passed in integer register a_i.
-
-- When primitive arguments twice the size of a pointer-word are passed
-  on the stack, they are naturally aligned. When they are passed in the
-  integer registers, they reside in an aligned even-odd register pair,
-  with the even register holding the least-significant bits.
-
-- Floating-point arguments to variadic functions (except those that
-  are explicitly named in the parameter list) are passed in integer
-  registers.
-
-- The portion of the conceptual struct that is not passed in argument
-  registers is passed on the stack. The stack pointer sp points to the
-  first argument not passed in a register.
-
-The bit about variadic functions doesn't quite fit CompCert's model.
-We do our best by passing the FP arguments in registers, as usual,
-and reserving the corresponding integer registers, so that fixup
-code can be introduced in the Asmexpand pass.
+- RV64, not variadic: pass the first 8 integer arguments in
+  integer registers (a1...a8: int_param_regs), the first 8 FP arguments
+  in FP registers (fa1...fa8: float_param_regs), and the remaining
+  arguments on the stack, in 8-byte slots.
+
+- RV32, not variadic: same, but arguments of 64-bit integer type
+  are passed in two consecutive integer registers (a(i), a(i+1))
+  or in a(8) and on a 32-bit word on the stack.  Stack-allocated
+  arguments are aligned to their natural alignment.
+
+- RV64, variadic: pass the first 8 arguments in integer registers
+  (a1...a8), including FP arguments; pass the remaining arguments on
+  the stack, in 8-byte slots.
+
+- RV32, variadic: same, but arguments of 64-bit types (integers as well
+  as floats) are passed in two consecutive aligned integer registers
+  (a(2i), a(2i+1)).  
+
+The passing of FP arguments to variadic functions in integer registers
+doesn't quite fit CompCert's model.  We do our best by passing the FP
+arguments in registers, as usual, and reserving the corresponding
+integer registers, so that fixup code can be introduced in the
+Asmexpand pass.
 *)
 
 Definition int_param_regs :=
@@ -209,80 +204,84 @@ Definition int_param_regs :=
 Definition float_param_regs :=
   F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: F16 :: F17 :: nil.
 
-Definition one_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ)
-                           (rec: Z -> Z -> list (rpair loc)) :=
-  match list_nth_z regs rn with
+Definition int_arg (ri rf ofs: Z) (ty: typ)
+                   (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  match list_nth_z int_param_regs ri with
   | Some r =>
-      One(R r) :: rec (rn + 1) ofs
+      One(R r) :: rec (ri + 1) rf ofs
   | None   =>
-      let ofs := align ofs (typealign ty) in
-      One(S Outgoing ofs ty) :: rec rn (ofs + (if Archi.ptr64 then 2 else typesize ty))
+      let ofs := align ofs (typesize ty) in
+      One(S Outgoing ofs ty)
+      :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty))
   end.
 
-Definition two_args (regs: list mreg) (rn: Z) (ofs: Z)
-                    (rec: Z -> Z -> list (rpair loc)) :=
-  let rn := align rn 2 in
-  match list_nth_z regs rn, list_nth_z regs (rn + 1) with
-  | Some r1, Some r2 =>
-      Twolong (R r2) (R r1) :: rec (rn + 2) ofs
-  | _, _ =>
-      let ofs := align ofs 2 in
-      Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) ::
-      rec rn (ofs + 2)
+Definition float_arg (va: bool) (ri rf ofs: Z) (ty: typ)
+                     (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  match list_nth_z float_param_regs rf with
+  | Some r =>
+      if va then
+       (let ri' :=  (* reserve 1 or 2 aligned integer registers *)
+          if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2 in
+        if zle ri' 8 then
+            (* we have enough integer registers, put argument in FP reg
+               and fixup code will put it in one or two integer regs *)
+            One (R r) :: rec ri' (rf + 1) ofs
+        else
+            (* we are out of integer registers, pass argument on stack *)
+            let ofs := align ofs (typesize ty) in
+            One(S Outgoing ofs ty)
+            :: rec ri' rf (ofs + (if Archi.ptr64 then 2 else typesize ty)))
+      else
+        One (R r) :: rec ri (rf + 1) ofs
+  | None   =>
+      let ofs := align ofs (typesize ty) in
+      One(S Outgoing ofs ty)
+      :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty))
   end.
 
-Definition hybrid_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ)
-                      (rec: Z -> Z -> list (rpair loc)) :=
-  let rn := align rn 2 in
-  match list_nth_z regs rn with
-  | Some r =>
-      One (R r) :: rec (rn + 2) ofs
-  | None =>
+Definition split_long_arg (va: bool) (ri rf ofs: Z)
+                          (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  let ri := if va then align ri 2 else ri in
+  match list_nth_z int_param_regs ri, list_nth_z int_param_regs (ri + 1) with
+  | Some r1, Some r2 =>
+      Twolong (R r2) (R r1) :: rec (ri + 2) rf ofs
+  | Some r1, None =>
+      Twolong (S Outgoing ofs Tint) (R r1) :: rec (ri + 1) rf (ofs + 1)
+  | None, _ =>
       let ofs := align ofs 2 in
-      One (S Outgoing ofs ty) :: rec rn (ofs + 2)
+      Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) ::
+      rec ri rf (ofs + 2)
   end.
 
 Fixpoint loc_arguments_rec (va: bool)
-    (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) :=
+    (tyl: list typ) (ri rf ofs: Z) {struct tyl} : list (rpair loc) :=
   match tyl with
   | nil => nil
   | (Tint | Tany32) as ty :: tys =>
-      one_arg int_param_regs r ofs ty (loc_arguments_rec va tys)
+      (* pass in one integer register or on stack *)
+      int_arg ri rf ofs ty (loc_arguments_rec va tys)
   | Tsingle as ty :: tys =>
-      one_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
+      (* pass in one FP register or on stack.
+         If vararg, reserve 1 integer register. *)
+      float_arg va ri rf ofs ty (loc_arguments_rec va tys)
   | Tlong as ty :: tys =>
-      if Archi.ptr64
-      then one_arg int_param_regs r ofs ty (loc_arguments_rec va tys)
-      else two_args int_param_regs r ofs  (loc_arguments_rec va tys)
+      if Archi.ptr64 then
+        (* pass in one integer register or on stack *)
+        int_arg ri rf ofs ty (loc_arguments_rec va tys)
+      else
+        (* pass in register pair or on stack; align register pair if vararg *)
+        split_long_arg va ri rf ofs(loc_arguments_rec va tys)
   | (Tfloat | Tany64) as ty :: tys =>
-      if va && negb Archi.ptr64
-      then hybrid_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
-      else one_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
+      (* pass in one FP register or on stack.
+         If vararg, reserve 1 or 2 integer registers. *)
+      float_arg va ri rf ofs ty (loc_arguments_rec va tys)
   end.
 
 (** [loc_arguments s] returns the list of locations where to store arguments
   when calling a function with signature [s].  *)
 
 Definition loc_arguments (s: signature) : list (rpair loc) :=
-  loc_arguments_rec s.(sig_cc).(cc_vararg) s.(sig_args) 0 0.
-
-(** [size_arguments s] returns the number of [Outgoing] slots used
-  to call a function with signature [s]. *)
-
-Definition max_outgoing_1 (accu: Z) (l: loc) : Z :=
-  match l with
-  | S Outgoing ofs ty => Z.max accu (ofs + typesize ty)
-  | _ => accu
-  end.
-
-Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z :=
-  match rl with
-  | One l => max_outgoing_1 accu l
-  | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2
-  end.
-
-Definition size_arguments (s: signature) : Z :=
-  List.fold_left max_outgoing_2 (loc_arguments s) 0.
+  loc_arguments_rec s.(sig_cc).(cc_vararg) s.(sig_args) 0 0 0.
 
 (** Argument locations are either non-temporary registers or [Outgoing]
   stack slots at nonnegative offsets. *)
@@ -295,90 +294,87 @@ Definition loc_argument_acceptable (l: loc) : Prop :=
   end.
 
 Lemma loc_arguments_rec_charact:
-  forall va tyl rn ofs p,
+  forall va tyl ri rf ofs p,
   ofs >= 0 ->
-  In p (loc_arguments_rec va tyl rn ofs) -> forall_rpair loc_argument_acceptable p.
+  In p (loc_arguments_rec va tyl ri rf ofs) -> forall_rpair loc_argument_acceptable p.
 Proof.
   set (OK := fun (l: list (rpair loc)) =>
              forall p, In p l -> forall_rpair loc_argument_acceptable p).
-  set (OKF := fun (f: Z -> Z -> list (rpair loc)) =>
-              forall rn ofs, ofs >= 0 -> OK (f rn ofs)).
-  set (OKREGS := fun (l: list mreg) => forall r, In r l -> is_callee_save r = false).
-  assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typealign ty) >= 0).
+  set (OKF := fun (f: Z -> Z -> Z -> list (rpair loc)) =>
+              forall ri rf ofs, ofs >= 0 -> OK (f ri rf ofs)).
+  assert (CSI: forall r, In r int_param_regs -> is_callee_save r = false).
+  { decide_goal. }
+  assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false).
+  { decide_goal. }
+  assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0).
   { intros. 
-    assert (ofs <= align ofs (typealign ty)) by (apply align_le; apply typealign_pos).
+    assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos).
     omega. }
+  assert (ALD: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs (typesize ty))).
+  { intros. eapply Z.divide_trans. apply typealign_typesize.
+    apply align_divides. apply typesize_pos. }
   assert (SK: (if Archi.ptr64 then 2 else 1) > 0).
   { destruct Archi.ptr64; omega. }
   assert (SKK: forall ty, (if Archi.ptr64 then 2 else typesize ty) > 0).
   { intros. destruct Archi.ptr64. omega. apply typesize_pos.  }
-  assert (A: forall regs rn ofs ty f,
-             OKREGS regs -> OKF f -> ofs >= 0 -> OK (one_arg regs rn ofs ty f)).
-  { intros until f; intros OR OF OO; red; unfold one_arg; intros.
-    destruct (list_nth_z regs rn) as [r|] eqn:NTH; destruct H.
-  - subst p; simpl. apply OR. eapply list_nth_z_in; eauto. 
+  assert (A: forall ri rf ofs ty f,
+             OKF f -> ofs >= 0 -> OK (int_arg ri rf ofs ty f)).
+  { intros until f; intros OF OO; red; unfold int_arg; intros.
+    destruct (list_nth_z int_param_regs ri) as [r|] eqn:NTH; destruct H.
+  - subst p; simpl. apply CSI. eapply list_nth_z_in; eauto. 
   - eapply OF; eauto. 
   - subst p; simpl. auto using align_divides, typealign_pos.
   - eapply OF; [idtac|eauto].
     generalize (AL ofs ty OO) (SKK ty); omega.
   }
-  assert (B: forall regs rn ofs f,
-             OKREGS regs -> OKF f -> ofs >= 0 -> OK (two_args regs rn ofs f)).
-  { intros until f; intros OR OF OO; unfold two_args.
-    set (rn' := align rn 2).
+  assert (B: forall va ri rf ofs ty f,
+             OKF f -> ofs >= 0 -> OK (float_arg va ri rf ofs ty f)).
+  { intros until f; intros OF OO; red; unfold float_arg; intros.
+    destruct (list_nth_z float_param_regs rf) as [r|] eqn:NTH.
+  - set (ri' := if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2) in *.
+    destruct va; [destruct (zle ri' 8)|idtac]; destruct H.
+    + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto.
+    + eapply OF; eauto.
+    + subst p; repeat split; auto.
+    + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); omega.
+    + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto.
+    + eapply OF; eauto.
+  - destruct H.
+    + subst p; repeat split; auto.
+    + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); omega.
+  }
+  assert (C: forall va ri rf ofs f,
+             OKF f -> ofs >= 0 -> OK (split_long_arg va ri rf ofs f)).
+  { intros until f; intros OF OO; unfold split_long_arg.
+    set (ri' := if va then align ri 2 else ri).
     set (ofs' := align ofs 2).
     assert (OO': ofs' >= 0) by (apply (AL ofs Tlong); auto).
-    assert (DFL: OK (Twolong (S Outgoing (ofs' + 1) Tint) (S Outgoing ofs' Tint)
-                     :: f rn' (ofs' + 2))).
-    { red; simpl; intros. destruct H.
-    - subst p; simpl. 
-      repeat split; auto using Z.divide_1_l. omega.
-    - eapply OF; [idtac|eauto]. omega.
-    }
-    destruct (list_nth_z regs rn') as [r1|] eqn:NTH1;
-    destruct (list_nth_z regs (rn' + 1)) as [r2|] eqn:NTH2;
-    try apply DFL.
-    red; simpl; intros; destruct H.
-  - subst p; simpl. split; apply OR; eauto using list_nth_z_in.  
-  - eapply OF; [idtac|eauto]. auto.
+    destruct (list_nth_z int_param_regs ri') as [r1|] eqn:NTH1;
+    [destruct (list_nth_z int_param_regs (ri'+1)) as [r2|] eqn:NTH2 | idtac].
+  - red; simpl; intros; destruct H.
+    + subst p; split; apply CSI; eauto using list_nth_z_in.
+    + eapply OF; [idtac|eauto]. omega.
+  - red; simpl; intros; destruct H.
+    + subst p; split. split; auto using Z.divide_1_l. apply CSI; eauto using list_nth_z_in.
+    + eapply OF; [idtac|eauto]. omega.
+  - red; simpl; intros; destruct H.
+    + subst p; repeat split; auto using Z.divide_1_l. omega. 
+    + eapply OF; [idtac|eauto]. omega.
   }
-  assert (C: forall regs rn ofs ty f,
-             OKREGS regs -> OKF f -> ofs >= 0 -> typealign ty = 1 -> OK (hybrid_arg regs rn ofs ty f)).
-  { intros until f; intros OR OF OO OTY; unfold hybrid_arg; red; intros.
-    set (rn' := align rn 2) in *.
-    destruct (list_nth_z regs rn') as [r|] eqn:NTH; destruct H.
-  - subst p; simpl. apply OR. eapply list_nth_z_in; eauto. 
-  - eapply OF; eauto. 
-  - subst p; simpl. rewrite OTY. split. apply (AL ofs Tlong OO). apply Z.divide_1_l. 
-  - eapply OF; [idtac|eauto]. generalize (AL ofs Tlong OO); simpl; omega.
-  }
-  assert (D: OKREGS int_param_regs).
-  { red. decide_goal. }
-  assert (E: OKREGS float_param_regs).
-  { red. decide_goal. }
-
-  cut (forall va tyl rn ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl rn ofs)).
+  cut (forall va tyl ri rf ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl ri rf ofs)).
   unfold OK. eauto.
   induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl.
 - red; simpl; tauto.
 - destruct ty1.
 + (* int *) apply A; auto.
-+ (* float *) 
-  destruct (va && negb Archi.ptr64).
-  apply C; auto.
-  apply A; auto.
++ (* float *) apply B; auto.
 + (* long *)
   destruct Archi.ptr64.
   apply A; auto.
-  apply B; auto.
-+ (* single *)
-  apply A; auto.
-+ (* any32 *)
-  apply A; auto.
-+ (* any64 *)
-  destruct (va && negb Archi.ptr64).
   apply C; auto.
-  apply A; auto.
++ (* single *) apply B; auto.
++ (* any32 *) apply A; auto.
++ (* any64 *) apply B; auto.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -388,54 +384,14 @@ Proof.
   unfold loc_arguments; intros. eapply loc_arguments_rec_charact; eauto. omega.
 Qed.
 
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark fold_max_outgoing_above:
-  forall l n, fold_left max_outgoing_2 l n >= n.
-Proof.
-  assert (A: forall n l, max_outgoing_1 n l >= n).
-  { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
-  induction l; simpl; intros. 
-  - omega.
-  - eapply Zge_trans. eauto.
-    destruct a; simpl. apply A. eapply Zge_trans; eauto.
-Qed.
-
-Lemma size_arguments_above:
-  forall s, size_arguments s >= 0.
-Proof.
-  intros. apply fold_max_outgoing_above.
-Qed.
-
-Lemma loc_arguments_bounded:
-  forall (s: signature) (ofs: Z) (ty: typ),
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
-  ofs + typesize ty <= size_arguments s.
-Proof.
-  intros until ty.
-  assert (A: forall n l, n <= max_outgoing_1 n l).
-  { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
-  assert (B: forall p n,
-             In (S Outgoing ofs ty) (regs_of_rpair p) ->
-             ofs + typesize ty <= max_outgoing_2 n p).
-  { intros. destruct p; simpl in H; intuition; subst; simpl.
-  - xomega.
-  - eapply Z.le_trans. 2: apply A. xomega.
-  - xomega. }
-  assert (C: forall l n,
-             In (S Outgoing ofs ty) (regs_of_rpairs l) ->
-             ofs + typesize ty <= fold_left max_outgoing_2 l n).
-  { induction l; simpl; intros.
-  - contradiction.
-  - rewrite in_app_iff in H. destruct H.
-  + eapply Z.le_trans. eapply B; eauto. apply Z.ge_le. apply fold_max_outgoing_above.
-  + apply IHl; auto.
-  }
-  apply C. 
-Qed.
-
 Lemma loc_arguments_main:
   loc_arguments signature_main = nil.
 Proof.
   reflexivity.
 Qed.
+
+(** ** Normalization of function results *)
+
+(** No normalization needed. *)
+
+Definition return_value_needs_normalization (t: rettype) := false.
diff --git a/riscV/DuplicateOpcodeHeuristic.ml b/riscV/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..2ec314c1
--- /dev/null
+++ b/riscV/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,27 @@
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+  match cond with
+  | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccompf c | Ccompfs c -> (match c with
+      | Ceq -> Some false
+      | Cne -> Some true
+      | _ -> None
+      )
+  | Cnotcompf c | Cnotcompfs c -> (match c with
+      | Ceq -> Some true
+      | Cne -> Some false
+      | _ -> None
+      )
+  | _ -> None
diff --git a/riscV/Op.v b/riscV/Op.v
index bb04f786..a71696c7 100644
--- a/riscV/Op.v
+++ b/riscV/Op.v
@@ -666,6 +666,36 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct (eval_condition cond vl m)... destruct b...
 Qed.
 
+
+Definition is_trapping_op (op : operation) :=
+  match op with
+  | Odiv | Odivl | Odivu | Odivlu
+  | Omod | Omodl | Omodu | Omodlu
+  | Oshrximm _ | Oshrxlimm _
+  | Ointoffloat | Ointuoffloat
+  | Ointofsingle | Ointuofsingle
+  | Olongoffloat | Olonguoffloat
+  | Olongofsingle | Olonguofsingle
+  | Osingleofint | Osingleofintu
+  | Osingleoflong | Osingleoflongu
+  | Ofloatofint | Ofloatofintu
+  | Ofloatoflong | Ofloatoflongu => true
+  | _ => false
+  end.
+
+Lemma is_trapping_op_sound:
+  forall op vl sp m,
+    op <> Omove ->
+    is_trapping_op op = false ->
+    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    eval_operation genv sp op vl m <> None.
+Proof.
+  destruct op; intros; simpl in *; try congruence.
+  all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+  all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+  all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+  all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
@@ -1159,6 +1189,20 @@ Proof.
   apply Val.offset_ptr_inject; auto. 
 Qed.
 
+Lemma eval_addressing_inj_none:
+  forall addr sp1 vl1 sp2 vl2,
+  (forall id ofs,
+      In id (globals_addressing addr) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing ge1 sp1 addr vl1 = None ->
+  eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+  intros until vl2. intros Hglobal Hinjsp Hinjvl.
+  destruct addr; simpl in *;
+  inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate.
+Qed.
 End EVAL_COMPAT.
 
 (** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
@@ -1265,6 +1309,18 @@ Proof.
   destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
 Qed.
 
+Lemma eval_addressing_lessdef_none:
+  forall sp addr vl1 vl2,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = None ->
+  eval_addressing genv sp addr vl2 = None.
+Proof.
+  intros until vl2. intros Hlessdef Heval1.
+  destruct addr; simpl in *;
+  inv Hlessdef; trivial; try discriminate;
+  inv H0; trivial; try discriminate;
+  inv H2; trivial; try discriminate.
+Qed.
 End EVAL_LESSDEF.
 
 (** Compatibility of the evaluation functions with memory injections. *)
@@ -1317,6 +1373,20 @@ Proof.
   econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
 Qed.
 
+
+Lemma eval_addressing_inject_none:
+  forall addr vl1 vl2,
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+  eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+  intros.
+  rewrite eval_shift_stack_addressing.
+  eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+
 Lemma eval_operation_inject:
   forall op vl1 vl2 v1 m1 m2,
   Val.inject_list f vl1 vl2 ->
diff --git a/riscV/SelectOpproof.v b/riscV/SelectOpproof.v
index 52a535fb..7f2014dc 100644
--- a/riscV/SelectOpproof.v
+++ b/riscV/SelectOpproof.v
@@ -767,7 +767,7 @@ Qed.
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
   red; intros until x. unfold cast8unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto.
+  rewrite Val.zero_ext_and. apply eval_andimm. omega.
 Qed.
 
 Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
@@ -780,7 +780,7 @@ Qed.
 Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
 Proof.
   red; intros until x. unfold cast8unsigned.
-  rewrite Val.zero_ext_and. apply eval_andimm. compute; auto.
+  rewrite Val.zero_ext_and. apply eval_andimm. omega.
 Qed.
 
 Theorem eval_intoffloat:
diff --git a/runtime/Makefile b/runtime/Makefile
index 0a18e0f8..3b1cabc4 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -28,6 +28,8 @@ OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o vararg.o\
 		 i64_shl.o i64_shr.o
 # Missing: i64_utod.o i64_utof.o i64_stod.o i64_stof.o
 DOMAKE:=$(shell (cd mppa_k1c && make))
+else ifeq ($(ARCH),aarch64)
+OBJS=vararg.o
 else
 OBJS=i64_dtos.o i64_dtou.o i64_sar.o i64_sdiv.o i64_shl.o \
   i64_shr.o i64_smod.o i64_stod.o i64_stof.o \
diff --git a/runtime/aarch64/sysdeps.h b/runtime/aarch64/sysdeps.h
new file mode 100644
index 00000000..0cee9ae3
--- /dev/null
+++ b/runtime/aarch64/sysdeps.h
@@ -0,0 +1,45 @@
+// *****************************************************************
+//
+//               The Compcert verified compiler
+//
+//           Xavier Leroy, Collège de France and INRIA Paris
+//
+// Copyright (c) Institut National de Recherche en Informatique et
+//  en Automatique.
+//	
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+//     * Neither the name of the <organization> nor the
+//       names of its contributors may be used to endorse or promote products
+//       derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+// HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *********************************************************************
+
+// System dependencies
+
+#define FUNCTION(f) \
+	.text; \
+        .balign 16; \
+	.globl f; \
+f:
+
+#define ENDFUNCTION(f) \
+	.type f, @function; .size f, . - f
+
diff --git a/runtime/aarch64/vararg.S b/runtime/aarch64/vararg.S
new file mode 100644
index 00000000..b7347d65
--- /dev/null
+++ b/runtime/aarch64/vararg.S
@@ -0,0 +1,109 @@
+// *****************************************************************
+//
+//               The Compcert verified compiler
+//
+//           Xavier Leroy, Collège de France and INRIA Paris
+//
+// Copyright (c) Institut National de Recherche en Informatique et
+//  en Automatique.
+//	
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+//     * Neither the name of the <organization> nor the
+//       names of its contributors may be used to endorse or promote products
+//       derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+// HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *********************************************************************
+
+// Helper functions for variadic functions <stdarg.h>.  AArch64 version.
+
+#include "sysdeps.h"	
+
+// typedef struct __va_list {
+//   void *__stack;             // next stack parameter
+//   void *__gr_top;            // top of the save area for int regs
+//   void *__vr_top;            // top of the save area for float regs
+//   int__gr_offs;              // offset from gr_top to next int reg
+//   int__vr_offs;              // offset from gr_top to next FP reg
+// }
+// typedef struct __va_list va_list;   // struct passed by reference
+// unsigned int __compcert_va_int32(va_list * ap);
+// unsigned long long __compcert_va_int64(va_list * ap);
+// double __compcert_va_float64(va_list * ap);
+
+FUNCTION(__compcert_va_int32)
+        ldr     w1, [x0, #24]   // w1 = gr_offs
+        cbz     w1, 1f
+  // gr_offs is not zero: load from int save area and update gr_offs
+        ldr     x2, [x0, #8]    // x2 = gr_top
+        ldr     w2, [x2, w1, sxtw] // w2 = the next integer
+        add     w1, w1, #8
+        str     w1, [x0, #24]   // update gr_offs
+        mov     w0, w2
+	ret
+  // gr_offs is zero: load from stack save area and update stack pointer
+1:      ldr     x1, [x0, #0]    // x1 = stack
+        ldr     w2, [x1, #0]    // w2 = the next integer
+        add     x1, x1, #8
+        str     x1, [x0, #0]    // update stack
+        mov     w0, w2
+	ret
+ENDFUNCTION(__compcert_va_int32)
+
+FUNCTION(__compcert_va_int64)
+        ldr     w1, [x0, #24]   // w1 = gr_offs
+        cbz     w1, 1f
+  // gr_offs is not zero: load from int save area and update gr_offs
+        ldr     x2, [x0, #8]    // x2 = gr_top
+        ldr     x2, [x2, w1, sxtw] // w2 = the next long integer
+        add     w1, w1, #8
+        str     w1, [x0, #24]   // update gr_offs
+        mov     x0, x2
+	ret
+  // gr_offs is zero: load from stack save area and update stack pointer
+1:      ldr     x1, [x0, #0]    // x1 = stack
+        ldr     x2, [x1, #0]    // w2 = the next long integer
+        add     x1, x1, #8
+        str     x1, [x0, #0]    // update stack
+        mov     x0, x2
+	ret
+ENDFUNCTION(__compcert_va_int64)
+
+FUNCTION(__compcert_va_float64)
+        ldr     w1, [x0, #28]   // w1 = vr_offs
+        cbz     w1, 1f
+  // vr_offs is not zero: load from float save area and update vr_offs
+        ldr     x2, [x0, #16]   // x2 = vr_top
+        ldr     d0, [x2, w1, sxtw] // d0 = the next float
+        add     w1, w1, #16
+        str     w1, [x0, #28]   // update vr_offs
+	ret
+  // gr_offs is zero: load from stack save area and update stack pointer
+1:      ldr     x1, [x0, #0]    // x1 = stack
+        ldr     d0, [x1, #0]    // d0 = the next float
+        add     x1, x1, #8
+        str     x1, [x0, #0]    // update stack
+	ret
+ENDFUNCTION(__compcert_va_float64)
+
+// Right now we pass structs by reference.  This is not ABI conformant.
+FUNCTION(__compcert_va_composite)
+	b	__compcert_va_int64
+ENDFUNCTION(__compcert_va_composite)
diff --git a/runtime/arm/i64_stof.S b/runtime/arm/i64_stof.S
index bcfa471c..11e00a2a 100644
--- a/runtime/arm/i64_stof.S
+++ b/runtime/arm/i64_stof.S
@@ -39,12 +39,11 @@
 @@@ Conversion from signed 64-bit integer to single float
 
 FUNCTION(__compcert_i64_stof)
-    @ Check whether -2^53 <= X < 2^53
-        ASR r2, Reg0HI, #21
-        ASR r3, Reg0HI, #31     @ (r2,r3) = X >> 53
+   @ Check whether -2^53 <= X < 2^53
+        ASR r2, Reg0HI, #21     @ r2 = high 32 bits of X >> 53
+   @ -2^53 <= X < 2^53 iff r2 is -1 or 0, that is, iff r2 + 1 is 0 or 1
         adds r2, r2, #1
-        adc r3, r3, #0          @ (r2,r3) = X >> 53 + 1
-        cmp r3, #2
+        cmp r2, #2
         blo 1f
    @ X is large enough that double rounding can occur.
    @ Avoid it by nudging X away from the points where double rounding
diff --git a/runtime/include/math.h b/runtime/include/math.h
index 060968c8..01b8d8d8 100644
--- a/runtime/include/math.h
+++ b/runtime/include/math.h
@@ -1,8 +1,12 @@
 #ifndef _COMPCERT_MATH_H
 #define _COMPCERT_MATH_H
 
+#ifdef __K1C__
+
 #define isfinite(__y) (fpclassify((__y)) >= FP_ZERO)
 
+#include_next <math.h>
+
 #ifndef COMPCERT_NO_FP_MACROS
 #define fmin(x, y) __builtin_fmin((x),(y))
 #define fmax(x, y) __builtin_fmax((x),(y))
@@ -14,5 +18,9 @@
 #define fmaf(x, y, z) __builtin_fmaf((x),(y),(z))
 #endif
 
+#else
+
 #include_next <math.h>
+
+#endif
 #endif
diff --git a/runtime/mppa_k1c/vararg.S b/runtime/mppa_k1c/vararg.s
index 9e23e0b3..65c1eab8 100644
--- a/runtime/mppa_k1c/vararg.S
+++ b/runtime/mppa_k1c/vararg.s
@@ -1,7 +1,7 @@
 
-// typedef void * va_list;
-// unsigned int __compcert_va_int32(va_list * ap);
-// unsigned long long __compcert_va_int64(va_list * ap);
+# typedef void * va_list;
+# unsigned int __compcert_va_int32(va_list * ap);
+# unsigned long long __compcert_va_int64(va_list * ap);
 
 	.text
 	.balign 2
diff --git a/runtime/powerpc/i64_dtos.s b/runtime/powerpc/i64_dtos.s
new file mode 100644
index 00000000..85c60b27
--- /dev/null
+++ b/runtime/powerpc/i64_dtos.s
@@ -0,0 +1,100 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Conversion from double float to signed long	
+
+        .balign 16
+        .globl __compcert_i64_dtos
+__compcert_i64_dtos:
+        stfdu f1, -16(r1)       # extract LO (r4) and HI (r3) halves of double
+        lwz r3, 0(r1)
+        lwz r4, 4(r1)
+        addi r1, r1, 16
+        srawi r10, r3, 31       # save sign of double in r10
+ # extract unbiased exponent  ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+        rlwinm r5, r3, 12, 21, 31
+        addi r5, r5, -1075
+ # check range of exponent
+        cmpwi r5, -52           # if EXP < -52, abs(double) is < 1.0
+        blt 1f
+        cmpwi r5, 11            # if EXP >= 63 - 52, abs(double) is >= 2^63
+        bge 2f
+  # extract true mantissa
+        rlwinm r3, r3, 0, 12, 31  # HI &= ~0xFFF00000
+        oris r3, r3, 0x10         # HI |=  0x00100000
+  # shift it appropriately
+        cmpwi r5, 0
+        blt 3f
+  # if EXP >= 0, shift left by EXP.  Note that EXP < 11.
+        subfic r6, r5, 32       # r6 = 32 - EXP
+        slw r3, r3, r5
+        srw r0, r4, r6
+        or r3, r3, r0
+        slw r4, r4, r5
+        b 4f
+  # if EXP < 0, shift right by -EXP.  Note that -EXP <= 52 but can be >= 32.
+3:      subfic r5, r5, 0        # r5 = -EXP = shift amount
+        subfic r6, r5, 32       # r6 = 32 - amount
+        addi r7, r5, -32        # r7 = amount - 32  (see i64_shr.s)
+        srw r4, r4, r5
+        slw r0, r3, r6
+        or r4, r4, r0
+        srw r0, r3, r7
+        or r4, r4, r0
+        srw r3, r3, r5
+  # apply sign to result	
+4:      xor r4, r4, r10
+        xor r3, r3, r10
+        subfc r4, r10, r4
+        subfe r3, r10, r3
+        blr
+  # Special cases
+1:      li r3, 0                # result is 0
+        li r4, 0
+        blr
+2:      li r4, -1               # result is MAX_SINT or MIN_SINT
+        bge 5f                  # depending on sign
+        li r4, -1               # result is MAX_SINT =  0x7FFF_FFFF
+        srwi r3, r4, 1
+        blr
+5:      lis r3, 0x8000          # result is MIN_SINT = 0x8000_0000
+        li r4, 0
+        blr
+        .type __compcert_i64_dtos, @function
+        .size __compcert_i64_dtos, .-__compcert_i64_dtos
+        
+\ No newline at end of file
diff --git a/runtime/powerpc/i64_dtou.s b/runtime/powerpc/i64_dtou.s
new file mode 100644
index 00000000..67a721d4
--- /dev/null
+++ b/runtime/powerpc/i64_dtou.s
@@ -0,0 +1,92 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Conversion from double float to unsigned long	
+
+        .balign 16
+        .globl __compcert_i64_dtou
+__compcert_i64_dtou:
+        stfdu f1, -16(r1)       # extract LO (r4) and HI (r3) halves of double
+        lwz r3, 0(r1)
+        lwz r4, 4(r1)
+        addi r1, r1, 16
+        cmpwi r3, 0             # is double < 0?
+        blt 1f                  # then it converts to 0
+  # extract unbiased exponent  ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+        rlwinm r5, r3, 12, 21, 31
+        addi r5, r5, -1075
+  # check range of exponent
+        cmpwi r5, -52           # if EXP < -52, double is < 1.0
+        blt 1f
+        cmpwi r5, 12            # if EXP >= 64 - 52, double is >= 2^64
+        bge 2f
+  # extract true mantissa
+        rlwinm r3, r3, 0, 12, 31  # HI &= ~0xFFF00000
+        oris r3, r3, 0x10         # HI |=  0x00100000
+  # shift it appropriately
+        cmpwi r5, 0
+        blt 3f
+  # if EXP >= 0, shift left by EXP.  Note that EXP < 12.
+        subfic r6, r5, 32       # r6 = 32 - EXP
+        slw r3, r3, r5
+        srw r0, r4, r6
+        or r3, r3, r0
+        slw r4, r4, r5
+        blr
+  # if EXP < 0, shift right by -EXP.  Note that -EXP <= 52 but can be >= 32.
+3:      subfic r5, r5, 0        # r5 = -EXP = shift amount
+        subfic r6, r5, 32       # r6 = 32 - amount
+        addi r7, r5, -32        # r7 = amount - 32 (see i64_shr.s)
+        srw r4, r4, r5
+        slw r0, r3, r6
+        or r4, r4, r0
+        srw r0, r3, r7
+        or r4, r4, r0
+        srw r3, r3, r5
+        blr
+  # Special cases
+1:      li r3, 0                # result is 0
+        li r4, 0
+        blr
+2:      li r3, -1               # result is MAX_UINT
+        li r4, -1
+        blr
+        .type __compcert_i64_dtou, @function
+        .size __compcert_i64_dtou, .-__compcert_i64_dtou
+
+        
+\ No newline at end of file
diff --git a/runtime/powerpc/i64_sar.s b/runtime/powerpc/i64_sar.s
new file mode 100644
index 00000000..c7da448f
--- /dev/null
+++ b/runtime/powerpc/i64_sar.s
@@ -0,0 +1,60 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+# Shift right signed	
+	
+        .balign 16
+        .globl __compcert_i64_sar
+__compcert_i64_sar:
+        andi. r5, r5, 63        # take amount modulo 64
+        cmpwi r5, 32
+        bge 1f                  # amount < 32?
+        subfic r6, r5, 32       # r6 = 32 - amount
+        srw r4, r4, r5          # RH = XH >>s amount
+        slw r0, r3, r6          # RL = XL >>u amount | XH << (32 - amount)
+        or r4, r4, r0
+        sraw r3, r3, r5
+        blr
+1:      addi r6, r5, -32        # amount >= 32
+        sraw r4, r3, r6         # RL = XH >>s (amount - 32)
+        srawi r3, r3, 31        # RL = sign extension of XH
+        blr
+        .type __compcert_i64_sar, @function
+        .size __compcert_i64_sar, .-__compcert_i64_sar
+
+        
+\ No newline at end of file
diff --git a/runtime/powerpc/i64_sdiv.s b/runtime/powerpc/i64_sdiv.s
new file mode 100644
index 00000000..9787ea3b
--- /dev/null
+++ b/runtime/powerpc/i64_sdiv.s
@@ -0,0 +1,71 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Signed division	
+	
+        .balign 16
+        .globl __compcert_i64_sdiv
+__compcert_i64_sdiv:
+	mflr r0
+        stw r0, 4(r1)           # save return address in caller's frame
+        xor r0, r3, r5          # compute sign of result (top bit)
+	mtctr r0                # save it in CTR (why not?)
+        srawi r0, r3, 31        # take absolute value of N
+        xor r4, r4, r0          # (i.e.  N = N ^ r0 - r0,
+        xor r3, r3, r0          #  where r0 = 0 if N >= 0 and r0 = -1 if N < 0)
+        subfc r4, r0, r4
+        subfe r3, r0, r3
+        srawi r0, r5, 31        # take absolute value of D
+        xor r6, r6, r0          # (same trick)
+        xor r5, r5, r0
+        subfc r6, r0, r6
+        subfe r5, r0, r5
+        bl __compcert_i64_udivmod        # do unsigned division
+        lwz r0, 4(r1)
+        mtlr r0                 # restore return address
+        mfctr r0
+        srawi r0, r0, 31        # apply expected sign to quotient
+        xor r6, r6, r0          # RES = Q if CTR >= 0, -Q if CTR < 0
+        xor r5, r5, r0
+        subfc r4, r0, r6
+        subfe r3, r0, r5
+        blr
+        .type __compcert_i64_sdiv, @function
+        .size __compcert_i64_sdiv, .-__compcert_i64_sdiv
+	
+        
+\ No newline at end of file
diff --git a/runtime/powerpc/i64_shl.s b/runtime/powerpc/i64_shl.s
new file mode 100644
index 00000000..f6edb6c2
--- /dev/null
+++ b/runtime/powerpc/i64_shl.s
@@ -0,0 +1,64 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+# Shift left
+	
+        .balign 16
+        .globl __compcert_i64_shl
+__compcert_i64_shl:
+# On PowerPC, shift instructions with amount mod 64 >= 32 return 0
+# hi = (hi << amount) | (lo >> (32 - amount)) | (lo << (amount - 32))
+# lo = lo << amount
+# if 0 <= amount < 32:
+#    (amount - 32) mod 64 >= 32, hence lo << (amount - 32) == 0
+# if 32 <= amount < 64:
+#    lo << amount == 0
+#    (32 - amount) mod 64 >= 32, hence lo >> (32 - amount) == 0
+        andi. r5, r5, 63        # take amount modulo 64
+        subfic r6, r5, 32       # r6 = 32 - amount
+        addi r7, r5, -32        # r7 = amount - 32
+        slw r3, r3, r5
+        srw r0, r4, r6
+        or r3, r3, r0
+        slw r0, r4, r7
+        or r3, r3, r0
+        slw r4, r4, r5
+        blr
+        .type __compcert_i64_shl, @function
+        .size __compcert_i64_shl, .-__compcert_i64_shl
+        
+\ No newline at end of file
diff --git a/runtime/powerpc/i64_shr.s b/runtime/powerpc/i64_shr.s
new file mode 100644
index 00000000..b634aafd
--- /dev/null
+++ b/runtime/powerpc/i64_shr.s
@@ -0,0 +1,65 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+# Shift right unsigned
+	
+        .balign 16
+        .globl __compcert_i64_shr
+__compcert_i64_shr:
+# On PowerPC, shift instructions with amount mod 64 >= 32 return 0
+# lo = (lo >> amount) | (hi << (32 - amount)) | (hi >> (amount - 32))
+# hi = hi >> amount
+# if 0 <= amount < 32:
+#    (amount - 32) mod 64 >= 32, hence hi >> (amount - 32) == 0
+# if 32 <= amount < 64:
+#    hi >> amount == 0
+#    (32 - amount) mod 64 >= 32, hence hi << (32 - amount) == 0
+        andi. r5, r5, 63        # take amount modulo 64
+        subfic r6, r5, 32       # r6 = 32 - amount
+        addi r7, r5, -32        # r7 = amount - 32
+        srw r4, r4, r5
+        slw r0, r3, r6
+        or r4, r4, r0
+        srw r0, r3, r7
+        or r4, r4, r0
+        srw r3, r3, r5
+        blr
+        .type __compcert_i64_shr, @function
+        .size __compcert_i64_shr, .-__compcert_i64_shr
+	
+        
+\ No newline at end of file
diff --git a/runtime/powerpc/i64_smod.s b/runtime/powerpc/i64_smod.s
new file mode 100644
index 00000000..6b4e1f89
--- /dev/null
+++ b/runtime/powerpc/i64_smod.s
@@ -0,0 +1,70 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+## Signed remainder	
+	
+        .balign 16
+        .globl __compcert_i64_smod
+__compcert_i64_smod:
+	mflr r0
+        stw r0, 4(r1)           # save return address in caller's frame
+	mtctr r3                # save sign of result in CTR (sign of N)
+        srawi r0, r3, 31        # take absolute value of N
+        xor r4, r4, r0          # (i.e.  N = N ^ r0 - r0,
+        xor r3, r3, r0          #  where r0 = 0 if N >= 0 and r0 = -1 if N < 0)
+        subfc r4, r0, r4
+        subfe r3, r0, r3
+        srawi r0, r5, 31        # take absolute value of D
+        xor r6, r6, r0          # (same trick)
+        xor r5, r5, r0
+        subfc r6, r0, r6
+        subfe r5, r0, r5
+        bl __compcert_i64_udivmod        # do unsigned division
+        lwz r0, 4(r1)
+        mtlr r0                 # restore return address
+        mfctr r0
+        srawi r0, r0, 31        # apply expected sign to remainder
+        xor r4, r4, r0          # RES = R if CTR >= 0, -Q if CTR < 0
+        xor r3, r3, r0
+        subfc r4, r0, r4
+        subfe r3, r0, r3
+        blr
+        .type __compcert_i64_smod, @function
+        .size __compcert_i64_smod, .-__compcert_i64_smod
+
+        
+\ No newline at end of file
diff --git a/runtime/powerpc/i64_smulh.s b/runtime/powerpc/i64_smulh.s
new file mode 100644
index 00000000..73393fce
--- /dev/null
+++ b/runtime/powerpc/i64_smulh.s
@@ -0,0 +1,80 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris
+#
+# Copyright (c) 2016 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Signed multiply-high
+
+# Hacker's Delight section 8.3:
+# - compute high 64 bits of the unsigned product X * Y  (see i64_umulh.S)
+# - subtract X if Y < 0
+# - subtract Y if X < 0
+
+        .balign 16
+        .globl __compcert_i64_smulh
+__compcert_i64_smulh:
+# r7:r8:r9 accumulate bits 127:32 of the full unsigned product
+        mulhwu  r9, r4, r6        # r9 = high half of XL.YL
+        mullw   r0, r4, r5        # r0 = low half of XL.YH
+        addc    r9, r9, r0
+        mulhwu  r0, r4, r5        # r0 = high half of XL.YH
+        addze   r8, r0
+        mullw   r0, r3, r6        # r0 = low half of XH.YL
+        addc    r9, r9, r0
+        mulhwu  r0, r3, r6        # r0 = high half of XH.YL
+        adde    r8, r8, r0
+        li      r7, 0
+        addze   r7, r7
+        mullw   r0, r3, r5        # r0 = low half of XH.YH
+        addc    r8, r8, r0
+        mulhwu  r0, r3, r5        # r0 = high half of XH.YH
+        adde    r7, r7, r0
+# Here r7:r8 contains the high 64 bits of the unsigned product.
+# Now, test signs and subtract if needed
+        srawi   r0, r3, 31        # r0 = -1 if X < 0, r0 = 0 if X >= 0
+        srawi   r9, r5, 31        # r9 = -1 if Y < 0, r9 = 0 if Y >= 0
+        and     r3, r3, r9        # set X = 0 if Y >= 0
+        and     r4, r4, r9
+        and     r5, r5, r0        # set Y = 0 if X >= 0
+        and     r6, r6, r0
+        subfc   r8, r4, r8        # subtract X
+        subfe   r7, r3, r7
+        subfc   r4, r6, r8        # subtract Y
+        subfe   r3, r5, r7
+        blr
+        .type __compcert_i64_smulh, @function
+        .size __compcert_i64_smulh, .-__compcert_i64_smulh
+
diff --git a/runtime/powerpc/i64_stod.s b/runtime/powerpc/i64_stod.s
new file mode 100644
index 00000000..0c1ab720
--- /dev/null
+++ b/runtime/powerpc/i64_stod.s
@@ -0,0 +1,67 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+### Conversion from signed long to double float	
+
+        .balign 16
+        .globl __compcert_i64_stod
+__compcert_i64_stod:
+        addi r1, r1, -16
+        lis r5, 0x4330
+        li r6, 0
+        stw r5, 0(r1)
+        stw r4, 4(r1)           # 0(r1) = 2^52 + (double) XL
+        stw r5, 8(r1)
+        stw r6, 12(r1)          # 8(r1) = 2^52
+        lfd f1, 0(r1)
+        lfd f2, 8(r1)
+        fsub f1, f1, f2         # f1 is XL (unsigned) as a double
+        lis r5, 0x4530
+        lis r6, 0x8000
+        stw r5, 0(r1)           # 0(r1) = 2^84 + ((double)XH - 2^31) * 2^32
+        add r3, r3, r6
+        stw r3, 4(r1)
+        stw r5, 8(r1)           # 8(r1) = 2^84 + 2^31 * 2^32
+        stw r6, 12(r1)
+        lfd f2, 0(r1)
+        lfd f3, 8(r1)
+        fsub f2, f2, f3         # f2 is XH (signed) * 2^32 as a double
+        fadd f1, f1, f2         # add both to get result
+        addi r1, r1, 16
+        blr
+        .type __compcert_i64_stod, @function
+        .size __compcert_i64_stod, .-__compcert_i64_stod
+
diff --git a/runtime/powerpc/i64_stof.s b/runtime/powerpc/i64_stof.s
new file mode 100644
index 00000000..ea23a1c8
--- /dev/null
+++ b/runtime/powerpc/i64_stof.s
@@ -0,0 +1,67 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Conversion from signed long to single float	
+
+        .balign 16
+        .globl __compcert_i64_stof
+__compcert_i64_stof:
+	mflr r9
+   # Check whether -2^53 <= X < 2^53	
+        srawi r5, r3, 21        # r5 = high 32 bits of X >> 53
+   # -2^53 <= X < 2^53 iff r5 is -1 or 0, that is, iff r5 + 1 is 0 or 1
+        addi r5, r5, 1
+        cmplwi r5, 2
+        blt 1f
+   # X is large enough that double rounding can occur.
+   # Avoid it by nudging X away from the points where double rounding
+   # occurs (the "round to odd" technique)
+        rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+        addi r5, r5, 0x7FF      # r5 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-31 of r5 are 0
+        or r4, r4, r5           # correct bit number 12 of X
+        rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
+   # Convert to double, then round to single	
+1:      bl __compcert_i64_stod
+        mtlr r9
+        frsp f1, f1
+        blr
+        .type __compcert_i64_stof, @function
+        .size __compcert_i64_stof, .-__compcert_i64_stof
+	
diff --git a/runtime/powerpc/i64_udiv.s b/runtime/powerpc/i64_udiv.s
new file mode 100644
index 00000000..e2da855a
--- /dev/null
+++ b/runtime/powerpc/i64_udiv.s
@@ -0,0 +1,54 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Unsigned division	
+
+        .balign 16
+        .globl __compcert_i64_udiv
+__compcert_i64_udiv:
+	mflr r0
+        stw r0, 4(r1)           # save return address in caller's frame
+        bl __compcert_i64_udivmod        # unsigned divide
+        lwz r0, 4(r1)
+        mtlr r0                 # restore return address
+        mr r3, r5               # result = quotient
+        mr r4, r6
+        blr
+        .type __compcert_i64_udiv, @function
+        .size __compcert_i64_udiv, .-__compcert_i64_udiv
+
diff --git a/runtime/powerpc/i64_udivmod.s b/runtime/powerpc/i64_udivmod.s
new file mode 100644
index 00000000..e81c6cef
--- /dev/null
+++ b/runtime/powerpc/i64_udivmod.s
@@ -0,0 +1,234 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+	.text
+	
+# Unsigned division and modulus
+
+# This function computes both the quotient and the remainder of two
+# unsigned 64-bit integers.
+
+# Input:  numerator N in (r3,r4), divisor D in (r5,r6)
+# Output: quotient Q in (r5,r6),  remainder R in (r3,r4)
+# Destroys: all integer caller-save registers
+	
+	.globl  __compcert_i64_udivmod
+	.balign 16
+__compcert_i64_udivmod:
+        cmplwi  r5, 0           # DH == 0 ?
+	stwu    r1, -32(r1)
+        mflr    r0
+        stw     r0, 8(r1)
+        stw     r31, 12(r1)
+        beq     1f
+# The general case	
+        stw     r30, 16(r1)
+        stw     r29, 20(r1)
+        stw     r28, 24(r1)
+        mr      r28, r3         # Save N in (r28, r29)
+        mr      r29, r4
+        mr      r30, r5         # Save D in (r30, r31)
+        mr      r31, r6 
+  # Scale N and D down, giving N' and D', such that 2^31 <= D' < 2^32
+        cntlzw  r7, r5          # r7 = leading zeros in DH = 32 - shift amount
+        subfic  r8, r7, 32      # r8 = shift amount
+	slw     r0, r3, r7      # N' = N >> shift amount
+        srw     r3, r3, r8
+        srw     r4, r4, r8
+        or      r4, r4, r0
+	slw     r0, r5, r7      # D' = D >> shift amount
+        srw     r6, r6, r8
+        or      r5, r6, r0
+  # Divide N' by D' to get an approximate quotient Q
+        bl      __compcert_i64_udiv6432  # r3 = quotient, r4 = remainder
+        mr      r6, r3          # low half of quotient Q
+        li      r5, 0           # high half of quotient is 0
+  # Tentative quotient is either correct or one too high
+  # Compute Q * D in (r7, r8)
+4:      mullw   r7, r6, r30     # r7 = Q * DH
+        mullw   r8, r6, r31     # r8 = low 32 bits of Q * DL
+        mulhwu  r0, r6, r31     # r0 = high 32 bits of Q * DL
+        addc    r7, r7, r0
+        subfe.  r0, r0, r0      # test carry: EQ iff carry
+        beq     2f              # handle overflow case
+   # Compute R = N - Q * D, with borrow
+        subfc   r4, r8, r29
+        subfe   r3, r7, r28
+        subfe.  r0, r0, r0      # test borrow: EQ iff no borrow
+        beq     3f              # no borrow: N >= Q * D, we are good
+        addi    r6, r6, -1      # borrow: adjust Q down by 1
+        addc    r4, r4, r31     # and R up by D
+        adde    r3, r3, r30
+   # Finished	
+3:      lwz     r0, 8(r1)
+        mtlr    r0
+        lwz     r31, 12(r1)
+        lwz     r30, 16(r1)
+        lwz     r29, 20(r1)
+        lwz     r28, 24(r1)
+        addi    r1, r1, 32
+        blr
+   # Special case when Q * D overflows
+2:      addi    r6, r6, -1      # adjust Q down by 1
+        b       4b              # and redo computation and check of remainder
+	.balign 16
+# Special case 64 bits divided by 32 bits
+1:      cmplwi  r3, 0           # NH == 0?
+        beq     4f
+        divwu   r31, r3, r6     # Divide NH by DL, quotient QH in r31
+        mullw   r0, r31, r6
+        subf    r3, r0, r3      # NH is remainder of this division
+        mr      r5, r6
+        bl      __compcert_i64_udiv6432  # divide NH : NL by DL
+        mr      r5, r31         # high word of quotient
+        mr      r6, r3          # low word of quotient
+                                # r4 contains low word of remainder
+        li      r3, 0           # high word of remainder = 0
+	lwz     r0, 8(r1)
+        mtlr    r0
+        lwz     r31, 12(r1)
+        addi    r1, r1, 32
+        blr
+	.balign 16
+# Special case 32 bits divided by 32 bits
+4:      mr      r0, r6
+	divwu   r6, r4, r6      # low word of quotient
+	li      r5, 0           # high word of quotient is 0
+        mullw   r0, r6, r0
+        subf    r4, r0, r4      # low word of remainder
+        li      r3, 0           # high word of remainder is 0
+        addi    r1, r1, 32
+        blr
+        
+        .type __compcert_i64_udivmod, @function
+        .size __compcert_i64_udivmod, .-__compcert_i64_udivmod
+
+# Auxiliary division function: 64 bit integer divided by 32 bit integer	
+# Not exported
+# Input:  numerator N in (r3,r4), divisor D in r5
+# Output: quotient Q in r3, remainder R in r4
+# Destroys: all integer caller-save registers
+# Assumes: high word of N is less than D	
+        
+	.balign 16
+__compcert_i64_udiv6432: 
+# Algorithm 9.3 from Hacker's Delight, section 9.4	
+# Initially: u1 in r3, u0 in r4, v in r5
+#   s = __builtin_clz(v);
+	cntlzw  r6, r5          # s in r6
+#   v = v << s;
+        slw     r5, r5, r6
+#   vn1 = v >> 16;              # vn1 in r7
+        srwi    r7, r5, 16
+#   vn0 = v & 0xFFFF;           # vn0 in r8
+        rlwinm  r8, r5, 0, 16, 31
+#   un32 = (u1 << s) | (u0 >> 32 - s);
+	subfic  r0, r6, 32
+        srw     r0, r4, r0
+        slw     r3, r3, r6     # u1 dies, un32 in r3
+        or      r3, r3, r0
+#   un10 = u0 << s;
+        slw     r4, r4, r6     # u0 dies, un10 in r4
+#   un1 = un10 >> 16;
+        srwi    r9, r4, 16     # un1 in r9
+#   un0 = un10 & 0xFFFF;
+        rlwinm  r4, r4, 0, 16, 31 # un10 dies, un0 in r4
+#   q1 = un32/vn1;
+        divwu   r10, r3, r7    # q in r10
+#   rhat = un32 - q1*vn1;
+        mullw   r0, r10, r7
+        subf    r11, r0, r3    # rhat in r11
+#  again1:
+1:      
+#   if (q1 >= b || q1*vn0 > b*rhat + un1) {
+        cmplwi  r10, 0xFFFF
+        bgt     2f
+        mullw   r0, r10, r8
+        slwi    r12, r11, 16
+        add     r12, r12, r9
+        cmplw   r0, r12
+        ble     3f
+2:      
+#     q1 = q1 - 1;
+        addi    r10, r10, -1
+#     rhat = rhat + vn1;
+        add     r11, r11, r7
+#     if (rhat < b) goto again1;}
+        cmplwi  r11, 0xFFFF
+        ble     1b
+3:      
+#   un21 = un32*b + un1 - q1*v;
+        slwi    r0, r3, 16     # un32 dies
+        add     r9, r0, r9     # un1 dies
+        mullw   r0, r10, r5
+        subf    r9, r0, r9     # un21 in r9
+#   q0 = un21/vn1;
+	divwu   r3, r9, r7     # q0 in r3
+#   rhat = un21 - q0*vn1;
+        mullw   r0, r3, r7
+        subf    r11, r0, r9    # rhat in r11
+# again2:
+4:      
+#   if (q0 >= b || q0*vn0 > b*rhat + un0) {
+        cmplwi  r3, 0xFFFF
+        bgt     5f
+        mullw   r0, r3, r8
+        slwi    r12, r11, 16
+        add     r12, r12, r4
+        cmplw   r0, r12
+        ble     6f
+5:      
+#     q0 = q0 - 1;
+	addi    r3, r3, -1
+#     rhat = rhat + vn1;
+        add     r11, r11, r7
+#     if (rhat < b) goto again2;}
+	cmplwi  r11, 0xFFFF
+        ble     4b
+6:      
+#   remainder = (un21*b + un0 - q0*v) >> s;
+        slwi    r0, r9, 16
+        add     r4, r0, r4  # un0 dies, remainder in r4
+        mullw   r0, r3, r5
+        subf    r4, r0, r4
+        srw     r4, r4, r6
+#   quotient = q1*b + q0;
+        slwi    r0, r10, 16
+        add     r3, r0, r3
+        blr
+
+	.type	__compcert_i64_udiv6432, @function
+	.size	__compcert_i64_udiv6432,.-__compcert_i64_udiv6432
diff --git a/runtime/powerpc/i64_umod.s b/runtime/powerpc/i64_umod.s
new file mode 100644
index 00000000..bf8d6121
--- /dev/null
+++ b/runtime/powerpc/i64_umod.s
@@ -0,0 +1,47 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Unsigned modulus
+
+        .balign 16
+        .globl __compcert_i64_umod
+__compcert_i64_umod:
+        b __compcert_i64_udivmod
+        .type __compcert_i64_umod, @function
+        .size __compcert_i64_umod, .-__compcert_i64_umod
+
diff --git a/runtime/powerpc/i64_umulh.s b/runtime/powerpc/i64_umulh.s
new file mode 100644
index 00000000..53b72948
--- /dev/null
+++ b/runtime/powerpc/i64_umulh.s
@@ -0,0 +1,65 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris
+#
+# Copyright (c) 2016 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Unsigned multiply-high
+
+# X * Y = 2^64 XH.YH + 2^32 (XH.YL + XL.YH) + XL.YL
+
+        .balign 16
+        .globl __compcert_i64_umulh
+__compcert_i64_umulh:
+# r7:r8:r9 accumulate bits 127:32 of the full product
+        mulhwu  r9, r4, r6        # r9 = high half of XL.YL
+        mullw   r0, r4, r5        # r0 = low half of XL.YH
+        addc    r9, r9, r0
+        mulhwu  r0, r4, r5        # r0 = high half of XL.YH
+        addze   r8, r0
+        mullw   r0, r3, r6        # r0 = low half of XH.YL
+        addc    r9, r9, r0
+        mulhwu  r0, r3, r6        # r0 = high half of XH.YL
+        adde    r8, r8, r0
+        li      r7, 0
+        addze   r7, r7
+        mullw   r0, r3, r5        # r0 = low half of XH.YH
+        addc    r4, r8, r0
+        mulhwu  r0, r3, r5        # r0 = high half of XH.YH
+        adde    r3, r7, r0
+        blr
+        .type __compcert_i64_umulh, @function
+        .size __compcert_i64_umulh, .-__compcert_i64_umulh
+
diff --git a/runtime/powerpc/i64_utod.s b/runtime/powerpc/i64_utod.s
new file mode 100644
index 00000000..69de6fdb
--- /dev/null
+++ b/runtime/powerpc/i64_utod.s
@@ -0,0 +1,66 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Conversion from unsigned long to double float	
+
+        .balign 16
+        .globl __compcert_i64_utod
+__compcert_i64_utod:
+        addi r1, r1, -16
+        lis r5, 0x4330
+        li r6, 0
+        stw r5, 0(r1)
+        stw r4, 4(r1)           # 0(r1) = 2^52 + (double) XL
+        stw r5, 8(r1)
+        stw r6, 12(r1)          # 8(r1) = 2^52
+        lfd f1, 0(r1)
+        lfd f2, 8(r1)
+        fsub f1, f1, f2         # f1 is (double) XL
+        lis r5, 0x4530
+        stw r5, 0(r1)           # 0(r1) = 2^84 + (double) XH * 2^32
+        stw r3, 4(r1)
+        stw r5, 8(r1)           # 8(r1) = 2^84
+        lfd f2, 0(r1)
+        lfd f3, 8(r1)
+        fsub f2, f2, f3         # f2 is XH * 2^32 as a double
+        fadd f1, f1, f2         # add both to get result
+        addi r1, r1, 16
+        blr
+        .type __compcert_i64_utod, @function
+        .size __compcert_i64_utod, .-__compcert_i64_utod
+	
diff --git a/runtime/powerpc/i64_utof.s b/runtime/powerpc/i64_utof.s
new file mode 100644
index 00000000..4a2a172b
--- /dev/null
+++ b/runtime/powerpc/i64_utof.s
@@ -0,0 +1,64 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Conversion from unsigned long to single float	
+
+        .balign 16
+        .globl __compcert_i64_utof
+__compcert_i64_utof:
+	mflr r9
+   # Check whether X < 2^53	
+        andis. r0, r3, 0xFFE0       # test bits 53...63 of X
+        beq 1f
+   # X is large enough that double rounding can occur.
+   # Avoid it by nudging X away from the points where double rounding
+   # occurs (the "round to odd" technique)
+        rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+        addi r5, r5, 0x7FF      # r5 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-31 of r5 are 0
+        or r4, r4, r5           # correct bit number 12 of X
+        rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
+   # Convert to double, then round to single	
+1:      bl __compcert_i64_utod
+        mtlr r9
+        frsp f1, f1
+        blr
+        .type __compcert_i64_utof, @function
+        .size __compcert_i64_utof, .-__compcert_i64_utof
+	
diff --git a/runtime/powerpc/vararg.s b/runtime/powerpc/vararg.s
new file mode 100644
index 00000000..8d7e62c8
--- /dev/null
+++ b/runtime/powerpc/vararg.s
@@ -0,0 +1,163 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for variadic functions <stdarg.h>.  IA32 version
+
+# typedef struct {
+#   unsigned char ireg;    // index of next integer register
+#   unsigned char freg;    // index of next FP register
+#   char * stk;            // pointer to next argument in stack
+#   struct {
+#     int iregs[8];
+#     double fregs[8];
+#   } * regs;              // pointer to saved register area
+# } va_list[1];
+#
+# unsigned int __compcert_va_int32(va_list ap);
+# unsigned long long __compcert_va_int64(va_list ap);
+# double __compcert_va_float64(va_list ap);
+
+        .text
+
+        .balign 16
+        .globl __compcert_va_int32
+__compcert_va_int32:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 0(r3)       # r4 = ap->ireg = next integer register
+        cmplwi  r4, 8
+        bge     1f
+  # Next argument was passed in an integer register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+        rlwinm  r6, r4, 2, 0, 29 # r6 = r4 * 4
+	addi    r4, r4, 1       # increment ap->ireg
+        stb     r4, 0(r3)
+        lwzx    r3, r5, r6      # load argument in r3
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+        addi    r5, r5, 4       # advance ap->stk by 4
+        stw     r5, 4(r3)
+	lwz     r3, -4(r5)      # load argument in r3
+        blr
+        .type   __compcert_va_int32, @function
+        .size   __compcert_va_int32, .-__compcert_va_int32
+
+        .balign 16
+        .globl __compcert_va_int64
+__compcert_va_int64:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 0(r3)       # r4 = ap->ireg = next integer register
+        cmplwi  r4, 7
+        bge     1f
+  # Next argument was passed in two consecutive integer register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+	addi    r4, r4, 3       # round r4 up to an even number and add 2
+        rlwinm  r4, r4, 0, 0, 30 
+        rlwinm  r6, r4, 2, 0, 29 # r6 = r4 * 4
+	add     r5, r5, r6      # r5 = address of argument + 8
+        stb     r4, 0(r3)       # update ap->ireg
+        lwz     r3, -8(r5)      # load argument in r3:r4
+        lwz     r4, -4(r5)
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+	li      r4, 8
+        stb     r4, 0(r3)       # set ap->ireg = 8 so that no ireg is left
+	addi    r5, r5, 15      # round r5 to a multiple of 8 and add 8
+        rlwinm  r5, r5, 0, 0, 28
+	stw     r5, 4(r3)       # update ap->stk
+        lwz     r3, -8(r5)      # load argument in r3:r4
+        lwz     r4, -4(r5)
+        blr
+        .type   __compcert_va_int64, @function
+        .size   __compcert_va_int64, .-__compcert_va_int64
+
+        .balign 16
+        .globl __compcert_va_float64
+__compcert_va_float64:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 1(r3)       # r4 = ap->freg = next float register
+        cmplwi  r4, 8
+        bge     1f
+  # Next argument was passed in a FP register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+        rlwinm  r6, r4, 3, 0, 28 # r6 = r4 * 8
+	add     r5, r5, r6
+        lfd     f1, 32(r5)      # load argument in f1
+	addi    r4, r4, 1       # increment ap->freg
+        stb     r4, 1(r3)
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+	addi    r5, r5, 15      # round r5 to a multiple of 8 and add 8
+        rlwinm  r5, r5, 0, 0, 28
+        lfd     f1, -8(r5)      # load argument in f1
+	stw     r5, 4(r3)       # update ap->stk
+        blr
+        .type   __compcert_va_float64, @function
+        .size   __compcert_va_float64, .-__compcert_va_int64
+
+        .balign 16
+        .globl __compcert_va_composite
+__compcert_va_composite:
+	b       __compcert_va_int32
+        .type   __compcert_va_composite, @function
+        .size   __compcert_va_composite, .-__compcert_va_composite
+
+# Save integer and FP registers at beginning of vararg function        
+
+        .balign 16
+        .globl  __compcert_va_saveregs
+__compcert_va_saveregs:
+        lwz     r11, 0(r1)      # r11 point to top of our frame
+        stwu    r3, -96(r11)    # register save area is 96 bytes below
+        stw     r4, 4(r11)
+        stw     r5, 8(r11)
+        stw     r6, 12(r11)
+        stw     r7, 16(r11)
+        stw     r8, 20(r11)
+        stw     r9, 24(r11)
+        stw     r10, 28(r11)
+	bf      6, 1f           # don't save FP regs if CR6 bit is clear
+        stfd    f1, 32(r11)
+        stfd    f2, 40(r11)
+        stfd    f3, 48(r11)
+        stfd    f4, 56(r11)
+        stfd    f5, 64(r11)
+        stfd    f6, 72(r11)
+        stfd    f7, 80(r11)
+        stfd    f8, 88(r11)
+1:      blr
+        .type   __compcert_va_saveregs, @function
+        .size   __compcert_va_saveregs, .-__compcert_va_saveregs
diff --git a/runtime/powerpc64/i64_dtou.s b/runtime/powerpc64/i64_dtou.s
new file mode 100644
index 00000000..e58bcfaf
--- /dev/null
+++ b/runtime/powerpc64/i64_dtou.s
@@ -0,0 +1,66 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
+
+        .text
+
+### Conversion from double float to unsigned long	
+
+        .balign 16
+        .globl __compcert_i64_dtou
+__compcert_i64_dtou:
+        lis r0, 0x5f00          # 0x5f00_0000 = 2^63 in binary32 format
+        stwu r0, -16(r1)
+        lfs f2, 0(r1)           # f2 = 2^63
+        fcmpu cr0, f1, f2       # crbit 0 is f1 < f2
+        bf 0, 1f                # branch if f1 >= 2^63 (or f1 is NaN)
+	fctidz f1, f1           # convert as signed
+        stfd f1, 0(r1)
+        lwz r3, 0(r1)
+        lwz r4, 4(r1)
+        addi r1, r1, 16
+        blr
+1:      fsub f1, f1, f2         # shift argument down by 2^63
+	fctidz f1, f1           # convert as signed
+        stfd f1, 0(r1)
+        lwz r3, 0(r1)
+        lwz r4, 4(r1)
+        addis r3, r3, 0x8000    # shift result up by 2^63
+        addi r1, r1, 16
+        blr
+        .type __compcert_i64_dtou, @function
+        .size __compcert_i64_dtou, .-__compcert_i64_dtou
+
+        
diff --git a/runtime/powerpc64/i64_stof.s b/runtime/powerpc64/i64_stof.s
new file mode 100644
index 00000000..779cbc18
--- /dev/null
+++ b/runtime/powerpc64/i64_stof.s
@@ -0,0 +1,68 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
+
+        .text
+
+### Conversion from signed long to single float	
+
+        .balign 16
+        .globl __compcert_i64_stof
+__compcert_i64_stof:
+        rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
+   # Check whether -2^53 <= X < 2^53	
+        sradi r5, r4, 53
+        addi r5, r5, 1
+        cmpldi r5, 2
+	blt 1f
+   # X is large enough that double rounding can occur.
+   # Avoid it by nudging X away from the points where double rounding
+   # occurs (the "round to odd" technique)
+        rldicl r5, r4, 0, 53     # extract bits 0 to 11 of X
+        addi r5, r5, 0x7FF       # r5 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-63 of r5 are 0
+        or r4, r4, r5            # correct bit number 12 of X
+        rldicr r4, r4, 0, 52     # set to 0 bits 0 to 11 of X
+   # Convert to double, then round to single
+1:      stdu r4, -16(r1)
+        lfd f1, 0(r1)
+        fcfid f1, f1
+	frsp f1, f1
+        addi r1, r1, 16
+        blr
+        .type __compcert_i64_stof, @function
+        .size __compcert_i64_stof, .-__compcert_i64_stof
+	
diff --git a/runtime/powerpc64/i64_utod.s b/runtime/powerpc64/i64_utod.s
new file mode 100644
index 00000000..491ee26b
--- /dev/null
+++ b/runtime/powerpc64/i64_utod.s
@@ -0,0 +1,79 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC 64 version.
+
+        .text
+
+### Conversion from unsigned long to double float	
+
+        .balign 16
+        .globl __compcert_i64_utod
+__compcert_i64_utod:
+	rldicl r3, r3, 0, 32    # clear top 32 bits
+	rldicl r4, r4, 0, 32    # clear top 32 bits
+	lis r5, 0x4f80          # 0x4f80_0000 = 2^32 in binary32 format
+        stdu r3, -32(r1)
+        std r4, 8(r1)
+        stw r5, 16(r1)
+        lfd f1, 0(r1)           # high 32 bits of argument
+        lfd f2, 8(r1)           # low 32 bits of argument
+	lfs f3, 16(r1)          # 2^32
+        fcfid f1, f1            # convert both 32-bit halves to FP (exactly)
+	fcfid f2, f2
+        fmadd f1, f1, f3, f2    # compute hi * 2^32 + lo
+        addi r1, r1, 32
+        blr
+        .type __compcert_i64_utod, @function
+        .size __compcert_i64_utod, .-__compcert_i64_utod
+	
+# Alternate implementation using round-to-odd:
+#       rldimi r4, r3, 32, 0   # reassemble (r3,r4) as a 64-bit integer in r4
+#       cmpdi r4, 0            # is r4 >= 2^63 ?
+#       blt 1f
+#	stdu r4, -16(r1)       # r4 < 2^63: convert as signed
+#       lfd f1, 0(r1)
+#       fcfid f1, f1
+#       addi r1, r1, 16
+#       blr
+#1:     rldicl r0, r4, 0, 63   # extract low bit of r4
+#       srdi r4, r4, 1
+#       or r4, r4, r0          # round r4 to 63 bits, using round-to-odd
+#	stdu r4, -16(r1)       # convert to binary64
+#       lfd f1, 0(r1)
+#       fcfid f1, f1
+#       fadd f1, f1, f1        # multiply result by 2
+#       addi r1, r1, 16
+#       blr
+        
+\ No newline at end of file
diff --git a/runtime/powerpc64/i64_utof.s b/runtime/powerpc64/i64_utof.s
new file mode 100644
index 00000000..4a2a172b
--- /dev/null
+++ b/runtime/powerpc64/i64_utof.s
@@ -0,0 +1,64 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic.  PowerPC version.
+
+        .text
+
+### Conversion from unsigned long to single float	
+
+        .balign 16
+        .globl __compcert_i64_utof
+__compcert_i64_utof:
+	mflr r9
+   # Check whether X < 2^53	
+        andis. r0, r3, 0xFFE0       # test bits 53...63 of X
+        beq 1f
+   # X is large enough that double rounding can occur.
+   # Avoid it by nudging X away from the points where double rounding
+   # occurs (the "round to odd" technique)
+        rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+        addi r5, r5, 0x7FF      # r5 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-31 of r5 are 0
+        or r4, r4, r5           # correct bit number 12 of X
+        rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
+   # Convert to double, then round to single	
+1:      bl __compcert_i64_utod
+        mtlr r9
+        frsp f1, f1
+        blr
+        .type __compcert_i64_utof, @function
+        .size __compcert_i64_utof, .-__compcert_i64_utof
+	
diff --git a/runtime/powerpc64/vararg.s b/runtime/powerpc64/vararg.s
new file mode 100644
index 00000000..8d7e62c8
--- /dev/null
+++ b/runtime/powerpc64/vararg.s
@@ -0,0 +1,163 @@
+# *****************************************************************
+#
+#               The Compcert verified compiler
+#
+#           Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+#  en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the <organization> nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for variadic functions <stdarg.h>.  IA32 version
+
+# typedef struct {
+#   unsigned char ireg;    // index of next integer register
+#   unsigned char freg;    // index of next FP register
+#   char * stk;            // pointer to next argument in stack
+#   struct {
+#     int iregs[8];
+#     double fregs[8];
+#   } * regs;              // pointer to saved register area
+# } va_list[1];
+#
+# unsigned int __compcert_va_int32(va_list ap);
+# unsigned long long __compcert_va_int64(va_list ap);
+# double __compcert_va_float64(va_list ap);
+
+        .text
+
+        .balign 16
+        .globl __compcert_va_int32
+__compcert_va_int32:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 0(r3)       # r4 = ap->ireg = next integer register
+        cmplwi  r4, 8
+        bge     1f
+  # Next argument was passed in an integer register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+        rlwinm  r6, r4, 2, 0, 29 # r6 = r4 * 4
+	addi    r4, r4, 1       # increment ap->ireg
+        stb     r4, 0(r3)
+        lwzx    r3, r5, r6      # load argument in r3
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+        addi    r5, r5, 4       # advance ap->stk by 4
+        stw     r5, 4(r3)
+	lwz     r3, -4(r5)      # load argument in r3
+        blr
+        .type   __compcert_va_int32, @function
+        .size   __compcert_va_int32, .-__compcert_va_int32
+
+        .balign 16
+        .globl __compcert_va_int64
+__compcert_va_int64:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 0(r3)       # r4 = ap->ireg = next integer register
+        cmplwi  r4, 7
+        bge     1f
+  # Next argument was passed in two consecutive integer register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+	addi    r4, r4, 3       # round r4 up to an even number and add 2
+        rlwinm  r4, r4, 0, 0, 30 
+        rlwinm  r6, r4, 2, 0, 29 # r6 = r4 * 4
+	add     r5, r5, r6      # r5 = address of argument + 8
+        stb     r4, 0(r3)       # update ap->ireg
+        lwz     r3, -8(r5)      # load argument in r3:r4
+        lwz     r4, -4(r5)
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+	li      r4, 8
+        stb     r4, 0(r3)       # set ap->ireg = 8 so that no ireg is left
+	addi    r5, r5, 15      # round r5 to a multiple of 8 and add 8
+        rlwinm  r5, r5, 0, 0, 28
+	stw     r5, 4(r3)       # update ap->stk
+        lwz     r3, -8(r5)      # load argument in r3:r4
+        lwz     r4, -4(r5)
+        blr
+        .type   __compcert_va_int64, @function
+        .size   __compcert_va_int64, .-__compcert_va_int64
+
+        .balign 16
+        .globl __compcert_va_float64
+__compcert_va_float64:
+	                        # r3 = ap = address of va_list structure
+        lbz     r4, 1(r3)       # r4 = ap->freg = next float register
+        cmplwi  r4, 8
+        bge     1f
+  # Next argument was passed in a FP register
+        lwz     r5, 8(r3)       # r5 = ap->regs = base of saved register area
+        rlwinm  r6, r4, 3, 0, 28 # r6 = r4 * 8
+	add     r5, r5, r6
+        lfd     f1, 32(r5)      # load argument in f1
+	addi    r4, r4, 1       # increment ap->freg
+        stb     r4, 1(r3)
+        blr
+  # Next argument was passed on stack
+1:      lwz     r5, 4(r3)       # r5 = ap->stk = next argument passed on stack
+	addi    r5, r5, 15      # round r5 to a multiple of 8 and add 8
+        rlwinm  r5, r5, 0, 0, 28
+        lfd     f1, -8(r5)      # load argument in f1
+	stw     r5, 4(r3)       # update ap->stk
+        blr
+        .type   __compcert_va_float64, @function
+        .size   __compcert_va_float64, .-__compcert_va_int64
+
+        .balign 16
+        .globl __compcert_va_composite
+__compcert_va_composite:
+	b       __compcert_va_int32
+        .type   __compcert_va_composite, @function
+        .size   __compcert_va_composite, .-__compcert_va_composite
+
+# Save integer and FP registers at beginning of vararg function        
+
+        .balign 16
+        .globl  __compcert_va_saveregs
+__compcert_va_saveregs:
+        lwz     r11, 0(r1)      # r11 point to top of our frame
+        stwu    r3, -96(r11)    # register save area is 96 bytes below
+        stw     r4, 4(r11)
+        stw     r5, 8(r11)
+        stw     r6, 12(r11)
+        stw     r7, 16(r11)
+        stw     r8, 20(r11)
+        stw     r9, 24(r11)
+        stw     r10, 28(r11)
+	bf      6, 1f           # don't save FP regs if CR6 bit is clear
+        stfd    f1, 32(r11)
+        stfd    f2, 40(r11)
+        stfd    f3, 48(r11)
+        stfd    f4, 56(r11)
+        stfd    f5, 64(r11)
+        stfd    f6, 72(r11)
+        stfd    f7, 80(r11)
+        stfd    f8, 88(r11)
+1:      blr
+        .type   __compcert_va_saveregs, @function
+        .size   __compcert_va_saveregs, .-__compcert_va_saveregs
diff --git a/test/Makefile b/test/Makefile
index 7efcd8f1..e9c5d6a1 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -4,7 +4,9 @@ include ../Makefile.config
 
 # Kalray note - removing compression, raytracer and spass that cannot be executed by the simulator in reasonable time
 ifeq ($(ARCH),mppa_k1c)
-	DIRS:=c regression
+	DIRS=c regression
+else
+	DIRS=c compression raytracer spass regression
 endif
 
 ifeq ($(CLIGHTGEN),true)
diff --git a/test/c/Makefile b/test/c/Makefile
index 6bd05718..a2a80e06 100644
--- a/test/c/Makefile
+++ b/test/c/Makefile
@@ -8,8 +8,9 @@ EXECUTE:=timeout --signal=SIGTERM 20s $(EXECUTE)
 
 LIBS=$(LIBMATH)
 
-#TIME=xtime -o /dev/null -mintime 2.0     # Xavier's hack
-TIME=time >/dev/null                    # Otherwise
+TIME=time >/dev/null
+# FIXME - maybe this is better? From v3.6
+# TIME=ocaml unix.cma ../../tools/xtime.ml -o /dev/null -mintime 2.0 -minruns 4
 
 PROGS?=fib integr qsort fft fftsp fftw sha1 sha3 aes almabench \
   lists binarytrees fannkuch mandelbrot nbody \
@@ -56,12 +57,12 @@ test_gcc: all_gcc
 
 bench_gcc: all_gcc
 	@for i in $(PROGS); do \
-	   echo -n "$$i: "; $(TIME) ./$$i.gcc; \
+	   $(TIME) -name $$i -- ./$$i.gcc; \
          done
 
 bench: all
 	@for i in $(PROGS); do \
-	   echo -n "$$i: "; $(TIME) ./$$i.compcert; \
+	   $(TIME) -name $$i -- ./$$i.compcert; \
          done
 
 clean:
diff --git a/test/c/Results/mandelbrot-mppa_k1c b/test/c/Results/mandelbrot-mppa_k1c
index 246f7ce1..f50961fe 100644
--- a/test/c/Results/mandelbrot-mppa_k1c
+++ b/test/c/Results/mandelbrot-mppa_k1c
diff --git a/test/c/aes.c b/test/c/aes.c
index 28d51a78..0a64fe60 100644
--- a/test/c/aes.c
+++ b/test/c/aes.c
@@ -27,6 +27,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include "../endian.h"
 
 #define MAXKC	(256/32)
 #define MAXKB	(256/8)
@@ -36,15 +37,6 @@ typedef unsigned char	u8;
 typedef unsigned short	u16;
 typedef unsigned int	u32;
 
-#if defined(__ppc__) || defined(__PPC__) || defined(__ARMEB__)
-#define ARCH_BIG_ENDIAN
-#elif defined(__i386__) || defined(__x86_64__) || defined(__ARMEL__) \
-   || defined(__riscv) || defined(__K1C__)
-#undef ARCH_BIG_ENDIAN
-#else
-#error "unknown endianness"
-#endif
-
 #ifdef ARCH_BIG_ENDIAN
 #define GETU32(pt) (*(u32 *)(pt))
 #define PUTU32(ct,st) (*(u32 *)(ct) = (st))
@@ -1449,6 +1441,10 @@ int main(int argc, char ** argv)
           (u8 *)"\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF",
           (u8 *)"\x8E\xA2\xB7\xCA\x51\x67\x45\xBF\xEA\xFC\x49\x90\x4B\x49\x60\x89",
           5, 6);
+#ifdef __K1C__
+  do_bench(2000);
+#else
   do_bench(1000000);
+#endif
   return 0;
 }
diff --git a/test/c/almabench.c b/test/c/almabench.c
index 5487b062..4417200c 100644
--- a/test/c/almabench.c
+++ b/test/c/almabench.c
@@ -42,10 +42,15 @@
 #define R2D (180.0 / PI)
 #define GAUSSK 0.01720209895
 #define TEST_LOOPS 20
-#define TEST_LENGTH 36525
 #define sineps 0.3977771559319137
 #define coseps 0.9174820620691818
 
+#ifdef __K1C__
+#define TEST_LENGTH 12
+#else
+#define TEST_LENGTH 36525
+#endif
+
 const double amas [8] = { 6023600.0, 408523.5, 328900.5, 3098710.0, 1047.355, 3498.5, 22869.0, 19314.0 };
 
 const double a [8][3] =
diff --git a/test/c/mandelbrot.c b/test/c/mandelbrot.c
index 133d55c5..548c3ffa 100644
--- a/test/c/mandelbrot.c
+++ b/test/c/mandelbrot.c
@@ -27,7 +27,7 @@ int main (int argc, char **argv)
 
     if (argc < 2) {
 #ifdef __K1C__
-      w = h = 50;
+      w = h = 40;
 #else
       w = h = 1000;
 #endif
@@ -59,7 +59,6 @@ int main (int argc, char **argv)
 
             if(bit_num == 8)
             {
-                printf("%c", byte_acc);
                 putc(byte_acc,stdout);
 #ifdef __K1C__ // stdout isn't flushed enough when --syscall=libstd_scalls.so is passed to the simulator k1-cluster
                 fflush(stdout);
@@ -70,7 +69,6 @@ int main (int argc, char **argv)
             else if(x == w-1)
             {
                 byte_acc <<= (8-w%8);
-                printf("%c", byte_acc);
                 putc(byte_acc,stdout);
 #ifdef __K1C__ // stdout isn't flushed enough when --syscall=libstd_scalls.so is passed to the simulator k1-cluster
                 fflush(stdout);
diff --git a/test/c/sha1.c b/test/c/sha1.c
index 0a6ac8fe..624030cc 100644
--- a/test/c/sha1.c
+++ b/test/c/sha1.c
@@ -231,6 +231,10 @@ int main(int argc, char ** argv)
   }
   do_test(test_input_1, test_output_1);
   do_test(test_input_2, test_output_2);
+#ifdef __K1C__
+  do_bench(500);
+#else
   do_bench(200000);
+#endif
   return 0;
 }
diff --git a/test/c/sha3.c b/test/c/sha3.c
index a0905817..164e3086 100644
--- a/test/c/sha3.c
+++ b/test/c/sha3.c
@@ -190,8 +190,13 @@ test_triplet_t testvec[4] = {
   }
 };
 
+#ifdef __K1C__
+#define DATALEN 1000
+#define NITER 7
+#else
 #define DATALEN 100000
 #define NITER 25
+#endif
 
 int main()
 {
diff --git a/test/c/siphash24.c b/test/c/siphash24.c
index 4a42e013..ce0df78c 100644
--- a/test/c/siphash24.c
+++ b/test/c/siphash24.c
@@ -235,13 +235,19 @@ int test_vectors()
 
 u8 testdata[100] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 34, 56, 78, 90 };
 
+#ifdef __K1C__
+#define NITER 1000
+#else
+#define NITER 1000000
+#endif
+
 int speed_test(void)
 {
   u8 out[8], k[16];
   int i;
 
   for(i = 0; i < 16; ++i ) k[i] = i;
-  for(i = 0; i < 1000000; i++) {
+  for(i = 0; i < NITER; i++) {
     testdata[99] = (u8) i;
     crypto_auth(out, testdata, 100, k);
   }
diff --git a/test/clightgen/issue319.c b/test/clightgen/issue319.c
new file mode 100644
index 00000000..be9f3f7e
--- /dev/null
+++ b/test/clightgen/issue319.c
@@ -0,0 +1,12 @@
+/* Dollar signs in identifiers */
+
+int c$d = 42;
+
+int a$b(int x$$) {
+  return c$d + x$$;
+}
+
+int main(int argc, const char *argv[])
+{
+  return a$b(6);
+}
diff --git a/test/compression/Makefile b/test/compression/Makefile
index 48521b14..ff7032d5 100644
--- a/test/compression/Makefile
+++ b/test/compression/Makefile
@@ -5,7 +5,7 @@ SIMU=timeout --signal=SIGKILL 20s $(EXECUTE)
 CC=../../ccomp 
 CFLAGS=$(CCOMPOPTS) -U__GNUC__ -stdlib ../../runtime -dclight -dasm
 LIBS=
-TIME=xtime -o /dev/null -mintime 1.0
+TIME=ocaml unix.cma ../../tools/xtime.ml -mintime 2.0 -minruns 2
 
 EXE=arcode lzw lzss
 
@@ -54,8 +54,7 @@ test:
 bench:
 	@rm -f $(TESTCOMPR)
 	@for i in $(EXE); do \
-          echo -n "$$i: "; \
-          $(TIME) sh -c "./$$i -c -i $(LIGHTERFILE) -o $(TESTCOMPR) && ./$$i -d -i $(TESTCOMPR) -o /dev/null"; \
+          $(TIME) -name $$i -- sh -c "./$$i -c -i $(TESTFILE) -o $(TESTCOMPR) && ./$$i -d -i $(TESTCOMPR) -o /dev/null"; \
          done
 	@rm -f $(TESTCOMPR)
 
diff --git a/test/cse2/globals.c b/test/cse2/globals.c
new file mode 100644
index 00000000..c6dd59cd
--- /dev/null
+++ b/test/cse2/globals.c
@@ -0,0 +1,8 @@
+int glob1, glob2;
+
+void toto() {
+  if (glob1 > 4) {
+    glob2 ++;
+    glob1 --;
+  }
+}
diff --git a/test/cse2/indexed_addr.c b/test/cse2/indexed_addr.c
new file mode 100644
index 00000000..30a7c571
--- /dev/null
+++ b/test/cse2/indexed_addr.c
@@ -0,0 +1,6 @@
+void foo(int *t) {
+  if (t[0] > 4) {
+    t[1] ++;
+    t[0] --;
+  }
+}
diff --git a/test/endian.h b/test/endian.h
new file mode 100644
index 00000000..d6e121f4
--- /dev/null
+++ b/test/endian.h
@@ -0,0 +1,8 @@
+#if defined(__ppc__) || defined(__PPC__) || defined(__ARMEB__)
+#define ARCH_BIG_ENDIAN
+#elif defined(__i386__) || defined(__x86_64__) || defined(__ARMEL__) \
+   || defined(__riscv) || defined(__aarch64__) || defined(__K1C__)
+#undef ARCH_BIG_ENDIAN
+#else
+#error "unknown endianness"
+#endif
diff --git a/test/monniaux/Makefile b/test/monniaux/Makefile
index a6b19891..d7437eea 100644
--- a/test/monniaux/Makefile
+++ b/test/monniaux/Makefile
@@ -20,9 +20,12 @@ oracle_times.txt: PostpassSchedulingOracle.patch
 	bash build_benches.sh $@
 
 measures.csv:
-	(cd ../../ && make -j20 && make install)
-	bash build_benches.sh
-	bash run_benches.sh $@
+	@echo "Building compcert.."
+	@(cd ../../ && make -s -j20 && make -s install)
+	@echo "Building benches..."
+	@bash build_benches.sh
+	@echo "Benches built. Running benches..."
+	@bash run_benches.sh $@
 
 #compile_times.pdf: gencompile.py verifier_times.txt oracle_times.txt
 #	python3.5 $^ $@
@@ -32,4 +35,5 @@ measures.csv:
 
 .PHONY:
 clean:
+	@bash clean_benches.sh
 	rm -f verifier_times.txt oracle_times.txt compile_times.pdf measure_times.k1c.pdf measures.csv
diff --git a/test/monniaux/README.md b/test/monniaux/README.md
index dbb3f337..14b062da 100644
--- a/test/monniaux/README.md
+++ b/test/monniaux/README.md
@@ -1,13 +1,18 @@
-# Benchmarking CompCert and GCC
+# Benchmarking `CompCert` and GCC
 
-rules.mk contains generic rules to compile with gcc and ccomp, with different
-optimizations, and producing different binaries. It also produces a
-measures.csv file containing the different timings given by the bench.
+## Compiling `CompCert`
 
-Up to 5 different optimizations can be used.
+The first step to benchmark `CompCert` is to compile it - the `INSTALL.md` instructions of the project root folder should guide you on installing it.
 
-To use this rule.mk, create a folder, put inside all the .c/.h source files,
-and write a Makefile ressembling:
+For the benchmarks to work, the compiler `ccomp` should be on your `$PATH`, with the runtime libraries installed correctly (with a successful `make install` on the project root directory).
+
+## Using the harness
+
+`rules.mk` contains generic rules to compile with `gcc` and `ccomp`, with different optimizations, and producing different binaries. It also produces a `measures.csv` file containing the different timings given by the bench.
+
+Up to 5 different sets of optimizations per compiler can be used.
+
+To use this `rules.mk`, create a folder, put inside all the .c/.h source files, and write a Makefile resembling:
 ```make
 TARGET=float_mat
 MEASURES="c1 c2 c3 c4 c5 c6 c7 c8"
@@ -15,57 +20,88 @@ MEASURES="c1 c2 c3 c4 c5 c6 c7 c8"
 include ../rules.mk
 ```
 
-This is all that is required to write, the rules.mk handles everything.
+This is all that is required to write, the `rules.mk` handles everything.
 
-There is the possibility to define some variables to finetune what you want.
-For instance, `ALL_CFILES` describes the .c source files whose objects are
-to be linked.
+There is the possibility to define some variables to fine tune what you want. For instance, `ALL_CFILES` describes the .c source files whose objects are to be linked.
 
 Here is an exhaustive list of the variables:
 - `TARGET`: name of the binary to produce
 - `MEASURES`: list of the different timings. This supposes that the program
-prints something of the form "c3 cycles: 44131" for instance. In the above
-example, the Makefile would generate such a line:
-```
-float_mat c3,  1504675,  751514,  553235, 1929369,  1372441 
-```
+prints something of the form `c3 cycles: 44131`.
 - `ALL_CFILES`: list of .c files to compile. By default, `$(wildcard *.c)`
-- `CLOCK`: basename of the clock file to compile. Default `../clock`
-- `ALL_CFLAGS`: cflags that are to be included for all compilers
+- `CLOCK`: `basename` of the clock file to compile. Default `../clock`
+- `ALL_CFLAGS`: `cflags` that are to be included for all compilers
 - `ALL_GCCFLAGS`: same, but GCC specific
-- `ALL_CCOMPFLAGS`: same, but ccomp specific
-- `K1C_CC`: GCC compiler (default k1-cos-gcc)
-- `K1C_CCOMP`: compcert compiler (default ccomp)
-- `EXECUTE_CYCLES`: running command (default `k1-cluster` with some options)
-- `GCCiFLAGS` with i from 0 to 4: the wanted optimizations. If one of these flags is empty, nothing is done. Same for `CCOMPiFLAGS`. For now, the default values:
-```
-# You can define up to GCC4FLAGS and CCOMP4FLAGS
-GCC0FLAGS?=
-GCC1FLAGS?=$(ALL_GCCFLAGS) -O1
-GCC2FLAGS?=$(ALL_GCCFLAGS) -O2
-GCC3FLAGS?=$(ALL_GCCFLAGS) -O3
-GCC4FLAGS?=
-CCOMP0FLAGS?=
-CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -fno-postpass
-CCOMP2FLAGS?=$(ALL_CCOMPFLAGS)
-CCOMP3FLAGS?=
-CCOMP4FLAGS?=
-
-# Prefix names
-GCC0PREFIX?=
-GCC1PREFIX?=.gcc.o1
-GCC2PREFIX?=.gcc.o2
-GCC3PREFIX?=.gcc.o3
-GCC4PREFIX?=
-CCOMP0PREFIX?=
-CCOMP1PREFIX?=.ccomp.o1
-CCOMP2PREFIX?=.ccomp.o2
-CCOMP3PREFIX?=
-CCOMP4PREFIX?= 
-```
+- `ALL_CCOMPFLAGS`: same, but `ccomp` specific
+- `K1C_CC`: GCC compiler (default `k1-cos-gcc`)
+- `K1C_CCOMP`: `CompCert` compiler (default `ccomp`)
+- `EXECUTE_CYCLES`: running command (default is `k1-cluster --syscall=libstd_scalls.so --cycle-based --`)
+- `EXECUTE_ARGS`: execution arguments. You can use a macro `__BASE__` which expands to the name of the binary being executed.
+- `GCCiFLAGS` with `i` from 0 to 4: the wanted optimizations. If one of these flags is empty, nothing is done. Same for `CCOMPiFLAGS`. Look at `rules.mk` to see the default values. You might find something like this:
+
+    # You can define up to GCC4FLAGS and CCOMP4FLAGS
+    GCC0FLAGS?=
+    GCC1FLAGS?=$(ALL_GCCFLAGS) -O1
+    GCC2FLAGS?=$(ALL_GCCFLAGS) -O2
+    GCC3FLAGS?=$(ALL_GCCFLAGS) -O3
+    GCC4FLAGS?=
+    CCOMP0FLAGS?=
+    CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -fno-postpass
+    CCOMP2FLAGS?=$(ALL_CCOMPFLAGS)
+    CCOMP3FLAGS?=
+    CCOMP4FLAGS?=
+
+    # Prefix names
+    GCC0PREFIX?=
+    GCC1PREFIX?=.gcc.o1
+    GCC2PREFIX?=.gcc.o2
+    GCC3PREFIX?=.gcc.o3
+    GCC4PREFIX?=
+    CCOMP0PREFIX?=
+    CCOMP1PREFIX?=.ccomp.o1
+    CCOMP2PREFIX?=.ccomp.o2
+    CCOMP3PREFIX?=
+    CCOMP4PREFIX?=
+
+The `PREFIX` are the prefixes to add to the secondary produced files (assembly, object, executable, ..). You should be careful that if a `FLAGS` is set, then the according `PREFIX` should be set as well.
+
+Assembly files are generated in `asm/`, objects in `obj/`, binaries in `bin/` and outputs in `out/`.
+
+To compile and execute all the benches : `make` while in the `monniaux` directory (without any `-j` flag). Doing so will compile CompCert, install it, and then proceed to execute each bench.
+
+To compile and/or execute a single bench, `cd` to the bench directory, then:
+- `make` for compiling the bench
+- `make run` for running it
+
+You can use `-j` flag when in a single bench directory.
+
+## Individual scripts
+
+If you want to run the building and running scripts individually without having to use the `Makefile` from `test/monniaux`, you can run the `build_benches.sh` script which builds each bench using all the available cores on your machine.
+
+Once the benches are built, you can then run `run_benches.sh file.csv` where `file.csv` is where you want to store the timings of the benchmarks. `run_benches.sh` also uses all the available cores of your machine.
+
+## Adding timings to a benchmark
+
+If you just add a benchmark without any timing function, the resulting `measures.csv` file will be empty for lack of timing output.
+
+To add a timing, you must use the functions whose prototypes are in `clock.h`
+
+    #include "../clock.h"
+    /* ... */
+    clock_prepare();
+    /* ... */
+    clock_start();
+    /* .. computations .. */
+    clock_stop();
+    /* ... */
+    print_total_clock(); // print to stdout
+    printerr_total_clock(); // print to stderr
+
+If the benchmark doesn't use `stdout` in a binary way you can use `print_total_clock()`. However, some benchmarks like `jpeg-6b` print their binary content to `stdout`, which then messes up the `grep` command when attempting to use it to extract the cycles from `stdout`.
 
-The `PREFIX` are the prefixes to add to the .s, .o, etc.. You should be careful that if a FLAGS is set, then the according PREFIX should be set as well.
+The solution is then to use `printerr_total_clock()` which will print the cycles to `stderr`, and use `EXECUTE_ARGS` ressembling this:
 
-Assembly files will be generated in `asm/`, objects in `obj/`, binaries in `bin/` and outputs in `out/`.
+    EXECUTE_ARGS=-dct int -outfile __BASE__.jpg testimg.ppm 2> __BASE__.out
 
-To compile and execute all the benches : `make`
+`__BASE__` is a macro that gets expanded to the base name - that is, the `TARGET` concatenated with one of the `GCCiPREFIX` or `CCOMPiPREFIX`. For instance, in `jpeg-6b`, `__BASE__` could be `jpeg-6b.ccomp.o2`.
diff --git a/test/monniaux/benches.sh b/test/monniaux/benches.sh
index 2365063a..434e1b15 100644
--- a/test/monniaux/benches.sh
+++ b/test/monniaux/benches.sh
@@ -1,3 +1,3 @@
-benches="binary_search bitsliced-aes bitsliced-tea complex float_mat glibc_qsort heapsort idea number_theoretic_transform quicksort sha-2 tacle-bench-lift tacle-bench-powerwindow too_slow heptagon_radio_transmitter lustrev4_lustrec_heater_control lustrev4_lv4_heater_control lustrev4_lv6-en-2cgc_heater_control lustrev6-convertible-en-2cgc xor_and_mat"
+benches="binary_search bitsliced-aes bitsliced-tea complex float_mat glibc_qsort heapsort idea number_theoretic_transform quicksort sha-2 tacle-bench-lift tacle-bench-powerwindow too_slow heptagon_radio_transmitter lustrev4_lustrec_heater_control lustrev4_lv4_heater_control lustrev4_lv6-en-2cgc_heater_control lustrev6-convertible-en-2cgc xor_and_mat glpk-4.65 picosat-965 genann jpeg-6b zlib-1.2.11 ocaml tiff-4.0.10 ncompress"
 
 # Removed for now : ternary
diff --git a/test/monniaux/build_benches.sh b/test/monniaux/build_benches.sh
index 931cebac..01abf55d 100755
--- a/test/monniaux/build_benches.sh
+++ b/test/monniaux/build_benches.sh
@@ -2,15 +2,21 @@
 
 TMPFILE=/tmp/1513times.txt
 
+cores=$(grep -c ^processor /proc/cpuinfo)
 source benches.sh
 
+default="\e[39m"
+magenta="\e[35m"
+red="\e[31m"
+
 rm -f commands.txt
 rm -f $TMPFILE
 for bench in $benches; do
+  echo -e "${magenta}Building $bench..${default}"
   if [ "$1" == "" ]; then 
-    (cd $bench && make -j20)
+    (cd $bench && make -s -j$cores > /dev/null &> /dev/null) || { echo -e "${red}Build failed" && break; }
   else
-    (cd $bench && make -j20) | grep -P "\d+: \d+\.\d+" >> $TMPFILE
+    (cd $bench && make -j$cores) | grep -P "\d+: \d+\.\d+" >> $TMPFILE
   fi
 done
 
diff --git a/test/monniaux/clean_benches.sh b/test/monniaux/clean_benches.sh
index c0a87ff9..dff15fd4 100755
--- a/test/monniaux/clean_benches.sh
+++ b/test/monniaux/clean_benches.sh
@@ -1,8 +1,12 @@
 
 source benches.sh
 
+blue="\e[34m"
+default="\e[39m"
+
 rm -f commands.txt
 for bench in $benches; do
-  (cd $bench && make clean)
+  echo -e "${blue}Cleaning $bench..${default}"
+  (cd $bench && make -s clean)
 done
 rm -f *.o
diff --git a/test/monniaux/clock.c b/test/monniaux/clock.c
index fb636667..4ec679f6 100644
--- a/test/monniaux/clock.c
+++ b/test/monniaux/clock.c
@@ -24,9 +24,9 @@ cycle_t get_current_cycle(void) {
 }
 
 void print_total_clock(void) {
-  printf("time cycles: %lu\n", total_clock);
+  printf("time cycles: %" PRcycle "\n", total_clock);
 }
 
 void printerr_total_clock(void) {
-  fprintf(stderr, "time cycles: %lu\n", total_clock);
+  fprintf(stderr, "time cycles: %" PRcycle "\n", total_clock);
 }
diff --git a/test/monniaux/cse2/loopaccess.c b/test/monniaux/cse2/loopaccess.c
new file mode 100644
index 00000000..5ddaeb66
--- /dev/null
+++ b/test/monniaux/cse2/loopaccess.c
@@ -0,0 +1,7 @@
+double toto(double x, int count) {
+  double r = 5*x + 3;
+  while (count > r) {
+    count --;
+  }
+  return 5*x + 3;
+}
diff --git a/test/monniaux/cse2/loopinvariant.c b/test/monniaux/cse2/loopinvariant.c
new file mode 100644
index 00000000..64caf80b
--- /dev/null
+++ b/test/monniaux/cse2/loopinvariant.c
@@ -0,0 +1,7 @@
+int toto(int *t, int n) {
+  int x = t[0];
+  for(int i=1; i<n; i++) {
+    if (t[i] > t[0]) return i;
+  }
+  return 0;
+}
diff --git a/test/monniaux/cse2/loopload.c b/test/monniaux/cse2/loopload.c
new file mode 100644
index 00000000..6e0925f7
--- /dev/null
+++ b/test/monniaux/cse2/loopload.c
@@ -0,0 +1,5 @@
+int find_index(int *t, int n) {
+  if (t[0] > 0) return 3;
+  while (n > 0) n--;
+  return t[0];
+}
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index 21541145..c7dc582b 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -1,13 +1,11 @@
+#include <stdint.h>
 #include <inttypes.h>
 #include <stdio.h>
-typedef unsigned long cycle_t;
-
-#ifdef MAX_MEASURES
-  static cycle_t _last_stop[MAX_MEASURES] = {0};
-  static cycle_t _total_cycles[MAX_MEASURES] = {0};
-#endif
 
 #ifdef __K1C__
+typedef uint64_t cycle_t;
+#define PRcycle PRId64
+
 #include <../../k1-cos/include/hal/cos_registers.h>
 
 static inline void cycle_count_config(void)
@@ -27,18 +25,57 @@ static inline cycle_t get_cycle(void)
 #else // not K1C
 static inline void cycle_count_config(void) { }
 
-#ifdef  __x86_64__
+#if defined(__i386__) || defined( __x86_64__)
+#define PRcycle PRId64
+typedef uint64_t cycle_t;
 #include <x86intrin.h>
 static inline cycle_t get_cycle(void) { return __rdtsc(); }
 
 #elif __riscv
+#ifdef __riscv32
+#define PRcycle PRId32
+typedef uint32_t cycle_t;
+#else
+#define PRcycle PRId64
+typedef uint64_t cycle_t;
+#endif
 static inline cycle_t get_cycle(void) {
   cycle_t cycles;
   asm volatile ("rdcycle %0" : "=r" (cycles));
   return cycles;
 }
 
+#elif defined (__ARM_ARCH) && (__ARM_ARCH >= 6)
+#if (__ARM_ARCH < 8)
+typedef uint32_t cycle_t;
+#define PRcycle PRId32
+
+/* need this kernel module
+https://github.com/zertyz/MTL/tree/master/cpp/time/kernel/arm */
+static inline cycle_t get_cycle(void) {
+  cycle_t cycles;
+  __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0":"=r" (cycles));
+  return cycles;
+}
 #else
+#define PRcycle PRId64
+typedef uint64_t cycle_t;
+/* need this kernel module:
+https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0
+
+on 5+ kernels, remove first argument of access_ok macro */
+
+static inline cycle_t get_cycle(void)
+{
+  uint64_t val;
+  __asm__ volatile("mrs %0, pmccntr_el0" : "=r"(val));
+  return val;
+}
+#endif
+
+#else
+#define PRcycle PRId32
+typedef uint32_t cycle_t;
 static inline cycle_t get_cycle(void) { return 0; }
 #endif
 #endif
@@ -48,3 +85,9 @@ static inline cycle_t get_cycle(void) { return 0; }
   #define TIMESTOP(i) {cycle_t cur = get_cycle(); _total_cycles[i] += cur - _last_stop[i]; _last_stop[i] = cur;}
   #define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("%d cycles: %" PRIu64 "\n", i, _total_cycles[i]); }
 #endif
+
+
+#ifdef MAX_MEASURES
+  static cycle_t _last_stop[MAX_MEASURES] = {0};
+  static cycle_t _total_cycles[MAX_MEASURES] = {0};
+#endif
diff --git a/test/monniaux/genann/Makefile b/test/monniaux/genann/Makefile
new file mode 100644
index 00000000..2e76ec63
--- /dev/null
+++ b/test/monniaux/genann/Makefile
@@ -0,0 +1,4 @@
+ALL_CFILES= example4shorter.c genann.c
+TARGET=genann4
+
+include ../rules.mk
diff --git a/test/monniaux/genann/make.proto b/test/monniaux/genann/make.proto
deleted file mode 100644
index 7c4248bf..00000000
--- a/test/monniaux/genann/make.proto
+++ /dev/null
@@ -1,2 +0,0 @@
-sources: example4shorter.c genann.c
-target: genann4
-\ No newline at end of file
diff --git a/test/monniaux/glpk-4.65/Makefile b/test/monniaux/glpk-4.65/Makefile
index a0ab40dc..eaa3f4b0 100644
--- a/test/monniaux/glpk-4.65/Makefile
+++ b/test/monniaux/glpk-4.65/Makefile
@@ -1,39 +1,6 @@
 ALL_CFLAGS += -I src/amd -I src/colamd -I src/mpl -I src/simplex -I src/api -I src/intopt -I src/minisat -I src/npp -I src/zlib -I src/bflib -I src/env -I src/misc -I src/draft -I src
-
-include ../rules.mk
-
-LIBS = -lm
-
-src=examples/glpsol.c $(wildcard src/*/*.c)
-
-PRODUCTS?=glpsol.gcc.host glpsol.ccomp.host glpsol.gcc.k1c glpsol.gcc.o1.k1c glpsol.ccomp.k1c
-PRODUCTS_OUT=$(addsuffix .out,$(PRODUCTS))
-
-all: $(PRODUCTS)
-
-.PHONY:
-run: measures.csv
-
-
-glpsol.gcc.host: $(src:.c=.gcc.host.o) ../clock.gcc.host.o 
-	$(CC) $(CFLAGS) $+ $(LIBS) -o $@
-glpsol.ccomp.host: $(src:.c=.ccomp.host.o) ../clock.gcc.host.o 
-	$(CCOMP) $(CCOMPFLAGS) $+ $(LIBS) -o $@
-glpsol.gcc.k1c: $(src:.c=.gcc.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CC) $(K1C_CFLAGS) $+  $(LIBS) -o $@
-glpsol.gcc.o1.k1c: $(src:.c=.gcc.o1.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CC) $(K1C_CFLAGS_O1) $+  $(LIBS) -o $@
-glpsol.ccomp.k1c: $(src:.c=.ccomp.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+  $(LIBS) -o $@
-
+ALL_CFILES=examples/glpsol.c $(wildcard src/*/*.c)
+TARGET=glpk
 EXECUTE_ARGS=--math examples/prod.mod
 
-measures.csv: $(PRODUCTS_OUT)
-	echo "benches, gcc host,ccomp host,gcc k1c,gcc o1 k1c,ccomp k1c" > $@
-
-.SECONDARY:
-
-.PHONY:
-clean:
-	rm -f *.o *.s *.k1c *.csv
-
+include ../rules.mk
diff --git a/test/monniaux/jpeg-6b/Makefile b/test/monniaux/jpeg-6b/Makefile
index 5a45b729..2bec9bb7 100644
--- a/test/monniaux/jpeg-6b/Makefile
+++ b/test/monniaux/jpeg-6b/Makefile
@@ -1,6 +1,6 @@
-all: cjpeg.gcc.k1c.out djpeg.gcc.k1c.out cjpeg.gcc.o1.k1c.out djpeg.gcc.o1.k1c.out cjpeg.ccomp.k1c.out djpeg.ccomp.k1c.out
+TARGET=jpeg-6b
 
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
+ALL_CFILES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
         jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
         jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
         jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
@@ -8,36 +8,53 @@ LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
         jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
         jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
         jquant2.c jutils.c jmemmgr.c jmemansi.c
-CSOURCES=$(LIBSOURCES) rdppm.c rdgif.c rdtarga.c rdrle.c rdbmp.c rdswitch.c cdjpeg.c wrppm.c wrgif.c wrtarga.c wrrle.c wrbmp.c rdcolmap.c
+ALL_CFILES+=rdppm.c rdgif.c rdtarga.c rdrle.c rdbmp.c rdswitch.c cdjpeg.c wrppm.c wrgif.c wrtarga.c wrrle.c wrbmp.c rdcolmap.c
+ALL_CFILES+=cjpeg.c
 
-LIB_K1C_GCC_OFILES=$(CSOURCES:.c=.gcc.k1c.o)
-LIB_K1C_GCC_O1_OFILES=$(CSOURCES:.c=.gcc.o1.k1c.o)
-LIB_K1C_CCOMP_OFILES=$(CSOURCES:.c=.ccomp.k1c.o)
+EXECUTE_ARGS=-dct int -outfile __BASE__.jpg testimg.ppm 2> __BASE__.out
 
 include ../rules.mk
 
-cjpeg.gcc.k1c: $(LIB_K1C_GCC_OFILES) cjpeg.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS) -o $@ $+ ../clock.gcc.k1c.o
-djpeg.gcc.k1c: $(LIB_K1C_GCC_OFILES) djpeg.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS) -o $@ $+ ../clock.gcc.k1c.o
-
-cjpeg.gcc.o1.k1c: $(LIB_K1C_GCC_O1_OFILES) cjpeg.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS_O1) -o $@ $+ ../clock.gcc.k1c.o
-djpeg.gcc.o1.k1c: $(LIB_K1C_GCC_O1_OFILES) djpeg.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS_O1) -o $@ $+ ../clock.gcc.k1c.o
-
-cjpeg.ccomp.k1c: $(LIB_K1C_CCOMP_OFILES) cjpeg.gcc.k1c.o
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) -o $@ $+ ../clock.gcc.k1c.o
-djpeg.ccomp.k1c: $(LIB_K1C_CCOMP_OFILES) djpeg.gcc.k1c.o
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) -o $@ $+ ../clock.gcc.k1c.o
-
-
-djpeg.%.out: djpeg.%
-	$(EXECUTE_CYCLES) $< -dct int -ppm -outfile $@.ppm testorig.jpg 2> $@
-	cmp $@.ppm testimg.ppm 2>> $@
-
-cjpeg.%.out: cjpeg.%
-	$(EXECUTE_CYCLES) $< -dct int -outfile $@.jpg testimg.ppm 2> $@
-	cmp $@.jpg testimg.jpg 2>> $@
-
-.SECONDARY:
+#all: cjpeg.gcc.k1c.out djpeg.gcc.k1c.out cjpeg.gcc.o1.k1c.out djpeg.gcc.o1.k1c.out cjpeg.ccomp.k1c.out djpeg.ccomp.k1c.out
+#
+#LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
+#        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
+#        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
+#        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
+#        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
+#        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
+#        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
+#        jquant2.c jutils.c jmemmgr.c jmemansi.c
+#CSOURCES=$(LIBSOURCES) rdppm.c rdgif.c rdtarga.c rdrle.c rdbmp.c rdswitch.c cdjpeg.c wrppm.c wrgif.c wrtarga.c wrrle.c wrbmp.c rdcolmap.c
+#
+#LIB_K1C_GCC_OFILES=$(CSOURCES:.c=.gcc.k1c.o)
+#LIB_K1C_GCC_O1_OFILES=$(CSOURCES:.c=.gcc.o1.k1c.o)
+#LIB_K1C_CCOMP_OFILES=$(CSOURCES:.c=.ccomp.k1c.o)
+#
+#include ../rules.mk
+#
+#cjpeg.gcc.k1c: $(LIB_K1C_GCC_OFILES) cjpeg.gcc.k1c.o
+#	$(K1C_CC) $(K1C_CFLAGS) -o $@ $+ ../clock.gcc.k1c.o
+#djpeg.gcc.k1c: $(LIB_K1C_GCC_OFILES) djpeg.gcc.k1c.o
+#	$(K1C_CC) $(K1C_CFLAGS) -o $@ $+ ../clock.gcc.k1c.o
+#
+#cjpeg.gcc.o1.k1c: $(LIB_K1C_GCC_O1_OFILES) cjpeg.gcc.k1c.o
+#	$(K1C_CC) $(K1C_CFLAGS_O1) -o $@ $+ ../clock.gcc.k1c.o
+#djpeg.gcc.o1.k1c: $(LIB_K1C_GCC_O1_OFILES) djpeg.gcc.k1c.o
+#	$(K1C_CC) $(K1C_CFLAGS_O1) -o $@ $+ ../clock.gcc.k1c.o
+#
+#cjpeg.ccomp.k1c: $(LIB_K1C_CCOMP_OFILES) cjpeg.gcc.k1c.o
+#	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) -o $@ $+ ../clock.gcc.k1c.o
+#djpeg.ccomp.k1c: $(LIB_K1C_CCOMP_OFILES) djpeg.gcc.k1c.o
+#	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) -o $@ $+ ../clock.gcc.k1c.o
+#
+#
+#djpeg.%.out: djpeg.%
+#	$(EXECUTE_CYCLES) $< -dct int -ppm -outfile $@.ppm testorig.jpg 2> $@
+#	cmp $@.ppm testimg.ppm 2>> $@
+#
+#cjpeg.%.out: cjpeg.%
+#	$(EXECUTE_CYCLES) $< -dct int -outfile $@.jpg testimg.ppm 2> $@
+#	cmp $@.jpg testimg.jpg 2>> $@
+#
+#.SECONDARY:
diff --git a/test/monniaux/moves/array.c b/test/monniaux/moves/array.c
new file mode 100644
index 00000000..faa1d96b
--- /dev/null
+++ b/test/monniaux/moves/array.c
@@ -0,0 +1,18 @@
+void incr_double_array(double *t) {
+  double x0 = 1.0;
+  double t0 = t[0];
+  double x1 = 1.0;
+  double t1 = t[1];
+  double x2 = 1.0;
+  double t2 = t[2];
+  double x3 = 1.0;
+  double t3 = t[3];
+  t0 = t0 + x0;
+  t1 = t1 + x1;
+  t2 = t2 + x2;
+  t3 = t3 + x3;
+  t[0] = t0;
+  t[1] = t1;
+  t[2] = t2;
+  t[3] = t3;
+}
diff --git a/test/monniaux/ncompress/Makefile b/test/monniaux/ncompress/Makefile
index cf543976..14a99d0b 100644
--- a/test/monniaux/ncompress/Makefile
+++ b/test/monniaux/ncompress/Makefile
@@ -1,52 +1,4 @@
-include ../rules.mk
-
-all: check
-
-
-all: compress.gcc.host compress.ccomp.host compress.gcc.k1c compress.ccomp.k1c
-
-compress.gcc.host : compress42.c ../clock.gcc.host.o
-	$(CC) $(CFLAGS) $+ -o $@
-
-compress.ccomp.host : compress42.c ../clock.gcc.host.o
-	$(CCOMP) $(CCOMPFLAGS) $+ -o $@
-
-compress.gcc.k1c : compress42.gcc.k1c.o ../clock.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS) $+ -o $@
-
-compress.ccomp.k1c : compress42.ccomp.k1c.o ../clock.gcc.k1c.o
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
-
-INFILE=Makefile
-COMPRESSED=foo.gcc.host.Z
-
-foo.gcc.host.Z: compress.gcc.host $(INFILE)
-	./compress.gcc.host <$(INFILE) >foo.gcc.host.Z 2> foo.gcc.host.Z.out
+TARGET=compress
+EXECUTE_ARGS= < Makefile > __BASE__.Z 2> __BASE__.out
 
-foo.ccomp.k1c.Z: compress.ccomp.k1c $(INFILE)
-	$(EXECUTE) ./compress.ccomp.k1c <$(INFILE) >foo.ccomp.k1c.Z 2> foo.ccomp.k1c.Z.out
-
-foo.gcc.k1c.Z: compress.gcc.k1c $(INFILE)
-	$(EXECUTE) ./compress.gcc.k1c <$(INFILE) >foo.gcc.k1c.Z 2> foo.gcc.k1c.Z.out
-
-foo.gcc.host.txt: compress.gcc.host $(COMPRESSED)
-	./compress.gcc.host -d <$(COMPRESSED) >foo.gcc.host.txt 2> foo.gcc.host.txt.out
-
-foo.ccomp.k1c.txt: compress.gcc.host $(COMPRESSED)
-	$(EXECUTE) ./compress.ccomp.k1c -d <$(COMPRESSED) >foo.ccomp.k1c.txt 2> foo.ccomp.k1c.txt.out
-
-foo.gcc.k1c.txt: compress.gcc.host $(COMPRESSED)
-	$(EXECUTE) ./compress.gcc.k1c -d <$(COMPRESSED) >foo.gcc.k1c.txt 2> foo.gcc.k1c.txt.out
-
-check: foo.gcc.host.Z foo.gcc.host.txt foo.ccomp.k1c.Z foo.ccomp.k1c.txt foo.gcc.k1c.Z foo.gcc.k1c.txt
-	cmp foo.gcc.host.Z foo.ccomp.k1c.Z
-	cmp foo.gcc.host.Z foo.gcc.k1c.Z
-	cmp foo.gcc.host.txt foo.ccomp.k1c.txt
-	cmp foo.gcc.host.txt foo.gcc.k1c.txt
-
-clean:
-	rm -f *.Z *.txt *.out *.o *.s *.host *.k1c
-
-.PHONY: clean
-
-.SECONDARY: %.s
+include ../rules.mk
diff --git a/test/monniaux/ocaml/Makefile b/test/monniaux/ocaml/Makefile
index b63c8864..20f32b65 100644
--- a/test/monniaux/ocaml/Makefile
+++ b/test/monniaux/ocaml/Makefile
@@ -1,33 +1,6 @@
-ALL_CFLAGS=-Ibyterun
+TARGET=ocaml
+ALL_CFLAGS=-Ibyterun -lm
+ALL_CFILES=$(wildcard byterun/*.c)
 EXECUTE_ARGS=examples/quicksort
 
 include ../rules.mk
-
-ALL_CCOMPFLAGS=
-LDLIBS=-lm
-
-CFILES=$(wildcard byterun/*.c)
-
-CCOMP_K1C_S=$(patsubst %.c,%.ccomp.k1c.s,$(CFILES))
-CCOMP_HOST_S=$(patsubst %.c,%.ccomp.host.s,$(CFILES))
-
-GCC_K1C_S=$(patsubst %.c,%.gcc.k1c.s,$(CFILES))
-GCC_O1_K1C_S=$(patsubst %.c,%.gcc.o1.k1c.s,$(CFILES))
-GCC_HOST_S=$(patsubst %.c,%.gcc.host.s,$(CFILES))
-
-all: $(CCOMP_K1C_S) $(GCC_K1C_S) ocamlrun.ccomp.k1c.out ocamlrun.gcc.k1c.out ocamlrun.gcc.o1.k1c.out
-
-ocamlrun.ccomp.k1c : $(CCOMP_K1C_S) ../clock.gcc.k1c.o
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@ $(LDLIBS)
-
-ocamlrun.ccomp.host : $(CCOMP_HOST_S) ../clock.gcc.host.o
-	$(CCOMP) $(CCOMPFLAGS) $+ -o $@ $(LDLIBS)
-
-ocamlrun.gcc.k1c : $(GCC_K1C_S) ../clock.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS) $+ -o $@ $(LDLIBS)
-
-ocamlrun.gcc.o1.k1c : $(GCC_O1_K1C_S) ../clock.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS_O1) $+ -o $@ $(LDLIBS)
-
-ocamlrun.gcc.host : $(GCC_HOST_S) ../clock.gcc.host.o
-	$(CC) $(CFLAGS) $+ -o $@ $(LDLIBS)
diff --git a/test/monniaux/pcre2-10.32/Makefile b/test/monniaux/pcre2-10.32/Makefile
index 98c2c8c2..b6b66c37 100644
--- a/test/monniaux/pcre2-10.32/Makefile
+++ b/test/monniaux/pcre2-10.32/Makefile
@@ -1,4 +1,7 @@
-CFILES = \
+TARGET=pcre2
+ALL_CFLAGS = -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8 
+EXECUTE_ARGS=testdata/testinput6 > /dev/null 2> __BASE__.out
+ALL_CFILES = \
        pcre2_auto_possess.c \
        pcre2_chartables.c \
        pcre2_compile.c \
@@ -28,39 +31,5 @@ CFILES = \
        pcre2posix.c \
        pcre2test.c
 
-HFILES = config.h  pcre2_internal.h    pcre2posix.h \
-pcre2.h   pcre2_intmodedep.h  pcre2_ucp.h
-
-K1C_GCC_OFILES=$(CFILES:.c=.gcc.k1c.o)
-K1C_GCC_OFILES_O1=$(CFILES:.c=.gcc.o1.k1c.o)
-K1C_CCOMP_OFILES=$(CFILES:.c=.ccomp.k1c.o)
-K1C_GCC_SFILES=$(CFILES:.c=.gcc.k1c.s)
-K1C_CCOMP_SFILES=$(CFILES:.c=.ccomp.k1c.s)
-HOST_GCC_OFILES=$(CFILES:.c=.gcc.host.o)
-
-all:	pcre2test.gcc.o1.k1c.out pcre2test.gcc.k1c.out pcre2test.ccomp.k1c.out $(K1C_GCC_SFILES) $(K1C_CCOMP_SFILES)
-
-ALL_CFLAGS = -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8 
-EXECUTE_ARGS = testdata/testinput6
-
 include ../rules.mk
 
-
-$(K1C_GCC_SFILES) $(K1C_GCC_OFILES_O1) $(K1C_CCOMP_SFILES) $(HOST_GCC_OFILES): $(HFILES)
-
-pcre2test.gcc.host: $(HOST_GCC_OFILES)
-	$(CC) $(CFLAGS) -o $@ $+ ../clock.gcc.host.o
-
-pcre2test.gcc.k1c: $(K1C_GCC_OFILES) ../clock.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS) -o $@ $+
-
-pcre2test.gcc.o1.k1c: $(K1C_GCC_OFILES_O1) ../clock.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS_O1) -o $@ $+
-
-pcre2test.ccomp.k1c: $(K1C_CCOMP_OFILES) ../clock.gcc.k1c.o
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) -o $@ $+
-
-.PHONY: clean
-
-clean:
-	rm -f *.s *.o *.k1c
diff --git a/test/monniaux/pcre2-10.32/pcre2test.c b/test/monniaux/pcre2-10.32/pcre2test.c
index 25a7c4a1..a1fb64cb 100644
--- a/test/monniaux/pcre2-10.32/pcre2test.c
+++ b/test/monniaux/pcre2-10.32/pcre2test.c
@@ -8792,7 +8792,7 @@ FREECONTEXTS;
 #endif
 
   clock_stop();
-  print_total_clock();
+  printerr_total_clock();
   
 return yield;
 }
diff --git a/test/monniaux/picosat-965/Makefile b/test/monniaux/picosat-965/Makefile
index 991278ff..a887c0de 100644
--- a/test/monniaux/picosat-965/Makefile
+++ b/test/monniaux/picosat-965/Makefile
@@ -1,37 +1,11 @@
 EXECUTE_ARGS=sudoku.sat
-
-include ../rules.mk
-
-#ALL_CFLAGS = -DNDEBUG
 ALL_CFLAGS = -DNALARM -DNZIP -DNGETRUSAGE -DNDEBUG
-K1C_CFLAGS += $(EMBEDDED_CFLAGS)
-K1C_CCOMPFLAGS += $(EMBEDDED_CFLAGS)
-CCOMPFLAGS += -fbitfields
-K1C_CCOMPFLAGS += -fbitfields # -fno-if-conversion
-
-K1C_CFLAGS += $(ALL_CFLAGS)
-K1C_CCOMPFLAGS += $(ALL_CFLAGS)
-CCOMPFLAGS += $(ALL_CFLAGS)
-CFLAGS += $(ALL_CFLAGS)
-
-all: picosat.ccomp.k1c.s version.ccomp.k1c.s app.ccomp.k1c.s main.ccomp.k1c.s picosat.gcc.k1c.s version.gcc.k1c.s app.gcc.k1c.s main.gcc.k1c.s picosat.ccomp.k1c.out picosat.gcc.o1.k1c.out picosat.gcc.k1c.out picosat picosat.ccomp.host.out picosat.gcc.host.out
-
-picosat.ccomp.k1c : picosat.ccomp.k1c.s version.ccomp.k1c.s app.ccomp.k1c.s main.ccomp.k1c.s ../clock.gcc.k1c.o
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
+ALL_CCOMPFLAGS += -fbitfields # -fno-if-conversion
+TARGET=picosat
+ALL_CFILES=picosat.c version.c app.c main.c
 
-picosat.gcc.k1c : picosat.gcc.k1c.s version.gcc.k1c.s app.gcc.k1c.s main.gcc.k1c.s ../clock.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS) $+ -o $@
-
-picosat.gcc.o1.k1c : picosat.gcc.o1.k1c.s version.gcc.o1.k1c.s app.gcc.o1.k1c.s main.gcc.o1.k1c.s ../clock.gcc.k1c.o
-	$(K1C_CC) $(K1C_CFLAGS_O1) $+ -o $@
-
-picosat.ccomp.host : picosat.ccomp.host.s version.ccomp.host.s app.ccomp.host.s main.ccomp.host.s ../clock.gcc.host.o
-	$(CCOMP) $(CCOMPFLAGS) $+ -o $@
-
-picosat.gcc.host : picosat.gcc.host.s version.gcc.host.s app.gcc.host.s main.gcc.host.s ../clock.gcc.host.o
-	$(CC) $(FLAGS) $+ -o $@
-
-clean:
-	-rm -f *.s *.k1c *.out
+include ../rules.mk
 
-.PHONY: clean
+# FIXME - what were these for?
+#K1C_CFLAGS += $(EMBEDDED_CFLAGS)
+#K1C_CCOMPFLAGS += $(EMBEDDED_CFLAGS)
diff --git a/test/monniaux/quicksort/quicksort_run.c b/test/monniaux/quicksort/quicksort_run.c
index c35d0752..3c640b24 100644
--- a/test/monniaux/quicksort/quicksort_run.c
+++ b/test/monniaux/quicksort/quicksort_run.c
@@ -13,7 +13,7 @@ int main (void) {
   quicksort(vec, len);
   quicksort_time = get_cycle() - quicksort_time;
   printf("sorted=%s\n"
-	 "time cycles:%" PRIu64 "\n",
+	 "time cycles:%" PRcycle "\n",
 	 data_vec_is_sorted(vec, len)?"true":"false",
 	 quicksort_time);
   free(vec);
diff --git a/test/monniaux/rules.mk b/test/monniaux/rules.mk
index 9d05b4d6..2de2c466 100644
--- a/test/monniaux/rules.mk
+++ b/test/monniaux/rules.mk
@@ -7,6 +7,9 @@ ALL_CFILES?=$(wildcard *.c)
 # Name of the target
 TARGET?=toto
 
+# Arguments of execution
+EXECUTE_ARGS?=
+
 # Name of the clock object
 CLOCK=../clock
 
@@ -25,30 +28,31 @@ K1C_CC?=k1-cos-gcc
 K1C_CCOMP?=ccomp
 
 # Command to execute
+#EXECUTE_CYCLES?=timeout --signal=SIGTERM 3m k1-cluster --syscall=libstd_scalls.so --cycle-based --
 EXECUTE_CYCLES?=k1-cluster --syscall=libstd_scalls.so --cycle-based --
 
 # You can define up to GCC4FLAGS and CCOMP4FLAGS
-GCC0FLAGS?=
-GCC1FLAGS?=$(ALL_GCCFLAGS) -O1 -g
+GCC0FLAGS?=$(ALL_GCCFLAGS) -O0
+GCC1FLAGS?=$(ALL_GCCFLAGS) -O1
 GCC2FLAGS?=$(ALL_GCCFLAGS) -O2
 GCC3FLAGS?=$(ALL_GCCFLAGS) -O3
 GCC4FLAGS?=
-CCOMP0FLAGS?=
-CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O1 -g
-CCOMP2FLAGS?=$(ALL_CCOMPFLAGS)
-CCOMP3FLAGS?=
+CCOMP0FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fno-postpass
+CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fpostpass= greedy
+CCOMP2FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fno-if-conversion
+CCOMP3FLAGS?=$(ALL_CCOMPFLAGS) -O2
 CCOMP4FLAGS?=
 
 # Prefix names
-GCC0PREFIX?=
+GCC0PREFIX?=.gcc.o0
 GCC1PREFIX?=.gcc.o1
 GCC2PREFIX?=.gcc.o2
 GCC3PREFIX?=.gcc.o3
 GCC4PREFIX?=
-CCOMP0PREFIX?=
-CCOMP1PREFIX?=.ccomp.o1
-CCOMP2PREFIX?=.ccomp.o2
-CCOMP3PREFIX?=
+CCOMP0PREFIX?=.ccomp.nobundle
+CCOMP1PREFIX?=.ccomp.greedy
+CCOMP2PREFIX?=.ccomp.noif
+CCOMP3PREFIX?=.ccomp
 CCOMP4PREFIX?=
 
 # List of outfiles, updated by gen_rules
@@ -92,7 +96,8 @@ obj/%.o: asm/%.s
 
 out/%.out: bin/%.bin
 	@mkdir -p $(@D)
-	$(EXECUTE_CYCLES) $< | tee $@
+	@rm -f $@
+	$(EXECUTE_CYCLES) $< $(subst __BASE__,$(patsubst %.out,%,$@),$(EXECUTE_ARGS)) | tee -a $@
 
 ##
 # Generating the rules for all the compiler/flags..
@@ -132,7 +137,7 @@ endif
 
 measures.csv: $(OUTFILES)
 	@echo $(FIRSTLINE) > $@
-	@for i in $(MEASURES); do\
+	@for i in "$(MEASURES)"; do\
 		first=$$(grep "$$i cycles" $(firstword $(OUTFILES)));\
 		if test ! -z "$$first"; then\
 			if [ "$$i" != "time" ]; then\
@@ -153,5 +158,5 @@ run: measures.csv
 
 clean:
 	rm -f *.o *.s *.bin *.out
-	rm -f asm/*.s bin/*.bin obj/*.o out/*.out
+	rm -rf asm/ bin/ obj/ out/
 
diff --git a/test/monniaux/run_benches.sh b/test/monniaux/run_benches.sh
index 5f9f22cb..2b2e28d6 100755
--- a/test/monniaux/run_benches.sh
+++ b/test/monniaux/run_benches.sh
@@ -1,12 +1,16 @@
 
 source benches.sh
 
+cores=$(grep -c ^processor /proc/cpuinfo)
+processes=$((cores/4))
+
 rm -f commands.txt
 for bench in $benches; do
-  echo "(cd $bench && make -j5 run)" >> commands.txt
+  echo "(cd $bench && echo \"Running $bench..\" &&\
+    make -j4 run > /dev/null && echo \"$bench DONE\")" >> commands.txt
 done
 
-cat commands.txt | xargs -n1 -I{} -P4 bash -c '{}'
+cat commands.txt | xargs -n1 -I{} -P$processes bash -c '{}'
 
 ##
 # Gather all the CSV files
diff --git a/test/monniaux/tiff-4.0.10/Makefile b/test/monniaux/tiff-4.0.10/Makefile
index db3428fa..ac1aa276 100644
--- a/test/monniaux/tiff-4.0.10/Makefile
+++ b/test/monniaux/tiff-4.0.10/Makefile
@@ -1,52 +1,7 @@
+TARGET=ppm2tiff
+ALL_CFLAGS=-lm
 ALL_CCOMPFLAGS = -flongdouble
+EXECUTE_ARGS= -c g3 __BASE__.g3.tif < example_bw.pbm
 
 include ../rules.mk
 
-LIBS=-lm
-
-src=$(wildcard *.c)
-
-PRODUCTS?=ppm2tiff.gcc.host ppm2tiff.ccomp.host ppm2tiff.gcc.k1c ppm2tiff.gcc.o1.k1c ppm2tiff.ccomp.k1c
-PRODUCTS_OUT=$(addsuffix .out,$(PRODUCTS))
-
-all: $(PRODUCTS)
-
-.PHONY:
-run: measures.csv
-
-ppm2tiff.gcc.host: $(src:.c=.gcc.host.o) ../clock.gcc.host.o 
-	$(CC) $(CFLAGS) $+ $(LIBS) -o $@
-ppm2tiff.ccomp.host: $(src:.c=.ccomp.host.o) ../clock.gcc.host.o 
-	$(CCOMP) $(CCOMPFLAGS) $+ $(LIBS) -o $@
-ppm2tiff.gcc.k1c: $(src:.c=.gcc.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CC) $(K1C_CFLAGS) $+ $(LIBS) -o $@
-ppm2tiff.gcc.o1.k1c: $(src:.c=.gcc.o1.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CC) $(K1C_CFLAGS_O1) $+ $(LIBS) -o $@
-ppm2tiff.ccomp.k1c: $(src:.c=.ccomp.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ $(LIBS) -o $@
-
-ppm2tiff.gcc.host.out: ppm2tiff.gcc.host
-	bunzip2 < example_bw.pbm.bz2 | ./$< -c g3 $<.g3.tif | tee $@
-
-ppm2tiff.ccomp.host.out: ppm2tiff.ccomp.host
-	bunzip2 < example_bw.pbm.bz2 | ./$< -c g3 $<.g3.tif | tee $@
-
-ppm2tiff.gcc.k1c.out: ppm2tiff.gcc.k1c
-	bunzip2 < example_bw.pbm.bz2 | $(EXECUTE_CYCLES) ./$< -c g3 $<.g3.tif | tee $@
-
-ppm2tiff.gcc.o1.k1c.out: ppm2tiff.gcc.o1.k1c
-	bunzip2 < example_bw.pbm.bz2 | $(EXECUTE_CYCLES) ./$< -c g3 $<.g3.tif | tee $@
-
-ppm2tiff.ccomp.k1c.out: ppm2tiff.ccomp.k1c
-	bunzip2 < example_bw.pbm.bz2 | $(EXECUTE_CYCLES) ./$< -c g3 $<.g3.tif | tee $@
-
-measures.csv: $(PRODUCTS_OUT)
-	echo "benches, gcc host,ccomp host,gcc k1c,gcc o1 k1c,ccomp k1c" > $@
-	echo "ppm2tiff ", $$(grep 'cycles' ppm2tiff.gcc.host.out | cut -d':' -f2), $$(grep 'cycles' ppm2tiff.ccomp.host.out | cut -d':' -f2), $$(grep 'cycles' ppm2tiff.gcc.k1c.out | cut -d':' -f2), $$(grep 'cycles' ppm2tiff.gcc.o1.k1c.out | cut -d':' -f2), $$(grep 'cycles' ppm2tiff.ccomp.k1c.out | cut -d':' -f2)>> $@
-
-.SECONDARY:
-
-.PHONY:
-clean:
-	rm -f *.o *.s *.k1c *.csv
-
diff --git a/test/monniaux/tiff-4.0.10/example_bw.pbm b/test/monniaux/tiff-4.0.10/example_bw.pbm
new file mode 100644
index 00000000..971a82bb
--- /dev/null
+++ b/test/monniaux/tiff-4.0.10/example_bw.pbm
diff --git a/test/monniaux/yarpgen/Makefile b/test/monniaux/yarpgen/Makefile
index 9da82deb..28bd5ae0 100644
--- a/test/monniaux/yarpgen/Makefile
+++ b/test/monniaux/yarpgen/Makefile
@@ -1,52 +1,98 @@
-YARPGEN=yarpgen
-MAX=300
+TARGET_CCOMP=../../../ccomp
+TARGET_CC=gcc
+
+all:
+
+.SECONDARY:
+
+ifndef YARPGEN
+YARPGEN=./yarpgen
+GENERATOR=yarpgen
+endif
+
+ifdef BITS
+YARPGEN+=-m $(BITS)
+CFLAGS+=-m$(BITS)
+endif
+
+MAX=129
 PREFIX=ran%06.f
-include ../rules.mk
-
-K1C_CCOMPFLAGS += -funprototyped -fbitfields
-CCOMPFLAGS += -funprototyped -fbitfields
-
-TARGETS_C=$(shell seq --format $(PREFIX)/func.c 0 $(MAX)) \
-	$(shell seq --format $(PREFIX)/driver.c 0 $(MAX)) \
-	$(shell seq --format $(PREFIX)/init.h 0 $(MAX))
-TARGETS_CCOMP_K1C_S=$(shell seq --format $(PREFIX)/func.ccomp.k1c.s 0 $(MAX)) \
-	$(shell seq --format $(PREFIX)/driver.ccomp.k1c.s 0 $(MAX))
-TARGETS_GCC_K1C_S=$(shell seq --format $(PREFIX)/func.gcc.k1c.s 0 $(MAX)) \
-	$(shell seq --format $(PREFIX)/driver.gcc.k1c.s 0 $(MAX))
-TARGETS_CCOMP_HOST_S=$(shell seq --format $(PREFIX)/func.ccomp.host.s 0 $(MAX)) \
-	$(shell seq --format $(PREFIX)/driver.ccomp.host.s 0 $(MAX))
-TARGETS_GCC_HOST_S=$(shell seq --format $(PREFIX)/func.gcc.host.s 0 $(MAX)) \
-	$(shell seq --format $(PREFIX)/driver.gcc.host.s 0 $(MAX))
-TARGETS_CCOMP_K1C_OUT=$(shell seq --format $(PREFIX)/example.ccomp.k1c.out 0 $(MAX))
-TARGETS_GCC_K1C_OUT=$(shell seq --format $(PREFIX)/example.gcc.k1c.out 0 $(MAX))
-TARGETS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 0 $(MAX))
-TARGETS_CCOMP_HOST_OUT=$(shell seq --format $(PREFIX)/example.ccomp.host.out 0 $(MAX))
-TARGETS_CMP=$(shell seq --format $(PREFIX)/example.k1c.cmp 0 $(MAX))
-
-all:	$(TARGETS_CCOMP_K1C_OUT) $(TARGETS_GCC_K1C_OUT) $(TARGETS_GCC_HOST_OUT) $(TARGETS_CCOMP_HOST_OUT) $(TARGETS_CCOMP_K1C_S) $(TARGETS_GCC_K1C_S) $(TARGETS_GCC_HOST_S) $(TARGETS_CCOMP_HOST_S) $(TARGETS_CMP) $(TARGETS_C)
-
-ran%/func.ccomp.k1c.s ran%/func.gcc.k1c.s ran%/func.ccomp.host.s ran%/func.gcc.host.s : ran%/init.h
-
-ran%/example.ccomp.k1c: ran%/func.ccomp.k1c.o ran%/driver.ccomp.k1c.o 
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
-
-ran%/example.gcc.k1c: ran%/func.gcc.k1c.o ran%/driver.gcc.k1c.o 
-	$(K1C_CC) $(K1C_CFLAGS) $+ -o $@
-
-ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o 
-	$(CC) $(CFLAGS) $+ -o $@
 
-ran%/example.ccomp.host: ran%/func.ccomp.host.o ran%/driver.ccomp.host.o 
-	$(CCOMP) $(CCOMPFLAGS) $+ -o $@
+CCOMPOPTS=-static
+CCOMPFLAGS+=-funprototyped -fbitfields -fno-cse2 -stdlib ../../../runtime # FIXME
+
+TESTS_C=$(shell seq --format $(PREFIX)/func.c 1 $(MAX)) \
+	$(shell seq --format $(PREFIX)/driver.c 1 $(MAX)) \
+	$(shell seq --format $(PREFIX)/init.c 1 $(MAX)) \
+	$(shell seq --format $(PREFIX)/hash.c 1 $(MAX)) \
+	$(shell seq --format $(PREFIX)/check.c 1 $(MAX)) \
+	$(shell seq --format $(PREFIX)/init.h 1 $(MAX))
+
+$(TESTS_C): $(GENERATOR)
+
+TESTS_CCOMP_TARGET_S=$(TEST_C:.c=.ccomp.target.s)
+TESTS_GCC_TARGET_S=$(TEST_C:.c=.gcc.target.s)
+TESTS_GCC_HOST_S=$(TEST_C:.c=.gcc.host.s)
+TESTS_CCOMP_TARGET_OUT=$(shell seq --format $(PREFIX)/example.ccomp.target.out 1 $(MAX))
+TESTS_GCC_TARGET_OUT=$(shell seq --format $(PREFIX)/example.gcc.target.out 1 $(MAX))
+TESTS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 1 $(MAX))
+TESTS_CMP=$(shell seq --format $(PREFIX)/example.target.cmp 1 $(MAX)) # $(shell seq --format $(PREFIX)/example.host_target.cmp 1 $(MAX))
+
+all:	$(TESTS_CCOMP_TARGET_OUT) $(TESTS_GCC_TARGET_OUT) $(TESTS_CCOMP_TARGET_S) $(TESTS_GCC_TARGET_S) $(TESTS_CMP) $(TESTS_C)
+
+tests_c: $(TESTS_C)
+
+tests_s: $(TESTS_CCOMP_TARGET_S)
+
+%.ccomp.target.s : %.c
+	$(TARGET_CCOMP) $(CCOMPOPTS) $(CCOMPFLAGS) -S -o $@ $<
+
+%.gcc.target.s : %.c
+	$(TARGET_CC) $(CCOMPOPTS) -S -o $@ $<
 
-ran%/driver.c ran%/func.c ran%/init.h:
-	-mkdir ran$*
+%.gcc.host.s : %.c
+	$(CC) $(CFLAGS) -S -o $@ $<
+
+%.target.o : %.target.s
+	$(TARGET_CC) -c -o $@ $<
+
+%.host.o : %.host.s
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+%.target.out : %.target
+	$(EXECUTE) $< | tee $@
+
+%.host.out : %.host
+	./$< | tee $@
+
+ran%/func.ccomp.target.s ran%/func.gcc.target.s ran%/func.ccomp.host.s ran%/func.gcc.host.s ran%/init.gcc.host.s : ran%/init.h
+
+ran%/example.ccomp.target: ran%/func.ccomp.target.o ran%/driver.ccomp.target.o  ran%/init.ccomp.target.o ran%/check.ccomp.target.o ran%/hash.ccomp.target.o
+	$(TARGET_CCOMP) $(CCOMPOPTS) $(CCOMPFLAGS) $+ -o $@
+
+ran%/example.gcc.target: ran%/func.gcc.target.o ran%/driver.gcc.target.o ran%/init.gcc.target.o ran%/check.gcc.target.o ran%/hash.gcc.target.o
+	$(TARGET_CC) $(TARGET_CFLAGS) $+ -o $@
+
+ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o ran%/init.gcc.host.o ran%/check.gcc.host.o ran%/hash.gcc.host.o
+	$(CC) $(CFLAGS) $+ -o $@
+
+ran%/driver.c ran%/func.c ran%/init.c ran%/check.c ran%/hash.c ran%/init.h:
+	mkdir -p ran$*
 	$(YARPGEN) --seed=$* --out-dir=ran$*/ --std=c99
 
-ran%/example.k1c.cmp : ran%/example.gcc.k1c.out ran%/example.ccomp.k1c.out
+ran%/example.target.cmp : ran%/example.gcc.target.out ran%/example.ccomp.target.out
 	cmp $+ > $@
 
-.PHONY: all clean
+ran%/example.host_target.cmp : ran%/example.gcc.host.out ran%/example.ccomp.target.out
+	cmp $+ > $@
+
+yarpgen:
+	curl -L -o yarpgen_v1.1.tar.gz https://github.com/intel/yarpgen/archive/v1.1.tar.gz
+	tar xfz yarpgen_v1.1.tar.gz
+	$(MAKE) CXX=g++ -C yarpgen-1.1
+	cp yarpgen-1.1/yarpgen $@
+
+.PHONY: all clean tests_c tests_c
 
 clean:
 	-rm -rf ran*
diff --git a/test/monniaux/yarpgen/Makefile.old b/test/monniaux/yarpgen/Makefile.old
new file mode 100644
index 00000000..9da82deb
--- /dev/null
+++ b/test/monniaux/yarpgen/Makefile.old
@@ -0,0 +1,52 @@
+YARPGEN=yarpgen
+MAX=300
+PREFIX=ran%06.f
+include ../rules.mk
+
+K1C_CCOMPFLAGS += -funprototyped -fbitfields
+CCOMPFLAGS += -funprototyped -fbitfields
+
+TARGETS_C=$(shell seq --format $(PREFIX)/func.c 0 $(MAX)) \
+	$(shell seq --format $(PREFIX)/driver.c 0 $(MAX)) \
+	$(shell seq --format $(PREFIX)/init.h 0 $(MAX))
+TARGETS_CCOMP_K1C_S=$(shell seq --format $(PREFIX)/func.ccomp.k1c.s 0 $(MAX)) \
+	$(shell seq --format $(PREFIX)/driver.ccomp.k1c.s 0 $(MAX))
+TARGETS_GCC_K1C_S=$(shell seq --format $(PREFIX)/func.gcc.k1c.s 0 $(MAX)) \
+	$(shell seq --format $(PREFIX)/driver.gcc.k1c.s 0 $(MAX))
+TARGETS_CCOMP_HOST_S=$(shell seq --format $(PREFIX)/func.ccomp.host.s 0 $(MAX)) \
+	$(shell seq --format $(PREFIX)/driver.ccomp.host.s 0 $(MAX))
+TARGETS_GCC_HOST_S=$(shell seq --format $(PREFIX)/func.gcc.host.s 0 $(MAX)) \
+	$(shell seq --format $(PREFIX)/driver.gcc.host.s 0 $(MAX))
+TARGETS_CCOMP_K1C_OUT=$(shell seq --format $(PREFIX)/example.ccomp.k1c.out 0 $(MAX))
+TARGETS_GCC_K1C_OUT=$(shell seq --format $(PREFIX)/example.gcc.k1c.out 0 $(MAX))
+TARGETS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 0 $(MAX))
+TARGETS_CCOMP_HOST_OUT=$(shell seq --format $(PREFIX)/example.ccomp.host.out 0 $(MAX))
+TARGETS_CMP=$(shell seq --format $(PREFIX)/example.k1c.cmp 0 $(MAX))
+
+all:	$(TARGETS_CCOMP_K1C_OUT) $(TARGETS_GCC_K1C_OUT) $(TARGETS_GCC_HOST_OUT) $(TARGETS_CCOMP_HOST_OUT) $(TARGETS_CCOMP_K1C_S) $(TARGETS_GCC_K1C_S) $(TARGETS_GCC_HOST_S) $(TARGETS_CCOMP_HOST_S) $(TARGETS_CMP) $(TARGETS_C)
+
+ran%/func.ccomp.k1c.s ran%/func.gcc.k1c.s ran%/func.ccomp.host.s ran%/func.gcc.host.s : ran%/init.h
+
+ran%/example.ccomp.k1c: ran%/func.ccomp.k1c.o ran%/driver.ccomp.k1c.o 
+	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
+
+ran%/example.gcc.k1c: ran%/func.gcc.k1c.o ran%/driver.gcc.k1c.o 
+	$(K1C_CC) $(K1C_CFLAGS) $+ -o $@
+
+ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o 
+	$(CC) $(CFLAGS) $+ -o $@
+
+ran%/example.ccomp.host: ran%/func.ccomp.host.o ran%/driver.ccomp.host.o 
+	$(CCOMP) $(CCOMPFLAGS) $+ -o $@
+
+ran%/driver.c ran%/func.c ran%/init.h:
+	-mkdir ran$*
+	$(YARPGEN) --seed=$* --out-dir=ran$*/ --std=c99
+
+ran%/example.k1c.cmp : ran%/example.gcc.k1c.out ran%/example.ccomp.k1c.out
+	cmp $+ > $@
+
+.PHONY: all clean
+
+clean:
+	-rm -rf ran*
diff --git a/test/monniaux/zlib-1.2.11/Makefile b/test/monniaux/zlib-1.2.11/Makefile
index 202f2ea4..9e6920f5 100644
--- a/test/monniaux/zlib-1.2.11/Makefile
+++ b/test/monniaux/zlib-1.2.11/Makefile
@@ -1,53 +1,62 @@
-ALL_CCOMPFLAGS = -faddx
-ALL_CFLAGS = -D_POSIX_C_SOURCE=2 -D_LARGEFILE64_SOURCE=1 -U__STRICT_ANSI__
+TARGET=zlib
 
-include ../rules.mk
-
-src=$(wildcard *.c)
-
-PRODUCTS?=minigzip.gcc.host minigzip.ccomp.host minigzip.gcc.k1c minigzip.gcc.o1.k1c minigzip.ccomp.k1c
-PRODUCTS_OUT=$(addsuffix .out,$(PRODUCTS))
-
-all: $(PRODUCTS)
-
-.PHONY:
-run: measures.csv
-
-
-minigzip.gcc.host: $(src:.c=.gcc.host.o) ../clock.gcc.host.o 
-	$(CC) $(CFLAGS) $+ -lm -o $@
-minigzip.ccomp.host: $(src:.c=.ccomp.host.o) ../clock.gcc.host.o 
-	$(CCOMP) $(CCOMPFLAGS) $+ -lm -o $@
-minigzip.gcc.k1c: $(src:.c=.gcc.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CC) $(K1C_CFLAGS) $+ -lm -o $@
-minigzip.gcc.o1.k1c: $(src:.c=.gcc.o1.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CC) $(K1C_CFLAGS_O1) $+ -lm -o $@
-minigzip.ccomp.k1c: $(src:.c=.ccomp.k1c.o) ../clock.gcc.k1c.o 
-	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -lm -o $@
-measures.csv: $(PRODUCTS_OUT)
-	echo "benches, gcc host,ccomp host,gcc k1c,gcc o1 k1c,ccomp k1c" > $@
-	echo "zlib ", $$(grep 'cycles' minigzip.gcc.host.out | cut -d':' -f2), $$(grep 'cycles' minigzip.ccomp.host.out | cut -d':' -f2), $$(grep 'cycles' minigzip.gcc.k1c.out | cut -d':' -f2), $$(grep 'cycles' minigzip.gcc.o1.k1c.out | cut -d':' -f2), $$(grep 'cycles' minigzip.ccomp.k1c.out | cut -d':' -f2)>> $@
+ALL_CCOMPFLAGS=-faddx
+ALL_CFLAGS=  -D_POSIX_C_SOURCE=2 -D_LARGEFILE64_SOURCE=1 -U__STRICT_ANSI__
+EXECUTE_ARGS=< zlib_small.txt > /dev/null 2> __BASE__.out
 
-SAMPLE_FILE=zlib.h
-
-minigzip.gcc.host.out minigzip.gcc.host.output: minigzip.gcc.host
-	./$< < $(SAMPLE_FILE) > $<.output 2> $@
-
-minigzip.ccomp.host.out minigzip.ccomp.host.output: minigzip.ccomp.host
-	./$< < $(SAMPLE_FILE) > $<.output 2> $@
-
-minigzip.gcc.k1c.out minigzip.gcc.k1c.output: minigzip.gcc.k1c
-	$(EXECUTE_CYCLES) $< < $(SAMPLE_FILE) > $<.output 2> $@
-
-minigzip.gcc.o1.k1c.out minigzip.gcc.o1.k1c.output: minigzip.gcc.o1.k1c
-	$(EXECUTE_CYCLES) $< < $(SAMPLE_FILE) > $<.output 2> $@
-
-minigzip.ccomp.k1c.out minigzip.ccomp.k1c.output: minigzip.ccomp.k1c
-	$(EXECUTE_CYCLES) $< < $(SAMPLE_FILE) > $<.output 2> $@
-
-.SECONDARY:
+include ../rules.mk
 
-.PHONY:
-clean:
-	rm -f *.o *.s *.k1c *.csv
 
+#ALL_CCOMPFLAGS = -faddx
+#ALL_CFLAGS = -D_POSIX_C_SOURCE=2 -D_LARGEFILE64_SOURCE=1 -U__STRICT_ANSI__
+#
+#include ../rules.mk
+#
+#src=$(wildcard *.c)
+#
+#PRODUCTS?=minigzip.gcc.host minigzip.ccomp.host minigzip.gcc.k1c minigzip.gcc.o1.k1c minigzip.ccomp.k1c
+#PRODUCTS_OUT=$(addsuffix .out,$(PRODUCTS))
+#
+#all: $(PRODUCTS)
+#
+#.PHONY:
+#run: measures.csv
+#
+#
+#minigzip.gcc.host: $(src:.c=.gcc.host.o) ../clock.gcc.host.o 
+#	$(CC) $(CFLAGS) $+ -lm -o $@
+#minigzip.ccomp.host: $(src:.c=.ccomp.host.o) ../clock.gcc.host.o 
+#	$(CCOMP) $(CCOMPFLAGS) $+ -lm -o $@
+#minigzip.gcc.k1c: $(src:.c=.gcc.k1c.o) ../clock.gcc.k1c.o 
+#	$(K1C_CC) $(K1C_CFLAGS) $+ -lm -o $@
+#minigzip.gcc.o1.k1c: $(src:.c=.gcc.o1.k1c.o) ../clock.gcc.k1c.o 
+#	$(K1C_CC) $(K1C_CFLAGS_O1) $+ -lm -o $@
+#minigzip.ccomp.k1c: $(src:.c=.ccomp.k1c.o) ../clock.gcc.k1c.o 
+#	$(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -lm -o $@
+#measures.csv: $(PRODUCTS_OUT)
+#	echo "benches, gcc host,ccomp host,gcc k1c,gcc o1 k1c,ccomp k1c" > $@
+#	echo "zlib ", $$(grep 'cycles' minigzip.gcc.host.out | cut -d':' -f2), $$(grep 'cycles' minigzip.ccomp.host.out | cut -d':' -f2), $$(grep 'cycles' minigzip.gcc.k1c.out | cut -d':' -f2), $$(grep 'cycles' minigzip.gcc.o1.k1c.out | cut -d':' -f2), $$(grep 'cycles' minigzip.ccomp.k1c.out | cut -d':' -f2)>> $@
+#
+#SAMPLE_FILE=zlib.h
+#
+#minigzip.gcc.host.out minigzip.gcc.host.output: minigzip.gcc.host
+#	./$< < $(SAMPLE_FILE) > $<.output 2> $@
+#
+#minigzip.ccomp.host.out minigzip.ccomp.host.output: minigzip.ccomp.host
+#	./$< < $(SAMPLE_FILE) > $<.output 2> $@
+#
+#minigzip.gcc.k1c.out minigzip.gcc.k1c.output: minigzip.gcc.k1c
+#	$(EXECUTE_CYCLES) $< < $(SAMPLE_FILE) > $<.output 2> $@
+#
+#minigzip.gcc.o1.k1c.out minigzip.gcc.o1.k1c.output: minigzip.gcc.o1.k1c
+#	$(EXECUTE_CYCLES) $< < $(SAMPLE_FILE) > $<.output 2> $@
+#
+#minigzip.ccomp.k1c.out minigzip.ccomp.k1c.output: minigzip.ccomp.k1c
+#	$(EXECUTE_CYCLES) $< < $(SAMPLE_FILE) > $<.output 2> $@
+#
+#.SECONDARY:
+#
+#.PHONY:
+#clean:
+#	rm -f *.o *.s *.k1c *.csv
+#
diff --git a/test/monniaux/zlib-1.2.11/zlib_small.txt b/test/monniaux/zlib-1.2.11/zlib_small.txt
new file mode 100644
index 00000000..2c494200
--- /dev/null
+++ b/test/monniaux/zlib-1.2.11/zlib_small.txt
@@ -0,0 +1,539 @@
+
+
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.11"
+#define ZLIB_VERNUM 0x12b0
+#define ZLIB_VER_MAJOR 1
+#define ZLIB_VER_MINOR 2
+#define ZLIB_VER_REVISION 11
+#define ZLIB_VER_SUBREVISION 0
+
+
+
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+    z_const Bytef *next_in;     
+
+    uLong    total_in;  
+
+    uInt     avail_out; 
+
+
+    z_const char *msg;  
+
+
+    alloc_func zalloc;  
+
+    voidpf     opaque;  
+
+    uLong   adler;      
+
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+
+
+typedef struct gz_header_s {
+    int     text;       
+
+    int     xflags;     
+
+    Bytef   *extra;     
+
+    uInt    extra_max;  
+
+    uInt    name_max;   
+
+    uInt    comm_max;   
+
+    int     done;       
+
+} gz_header;
+
+typedef gz_header FAR *gz_headerp;
+
+
+
+
+                        
+
+
+#define Z_OK            0
+#define Z_STREAM_END    1
+#define Z_NEED_DICT     2
+#define Z_ERRNO        (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR   (-3)
+#define Z_MEM_ERROR    (-4)
+#define Z_BUF_ERROR    (-5)
+#define Z_VERSION_ERROR (-6)
+
+
+
+#define Z_NO_COMPRESSION         0
+#define Z_BEST_SPEED             1
+#define Z_BEST_COMPRESSION       9
+#define Z_DEFAULT_COMPRESSION  (-1)
+
+
+
+#define Z_BINARY   0
+#define Z_TEXT     1
+#define Z_ASCII    Z_TEXT   
+
+
+#define Z_DEFLATED   8
+
+
+
+#define zlib_version zlibVersion()
+
+
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+
+
+
+
+
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+
+
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+
+
+
+
+
+
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+
+
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+
+
+
+
+                        
+
+
+
+
+
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+
+
+
+ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+
+
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+
+
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+
+
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+                                      int level,
+                                      int strategy));
+
+
+
+ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
+                                    int good_length,
+                                    int max_lazy,
+                                    int nice_length,
+                                    int max_chain));
+
+
+
+ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
+                                       uLong sourceLen));
+
+
+
+ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
+                                       unsigned *pending,
+                                       int *bits));
+
+
+
+ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+
+
+
+ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+
+
+
+
+
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+
+
+
+ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+
+
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+
+
+
+ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+
+
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+
+
+
+ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
+                                      int windowBits));
+
+
+
+ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+
+
+
+ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
+
+
+
+ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+
+
+
+
+
+
+typedef unsigned (*in_func) OF((void FAR *,
+                                z_const unsigned char FAR * FAR *));
+typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+
+ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
+                                    in_func in, void FAR *in_desc,
+                                    out_func out, void FAR *out_desc));
+
+
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+
+
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+
+
+
+#ifndef Z_SOLO
+
+                        
+
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
+                                 const Bytef *source, uLong sourceLen));
+
+
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
+                                  const Bytef *source, uLong sourceLen,
+                                  int level));
+
+
+
+ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+
+
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
+                                   const Bytef *source, uLong sourceLen));
+
+
+
+ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
+                                    const Bytef *source, uLong *sourceLen));
+
+
+
+                        
+
+
+typedef struct gzFile_s *gzFile;    
+
+
+ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
+
+
+
+ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
+
+
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+
+
+
+ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+
+
+
+ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
+                                     gzFile file));
+
+
+
+ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
+                                voidpc buf, unsigned len));
+
+
+
+ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
+                                      z_size_t nitems, gzFile file));
+
+
+
+ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
+
+
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+
+
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+
+
+
+ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
+
+
+
+ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
+
+
+
+ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
+
+
+
+ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+
+
+
+
+
+
+ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+
+
+
+
+
+
+
+
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+
+
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+
+
+
+ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+
+
+
+ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+
+
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+
+
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+
+
+
+#endif 
+
+
+
+
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+
+
+
+ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf,
+                                    z_size_t len));
+
+
+
+
+
+
+ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
+
+
+
+ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf,
+                                  z_size_t len));
+
+
+
+
+
+
+
+                        
+
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
+                                      int windowBits, int memLevel,
+                                      int strategy, const char *version,
+                                      int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
+                                      const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
+                                         unsigned char FAR *window,
+                                         const char *version,
+                                         int stream_size));
+#ifdef Z_PREFIX_SET
+#  define z_deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define z_inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#else
+#  define deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#endif
+
+#ifndef Z_SOLO
+
+
+
+struct gzFile_s {
+    unsigned have;
+    unsigned char *next;
+    z_off64_t pos;
+};
+ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  
+
+#ifdef Z_LARGE64
+   ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+   ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+   ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+   ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+   ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
+   ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
+#endif
+
+#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
+#  ifdef Z_PREFIX_SET
+#    define z_gzopen z_gzopen64
+#    define z_gzseek z_gzseek64
+#    define z_gztell z_gztell64
+#    define z_gzoffset z_gzoffset64
+#    define z_adler32_combine z_adler32_combine64
+#    define z_crc32_combine z_crc32_combine64
+#  else
+#    define gzopen gzopen64
+#    define gzseek gzseek64
+#    define gztell gztell64
+#    define gzoffset gzoffset64
+#    define adler32_combine adler32_combine64
+#    define crc32_combine crc32_combine64
+#  endif
+#  ifndef Z_LARGE64
+     ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+     ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
+     ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
+     ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
+     ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+     ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+#  endif
+#else
+   ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
+   ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
+   ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
+   ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
+   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+#endif
+
+#else 
+
+
+
+
diff --git a/test/mppa/check.sh b/test/mppa/check.sh
deleted file mode 100755
index f25c3e31..00000000
--- a/test/mppa/check.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-# Tests the execution of the binaries produced by CompCert
-
-source do_test.sh
-
-do_test check $1
diff --git a/test/mppa/hardcheck.sh b/test/mppa/hardcheck.sh
new file mode 100755
index 00000000..82b63182
--- /dev/null
+++ b/test/mppa/hardcheck.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Tests the execution of the binaries produced by CompCert, in hardware
+
+source do_test.sh
+
+do_test hardcheck
diff --git a/test/mppa/hardtest.sh b/test/mppa/hardtest.sh
new file mode 100755
index 00000000..09511da6
--- /dev/null
+++ b/test/mppa/hardtest.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Tests the validity of the tests, in hardware
+
+source do_test.sh
+
+do_test hardtest
diff --git a/test/mppa/instr/Makefile b/test/mppa/instr/Makefile
index 69446796..37f7d0ab 100644
--- a/test/mppa/instr/Makefile
+++ b/test/mppa/instr/Makefile
@@ -5,10 +5,11 @@ CC ?= gcc
 CCOMP ?= ccomp
 OPTIM ?= -O2
 CFLAGS ?= $(OPTIM)
-CCOMPFLAGS ?= $(CFLAGS) -faddx
+CCOMPFLAGS ?= $(CFLAGS)
 SIMU ?= k1-mppa
 TIMEOUT ?= --signal=SIGTERM 120s
 DIFF ?= python2.7 floatcmp.py -reltol .00001
+HARDRUN ?= k1-jtag-runner
 
 DIR=./
 SRCDIR=$(DIR)
@@ -30,10 +31,11 @@ SIMUPATH=$(shell which $(SIMU))
 
 TESTNAMES?=$(notdir $(subst .c,,$(wildcard $(DIR)/*.c)))
 X86_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .x86-gcc.out,$(TESTNAMES)))
-GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.out,$(TESTNAMES)))
-CCOMP_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.out,$(TESTNAMES)))
+GCC_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.simu.out,$(TESTNAMES)))
+CCOMP_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.simu.out,$(TESTNAMES)))
+GCC_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.hard.out,$(TESTNAMES)))
+CCOMP_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.hard.out,$(TESTNAMES)))
 
-OUT=$(X86_GCC_OUT) $(GCC_OUT) $(CCOMP_OUT)
 BIN=$(addprefix $(BINDIR)/,$(addsuffix .x86-gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .ccomp.bin,$(TESTNAMES)))
@@ -49,12 +51,18 @@ RED=\033[0;31m
 YELLOW=\033[0;33m
 NC=\033[0m
 
+.PHONY:
+test: simutest
+
+.PHONY:
+check: simucheck
+
 .PHONY: 
-test: $(X86_GCC_OUT) $(GCC_OUT)
+simutest: $(X86_GCC_OUT) $(GCC_SIMUOUT)
 	@echo "Comparing x86 gcc output to k1 gcc.."
 	for test in $(TESTNAMES); do\
 		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
 		if grep "__K1C__" -q $$test.c; then\
 			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __K1C__\`\n";\
 		elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
@@ -65,11 +73,39 @@ test: $(X86_GCC_OUT) $(GCC_OUT)
 	done
 
 .PHONY:
-check: $(GCC_OUT) $(CCOMP_OUT)
+simucheck: $(GCC_SIMUOUT) $(CCOMP_SIMUOUT)
 	@echo "Comparing k1 gcc output to ccomp.."
 	@for test in $(TESTNAMES); do\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
-		ccompout=$(OUTDIR)/$$test.ccomp.out;\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.simu.out;\
+		if $(DIFF) $$ccompout $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
+			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$ccompout and $$gccout concur$(NC)\n";\
+		fi;\
+	done
+
+.PHONY: 
+hardtest: $(X86_GCC_OUT) $(GCC_HARDOUT)
+	@echo "Comparing x86 gcc output to k1 gcc.."
+	for test in $(TESTNAMES); do\
+		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
+		if grep "__K1C__" -q $$test.c; then\
+			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __K1C__\`\n";\
+		elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
+			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$x86out and $$gccout concur$(NC)\n";\
+		fi;\
+	done
+
+.PHONY:
+hardcheck: $(GCC_HARDOUT) $(CCOMP_HARDOUT)
+	@echo "Comparing k1 gcc output to ccomp.."
+	@for test in $(TESTNAMES); do\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.hard.out;\
 		if $(DIFF) $$ccompout $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
 			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
 		else\
@@ -95,14 +131,22 @@ $(OUTDIR)/%.x86-gcc.out: $(BINDIR)/%.x86-gcc.bin
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) ./$< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.simu.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.ccomp.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+$(OUTDIR)/%.ccomp.simu.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
+$(OUTDIR)/%.gcc.hard.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.ccomp.hard.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
 # Assembly to binary
 
 $(BINDIR)/%.x86-gcc.bin: $(ASMDIR)/%.x86-gcc.s $(LIB) $(CCPATH)
diff --git a/test/mppa/interop/Makefile b/test/mppa/interop/Makefile
index e615e89a..3a83d51c 100644
--- a/test/mppa/interop/Makefile
+++ b/test/mppa/interop/Makefile
@@ -6,6 +6,7 @@ CCOMP ?= ccomp
 CFLAGS ?= -O2 -Wno-varargs
 SIMU ?= k1-mppa
 TIMEOUT ?= --signal=SIGTERM 120s
+HARDRUN ?= k1-jtag-runner
 
 DIR=./
 SRCDIR=$(DIR)
@@ -33,17 +34,23 @@ SIMUPATH=$(shell which $(SIMU))
 TESTNAMES ?= $(filter-out $(VAARG_COMMON),$(filter-out $(COMMON),$(notdir $(subst .c,,$(wildcard $(DIR)/*.c)))))
 
 X86_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .x86-gcc.out,$(TESTNAMES)))
-GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.out,$(TESTNAMES)))
-GCC_REV_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.out,$(TESTNAMES)))
-CCOMP_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.out,$(TESTNAMES)))
+GCC_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.simu.out,$(TESTNAMES)))
+GCC_REV_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.simu.out,$(TESTNAMES)))
+CCOMP_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.simu.out,$(TESTNAMES)))
+
+GCC_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.hard.out,$(TESTNAMES)))
+GCC_REV_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.hard.out,$(TESTNAMES)))
+CCOMP_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.hard.out,$(TESTNAMES)))
 
 VAARG_X86_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .x86-gcc.vaarg.out,$(TESTNAMES)))
-VAARG_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.vaarg.out,$(TESTNAMES)))
-VAARG_GCC_REV_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.vaarg.out,$(TESTNAMES)))
-VAARG_CCOMP_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.vaarg.out,$(TESTNAMES)))
+VAARG_GCC_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.vaarg.simu.out,$(TESTNAMES)))
+VAARG_GCC_REV_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.vaarg.simu.out,$(TESTNAMES)))
+VAARG_CCOMP_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.vaarg.simu.out,$(TESTNAMES)))
+
+VAARG_GCC_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.vaarg.hard.out,$(TESTNAMES)))
+VAARG_GCC_REV_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.vaarg.hard.out,$(TESTNAMES)))
+VAARG_CCOMP_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.vaarg.hard.out,$(TESTNAMES)))
 
-OUT=$(X86_GCC_OUT) $(GCC_OUT) $(CCOMP_OUT) $(GCC_REV_OUT)\
-		$(VAARG_GCC_OUT) $(VAARG_GCC_OUT) $(VAARG_CCOMP_OUT) $(VAARG_GCC_REV_OUT)
 BIN=$(addprefix $(BINDIR)/,$(addsuffix .x86-gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .ccomp.bin,$(TESTNAMES)))\
@@ -63,14 +70,72 @@ GREEN=\033[0;32m
 RED=\033[0;31m
 NC=\033[0m
 
+.PHONY:
+test: simutest
+
+.PHONY:
+simutest: $(X86_GCC_OUT) $(GCC_SIMUOUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_SIMUOUT)
+	@echo "Comparing x86 gcc output to k1 gcc.."
+	@for test in $(TESTNAMES); do\
+		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
+		vaarg_x86out=$(OUTDIR)/$$test.x86-gcc.vaarg.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.simu.out;\
+		if ! diff $$x86out $$gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$x86out and $$gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$vaarg_x86out $$vaarg_gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$vaarg_x86out and $$vaarg_gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$vaarg_x86out and $$vaarg_gccout concur$(NC)\n";\
+		fi;\
+	done
+
+.PHONY:
+check: simucheck
+
+.PHONY:
+simucheck: $(GCC_SIMUOUT) $(CCOMP_SIMUOUT) $(GCC_REV_SIMUOUT) $(VAARG_GCC_SIMUOUT) $(VAARG_CCOMP_SIMUOUT) $(VAARG_GCC_REV_SIMUOUT)
+	@echo "Comparing k1 gcc output to ccomp.."
+	@for test in $(TESTNAMES); do\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.simu.out;\
+		gccrevout=$(OUTDIR)/$$test.gcc.rev.simu.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.simu.out;\
+		vaarg_ccompout=$(OUTDIR)/$$test.ccomp.vaarg.simu.out;\
+		vaarg_gccrevout=$(OUTDIR)/$$test.gcc.rev.vaarg.simu.out;\
+		if ! diff $$ccompout $$gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$ccompout and $$gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$gccrevout $$gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$gccrevout and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$gccrevout and $$gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$vaarg_ccompout $$vaarg_gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$vaarg_ccompout and $$vaarg_gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$vaarg_ccompout and $$vaarg_gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$vaarg_gccrevout $$vaarg_gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$vaarg_gccrevout and $$vaarg_gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$vaarg_gccrevout and $$vaarg_gccout concur$(NC)\n";\
+		fi;\
+	done
+
 .PHONY: 
-test: $(X86_GCC_OUT) $(GCC_OUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_OUT)
+hardtest: $(X86_GCC_OUT) $(GCC_HARDOUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_HARDOUT)
 	@echo "Comparing x86 gcc output to k1 gcc.."
 	@for test in $(TESTNAMES); do\
 		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
 		vaarg_x86out=$(OUTDIR)/$$test.x86-gcc.vaarg.out;\
-		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.hard.out;\
 		if ! diff $$x86out $$gccout > /dev/null; then\
 			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
 		else\
@@ -84,15 +149,15 @@ test: $(X86_GCC_OUT) $(GCC_OUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_OUT)
 	done
 
 .PHONY:
-check: $(GCC_OUT) $(CCOMP_OUT) $(GCC_REV_OUT) $(VAARG_GCC_OUT) $(VAARG_CCOMP_OUT) $(VAARG_GCC_REV_OUT)
+hardcheck: $(GCC_HARDOUT) $(CCOMP_HARDOUT) $(GCC_REV_HARDOUT) $(VAARG_GCC_HARDOUT) $(VAARG_CCOMP_HARDOUT) $(VAARG_GCC_REV_HARDOUT)
 	@echo "Comparing k1 gcc output to ccomp.."
 	@for test in $(TESTNAMES); do\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
-		ccompout=$(OUTDIR)/$$test.ccomp.out;\
-		gccrevout=$(OUTDIR)/$$test.gcc.rev.out;\
-		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.out;\
-		vaarg_ccompout=$(OUTDIR)/$$test.ccomp.vaarg.out;\
-		vaarg_gccrevout=$(OUTDIR)/$$test.gcc.rev.vaarg.out;\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.hard.out;\
+		gccrevout=$(OUTDIR)/$$test.gcc.rev.hard.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.hard.out;\
+		vaarg_ccompout=$(OUTDIR)/$$test.ccomp.vaarg.hard.out;\
+		vaarg_gccrevout=$(OUTDIR)/$$test.gcc.rev.vaarg.hard.out;\
 		if ! diff $$ccompout $$gccout > /dev/null; then\
 			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
 		else\
@@ -144,36 +209,60 @@ $(OUTDIR)/%.x86-gcc.out: $(BINDIR)/%.x86-gcc.bin
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) ./$< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.simu.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.rev.out: $(BINDIR)/%.gcc.rev.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.rev.simu.out: $(BINDIR)/%.gcc.rev.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.ccomp.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+$(OUTDIR)/%.ccomp.simu.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
+$(OUTDIR)/%.gcc.hard.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.gcc.rev.hard.out: $(BINDIR)/%.gcc.rev.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.ccomp.hard.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
 ## With vaarg
 
 $(OUTDIR)/%.x86-gcc.vaarg.out: $(BINDIR)/%.x86-gcc.vaarg.bin
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) ./$< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.vaarg.out: $(BINDIR)/%.gcc.vaarg.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.vaarg.simu.out: $(BINDIR)/%.gcc.vaarg.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.rev.vaarg.out: $(BINDIR)/%.gcc.rev.vaarg.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.rev.vaarg.simu.out: $(BINDIR)/%.gcc.rev.vaarg.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.ccomp.vaarg.out: $(BINDIR)/%.ccomp.vaarg.bin $(SIMUPATH)
+$(OUTDIR)/%.ccomp.vaarg.simu.out: $(BINDIR)/%.ccomp.vaarg.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
+$(OUTDIR)/%.gcc.vaarg.hard.out: $(BINDIR)/%.gcc.vaarg.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.gcc.rev.vaarg.hard.out: $(BINDIR)/%.gcc.rev.vaarg.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.ccomp.vaarg.hard.out: $(BINDIR)/%.ccomp.vaarg.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
 ##
 # Object to binary
 ##
diff --git a/test/mppa/simucheck.sh b/test/mppa/simucheck.sh
new file mode 100755
index 00000000..25fb9947
--- /dev/null
+++ b/test/mppa/simucheck.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Tests the execution of the binaries produced by CompCert, by simulation
+
+source do_test.sh
+
+do_test check $1
diff --git a/test/mppa/test.sh b/test/mppa/simutest.sh
index 30806a6b..3b1021e6 100755
--- a/test/mppa/test.sh
+++ b/test/mppa/simutest.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Tests the validity of the tests
+# Tests the validity of the tests, in simulator
 
 source do_test.sh
 
diff --git a/test/raytracer/Makefile b/test/raytracer/Makefile
index 8f6541a1..24461bd1 100644
--- a/test/raytracer/Makefile
+++ b/test/raytracer/Makefile
@@ -3,7 +3,7 @@ include ../../Makefile.config
 CC=../../ccomp 
 CFLAGS=$(CCOMPOPTS) -stdlib ../../runtime -dparse -dclight -dasm -fstruct-return
 LIBS=$(LIBMATH)
-TIME=xtime
+TIME=ocaml unix.cma ../../tools/xtime.ml -mintime 2.0 -minruns 4
 
 OBJS=memory.o gmllexer.o gmlparser.o eval.o \
   arrays.o vector.o matrix.o object.o intersect.o surface.o light.o \
@@ -30,4 +30,4 @@ test:
         fi
 
 bench:
-	@echo -n "raytracer: "; $(TIME) sh -c './render < kal.gml'
+	@$(TIME) -name raytracer -- sh -c './render < kal.gml'
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 09d12f9a..97c25f6c 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -9,7 +9,8 @@ LIBS=$(LIBMATH)
 
 # Can run, both in compiled mode and in interpreter mode,
 # and have reference output in Results
-TESTS?=int32 int64 floats floats-basics \
+
+TESTS?=int32 int64 floats floats-basics floats-lit \
   expr1 expr6 funptr2 initializers initializers2 initializers3 \
   volatile1 volatile2 volatile3 volatile4 \
   funct3 expr5 struct7 struct8 struct11 struct12 casts1 casts2 char1 \
@@ -34,9 +35,9 @@ endif
 # but produce processor-dependent results, so no reference output in Results
 
 TESTS_DIFF=NaNs
-ifeq ($(ARCH),mppa_k1c)
+# FIXME ifeq ($(ARCH),mppa_k1c)
 	TESTS_DIFF:=$(filter-out NaNs,$(TESTS_DIFF))
-endif
+# endif
 
 # Other tests: should compile to .s without errors (but expect warnings)
 
diff --git a/test/regression/Results/builtins-aarch64 b/test/regression/Results/builtins-aarch64
new file mode 100644
index 00000000..c70432d8
--- /dev/null
+++ b/test/regression/Results/builtins-aarch64
@@ -0,0 +1,15 @@
+bswap(12345678) = 78563412
+bswap16(1234) = 3412
+bswap64(123456789abcdef0) = f0debc9a78563412
+clz(12345678) = 3
+clzll(12345678) = 35
+clzll(1234567812345678) = 3
+cls(1234567) = 10
+cls(-9999) = 17
+clsll(1234567) = 42
+clsll(-9999) = 49
+fsqrt(3.141590) = 1.772453
+fmadd(3.141590, 2.718000, 1.414000) = 9.952842
+fmsub(3.141590, 2.718000, 1.414000) = -7.124842
+fnmadd(3.141590, 2.718000, 1.414000) = -9.952842
+fnmsub(3.141590, 2.718000, 1.414000) = 7.124842
diff --git a/test/regression/Results/floats-lit b/test/regression/Results/floats-lit
new file mode 100644
index 00000000..6cde72fb
--- /dev/null
+++ b/test/regression/Results/floats-lit
@@ -0,0 +1,2 @@
+--- Double-precision test
+--- Single-precision test
diff --git a/test/regression/Results/int64 b/test/regression/Results/int64
index af444cf6..ae8a3cc1 100644
--- a/test/regression/Results/int64
+++ b/test/regression/Results/int64
@@ -335,6 +335,48 @@ utof x = 0
 stof x = 0
 
 x = 0
+y = 52ce6b4000000063
+-x = 0
+x + y = 52ce6b4000000063
+x - y = ad3194bfffffff9d
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 0
+x %u 3 = 0
+x /s 3 = 0
+x %s 3 = 0
+x /u 5 = 0
+x %u 5 = 0
+x /s 5 = 0
+x %s 5 = 0
+x /u 11 = 0
+x %u 11 = 0
+x /s 11 = 0
+x %s 11 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000063
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+utof x = 0
+stof x = 0
+
+x = 0
 y = 14057b7ef767814f
 -x = 0
 x + y = 14057b7ef767814f
@@ -755,6 +797,48 @@ utof x = 3f800000
 stof x = 3f800000
 
 x = 1
+y = 52ce6b4000000063
+-x = ffffffffffffffff
+x + y = 52ce6b4000000064
+x - y = ad3194bfffffff9e
+x * y = 52ce6b4000000063
+x /u y = 0
+x %u y = 1
+x /s y = 0
+x %s y = 1
+x /u y2 = 0
+x %u y2 = 1
+x /s y3 = 0
+x %s y3 = 1
+x /u 3 = 0
+x %u 3 = 1
+x /s 3 = 0
+x %s 3 = 1
+x /u 5 = 0
+x %u 5 = 1
+x /s 5 = 0
+x %s 5 = 1
+x /u 11 = 0
+x %u 11 = 1
+x /s 11 = 0
+x %s 11 = 1
+~x = fffffffffffffffe
+x & y = 1
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000062
+x << i = 800000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+utof x = 3f800000
+stof x = 3f800000
+
+x = 1
 y = 9af678222e728119
 -x = ffffffffffffffff
 x + y = 9af678222e72811a
@@ -1175,6 +1259,48 @@ utof x = 5f800000
 stof x = bf800000
 
 x = ffffffffffffffff
+y = 52ce6b4000000063
+-x = 1
+x + y = 52ce6b4000000062
+x - y = ad3194bfffffff9c
+x * y = ad3194bfffffff9d
+x /u y = 3
+x %u y = 794be3ffffffed6
+x /s y = 0
+x %s y = ffffffffffffffff
+x /u y2 = 3176fe836
+x %u y2 = 3683607f
+x /s y3 = 0
+x %s y3 = ffffffffffffffff
+x /u 3 = 5555555555555555
+x %u 3 = 0
+x /s 3 = 0
+x %s 3 = ffffffffffffffff
+x /u 5 = 3333333333333333
+x %u 5 = 0
+x /s 5 = 0
+x %s 5 = ffffffffffffffff
+x /u 11 = 1745d1745d1745d1
+x %u 11 = 4
+x /s 11 = 0
+x %s 11 = ffffffffffffffff
+~x = 0
+x & y = 52ce6b4000000063
+x | y = ffffffffffffffff
+x ^ y = ad3194bfffffff9c
+x << i = fffffff800000000
+x >>u i = 1fffffff
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+utof x = 5f800000
+stof x = bf800000
+
+x = ffffffffffffffff
 y = 62354cda6226d1f3
 -x = 1
 x + y = 62354cda6226d1f2
@@ -1595,6 +1721,48 @@ utof x = 4f000000
 stof x = 4f000000
 
 x = 7fffffff
+y = 52ce6b4000000063
+-x = ffffffff80000001
+x + y = 52ce6b4080000062
+x - y = ad3194c07fffff9c
+x * y = ad3194f17fffff9d
+x /u y = 0
+x %u y = 7fffffff
+x /s y = 0
+x %s y = 7fffffff
+x /u y2 = 1
+x %u y2 = 2d3194bf
+x /s y3 = 1
+x %s y3 = 2d3194bf
+x /u 3 = 2aaaaaaa
+x %u 3 = 1
+x /s 3 = 2aaaaaaa
+x %s 3 = 1
+x /u 5 = 19999999
+x %u 5 = 2
+x /s 5 = 19999999
+x %s 5 = 2
+x /u 11 = ba2e8ba
+x %u 11 = 1
+x /s 11 = ba2e8ba
+x %s 11 = 1
+~x = ffffffff80000000
+x & y = 63
+x | y = 52ce6b407fffffff
+x ^ y = 52ce6b407fffff9c
+x << i = fffffff800000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+utof x = 4f000000
+stof x = 4f000000
+
+x = 7fffffff
 y = 144093704fadba5d
 -x = ffffffff80000001
 x + y = 14409370cfadba5c
@@ -2015,6 +2183,48 @@ utof x = 4f000000
 stof x = 4f000000
 
 x = 80000000
+y = 52ce6b4000000063
+-x = ffffffff80000000
+x + y = 52ce6b4080000063
+x - y = ad3194c07fffff9d
+x * y = 3180000000
+x /u y = 0
+x %u y = 80000000
+x /s y = 0
+x %s y = 80000000
+x /u y2 = 1
+x %u y2 = 2d3194c0
+x /s y3 = 1
+x %s y3 = 2d3194c0
+x /u 3 = 2aaaaaaa
+x %u 3 = 2
+x /s 3 = 2aaaaaaa
+x %s 3 = 2
+x /u 5 = 19999999
+x %u 5 = 3
+x /s 5 = 19999999
+x %s 5 = 3
+x /u 11 = ba2e8ba
+x %u 11 = 2
+x /s 11 = ba2e8ba
+x %s 11 = 2
+~x = ffffffff7fffffff
+x & y = 0
+x | y = 52ce6b4080000063
+x ^ y = 52ce6b4080000063
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+utof x = 4f000000
+stof x = 4f000000
+
+x = 80000000
 y = 7b985bc1e7bce4d7
 -x = ffffffff80000000
 x + y = 7b985bc267bce4d7
@@ -2435,6 +2645,48 @@ utof x = 5f000000
 stof x = 5f000000
 
 x = 7fffffffffffffff
+y = 52ce6b4000000063
+-x = 8000000000000001
+x + y = d2ce6b4000000062
+x - y = 2d3194bfffffff9c
+x * y = 2d3194bfffffff9d
+x /u y = 1
+x %u y = 2d3194bfffffff9c
+x /s y = 1
+x %s y = 2d3194bfffffff9c
+x /u y2 = 18bb7f41b
+x %u y2 = 1b41b03f
+x /s y3 = 18bb7f41b
+x %s y3 = 1b41b03f
+x /u 3 = 2aaaaaaaaaaaaaaa
+x %u 3 = 1
+x /s 3 = 2aaaaaaaaaaaaaaa
+x %s 3 = 1
+x /u 5 = 1999999999999999
+x %u 5 = 2
+x /s 5 = 1999999999999999
+x %s 5 = 2
+x /u 11 = ba2e8ba2e8ba2e8
+x %u 11 = 7
+x /s 11 = ba2e8ba2e8ba2e8
+x %s 11 = 7
+~x = 8000000000000000
+x & y = 52ce6b4000000063
+x | y = 7fffffffffffffff
+x ^ y = 2d3194bfffffff9c
+x << i = fffffff800000000
+x >>u i = fffffff
+x >>s i = fffffff
+x cmpu y = gt
+x cmps y = gt
+utod x = 43e0000000000000
+dtou f = 346dc5d638865
+stod x = 43e0000000000000
+dtos f = 346dc5d638865
+utof x = 5f000000
+stof x = 5f000000
+
+x = 7fffffffffffffff
 y = a220229ec164ffe1
 -x = 8000000000000001
 x + y = 2220229ec164ffe0
@@ -2855,6 +3107,48 @@ utof x = 5f000000
 stof x = df000000
 
 x = 8000000000000000
+y = 52ce6b4000000063
+-x = 8000000000000000
+x + y = d2ce6b4000000063
+x - y = 2d3194bfffffff9d
+x * y = 8000000000000000
+x /u y = 1
+x %u y = 2d3194bfffffff9d
+x /s y = ffffffffffffffff
+x %s y = d2ce6b4000000063
+x /u y2 = 18bb7f41b
+x %u y2 = 1b41b040
+x /s y3 = fffffffe74480be5
+x %s y3 = ffffffffe4be4fc0
+x /u 3 = 2aaaaaaaaaaaaaaa
+x %u 3 = 2
+x /s 3 = d555555555555556
+x %s 3 = fffffffffffffffe
+x /u 5 = 1999999999999999
+x %u 5 = 3
+x /s 5 = e666666666666667
+x %s 5 = fffffffffffffffd
+x /u 11 = ba2e8ba2e8ba2e8
+x %u 11 = 8
+x /s 11 = f45d1745d1745d18
+x %s 11 = fffffffffffffff8
+~x = 7fffffffffffffff
+x & y = 0
+x | y = d2ce6b4000000063
+x ^ y = d2ce6b4000000063
+x << i = 0
+x >>u i = 10000000
+x >>s i = fffffffff0000000
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e0000000000000
+dtou f = 346dc5d638865
+stod x = c3e0000000000000
+dtos f = fffcb923a29c779b
+utof x = 5f000000
+stof x = df000000
+
+x = 8000000000000000
 y = c73aa0d9a415dfb
 -x = 8000000000000000
 x + y = 8c73aa0d9a415dfb
@@ -3275,6 +3569,48 @@ utof x = 4f800000
 stof x = 4f800000
 
 x = 100000003
+y = 52ce6b4000000063
+-x = fffffffefffffffd
+x + y = 52ce6b4100000066
+x - y = ad3194c0ffffffa0
+x * y = f86b422300000129
+x /u y = 0
+x %u y = 100000003
+x /s y = 0
+x %s y = 100000003
+x /u y2 = 3
+x %u y2 = 794be43
+x /s y3 = 3
+x %s y3 = 794be43
+x /u 3 = 55555556
+x %u 3 = 1
+x /s 3 = 55555556
+x %s 3 = 1
+x /u 5 = 33333333
+x %u 5 = 4
+x /s 5 = 33333333
+x %s 5 = 4
+x /u 11 = 1745d174
+x %u 11 = 7
+x /s 11 = 1745d174
+x %s 11 = 7
+~x = fffffffefffffffc
+x & y = 3
+x | y = 52ce6b4100000063
+x ^ y = 52ce6b4100000060
+x << i = 1800000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 41f0000000300000
+dtou f = 68db8
+stod x = 41f0000000300000
+dtos f = 68db8
+utof x = 4f800000
+stof x = 4f800000
+
+x = 100000003
 y = e9bcd26890f095a5
 -x = fffffffefffffffd
 x + y = e9bcd26990f095a8
@@ -3358,47 +3694,467 @@ dtos f = 14bb101261e18
 utof x = 5e4a72c9
 stof x = 5e4a72c9
 
-x = 8362aa9340fe215f
-y = f986342416ec8002
--x = 7c9d556cbf01dea1
-x + y = 7ce8deb757eaa161
-x - y = 89dc766f2a11a15d
-x * y = e4a2b426803fc2be
+x = 52ce6b4000000063
+y = 0
+-x = ad3194bfffffff9d
+x + y = 52ce6b4000000063
+x - y = 52ce6b4000000063
+x * y = 0
 x /u y = 0
-x %u y = 8362aa9340fe215f
-x /s y = 13
-x %s y = fe6ccbe58d70a139
-x /u y2 = 86cb918b
-x %u y2 = 910b6dd3
-x /s y3 = 133e437097
-x %s y3 = fffffffffe99a023
-x /u 3 = 2bcb8e3115aa0b1f
-x %u 3 = 2
-x /s 3 = d67638dbc054b5cb
-x %s 3 = fffffffffffffffe
-x /u 5 = 1a46eeea4032d379
-x %u 5 = 2
-x /s 5 = e713bbb70cffa047
-x %s 5 = fffffffffffffffc
-x /u 11 = bf1b26a7a45a5f1
-x %u 11 = 4
-x /s 11 = f4abe0f61d2e6020
-x %s 11 = ffffffffffffffff
-~x = 7c9d556cbf01dea0
-x & y = 8102200000ec0002
-x | y = fbe6beb756fea15f
-x ^ y = 7ae49eb75612a15d
-x << i = d8aaa4d03f8857c
-x >>u i = 20d8aaa4d03f8857
-x >>s i = e0d8aaa4d03f8857
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 0
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000063
+x << i = 52ce6b4000000063
+x >>u i = 52ce6b4000000063
+x >>s i = 52ce6b4000000063
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 1
+-x = ad3194bfffffff9d
+x + y = 52ce6b4000000064
+x - y = 52ce6b4000000062
+x * y = 52ce6b4000000063
+x /u y = 52ce6b4000000063
+x %u y = 0
+x /s y = 52ce6b4000000063
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 1
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000062
+x << i = a59cd680000000c6
+x >>u i = 296735a000000031
+x >>s i = 296735a000000031
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = ffffffffffffffff
+-x = ad3194bfffffff9d
+x + y = 52ce6b4000000062
+x - y = 52ce6b4000000064
+x * y = ad3194bfffffff9d
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = ad3194bfffffff9d
+x %s y = 0
+x /u y2 = 52ce6b40
+x %u y2 = 52ce6ba3
+x /s y3 = ad3194bfffffff9d
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 52ce6b4000000063
+x | y = ffffffffffffffff
+x ^ y = ad3194bfffffff9c
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 7fffffff
+-x = ad3194bfffffff9d
+x + y = 52ce6b4080000062
+x - y = 52ce6b3f80000064
+x * y = ad3194f17fffff9d
+x /u y = a59cd681
+x %u y = 259cd6e4
+x /s y = a59cd681
+x %s y = 259cd6e4
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 63
+x | y = 52ce6b407fffffff
+x ^ y = 52ce6b407fffff9c
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 80000000
+-x = ad3194bfffffff9d
+x + y = 52ce6b4080000063
+x - y = 52ce6b3f80000063
+x * y = 3180000000
+x /u y = a59cd680
+x %u y = 63
+x /s y = a59cd680
+x %s y = 63
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 0
+x | y = 52ce6b4080000063
+x ^ y = 52ce6b4080000063
+x << i = 52ce6b4000000063
+x >>u i = 52ce6b4000000063
+x >>s i = 52ce6b4000000063
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 7fffffffffffffff
+-x = ad3194bfffffff9d
+x + y = d2ce6b4000000062
+x - y = d2ce6b4000000064
+x * y = 2d3194bfffffff9d
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = 0
+x %s y = 52ce6b4000000063
+x /u y2 = a59cd681
+x %u y2 = 259cd6e4
+x /s y3 = a59cd681
+x %s y3 = 259cd6e4
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 52ce6b4000000063
+x | y = 7fffffffffffffff
+x ^ y = 2d3194bfffffff9c
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
 x cmpu y = lt
 x cmps y = lt
-utod x = 43e06c5552681fc4
-dtou f = 35d0c262d14d7
-stod x = c3df27555b2fc078
-dtos f = fffccf536b66040d
-utof x = 5f0362ab
-stof x = def93aab
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 8000000000000000
+-x = ad3194bfffffff9d
+x + y = d2ce6b4000000063
+x - y = d2ce6b4000000063
+x * y = 8000000000000000
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = 0
+x %s y = 52ce6b4000000063
+x /u y2 = a59cd680
+x %u y2 = 63
+x /s y3 = ffffffff5a632980
+x %s y3 = 63
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 0
+x | y = d2ce6b4000000063
+x ^ y = d2ce6b4000000063
+x << i = 52ce6b4000000063
+x >>u i = 52ce6b4000000063
+x >>s i = 52ce6b4000000063
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 100000003
+-x = ad3194bfffffff9d
+x + y = 52ce6b4100000066
+x - y = 52ce6b3f00000060
+x * y = f86b422300000129
+x /u y = 52ce6b3f
+x %u y = 794bea6
+x /s y = 52ce6b3f
+x %s y = 794bea6
+x /u y2 = 52ce6b4000000063
+x %u y2 = 0
+x /s y3 = 52ce6b4000000063
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 3
+x | y = 52ce6b4100000063
+x ^ y = 52ce6b4100000060
+x << i = 96735a0000000318
+x >>u i = a59cd680000000c
+x >>s i = a59cd680000000c
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 52ce6b4000000063
+-x = ad3194bfffffff9d
+x + y = a59cd680000000c6
+x - y = 0
+x * y = ba6f38000002649
+x /u y = 1
+x %u y = 0
+x /s y = 1
+x %s y = 0
+x /u y2 = 100000000
+x %u y2 = 63
+x /s y3 = 100000000
+x %s y3 = 63
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 52ce6b4000000063
+x | y = 52ce6b4000000063
+x ^ y = 0
+x << i = 31800000000
+x >>u i = a59cd68
+x >>s i = a59cd68
+x cmpu y = eq
+x cmps y = eq
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 8362aa9340fe215f
+-x = ad3194bfffffff9d
+x + y = d63115d340fe21c2
+x - y = cf6bc0acbf01df04
+x * y = 8f1503b22246e7bd
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = 0
+x %s y = 52ce6b4000000063
+x /u y2 = a158656f
+x %u y2 = 5640ba6
+x /s y3 = ffffffff55e35d11
+x %s y3 = 5f2245a0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 2422a0000000043
+x | y = d3eeebd340fe217f
+x ^ y = d1acc1d340fe213c
+x << i = 3180000000
+x >>u i = a59cd680
+x >>s i = a59cd680
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = f986342416ec8002
+y = 52ce6b4000000063
+-x = 679cbdbe9137ffe
+x + y = 4c549f6416ec8065
+x - y = a6b7c8e416ec7f9f
+x * y = b9230074dd7580c6
+x /u y = 3
+x %u y = 11af26416ec7ed9
+x /s y = 0
+x %s y = f986342416ec8002
+x /u y2 = 3036abea3
+x %u y2 = 164b642
+x /s y3 = ffffffffebfad66d
+x %s y3 = ffffffffcae155c2
+x /u 3 = 532cbc0c07a42aab
+x %u 3 = 1
+x /s 3 = fdd766b6b24ed556
+x %s 3 = 0
+x /u 5 = 31e7a40737c8e666
+x %u 5 = 4
+x /s 5 = feb470d40495b334
+x %s 5 = fffffffffffffffe
+x /u 11 = 16af1c0347e6f45d
+x %u 11 = 3
+x /s 11 = ff694a8eeacfae8c
+x %s 11 = fffffffffffffffe
+~x = 679cbdbe9137ffd
+x & y = 5086200000000002
+x | y = fbce7f6416ec8063
+x ^ y = ab485f6416ec8061
+x << i = b764001000000000
+x >>u i = 1f30c684
+x >>s i = ffffffffff30c684
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ef30c68482dd90
+dtou f = 6634832136daf
+stod x = c399e72f6fa44e00
+dtos f = ffffd58f774c5ce4
+utof x = 5f798634
+stof x = dccf397b
 
 x = 368083376ba4ffa9
 y = 6912b247b79a4904
@@ -7558,3 +8314,45 @@ dtos f = b3fdf698d581
 utof x = 5ddbb784
 stof x = 5ddbb784
 
+x = ca9a47c1649d27a7
+y = d56d650045e652aa
+-x = 3565b83e9b62d859
+x + y = a007acc1aa837a51
+x - y = f52ce2c11eb6d4fd
+x * y = 630e3c88ca19d2e6
+x /u y = 0
+x %u y = ca9a47c1649d27a7
+x /s y = 1
+x %s y = f52ce2c11eb6d4fd
+x /u y2 = f3042098
+x %u y2 = 6b092fa7
+x /s y3 = 141176486
+x %s y3 = ffffffffdee649a7
+x /u 3 = 4388c295cc34628d
+x %u 3 = 0
+x /s 3 = ee336d4076df0d38
+x %s 3 = ffffffffffffffff
+x /u 5 = 2885418d141f6e54
+x %u 5 = 3
+x /s 5 = f5520e59e0ec3b22
+x %s 5 = fffffffffffffffd
+x /u 11 = 126b1dcbc3541ae0
+x %u 11 = 7
+x /s 11 = fb254c57663cd510
+x %s 11 = fffffffffffffff7
+~x = 3565b83e9b62d858
+x & y = c0084500448402a2
+x | y = dfff67c165ff77af
+x ^ y = 1ff722c1217b750d
+x << i = 749e9c0000000000
+x >>u i = 32a691
+x >>s i = fffffffffff2a691
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e95348f82c93a5
+dtou f = 52fc6dac31674
+stod x = c3cab2dc1f4db16c
+dtos f = fffea20e1ffc05aa
+utof x = 5f4a9a48
+stof x = de5596e1
+
diff --git a/test/regression/Results/interop1 b/test/regression/Results/interop1
index 990dfe9d..6e32c1cb 100644
--- a/test/regression/Results/interop1
+++ b/test/regression/Results/interop1
@@ -1,4 +1,8 @@
 --- CompCert calling native:
+si8u: 177
+si8s: -79
+si16u: 64305
+si16s: -1231
 s1: { a = 'a' }
 s2: { a = 'a', b = 'b' }
 s3: { a = 'a', b = 'b', c = ' c' }
@@ -44,6 +48,10 @@ ru6: { a = 55555, b = 666 }
 ru7: { a = -10001, b = -789, c = 'z' }
 ru8: { a = 'x', b = 12345 }
 --- native calling CompCert:
+si8u: 177
+si8s: -79
+si16u: 64305
+si16s: -1231
 s1: { a = 'a' }
 s2: { a = 'a', b = 'b' }
 s3: { a = 'a', b = 'b', c = ' c' }
diff --git a/test/regression/builtins-aarch64.c b/test/regression/builtins-aarch64.c
new file mode 100644
index 00000000..2cfa2d09
--- /dev/null
+++ b/test/regression/builtins-aarch64.c
@@ -0,0 +1,47 @@
+/* Fun with builtin functions */
+
+#include <stdio.h>
+
+int main(int argc, char ** argv)
+{
+  unsigned int x = 0x12345678;
+  unsigned int y = 0xDEADBEEF;
+  unsigned long long xx = 0x1234567812345678ULL;
+  unsigned long long yy = 0x1234567800000000ULL;
+  unsigned long long zz = 0x123456789ABCDEF0ULL;
+  unsigned z;
+  double a = 3.14159;
+  double b = 2.718;
+  double c = 1.414;
+  unsigned short s = 0x1234;
+  signed int u = 1234567;
+  signed int v = -9999;
+
+  printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
+  printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));
+  printf("bswap64(%llx) = %llx\n", zz, __builtin_bswap64(zz));
+  printf("clz(%x) = %d\n", x, __builtin_clz(x));
+  printf("clzll(%llx) = %d\n", (unsigned long long) x, __builtin_clzll(x));
+  printf("clzll(%llx) = %d\n", xx, __builtin_clzll(xx));
+  printf("cls(%d) = %d\n", u, __builtin_cls(u));
+  printf("cls(%d) = %d\n", v, __builtin_cls(v));
+  printf("clsll(%lld) = %d\n", (signed long long) u, __builtin_clsll(u));
+  printf("clsll(%lld) = %d\n", (signed long long) v, __builtin_clsll(v));
+
+  printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
+  printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
+  printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
+  printf("fnmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmadd(a, b, c));
+  printf("fnmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmsub(a, b, c));
+
+  /* Make sure that ignoring the result of a builtin
+     doesn't cause an internal error */
+  (void) __builtin_bswap(x);
+  (void) __builtin_fsqrt(a);
+  return 0;
+}
+
+
+  
+
+
diff --git a/test/regression/extasm.c b/test/regression/extasm.c
index 95628f7f..352b930b 100644
--- a/test/regression/extasm.c
+++ b/test/regression/extasm.c
@@ -5,14 +5,16 @@ int clobbers(int x, int z)
 {
   int y;
   asm("TEST0 out:%0 in:%1" : "=r"(y) : "r"(x) : "cc"
-#if defined(__x86_64__)
+#if defined(ARCH_x86) && defined(MODEL_64)
       , "rax", "rdx", "rbx"
-#elif defined(__i386__)
+#elif defined(ARCH_x86) && !defined(MODEL_64)
       , "eax", "edx", "ebx"
-#elif defined(__arm__)
+#elif defined(ARCH_arm)
       , "r0", "r1", "r4"
-#elif defined(__PPC__)
+#elif defined(ARCH_powerpc)
       , "r0", "r3", "r4", "r31"
+#elif defined(ARCH_aarch64)
+      , "x0", "x1", "x16", "x29", "x30"
 #endif
 );
   return y + z;
@@ -22,7 +24,8 @@ int clobbers(int x, int z)
  || (defined(ARCH_riscV) && defined(MODEL_64)) \
  || (defined(ARCH_powerpc) && defined(MODEL_ppc64)) \
  || (defined(ARCH_powerpc) && defined(MODEL_e5500)) \
- || (defined(ARCH_mppa_k1c) && defined(MODEL_64))
+ || (defined(ARCH_mppa_k1c) && defined(MODEL_64)) \
+ || defined(ARCH_aarch64)
 #define SIXTYFOUR
 #else
 #undef SIXTYFOUR
diff --git a/test/regression/floats-basics.c b/test/regression/floats-basics.c
index a0225181..876a0d42 100644
--- a/test/regression/floats-basics.c
+++ b/test/regression/floats-basics.c
@@ -1,18 +1,10 @@
-#include<stdio.h>
-#include<stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../endian.h"
 
 #define STR_EXPAND(tok) #tok
 #define STR(tok) STR_EXPAND(tok)
 
-#if defined(__ppc__) || defined(__PPC__) || defined(__ARMEB__)
-#define ARCH_BIG_ENDIAN
-#elif defined(__i386__) || defined(__x86_64__) || defined(__ARMEL__) \
-   || defined(__riscv) || defined(__K1C__)
-#undef ARCH_BIG_ENDIAN
-#else
-#error "unknown endianness"
-#endif
-
 union converter64 {
   double dbl;
   struct {
diff --git a/test/regression/floats-lit.c b/test/regression/floats-lit.c
new file mode 100644
index 00000000..a1098faf
--- /dev/null
+++ b/test/regression/floats-lit.c
@@ -0,0 +1,559 @@
+#include <stdio.h>
+
+int error = 0;
+
+void dbl(double x, unsigned long long bits)
+{
+  union { double d; unsigned long long i; } u;
+  u.d = x;
+  if (u.i != bits) {
+    printf("Error: expected 0x%016llx, got 0x%016llx\n", bits, u.i);
+    error = 1;
+  }
+}
+
+void sng(float x, unsigned int bits)
+{
+  union { float f; unsigned int i; } u;
+  u.f = x;
+  if (u.i != bits) {
+    printf("Error: expected 0x%08x, got 0x%08x\n", bits, u.i);
+    error = 1;
+  }
+}
+
+void testdbl(void)
+{
+  printf("--- Double-precision test\n");
+  dbl(0.0, 0ULL);
+  dbl((-0.0), 0x8000000000000000ULL);
+  // The following are the "fmov immediate" of aarch64
+  // They include +1.0 and -1.0
+  dbl(0x1p-3, 0x3fc0000000000000ULL);
+  dbl(0x1.1p-3, 0x3fc1000000000000ULL);
+  dbl(0x1.2p-3, 0x3fc2000000000000ULL);
+  dbl(0x1.3p-3, 0x3fc3000000000000ULL);
+  dbl(0x1.4p-3, 0x3fc4000000000000ULL);
+  dbl(0x1.5p-3, 0x3fc5000000000000ULL);
+  dbl(0x1.6p-3, 0x3fc6000000000000ULL);
+  dbl(0x1.7p-3, 0x3fc7000000000000ULL);
+  dbl(0x1.8p-3, 0x3fc8000000000000ULL);
+  dbl(0x1.9p-3, 0x3fc9000000000000ULL);
+  dbl(0x1.ap-3, 0x3fca000000000000ULL);
+  dbl(0x1.bp-3, 0x3fcb000000000000ULL);
+  dbl(0x1.cp-3, 0x3fcc000000000000ULL);
+  dbl(0x1.dp-3, 0x3fcd000000000000ULL);
+  dbl(0x1.ep-3, 0x3fce000000000000ULL);
+  dbl(0x1.fp-3, 0x3fcf000000000000ULL);
+  dbl(0x1p-2, 0x3fd0000000000000ULL);
+  dbl(0x1.1p-2, 0x3fd1000000000000ULL);
+  dbl(0x1.2p-2, 0x3fd2000000000000ULL);
+  dbl(0x1.3p-2, 0x3fd3000000000000ULL);
+  dbl(0x1.4p-2, 0x3fd4000000000000ULL);
+  dbl(0x1.5p-2, 0x3fd5000000000000ULL);
+  dbl(0x1.6p-2, 0x3fd6000000000000ULL);
+  dbl(0x1.7p-2, 0x3fd7000000000000ULL);
+  dbl(0x1.8p-2, 0x3fd8000000000000ULL);
+  dbl(0x1.9p-2, 0x3fd9000000000000ULL);
+  dbl(0x1.ap-2, 0x3fda000000000000ULL);
+  dbl(0x1.bp-2, 0x3fdb000000000000ULL);
+  dbl(0x1.cp-2, 0x3fdc000000000000ULL);
+  dbl(0x1.dp-2, 0x3fdd000000000000ULL);
+  dbl(0x1.ep-2, 0x3fde000000000000ULL);
+  dbl(0x1.fp-2, 0x3fdf000000000000ULL);
+  dbl(0x1p-1, 0x3fe0000000000000ULL);
+  dbl(0x1.1p-1, 0x3fe1000000000000ULL);
+  dbl(0x1.2p-1, 0x3fe2000000000000ULL);
+  dbl(0x1.3p-1, 0x3fe3000000000000ULL);
+  dbl(0x1.4p-1, 0x3fe4000000000000ULL);
+  dbl(0x1.5p-1, 0x3fe5000000000000ULL);
+  dbl(0x1.6p-1, 0x3fe6000000000000ULL);
+  dbl(0x1.7p-1, 0x3fe7000000000000ULL);
+  dbl(0x1.8p-1, 0x3fe8000000000000ULL);
+  dbl(0x1.9p-1, 0x3fe9000000000000ULL);
+  dbl(0x1.ap-1, 0x3fea000000000000ULL);
+  dbl(0x1.bp-1, 0x3feb000000000000ULL);
+  dbl(0x1.cp-1, 0x3fec000000000000ULL);
+  dbl(0x1.dp-1, 0x3fed000000000000ULL);
+  dbl(0x1.ep-1, 0x3fee000000000000ULL);
+  dbl(0x1.fp-1, 0x3fef000000000000ULL);
+  dbl(0x1p+0, 0x3ff0000000000000ULL);
+  dbl(0x1.1p+0, 0x3ff1000000000000ULL);
+  dbl(0x1.2p+0, 0x3ff2000000000000ULL);
+  dbl(0x1.3p+0, 0x3ff3000000000000ULL);
+  dbl(0x1.4p+0, 0x3ff4000000000000ULL);
+  dbl(0x1.5p+0, 0x3ff5000000000000ULL);
+  dbl(0x1.6p+0, 0x3ff6000000000000ULL);
+  dbl(0x1.7p+0, 0x3ff7000000000000ULL);
+  dbl(0x1.8p+0, 0x3ff8000000000000ULL);
+  dbl(0x1.9p+0, 0x3ff9000000000000ULL);
+  dbl(0x1.ap+0, 0x3ffa000000000000ULL);
+  dbl(0x1.bp+0, 0x3ffb000000000000ULL);
+  dbl(0x1.cp+0, 0x3ffc000000000000ULL);
+  dbl(0x1.dp+0, 0x3ffd000000000000ULL);
+  dbl(0x1.ep+0, 0x3ffe000000000000ULL);
+  dbl(0x1.fp+0, 0x3fff000000000000ULL);
+  dbl(0x1p+1, 0x4000000000000000ULL);
+  dbl(0x1.1p+1, 0x4001000000000000ULL);
+  dbl(0x1.2p+1, 0x4002000000000000ULL);
+  dbl(0x1.3p+1, 0x4003000000000000ULL);
+  dbl(0x1.4p+1, 0x4004000000000000ULL);
+  dbl(0x1.5p+1, 0x4005000000000000ULL);
+  dbl(0x1.6p+1, 0x4006000000000000ULL);
+  dbl(0x1.7p+1, 0x4007000000000000ULL);
+  dbl(0x1.8p+1, 0x4008000000000000ULL);
+  dbl(0x1.9p+1, 0x4009000000000000ULL);
+  dbl(0x1.ap+1, 0x400a000000000000ULL);
+  dbl(0x1.bp+1, 0x400b000000000000ULL);
+  dbl(0x1.cp+1, 0x400c000000000000ULL);
+  dbl(0x1.dp+1, 0x400d000000000000ULL);
+  dbl(0x1.ep+1, 0x400e000000000000ULL);
+  dbl(0x1.fp+1, 0x400f000000000000ULL);
+  dbl(0x1p+2, 0x4010000000000000ULL);
+  dbl(0x1.1p+2, 0x4011000000000000ULL);
+  dbl(0x1.2p+2, 0x4012000000000000ULL);
+  dbl(0x1.3p+2, 0x4013000000000000ULL);
+  dbl(0x1.4p+2, 0x4014000000000000ULL);
+  dbl(0x1.5p+2, 0x4015000000000000ULL);
+  dbl(0x1.6p+2, 0x4016000000000000ULL);
+  dbl(0x1.7p+2, 0x4017000000000000ULL);
+  dbl(0x1.8p+2, 0x4018000000000000ULL);
+  dbl(0x1.9p+2, 0x4019000000000000ULL);
+  dbl(0x1.ap+2, 0x401a000000000000ULL);
+  dbl(0x1.bp+2, 0x401b000000000000ULL);
+  dbl(0x1.cp+2, 0x401c000000000000ULL);
+  dbl(0x1.dp+2, 0x401d000000000000ULL);
+  dbl(0x1.ep+2, 0x401e000000000000ULL);
+  dbl(0x1.fp+2, 0x401f000000000000ULL);
+  dbl(0x1p+3, 0x4020000000000000ULL);
+  dbl(0x1.1p+3, 0x4021000000000000ULL);
+  dbl(0x1.2p+3, 0x4022000000000000ULL);
+  dbl(0x1.3p+3, 0x4023000000000000ULL);
+  dbl(0x1.4p+3, 0x4024000000000000ULL);
+  dbl(0x1.5p+3, 0x4025000000000000ULL);
+  dbl(0x1.6p+3, 0x4026000000000000ULL);
+  dbl(0x1.7p+3, 0x4027000000000000ULL);
+  dbl(0x1.8p+3, 0x4028000000000000ULL);
+  dbl(0x1.9p+3, 0x4029000000000000ULL);
+  dbl(0x1.ap+3, 0x402a000000000000ULL);
+  dbl(0x1.bp+3, 0x402b000000000000ULL);
+  dbl(0x1.cp+3, 0x402c000000000000ULL);
+  dbl(0x1.dp+3, 0x402d000000000000ULL);
+  dbl(0x1.ep+3, 0x402e000000000000ULL);
+  dbl(0x1.fp+3, 0x402f000000000000ULL);
+  dbl(0x1p+4, 0x4030000000000000ULL);
+  dbl(0x1.1p+4, 0x4031000000000000ULL);
+  dbl(0x1.2p+4, 0x4032000000000000ULL);
+  dbl(0x1.3p+4, 0x4033000000000000ULL);
+  dbl(0x1.4p+4, 0x4034000000000000ULL);
+  dbl(0x1.5p+4, 0x4035000000000000ULL);
+  dbl(0x1.6p+4, 0x4036000000000000ULL);
+  dbl(0x1.7p+4, 0x4037000000000000ULL);
+  dbl(0x1.8p+4, 0x4038000000000000ULL);
+  dbl(0x1.9p+4, 0x4039000000000000ULL);
+  dbl(0x1.ap+4, 0x403a000000000000ULL);
+  dbl(0x1.bp+4, 0x403b000000000000ULL);
+  dbl(0x1.cp+4, 0x403c000000000000ULL);
+  dbl(0x1.dp+4, 0x403d000000000000ULL);
+  dbl(0x1.ep+4, 0x403e000000000000ULL);
+  dbl(0x1.fp+4, 0x403f000000000000ULL);
+  dbl((-0x1p-3), 0xbfc0000000000000ULL);
+  dbl((-0x1.1p-3), 0xbfc1000000000000ULL);
+  dbl((-0x1.2p-3), 0xbfc2000000000000ULL);
+  dbl((-0x1.3p-3), 0xbfc3000000000000ULL);
+  dbl((-0x1.4p-3), 0xbfc4000000000000ULL);
+  dbl((-0x1.5p-3), 0xbfc5000000000000ULL);
+  dbl((-0x1.6p-3), 0xbfc6000000000000ULL);
+  dbl((-0x1.7p-3), 0xbfc7000000000000ULL);
+  dbl((-0x1.8p-3), 0xbfc8000000000000ULL);
+  dbl((-0x1.9p-3), 0xbfc9000000000000ULL);
+  dbl((-0x1.ap-3), 0xbfca000000000000ULL);
+  dbl((-0x1.bp-3), 0xbfcb000000000000ULL);
+  dbl((-0x1.cp-3), 0xbfcc000000000000ULL);
+  dbl((-0x1.dp-3), 0xbfcd000000000000ULL);
+  dbl((-0x1.ep-3), 0xbfce000000000000ULL);
+  dbl((-0x1.fp-3), 0xbfcf000000000000ULL);
+  dbl((-0x1p-2), 0xbfd0000000000000ULL);
+  dbl((-0x1.1p-2), 0xbfd1000000000000ULL);
+  dbl((-0x1.2p-2), 0xbfd2000000000000ULL);
+  dbl((-0x1.3p-2), 0xbfd3000000000000ULL);
+  dbl((-0x1.4p-2), 0xbfd4000000000000ULL);
+  dbl((-0x1.5p-2), 0xbfd5000000000000ULL);
+  dbl((-0x1.6p-2), 0xbfd6000000000000ULL);
+  dbl((-0x1.7p-2), 0xbfd7000000000000ULL);
+  dbl((-0x1.8p-2), 0xbfd8000000000000ULL);
+  dbl((-0x1.9p-2), 0xbfd9000000000000ULL);
+  dbl((-0x1.ap-2), 0xbfda000000000000ULL);
+  dbl((-0x1.bp-2), 0xbfdb000000000000ULL);
+  dbl((-0x1.cp-2), 0xbfdc000000000000ULL);
+  dbl((-0x1.dp-2), 0xbfdd000000000000ULL);
+  dbl((-0x1.ep-2), 0xbfde000000000000ULL);
+  dbl((-0x1.fp-2), 0xbfdf000000000000ULL);
+  dbl((-0x1p-1), 0xbfe0000000000000ULL);
+  dbl((-0x1.1p-1), 0xbfe1000000000000ULL);
+  dbl((-0x1.2p-1), 0xbfe2000000000000ULL);
+  dbl((-0x1.3p-1), 0xbfe3000000000000ULL);
+  dbl((-0x1.4p-1), 0xbfe4000000000000ULL);
+  dbl((-0x1.5p-1), 0xbfe5000000000000ULL);
+  dbl((-0x1.6p-1), 0xbfe6000000000000ULL);
+  dbl((-0x1.7p-1), 0xbfe7000000000000ULL);
+  dbl((-0x1.8p-1), 0xbfe8000000000000ULL);
+  dbl((-0x1.9p-1), 0xbfe9000000000000ULL);
+  dbl((-0x1.ap-1), 0xbfea000000000000ULL);
+  dbl((-0x1.bp-1), 0xbfeb000000000000ULL);
+  dbl((-0x1.cp-1), 0xbfec000000000000ULL);
+  dbl((-0x1.dp-1), 0xbfed000000000000ULL);
+  dbl((-0x1.ep-1), 0xbfee000000000000ULL);
+  dbl((-0x1.fp-1), 0xbfef000000000000ULL);
+  dbl((-0x1p+0), 0xbff0000000000000ULL);
+  dbl((-0x1.1p+0), 0xbff1000000000000ULL);
+  dbl((-0x1.2p+0), 0xbff2000000000000ULL);
+  dbl((-0x1.3p+0), 0xbff3000000000000ULL);
+  dbl((-0x1.4p+0), 0xbff4000000000000ULL);
+  dbl((-0x1.5p+0), 0xbff5000000000000ULL);
+  dbl((-0x1.6p+0), 0xbff6000000000000ULL);
+  dbl((-0x1.7p+0), 0xbff7000000000000ULL);
+  dbl((-0x1.8p+0), 0xbff8000000000000ULL);
+  dbl((-0x1.9p+0), 0xbff9000000000000ULL);
+  dbl((-0x1.ap+0), 0xbffa000000000000ULL);
+  dbl((-0x1.bp+0), 0xbffb000000000000ULL);
+  dbl((-0x1.cp+0), 0xbffc000000000000ULL);
+  dbl((-0x1.dp+0), 0xbffd000000000000ULL);
+  dbl((-0x1.ep+0), 0xbffe000000000000ULL);
+  dbl((-0x1.fp+0), 0xbfff000000000000ULL);
+  dbl((-0x1p+1), 0xc000000000000000ULL);
+  dbl((-0x1.1p+1), 0xc001000000000000ULL);
+  dbl((-0x1.2p+1), 0xc002000000000000ULL);
+  dbl((-0x1.3p+1), 0xc003000000000000ULL);
+  dbl((-0x1.4p+1), 0xc004000000000000ULL);
+  dbl((-0x1.5p+1), 0xc005000000000000ULL);
+  dbl((-0x1.6p+1), 0xc006000000000000ULL);
+  dbl((-0x1.7p+1), 0xc007000000000000ULL);
+  dbl((-0x1.8p+1), 0xc008000000000000ULL);
+  dbl((-0x1.9p+1), 0xc009000000000000ULL);
+  dbl((-0x1.ap+1), 0xc00a000000000000ULL);
+  dbl((-0x1.bp+1), 0xc00b000000000000ULL);
+  dbl((-0x1.cp+1), 0xc00c000000000000ULL);
+  dbl((-0x1.dp+1), 0xc00d000000000000ULL);
+  dbl((-0x1.ep+1), 0xc00e000000000000ULL);
+  dbl((-0x1.fp+1), 0xc00f000000000000ULL);
+  dbl((-0x1p+2), 0xc010000000000000ULL);
+  dbl((-0x1.1p+2), 0xc011000000000000ULL);
+  dbl((-0x1.2p+2), 0xc012000000000000ULL);
+  dbl((-0x1.3p+2), 0xc013000000000000ULL);
+  dbl((-0x1.4p+2), 0xc014000000000000ULL);
+  dbl((-0x1.5p+2), 0xc015000000000000ULL);
+  dbl((-0x1.6p+2), 0xc016000000000000ULL);
+  dbl((-0x1.7p+2), 0xc017000000000000ULL);
+  dbl((-0x1.8p+2), 0xc018000000000000ULL);
+  dbl((-0x1.9p+2), 0xc019000000000000ULL);
+  dbl((-0x1.ap+2), 0xc01a000000000000ULL);
+  dbl((-0x1.bp+2), 0xc01b000000000000ULL);
+  dbl((-0x1.cp+2), 0xc01c000000000000ULL);
+  dbl((-0x1.dp+2), 0xc01d000000000000ULL);
+  dbl((-0x1.ep+2), 0xc01e000000000000ULL);
+  dbl((-0x1.fp+2), 0xc01f000000000000ULL);
+  dbl((-0x1p+3), 0xc020000000000000ULL);
+  dbl((-0x1.1p+3), 0xc021000000000000ULL);
+  dbl((-0x1.2p+3), 0xc022000000000000ULL);
+  dbl((-0x1.3p+3), 0xc023000000000000ULL);
+  dbl((-0x1.4p+3), 0xc024000000000000ULL);
+  dbl((-0x1.5p+3), 0xc025000000000000ULL);
+  dbl((-0x1.6p+3), 0xc026000000000000ULL);
+  dbl((-0x1.7p+3), 0xc027000000000000ULL);
+  dbl((-0x1.8p+3), 0xc028000000000000ULL);
+  dbl((-0x1.9p+3), 0xc029000000000000ULL);
+  dbl((-0x1.ap+3), 0xc02a000000000000ULL);
+  dbl((-0x1.bp+3), 0xc02b000000000000ULL);
+  dbl((-0x1.cp+3), 0xc02c000000000000ULL);
+  dbl((-0x1.dp+3), 0xc02d000000000000ULL);
+  dbl((-0x1.ep+3), 0xc02e000000000000ULL);
+  dbl((-0x1.fp+3), 0xc02f000000000000ULL);
+  dbl((-0x1p+4), 0xc030000000000000ULL);
+  dbl((-0x1.1p+4), 0xc031000000000000ULL);
+  dbl((-0x1.2p+4), 0xc032000000000000ULL);
+  dbl((-0x1.3p+4), 0xc033000000000000ULL);
+  dbl((-0x1.4p+4), 0xc034000000000000ULL);
+  dbl((-0x1.5p+4), 0xc035000000000000ULL);
+  dbl((-0x1.6p+4), 0xc036000000000000ULL);
+  dbl((-0x1.7p+4), 0xc037000000000000ULL);
+  dbl((-0x1.8p+4), 0xc038000000000000ULL);
+  dbl((-0x1.9p+4), 0xc039000000000000ULL);
+  dbl((-0x1.ap+4), 0xc03a000000000000ULL);
+  dbl((-0x1.bp+4), 0xc03b000000000000ULL);
+  dbl((-0x1.cp+4), 0xc03c000000000000ULL);
+  dbl((-0x1.dp+4), 0xc03d000000000000ULL);
+  dbl((-0x1.ep+4), 0xc03e000000000000ULL);
+  dbl((-0x1.fp+4), 0xc03f000000000000ULL);
+}
+
+void testsng(void)
+{
+  printf("--- Single-precision test\n");
+  sng(0x0p+0, 0x0U);
+  sng(-0x0p+0, 0x80000000U);
+  sng(0x1p-3, 0x3e000000U);
+  sng(0x1.1p-3, 0x3e080000U);
+  sng(0x1.2p-3, 0x3e100000U);
+  sng(0x1.3p-3, 0x3e180000U);
+  sng(0x1.4p-3, 0x3e200000U);
+  sng(0x1.5p-3, 0x3e280000U);
+  sng(0x1.6p-3, 0x3e300000U);
+  sng(0x1.7p-3, 0x3e380000U);
+  sng(0x1.8p-3, 0x3e400000U);
+  sng(0x1.9p-3, 0x3e480000U);
+  sng(0x1.ap-3, 0x3e500000U);
+  sng(0x1.bp-3, 0x3e580000U);
+  sng(0x1.cp-3, 0x3e600000U);
+  sng(0x1.dp-3, 0x3e680000U);
+  sng(0x1.ep-3, 0x3e700000U);
+  sng(0x1.fp-3, 0x3e780000U);
+  sng(0x1p-2, 0x3e800000U);
+  sng(0x1.1p-2, 0x3e880000U);
+  sng(0x1.2p-2, 0x3e900000U);
+  sng(0x1.3p-2, 0x3e980000U);
+  sng(0x1.4p-2, 0x3ea00000U);
+  sng(0x1.5p-2, 0x3ea80000U);
+  sng(0x1.6p-2, 0x3eb00000U);
+  sng(0x1.7p-2, 0x3eb80000U);
+  sng(0x1.8p-2, 0x3ec00000U);
+  sng(0x1.9p-2, 0x3ec80000U);
+  sng(0x1.ap-2, 0x3ed00000U);
+  sng(0x1.bp-2, 0x3ed80000U);
+  sng(0x1.cp-2, 0x3ee00000U);
+  sng(0x1.dp-2, 0x3ee80000U);
+  sng(0x1.ep-2, 0x3ef00000U);
+  sng(0x1.fp-2, 0x3ef80000U);
+  sng(0x1p-1, 0x3f000000U);
+  sng(0x1.1p-1, 0x3f080000U);
+  sng(0x1.2p-1, 0x3f100000U);
+  sng(0x1.3p-1, 0x3f180000U);
+  sng(0x1.4p-1, 0x3f200000U);
+  sng(0x1.5p-1, 0x3f280000U);
+  sng(0x1.6p-1, 0x3f300000U);
+  sng(0x1.7p-1, 0x3f380000U);
+  sng(0x1.8p-1, 0x3f400000U);
+  sng(0x1.9p-1, 0x3f480000U);
+  sng(0x1.ap-1, 0x3f500000U);
+  sng(0x1.bp-1, 0x3f580000U);
+  sng(0x1.cp-1, 0x3f600000U);
+  sng(0x1.dp-1, 0x3f680000U);
+  sng(0x1.ep-1, 0x3f700000U);
+  sng(0x1.fp-1, 0x3f780000U);
+  sng(0x1p+0, 0x3f800000U);
+  sng(0x1.1p+0, 0x3f880000U);
+  sng(0x1.2p+0, 0x3f900000U);
+  sng(0x1.3p+0, 0x3f980000U);
+  sng(0x1.4p+0, 0x3fa00000U);
+  sng(0x1.5p+0, 0x3fa80000U);
+  sng(0x1.6p+0, 0x3fb00000U);
+  sng(0x1.7p+0, 0x3fb80000U);
+  sng(0x1.8p+0, 0x3fc00000U);
+  sng(0x1.9p+0, 0x3fc80000U);
+  sng(0x1.ap+0, 0x3fd00000U);
+  sng(0x1.bp+0, 0x3fd80000U);
+  sng(0x1.cp+0, 0x3fe00000U);
+  sng(0x1.dp+0, 0x3fe80000U);
+  sng(0x1.ep+0, 0x3ff00000U);
+  sng(0x1.fp+0, 0x3ff80000U);
+  sng(0x1p+1, 0x40000000U);
+  sng(0x1.1p+1, 0x40080000U);
+  sng(0x1.2p+1, 0x40100000U);
+  sng(0x1.3p+1, 0x40180000U);
+  sng(0x1.4p+1, 0x40200000U);
+  sng(0x1.5p+1, 0x40280000U);
+  sng(0x1.6p+1, 0x40300000U);
+  sng(0x1.7p+1, 0x40380000U);
+  sng(0x1.8p+1, 0x40400000U);
+  sng(0x1.9p+1, 0x40480000U);
+  sng(0x1.ap+1, 0x40500000U);
+  sng(0x1.bp+1, 0x40580000U);
+  sng(0x1.cp+1, 0x40600000U);
+  sng(0x1.dp+1, 0x40680000U);
+  sng(0x1.ep+1, 0x40700000U);
+  sng(0x1.fp+1, 0x40780000U);
+  sng(0x1p+2, 0x40800000U);
+  sng(0x1.1p+2, 0x40880000U);
+  sng(0x1.2p+2, 0x40900000U);
+  sng(0x1.3p+2, 0x40980000U);
+  sng(0x1.4p+2, 0x40a00000U);
+  sng(0x1.5p+2, 0x40a80000U);
+  sng(0x1.6p+2, 0x40b00000U);
+  sng(0x1.7p+2, 0x40b80000U);
+  sng(0x1.8p+2, 0x40c00000U);
+  sng(0x1.9p+2, 0x40c80000U);
+  sng(0x1.ap+2, 0x40d00000U);
+  sng(0x1.bp+2, 0x40d80000U);
+  sng(0x1.cp+2, 0x40e00000U);
+  sng(0x1.dp+2, 0x40e80000U);
+  sng(0x1.ep+2, 0x40f00000U);
+  sng(0x1.fp+2, 0x40f80000U);
+  sng(0x1p+3, 0x41000000U);
+  sng(0x1.1p+3, 0x41080000U);
+  sng(0x1.2p+3, 0x41100000U);
+  sng(0x1.3p+3, 0x41180000U);
+  sng(0x1.4p+3, 0x41200000U);
+  sng(0x1.5p+3, 0x41280000U);
+  sng(0x1.6p+3, 0x41300000U);
+  sng(0x1.7p+3, 0x41380000U);
+  sng(0x1.8p+3, 0x41400000U);
+  sng(0x1.9p+3, 0x41480000U);
+  sng(0x1.ap+3, 0x41500000U);
+  sng(0x1.bp+3, 0x41580000U);
+  sng(0x1.cp+3, 0x41600000U);
+  sng(0x1.dp+3, 0x41680000U);
+  sng(0x1.ep+3, 0x41700000U);
+  sng(0x1.fp+3, 0x41780000U);
+  sng(0x1p+4, 0x41800000U);
+  sng(0x1.1p+4, 0x41880000U);
+  sng(0x1.2p+4, 0x41900000U);
+  sng(0x1.3p+4, 0x41980000U);
+  sng(0x1.4p+4, 0x41a00000U);
+  sng(0x1.5p+4, 0x41a80000U);
+  sng(0x1.6p+4, 0x41b00000U);
+  sng(0x1.7p+4, 0x41b80000U);
+  sng(0x1.8p+4, 0x41c00000U);
+  sng(0x1.9p+4, 0x41c80000U);
+  sng(0x1.ap+4, 0x41d00000U);
+  sng(0x1.bp+4, 0x41d80000U);
+  sng(0x1.cp+4, 0x41e00000U);
+  sng(0x1.dp+4, 0x41e80000U);
+  sng(0x1.ep+4, 0x41f00000U);
+  sng(0x1.fp+4, 0x41f80000U);
+  sng(-0x1p-3, 0xbe000000U);
+  sng(-0x1.1p-3, 0xbe080000U);
+  sng(-0x1.2p-3, 0xbe100000U);
+  sng(-0x1.3p-3, 0xbe180000U);
+  sng(-0x1.4p-3, 0xbe200000U);
+  sng(-0x1.5p-3, 0xbe280000U);
+  sng(-0x1.6p-3, 0xbe300000U);
+  sng(-0x1.7p-3, 0xbe380000U);
+  sng(-0x1.8p-3, 0xbe400000U);
+  sng(-0x1.9p-3, 0xbe480000U);
+  sng(-0x1.ap-3, 0xbe500000U);
+  sng(-0x1.bp-3, 0xbe580000U);
+  sng(-0x1.cp-3, 0xbe600000U);
+  sng(-0x1.dp-3, 0xbe680000U);
+  sng(-0x1.ep-3, 0xbe700000U);
+  sng(-0x1.fp-3, 0xbe780000U);
+  sng(-0x1p-2, 0xbe800000U);
+  sng(-0x1.1p-2, 0xbe880000U);
+  sng(-0x1.2p-2, 0xbe900000U);
+  sng(-0x1.3p-2, 0xbe980000U);
+  sng(-0x1.4p-2, 0xbea00000U);
+  sng(-0x1.5p-2, 0xbea80000U);
+  sng(-0x1.6p-2, 0xbeb00000U);
+  sng(-0x1.7p-2, 0xbeb80000U);
+  sng(-0x1.8p-2, 0xbec00000U);
+  sng(-0x1.9p-2, 0xbec80000U);
+  sng(-0x1.ap-2, 0xbed00000U);
+  sng(-0x1.bp-2, 0xbed80000U);
+  sng(-0x1.cp-2, 0xbee00000U);
+  sng(-0x1.dp-2, 0xbee80000U);
+  sng(-0x1.ep-2, 0xbef00000U);
+  sng(-0x1.fp-2, 0xbef80000U);
+  sng(-0x1p-1, 0xbf000000U);
+  sng(-0x1.1p-1, 0xbf080000U);
+  sng(-0x1.2p-1, 0xbf100000U);
+  sng(-0x1.3p-1, 0xbf180000U);
+  sng(-0x1.4p-1, 0xbf200000U);
+  sng(-0x1.5p-1, 0xbf280000U);
+  sng(-0x1.6p-1, 0xbf300000U);
+  sng(-0x1.7p-1, 0xbf380000U);
+  sng(-0x1.8p-1, 0xbf400000U);
+  sng(-0x1.9p-1, 0xbf480000U);
+  sng(-0x1.ap-1, 0xbf500000U);
+  sng(-0x1.bp-1, 0xbf580000U);
+  sng(-0x1.cp-1, 0xbf600000U);
+  sng(-0x1.dp-1, 0xbf680000U);
+  sng(-0x1.ep-1, 0xbf700000U);
+  sng(-0x1.fp-1, 0xbf780000U);
+  sng(-0x1p+0, 0xbf800000U);
+  sng(-0x1.1p+0, 0xbf880000U);
+  sng(-0x1.2p+0, 0xbf900000U);
+  sng(-0x1.3p+0, 0xbf980000U);
+  sng(-0x1.4p+0, 0xbfa00000U);
+  sng(-0x1.5p+0, 0xbfa80000U);
+  sng(-0x1.6p+0, 0xbfb00000U);
+  sng(-0x1.7p+0, 0xbfb80000U);
+  sng(-0x1.8p+0, 0xbfc00000U);
+  sng(-0x1.9p+0, 0xbfc80000U);
+  sng(-0x1.ap+0, 0xbfd00000U);
+  sng(-0x1.bp+0, 0xbfd80000U);
+  sng(-0x1.cp+0, 0xbfe00000U);
+  sng(-0x1.dp+0, 0xbfe80000U);
+  sng(-0x1.ep+0, 0xbff00000U);
+  sng(-0x1.fp+0, 0xbff80000U);
+  sng(-0x1p+1, 0xc0000000U);
+  sng(-0x1.1p+1, 0xc0080000U);
+  sng(-0x1.2p+1, 0xc0100000U);
+  sng(-0x1.3p+1, 0xc0180000U);
+  sng(-0x1.4p+1, 0xc0200000U);
+  sng(-0x1.5p+1, 0xc0280000U);
+  sng(-0x1.6p+1, 0xc0300000U);
+  sng(-0x1.7p+1, 0xc0380000U);
+  sng(-0x1.8p+1, 0xc0400000U);
+  sng(-0x1.9p+1, 0xc0480000U);
+  sng(-0x1.ap+1, 0xc0500000U);
+  sng(-0x1.bp+1, 0xc0580000U);
+  sng(-0x1.cp+1, 0xc0600000U);
+  sng(-0x1.dp+1, 0xc0680000U);
+  sng(-0x1.ep+1, 0xc0700000U);
+  sng(-0x1.fp+1, 0xc0780000U);
+  sng(-0x1p+2, 0xc0800000U);
+  sng(-0x1.1p+2, 0xc0880000U);
+  sng(-0x1.2p+2, 0xc0900000U);
+  sng(-0x1.3p+2, 0xc0980000U);
+  sng(-0x1.4p+2, 0xc0a00000U);
+  sng(-0x1.5p+2, 0xc0a80000U);
+  sng(-0x1.6p+2, 0xc0b00000U);
+  sng(-0x1.7p+2, 0xc0b80000U);
+  sng(-0x1.8p+2, 0xc0c00000U);
+  sng(-0x1.9p+2, 0xc0c80000U);
+  sng(-0x1.ap+2, 0xc0d00000U);
+  sng(-0x1.bp+2, 0xc0d80000U);
+  sng(-0x1.cp+2, 0xc0e00000U);
+  sng(-0x1.dp+2, 0xc0e80000U);
+  sng(-0x1.ep+2, 0xc0f00000U);
+  sng(-0x1.fp+2, 0xc0f80000U);
+  sng(-0x1p+3, 0xc1000000U);
+  sng(-0x1.1p+3, 0xc1080000U);
+  sng(-0x1.2p+3, 0xc1100000U);
+  sng(-0x1.3p+3, 0xc1180000U);
+  sng(-0x1.4p+3, 0xc1200000U);
+  sng(-0x1.5p+3, 0xc1280000U);
+  sng(-0x1.6p+3, 0xc1300000U);
+  sng(-0x1.7p+3, 0xc1380000U);
+  sng(-0x1.8p+3, 0xc1400000U);
+  sng(-0x1.9p+3, 0xc1480000U);
+  sng(-0x1.ap+3, 0xc1500000U);
+  sng(-0x1.bp+3, 0xc1580000U);
+  sng(-0x1.cp+3, 0xc1600000U);
+  sng(-0x1.dp+3, 0xc1680000U);
+  sng(-0x1.ep+3, 0xc1700000U);
+  sng(-0x1.fp+3, 0xc1780000U);
+  sng(-0x1p+4, 0xc1800000U);
+  sng(-0x1.1p+4, 0xc1880000U);
+  sng(-0x1.2p+4, 0xc1900000U);
+  sng(-0x1.3p+4, 0xc1980000U);
+  sng(-0x1.4p+4, 0xc1a00000U);
+  sng(-0x1.5p+4, 0xc1a80000U);
+  sng(-0x1.6p+4, 0xc1b00000U);
+  sng(-0x1.7p+4, 0xc1b80000U);
+  sng(-0x1.8p+4, 0xc1c00000U);
+  sng(-0x1.9p+4, 0xc1c80000U);
+  sng(-0x1.ap+4, 0xc1d00000U);
+  sng(-0x1.bp+4, 0xc1d80000U);
+  sng(-0x1.cp+4, 0xc1e00000U);
+  sng(-0x1.dp+4, 0xc1e80000U);
+  sng(-0x1.ep+4, 0xc1f00000U);
+  sng(-0x1.fp+4, 0xc1f80000U);
+}
+
+
+int main()
+{
+  testdbl();
+  testsng();
+  return error;
+}
diff --git a/test/regression/floats.c b/test/regression/floats.c
index 55c9fd26..58c202ae 100644
--- a/test/regression/floats.c
+++ b/test/regression/floats.c
@@ -1,17 +1,9 @@
-#include<stdio.h>
+#include <stdio.h>
+#include "../endian.h"
 
 #define STR_EXPAND(tok) #tok
 #define STR(tok) STR_EXPAND(tok)
 
-#if defined(__ppc__) || defined(__PPC__) || defined(__ARMEB__)
-#define ARCH_BIG_ENDIAN
-#elif defined(__i386__) || defined(__x86_64__) || defined(__ARMEL__) \
-   || defined(__riscv) || defined(__K1C__)
-#undef ARCH_BIG_ENDIAN
-#else
-#error "unknown endianness"
-#endif
-
 union converter64 {
   double dbl;
   struct {
diff --git a/test/regression/int64.c b/test/regression/int64.c
index d9785e95..0da9602d 100644
--- a/test/regression/int64.c
+++ b/test/regression/int64.c
@@ -103,7 +103,8 @@ u64 special_values[] = {
   0x80000000LLU,
   0x7FFFFFFFFFFFFFFFLLU,
   0x8000000000000000LLU,
-  0x100000003LLU
+  0x100000003LLU,
+  0x52ce6b4000000063LLU
 };
 
 #define NUM_SPECIAL_VALUES (sizeof(special_values) / sizeof(u64))
diff --git a/test/regression/interop1.c b/test/regression/interop1.c
index a39f449c..6836b89e 100644
--- a/test/regression/interop1.c
+++ b/test/regression/interop1.c
@@ -195,6 +195,17 @@ RETURN(ru6,U6,init_U6)
 RETURN(ru7,U7,init_U7)
 RETURN(ru8,U8,init_U8)
 
+/* Returning small integers */
+
+#define SMALLINT(name,ty) \
+extern ty THEM(name)(int); \
+ty US(name)(int x) { return x * x; }
+
+SMALLINT(si8u, unsigned char)
+SMALLINT(si8s, signed char)
+SMALLINT(si16u, unsigned short)
+SMALLINT(si16s, signed short)
+
 /* Test function, calling the functions compiled by the other compiler */
 
 #define CALLPRINT(name,ty,init) \
@@ -207,6 +218,10 @@ RETURN(ru8,U8,init_U8)
 extern void THEM(test) (void);
 void US(test) (void)
 {
+  printf("si8u: %d\n", THEM(si8u)(12345));
+  printf("si8s: %d\n", THEM(si8s)(12345));
+  printf("si16u: %d\n", THEM(si16u)(1234567));
+  printf("si16s: %d\n", THEM(si16s)(1234567));
   CALLPRINT(s1,S1,init_S1)
   CALLPRINT(s2,S2,init_S2)
   CALLPRINT(s3,S3,init_S3)
diff --git a/test/regression/packedstruct1.c b/test/regression/packedstruct1.c
index ac68c698..b805c92a 100644
--- a/test/regression/packedstruct1.c
+++ b/test/regression/packedstruct1.c
@@ -23,9 +23,9 @@ void test1(void)
   struct s1 s1;
   printf("sizeof(struct s1) = %d\n", szof(s1));
   printf("precomputed sizeof(struct s1) = %d\n", bszof(s1));
-  printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          offsetOf(s1,x), offsetOf(s1,y), offsetOf(s1,z));
-  printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          boffsetof(s1,x), boffsetof(s1,y), boffsetof(s1,z));
   s1.x = 123; s1.y = -456; s1.z = 3.14159;
   printf("s1 = {x = %d, y = %d, z = %.5f}\n\n", s1.x, s1.y, s1.z);
@@ -44,9 +44,9 @@ void test2(void)
   printf("sizeof(struct s2) = %d\n", szof(s2));
   printf("precomputed sizeof(struct s2) = %d\n", bszof(s2));
   printf("&s2 mod 16 = %d\n", ((int) &s2) & 0xF);
-  printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          offsetOf(s2,x), offsetOf(s2,y), offsetOf(s2,z));
-  printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          boffsetof(s2,x), boffsetof(s2,y), boffsetof(s2,z));
   s2.x = 12345; s2.y = -456; s2.z = 3.14159;
   printf("s2 = {x = %d, y = %d, z = %.5f}\n\n", s2.x, s2.y, s2.z);
@@ -73,8 +73,8 @@ void test3(void)
 
   printf("sizeof(struct s3) = %d\n", szof(s3));
   printf("precomputed sizeof(struct s3) = %d\n", bszof(s3));
-  printf("offsetOf(s) = %d\n", offsetOf(s3,s));
-  printf("precomputed offsetOf(s) = %d\n", boffsetof(s3,s));
+  printf("offsetof(s) = %d\n", offsetOf(s3,s));
+  printf("precomputed offsetof(s) = %d\n", boffsetof(s3,s));
   s3.x = 123;
   s3.y = 45678;
   s3.z = 0x80000001U;
@@ -103,9 +103,9 @@ void test4(void)
 
   printf("sizeof(struct s4) = %d\n", szof(s4));
   printf("precomputed sizeof(struct s4) = %d\n", bszof(s4));
-  printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          offsetOf(s4,x), offsetOf(s4,y), offsetOf(s4,z));
-  printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          boffsetof(s4,x), boffsetof(s4,y), boffsetof(s4,z));
   s4.x = 123; s4.y = -456; s4.z = 3.14159;
   printf("s4 = {x = %d, y = %d, z = %.5f}\n\n", s4.x, s4.y, s4.z);
@@ -121,9 +121,9 @@ void test5(void)
 
   printf("sizeof(struct s5) = %d\n", szof(s5));
   printf("precomputed sizeof(struct s5) = %d\n", bszof(s5));
-  printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          offsetOf(s5,x), offsetOf(s5,y), offsetOf(s5,z));
-  printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          boffsetof(s5,x), boffsetof(s5,y), boffsetof(s5,z));
   s5.x = 123; s5.y = -456; s5.z = 3.14159;
   printf("s5 = {x = %d, y = %d, z = %.5f}\n\n", s5.x, s5.y, s5.z);
@@ -139,9 +139,9 @@ void test6(void)
 
   printf("sizeof(struct s6) = %d\n", szof(s6));
   printf("precomputed sizeof(struct s6) = %d\n", bszof(s6));
-  printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          offsetOf(s6,x), offsetOf(s6,y), offsetOf(s6,z));
-  printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+  printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
          boffsetof(s6,x), boffsetof(s6,y), boffsetof(s6,z));
   s62.x = 123; s62.y = -456; s62.z = 3.14159;
   printf("s62 = {x = %d, y = %d, z = %.5f}\n\n", s62.x, s62.y, s62.z);
diff --git a/test/spass/Makefile b/test/spass/Makefile
index 0e89d6d1..d512ea95 100644
--- a/test/spass/Makefile
+++ b/test/spass/Makefile
@@ -24,11 +24,10 @@ clean:
 test:
 	$(SIMU) ./spass small_problem.dfg | grep 'Proof found'
 
-TIME=xtime -o /dev/null     # Xavier's hack
-#TIME=time >/dev/null       # Otherwise
+TIME=ocaml unix.cma ../../tools/xtime.ml -o /dev/null -mintime 5.0
 
 bench:
-	@echo -n "spass: "; $(TIME) ./spass problem.dfg
+	@$(TIME) -name spass -- ./spass problem.dfg
 
 depend:
 	gcc -MM $(SRCS) > .depend
diff --git a/tools/ndfun.ml b/tools/ndfun.ml
index 2b8bcb19..b6a87ede 100644
--- a/tools/ndfun.ml
+++ b/tools/ndfun.ml
@@ -41,7 +41,9 @@ let trim s =
 let str_match n re s =
   if not (Str.string_match re s 0) then [||] else begin
     let res = Array.make (n+1) "" in
-    for i = 1 to n do res.(i) <- Str.matched_group i s done;
+    for i = 1 to n do
+      res.(i) <- (try Str.matched_group i s with Not_found -> "")
+    done;
     for i = 1 to n do res.(i) <- trim res.(i) done;
     res
   end
@@ -87,6 +89,11 @@ let match_temps args =
 let parenpats p =
   "(" ^ Str.global_replace re_comma ") (" p ^ ")"
 
+(* "foo, bar, gee" -> "_ _ _" *)
+
+let underscores_for s =
+  Str.global_replace re_arg "_" (remove_commas s)
+
 (* Extract the bound variables in a pattern.  Heuristic: any identifier
    that starts with a lowercase letter and is not "nil". *)
 
@@ -123,7 +130,7 @@ let re_nd = Str.regexp(
 
 let re_split_cases = Str.regexp "|"
 
-let re_case = Str.regexp "\\(.*\\)=>\\(.*\\)"
+let re_case = Str.regexp "\\([^?]*\\)\\(\\?\\?\\(.*\\)\\)?=>\\(.*\\)"
 
 let re_default_pat = Str.regexp "[ _,]*$"
 
@@ -165,16 +172,20 @@ let transl_ndfun filename lineno s =
   (* Adding each case *)
   let numcase = ref 0 in
   let transl_case s =
-    let res = str_match 2 re_case s in
+    let res = str_match 4 re_case s in
     if Array.length res = 0 then
       error filename lineno ("ill-formed case: " ^ s);
-    let patlist = res.(1) and rhs = res.(2) in
+    let patlist = res.(1) and guard = res.(3) and rhs = res.(4) in
     let bv = boundvarspat patlist in
     if not (Str.string_match re_default_pat patlist 0) then begin
       incr numcase;
       bprintf a "  | %s_case%d: forall %s, %s_cases %s\n"
           name !numcase bv name (parenpats patlist);
-      bprintf b "  | %s => %s_case%d %s\n" patlist name !numcase bv;
+      if guard = "" then
+        bprintf b "  | %s => %s_case%d %s\n" patlist name !numcase bv
+      else
+        bprintf b "  | %s => if %s then %s_case%d %s else %s_default %s\n"
+                  patlist guard name !numcase bv name (underscores_for args);
       bprintf c "  | %s_case%d %s => (* %s *) \n" name !numcase bv patlist;
       bprintf c "      %s\n" rhs
     end else begin
diff --git a/tools/xtime.ml b/tools/xtime.ml
new file mode 100644
index 00000000..fbb25a49
--- /dev/null
+++ b/tools/xtime.ml
@@ -0,0 +1,101 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Timing the execution of a command, with more options than the
+   standard Unix "time" utility. *)
+
+open Printf
+
+let outfile = ref ""
+let errfile = ref ""
+let command_name = ref ""
+let num_runs = ref 1
+let min_runs = ref 0
+let min_time = ref 0.0
+let print_sys = ref false
+
+let error fmt =
+  eprintf "Error: "; kfprintf (fun _ -> exit 2) stderr fmt
+
+let open_file out dfl =
+  if out = ""
+  then dfl
+  else Unix.(openfile out [O_WRONLY; O_CREAT; O_TRUNC] 0o666)
+
+let close_file out fd =
+  if out <> "" then Unix.close fd
+  
+let run1 (cmd, args) =
+  let fd_out = open_file !outfile Unix.stdout in
+  let fd_err = open_file !errfile Unix.stderr in
+  let pid =
+    Unix.create_process cmd (Array.of_list (cmd :: args))
+                        Unix.stdin fd_out fd_err in
+  close_file !outfile fd_out;
+  close_file !errfile fd_err;
+  let (_, st) = Unix.waitpid [] pid in
+  match st with
+  | Unix.WEXITED 127 -> error "cannot execute '%s'\n" cmd
+  | Unix.WSIGNALED signo -> error "terminated by signal %d\n" signo
+  | _ -> ()
+
+let run (cmd, arg) =
+  let rec repeat n =
+    run1 (cmd, arg);
+    if (!min_time > 0.0 && Unix.((times()).tms_cutime) < !min_time)
+    || (!min_runs > 0 && n < !min_runs)
+    || n < !num_runs
+    then repeat (n + 1)
+    else n in
+  let n = repeat 1 in
+  let ts = Unix.times() in
+  let cmdname = if !command_name <> "" then !command_name else cmd in
+  if !print_sys then
+    Printf.printf "%.3f usr + %.3f sys %s\n"
+                  (ts.Unix.tms_cutime /. float n)
+                  (ts.Unix.tms_cstime /. float n)
+                  cmdname
+  else
+    Printf.printf "%.3f %s\n"
+                  (ts.Unix.tms_cutime /. float n)
+                  cmdname
+
+let _ =
+  let cmd_and_args = ref [] in
+  Arg.parse [
+    "-o", Arg.Set_string outfile,
+      " <file>  Redirect standard output of command to <file>";
+    "-e", Arg.Set_string outfile,
+      " <file>  Redirect standard error of command to <file>";
+    "-name", Arg.Set_string command_name,
+      " <name>  Name of command to report along with the time";
+    "-repeat", Arg.Int (fun n -> num_runs := n),
+      " <N>  Run the command N times";
+    "-mintime", Arg.Float (fun f -> min_time := f),
+      " <T>  Repeatedly run the command for a total duration of at least T seconds";
+    "-minruns", Arg.Int (fun n -> num_runs := n),
+      " <N>  Run the command at least N times (to be used in conjunction with -mintime)";
+    "-sys", Arg.Set print_sys,
+      "  Print system time (spent in the OS) in addition to user time (spent in the command)";
+    "--", Arg.Rest (fun s -> cmd_and_args := s :: !cmd_and_args),
+      " <executable> <arguments>   Specify the executable to time, with its arguments"
+  ]
+  (fun s -> raise (Arg.Bad (sprintf "Don't know what to do with '%s'" s)))
+  "Usage: xtime [options] -- <executable> [arguments].\n\nOptions are:";
+  match List.rev !cmd_and_args with
+  | [] ->
+      error "No command to execute\n"
+  | cmd :: args ->
+      Unix.handle_unix_error run (cmd, args)
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index 16426ce3..b8353046 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -251,7 +251,7 @@ let expand_builtin_va_start_32 r =
     invalid_arg "Fatal error: va_start used in non-vararg function";
   let ofs =
     Int32.(add (add !PrintAsmaux.current_function_stacksize 4l)
-               (mul 4l (Z.to_int32 (Conventions1.size_arguments
+               (mul 4l (Z.to_int32 (Conventions.size_arguments
                                       (get_current_function_sig ()))))) in
   emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs)));
   emit (Pmovl_mr (linear_addr r _0z, RAX))
@@ -506,7 +506,7 @@ let expand_instruction instr =
          (* Save the registers *)
          emit (Pleaq (R10, linear_addr RSP (Z.of_uint save_regs)));
          emit (Pcall_s (intern_string "__compcert_va_saveregs",
-                        {sig_args = []; sig_res = None; sig_cc = cc_default}))
+                        {sig_args = []; sig_res = Tvoid; sig_cc = cc_default}))
        end;
        (* Stack chaining *)
        let fullsz = sz + 8 in
diff --git a/x86/Asmgen.v b/x86/Asmgen.v
index 73e3263e..99e9fc2b 100644
--- a/x86/Asmgen.v
+++ b/x86/Asmgen.v
@@ -636,9 +636,14 @@ Definition transl_op
 
 (** Translation of memory loads and stores *)
 
-Definition transl_load (chunk: memory_chunk)
+Definition transl_load
+           (trap : trapping_mode)
+           (chunk: memory_chunk)
                        (addr: addressing) (args: list mreg) (dest: mreg)
                        (k: code) : res code :=
+  match trap with
+  | NOTRAP => Error (msg "Asmgen.transl_load x86 does not support non trapping loads")
+  | TRAP =>
   do am <- transl_addressing addr args;
   match chunk with
   | Mint8unsigned =>
@@ -659,6 +664,7 @@ Definition transl_load (chunk: memory_chunk)
       do r <- freg_of dest; OK(Pmovsd_fm r am :: k)
   | _ =>
       Error (msg "Asmgen.transl_load")
+  end
   end.
 
 Definition transl_store (chunk: memory_chunk)
@@ -699,8 +705,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
          loadind RSP f.(fn_link_ofs) Tptr AX k1)
   | Mop op args res =>
       transl_op op args res k
-  | Mload chunk addr args dst =>
-      transl_load chunk addr args dst k
+  | Mload trap chunk addr args dst =>
+      transl_load trap chunk addr args dst k
   | Mstore chunk addr args src =>
       transl_store chunk addr args src k
   | Mcall sig (inl reg) =>
diff --git a/x86/Asmgenproof.v b/x86/Asmgenproof.v
index f1fd41e3..6886b2fd 100644
--- a/x86/Asmgenproof.v
+++ b/x86/Asmgenproof.v
@@ -235,11 +235,11 @@ Proof.
 Qed.
 
 Remark transl_load_label:
-  forall chunk addr args dest k c,
-  transl_load chunk addr args dest k = OK c ->
+  forall trap chunk addr args dest k c,
+  transl_load trap chunk addr args dest k = OK c ->
   tail_nolabel k c.
 Proof.
-  intros. monadInv H. destruct chunk; TailNoLabel.
+  intros. destruct trap; try discriminate. monadInv H. destruct chunk; TailNoLabel.
 Qed.
 
 Remark transl_store_label:
@@ -567,6 +567,12 @@ Opaque loadind.
   split. eapply agree_set_undef_mreg; eauto. congruence.
   simpl; congruence.
 
+- (* Mload notrap *) (* isn't there a nicer way? *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+
+- (* Mload notrap *)
+  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
+  
 - (* Mstore *)
   assert (eval_addressing tge sp addr rs##args = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
diff --git a/x86/Asmgenproof1.v b/x86/Asmgenproof1.v
index fd88954e..7cff1047 100644
--- a/x86/Asmgenproof1.v
+++ b/x86/Asmgenproof1.v
@@ -1464,8 +1464,8 @@ Qed.
 (** Translation of memory loads. *)
 
 Lemma transl_load_correct:
-  forall chunk addr args dest k c (rs: regset) m a v,
-  transl_load chunk addr args dest k = OK c ->
+  forall trap chunk addr args dest k c (rs: regset) m a v,
+  transl_load trap chunk addr args dest k = OK c ->
   eval_addressing ge (rs#RSP) addr (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists rs',
@@ -1473,7 +1473,9 @@ Lemma transl_load_correct:
   /\ rs'#(preg_of dest) = v
   /\ forall r, data_preg r = true -> r <> preg_of dest -> rs'#r = rs#r.
 Proof.
-  unfold transl_load; intros. monadInv H.
+  unfold transl_load; intros.
+  destruct trap; simpl; try discriminate.
+  monadInv H.
   exploit transl_addressing_mode_correct; eauto. intro EA.
   assert (EA': eval_addrmode ge x rs = a). destruct a; simpl in H1; try discriminate; inv EA; auto.
   set (rs2 := nextinstr_nf (rs#(preg_of dest) <- v)).
diff --git a/x86/Builtins1.v b/x86/Builtins1.v
index 6103cc4c..f1d60961 100644
--- a/x86/Builtins1.v
+++ b/x86/Builtins1.v
@@ -33,10 +33,10 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
   match b with
   | BI_fmin | BI_fmax =>
-      mksignature (Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default
+      mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default
   end.
 
-Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
   match b with
   | BI_fmin =>
       mkbuiltin_n2t Tfloat Tfloat Tfloat
diff --git a/x86/CBuiltins.ml b/x86/CBuiltins.ml
index f4f40a31..e7f714c7 100644
--- a/x86/CBuiltins.ml
+++ b/x86/CBuiltins.ml
@@ -73,9 +73,6 @@ let builtins = {
       (TVoid [], [TPtr(TInt(IUShort, []), []); TInt(IUShort, [])], false);
     "__builtin_write32_reversed",
       (TVoid [], [TPtr(TInt(IUInt, []), []); TInt(IUInt, [])], false);
-    (* no operation *)
-    "__builtin_nop",
-      (TVoid [], [], false);
   ]
 }
 
diff --git a/x86/CSE2deps.v b/x86/CSE2deps.v
new file mode 100644
index 00000000..f4d9e254
--- /dev/null
+++ b/x86/CSE2deps.v
@@ -0,0 +1,24 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+     (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+  && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+  && ((ofsw + size_chunk chunkw <=? ofsr) ||
+      (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+  match addr, addr', args, args' with
+  | (Aindexed ofs), (Aindexed ofs'),
+    (base :: nil), (base' :: nil) =>
+    if peq base base'
+    then negb (can_swap_accesses_ofs ofs' chunk' ofs chunk)
+    else true
+  | (Aglobal symb ofs), (Aglobal symb' ofs'), nil, nil =>
+    if peq symb symb'
+    then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+    else false
+  | _, _, _, _ => true
+  end.
diff --git a/x86/CSE2depsproof.v b/x86/CSE2depsproof.v
new file mode 100644
index 00000000..1e913254
--- /dev/null
+++ b/x86/CSE2depsproof.v
@@ -0,0 +1,253 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Section MEMORY_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  Variable base : val.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : Z.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Aindexed ofsw) (base :: nil) = Some addrw. 
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Aindexed ofsr) (base :: nil) = Some addrr.
+
+  Lemma load_store_away1 :
+    forall RANGEW : 0 <= ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    ofsw + size_chunk chunkw <= ofsr
+                       \/ ofsr + size_chunk chunkr <= ofsw,
+    Mem.loadv chunkr m2 addrr =  Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *.
+    try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *.
+    destruct addrr ; simpl in * ; trivial.
+    unfold eval_addressing, eval_addressing32, eval_addressing64 in *.
+    destruct Archi.ptr64 eqn:PTR64; destruct base; simpl in *; try discriminate.
+    rewrite PTR64 in *.
+    
+    inv ADDRR.
+    inv ADDRW.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+ 
+    all: try (destruct (Ptrofs.unsigned_add_either i0
+                (Ptrofs.of_int (Int.repr ofsr))) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i0
+                (Ptrofs.of_int64 (Int64.repr ofsr))) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i0
+                (Ptrofs.of_int (Int.repr ofsw))) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: try (destruct (Ptrofs.unsigned_add_either i0
+                (Ptrofs.of_int64 (Int64.repr ofsw))) as [OFSW | OFSW];
+              rewrite OFSW).
+    
+    all: unfold Ptrofs.of_int64.
+    all: unfold Ptrofs.of_int.
+   
+
+    all: repeat rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; lia).
+    all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 4294967295; lia).
+    all: repeat rewrite Int64.unsigned_repr by (change Int64.max_unsigned with 18446744073709551615; lia).
+    all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia).
+
+    all: try change Ptrofs.modulus with 4294967296.
+    all: try change Ptrofs.modulus with 18446744073709551616.
+   
+    all: intuition lia.
+  Qed.
+  
+  Theorem load_store_away :
+    can_swap_accesses_ofs ofsr chunkr ofsw chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+
+  Section DIFFERENT_GLOBALS.
+    Variable ofsw ofsr : ptrofs.
+    Hypothesis symw symr : ident.
+    Hypothesis ADDRW : eval_addressing genv sp
+                                       (Aglobal symw ofsw) nil = Some addrw. 
+    Hypothesis ADDRR : eval_addressing genv sp
+                                       (Aglobal symr ofsr) nil = Some addrr.
+
+    Lemma ptr64_cases:
+      forall {T : Type},
+      forall b : bool,
+      forall x y : T,
+        (if b then (if b then x else y) else (if b then y else x)) = x.
+    Proof.
+      destruct b; reflexivity.
+    Qed.
+
+    (* not needed
+    Lemma bool_cases_same:
+      forall {T : Type},
+      forall b : bool,
+      forall x : T,
+        (if b then x else x) = x.
+    Proof.
+      destruct b; reflexivity.
+    Qed.
+     *)
+
+    Lemma load_store_diff_globals :
+      symw <> symr ->
+      Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+    Proof.
+      intros.
+      unfold eval_addressing in *.
+      simpl in *.
+      rewrite ptr64_cases in ADDRR.
+      rewrite ptr64_cases in ADDRW.
+      unfold Genv.symbol_address in *.
+      unfold Genv.find_symbol in *.
+      destruct ((Genv.genv_symb genv) ! symw) as [bw |] eqn:SYMW; inv ADDRW.
+      2: simpl in STORE; discriminate.
+      destruct ((Genv.genv_symb genv) ! symr) as [br |] eqn:SYMR; inv ADDRR.
+      2: reflexivity.
+      assert (br <> bw).
+      {
+        intro EQ.
+        subst br.
+        assert (symr = symw).
+        {
+          eapply Genv.genv_vars_inj; eauto.
+        }
+        congruence.
+      }
+      eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := bw).
+      - exact STORE.
+      - left. assumption.
+    Qed.
+  End DIFFERENT_GLOBALS.
+
+  Section SAME_GLOBALS.
+    Variable ofsw ofsr : ptrofs.
+    Hypothesis sym : ident.
+    Hypothesis ADDRW : eval_addressing genv sp
+                                       (Aglobal sym ofsw) nil = Some addrw. 
+    Hypothesis ADDRR : eval_addressing genv sp
+                                       (Aglobal sym ofsr) nil = Some addrr.
+    
+  Lemma load_store_glob_away1 :
+    forall RANGEW : 0 <= (Ptrofs.unsigned ofsw) <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= (Ptrofs.unsigned ofsr) <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE : (Ptrofs.unsigned ofsw) + size_chunk chunkw <= (Ptrofs.unsigned ofsr)
+                    \/ (Ptrofs.unsigned ofsr) + size_chunk chunkr <= (Ptrofs.unsigned ofsw),
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in size_chunkr_bounded, size_chunkw_bounded.
+    try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *.
+    try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *.
+    unfold eval_addressing, eval_addressing32, eval_addressing64 in *.
+
+    rewrite ptr64_cases in ADDRR.
+    rewrite ptr64_cases in ADDRW.
+    unfold Genv.symbol_address in *.
+    inv ADDRR.
+    inv ADDRW.
+    destruct (Genv.find_symbol genv sym).
+    2: discriminate.
+    
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+    tauto.
+  Qed.
+
+  Lemma load_store_glob_away :
+    (can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw) = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply load_store_glob_away1.
+    all: tauto.
+  Qed.
+  End SAME_GLOBALS.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+  Variable F V : Type.
+  Variable genv: Genv.t F V.
+  Variable sp : val.
+
+Lemma may_overlap_sound:
+  forall m m' : mem,
+  forall chunk addr args chunk' addr' args' v a a' rs,
+    (eval_addressing genv sp addr (rs ## args)) = Some a ->
+    (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+    (may_overlap chunk addr args chunk' addr' args') = false ->
+    (Mem.storev chunk m a v) = Some m' ->
+    (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m  a').
+Proof.
+  intros until rs.
+  intros ADDR ADDR' OVERLAP STORE.
+  destruct addr; destruct addr'; try discriminate.
+  { (* Aindexed / Aindexed *)
+  destruct args as [ | base [ | ]]. 1,3: discriminate.
+  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+  simpl in OVERLAP.
+  destruct (peq base base'). 2: discriminate.
+  subst base'.
+  destruct (can_swap_accesses_ofs z0 chunk' z chunk) eqn:SWAP.
+  2: discriminate.
+  simpl in *.
+  eapply load_store_away; eassumption.
+  }
+  { (* Aglobal / Aglobal *)
+    destruct args. 2: discriminate.
+    destruct args'. 2: discriminate.
+    simpl in *.
+    destruct (peq i i1).
+    {
+      subst i1.
+      rewrite negb_false_iff in OVERLAP.
+      eapply load_store_glob_away; eassumption.
+    }
+    eapply load_store_diff_globals; eassumption.
+  }
+Qed.
+
+End SOUNDNESS.
diff --git a/x86/Conventions1.v b/x86/Conventions1.v
index 35d555f9..d9f5b8fa 100644
--- a/x86/Conventions1.v
+++ b/x86/Conventions1.v
@@ -100,22 +100,20 @@ Definition is_float_reg (r: mreg) :=
   function with one integer result. *)
 
 Definition loc_result_32 (s: signature) : rpair mreg :=
-  match s.(sig_res) with
-  | None => One AX
-  | Some (Tint | Tany32) => One AX
-  | Some (Tfloat | Tsingle) => One FP0
-  | Some Tany64 => One X0
-  | Some Tlong => Twolong DX AX
+  match proj_sig_res s with
+  | Tint | Tany32 => One AX
+  | Tfloat | Tsingle => One FP0
+  | Tany64 => One X0
+  | Tlong => Twolong DX AX
   end.
 
 (** In 64 bit mode, he result value of a function is passed back to
   the caller in registers [AX] or [X0]. *)
 
 Definition loc_result_64 (s: signature) : rpair mreg :=
-  match s.(sig_res) with
-  | None => One AX
-  | Some (Tint | Tlong | Tany32 | Tany64) => One AX
-  | Some (Tfloat | Tsingle) => One X0
+  match proj_sig_res s with
+  | Tint | Tlong | Tany32 | Tany64 => One AX
+  | Tfloat | Tsingle => One X0
   end.
 
 Definition loc_result :=
@@ -127,8 +125,8 @@ Lemma loc_result_type:
   forall sig,
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
-  intros. unfold proj_sig_res, loc_result, loc_result_32, loc_result_64, mreg_type;
-  destruct Archi.ptr64; destruct (sig_res sig) as [[]|]; auto.
+  intros. unfold loc_result, loc_result_32, loc_result_64, mreg_type;
+  destruct Archi.ptr64; destruct (proj_sig_res sig); auto.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -138,7 +136,7 @@ Lemma loc_result_caller_save:
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
   intros. unfold loc_result, loc_result_32, loc_result_64, is_callee_save;
-  destruct Archi.ptr64; destruct (sig_res s) as [[]|]; simpl; auto.
+  destruct Archi.ptr64; destruct (proj_sig_res s); simpl; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -148,14 +146,14 @@ Lemma loc_result_pair:
   match loc_result sg with
   | One _ => True
   | Twolong r1 r2 =>
-       r1 <> r2 /\ sg.(sig_res) = Some Tlong
+       r1 <> r2 /\ proj_sig_res sg = Tlong
     /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
     /\ Archi.ptr64 = false
   end.
 Proof.
   intros. 
   unfold loc_result, loc_result_32, loc_result_64, mreg_type;
-  destruct Archi.ptr64; destruct (sig_res sg) as [[]|]; auto.
+  destruct Archi.ptr64; destruct (proj_sig_res sg); auto.
   split; auto. congruence.
 Qed.
 
@@ -164,7 +162,7 @@ Qed.
 Lemma loc_result_exten:
   forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
 Proof.
-  intros. unfold loc_result, loc_result_32, loc_result_64.
+  intros. unfold loc_result, loc_result_32, loc_result_64, proj_sig_res.
   destruct Archi.ptr64; rewrite H; auto.
 Qed.
 
@@ -223,36 +221,6 @@ Definition loc_arguments (s: signature) : list (rpair loc) :=
   then loc_arguments_64 s.(sig_args) 0 0 0
   else loc_arguments_32 s.(sig_args) 0.
 
-(** [size_arguments s] returns the number of [Outgoing] slots used
-  to call a function with signature [s]. *)
-
-Fixpoint size_arguments_32
-    (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
-  match tyl with
-  | nil => ofs
-  | ty :: tys => size_arguments_32 tys (ofs + typesize ty)
-  end.
-
-Fixpoint size_arguments_64 (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
-  match tyl with
-  | nil => ofs
-  | (Tint | Tlong | Tany32 | Tany64) :: tys =>
-      match list_nth_z int_param_regs ir with
-      | None => size_arguments_64 tys ir fr (ofs + 2)
-      | Some ireg => size_arguments_64 tys (ir + 1) fr ofs
-      end
-  | (Tfloat | Tsingle) :: tys =>
-      match list_nth_z float_param_regs fr with
-      | None => size_arguments_64 tys ir fr (ofs + 2)
-      | Some freg => size_arguments_64 tys ir (fr + 1) ofs
-      end
-  end.
-
-Definition size_arguments (s: signature) : Z :=
-  if Archi.ptr64
-  then size_arguments_64 s.(sig_args) 0 0 0
-  else size_arguments_32 s.(sig_args) 0.
-
 (** Argument locations are either caller-save registers or [Outgoing]
   stack slots at nonnegative offsets. *)
 
@@ -354,123 +322,22 @@ Qed.
 
 Hint Resolve loc_arguments_acceptable: locs.
 
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark size_arguments_32_above:
-  forall tyl ofs0, ofs0 <= size_arguments_32 tyl ofs0.
+Lemma loc_arguments_main:
+  loc_arguments signature_main = nil.
 Proof.
-  induction tyl; simpl; intros.
-  omega.
-  apply Z.le_trans with (ofs0 + typesize a); auto.
-  generalize (typesize_pos a); omega.
+  unfold loc_arguments; destruct Archi.ptr64; reflexivity.
 Qed.
 
-Remark size_arguments_64_above:
-  forall tyl ir fr ofs0,
-  ofs0 <= size_arguments_64 tyl ir fr ofs0.
-Proof.
-  induction tyl; simpl; intros.
-  omega.
-  assert (A: ofs0 <=
-    match list_nth_z int_param_regs ir with
-    | Some _ => size_arguments_64 tyl (ir + 1) fr ofs0
-    | None => size_arguments_64 tyl ir fr (ofs0 + 2)
-    end).
-  { destruct (list_nth_z int_param_regs ir); eauto.
-    apply Z.le_trans with (ofs0 + 2); auto. omega. }
-  assert (B: ofs0 <=
-    match list_nth_z float_param_regs fr with
-    | Some _ => size_arguments_64 tyl ir (fr + 1) ofs0
-    | None => size_arguments_64 tyl ir fr (ofs0 + 2)
-    end).
-  { destruct (list_nth_z float_param_regs fr); eauto.
-    apply Z.le_trans with (ofs0 + 2); auto. omega. }
-  destruct a; auto.
-Qed.
+(** ** Normalization of function results *)
 
-Lemma size_arguments_above:
-  forall s, size_arguments s >= 0.
-Proof.
-  intros; unfold size_arguments. apply Z.le_ge.
-  destruct Archi.ptr64; [apply size_arguments_64_above|apply size_arguments_32_above].
-Qed.
+(** In the x86 ABI, a return value of type "char" is returned in
+    register AL, leaving the top 24 bits of EAX unspecified.
+    Likewise, a return value of type "short" is returned in register
+    AH, leaving the top 16 bits of EAX unspecified.  Hence, return
+    values of small integer types need re-normalization after calls. *)
 
-Lemma loc_arguments_32_bounded:
-  forall ofs ty tyl ofs0,
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_32 tyl ofs0)) ->
-  ofs + typesize ty <= size_arguments_32 tyl ofs0.
-Proof.
-  induction tyl as [ | t l]; simpl; intros x IN.
-- contradiction.
-- rewrite in_app_iff in IN; destruct IN as [IN|IN].
-+ apply Z.le_trans with (x + typesize t); [|apply size_arguments_32_above].
-  Ltac decomp :=
-  match goal with
-  | [ H: _ \/ _ |- _ ] => destruct H; decomp
-  | [ H: S _ _ _ = S _ _ _ |- _ ] => inv H
-  | [ H: False |- _ ] => contradiction
+Definition return_value_needs_normalization (t: rettype) : bool :=
+  match t with
+  | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true
+  | _ => false
   end.
-  destruct t; simpl in IN; decomp; simpl; omega.
-+ apply IHl; auto.
-Qed.
-
-Lemma loc_arguments_64_bounded:
-  forall ofs ty tyl ir fr ofs0,
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_64 tyl ir fr ofs0)) ->
-  ofs + typesize ty <= size_arguments_64 tyl ir fr ofs0.
-Proof.
-  induction tyl; simpl; intros.
-  contradiction.
-  assert (T: forall ty0, typesize ty0 <= 2).
-  { destruct ty0; simpl; omega. }
-  assert (A: forall ty0,
-             In (S Outgoing ofs ty) (regs_of_rpairs
-              match list_nth_z int_param_regs ir with
-              | Some ireg =>
-                  One (R ireg) :: loc_arguments_64 tyl (ir + 1) fr ofs0
-              | None => One (S Outgoing ofs0 ty0) :: loc_arguments_64 tyl ir fr (ofs0 + 2)
-              end) ->
-             ofs + typesize ty <=
-             match list_nth_z int_param_regs ir with
-             | Some _ => size_arguments_64 tyl (ir + 1) fr ofs0
-             | None => size_arguments_64 tyl ir fr (ofs0 + 2)
-             end).
-  { intros. destruct (list_nth_z int_param_regs ir); simpl in H0; destruct H0.
-  - discriminate.
-  - eapply IHtyl; eauto.
-  - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.
-  - eapply IHtyl; eauto. }
-  assert (B: forall ty0,
-             In (S Outgoing ofs ty) (regs_of_rpairs
-              match list_nth_z float_param_regs fr with
-              | Some ireg =>
-                  One (R ireg) :: loc_arguments_64 tyl ir (fr + 1) ofs0
-              | None => One (S Outgoing ofs0 ty0) :: loc_arguments_64 tyl ir fr (ofs0 + 2)
-              end) ->
-             ofs + typesize ty <=
-             match list_nth_z float_param_regs fr with
-             | Some _ => size_arguments_64 tyl ir (fr + 1) ofs0
-             | None => size_arguments_64 tyl ir fr (ofs0 + 2)
-             end).
-  { intros. destruct (list_nth_z float_param_regs fr); simpl in H0; destruct H0.
-  - discriminate.
-  - eapply IHtyl; eauto.
-  - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.
-  - eapply IHtyl; eauto. }
-  destruct a; eauto.
-Qed.
-
-Lemma loc_arguments_bounded:
-  forall (s: signature) (ofs: Z) (ty: typ),
-  In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
-  ofs + typesize ty <= size_arguments s.
-Proof.
-  unfold loc_arguments, size_arguments; intros.
-  destruct Archi.ptr64; eauto using loc_arguments_32_bounded, loc_arguments_64_bounded.
-Qed.
-
-Lemma loc_arguments_main:
-  loc_arguments signature_main = nil.
-Proof.
-  unfold loc_arguments; destruct Archi.ptr64; reflexivity.
-Qed.
diff --git a/x86/DuplicateOpcodeHeuristic.ml b/x86/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..2ec314c1
--- /dev/null
+++ b/x86/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,27 @@
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+  match cond with
+  | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccompf c | Ccompfs c -> (match c with
+      | Ceq -> Some false
+      | Cne -> Some true
+      | _ -> None
+      )
+  | Cnotcompf c | Cnotcompfs c -> (match c with
+      | Ceq -> Some true
+      | Cne -> Some false
+      | _ -> None
+      )
+  | _ -> None
diff --git a/x86/Op.v b/x86/Op.v
index 16d75426..15672bbe 100644
--- a/x86/Op.v
+++ b/x86/Op.v
@@ -742,6 +742,37 @@ Proof with (try exact I; try reflexivity).
   unfold Val.select. destruct (eval_condition c vl m). apply Val.normalize_type. exact I.
 Qed.
 
+
+Definition is_trapping_op (op : operation) :=
+  match op with
+  | Odiv | Odivl | Odivu | Odivlu
+  | Omod | Omodl | Omodu | Omodlu
+  | Oshrximm _ | Oshrxlimm _
+  | Ointoffloat
+  | Ointofsingle
+  | Olongoffloat
+  | Olongofsingle 
+  | Osingleofint
+  | Osingleoflong
+  | Ofloatofint
+  | Ofloatoflong
+  | Olea _ | Oleal _ (* TODO this is suboptimal *) => true
+  | _ => false
+  end.
+
+Lemma is_trapping_op_sound:
+  forall op vl sp m,
+    op <> Omove ->
+    is_trapping_op op = false ->
+    (List.length vl) = (List.length (fst (type_of_operation op))) ->
+    eval_operation genv sp op vl m <> None.
+Proof.
+  destruct op; intros; simpl in *; try congruence.
+  all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+  all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+  all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+  all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
@@ -1199,6 +1230,21 @@ Proof.
   unfold eval_addressing; intros. destruct Archi.ptr64; eauto using eval_addressing32_inj, eval_addressing64_inj.
 Qed.
 
+Lemma eval_addressing_inj_none:
+  forall addr sp1 vl1 sp2 vl2,
+  (forall id ofs,
+      In id (globals_addressing addr) ->
+      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+  Val.inject f sp1 sp2 ->
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing ge1 sp1 addr vl1 = None ->
+  eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+  intros until vl2. intros Hglobal Hinjsp Hinjvl.
+  destruct addr; simpl in *;
+  inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate.
+Qed.
+
 Lemma eval_operation_inj:
   forall op sp1 vl1 sp2 vl2 v1,
   (forall id ofs,
@@ -1425,6 +1471,19 @@ Proof.
   destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
 Qed.
 
+Lemma eval_addressing_lessdef_none:
+  forall sp addr vl1 vl2,
+  Val.lessdef_list vl1 vl2 ->
+  eval_addressing genv sp addr vl1 = None ->
+  eval_addressing genv sp addr vl2 = None.
+Proof.
+  intros until vl2. intros Hlessdef Heval1.
+  destruct addr; simpl in *;
+  inv Hlessdef; trivial; try discriminate;
+  inv H0; trivial; try discriminate;
+  inv H2; trivial; try discriminate.
+Qed.
+
 End EVAL_LESSDEF.
 
 (** Compatibility of the evaluation functions with memory injections. *)
@@ -1477,6 +1536,19 @@ Proof.
   econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
+Lemma eval_addressing_inject_none:
+  forall addr vl1 vl2,
+  Val.inject_list f vl1 vl2 ->
+  eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+  eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+  intros.
+  rewrite eval_shift_stack_addressing.
+  eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+  intros. apply symbol_address_inject.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
+Qed.
+
 Lemma eval_operation_inject:
   forall op vl1 vl2 v1 m1 m2,
   Val.inject_list f vl1 vl2 ->
diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v
index 5703c758..af1d4e08 100644
--- a/x86/SelectOpproof.v
+++ b/x86/SelectOpproof.v
@@ -385,9 +385,9 @@ Proof.
 - TrivialExists. simpl. rewrite Int.and_commut; auto.
 - TrivialExists. simpl. rewrite Val.and_assoc. rewrite Int.and_commut. auto.
 - rewrite Val.zero_ext_and. TrivialExists. rewrite Val.and_assoc.
-  rewrite Int.and_commut. auto. compute; auto.
+  rewrite Int.and_commut. auto. omega.
 - rewrite Val.zero_ext_and. TrivialExists. rewrite Val.and_assoc.
-  rewrite Int.and_commut. auto. compute; auto.
+  rewrite Int.and_commut. auto. omega.
 - TrivialExists.
 Qed.
 
@@ -747,7 +747,7 @@ Proof.
   red; intros until x. unfold cast8unsigned. destruct (cast8unsigned_match a); intros; InvEval.
   TrivialExists.
   subst. rewrite Val.zero_ext_and. rewrite Val.and_assoc.
-  rewrite Int.and_commut. apply eval_andimm; auto. compute; auto.
+  rewrite Int.and_commut. apply eval_andimm; auto. omega.
   TrivialExists.
 Qed.
 
@@ -763,7 +763,7 @@ Proof.
   red; intros until x. unfold cast16unsigned. destruct (cast16unsigned_match a); intros; InvEval.
   TrivialExists.
   subst. rewrite Val.zero_ext_and. rewrite Val.and_assoc.
-  rewrite Int.and_commut. apply eval_andimm; auto. compute; auto.
+  rewrite Int.and_commut. apply eval_andimm; auto. omega.
   TrivialExists.
 Qed.
 
diff --git a/x86/ValueAOp.v b/x86/ValueAOp.v
index d0b8427a..e5584b6a 100644
--- a/x86/ValueAOp.v
+++ b/x86/ValueAOp.v
@@ -261,6 +261,25 @@ Proof.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
   apply select_sound; auto. eapply eval_static_condition_sound; eauto.
 Qed.
-
+(*
+Theorem eval_static_addressing_sound_none:
+  forall addr vargs aargs,
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = None ->
+  list_forall2 (vmatch bc) vargs aargs ->
+  (eval_static_addressing addr aargs) = Vbot.
+Proof.
+  unfold eval_addressing, eval_static_addressing.
+  intros until aargs. intros Heval_none Hlist.
+  destruct (Archi.ptr64).
+  inv Hlist.
+  destruct addr; trivial; discriminate.
+  inv H0.
+  destruct addr; trivial; try discriminate. simpl in *.
+  inv H2.
+  destruct addr; trivial; discriminate.
+  inv H3;
+  destruct addr; trivial; discriminate.
+Qed.
+*)
 End SOUNDNESS.
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-06 22:45:05 +0200
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-06 22:45:05 +0200
commit	5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633 (patch)
tree	299bdd3c6068f121ca243d8602addcd27d690fd2
parent	c420bc8d3b87d71c38209b5ab8bca22875466362 (diff)
parent	c6356cdc5f567a317afcb99cb004354cf7dcce0f (diff)
download	compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.tar.gz compcert-kvx-5a3d4adc631f5b5d3dc4585b7b28ea18b6faf633.zip